/**
 * Looks up the category of a URL through the ISS filter-database "urlcheck" page.
 *
 * The proxy list is read once from proxies.txt (next to this file) into
 * self::$_proxies. When the service answers with its rate-limit error, or with
 * a page that does not contain a classification, the lookup is retried through
 * the next entry of the proxy list.
 *
 * @param string $url   URL to classify.
 * @param string $proxy Proxy handed to self::page_post(); empty string by default.
 *
 * @return string The captured category text, '' when the service reports the
 *                URL as not categorised, or 'Неопознанная' once the proxy list
 *                is exhausted.
 */
public static function recognizeByUrlCheck($url, $proxy = "")
{
    // Assigned before the guard below, which returns it.
    $unrecognized = 'Неопознанная';

    if (self::$_proxies == null) {
        // Drop line endings and blank lines so every entry is a bare proxy
        // string; file() returns false when the list cannot be read.
        $lines = file(dirname(__FILE__) . "/proxies.txt", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        self::$_proxies = ($lines === false) ? array() : $lines;
    }

    $proxyCount = count(self::$_proxies);

    // The counter is parked past the end of the list once every proxy has
    // been tried (see below), so later calls stop here without a request.
    if (self::$_current_proxy > $proxyCount) {
        return $unrecognized;
    }

    $recognurl = "http://filterdb.iss.net/urlcheck/url-report-dboem.asp";
    $user_agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
    // NOTE(review): $url is appended without urlencode(); a URL containing
    // '&' or '+' may be cut short or altered, depending on how page_post()
    // sends $params - confirm.
    $params = "fullurl=" . $url;

    $content = self::page_post($recognurl, $user_agent, $params, $proxy);

    if (strstr($content, 'Not yet categorized or does not fit into any category')) {
        return '';
    }

    $rateLimited = strstr($content, 'Error: user time restriction') !== false;

    // Only trust $matches when the pattern actually matched.
    if (!$rateLimited
        && preg_match("/<\\/b> is classified as<\\/h2><ul><li>([^<.]+)<\\/li><\\/ul>/", $content, $matches)
    ) {
        return $matches[1];
    }

    // Rate-limited or unparseable answer: move on to the next proxy, but never
    // index past the end of the list.
    $next = self::$_current_proxy + 1;
    if ($next >= $proxyCount) {
        self::$_current_proxy = $proxyCount + 1;
        return $unrecognized;
    }
    self::$_current_proxy = $next;

    // NOTE(review): the retry goes through self::recognize(), not this method;
    // kept as in the original - confirm that is the intended entry point.
    return self::recognize($url, self::$_proxies[$next]);
}
/**
 * Runs the recognition pipeline for one piece of handwriting and stores the
 * outcome on $ret.
 *
 * Three stages run in order: feature extraction, candidate lookup and
 * candidate matching. The elapsed time of each stage is appended to
 * $ret->debug. Any Exception is caught and reported through $ret->msgno and
 * $ret->msg instead of propagating.
 *
 * @param object $ret     Result holder (by reference); receives debug, msgno,
 *                        msg and, on success, res.
 * @param mixed  $writing Input handed to Character::create_sparse_writing()
 *                        (by reference).
 * @param mixed  $rank    Forwarded to Dictionary::get_candidates_by_strokes().
 *
 * @return void
 */
function recognize(&$ret, &$writing, $rank)
{
    try {
        // Stage 1: feature extraction.
        $stageStart = microtime(true);
        $character = new Character();
        $sparse = $character->create_sparse_writing($writing); // skeletonise the strokes
        // Type of the first stroke (horizontal / vertical / left-falling / dot / turning).
        $firstStrokeType = $character->get_first_stroke_type($sparse);
        $strokeCount = count($sparse->s); // number of strokes
        $extractor = new Feature();
        $featureSet = $extractor->make_feature($sparse); // extract the features
        $stageEnd = microtime(true);
        $elapsed = debug_time($stageStart, $stageEnd);
        $ret->debug .= "计算特征所需的时间:" . $elapsed . "<br>";

        // Stage 2: preliminary candidate lookup.
        $stageStart = microtime(true);
        $dictionary = new Dictionary();
        $candidates = $dictionary->get_candidates_by_strokes($strokeCount, $firstStrokeType, $rank);
        $stageEnd = microtime(true);
        $elapsed = debug_time($stageStart, $stageEnd);
        $ret->debug .= "获取候选字所需的时间:" . $elapsed . "<br>";

        // Stage 3: match the candidates against the features.
        $stageStart = microtime(true);
        $matcher = new Recognizer();
        // $debug_msg is not set beforehand; it is passed exactly as the original did.
        $matched = $matcher->get_results($candidates, $featureSet, $debug_msg);
        $stageEnd = microtime(true);
        $elapsed = debug_time($stageStart, $stageEnd);
        $ret->debug .= "匹配候选字所需的时间:" . $elapsed . "<br>";

        if ($matched == null) {
            // Nothing matched: report the "no candidate" error.
            $ret->msgno = MSG_ERR;
            $ret->msg = MSG_ERR_NOCAND;
        } else {
            $ret->msgno = MSG_OK;
            $ret->msg = MSG_OK_TXT;
            $ret->res = $matched;
        }
    } catch (Exception $error) {
        $ret->msgno = MSG_ERR;
        $ret->msg = $error->getMessage();
    }
}
// Grid for the matched URL/category rows, plus its pager. The pager receives
// the matched-row count for the grid's current filter field and filter value;
// the trailing 10 is presumably the page size - confirm against ajax_grid_pager.
$url_cats_matched_grid = new ajax_grid('url_cats_grid_matched', $url_cats_matched_ds, $ajaxbuf_url_matched_cats);
$url_cats_matched_grid_pager = new ajax_grid_pager('url_cats_grid_pager_matched', $BILL->GetCategoriesUrlMatchedCount($url_cats_matched_grid->get_filterfield(), $url_cats_matched_grid->get_filtering()), 10);
$url_cats_matched_grid->attach_pager($url_cats_matched_grid_pager);

/**
 * Check whether an action was requested and, if so, carry it out.
 */
if ($emanager->isAnyAction()) {
    // Special actions, identified by name rather than by an $emanager->action constant.
    if ($emanager->getAction() == 'changeCatByName') {
        // The item arrives as JSON; its url and name fields select the new category.
        $item = json_decode($emanager->getItem());
        $BILL->UpdateUrlCategoryMatchByName($item->url, $item->name);
    } elseif ($emanager->getAction() == 'recognizeAll') {
        // Fetch rows using the *unmatched* grid's current page, sort column and
        // sort direction, then try to recognise each URL.
        $url_cats = $BILL->GetUrlCategoriesMatch($url_cats_unmatched_grid_pager->get_curpage(), 10, $url_cats_unmatched_grid->get_sorting(), $url_cats_unmatched_grid->get_sort_direction(), array(0), array());
        require_once dirname(__FILE__) . '/CADBiS/recognize.php';
        foreach ($url_cats as $url) {
            $catname = Recognizer::recognizeByUrlCheck($url['url']);
            // Only a non-empty result is stored as the URL's category.
            if (!empty($catname)) {
                $BILL->UpdateUrlCategoryMatchByName($url['url'], $catname);
            }
        }
    }
    // Standard actions, matched against the $emanager->action constants.
    switch ($emanager->getAction()) {
        case $emanager->action->UPD:
            $item = json_decode($emanager->getItem());
            $BILL->UpdateUrlCategoryMatch($item->u2cid, $item->url, $item->cid);
            break;
        case $emanager->action->DEL:
            $item = json_decode($emanager->getItem());
            $BILL->DeleteUrlCategoryMatch($item->u2cid);
            break;
        case $emanager->action->ADD:
/**
 * Triggers a recognize request based on the current params.
 *
 * A new Recognizer is constructed with $country_id and its recognize() method
 * is called with the same value; whatever that call returns is discarded.
 *
 * @param string|null $country_id Optional; defaults to null.
 *
 * @return $this The current object.
 */
public function recognize($country_id = null)
{
    $recognizer = new Recognizer($country_id);
    $recognizer->recognize($country_id);

    return $this;
}
// (These case labels belong to a switch that is opened above this fragment.)
// 'replace': re-assign the keyword to category $setcid, then resolve its conflict.
case 'replace':
    $BILL->ReplaceUrlCategoryKeyword($word, $setcid);
    $BILL->ResolveUrlCategoryConflict($word);
    break;
// 'unsense': drop the keyword, record it as an "unsense" word, then resolve its conflict.
case 'unsense':
    $BILL->DeleteUrlCategoryKeyword($word);
    $BILL->AddUrlCategoryUnsenseword($word);
    $BILL->ResolveUrlCategoryConflict($word);
    break;
}
}
}

// Recognize content: runs when the form's btnSubmit was posted or when
// 'manualcheck' is present in the query string.
if (isset($_POST['btnSubmit']) || isset($_GET['manualcheck'])) {
    $uswords = $BILL->GetUrlCategoriesUnsenseWords();
    // The last argument is true when a 'debug' request parameter is present.
    $result = Recognizer::recognizeByMyself($url, $cats, $uswords, $kwds_weights, isset($_REQUEST['debug']));
}

// Other (finding conflicts etc): only when a recognition result exists and $set is defined.
if (isset($result) && isset($set)) {
    $conflict_cats = array();
    foreach ($result['cwords'] as $cword => $wcount) {
        // Skip words whose value is below Recognizer::MINIMAL_CWORD_COEF.
        if ($wcount < Recognizer::MINIMAL_CWORD_COEF) {
            continue;
        }
        // Look up the category id stored for this keyword.
        $c_cid = $BILL->GetUrlCategoryKeyword($cword);
        if ($c_cid > 0) {
            // The keyword is stored under a different category than the one
            // being set: remember it and record the conflict.
            if ($c_cid != $setcid) {
                $conflict_cats[$c_cid][$cword] = $wcount;
                $BILL->AddUrlCategoryConflict($cword, $setcid, $c_cid, $url);
            }
        } else {
/**
 * Debug helper: decodes a hard-coded JSON sample, dumps it and runs it through
 * the recognizer.
 *
 * The sample holds a "strokes" list (each entry a list of x/y points) together
 * with width, height, left, top and type fields. The decoded object is shown
 * with debug_show() and then passed as BOTH arguments to
 * Recognizer::tomoe_recognizer_get_candidates(); the result of that call is
 * not used.
 *
 * @return void
 */
function debug_array()
{
    // The literal is kept byte-for-byte, including its embedded line break.
    $sampleJson = '{"strokes":[{"points":[{"x":60,"y":66},{"x":61,"y":67}, {"x":63,"y":70},{"x":66,"y":77},{"x":69,"y":84},{"x":71,"y":91}, {"x":71,"y":91}]},{"points":[{"x":36,"y":130},{"x":37,"y":130}, {"x":38,"y":130},{"x":38,"y":131},{"x":40,"y":131},{"x":47,"y":132}, {"x":53,"y":132},{"x":57,"y":130},{"x":67,"y":127},{"x":82,"y":121}, {"x":87,"y":119},{"x":91,"y":116},{"x":92,"y":115},{"x":93,"y":115}, {"x":92,"y":115},{"x":91,"y":116},{"x":89,"y":119},{"x":86,"y":123}, {"x":85,"y":126},{"x":83,"y":131},{"x":80,"y":135},{"x":78,"y":143}, {"x":75,"y":152},{"x":72,"y":162},{"x":68,"y":175},{"x":65,"y":188}, {"x":64,"y":200},{"x":64,"y":212},{"x":64,"y":222},{"x":64,"y":231}, {"x":64,"y":239},{"x":65,"y":242},{"x":66,"y":244},{"x":67,"y":246}, {"x":69,"y":248},{"x":71,"y":248},{"x":72,"y":248},{"x":75,"y":248}, {"x":79,"y":246},{"x":88,"y":241},{"x":98,"y":235},{"x":104,"y":231}, {"x":104,"y":230},{"x":104,"y":230}]},{"points":[{"x":132,"y":56}, {"x":132,"y":57},{"x":133,"y":60},{"x":137,"y":68},{"x":145,"y":78}, {"x":149,"y":84},{"x":150,"y":85},{"x":150,"y":85}]},{"points": [{"x":243,"y":59},{"x":242,"y":59},{"x":227,"y":72},{"x":214,"y":82}, {"x":205,"y":88},{"x":205,"y":88}]},{"points":[{"x":183,"y":46}, {"x":183,"y":47},{"x":174,"y":75},{"x":154,"y":116},{"x":143,"y":132}, {"x":141,"y":133},{"x":140,"y":133},{"x":139,"y":133},{"x":138,"y":133}, {"x":137,"y":133},{"x":136,"y":134},{"x":135,"y":134},{"x":133,"y":134}, {"x":132,"y":134},{"x":129,"y":134},{"x":127,"y":134},{"x":125,"y":134}, {"x":124,"y":134},{"x":123,"y":134},{"x":120,"y":134},{"x":119,"y":134}, {"x":119,"y":134}]},{"points":[{"x":164,"y":99},{"x":165,"y":99}, {"x":166,"y":100},{"x":170,"y":104},{"x":172,"y":107},{"x":182,"y":116}, {"x":188,"y":122},{"x":196,"y":128},{"x":203,"y":132},{"x":211,"y":135}, {"x":212,"y":135},{"x":213,"y":135},{"x":214,"y":135},{"x":215,"y":135}, {"x":215,"y":135}]},{"points":[{"x":131,"y":174},{"x":132,"y":175}, 
{"x":139,"y":183},{"x":153,"y":196},{"x":161,"y":203},{"x":164,"y":206}, {"x":165,"y":206},{"x":165,"y":206}]},{"points":[{"x":249,"y":172}, {"x":248,"y":172},{"x":243,"y":178},{"x":234,"y":186},{"x":227,"y":192}, {"x":219,"y":196},{"x":217,"y":197},{"x":217,"y":197}]},{"points": [{"x":194,"y":164},{"x":193,"y":164},{"x":186,"y":180},{"x":172,"y":213}, {"x":163,"y":228},{"x":130,"y":247},{"x":130,"y":247}]},{"points": [{"x":177,"y":211},{"x":177,"y":212},{"x":184,"y":225},{"x":196,"y":246}, {"x":202,"y":253},{"x":205,"y":257},{"x":209,"y":259},{"x":209,"y":260}, {"x":209,"y":260}]}],"width":213,"height":214,"left":36,"top":46,"type":1}';

    $sample = json_decode($sampleJson);
    debug_show($sample);

    $recognizer = new Recognizer();
    // Not read anywhere below; retained from the original debugging code.
    $nodePairs = array();
    // The same decoded object is deliberately supplied for both parameters.
    $recognizer->tomoe_recognizer_get_candidates($sample, $sample);

    // Earlier ad-hoc output (distance, point coordinates, a dump of
    // $nodePairs, debug_show of new points) was already disabled here.
}