Пример #1
0
 /**
  * Looks up the category of a URL through the ISS filter database web form.
  *
  * The form is posted with self::page_post(). When the response carries the
  * service's rate-limit error, or contains no classification at all, the
  * lookup is retried through the next entry of proxies.txt.
  *
  * @param string $url   URL to classify.
  * @param string $proxy Proxy handed to self::page_post(); empty string by default.
  * @return mixed The category name reported by the service, '' when the
  *               service says the URL is not categorized, 'Неопознанная'
  *               when no further proxy is available, or whatever the retry
  *               through self::recognize() returns.
  */
 public static function recognizeByUrlCheck($url, $proxy = "")
 {
     // Russian for "Unrecognized". Defined first so every early return can use it.
     $unrecognized = 'Неопознанная';
     if (self::$_proxies == null) {
         // file() keeps line endings unless told otherwise and returns false
         // on failure; store clean entries or an empty list.
         $proxyLines = file(dirname(__FILE__) . "/proxies.txt", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
         self::$_proxies = is_array($proxyLines) ? $proxyLines : array();
     }
     // The proxy index has already run past the list: nothing left to try.
     if (self::$_current_proxy > count(self::$_proxies)) {
         return $unrecognized;
     }
     $recognurl = "http://filterdb.iss.net/urlcheck/url-report-dboem.asp";
     $user_agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
     $params = "fullurl=" . $url;
     $content = (string) self::page_post($recognurl, $user_agent, $params, $proxy);
     if (strpos($content, 'Not yet categorized or does not fit into any category') !== false) {
         return '';
     }
     $rateLimited = strpos($content, 'Error: user time restriction') !== false;
     if (!$rateLimited && preg_match("/<\\/b> is classified as<\\/h2><ul><li>([^<.]+)<\\/li><\\/ul>/", $content, $matches)) {
         return $matches[1];
     }
     // Rate-limited or unparseable response: advance to the next proxy, but
     // never index past the end of the list.
     $nextProxy = ++self::$_current_proxy;
     if (!isset(self::$_proxies[$nextProxy])) {
         return $unrecognized;
     }
     // NOTE(review): the retry goes through self::recognize(), exactly as the
     // original code did; confirm that method accepts ($url, $proxy).
     return self::recognize($url, self::$_proxies[$nextProxy]);
 }
Пример #2
0
/**
 * Runs the recognition pipeline for one handwritten character and stores the
 * outcome on $ret.
 *
 * Three stages are timed separately; each appends a line to $ret->debug:
 * feature extraction, candidate lookup and candidate matching.
 *
 * @param object $ret     Result holder; receives debug, msgno, msg and, on success, res.
 * @param mixed  $writing Raw writing handed to Character::create_sparse_writing().
 * @param mixed  $rank    Forwarded to Dictionary::get_candidates_by_strokes().
 * @return void
 */
function recognize(&$ret, &$writing, $rank)
{
    try {
        // Stage 1: reduce the strokes to a sparse (skeleton) form and extract features.
        $stageStart = microtime(true);
        $character = new Character();
        $sparse = $character->create_sparse_writing($writing);
        // Type of the first stroke (horizontal, vertical, left-falling, dot, turning).
        $firstStrokeType = $character->get_first_stroke_type($sparse);
        // Number of strokes in the sparse writing.
        $strokeCount = count($sparse->s);
        $extractor = new Feature();
        $featureSet = $extractor->make_feature($sparse);
        $ret->debug .= "计算特征所需的时间:" . debug_time($stageStart, microtime(true)) . "<br>";

        // Stage 2: preliminary candidate characters by stroke count and first stroke type.
        $stageStart = microtime(true);
        $dictionary = new Dictionary();
        $candidates = $dictionary->get_candidates_by_strokes($strokeCount, $firstStrokeType, $rank);
        $ret->debug .= "获取候选字所需的时间:" . debug_time($stageStart, microtime(true)) . "<br>";

        // Stage 3: match the candidates against the extracted features.
        $stageStart = microtime(true);
        $matcher = new Recognizer();
        $matched = $matcher->get_results($candidates, $featureSet, $debug_msg);
        $ret->debug .= "匹配候选字所需的时间:" . debug_time($stageStart, microtime(true)) . "<br>";

        if ($matched == null) {
            // No result came back from the matcher.
            $ret->msgno = MSG_ERR;
            $ret->msg = MSG_ERR_NOCAND;
        } else {
            $ret->msgno = MSG_OK;
            $ret->msg = MSG_OK_TXT;
            $ret->res = $matched;
        }
    } catch (Exception $error) {
        // Any exception from the pipeline is reported through the result holder.
        $ret->msgno = MSG_ERR;
        $ret->msg = $error->getMessage();
    }
}
// Grid with id 'url_cats_grid_matched', built from $url_cats_matched_ds and $ajaxbuf_url_matched_cats.
$url_cats_matched_grid = new ajax_grid('url_cats_grid_matched', $url_cats_matched_ds, $ajaxbuf_url_matched_cats);
// Pager for that grid: the count comes from $BILL using the grid's current filter field and filter value.
// The trailing 10 is presumably the page size — TODO confirm against ajax_grid_pager.
$url_cats_matched_grid_pager = new ajax_grid_pager('url_cats_grid_pager_matched', $BILL->GetCategoriesUrlMatchedCount($url_cats_matched_grid->get_filterfield(), $url_cats_matched_grid->get_filtering()), 10);
// Link the pager to the grid.
$url_cats_matched_grid->attach_pager($url_cats_matched_grid_pager);
/**
 * Check whether an action was requested and, if so, handle it.
 */
if ($emanager->isAnyAction()) {
    //special actions
    if ($emanager->getAction() == 'changeCatByName') {
        $item = json_decode($emanager->getItem());
        $BILL->UpdateUrlCategoryMatchByName($item->url, $item->name);
    } elseif ($emanager->getAction() == 'recognizeAll') {
        $url_cats = $BILL->GetUrlCategoriesMatch($url_cats_unmatched_grid_pager->get_curpage(), 10, $url_cats_unmatched_grid->get_sorting(), $url_cats_unmatched_grid->get_sort_direction(), array(0), array());
        require_once dirname(__FILE__) . '/CADBiS/recognize.php';
        foreach ($url_cats as $url) {
            $catname = Recognizer::recognizeByUrlCheck($url['url']);
            if (!empty($catname)) {
                $BILL->UpdateUrlCategoryMatchByName($url['url'], $catname);
            }
        }
    }
    switch ($emanager->getAction()) {
        case $emanager->action->UPD:
            $item = json_decode($emanager->getItem());
            $BILL->UpdateUrlCategoryMatch($item->u2cid, $item->url, $item->cid);
            break;
        case $emanager->action->DEL:
            $item = json_decode($emanager->getItem());
            $BILL->DeleteUrlCategoryMatch($item->u2cid);
            break;
        case $emanager->action->ADD:
Пример #4
0
 /**
  * Triggers a recognize request for the given country.
  *
  * A new Recognizer is built with $country_id and its recognize() method is
  * called with the same value; the call's return value is not used.
  *
  * @param string|null $country_id Optional country identifier.
  * @return Name The current object ($this).
  */
 public function recognize($country_id = null)
 {
     $recognizer = new Recognizer($country_id);
     $recognizer->recognize($country_id);

     return $this;
 }
             case 'replace':
                 $BILL->ReplaceUrlCategoryKeyword($word, $setcid);
                 $BILL->ResolveUrlCategoryConflict($word);
                 break;
             case 'unsense':
                 $BILL->DeleteUrlCategoryKeyword($word);
                 $BILL->AddUrlCategoryUnsenseword($word);
                 $BILL->ResolveUrlCategoryConflict($word);
                 break;
         }
     }
 }
 // Recognize content: runs only when the btnSubmit field was posted or a
 // manualcheck query parameter is present; otherwise this block assigns nothing.
 if (isset($_POST['btnSubmit']) || isset($_GET['manualcheck'])) {
     // Unsense-word list fetched from $BILL and handed to the recognizer below.
     $uswords = $BILL->GetUrlCategoriesUnsenseWords();
     // The last argument is true when a "debug" request parameter is set
     // (presumably a debug flag — TODO confirm against recognizeByMyself).
     $result = Recognizer::recognizeByMyself($url, $cats, $uswords, $kwds_weights, isset($_REQUEST['debug']));
 }
 // Other (finding conflicts etc)
 if (isset($result) && isset($set)) {
     $conflict_cats = array();
     foreach ($result['cwords'] as $cword => $wcount) {
         if ($wcount < Recognizer::MINIMAL_CWORD_COEF) {
             continue;
         }
         $c_cid = $BILL->GetUrlCategoryKeyword($cword);
         if ($c_cid > 0) {
             if ($c_cid != $setcid) {
                 $conflict_cats[$c_cid][$cword] = $wcount;
                 $BILL->AddUrlCategoryConflict($cword, $setcid, $c_cid, $url);
             }
         } else {
Пример #6
0
/**
 * Manual debugging aid: decodes a hard-coded sample writing, dumps it with
 * debug_show() and feeds it to Recognizer::tomoe_recognizer_get_candidates().
 *
 * @return void
 */
function debug_array()
{
    // Sample writing: a list of strokes (each a list of x/y points) plus
    // width, height, left, top and type fields.
    $sampleJson = '{"strokes":[{"points":[{"x":60,"y":66},{"x":61,"y":67},
{"x":63,"y":70},{"x":66,"y":77},{"x":69,"y":84},{"x":71,"y":91},
{"x":71,"y":91}]},{"points":[{"x":36,"y":130},{"x":37,"y":130},
{"x":38,"y":130},{"x":38,"y":131},{"x":40,"y":131},{"x":47,"y":132},
{"x":53,"y":132},{"x":57,"y":130},{"x":67,"y":127},{"x":82,"y":121},
{"x":87,"y":119},{"x":91,"y":116},{"x":92,"y":115},{"x":93,"y":115},
{"x":92,"y":115},{"x":91,"y":116},{"x":89,"y":119},{"x":86,"y":123},
{"x":85,"y":126},{"x":83,"y":131},{"x":80,"y":135},{"x":78,"y":143},
{"x":75,"y":152},{"x":72,"y":162},{"x":68,"y":175},{"x":65,"y":188},
{"x":64,"y":200},{"x":64,"y":212},{"x":64,"y":222},{"x":64,"y":231},
{"x":64,"y":239},{"x":65,"y":242},{"x":66,"y":244},{"x":67,"y":246},
{"x":69,"y":248},{"x":71,"y":248},{"x":72,"y":248},{"x":75,"y":248},
{"x":79,"y":246},{"x":88,"y":241},{"x":98,"y":235},{"x":104,"y":231},
{"x":104,"y":230},{"x":104,"y":230}]},{"points":[{"x":132,"y":56},
{"x":132,"y":57},{"x":133,"y":60},{"x":137,"y":68},{"x":145,"y":78},
{"x":149,"y":84},{"x":150,"y":85},{"x":150,"y":85}]},{"points":
[{"x":243,"y":59},{"x":242,"y":59},{"x":227,"y":72},{"x":214,"y":82},
{"x":205,"y":88},{"x":205,"y":88}]},{"points":[{"x":183,"y":46},
{"x":183,"y":47},{"x":174,"y":75},{"x":154,"y":116},{"x":143,"y":132},
{"x":141,"y":133},{"x":140,"y":133},{"x":139,"y":133},{"x":138,"y":133},
{"x":137,"y":133},{"x":136,"y":134},{"x":135,"y":134},{"x":133,"y":134},
{"x":132,"y":134},{"x":129,"y":134},{"x":127,"y":134},{"x":125,"y":134},
{"x":124,"y":134},{"x":123,"y":134},{"x":120,"y":134},{"x":119,"y":134},
{"x":119,"y":134}]},{"points":[{"x":164,"y":99},{"x":165,"y":99},
{"x":166,"y":100},{"x":170,"y":104},{"x":172,"y":107},{"x":182,"y":116},
{"x":188,"y":122},{"x":196,"y":128},{"x":203,"y":132},{"x":211,"y":135},
{"x":212,"y":135},{"x":213,"y":135},{"x":214,"y":135},{"x":215,"y":135},
{"x":215,"y":135}]},{"points":[{"x":131,"y":174},{"x":132,"y":175},
{"x":139,"y":183},{"x":153,"y":196},{"x":161,"y":203},{"x":164,"y":206},
{"x":165,"y":206},{"x":165,"y":206}]},{"points":[{"x":249,"y":172},
{"x":248,"y":172},{"x":243,"y":178},{"x":234,"y":186},{"x":227,"y":192},
{"x":219,"y":196},{"x":217,"y":197},{"x":217,"y":197}]},{"points":
[{"x":194,"y":164},{"x":193,"y":164},{"x":186,"y":180},{"x":172,"y":213},
{"x":163,"y":228},{"x":130,"y":247},{"x":130,"y":247}]},{"points":
[{"x":177,"y":211},{"x":177,"y":212},{"x":184,"y":225},{"x":196,"y":246},
{"x":202,"y":253},{"x":205,"y":257},{"x":209,"y":259},{"x":209,"y":260},
{"x":209,"y":260}]}],"width":213,"height":214,"left":36,"top":46,"type":1}';
    $sampleWriting = json_decode($sampleJson);
    debug_show($sampleWriting);
    $recognizer = new Recognizer();
    // The same decoded writing is passed as both arguments; the return value is not used.
    $recognizer->tomoe_recognizer_get_candidates($sampleWriting, $sampleWriting);
}