コード例 #1
0
ファイル: fetcher.php プロジェクト: yakar/yioop
                    $summary[self::HASH] = $link_id;
                    $summary[self::TYPE] = "link";
                    $summary[self::HTTP_CODE] = $link_keys;
                    $summary[self::LANG] = $lang;
                    $this->found_sites[self::LINK_SEEN_URLS][$part_num][] = $summary;
                    $link_word_lists = PhraseParser::extractPhrasesInLists($link_text, $lang);
                    $link_meta_ids = PhraseParser::calculateLinkMetas($url, $link_host, $link_text, $site_url);
                    if (!isset($this->found_sites[self::INVERTED_INDEX][$part_num])) {
                        $this->found_sites[self::INVERTED_INDEX][$part_num] = new IndexShard("fetcher_shard_{$part_num}");
                    }
                    $this->found_sites[self::INVERTED_INDEX][$part_num]->addDocumentWords($link_keys, self::NEEDS_OFFSET_FLAG, $link_word_lists, $link_meta_ids, PhraseParser::$materialized_metas, false, $link_rank);
                }
            }
            $iterim_elapse = changeInMicrotime($interim_time);
            if ($iterim_elapse > 5) {
                crawlLog("..Inverting " . $site[self::URL] . "...took > 5s.");
            }
            crawlTimeoutLog("..Still building inverted index. Have processed " . "%s of %s documents.\nLast url processed was %s.", $i, $num_seen, $site[self::URL]);
        }
        if ($this->crawl_type == self::ARCHIVE_CRAWL) {
            $this->recrawl_check_scheduler = true;
        }
        crawlLog("  Build mini inverted index time " . changeInMicrotime($start_time));
    }
}
/*
 * Script entry point: instantiates the Fetcher and runs it by calling
 * its start() method.
 */
$fetcher = new Fetcher();
$fetcher->start();