$summary[self::HASH] = $link_id; $summary[self::TYPE] = "link"; /* NOTE(review): the link keys are stored under the HTTP_CODE field of the link summary -- confirm this reuse of the field is intentional */ $summary[self::HTTP_CODE] = $link_keys; $summary[self::LANG] = $lang; $this->found_sites[self::LINK_SEEN_URLS][$part_num][] = $summary; $link_word_lists = PhraseParser::extractPhrasesInLists($link_text, $lang); $link_meta_ids = PhraseParser::calculateLinkMetas($url, $link_host, $link_text, $site_url); /* create the IndexShard for this partition the first time a document is added to it */ if (!isset($this->found_sites[self::INVERTED_INDEX][$part_num])) { $this->found_sites[self::INVERTED_INDEX][$part_num] = new IndexShard("fetcher_shard_{$part_num}"); } $this->found_sites[self::INVERTED_INDEX][$part_num]->addDocumentWords($link_keys, self::NEEDS_OFFSET_FLAG, $link_word_lists, $link_meta_ids, PhraseParser::$materialized_metas, false, $link_rank); } } /* log a notice when inverting this site took longer than the threshold of 5 (seconds, per the log message) */ $iterim_elapse = changeInMicrotime($interim_time); if ($iterim_elapse > 5) { crawlLog("..Inverting " . $site[self::URL] . "...took > 5s."); } crawlTimeoutLog("..Still building inverted index. Have processed " . "%s of %s documents.\nLast url processed was %s.", $i, $num_seen, $site[self::URL]); } if ($this->crawl_type == self::ARCHIVE_CRAWL) { $this->recrawl_check_scheduler = true; } crawlLog(" Build mini inverted index time " . changeInMicrotime($start_time)); } } /* * Instantiates a Fetcher and runs it by calling its start() method. */ $fetcher = new Fetcher(); $fetcher->start();