/**
 * Prints a short summary of the index.
 *
 * Asks `Page` for its record count and writes it through `out()`, followed by
 * a usage hint for re-indexing with `li3 crawl`. Empty `out()` calls pad the
 * two messages with blank output before, between and after them.
 *
 * @return void
 * @author John Anderson
 */
public function stats() {
	$count = Page::find('count');

	// Build both messages up front so the output sequence below reads as a layout.
	$summary = "Total indexed documents: {:cyan}{$count}{:end}";
	$hint = "To re-index, use {:purple}li3 crawl{:end} {:cyan}--url={:end}{:green}[URL]{:end}";

	$this->out();
	$this->out($summary);
	$this->out();
	$this->out($hint);
	$this->out();
}
/**
 * Fetches the page, scrapes it for links, and creates the
 * related database records.
 *
 * When a `Page` record already exists for `$rawURL` nothing is fetched and
 * that record is returned unchanged. Otherwise the page is fetched, stored,
 * stemmed via `generateStems()`, and every link extracted from it is processed
 * recursively. Because each URL is saved before its links are followed, the
 * lookup at the top also keeps the recursion from revisiting a URL.
 *
 * @param string $rawURL URL of the page to fetch and index.
 * @return mixed The record returned by `Page::find()` when one already exists,
 *         otherwise the record created and saved here.
 * @author John Anderson
 */
protected function process($rawURL) {
	$page = Page::find('first', array('conditions' => array('url' => $rawURL)));
	if ($page !== null) {
		// Already indexed: skip fetching and do not follow its links again.
		return $page;
	}

	$this->delegate->out("Processing {:cyan}{$rawURL}{:end}...");
	$string = $this->fetch($rawURL);
	$links = $this->extractLinks($string);

	// Use one timestamp so `created` and `modified` cannot differ when the
	// clock ticks over between two separate date() calls.
	$now = date('Y-m-d H:i:s');

	$page = Page::create();
	$page->created = $now;
	$page->modified = $now;
	$page->url = $rawURL;
	$page->content = $string;
	$page->save();

	$this->generateStems($string, $page);

	$host = $this->url['parts']['host'];
	foreach ($links as $link) {
		// A link that does not mention the crawl host is treated as relative
		// and prefixed with the raw base URL. strpos() is the plain containment
		// test; it yields the same true/false outcome as the strstr() check.
		// NOTE(review): an absolute link to a different host would also be
		// prefixed here - confirm extractLinks() only yields same-site links.
		if (strpos($link, $host) === false) {
			$link = $this->url['raw'] . $link;
		}
		$this->process($link);
	}

	return $page;
}