Exemplo n.º 1
0
 /**
  * Prints a short summary of the index to the console.
  *
  * Reports the document count returned by `Page::find('count')` and then
  * a hint showing the command used to crawl a URL again. Blank lines are
  * emitted before, between and after the two messages.
  *
  * @return void
  * @author John Anderson
  */
 public function stats()
 {
     $total = Page::find('count');

     // Build both messages up front, then emit them separated by blank lines.
     $summary = "Total indexed documents: {:cyan}{$total}{:end}";
     $hint = "To re-index, use {:purple}li3 crawl{:end} {:cyan}--url={:end}{:green}[URL]{:end}";

     $this->out();
     $this->out($summary);
     $this->out();
     $this->out($hint);
     $this->out();
 }
Exemplo n.º 2
0
 /**
  * Processes a single URL and, recursively, every link found on it.
  *
  * When no Page record exists for the URL, the URL is fetched, stored as a
  * new Page record, passed to generateStems(), and each extracted link is
  * processed in turn. A URL that already has a record is returned as-is
  * without being fetched again; this lookup is the only thing that ends the
  * recursion on circular links, so it relies on the record having been saved.
  *
  * @param string $rawURL URL of the page to process.
  * @return object The Page record for $rawURL: the one found, or the one just created.
  * @author John Anderson
  */
 protected function process($rawURL)
 {
     $page = Page::find('first', array('conditions' => array('url' => $rawURL)));
     if ($page !== null) {
         // Already indexed: nothing to fetch, and no links to follow again.
         return $page;
     }

     $this->delegate->out("Processing {:cyan}{$rawURL}{:end}...");
     $content = $this->fetch($rawURL);
     $links = $this->extractLinks($content);

     $page = Page::create();
     // Take the timestamp once so created and modified are identical on a new record.
     $now = date('Y-m-d H:i:s');
     $page->created = $now;
     $page->modified = $now;
     $page->url = $rawURL;
     $page->content = $content;
     $page->save();
     $this->generateStems($content, $page);

     foreach ($links as $link) {
         // A link that does not contain our host is treated as relative and
         // gets the base URL prepended.
         // NOTE(review): an absolute link to a different host would also be
         // prefixed here - confirm extractLinks() filters those out.
         if (strpos($link, $this->url['parts']['host']) === false) {
             $link = $this->url['raw'] . $link;
         }
         $this->process($link);
     }

     return $page;
 }