Пример #1
0
 /**
  * Normalises a fetched document, records its content hash and saves its nodes.
  *
  * Any content type other than "application/pdf" is handled as HTML: entities
  * are decoded, the title comes from HTMLRobot::findTitle() and is re-encoded
  * with htmlentities(), and the content is passed through HTMLRobot::clean().
  * PDF content is left untouched. An empty title falls back to the URL.
  *
  * An Exception raised along the way is written to the collection log and is
  * not rethrown.
  *
  * @param object $document Mutated in place; reads contentType, content, title, url and id.
  * @return void
  */
 protected function analyze($document)
 {
     try {
         if ($document->contentType != "application/pdf") {
             // Decode first so the title lookup and the cleaner work on real characters.
             $document->content = html_entity_decode($document->content, ENT_QUOTES);
             $document->title = HTMLRobot::findTitle($document->content);
             $document->title = htmlentities($document->title, ENT_QUOTES);
             $document->content = HTMLRobot::clean($document->content);
         }
         // Loose comparison on purpose: a null or unset title (this method never
         // assigns one for PDFs) must fall back to the URL as well.
         if ($document->title == "") {
             $document->title = $document->url;
         }
         $md5 = md5($document->content);
         $this->setMD5($document->id, $md5);
         $this->saveNodes($document);
     } catch (Exception $e) {
         $this->collection->log("failed adding {$document->url} " . $e->getMessage());
     }
 }
Пример #2
0
 /**
  * Filters, normalises and stores a document as "title" and "content" facets.
  *
  * Returns false without storing anything when URL::hasDuplicate() or
  * URL::filter(..., "indexerfilter") reports a match for the document URL, or
  * when Document::hasDuplicateContent() reports a match for the MD5 of the
  * normalised content.
  *
  * Any content type other than "application/pdf" is handled as HTML: entities
  * are decoded, the title comes from HTMLRobot::findTitle() and is re-encoded
  * with htmlentities(), and the content is passed through HTMLRobot::clean().
  * An empty title falls back to the URL.
  *
  * Facet rows are written only when both content and URL are non-empty;
  * otherwise an "empty doc" line is printed. A failed insert terminates the
  * script with the MySQL error message. An Exception raised along the way is
  * reported with a printed "failed adding" line and is not rethrown.
  *
  * @param object $document Mutated in place; reads contenttype, content, title, url and id.
  * @return false|null false when the document is skipped; otherwise no explicit value.
  */
 public function add($document)
 {
     try {
         if (URL::hasDuplicate($this->accountId, $document->url)) {
             return false;
         }
         if (URL::filter($this->accountId, $document->url, "indexerfilter")) {
             return false;
         }
         // NOTE(review): the property is spelled "contenttype" (all lowercase) here;
         // PHP property names are case-sensitive - confirm it matches the document class.
         if ($document->contenttype != "application/pdf") {
             // Default to HTML.
             $document->content = html_entity_decode($document->content, ENT_QUOTES);
             $document->title = HTMLRobot::findTitle($this->accountId, $document->content);
             $document->title = htmlentities($document->title, ENT_QUOTES);
             $document->content = HTMLRobot::clean($document->content);
         }
         // Default title. Loose comparison on purpose so a null or unset title
         // falls back to the URL as well.
         if ($document->title == "") {
             $document->title = $document->url;
         }
         $md5 = md5($document->content);
         // Static call on the Document class, scoped to this indexer's account.
         if (Document::hasDuplicateContent($this->accountId, $md5)) {
             return false;
         }
         $this->update_index_info($document->id, $md5);
         $length = strlen($document->content);
         if ($length > 0 && strlen($document->url) > 0) {
             // Title row first, then content row.
             $facets = array(
                 'title' => $document->title,
                 'content' => $document->content,
             );
             foreach ($facets as $name => $value) {
                 // Crawled text is untrusted and, after entity decoding, may contain
                 // quotes; escape every interpolated value before it reaches the query.
                 $SQL = sprintf(
                     "INSERT INTO facet(account_id,document_id,name,content) values('%s','%s','%s','%s');",
                     mysql_real_escape_string($this->accountId),
                     mysql_real_escape_string($document->id),
                     $name,
                     mysql_real_escape_string($value)
                 );
                 mysql_query($SQL) or die(mysql_error());
             }
         } else {
             print $document->url . " empty doc <br />\r\n";
         }
     } catch (Exception $e) {
         print "failed adding {$document->url}\r\n";
     }
 }