Ejemplo n.º 1
0
 /**
  * Index a file, replacing any previously indexed copy whose content changed.
  *
  * The file is left untouched in the index when a document with the same
  * path and the same MD5 checksum is already present. Otherwise every
  * existing document for that path is deleted and a fresh one is added,
  * after which the index is committed and optimized.
  *
  * @param   string  $filePath   The file path
  * @throws  RuntimeException    If the file content cannot be read
  */
 public function index($filePath)
 {
     $content = file_get_contents($filePath);
     if ($content === false) {
         // Without this guard md5(false) and an empty body would be indexed
         // as if they were the real file content.
         throw new RuntimeException('Unable to read file for indexing: ' . $filePath);
     }
     $modificationTime = filemtime($filePath);
     $checksum = md5($content);
     // "path" is stored as a keyword field, so look it up with an exact term
     // query. A parsed query string ('path:' . $filePath) may be misread by
     // the query parser when the path contains query-syntax characters such
     // as spaces, ":", "-" or parentheses.
     $pathTerm = new Zend_Search_Lucene_Index_Term($filePath, 'path');
     $hits = $this->_data->find(new Zend_Search_Lucene_Search_Query_Term($pathTerm));
     if (count($hits) > 0) {
         $document = $hits[0]->getDocument();
         // If the checksums are the same, no need to update
         if ($checksum === $document->checksum) {
             return;
         }
         // Delete every stale copy so the path ends up indexed exactly once
         foreach ($hits as $hit) {
             $this->_data->delete($hit);
         }
     }
     // Create a new document
     $document = new Zend_Search_Lucene_Document();
     $document->addField(Zend_Search_Lucene_Field::keyword('path', $filePath));
     $document->addField(Zend_Search_Lucene_Field::keyword('modificationTime', $modificationTime));
     $document->addField(Zend_Search_Lucene_Field::keyword('checksum', $checksum));
     $document->addField(Zend_Search_Lucene_Field::unStored('content', $content, 'utf-8'));
     $this->_data->addDocument($document);
     // Commit the changes
     $this->_data->commit();
     $this->_data->optimize();
 }
Ejemplo n.º 2
0
 /**
  * Add a page to the search index.
  *
  * Accepts either a page model or an already flattened page array. For a
  * model, the page is converted with toArray() and the content of all its
  * containers is concatenated into the "content" entry before indexing.
  *
  * @param Application_Model_Models_Page|array $page Page model or page data array
  * @param boolean $toasterSearchIndex Not used by this method; kept so existing callers keep working
  * @return boolean True when the document was added, false when the index
  *                 could not be initialised or the page data is unusable
  */
 public static function addPageToIndex($page, $toasterSearchIndex = false)
 {
     if (!self::initIndex()) {
         return false;
     }
     if ($page instanceof Application_Model_Models_Page) {
         $page = $page->toArray();
         $containers = Application_Model_Mappers_ContainerMapper::getInstance()->findByPageId($page['id']);
         $page['content'] = '';
         if (!empty($containers)) {
             foreach ($containers as $container) {
                 $page['content'] .= $container->getContent();
             }
         }
     }
     // Anything that is not page data by now cannot be indexed; report the
     // failure the same way as an unavailable index.
     if (!is_array($page) || !isset($page['id'])) {
         return false;
     }
     $document = new Zend_Search_Lucene_Document();
     $document->addField(Zend_Search_Lucene_Field::keyword('pageId', $page['id']));
     // Index field name => page array key. Note the differing case of
     // "metaKeyWords" (index field) and "metaKeywords" (page key).
     $unStoredFields = array(
         'metaKeyWords'    => 'metaKeywords',
         'metaDescription' => 'metaDescription',
         'headerTitle'     => 'headerTitle',
         'content'         => 'content',
     );
     foreach ($unStoredFields as $fieldName => $pageKey) {
         // Missing keys are indexed as empty text instead of raising a notice
         $value = isset($page[$pageKey]) ? $page[$pageKey] : '';
         $document->addField(Zend_Search_Lucene_Field::unStored($fieldName, $value, 'UTF-8'));
     }
     // NOTE: "previewImage" is not indexed (its addField call was commented out in the original code).
     foreach (array('draft', 'teaserText', 'url', 'navName', 'h1') as $pageKey) {
         $value = isset($page[$pageKey]) ? $page[$pageKey] : '';
         $document->addField(Zend_Search_Lucene_Field::text($pageKey, $value, 'UTF-8'));
     }
     self::$_index->addDocument($document);
     // Report success explicitly so callers can tell it apart from the
     // "return false" failure paths above.
     return true;
 }
Ejemplo n.º 3
0
 /**
  * Add a post to the index.
  *
  * The permalink is stored as "url", the lowercased title as "title"
  * (boost 50), the lowercased content as the unstored "contents" field, the
  * lowercased space-separated tags as the unstored "tags" field (boost 10)
  * and the post id as the "postid" keyword.
  *
  * @param Post $post the post being inserted
  */
 public function index_post($post)
 {
     // The fields are declared as UTF-8, but strtolower() works byte-wise and
     // does not reliably lowercase multibyte letters. Use the multibyte-aware
     // variant when the mbstring extension is available.
     $multibyte = function_exists('mb_strtolower');
     $doc = new Zend_Search_Lucene_Document();
     $doc->addField(Zend_Search_Lucene_Field::Text('url', $post->permalink));
     $titleText = $multibyte ? mb_strtolower($post->title, 'UTF-8') : strtolower($post->title);
     $title = Zend_Search_Lucene_Field::Text('title', $titleText, 'utf-8');
     $title->boost = 50;
     $doc->addField($title);
     $contentText = $multibyte ? mb_strtolower($post->content, 'UTF-8') : strtolower($post->content);
     $doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $contentText, 'utf-8'));
     // Add tags
     $tagstring = '';
     foreach ($post->tags as $tag) {
         $tagstring .= $tag . ' ';
     }
     $tagText = $multibyte ? mb_strtolower($tagstring, 'UTF-8') : strtolower($tagstring);
     $dtag = Zend_Search_Lucene_Field::UnStored('tags', $tagText, 'utf-8');
     $dtag->boost = 10;
     $doc->addField($dtag);
     // Add ID
     $doc->addField(Zend_Search_Lucene_Field::keyword('postid', $post->id));
     $this->_index->addDocument($doc);
 }
Ejemplo n.º 4
0
 /**
  * Add one or more indexable objects to the search index.
  *
  * A single object is treated like a one-element array. Each element must
  * pass the Zym_Search_Lucene_IIndexable instanceof check. When $update is
  * true, the element's record ID is deleted from the index (via delete())
  * before its search document is added.
  *
  * @throws Zym_Search_Lucene_Exception If an element is not indexable, if
  *         $update is true and its record ID is empty, or if its search
  *         document is not a Zend_Search_Lucene_Document
  * @param Zym_Search_Lucene_Indexable_Interface|array $indexables
  * @param boolean $update
  * @param string $searchField Field passed to delete(); falls back to $this->_idKey when empty
  * @return Zym_Search_Lucene_Index
  */
 public function index($indexables, $update = true, $searchField = null)
 {
     $items = is_array($indexables) ? $indexables : array($indexables);
     $lookupField = $searchField ? $searchField : $this->_idKey;
     foreach ($items as $item) {
         if (!($item instanceof Zym_Search_Lucene_IIndexable)) {
             /**
              * @see Zym_Search_Lucene_Exception
              */
             require_once 'Zym/Search/Lucene/Exception.php';
             throw new Zym_Search_Lucene_Exception('The object needs to have Zym_Search_Lucene_Indexable_Interface implemented.');
         }
         if ($update) {
             // Remove the previously indexed version before re-adding it
             $id = $item->getRecordID();
             if (!$id) {
                 /**
                  * @see Zym_Search_Lucene_Exception
                  */
                 require_once 'Zym/Search/Lucene/Exception.php';
                 throw new Zym_Search_Lucene_Exception('The record ID must not be null');
             }
             $this->delete($id, $lookupField);
         }
         $searchDocument = $item->getSearchDocument();
         if (!($searchDocument instanceof Zend_Search_Lucene_Document)) {
             /**
              * @see Zym_Search_Lucene_Exception
              */
             require_once 'Zym/Search/Lucene/Exception.php';
             throw new Zym_Search_Lucene_Exception('The document is not an instance of Zend_Search_Lucene_Document.');
         }
         $this->_searchIndex->addDocument($searchDocument);
     }
     return $this;
 }
Ejemplo n.º 5
0
 /**
  * Hand the given document to the index held in $this->_index.
  *
  * @param Zend_Search_Lucene_Document $document Document forwarded to the index's addDocument()
  */
 public function addDocument(Zend_Search_Lucene_Document $document)
 {
     $targetIndex = $this->_index;
     $targetIndex->addDocument($document);
 }
 /**
  * Build the Lucene document describing a node and add it to the index.
  *
  * The "shared" document carries the node url/path, basename, modification
  * date, size, mime, a serialized copy of the node metadata, the node's
  * shared indexable meta keys and the configured meta fields. When the node
  * exposes "user" indexable meta keys and a user is logged in, a second
  * "user"-scoped document holding those keys is added to the index as well.
  * When content parsing is enabled and the node is not larger than
  * PARSE_CONTENT_MAX_SIZE, HTML/docx/pptx/xlsx files are loaded through the
  * matching Zend document loader and the text returned by
  * extractIndexableContent() is indexed as the unstored "body" field.
  *
  * @param AJXP_Node $ajxpNode
  * @param Zend_Search_Lucene_Interface $index
  * @throws Exception If the document loader did not return a document
  * @return Zend_Search_Lucene_Document The shared document that was added to the index
  */
 public function createIndexedDocument($ajxpNode, &$index)
 {
     if (!empty($this->metaFields)) {
         $ajxpNode->loadNodeInfo(false, false, "all");
     } else {
         $ajxpNode->loadNodeInfo();
     }
     $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION));
     $parseContent = $this->indexContent;
     if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) {
         $parseContent = false;
     }
     if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) {
         $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl());
     } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile);
     } elseif ($parseContent && $ext == "pptx" && class_exists("Zend_Search_Lucene_Document_Pptx")) {
         // Was testing for "docx", which made this branch unreachable for
         // real .pptx files.
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile);
     } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile);
     } else {
         $doc = new Zend_Search_Lucene_Document();
     }
     if ($doc == null) {
         throw new Exception("Could not load document");
     }
     // The encoding belongs to the field factory call. It used to be passed
     // as a second argument to addField(), where it was silently ignored.
     $encoding = SystemTextEncoding::getEncoding();
     $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
     $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath()), $encoding));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes", $encoding));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared"));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime)));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize));
     // Fall back to the raw file extension when the node has no mime value
     $ajxpMime = $ajxpNode->ajxp_mime;
     if (empty($ajxpMime)) {
         $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)));
     } else {
         $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpMime));
     }
     // Store a cached copy of the metadata
     $serializedMeta = base64_encode(serialize($ajxpNode->metadata));
     $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
     if (isset($ajxpNode->indexableMetaKeys["shared"])) {
         foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) {
             if ($ajxpNode->{$sharedField}) {
                 $doc->addField(Zend_Search_Lucene_Field::keyword($sharedField, $ajxpNode->{$sharedField}));
             }
         }
     }
     // metaFields may be empty (see the loadNodeInfo() switch above), so
     // guard the loop instead of iterating a possibly unset value.
     if (!empty($this->metaFields)) {
         foreach ($this->metaFields as $field) {
             if ($ajxpNode->{$field} != null) {
                 $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}, $encoding));
             }
         }
     }
     if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) {
         // Per-user metadata goes into its own document, tagged with the
         // logged user's id and the "user" scope.
         $privateDoc = new Zend_Search_Lucene_Document();
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user"));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId()));
         foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) {
             if ($ajxpNode->{$userField}) {
                 $privateDoc->addField(Zend_Search_Lucene_Field::keyword($userField, $ajxpNode->{$userField}));
             }
         }
         $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
         $index->addDocument($privateDoc);
     }
     if ($parseContent) {
         $body = $this->extractIndexableContent($ajxpNode);
         if (!empty($body)) {
             $doc->addField(Zend_Search_Lucene_Field::unStored("body", $body));
         }
     }
     $index->addDocument($doc);
     return $doc;
 }
Ejemplo n.º 7
0
 /**
  * Build the Lucene document describing a node and add it to the index.
  *
  * The "shared" document carries the node url/path, basename, modification
  * date, size, mime, a serialized copy of the node metadata, the node's
  * shared indexable meta keys and the configured meta fields. When the node
  * exposes "user" indexable meta keys and a user is logged in, a second
  * "user"-scoped document holding those keys is added to the index as well.
  *
  * When content parsing is enabled and the node is not larger than
  * PARSE_CONTENT_MAX_SIZE, the unstored "body" field is filled from:
  *  - the raw file content for extensions listed in PARSE_CONTENT_TXT,
  *  - the output of the UNOCONV command for doc/odt/xls/ods files,
  *  - the output of the PDFTOTEXT command for pdf files.
  * Command output is transliterated to ASCII with iconv before indexing.
  *
  * @param AJXP_Node $ajxpNode
  * @param Zend_Search_Lucene_Interface $index
  * @throws Exception If the document loader did not return a document
  * @return Zend_Search_Lucene_Document The shared document that was added to the index
  */
 public function createIndexedDocument($ajxpNode, &$index)
 {
     $ajxpNode->loadNodeInfo();
     $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION));
     $parseContent = $this->indexContent;
     if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) {
         $parseContent = false;
     }
     if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) {
         $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl());
     } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile);
     } elseif ($parseContent && $ext == "pptx" && class_exists("Zend_Search_Lucene_Document_Pptx")) {
         // Was testing for "docx", which made this branch unreachable for
         // real .pptx files.
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile);
     } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile);
     } else {
         $doc = new Zend_Search_Lucene_Document();
     }
     if ($doc == null) {
         throw new Exception("Could not load document");
     }
     // The encoding belongs to the field factory call. It used to be passed
     // as a second argument to addField(), where it was silently ignored.
     $encoding = SystemTextEncoding::getEncoding();
     $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
     $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath()), $encoding));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes", $encoding));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared"));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime)));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize));
     // Fall back to the raw file extension when the node has no mime value
     $ajxpMime = $ajxpNode->ajxp_mime;
     if (empty($ajxpMime)) {
         $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)));
     } else {
         $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpMime));
     }
     // Store a cached copy of the metadata
     $serializedMeta = base64_encode(serialize($ajxpNode->metadata));
     $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
     if (isset($ajxpNode->indexableMetaKeys["shared"])) {
         foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) {
             if ($ajxpNode->{$sharedField}) {
                 $doc->addField(Zend_Search_Lucene_Field::keyword($sharedField, $ajxpNode->{$sharedField}));
             }
         }
     }
     // Guard the loop so an unset/empty metaFields value does not raise a
     // warning.
     if (!empty($this->metaFields)) {
         foreach ($this->metaFields as $field) {
             if ($ajxpNode->{$field} != null) {
                 $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}, $encoding));
             }
         }
     }
     if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) {
         // Per-user metadata goes into its own document, tagged with the
         // logged user's id and the "user" scope.
         $privateDoc = new Zend_Search_Lucene_Document();
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user"));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId()));
         foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) {
             if ($ajxpNode->{$userField}) {
                 $privateDoc->addField(Zend_Search_Lucene_Field::keyword($userField, $ajxpNode->{$userField}));
             }
         }
         $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
         $index->addDocument($privateDoc);
     }
     if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_TXT")))) {
         $text = file_get_contents($ajxpNode->getUrl());
         // Skip the body when the file could not be read instead of
         // indexing boolean false.
         if ($text !== false) {
             $doc->addField(Zend_Search_Lucene_Field::unStored("body", $text));
         }
     }
     $unoconv = $this->getFilteredOption("UNOCONV");
     $pipe = false;
     if ($parseContent && !empty($unoconv) && in_array($ext, array("doc", "odt", "xls", "ods"))) {
         $targetExt = "txt";
         if (in_array($ext, array("xls", "ods"))) {
             $targetExt = "csv";
         } else {
             // NOTE(review): "odp"/"ppt" are not in the outer extension list,
             // so this branch (and therefore $pipe = true) is currently
             // unreachable. Left as is; confirm whether presentations were
             // meant to be converted before widening the outer list.
             if (in_array($ext, array("odp", "ppt"))) {
                 $targetExt = "pdf";
                 $pipe = true;
             }
         }
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $unoconv = "HOME=" . AJXP_Utils::getAjxpTmpDir() . " " . $unoconv . " --stdout -f {$targetExt} " . escapeshellarg($realFile);
         if ($pipe) {
             $newTarget = str_replace(".{$ext}", ".pdf", $realFile);
             // The target is derived from a file name, so it must be escaped
             // before being placed on the shell command line.
             $unoconv .= " > " . escapeshellarg($newTarget);
             register_shutdown_function("unlink", $newTarget);
         }
         $output = array();
         exec($unoconv, $output, $return);
         if (!$pipe) {
             $out = implode("\n", $output);
             $enc = 'ISO-8859-1';
             $asciiString = iconv($enc, 'ASCII//TRANSLIT//IGNORE', $out);
             // iconv() returns false on failure; do not index that as body
             if ($asciiString !== false) {
                 $doc->addField(Zend_Search_Lucene_Field::unStored("body", $asciiString));
             }
         } else {
             // Let the pdftotext step below pick up the generated PDF
             $ext = "pdf";
         }
     }
     $pdftotext = $this->getFilteredOption("PDFTOTEXT");
     if ($parseContent && !empty($pdftotext) && in_array($ext, array("pdf"))) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         if ($pipe && isset($newTarget) && is_file($newTarget)) {
             $realFile = $newTarget;
         }
         $cmd = $pdftotext . " " . escapeshellarg($realFile) . " -";
         $output = array();
         exec($cmd, $output, $return);
         $out = implode("\n", $output);
         $enc = 'UTF8';
         $asciiString = iconv($enc, 'ASCII//TRANSLIT//IGNORE', $out);
         // iconv() returns false on failure; do not index that as body
         if ($asciiString !== false) {
             $doc->addField(Zend_Search_Lucene_Field::unStored("body", $asciiString));
         }
     }
     $index->addDocument($doc);
     return $doc;
 }
Ejemplo n.º 8
0
 /**
  * Index one or more Zym_Search_Lucene_Indexable_Interface objects.
  *
  * A single object is treated like a one-element array. Each element must
  * pass the Zym_Search_Lucene_IIndexable instanceof check. When $update is
  * true, the element's record ID is deleted from the index (via delete())
  * before its search document is added.
  *
  * @throws Zym_Search_Lucene_Exception If an element is not indexable, if
  *         $update is true and its record ID is empty, or if its search
  *         document is not a Zend_Search_Lucene_Document
  * @param Zym_Search_Lucene_Indexable_Interface|array $indexables
  * @param boolean $update
  * @param string $searchField Field passed to delete(); falls back to $this->_idKey when empty
  * @return Zym_Search_Lucene_Index
  */
 public function index($indexables, $update = true, $searchField = null)
 {
     if (!is_array($indexables)) {
         $indexables = array($indexables);
     }
     if (!$searchField) {
         $searchField = $this->_idKey;
     }
     foreach ($indexables as $indexable) {
         if (!$indexable instanceof Zym_Search_Lucene_IIndexable) {
             /**
              * @see Zym_Search_Lucene_Exception
              */
             require_once 'Zym/Search/Lucene/Exception.php';
             // get_class() only accepts objects. For scalars, arrays or null
             // it would fail before the intended exception is thrown, so
             // report the PHP type instead.
             $type = is_object($indexable) ? get_class($indexable) : gettype($indexable);
             throw new Zym_Search_Lucene_Exception('The object of type "' . $type . '" ' . 'is not an instance of Zym_Search_Lucene_Indexable_Interface.');
         }
         if ($update) {
             // Remove the previously indexed version before re-adding it
             $recordId = $indexable->getRecordId();
             if (!$recordId) {
                 /**
                  * @see Zym_Search_Lucene_Exception
                  */
                 require_once 'Zym/Search/Lucene/Exception.php';
                 throw new Zym_Search_Lucene_Exception('You must provide a valid record ID.');
             }
             $this->delete($recordId, $searchField);
         }
         $document = $indexable->getSearchDocument();
         if (!$document instanceof Zend_Search_Lucene_Document) {
             /**
              * @see Zym_Search_Lucene_Exception
              */
             require_once 'Zym/Search/Lucene/Exception.php';
             throw new Zym_Search_Lucene_Exception('The provided search-document is not ' . 'an instance of Zend_Search_Lucene_Document.');
         }
         $this->_searchIndex->addDocument($document);
     }
     return $this;
 }