コード例 #1
0
ファイル: FieldTest.php プロジェクト: omusico/logica
 public function testBinary()
 {
     $field = Zend_Search_Lucene_Field::Binary('field', 'value');
     $this->assertEquals($field->boost, 1);
     $this->assertEquals($field->encoding, '');
     $this->assertEquals($field->isBinary, true);
     $this->assertEquals($field->isIndexed, false);
     $this->assertEquals($field->isStored, true);
     $this->assertEquals($field->isTokenized, false);
     $this->assertEquals($field->name, 'field');
     $this->assertEquals($field->value, 'value');
 }
コード例 #2
0
ファイル: document.php プロジェクト: JackCanada/moodle-hacks
 public function __construct(&$doc, &$data, $course_id, $group_id, $user_id, $path, $additional_keyset = null)
 {
     $encoding = 'UTF-8';
     //document identification and indexing
     $this->addField(Zend_Search_Lucene_Field::Keyword('docid', $doc->docid, $encoding));
     //document type : the name of the Moodle element that manages it
     $this->addField(Zend_Search_Lucene_Field::Keyword('doctype', $doc->documenttype, $encoding));
     //allows subclassing information from complex modules.
     $this->addField(Zend_Search_Lucene_Field::Keyword('itemtype', $doc->itemtype, $encoding));
     //caches the course context.
     $this->addField(Zend_Search_Lucene_Field::Keyword('course_id', $course_id, $encoding));
     //caches the originator's group.
     $this->addField(Zend_Search_Lucene_Field::Keyword('group_id', $group_id, $encoding));
     //caches the originator if any
     $this->addField(Zend_Search_Lucene_Field::Keyword('user_id', $user_id, $encoding));
     // caches the context of this information. i-e, the context in which this information
     // is being produced/attached. Speeds up the "check for access" process as context in
     // which the information resides (a course, a module, a block, the site) is stable.
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('context_id', $doc->contextid, $encoding));
     //data for document
     $this->addField(Zend_Search_Lucene_Field::Text('title', $doc->title, $encoding));
     $this->addField(Zend_Search_Lucene_Field::Text('author', $doc->author, $encoding));
     $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $doc->contents, $encoding));
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $doc->url, $encoding));
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('date', $doc->date, $encoding));
     //additional data added on a per-module basis
     $this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data)));
     // adding a path allows the document to know where to find specific library calls
     // for checking access to a module or block content. The Lucene records should only
     // be responsible to bring back to that call sufficient and consistent information
     // in order to perform the check.
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('path', $path, $encoding));
     /*
     // adding a capability set required for viewing. -1 if no capability required.
     // the capability required for viewing is depending on the local situation
     // of the document. each module should provide this information when pushing
     // out search document structure. Although capability model should be kept flat
     // there is no exclusion some module or block developpers use logical combinations
     // of multiple capabilities in their code. This possibility should be left open here.
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('capabilities', $caps));
     */
     /*
     // Additional key set allows a module to ask for extensible criteria based search
     // depending on the module internal needs.
     */
     if (!empty($additional_keyset)) {
         foreach ($additional_keyset as $keyname => $keyvalue) {
             $this->addField(Zend_Search_Lucene_Field::Keyword($keyname, $keyvalue, $encoding));
         }
     }
 }
コード例 #3
0
ファイル: Search.php プロジェクト: ascseb/kosearch
 /**
  * Add an entry
  *
  * @param Searchable	$item Model	implememting Searchable interface
  * @param bool			$create_new	whether or not to create new index when adding item - only used when index is rebuilt
  * @return Search		return this instance for method chaining
  */
 public function add($item, $create_new = FALSE)
 {
     // ensure item implements Searchable interface
     if (!is_a($item, "Searchable")) {
         throw new Kohana_User_Exception('Invalid Object', 'Object must implement Searchable Interface');
     }
     if (!$create_new) {
         $this->open_index();
     }
     $doc = new Zend_Search_Lucene_Document();
     // get indexable fields;
     $fields = $item->get_indexable_fields();
     // index the object type - this allows search results to be grouped/searched by type
     $doc->addField(Zend_Search_Lucene_Field::Keyword('type', $item->get_type()));
     // index the object's id - to avoid any confusion, we call it 'identifier' as Lucene uses 'id' attribute internally.
     $doc->addField(Zend_Search_Lucene_Field::UnIndexed('identifier', $item->get_identifier()));
     // store, but don't index or tokenize
     // index the object type plus identifier - this gives us a unique identifier for later retrieval - e.g. to delete
     $doc->addField(Zend_Search_Lucene_Field::Keyword('uid', $item->get_unique_identifier()));
     // index all fields that have been identified by Interface
     foreach ($fields as $field) {
         // get attribute value from model
         $value = $item->__get($field->name);
         // html decode value if required
         $value = $field->html_decode ? htmlspecialchars_decode($value) : $value;
         // add field value based on type
         switch ($field->type) {
             case Searchable::KEYWORD:
                 $doc->addField(Zend_Search_Lucene_Field::Keyword($field->name, $value));
                 break;
             case Searchable::UNINDEXED:
                 $doc->addField(Zend_Search_Lucene_Field::UnIndexed($field->name, $value));
                 break;
             case Searchable::BINARY:
                 $doc->addField(Zend_Search_Lucene_Field::Binary($field->name, $value));
                 break;
             case Searchable::TEXT:
                 $doc->addField(Zend_Search_Lucene_Field::Text($field->name, $value));
                 break;
             case Searchable::UNSTORED:
                 $doc->addField(Zend_Search_Lucene_Field::UnStored($field->name, $value));
                 break;
         }
     }
     $this->index->addDocument($doc);
     // return this so we can have chainable methods
     return $this;
 }
コード例 #4
0
 /**
  * @param AJXP_Node $ajxpNode
  * @param Zend_Search_Lucene_Interface $index
  * @throws Exception
  * @return Zend_Search_Lucene_Document
  */
 public function createIndexedDocument($ajxpNode, &$index)
 {
     if (!empty($this->metaFields)) {
         $ajxpNode->loadNodeInfo(false, false, "all");
     } else {
         $ajxpNode->loadNodeInfo();
     }
     $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION));
     $parseContent = $this->indexContent;
     if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) {
         $parseContent = false;
     }
     if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) {
         $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl());
     } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile);
     } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Pptx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile);
     } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile);
     } else {
         $doc = new Zend_Search_Lucene_Document();
     }
     if ($doc == null) {
         throw new Exception("Could not load document");
     }
     $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl()), SystemTextEncoding::getEncoding());
     $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath())), SystemTextEncoding::getEncoding());
     $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath())), SystemTextEncoding::getEncoding());
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes"), SystemTextEncoding::getEncoding());
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared"));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime)));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize));
     $ajxpMime = $ajxpNode->ajxp_mime;
     if (empty($ajxpMime)) {
         $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)));
     } else {
         $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpNode->ajxp_mime));
     }
     // Store a cached copy of the metadata
     $serializedMeta = base64_encode(serialize($ajxpNode->metadata));
     $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
     if (isset($ajxpNode->indexableMetaKeys["shared"])) {
         foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) {
             if ($ajxpNode->{$sharedField}) {
                 $doc->addField(Zend_search_Lucene_Field::keyword($sharedField, $ajxpNode->{$sharedField}));
             }
         }
     }
     foreach ($this->metaFields as $field) {
         if ($ajxpNode->{$field} != null) {
             $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}), SystemTextEncoding::getEncoding());
         }
     }
     if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) {
         $privateDoc = new Zend_Search_Lucene_Document();
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), SystemTextEncoding::getEncoding()));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), SystemTextEncoding::getEncoding()));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user"));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId()));
         foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) {
             if ($ajxpNode->{$userField}) {
                 $privateDoc->addField(Zend_search_Lucene_Field::keyword($userField, $ajxpNode->{$userField}));
             }
         }
         $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
         $index->addDocument($privateDoc);
     }
     if ($parseContent) {
         $body = $this->extractIndexableContent($ajxpNode);
         if (!empty($body)) {
             $doc->addField(Zend_Search_Lucene_Field::unStored("body", $body));
         }
     }
     $index->addDocument($doc);
     return $doc;
 }
コード例 #5
0
 /**
  * @param AJXP_Node $ajxpNode
  * @param Zend_Search_Lucene_Interface $index
  * @throws Exception
  * @return Zend_Search_Lucene_Document
  */
 public function createIndexedDocument($ajxpNode, &$index)
 {
     $ajxpNode->loadNodeInfo();
     $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION));
     $parseContent = $this->indexContent;
     if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) {
         $parseContent = false;
     }
     if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) {
         $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl());
     } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile);
     } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Pptx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile);
     } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile);
     } else {
         $doc = new Zend_Search_Lucene_Document();
     }
     if ($doc == null) {
         throw new Exception("Could not load document");
     }
     $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl()), SystemTextEncoding::getEncoding());
     $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath())), SystemTextEncoding::getEncoding());
     $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath())), SystemTextEncoding::getEncoding());
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes"), SystemTextEncoding::getEncoding());
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared"));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime)));
     $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize));
     $ajxpMime = $ajxpNode->ajxp_mime;
     if (empty($ajxpMime)) {
         $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)));
     } else {
         $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpNode->ajxp_mime));
     }
     // Store a cached copy of the metadata
     $serializedMeta = base64_encode(serialize($ajxpNode->metadata));
     $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
     if (isset($ajxpNode->indexableMetaKeys["shared"])) {
         foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) {
             if ($ajxpNode->{$sharedField}) {
                 $doc->addField(Zend_search_Lucene_Field::keyword($sharedField, $ajxpNode->{$sharedField}));
             }
         }
     }
     foreach ($this->metaFields as $field) {
         if ($ajxpNode->{$field} != null) {
             $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}), SystemTextEncoding::getEncoding());
         }
     }
     if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) {
         $privateDoc = new Zend_Search_Lucene_Document();
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), SystemTextEncoding::getEncoding()));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), SystemTextEncoding::getEncoding()));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user"));
         $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId()));
         foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) {
             if ($ajxpNode->{$userField}) {
                 $privateDoc->addField(Zend_search_Lucene_Field::keyword($userField, $ajxpNode->{$userField}));
             }
         }
         $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
         $index->addDocument($privateDoc);
     }
     if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_TXT")))) {
         $doc->addField(Zend_Search_Lucene_Field::unStored("body", file_get_contents($ajxpNode->getUrl())));
     }
     $unoconv = $this->getFilteredOption("UNOCONV");
     $pipe = false;
     if ($parseContent && !empty($unoconv) && in_array($ext, array("doc", "odt", "xls", "ods"))) {
         $targetExt = "txt";
         if (in_array($ext, array("xls", "ods"))) {
             $targetExt = "csv";
         } else {
             if (in_array($ext, array("odp", "ppt"))) {
                 $targetExt = "pdf";
                 $pipe = true;
             }
         }
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         $unoconv = "HOME=" . AJXP_Utils::getAjxpTmpDir() . " " . $unoconv . " --stdout -f {$targetExt} " . escapeshellarg($realFile);
         if ($pipe) {
             $newTarget = str_replace(".{$ext}", ".pdf", $realFile);
             $unoconv .= " > {$newTarget}";
             register_shutdown_function("unlink", $newTarget);
         }
         $output = array();
         exec($unoconv, $output, $return);
         if (!$pipe) {
             $out = implode("\n", $output);
             $enc = 'ISO-8859-1';
             $asciiString = iconv($enc, 'ASCII//TRANSLIT//IGNORE', $out);
             $doc->addField(Zend_Search_Lucene_Field::unStored("body", $asciiString));
         } else {
             $ext = "pdf";
         }
     }
     $pdftotext = $this->getFilteredOption("PDFTOTEXT");
     if ($parseContent && !empty($pdftotext) && in_array($ext, array("pdf"))) {
         $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
         if ($pipe && isset($newTarget) && is_file($newTarget)) {
             $realFile = $newTarget;
         }
         $cmd = $pdftotext . " " . escapeshellarg($realFile) . " -";
         $output = array();
         exec($cmd, $output, $return);
         $out = implode("\n", $output);
         $enc = 'UTF8';
         $asciiString = iconv($enc, 'ASCII//TRANSLIT//IGNORE', $out);
         $doc->addField(Zend_Search_Lucene_Field::unStored("body", $asciiString));
     }
     $index->addDocument($doc);
     return $doc;
 }
コード例 #6
0
 /**
  * Factory to obtain the search fields.
  * @param string $field The type of field
  * @param string $name To name to use
  * @param string $contents The contents for the field to have.
  * @return mixed The requested type.
  */
 protected function getLuceneField($field, $name, $contents)
 {
     switch (strtolower($field)) {
         case 'keyword':
             return Zend_Search_Lucene_Field::Keyword($name, $contents, $this->getSearch()->getParameter('encoding'));
         case 'unindexed':
             return Zend_Search_Lucene_Field::UnIndexed($name, $contents, $this->getSearch()->getParameter('encoding'));
         case 'binary':
             return Zend_Search_Lucene_Field::Binary($name, $contents);
         case 'text':
             return Zend_Search_Lucene_Field::Text($name, $contents, $this->getSearch()->getParameter('encoding'));
         case 'unstored':
             return Zend_Search_Lucene_Field::UnStored($name, $contents, $this->getSearch()->getParameter('encoding'));
         case 'index term':
             return new Zend_Search_Lucene_Index_Term($contents, $name);
         default:
             throw new sfLuceneIndexerException(sprintf('Unknown field "%s" in factory', $field));
     }
 }