/**
 * Index a file.
 *
 * The file is stored under its path together with its modification time and
 * an MD5 checksum of its content. If a document with the same path and the
 * same checksum is already indexed, nothing is changed. Otherwise every
 * existing document for that path is removed and a fresh one is added, then
 * the index is committed and optimized.
 *
 * @param string $filePath The file path
 *
 * @return void
 *
 * @throws RuntimeException If the file content cannot be read
 */
public function index($filePath)
{
    $content = file_get_contents($filePath);
    if ($content === false) {
        // Without this guard an unreadable file would be indexed as empty content.
        throw new RuntimeException('Unable to read file for indexing: ' . $filePath);
    }

    $modificationTime = filemtime($filePath);
    $checksum = md5($content);

    // 'path' is a keyword (untokenized) field. A query *string* such as
    // "path:/some/file.txt" is run through the query parser and the analyzer,
    // which splits the path into several tokens and therefore never matches
    // the single stored term. An explicit term query matches it exactly.
    $pathQuery = new Zend_Search_Lucene_Search_Query_Term(
        new Zend_Search_Lucene_Index_Term($filePath, 'path')
    );
    $hits = $this->_data->find($pathQuery);

    if (count($hits) > 0) {
        $document = $hits[0]->getDocument();

        // If the checksums are the same, no need to update
        if ($checksum === $document->checksum) {
            return;
        }

        // Remove every stale document for this path, not only the first one
        foreach ($hits as $hit) {
            $this->_data->delete($hit);
        }
    }

    // Create a new document
    $document = new Zend_Search_Lucene_Document();
    $document->addField(Zend_Search_Lucene_Field::keyword('path', $filePath));
    $document->addField(Zend_Search_Lucene_Field::keyword('modificationTime', $modificationTime));
    $document->addField(Zend_Search_Lucene_Field::keyword('checksum', $checksum));
    $document->addField(Zend_Search_Lucene_Field::unStored('content', $content, 'utf-8'));
    $this->_data->addDocument($document);

    // Commit the changes
    $this->_data->commit();
    $this->_data->optimize();
}
/**
 * Build the places search index from scratch.
 *
 * Raises the memory limit and removes the execution time limit, creates a new
 * index at $this->_indexPath, adds one document per row selected from
 * $this->_name (at most 7500 rows) and prints the elapsed time in seconds.
 *
 * @return void
 */
public function buildplaces()
{
    ini_set('memory_limit', '1000M');
    set_time_limit(0);
    $startedAt = time();

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    // Create index
    $index = Zend_Search_Lucene::create($this->_indexPath);

    // Fetch the place rows to index
    $columns = array('id', 'name', 'placepic');
    $select = $this->_db->select()->from($this->_name, $columns)->limit(7500);
    $rows = $this->_db->fetchAssoc($select);

    foreach ($rows as $row) {
        $placeDoc = new Zend_Search_Lucene_Document();

        $idField = Zend_Search_Lucene_Field::keyword('placeid', $row['id']);
        $placeDoc->addField($idField);

        $nameField = Zend_Search_Lucene_Field::text('placename', $row['name']);
        $placeDoc->addField($nameField);

        $picField = Zend_Search_Lucene_Field::unStored('placepic', $row['placepic']);
        $placeDoc->addField($picField);

        $index->addDocument($placeDoc);
    }

    $index->commit();

    // Report how long the build took (seconds)
    print_r(time() - $startedAt);
}
/**
 * Build a search document for a product.
 *
 * Adds the product id, the category text, the name, the description and the
 * formatted price as fields, all declared as UTF-8.
 *
 * @param Storefront_Resource_Product_Item_Interface $item     The product to index
 * @param mixed                                      $category Category text stored in the "categories" field
 */
public function __construct(Storefront_Resource_Product_Item_Interface $item, $category)
{
    $encoding = 'UTF-8';

    $idField = Zend_Search_Lucene_Field::keyword('productId', $item->productId, $encoding);
    $this->addField($idField);

    $categoriesField = Zend_Search_Lucene_Field::text('categories', $category, $encoding);
    $this->addField($categoriesField);

    $nameField = Zend_Search_Lucene_Field::text('name', $item->name, $encoding);
    $this->addField($nameField);

    // Description is searchable but not stored in the index
    $descriptionField = Zend_Search_Lucene_Field::unStored('description', $item->description, $encoding);
    $this->addField($descriptionField);

    $formattedPrice = $this->_formatPrice($item->getPrice());
    $priceField = Zend_Search_Lucene_Field::text('price', $formattedPrice, $encoding);
    $this->addField($priceField);
}
/**
 * Refresh this record's entry in the Lucene index.
 *
 * Removes every document already indexed under this record's primary key,
 * then adds a new document with the brand, category and description fields
 * and commits the index.
 *
 * NOTE: documents written before this fix stored "pk" as an unindexed field
 * and cannot be found by the pk lookup; a one-off index rebuild is needed to
 * get rid of them.
 *
 * @return void
 */
public function updateLuceneIndex()
{
    $index = articuloTable::getLuceneIndex();

    // find() returns an array of hits, so each one has to be deleted
    // individually (reading ->id on the array itself does not work).
    foreach ($index->find('pk:' . $this->getId()) as $hit) {
        $index->delete($hit->id);
    }

    $doc = new Zend_Search_Lucene_Document();

    // "pk" must be a keyword field: an unIndexed field is stored but not
    // searchable, so the lookup above could never match it.
    $doc->addField(Zend_Search_Lucene_Field::keyword('pk', $this->getId()));

    $doc->addField(Zend_Search_Lucene_Field::unStored('marca', $this->getMarca(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('categoria', $this->getCategoria(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('descripcion', $this->getDescripcion(), 'utf-8'));

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Refresh this mailbox's entry in the Lucene index.
 *
 * Removes every document already indexed under this record's primary key,
 * then adds a new document with the mailbox fields and commits the index.
 *
 * NOTE: documents written before this fix stored "pk" as an unindexed field
 * and cannot be found by the pk lookup; a one-off index rebuild is needed to
 * get rid of them.
 *
 * @return void
 */
public function updateLuceneIndex()
{
    $index = GcMailboxPeer::getLuceneIndex();

    // Delete the existing entries. find() returns an array of hits, so each
    // one has to be deleted individually.
    foreach ($index->find('pk:' . $this->getId()) as $hit) {
        $index->delete($hit->id);
    }

    // NOTE(review): the original comment here said expired mailboxes should
    // not be indexed, but no such check is implemented in this method.
    $doc = new Zend_Search_Lucene_Document();

    // "pk" must be a keyword field: an unIndexed field is stored but not
    // searchable, so the lookup above could never match it.
    $doc->addField(Zend_Search_Lucene_Field::keyword('pk', $this->getId()));

    $doc->addField(Zend_Search_Lucene_Field::unStored('username', $this->getUsername(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('name', $this->getName(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('email', $this->getEmail(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('domain', $this->getDomainName(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('area', $this->getGcArea(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('group', $this->getGcGroup(), 'utf-8'));

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Build the search document for a content node.
 *
 * Any documents already indexed under the node id are deleted from
 * $this->index first. The new document is returned to the caller; this
 * method does not add it to the index itself.
 *
 * @param Zoo_Content_Interface $item
 *
 * @return Zend_Search_Lucene_Document
 */
protected function _build(Zoo_Content_Interface $item)
{
    // Drop whatever is currently indexed for this node
    $existing = $this->index->find('nid:' . $item->id);
    foreach ($existing as $match) {
        $this->index->delete($match->id);
    }

    // Assemble the replacement document
    $document = new Zend_Search_Lucene_Document();

    $nidField = Zend_Search_Lucene_Field::text('nid', $item->id);
    $document->addField($nidField);

    $linkField = Zend_Search_Lucene_Field::unIndexed('link', $item->url());
    $document->addField($linkField);

    $titleField = Zend_Search_Lucene_Field::unStored('title', $item->title);
    $document->addField($titleField);

    $typeField = Zend_Search_Lucene_Field::unStored('type', $item->type);
    $document->addField($typeField);

    $publishedField = Zend_Search_Lucene_Field::unStored('published', $item->published);
    $document->addField($publishedField);

    $uidField = Zend_Search_Lucene_Field::unStored('uid', $item->uid);
    $document->addField($uidField);

    // Only the first element of the rendered result is indexed, with markup removed
    list($rendered) = Zoo::getService('content')->getRenderedContent($item->id, 'Display');
    $plainText = strip_tags($rendered);
    $document->addField(Zend_Search_Lucene_Field::unStored('contents', $plainText));

    return $document;
}
/**
 * Build the product search index from the database.
 *
 * Command line usage: php index.php db index
 *
 * Loads every product joined with its category and supplier, makes sure the
 * index directory exists, creates the index there and adds one document per
 * row.
 */
public function index()
{
    $sql = "SELECT * FROM Products AS p JOIN Categories AS c ON p.CategoryID = c.CategoryId JOIN Suppliers AS s ON p.SupplierID = s.SupplierID";
    $statement = $this->db->prepare($sql);
    $statement->execute();
    $products = $statement->fetchAll(PDO::FETCH_ASSOC);

    $indexDir = APP_PATH . '/' . self::INDEX_DIR;
    if (!is_dir($indexDir)) {
        mkdir($indexDir, 0777, true);
    }

    $index = self::create($indexDir);

    foreach ($products as $product) {
        $document = new Zend_Search_Lucene_Document();

        $nameField = Zend_Search_Lucene_Field::keyword('ProductName', $product['ProductName']);
        $document->addField($nameField);

        $quantityField = Zend_Search_Lucene_Field::text('Quantity', $product['QuantityPerUnit']);
        $document->addField($quantityField);

        $categoryField = Zend_Search_Lucene_Field::keyword('Category', $product['CategoryName']);
        $document->addField($categoryField);

        // Stored for display only, not searchable
        $descriptionField = Zend_Search_Lucene_Field::unIndexed('Description', $product['Description']);
        $document->addField($descriptionField);

        // Searchable only, not stored
        $cityField = Zend_Search_Lucene_Field::unStored('City', $product['City']);
        $document->addField($cityField);

        $companyField = Zend_Search_Lucene_Field::keyword('CompanyName', $product['CompanyName']);
        $document->addField($companyField);

        $pictureField = Zend_Search_Lucene_Field::binary('Picture', $product['Picture']);
        $document->addField($pictureField);

        $index->addDocument($document);
    }
}
/**
 * Index a field.
 *
 * The value is added as an unstored field (searchable, not retrievable)
 * carrying the given boost.
 *
 * @param string $name  the field name
 * @param mixed  $value the field value
 * @param float  $boost the boost value
 *
 * @return dmSearchDocument the search_document instance
 */
protected function index($name, $value, $boost = 1.0)
{
    $field = Zend_Search_Lucene_Field::unStored($name, $value);
    $field->boost = $boost;
    $this->addField($field);

    // The documented contract promises the document instance; previously
    // nothing was returned, so chained calls received null.
    return $this;
}
/**
 * indexFieldNow
 *
 * Resolves a field value to a display string (and, where available, a list
 * of bracketed ids) and adds it to the given Lucene document using the
 * search field type configured on the field. Any Exception raised while
 * doing so is logged and not rethrown.
 *
 * @param GenericElementField $objField
 * @param string $strField name of the Lucene field to add
 * @param integer $intFieldType compared against GenericSetup::FILE_FIELD
 * @param string|array|object $mixedFieldValue
 * @param Zend_Search_Lucene_Document $objDoc document the fields are added to
 * @return void
 * @author Thomas Schedler <*****@*****.**>
 */
protected final function indexFieldNow($objField, $strField, $intFieldType, $mixedFieldValue, Zend_Search_Lucene_Document &$objDoc)
{
    try {
        $strValue = '';
        $strValueIds = '';

        if ($objField->typeId == GenericSetup::FIELD_TYPE_ID_TAG) {
            // Tag field: join the tag titles with ", " and collect the ids as "[id][id]..."
            $mixedValue = $mixedFieldValue;
            if (is_object($mixedValue) || is_array($mixedValue)) {
                foreach ($mixedValue as $objTag) {
                    $strValue .= $objTag->title . ', ';
                    $strValueIds .= '[' . $objTag->id . ']';
                }
                $strValue = rtrim($strValue, ', ');
            }
        } elseif (!is_object($mixedFieldValue) && $objField->sqlSelect != '') {
            // Field backed by a SQL select: the value holds ids whose titles are looked up
            $sqlSelect = $objField->sqlSelect;
            $arrIds = array();
            if (is_array($mixedFieldValue)) {
                $arrIds = $mixedFieldValue;
            } else {
                if ($mixedFieldValue != '') {
                    if (strpos($mixedFieldValue, '[') !== false) {
                        // "[1][2][3]" -> array('1', '2', '3')
                        $mixedFieldValue = trim($mixedFieldValue, '[]');
                        $arrIds = explode('][', $mixedFieldValue);
                    } else {
                        $arrIds = array($mixedFieldValue);
                    }
                }
            }
            if (is_array($arrIds)) {
                if (count($arrIds) > 0) {
                    // Build the comma separated id list for the IN (...) clause.
                    // NOTE(review): the ids are concatenated into the SQL text as-is; no
                    // escaping or casting is visible here - confirm they are trusted.
                    $strReplaceWhere = '';
                    foreach ($arrIds as $strId) {
                        $strReplaceWhere .= $strId . ',';
                    }
                    $strReplaceWhere = trim($strReplaceWhere, ',');
                    $objReplacer = new Replacer();
                    $sqlSelect = $objReplacer->sqlReplacer($sqlSelect, $this->setup->getLanguageId(), $this->setup->getRootLevelId(), ' AND tbl.id IN (' . $strReplaceWhere . ')');
                    $objCategoriesData = $this->core->dbh->query($sqlSelect)->fetchAll(Zend_Db::FETCH_OBJ);
                    if (count($objCategoriesData) > 0) {
                        foreach ($objCategoriesData as $objCategories) {
                            $strValue .= $objCategories->title . ', ';
                            $strValueIds .= '[' . $objCategories->id . ']';
                        }
                        $strValue = rtrim($strValue, ', ');
                    }
                }
            }
        } else {
            // Plain value: decode HTML entities using the configured default encoding
            $strValue = html_entity_decode($mixedFieldValue, ENT_COMPAT, $this->core->sysConfig->encoding->default);
        }

        // Nothing is indexed for empty or non-string values
        if (is_string($strValue) && $strValue != '') {
            if ($intFieldType == GenericSetup::FILE_FIELD) {
                // File field: the value is passed to loadFilesById(); the indexed value
                // becomes a serialized list of path/filename/version entries and the
                // original value is kept as the ids string.
                $objFiles = $this->getModelFiles()->loadFilesById($strValue);
                $arrValues = array();
                if (count($objFiles) > 0) {
                    foreach ($objFiles as $objFile) {
                        $arrValues[] = array('path' => $objFile->path, 'filename' => $objFile->filename, 'version' => $objFile->version);
                    }
                }
                $strValueIds = $strValue;
                $strValue = serialize($arrValues);
            }

            // Ids are stored (not indexed) in a companion "<field>Ids" field
            if ($strValueIds != '') {
                $objDoc->addField(Zend_Search_Lucene_Field::unIndexed($strField . 'Ids', $strValueIds, $this->core->sysConfig->encoding->default));
            }
            $this->core->logger->debug($strField . ': ' . $strValue);

            // Add the value with the Lucene field type configured for this field;
            // an unrecognized type adds nothing.
            switch ($objField->idSearchFieldTypes) {
                case Search::FIELD_TYPE_KEYWORD:
                    $objDoc->addField(Zend_Search_Lucene_Field::keyword($strField, $strValue, $this->core->sysConfig->encoding->default));
                    break;
                case Search::FIELD_TYPE_UNINDEXED:
                    $objDoc->addField(Zend_Search_Lucene_Field::unIndexed($strField, $strValue, $this->core->sysConfig->encoding->default));
                    break;
                case Search::FIELD_TYPE_BINARY:
                    $objDoc->addField(Zend_Search_Lucene_Field::binary($strField, $strValue, $this->core->sysConfig->encoding->default));
                    break;
                case Search::FIELD_TYPE_TEXT:
                    $objDoc->addField(Zend_Search_Lucene_Field::text($strField, $strValue, $this->core->sysConfig->encoding->default));
                    break;
                case Search::FIELD_TYPE_UNSTORED:
                    // Markup is stripped before indexing unstored text
                    $objDoc->addField(Zend_Search_Lucene_Field::unStored($strField, strip_tags($strValue), $this->core->sysConfig->encoding->default));
                    break;
            }
        }
    } catch (Exception $exc) {
        // Failures are logged only; the caller is not notified
        $this->core->logger->err($exc);
    }
}
/**
 * addToIndex
 *
 * Opens (or creates) the Lucene index at the given path if none is held yet,
 * builds one document from the current setup's fields, adds it to the index
 * and optimizes the index. Any Exception raised while doing so is logged and
 * not rethrown.
 *
 * @param string $strIndexPath directory of the Lucene index
 * @param string $strKey value stored in the document's "key" keyword field
 * @author Thomas Schedler <*****@*****.**>
 * @version 1.0
 */
protected final function addToIndex($strIndexPath, $strKey)
{
    try {
        // Initialize the index handle only when none is held yet
        if (!is_object($this->objIndex) || !$this->objIndex instanceof Zend_Search_Lucene) {
            // More than two scandir() entries is treated as "an index already exists"
            // (presumably to skip "." and ".." - confirm)
            if (count(scandir($strIndexPath)) > 2) {
                $this->objIndex = Zend_Search_Lucene::open($strIndexPath);
            } else {
                $this->objIndex = Zend_Search_Lucene::create($strIndexPath);
            }
        }

        $objDoc = new Zend_Search_Lucene_Document();
        $objDoc->addField(Zend_Search_Lucene_Field::keyword('key', $strKey));
        $objDoc->addField(Zend_Search_Lucene_Field::unIndexed('date', $this->setup->getPublishDate('d.m.Y')));
        $objDoc->addField(Zend_Search_Lucene_Field::unIndexed('rootLevelId', $this->setup->getRootLevelId()));

        /**
         * index fields
         */
        foreach ($this->setup->FieldNames() as $strField => $intFieldType) {
            $objField = $this->setup->getField($strField);

            // Skip unknown fields and fields configured as not searchable
            if (is_object($objField) && $objField->idSearchFieldTypes != Search::FIELD_TYPE_NONE) {
                $strValue = '';
                if (is_array($objField->getValue()) && $objField->sqlSelect != '') {
                    // Array value with a SQL select: look up the titles for the ids
                    $arrIds = $objField->getValue();
                    $sqlSelect = $objField->sqlSelect;
                    if (is_array($arrIds)) {
                        if (count($arrIds) > 0) {
                            // Build the comma separated id list for the IN (...) clause.
                            // NOTE(review): the ids are concatenated into the SQL text as-is; no
                            // escaping or casting is visible here - confirm they are trusted.
                            $strReplaceWhere = '';
                            foreach ($arrIds as $strId) {
                                $strReplaceWhere .= $strId . ',';
                            }
                            $strReplaceWhere = trim($strReplaceWhere, ',');
                            $objReplacer = new Replacer();
                            $sqlSelect = $objReplacer->sqlReplacer($sqlSelect, $this->setup->getLanguageId(), $this->setup->getRootLevelId(), ' AND tbl.id IN (' . $strReplaceWhere . ')');
                            $objCategoriesData = $this->core->dbh->query($sqlSelect)->fetchAll(Zend_Db::FETCH_OBJ);
                            if (count($objCategoriesData) > 0) {
                                // Indexed value is the titles joined with ", "
                                foreach ($objCategoriesData as $objCategories) {
                                    $strValue .= $objCategories->title . ', ';
                                }
                                $strValue = rtrim($strValue, ', ');
                            }
                        }
                    }
                } else {
                    $strValue = $objField->getValue();
                }

                // Empty values are not indexed
                if ($strValue != '') {
                    // Add the value with the Lucene field type configured for this field;
                    // an unrecognized type adds nothing.
                    switch ($objField->idSearchFieldTypes) {
                        case Search::FIELD_TYPE_KEYWORD:
                            $objDoc->addField(Zend_Search_Lucene_Field::keyword($strField, $strValue, $this->core->sysConfig->encoding->default));
                            break;
                        case Search::FIELD_TYPE_UNINDEXED:
                            $objDoc->addField(Zend_Search_Lucene_Field::unIndexed($strField, $strValue, $this->core->sysConfig->encoding->default));
                            break;
                        case Search::FIELD_TYPE_BINARY:
                            $objDoc->addField(Zend_Search_Lucene_Field::binary($strField, $strValue, $this->core->sysConfig->encoding->default));
                            break;
                        case Search::FIELD_TYPE_TEXT:
                            $objDoc->addField(Zend_Search_Lucene_Field::text($strField, $strValue, $this->core->sysConfig->encoding->default));
                            break;
                        case Search::FIELD_TYPE_UNSTORED:
                            // Markup is stripped before indexing unstored text
                            $objDoc->addField(Zend_Search_Lucene_Field::unStored($strField, strip_tags($strValue), $this->core->sysConfig->encoding->default));
                            break;
                    }
                }
            }
        }

        // Add document to the index.
        $this->objIndex->addDocument($objDoc);
        // The index is optimized after every single added document
        $this->objIndex->optimize();
    } catch (Exception $exc) {
        // Failures are logged only; the caller is not notified
        $this->core->logger->err($exc);
    }
}
/**
 * Synchronise this job with the Lucene index.
 *
 * Existing documents for the job's primary key are always removed. A new
 * document is added and committed only when the job is activated and not
 * expired.
 *
 * @ORM\PostPersist
 */
public function updateLuceneIndex()
{
    $index = self::getLuceneIndex();

    // Purge whatever is currently indexed for this job
    $previousHits = $index->find('pk:' . $this->getId());
    foreach ($previousHits as $previousHit) {
        $index->delete($previousHit->id);
    }

    // Expired or non-activated jobs must not be searchable
    $indexable = !$this->isExpired() && $this->getIsActivated();
    if (!$indexable) {
        return;
    }

    $encoding = 'utf-8';
    $document = new \Zend_Search_Lucene_Document();

    // Primary key, used above to locate the document again
    $document->addField(\Zend_Search_Lucene_Field::keyword('pk', $this->getId()));

    // Searchable job fields (indexed, not stored)
    $positionField = \Zend_Search_Lucene_Field::unStored('position', $this->getPosition(), $encoding);
    $document->addField($positionField);

    $companyField = \Zend_Search_Lucene_Field::unStored('company', $this->getCompany(), $encoding);
    $document->addField($companyField);

    $locationField = \Zend_Search_Lucene_Field::unStored('location', $this->getLocation(), $encoding);
    $document->addField($locationField);

    $descriptionField = \Zend_Search_Lucene_Field::unStored('description', $this->getDescription(), $encoding);
    $document->addField($descriptionField);

    $index->addDocument($document);
    $index->commit();
}
/**
 * A refactored method to add the document to the index.
 *
 * Besides the given values, a "Summary" field holding the first 250
 * characters of the content is stored for display.
 *
 * @param int    $docid      Document id, converted with PHPLuceneIndexer::longToString()
 * @param string $content    Document content (UTF-8)
 * @param string $discussion Discussion text (UTF-8), indexed but not stored
 * @param string $title      Document title (UTF-8)
 * @param string $version    Document version (UTF-8)
 */
private function addDocument($docid, $content, $discussion, $title, $version)
{
    // The content is declared as UTF-8 below, so the teaser has to be cut on
    // character boundaries: a byte-wise substr() can split a multibyte
    // character and leave an invalid sequence at the end of the summary.
    if (function_exists('mb_substr')) {
        $teaser = mb_substr($content, 0, 250, 'UTF-8');
    } else {
        // mbstring not available: fall back to the byte-wise cut
        $teaser = substr($content, 0, 250);
    }

    $doc = new Zend_Search_Lucene_Document();
    $doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid)));
    $doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('Discussion', $discussion, 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('Version', $version, 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('Summary', $teaser, 'UTF-8'));

    $this->lucene->addDocument($doc);
}
/**
 * Turn a raw map into a Lucene document.
 *
 * Every document gets an unstored "_id" field and a binary "body" field
 * produced by getBody(). On top of that, each field named by one of the
 * table's indexes is added as an unstored field when the map has that key;
 * array values are flattened into a space separated string.
 *
 * @param MingoTable $table
 * @param array $map the raw map passed to {@link insert()} or {@link update()}
 * @return Zend_Search_Lucene_Document
 */
protected function normalizeMap(MingoTable $table, array $map)
{
    $document = new Zend_Search_Lucene_Document();

    // fields present in every document
    $idField = Zend_Search_Lucene_Field::unStored('_id', $map['_id']);
    $document->addField($idField);

    $bodyField = Zend_Search_Lucene_Field::binary('body', $this->getBody($map));
    $document->addField($bodyField);

    // one searchable field per indexed column
    foreach ($table->getIndexes() as $tableIndex) {
        foreach ($tableIndex->getFields() as $fieldName => $fieldOptions) {
            // array_key_exists (not isset) so that null values are indexed too
            if (!array_key_exists($fieldName, $map)) {
                continue;
            }

            $value = is_array($map[$fieldName]) ? implode(' ', $map[$fieldName]) : $map[$fieldName];
            $document->addField(Zend_Search_Lucene_Field::unStored($fieldName, $value));

            // a field shared by several indexes must only be added once
            unset($map[$fieldName]);
        }
    }

    return $document;
}
/**
 * Add a page to the search index.
 *
 * A page model is converted to an array first and its content is assembled
 * by concatenating the content of all of its containers. Anything else is
 * used as the page array as-is.
 *
 * @param Application_Model_Models_Page|array $page               Page model or page data array
 * @param mixed                               $toasterSearchIndex Not used by this method
 *
 * @return false|null false when the index could not be initialised, nothing otherwise
 */
public static function addPageToIndex($page, $toasterSearchIndex = false)
{
    $indexReady = self::initIndex();
    if (!$indexReady) {
        return false;
    }

    if ($page instanceof Application_Model_Models_Page) {
        $page = $page->toArray();

        $containerMapper = Application_Model_Mappers_ContainerMapper::getInstance();
        $containers = $containerMapper->findByPageId($page['id']);

        // Page content is the concatenation of all container contents
        $page['content'] = '';
        if (!empty($containers)) {
            foreach ($containers as $container) {
                $page['content'] .= $container->getContent();
            }
        }
    }

    $encoding = 'UTF-8';
    $document = new Zend_Search_Lucene_Document();

    $document->addField(Zend_Search_Lucene_Field::keyword('pageId', $page['id']));

    // Searchable only (indexed, not stored)
    $document->addField(Zend_Search_Lucene_Field::unStored('metaKeyWords', $page['metaKeywords'], $encoding));
    $document->addField(Zend_Search_Lucene_Field::unStored('metaDescription', $page['metaDescription'], $encoding));
    $document->addField(Zend_Search_Lucene_Field::unStored('headerTitle', $page['headerTitle'], $encoding));
    $document->addField(Zend_Search_Lucene_Field::unStored('content', $page['content'], $encoding));

    // Searchable and stored
    $document->addField(Zend_Search_Lucene_Field::text('draft', $page['draft'], $encoding));
    $document->addField(Zend_Search_Lucene_Field::text('teaserText', $page['teaserText'], $encoding));
    $document->addField(Zend_Search_Lucene_Field::text('url', $page['url'], $encoding));
    $document->addField(Zend_Search_Lucene_Field::text('navName', $page['navName'], $encoding));
    $document->addField(Zend_Search_Lucene_Field::text('h1', $page['h1'], $encoding));

    self::$_index->addDocument($document);
}
/**
 * Build the Lucene document(s) for a node and add them to the index.
 *
 * The shared document carries the node location, basic file information, a
 * serialized copy of the metadata, the indexable shared metadata and, when
 * content parsing is enabled and the file is small enough, the file body.
 * When user-scoped indexable metadata exists and a user is logged in, an
 * additional private document is added to the index before the shared one.
 *
 * @param AJXP_Node $ajxpNode
 * @param Zend_Search_Lucene_Interface $index
 * @throws Exception When a content loader returns no document
 * @return Zend_Search_Lucene_Document The shared document
 */
public function createIndexedDocument($ajxpNode, &$index)
{
    $ajxpNode->loadNodeInfo();
    $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION));

    // Content is parsed only when enabled and the file does not exceed the configured size
    $parseContent = $this->indexContent;
    if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) {
        $parseContent = false;
    }

    // Pick the document loader matching the file type
    if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) {
        $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl());
    } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) {
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile);
    } elseif ($parseContent && $ext == "pptx" && class_exists("Zend_Search_Lucene_Document_Pptx")) {
        // Fixed: this branch used to test for "docx" again, so it was
        // unreachable and pptx files were never parsed.
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile);
    } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) {
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile);
    } else {
        $doc = new Zend_Search_Lucene_Document();
    }
    if ($doc == null) {
        throw new Exception("Could not load document");
    }

    // Fixed: the encoding used to be passed as a second argument to
    // addField(), where it has no effect on the field. It belongs to the
    // field factory, as the private document below already does.
    $encoding = SystemTextEncoding::getEncoding();

    $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath()), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes", $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared"));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime)));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize));

    // Fall back to the raw file extension when the node has no mime value
    $ajxpMime = $ajxpNode->ajxp_mime;
    if (empty($ajxpMime)) {
        $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)));
    } else {
        $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpNode->ajxp_mime));
    }

    // Store a cached copy of the metadata
    $serializedMeta = base64_encode(serialize($ajxpNode->metadata));
    $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));

    if (isset($ajxpNode->indexableMetaKeys["shared"])) {
        foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) {
            if ($ajxpNode->{$sharedField}) {
                $doc->addField(Zend_Search_Lucene_Field::keyword($sharedField, $ajxpNode->{$sharedField}));
            }
        }
    }
    foreach ($this->metaFields as $field) {
        if ($ajxpNode->{$field} != null) {
            $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}, $encoding));
        }
    }

    // User-scoped metadata goes into a separate document tied to the logged user
    if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) {
        $privateDoc = new Zend_Search_Lucene_Document();
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user"));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId()));
        foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) {
            if ($ajxpNode->{$userField}) {
                $privateDoc->addField(Zend_Search_Lucene_Field::keyword($userField, $ajxpNode->{$userField}));
            }
        }
        $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
        $index->addDocument($privateDoc);
    }

    // Plain text files: index the raw content
    if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_TXT")))) {
        $doc->addField(Zend_Search_Lucene_Field::unStored("body", file_get_contents($ajxpNode->getUrl())));
    }

    // Office documents: convert to text through unoconv when configured
    $unoconv = $this->getFilteredOption("UNOCONV");
    $pipe = false;
    if ($parseContent && !empty($unoconv) && in_array($ext, array("doc", "odt", "xls", "ods"))) {
        $targetExt = "txt";
        if (in_array($ext, array("xls", "ods"))) {
            $targetExt = "csv";
        } else {
            // NOTE(review): "odp"/"ppt" are not in the extension list of the
            // enclosing condition, so this branch (and the $pipe handling
            // below) can currently never run. Left as-is; confirm intent
            // before enabling presentation conversion.
            if (in_array($ext, array("odp", "ppt"))) {
                $targetExt = "pdf";
                $pipe = true;
            }
        }
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $unoconv = "HOME=" . AJXP_Utils::getAjxpTmpDir() . " " . $unoconv . " --stdout -f {$targetExt} " . escapeshellarg($realFile);
        if ($pipe) {
            $newTarget = str_replace(".{$ext}", ".pdf", $realFile);
            // The redirect target is derived from a file name, so it is escaped too
            $unoconv .= " > " . escapeshellarg($newTarget);
            register_shutdown_function("unlink", $newTarget);
        }
        $output = array();
        exec($unoconv, $output, $return);
        if (!$pipe) {
            $out = implode("\n", $output);
            $enc = 'ISO-8859-1';
            $asciiString = iconv($enc, 'ASCII//TRANSLIT//IGNORE', $out);
            $doc->addField(Zend_Search_Lucene_Field::unStored("body", $asciiString));
        } else {
            // Let the pdftotext step below pick up the converted file
            $ext = "pdf";
        }
    }

    // PDF files: extract the text through pdftotext when configured
    $pdftotext = $this->getFilteredOption("PDFTOTEXT");
    if ($parseContent && !empty($pdftotext) && in_array($ext, array("pdf"))) {
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        if ($pipe && isset($newTarget) && is_file($newTarget)) {
            $realFile = $newTarget;
        }
        $cmd = $pdftotext . " " . escapeshellarg($realFile) . " -";
        $output = array();
        exec($cmd, $output, $return);
        $out = implode("\n", $output);
        $enc = 'UTF8';
        $asciiString = iconv($enc, 'ASCII//TRANSLIT//IGNORE', $out);
        $doc->addField(Zend_Search_Lucene_Field::unStored("body", $asciiString));
    }

    $index->addDocument($doc);
    return $doc;
}
/**
 * Build the Lucene document(s) for a node and add them to the index.
 *
 * The shared document carries the node location, basic file information, a
 * serialized copy of the metadata, the indexable shared metadata and, when
 * content parsing is enabled and the file is small enough, the body returned
 * by extractIndexableContent(). When user-scoped indexable metadata exists
 * and a user is logged in, an additional private document is added to the
 * index before the shared one.
 *
 * @param AJXP_Node $ajxpNode
 * @param Zend_Search_Lucene_Interface $index
 * @throws Exception When a content loader returns no document
 * @return Zend_Search_Lucene_Document The shared document
 */
public function createIndexedDocument($ajxpNode, &$index)
{
    // Different loadNodeInfo() arguments are used when meta fields are configured
    if (!empty($this->metaFields)) {
        $ajxpNode->loadNodeInfo(false, false, "all");
    } else {
        $ajxpNode->loadNodeInfo();
    }
    $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION));

    // Content is parsed only when enabled and the file does not exceed the configured size
    $parseContent = $this->indexContent;
    if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) {
        $parseContent = false;
    }

    // Pick the document loader matching the file type
    if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) {
        $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl());
    } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) {
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile);
    } elseif ($parseContent && $ext == "pptx" && class_exists("Zend_Search_Lucene_Document_Pptx")) {
        // Fixed: this branch used to test for "docx" again, so it was
        // unreachable and pptx files were never parsed.
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile);
    } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) {
        $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl());
        $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile);
    } else {
        $doc = new Zend_Search_Lucene_Document();
    }
    if ($doc == null) {
        throw new Exception("Could not load document");
    }

    // Fixed: the encoding used to be passed as a second argument to
    // addField(), where it has no effect on the field. It belongs to the
    // field factory, as the private document below already does.
    $encoding = SystemTextEncoding::getEncoding();

    $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath()), $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes", $encoding));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared"));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime)));
    $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize));

    // Fall back to the raw file extension when the node has no mime value
    $ajxpMime = $ajxpNode->ajxp_mime;
    if (empty($ajxpMime)) {
        $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)));
    } else {
        $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpNode->ajxp_mime));
    }

    // Store a cached copy of the metadata
    $serializedMeta = base64_encode(serialize($ajxpNode->metadata));
    $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));

    if (isset($ajxpNode->indexableMetaKeys["shared"])) {
        foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) {
            if ($ajxpNode->{$sharedField}) {
                $doc->addField(Zend_Search_Lucene_Field::keyword($sharedField, $ajxpNode->{$sharedField}));
            }
        }
    }
    foreach ($this->metaFields as $field) {
        if ($ajxpNode->{$field} != null) {
            $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}, $encoding));
        }
    }

    // User-scoped metadata goes into a separate document tied to the logged user
    if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) {
        $privateDoc = new Zend_Search_Lucene_Document();
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), $encoding));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), $encoding));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user"));
        $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId()));
        foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) {
            if ($ajxpNode->{$userField}) {
                $privateDoc->addField(Zend_Search_Lucene_Field::keyword($userField, $ajxpNode->{$userField}));
            }
        }
        $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta));
        $index->addDocument($privateDoc);
    }

    // Index the extracted body, when there is one
    if ($parseContent) {
        $body = $this->extractIndexableContent($ajxpNode);
        if (!empty($body)) {
            $doc->addField(Zend_Search_Lucene_Field::unStored("body", $body));
        }
    }

    $index->addDocument($doc);
    return $doc;
}
$document = Entrada_Search_Lucene_Document::factory(FILE_STORAGE_PATH . "/" . $file['efile_id'], $path['extension']); if (!is_null($document)) { $filesBody .= ' ' . $document->body; } } $document = new Zend_Search_Lucene_Document(); $document->addField(Zend_Search_Lucene_Field::text('title', $result['event_title'])); $document->addField(Zend_Search_Lucene_Field::keyword('event_id', $result['event_id'])); $document->addField(Zend_Search_Lucene_Field::unStored('description', $result['event_description'])); $document->addField(Zend_Search_Lucene_Field::unStored('goals', $result['event_goals'])); $document->addField(Zend_Search_Lucene_Field::unStored('objectives', $result['event_objectives'])); $document->addField(Zend_Search_Lucene_Field::unStored('message', $result['event_message'])); $document->addField(Zend_Search_Lucene_Field::keyword('audience_type', $result['audience_type'])); $document->addField(Zend_Search_Lucene_Field::keyword('audience_value', $result['event_cohort'])); $document->addField(Zend_Search_Lucene_Field::keyword('event_start', $result['event_start'])); $document->addField(Zend_Search_Lucene_Field::unStored('files_body', $filesBody)); $document->addField(Zend_Search_Lucene_Field::keyword('organisation_id', $result['organisation_id'])); $index->addDocument($document); } break; case 'optimize': $index = Zend_Search_Lucene::open($path . '/' . $input->index); $index->optimize(); break; case 'search': $index = Zend_Search_Lucene::open($path . '/' . $input->index); $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($input->term); $results = $index->find($userQuery); $textTable = new Zend_Text_Table(array('columnWidths' => array(12, 12, 5, 45))); $textTable->appendRow(array('Document ID', 'Database ID', 'Score', 'Title')); foreach ($results as $hit) {
/**
 * Build the Lucene document for a bean.
 *
 * DocumentRevisions beans start from the document produced by
 * getDocumentForRevision(); every other bean starts from an empty document.
 * The bean's id and module are stored as unindexed fields, then the fields
 * declared in the bean's dictionary entry are indexed according to their type.
 *
 * @param SugarBean $bean
 *
 * @return array Array with the keys "error" and "document"; returned
 *               unchanged as soon as "error" is truthy
 */
public function getDocumentForBean(SugarBean $bean)
{
    if ($bean->module_name == 'DocumentRevisions') {
        $document = $this->getDocumentForRevision($bean);
    } else {
        $document = array("error" => false, "document" => new Zend_Search_Lucene_Document());
    }
    if ($document["error"]) {
        return $document;
    }

    // Object handle: fields added here end up in $document["document"]
    $luceneDoc = $document["document"];

    $luceneDoc->addField(Zend_Search_Lucene_Field::UnIndexed("aod_id", $bean->module_name . " " . $bean->id));
    $luceneDoc->addField(Zend_Search_Lucene_Field::UnIndexed("record_id", $bean->id));
    $luceneDoc->addField(Zend_Search_Lucene_Field::UnIndexed("record_module", $bean->module_name));

    foreach ($GLOBALS['dictionary'][$bean->getObjectName()]['fields'] as $key => $fieldDef) {
        switch ($fieldDef['type']) {
            case "enum":
                $luceneDoc->addField(Zend_Search_Lucene_Field::Keyword($key, strtolower($bean->{$key})));
                break;

            case "multienum":
                // Fixed: the bean property is named by $key. The previous code
                // used the field definition array as the property name, so the
                // multienum value was never read.
                $vals = unencodeMultienum($bean->{$key});
                $luceneDoc->addField(Zend_Search_Lucene_Field::unStored($key, strtolower(implode(" ", $vals))));
                break;

            case "name":
            case "phone":
            case "html":
            case "text":
            case "url":
            case "varchar":
                if (property_exists($bean, $key)) {
                    $val = strtolower($bean->{$key});
                } else {
                    $val = '';
                }
                // Separate variable so the loop's field definition is not overwritten
                $luceneField = Zend_Search_Lucene_Field::unStored($key, $val);
                if ($key == "name") {
                    // Matches on the name weigh more than other text fields
                    $luceneField->boost = 1.5;
                }
                $luceneDoc->addField($luceneField);
                break;

            // address, bool, currency, date, datetimecombo, decimal, float,
            // iframe, int, radioenum, relate and anything else: not indexed
            default:
                break;
        }
    }

    return $document;
}
/**
 * Build the search document for this post.
 *
 * The post id is stored as a keyword; title, content (markup stripped),
 * publication timestamp and the space separated tags are indexed as
 * unstored fields.
 *
 * @return Zend_Search_Lucene_Document
 */
public function getIndexableDocument()
{
    $document = new Zend_Search_Lucene_Document();

    $idField = Zend_Search_Lucene_Field::keyword('post_id', $this->getId());
    $document->addField($idField);

    $searchable = array(
        'title'     => $this->profile->title,
        'content'   => strip_tags($this->profile->content),
        'published' => $this->profile->ts_published,
        'tags'      => implode(' ', $this->getTags()),
    );

    foreach ($searchable as $fieldName => $fieldValue) {
        $unstored = Zend_Search_Lucene_Field::unStored($fieldName, $fieldValue);
        $document->addField($unstored);
    }

    return $document;
}
/**
 * Build the forum search index from scratch.
 *
 * Raises the memory limit and removes the execution time limit, creates a new
 * index at $this->_indexPath and adds one document per row of
 * forum_questions: the id as keyword, the question as unstored text and the
 * tags (commas replaced by spaces) as text.
 *
 * @return void
 */
public function buildforum()
{
    ini_set('memory_limit', '1000M');
    set_time_limit(0);

    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    // Create index
    $index = Zend_Search_Lucene::create($this->_indexPath);

    // Fetch the forum questions to index
    $columns = array('tags', 'question', 'id');
    $select = $this->_db->select()->from('forum_questions', $columns);
    $rows = $this->_db->fetchAssoc($select);

    foreach ($rows as $row) {
        $questionDoc = new Zend_Search_Lucene_Document();

        $idField = Zend_Search_Lucene_Field::keyword('questionid', $row['id']);
        $questionDoc->addField($idField);

        $questionField = Zend_Search_Lucene_Field::unStored('questions', $row['question']);
        $questionDoc->addField($questionField);

        // Comma separated tag list -> space separated, so each tag becomes a token
        $tagList = explode(',', $row['tags']);
        $tagText = implode(' ', $tagList);
        $questionDoc->addField(Zend_Search_Lucene_Field::text('tags', $tagText));

        $index->addDocument($questionDoc);
    }

    $index->commit();
}
/**
 * Rebuild the user search index from scratch.
 *
 * A new, empty index is created at $this->indexfile (replacing any existing
 * one), every user returned by Users::getSet([]) is added with the username
 * as stored text and the name as unstored text, and the index is optimized.
 *
 * @return bool Always true
 */
public function recreateSearchIndex()
{
    $users = new Users();

    // Always start from an empty index. Opening an existing index and adding
    // every user again, as this method did before, duplicated all documents
    // on each run.
    $index = \Zend_Search_Lucene::create($this->indexfile);

    foreach ($users->getSet([]) as $user) {
        $doc = new \Zend_Search_Lucene_Document();
        $doc->addField(\Zend_Search_Lucene_Field::Text('username', $user->username));
        $doc->addField(\Zend_Search_Lucene_Field::unStored('name', $user->name));
        $index->addDocument($doc);
    }

    $index->optimize();

    return true;
}