/**
 * Rebuilds the page search index from scratch.
 *
 * A new Lucene index is created at APPLICATION_PATH . '/indexes', one
 * document is added for every row returned by Model_Page::fetchAll(), the
 * index is optimized, and the resulting document count is handed to the
 * view as `indexSize`.
 */
public function buildAction()
{
    $searchIndex = Zend_Search_Lucene::create(APPLICATION_PATH . '/indexes');

    $pageModel = new Model_Page();
    $pageRows = $pageModel->fetchAll();

    // Lucene field name => page property. Text fields are both searchable
    // and returned with each hit.
    $textFields = array(
        'page_name'        => 'name',
        'page_headline'    => 'headline',
        'page_description' => 'description',
        'page_content'     => 'content',
    );

    if ($pageRows->count() > 0) {
        foreach ($pageRows as $pageRow) {
            $item = new CMS_Content_Item_Page($pageRow->id);
            $document = new Zend_Search_Lucene_Document();

            // The id is stored so it comes back with each hit, but it is
            // not searchable.
            $document->addField(Zend_Search_Lucene_Field::unIndexed('page_id', $item->id));

            foreach ($textFields as $fieldName => $property) {
                $document->addField(Zend_Search_Lucene_Field::text($fieldName, $item->{$property}));
            }

            $searchIndex->addDocument($document);
        }
    }

    $searchIndex->optimize();

    // Report the final index size to the view.
    $this->view->indexSize = $searchIndex->numDocs();
}
/**
 * Replaces the search-index document for one record.
 *
 * Opens the index at '../application/searchindex', installs the UTF-8
 * case-insensitive analyzer as the default, deletes every hit for
 * "pk:<pk>", then adds and commits a fresh document built from $data.
 * Any exception raised while opening the index propagates to the caller.
 *
 * @param array $data reads the keys pk, code, type, id, title, description
 */
public static function update($data)
{
    $searchIndex = Zend_Search_Lucene::open('../application/searchindex');
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive()
    );

    // Drop whatever is currently indexed under this primary key.
    $staleHits = $searchIndex->find('pk:' . $data['pk']);
    foreach ($staleHits as $staleHit) {
        $searchIndex->delete($staleHit->id);
    }

    $fields = array(
        Zend_Search_Lucene_Field::Keyword('pk', $data['pk']),
        Zend_Search_Lucene_Field::Keyword('code', $data['code'], 'UTF-8'),
        Zend_Search_Lucene_Field::Keyword('u_code', strtolower($data['code']), 'UTF-8'),
        Zend_Search_Lucene_Field::unIndexed('type', $data['type'], 'UTF-8'),
        Zend_Search_Lucene_Field::unIndexed('id', $data['id'], 'UTF-8'),
        Zend_Search_Lucene_Field::Text('title', $data['title'], 'UTF-8'),
        Zend_Search_Lucene_Field::Text(
            'en_title',
            Default_Model_Functions::convert_vi_to_en($data['title']),
            'UTF-8'
        ),
        Zend_Search_Lucene_Field::Text('description', $data['description'], 'UTF-8'),
        Zend_Search_Lucene_Field::Text(
            'en_description',
            Default_Model_Functions::convert_vi_to_en($data['description']),
            'UTF-8'
        ),
    );

    $document = new Zend_Search_Lucene_Document();
    foreach ($fields as $field) {
        $document->addField($field);
    }

    $searchIndex->addDocument($document);
    $searchIndex->commit();
}
/**
 * Updates the search index entry for a record.
 *
 * Existing documents for the record are always removed first. Unless
 * $delete is true, or the record has a canSearch() method that returns a
 * falsy value, a new document is then built from the model's search.yml
 * configuration, added to the index and committed.
 *
 * @param Doctrine_Record $object record to (re)index
 * @param boolean         $delete when true, only remove the record from the index
 *
 * @throws Exception when no models are configured, when the record's model
 *                   is not configured for this index, or when the model
 *                   configuration lacks a title, description or route
 */
public function updateIndex(Doctrine_Record $object, $delete = false)
{
    /* error checking */
    if (!array_key_exists('models', $this->config) || empty($this->config['models'])) {
        // The original passed an undefined $name to sprintf() here; the
        // message has no placeholder, so the literal is thrown as-is.
        throw new Exception('No models set in search.yml');
    }

    $model = get_class($object);
    if (!array_key_exists($model, $this->config['models'])) {
        throw new Exception(sprintf('Model "%s" not defined in "%s" index in your search.yml', $model, $this->name));
    }

    $id = $this->generateId($object->getId(), $model);
    $config = $this->config['models'][$model];

    // delete existing entries
    foreach ($this->search('_id:"' . $id . '"') as $hit) {
        $this->getIndex()->delete($hit->id);
    }

    if ($delete) {
        return;
    }

    // only add to search if canSearch method on model returns true
    // (always add when the method does not exist)
    if (method_exists($object, 'canSearch') && !call_user_func(array($object, 'canSearch'))) {
        return;
    }

    $doc = new Zend_Search_Lucene_Document();

    // store a key for deleting in future
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_id', $id));

    // store primary key and model name to identify the record in the search results
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_pk', $object->getId()));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_model', $model));

    // store title - used for search result title
    if (!array_key_exists('title', $config)) {
        throw new Exception(sprintf('A title must be set for model "%s" in search.yml', $model));
    }
    $titleGetter = 'get' . sfInflector::camelize($config['title']);
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('_title', call_user_func(array($object, $titleGetter))));

    // store description - used for search result description
    if (!array_key_exists('description', $config)) {
        throw new Exception(sprintf('A description must be set for model "%s" in search.yml', $model));
    }
    $descriptionGetter = 'get' . sfInflector::camelize($config['description']);
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('_description', call_user_func(array($object, $descriptionGetter))));

    // store url - @todo add more routing options
    if (!array_key_exists('route', $config)) {
        throw new Exception(sprintf('A route must be set for model "%s" in search.yml', $model));
    }
    sfContext::getInstance()->getConfiguration()->loadHelpers('Url');
    $url = url_for($config['route'], $object);
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('_url', $url));

    // store the searchable fields; they are indexed but not stored
    if (array_key_exists('fields', $config)) {
        // The per-field options are not used here. The loop variable is
        // named so it no longer overwrites $config, as the original did.
        foreach ($config['fields'] as $field => $fieldOptions) {
            $fieldGetter = 'get' . sfInflector::camelize($field);
            $doc->addField(Zend_Search_Lucene_Field::UnStored($field, call_user_func(array($object, $fieldGetter)), 'utf-8'));
        }
    }

    // save index
    $this->getIndex()->addDocument($doc);
    $this->getIndex()->commit();
}
/**
 * index a file
 *
 * Builds a Lucene document for the file (HTML files are parsed with
 * Zend_Search_Lucene_Document_Html, everything else starts as an empty
 * document), attaches pk/path/size/mimetype fields, runs
 * self::extractMetadata() and hands the document to Lucene::updateFile().
 *
 * @author Jörn Dreyer <*****@*****.**>
 *
 * @param string      $path the path of the file
 * @param string|null $user user whose files view is used; when null the
 *                          current filesystem view and \OCP\User::getUser()
 *                          are used
 *
 * @return bool true when the document was handed to Lucene::updateFile(),
 *              false when the path is invalid or empty, no view could be
 *              resolved, or the file info has no mimetype
 */
public static function indexFile($path = '', $user = null)
{
    if (!Filesystem::isValidPath($path)) {
        // Was a bare `return;` (null); return false to honour the bool contract.
        return false;
    }
    if ($path === '') {
        //ignore the empty path element
        return false;
    }

    if (is_null($user)) {
        $view = Filesystem::getView();
        $user = \OCP\User::getUser();
    } else {
        $view = new \OC\Files\View('/' . $user . '/files');
    }

    if (!$view) {
        Util::writeLog('search_lucene', 'could not resolve filesystem view', Util::WARN);
        return false;
    }

    $root = $view->getRoot();
    $pk = md5($root . $path);

    // the cache already knows mime and other basic stuff
    $data = $view->getFileInfo($path);
    if (!isset($data['mimetype'])) {
        Util::writeLog('search_lucene', 'need mimetype for content extraction', Util::ERROR);
        return false;
    }

    $mimetype = $data['mimetype'];
    if ('text/html' === $mimetype) {
        $doc = \Zend_Search_Lucene_Document_Html::loadHTML($view->file_get_contents($path));
    } else {
        // FIXME 'application/msword' gets no special treatment yet: the Zend
        // Docx loader uses ZipArchive and is not compatible with
        // OC\Files\Filesystem, so it falls back to a plain document like
        // every other mimetype.
        $doc = new \Zend_Search_Lucene_Document();
    }

    // store fscacheid as unique id to lookup by when deleting
    $doc->addField(\Zend_Search_Lucene_Field::Keyword('pk', $pk));

    // Store document URL to identify it in the search results
    $doc->addField(\Zend_Search_Lucene_Field::Text('path', $path));

    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('size', $data['size']));
    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('mimetype', $mimetype));

    self::extractMetadata($doc, $path, $view, $mimetype);

    Lucene::updateFile($doc, $path, $user);

    return true;
}
/**
 * Replaces this record's document in the Lucene index.
 *
 * Every document currently indexed under this record's primary key is
 * deleted, then a new document carrying the pk plus the searchable
 * marca/categoria/descripcion fields is added and committed.
 */
public function updateLuceneIndex()
{
    $index = articuloTable::getLuceneIndex();

    // find() returns an array of hits, so each one is deleted in turn. The
    // original treated the array as a single hit and read ->id on it.
    foreach ($index->find('pk:' . $this->getId()) as $hit) {
        $index->delete($hit->id);
    }

    $doc = new Zend_Search_Lucene_Document();

    // The pk must be indexed (Keyword) for the 'pk:<id>' lookup above to
    // match. As an unIndexed field it was stored but never searchable, so
    // stale documents were never removed.
    $doc->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));

    // Searchable but not stored: only used for matching.
    $doc->addField(Zend_Search_Lucene_Field::unStored('marca', $this->getMarca(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('categoria', $this->getCategoria(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('descripcion', $this->getDescripcion(), 'utf-8'));

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Adds one post to the post search index.
 *
 * The index is opened from the path in the registry's config
 * (search->post). The feed data is always taken from
 * $post->findParentFeeds(); the $feed argument is not read.
 *
 * @param Post  $post post to index
 * @param mixed $feed ignored; the post's parent feed is looked up instead
 */
public function addPost(Post $post, $feed)
{
    $indexPath = Zend_Registry::getInstance()->config->search->post;
    $searchIndex = Zend_Search_Lucene::open($indexPath);

    $parentFeed = $post->findParentFeeds();

    $fields = array(
        Zend_Search_Lucene_Field::Text('pid', $post->id),
        Zend_Search_Lucene_Field::Text('title', $post->title),
        Zend_Search_Lucene_Field::Text('siteUrl', $parentFeed->siteUrl),
        Zend_Search_Lucene_Field::Text('link', $post->link),
        Zend_Search_Lucene_Field::Text('feedTitle', $parentFeed->title),
        Zend_Search_Lucene_Field::Text('feedSlug', $parentFeed->slug),
        Zend_Search_Lucene_Field::Text('feedDescription', $parentFeed->description),
        Zend_Search_Lucene_Field::keyword('category', $parentFeed->findParentCategories()->title),
        Zend_Search_Lucene_Field::Text('description', $post->description),
        Zend_Search_Lucene_Field::unIndexed('publishDate', $post->publishDate),
        Zend_Search_Lucene_Field::Keyword('type', 'post'),
    );

    $document = new Zend_Search_Lucene_Document();
    foreach ($fields as $field) {
        $document->addField($field);
    }

    $searchIndex->addDocument($document);
}
/**
 * Builds the search document for a content item.
 *
 * Any document already indexed under the item's nid is deleted from
 * $this->index. The new document is returned to the caller; this method
 * does not add it to the index itself.
 *
 * @param Zoo_Content_Interface $item
 * @return Zend_Search_Lucene_Document
 */
protected function _build(Zoo_Content_Interface $item)
{
    // Remove the previously indexed version, if there is one.
    $staleHits = $this->index->find('nid:' . $item->id);
    foreach ($staleHits as $staleHit) {
        $this->index->delete($staleHit->id);
    }

    $document = new Zend_Search_Lucene_Document();

    $nidField = Zend_Search_Lucene_Field::text('nid', $item->id);
    $document->addField($nidField);

    $linkField = Zend_Search_Lucene_Field::unIndexed('link', $item->url());
    $document->addField($linkField);

    // Searchable only; these values are not stored with the document.
    $document->addField(Zend_Search_Lucene_Field::unStored('title', $item->title));
    $document->addField(Zend_Search_Lucene_Field::unStored('type', $item->type));
    $document->addField(Zend_Search_Lucene_Field::unStored('published', $item->published));
    $document->addField(Zend_Search_Lucene_Field::unStored('uid', $item->uid));

    // Only the first element of the rendered result is indexed, with tags stripped.
    list($rendered) = Zoo::getService('content')->getRenderedContent($item->id, 'Display');
    $plainText = strip_tags($rendered);
    $document->addField(Zend_Search_Lucene_Field::unStored('contents', $plainText));

    return $document;
}
/**
 * (Re)builds the search index of a website.
 *
 * When the website has pages, the index file is opened (all existing
 * documents are deleted) or created, one HTML document per page is added
 * together with the unindexed `md5` and `pageId` fields, and the index is
 * committed and optimized. When the website has no pages the index is not
 * touched.
 *
 * @param string $websiteId
 * @return string path returned by getIndexFileForWebsite() for this website
 * @throws CmsException with code '602' when the website does not exist
 */
public function indexWebsite($websiteId)
{
    $websiteService = new Website('Website');
    if (!$websiteService->existsWebsiteAlready($websiteId)) {
        throw new CmsException('602', __METHOD__, __LINE__);
    }

    // The business layer has to be used for rendering.
    // NOTE(review): neither instance is referenced below; they are kept in
    // case their constructors have side effects. Confirm before removing.
    $renderBusiness = new BusinessRender('Render');
    $modulService = new Modul('Modul');

    $pageService = new Page('Page');
    $allPageIds = $pageService->getIdsByWebsiteId($websiteId);

    $indexFileOfWebsite = $this->getIndexFileForWebsite($websiteId);

    if (is_array($allPageIds) && count($allPageIds) > 0) {
        if (file_exists($indexFileOfWebsite)) {
            $index = \Zend_Search_Lucene::open($indexFileOfWebsite);

            // Document ids run from 0 to count() - 1. count() includes
            // deleted documents, whereas numDocs() only counts live ones.
            // Bounding the loop by numDocs() left the highest ids in place
            // whenever the index already contained deletions.
            $totalDocuments = $index->count();
            for ($id = 0; $id < $totalDocuments; ++$id) {
                if (!$index->isDeleted($id)) {
                    $index->delete($id);
                }
            }
        } else {
            $index = \Zend_Search_Lucene::create($indexFileOfWebsite);
        }

        // Decides whether loadHTML() stores the page content in the index.
        $storeContent = $this->isStoreContentEnabled() ? true : false;

        foreach ($allPageIds as $pageId) {
            $pageContent = $this->getPageContent($websiteId, $pageId);

            $document = \Zend_Search_Lucene_Document_Html::loadHTML($pageContent, $storeContent, 'UTF-8');
            $document->addField(\Zend_Search_Lucene_Field::unIndexed('md5', md5($pageContent)));
            $document->addField(\Zend_Search_Lucene_Field::unIndexed('pageId', $pageId));

            $index->addDocument($document);
        }

        $index->commit();
        $index->optimize();
        unset($index);
    }

    return $indexFileOfWebsite;
}
/**
 * Builds the product search index.
 *
 * CLI usage: php index.php db index
 *
 * Reads every product joined with its category and supplier, makes sure
 * the index directory (APP_PATH . '/' . self::INDEX_DIR) exists, creates
 * the index there via self::create() and adds one document per row.
 */
public function index()
{
    $sql = "SELECT * FROM Products AS p JOIN Categories AS c ON p.CategoryID = c.CategoryId JOIN Suppliers AS s ON p.SupplierID = s.SupplierID";
    $statement = $this->db->prepare($sql);
    $statement->execute();
    $products = $statement->fetchAll(PDO::FETCH_ASSOC);

    $directory = APP_PATH . '/' . self::INDEX_DIR;
    if (!is_dir($directory)) {
        mkdir($directory, 0777, true);
    }

    $searchIndex = self::create($directory);

    foreach ($products as $product) {
        $fields = array(
            Zend_Search_Lucene_Field::keyword('ProductName', $product['ProductName']),
            Zend_Search_Lucene_Field::text('Quantity', $product['QuantityPerUnit']),
            Zend_Search_Lucene_Field::keyword('Category', $product['CategoryName']),
            Zend_Search_Lucene_Field::unIndexed('Description', $product['Description']),
            Zend_Search_Lucene_Field::unStored('City', $product['City']),
            Zend_Search_Lucene_Field::keyword('CompanyName', $product['CompanyName']),
            Zend_Search_Lucene_Field::binary('Picture', $product['Picture']),
        );

        $document = new Zend_Search_Lucene_Document();
        foreach ($fields as $field) {
            $document->addField($field);
        }

        $searchIndex->addDocument($document);
    }
}
/**
 * Rebuilds the site search index from the online operations (v_einsatz)
 * and the news (v_news) views, echoing progress as it goes.
 *
 * The index at $this->search_index is created anew, which discards the
 * previous one, and it is optimized once all documents are added.
 */
public function build_index()
{
    echo "Anfang<br>";

    // Create the index; any existing index is discarded.
    $searchIndex = Zend_Search_Lucene::create($this->search_index);

    // --- operations that are flagged online ---
    $this->db->where('online', 1);
    $operations = $this->db->get('v_einsatz');

    foreach ($operations->result() as $operation) {
        // Title shown in the result list.
        $title = htmlentities($operation->name);
        // URL the search result links to.
        $url = base_url('aktuelles/einsatz/' . $operation->einsatzID);
        // Body text that is indexed alongside the title.
        $body = htmlentities($operation->lage . $operation->bericht . $operation->weitere_kraefte . $operation->ort);

        $document = new Zend_Search_Lucene_Document();
        $document->addField(Zend_Search_Lucene_Field::Text('title', $title));
        $document->addField(Zend_Search_Lucene_Field::Text('path', $url));
        $document->addField(Zend_Search_Lucene_Field::UnStored('content', $body));
        $document->addField(Zend_Search_Lucene_Field::unIndexed('content_type', 'Einsatz'));

        $searchIndex->addDocument($document);

        echo 'Einsatz ' . $operation->name . ' zum Index hinzugefügt.<br />';
    }

    // --- news ---
    $newsItems = $this->db->get('v_news');

    foreach ($newsItems->result() as $newsItem) {
        // Title shown in the result list.
        $title = htmlentities($newsItem->title);
        // URL the search result links to.
        $url = base_url('aktuelles/news/' . $newsItem->newsID);
        // Body text that is indexed alongside the title.
        $body = htmlentities($newsItem->teaser . $newsItem->text);

        $document = new Zend_Search_Lucene_Document();
        $document->addField(Zend_Search_Lucene_Field::Text('title', $title));
        $document->addField(Zend_Search_Lucene_Field::Text('path', $url));
        $document->addField(Zend_Search_Lucene_Field::UnStored('content', $body));
        $document->addField(Zend_Search_Lucene_Field::unIndexed('content_type', 'News'));

        $searchIndex->addDocument($document);

        echo 'News ' . $newsItem->title . ' zum Index hinzugefügt.<br />';
    }

    // Optimize the index.
    $searchIndex->optimize();

    echo "Ende";
}
/**
 * Adds a content item to the index.
 *
 * Opens the 'objects' index, creating it through $this->addIndex() when
 * opening fails, and adds a document holding the lower-cased title
 * (searchable, not stored), the lower-cased slug (stored, not searchable)
 * and the type id (stored, not searchable). Other keys of $content are
 * ignored.
 *
 * @param array $content item data; only the 'title' and 'slug' keys are used
 * @param int   $typeid
 */
function addToIndex($content, $typeid)
{
    $dirs = $this->dirs;
    $indexPath = APPL_PATH . $dirs['structure']['indexes'] . DIR_SEP . 'objects';

    try {
        $index = Zend_Search_Lucene::open($indexPath);
    } catch (Exception $e) {
        // Opening failed: let addIndex() set the index up, then try again.
        $this->addIndex();
        $index = Zend_Search_Lucene::open($indexPath);
    }

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive());

    $doc = new Zend_Search_Lucene_Document();

    foreach ($content as $k => $v) {
        // Strict comparison: with `==`, an integer key 0 equals 'title' on
        // PHP < 8 and an unrelated element would be indexed as the title.
        // mb_strtolower() lower-cases the UTF-8 text by character;
        // strtolower() works on bytes. mbstring is already needed by the
        // Utf8Num analyzer above, presumably; confirm for this deployment.
        if ($k === 'title') {
            $field = Zend_Search_Lucene_Field::UnStored($k, mb_strtolower($v, 'UTF-8'), 'utf-8');
            $doc->addField($field);
        } elseif ($k === 'slug') {
            $field = Zend_Search_Lucene_Field::unIndexed($k, mb_strtolower($v, 'UTF-8'), 'utf-8');
            $doc->addField($field);
        }
    }

    $field = Zend_Search_Lucene_Field::unIndexed('type_id', $typeid);
    $doc->addField($field);

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Build the post search index
 *
 * In count mode ($isCount = true) the number of posts that would be
 * indexed is echoed and the script exits without touching the index.
 * Otherwise the index at the configured search->post path is created anew
 * and one document is added per post returned by Posts::getRecent(1, 0).
 *
 * @param boolean $isCount when true, only report the post count and exit
 * @return boolean true once the index has been built
 */
protected function buildPostSearch($isCount = false)
{
    require_once 'Ifphp/models/Posts.php';
    require_once 'Ifphp/models/Feeds.php';
    require_once 'Ifphp/models/Categories.php';

    $posts = new Posts();
    $allPosts = $posts->getRecent(1, 0);

    if ($isCount) {
        echo $allPosts->count() . ' posts would have been added to the post index';
        exit;
    }

    // The index is created only after the dry-run check. Creating it first,
    // as before, wiped the existing index even when only a count was
    // requested.
    $index = Zend_Search_Lucene::create(Zend_Registry::getInstance()->config->search->post);

    foreach ($allPosts as $post) {
        $feed = $post->findParentFeeds();

        $doc = new Zend_Search_Lucene_Document();
        $doc->addField(Zend_Search_Lucene_Field::Text('pid', $post->id));
        $doc->addField(Zend_Search_Lucene_Field::Text('title', $post->title));
        $doc->addField(Zend_Search_Lucene_Field::Text('siteUrl', $post->siteUrl));
        $doc->addField(Zend_Search_Lucene_Field::Text('link', $post->link));
        $doc->addField(Zend_Search_Lucene_Field::Text('feedTitle', $feed->title));
        $doc->addField(Zend_Search_Lucene_Field::Text('feedSlug', $feed->slug));
        $doc->addField(Zend_Search_Lucene_Field::Text('feedDescription', $feed->description));
        $doc->addField(Zend_Search_Lucene_Field::keyword('category', $feed->findParentCategories()->title));
        $doc->addField(Zend_Search_Lucene_Field::Text('description', $post->description));
        $doc->addField(Zend_Search_Lucene_Field::unIndexed('publishDate', $post->publishDate));
        $doc->addField(Zend_Search_Lucene_Field::Keyword('type', 'post'));

        $index->addDocument($doc);
    }

    // NOTE(review): this path is read from the registry's `search` entry,
    // while the index above is created at `config->search->post`. Confirm
    // both refer to the same directory.
    chown(Zend_Registry::getInstance()->search['post'], 'www-data');

    return true;
}
/**
 * indexFieldNow
 *
 * Resolves the value of one field to a string and adds it to $objDoc using
 * the Lucene field type selected by $objField->idSearchFieldTypes.
 *
 * Value resolution, first matching rule wins:
 *  - tag fields (typeId == GenericSetup::FIELD_TYPE_ID_TAG): the titles of
 *    the given tags joined with ', '; the tag ids are collected as '[id]...'
 *  - non-object values on fields with a non-empty sqlSelect: the value is
 *    read as a list of ids (an array, a '[1][2]' string or a single id),
 *    the matching rows are queried and their titles joined with ', '
 *  - anything else: the value is passed through html_entity_decode()
 *
 * Nothing is added when the resolved value is empty. When ids were
 * collected they are additionally stored in an unindexed '<field>Ids'
 * field. Exceptions are logged through $this->core->logger and not
 * rethrown.
 *
 * @param GenericElementField $objField
 * @param string $strField name of the Lucene field to add
 * @param integer $intFieldType compared against GenericSetup::FILE_FIELD
 * @param string|array|object $mixedFieldValue
 * @param Zend_Search_Lucene_Document $objDoc document the fields are added to
 * @return void
 * @author Thomas Schedler <*****@*****.**>
 */
protected final function indexFieldNow($objField, $strField, $intFieldType, $mixedFieldValue, Zend_Search_Lucene_Document &$objDoc)
{
    try {
        $strValue = '';
        $strValueIds = '';

        if ($objField->typeId == GenericSetup::FIELD_TYPE_ID_TAG) {
            // Tag field: collect titles for searching and ids for lookup.
            $mixedValue = $mixedFieldValue;
            if (is_object($mixedValue) || is_array($mixedValue)) {
                foreach ($mixedValue as $objTag) {
                    $strValue .= $objTag->title . ', ';
                    $strValueIds .= '[' . $objTag->id . ']';
                }
                $strValue = rtrim($strValue, ', ');
            }
        } elseif (!is_object($mixedFieldValue) && $objField->sqlSelect != '') {
            // Field backed by a select statement: resolve the ids to titles.
            $sqlSelect = $objField->sqlSelect;
            $arrIds = array();

            if (is_array($mixedFieldValue)) {
                $arrIds = $mixedFieldValue;
            } else {
                if ($mixedFieldValue != '') {
                    if (strpos($mixedFieldValue, '[') !== false) {
                        // '[1][2][3]' => array('1', '2', '3')
                        $mixedFieldValue = trim($mixedFieldValue, '[]');
                        $arrIds = explode('][', $mixedFieldValue);
                    } else {
                        $arrIds = array($mixedFieldValue);
                    }
                }
            }

            if (is_array($arrIds)) {
                if (count($arrIds) > 0) {
                    // Build the comma separated id list for the IN clause.
                    $strReplaceWhere = '';
                    foreach ($arrIds as $strId) {
                        $strReplaceWhere .= $strId . ',';
                    }
                    $strReplaceWhere = trim($strReplaceWhere, ',');

                    // NOTE(review): the ids are concatenated into the SQL
                    // unescaped; confirm they can only come from trusted,
                    // numeric sources.
                    $objReplacer = new Replacer();
                    $sqlSelect = $objReplacer->sqlReplacer($sqlSelect, $this->setup->getLanguageId(), $this->setup->getRootLevelId(), ' AND tbl.id IN (' . $strReplaceWhere . ')');
                    $objCategoriesData = $this->core->dbh->query($sqlSelect)->fetchAll(Zend_Db::FETCH_OBJ);

                    if (count($objCategoriesData) > 0) {
                        foreach ($objCategoriesData as $objCategories) {
                            $strValue .= $objCategories->title . ', ';
                            $strValueIds .= '[' . $objCategories->id . ']';
                        }
                        $strValue = rtrim($strValue, ', ');
                    }
                }
            }
        } else {
            // Plain value: decode HTML entities using the configured default encoding.
            $strValue = html_entity_decode($mixedFieldValue, ENT_COMPAT, $this->core->sysConfig->encoding->default);
        }

        if (is_string($strValue) && $strValue != '') {
            if ($intFieldType == GenericSetup::FILE_FIELD) {
                // File field: the resolved value is passed to loadFilesById().
                // The original value is kept as the ids and the indexed value
                // becomes the serialized list of path/filename/version
                // entries of the loaded files.
                $objFiles = $this->getModelFiles()->loadFilesById($strValue);
                $arrValues = array();
                if (count($objFiles) > 0) {
                    foreach ($objFiles as $objFile) {
                        $arrValues[] = array('path' => $objFile->path, 'filename' => $objFile->filename, 'version' => $objFile->version);
                    }
                }
                $strValueIds = $strValue;
                $strValue = serialize($arrValues);
            }

            if ($strValueIds != '') {
                // Store the ids next to the field; stored but not searchable.
                $objDoc->addField(Zend_Search_Lucene_Field::unIndexed($strField . 'Ids', $strValueIds, $this->core->sysConfig->encoding->default));
            }

            $this->core->logger->debug($strField . ': ' . $strValue);

            // Add the value with the Lucene field type configured for the
            // field. No field is added for a type not listed here.
            switch ($objField->idSearchFieldTypes) {
                case Search::FIELD_TYPE_KEYWORD:
                    $objDoc->addField(Zend_Search_Lucene_Field::keyword($strField, $strValue, $this->core->sysConfig->encoding->default));
                    break;
                case Search::FIELD_TYPE_UNINDEXED:
                    $objDoc->addField(Zend_Search_Lucene_Field::unIndexed($strField, $strValue, $this->core->sysConfig->encoding->default));
                    break;
                case Search::FIELD_TYPE_BINARY:
                    $objDoc->addField(Zend_Search_Lucene_Field::binary($strField, $strValue, $this->core->sysConfig->encoding->default));
                    break;
                case Search::FIELD_TYPE_TEXT:
                    $objDoc->addField(Zend_Search_Lucene_Field::text($strField, $strValue, $this->core->sysConfig->encoding->default));
                    break;
                case Search::FIELD_TYPE_UNSTORED:
                    // Unstored values are indexed with their tags stripped.
                    $objDoc->addField(Zend_Search_Lucene_Field::unStored($strField, strip_tags($strValue), $this->core->sysConfig->encoding->default));
                    break;
            }
        }
    } catch (Exception $exc) {
        // Indexing problems are logged and do not propagate.
        $this->core->logger->err($exc);
    }
}
/**
 * Store a field without indexing it
 *
 * The value is kept in the document so it can be read back from a hit,
 * but it is not searchable.
 *
 * @param string $name  the field name
 * @param mixed  $value the field value
 *
 * @return dmSearchDocument the search_document instance
 */
protected function store($name, $value)
{
    $field = Zend_Search_Lucene_Field::unIndexed($name, $value);
    $this->addField($field);

    // The docblock has always promised the document instance, but nothing
    // was returned. Returning $this allows chaining and does not affect
    // callers that ignore the result.
    return $this;
}
/**
 * addToIndex
 *
 * Adds one document for the current setup to the index at $strIndexPath.
 * The index is opened when the directory already has entries and created
 * otherwise; an index already held in $this->objIndex is reused. Every
 * field of the setup whose search field type is not Search::FIELD_TYPE_NONE
 * and whose value is not empty is added with that field type. Exceptions
 * are logged through $this->core->logger and not rethrown.
 *
 * @param string $strIndexPath directory of the Lucene index
 * @param string $strKey value stored in the document's 'key' keyword field
 * @return void
 * @author Thomas Schedler <*****@*****.**>
 * @version 1.0
 */
protected final function addToIndex($strIndexPath, $strKey)
{
    try {
        if (!is_object($this->objIndex) || !$this->objIndex instanceof Zend_Search_Lucene) {
            // scandir() returns false for a missing or unreadable directory,
            // and count(false) throws a TypeError on PHP 8 that the catch
            // below would not intercept. Treat that case as an empty index
            // location and create the index.
            $arrEntries = is_dir($strIndexPath) ? scandir($strIndexPath) : false;
            if (is_array($arrEntries) && count($arrEntries) > 2) {
                // More than '.' and '..': an index already exists there.
                $this->objIndex = Zend_Search_Lucene::open($strIndexPath);
            } else {
                $this->objIndex = Zend_Search_Lucene::create($strIndexPath);
            }
        }

        $objDoc = new Zend_Search_Lucene_Document();

        $objDoc->addField(Zend_Search_Lucene_Field::keyword('key', $strKey));
        $objDoc->addField(Zend_Search_Lucene_Field::unIndexed('date', $this->setup->getPublishDate('d.m.Y')));
        $objDoc->addField(Zend_Search_Lucene_Field::unIndexed('rootLevelId', $this->setup->getRootLevelId()));

        /**
         * index fields
         */
        foreach ($this->setup->FieldNames() as $strField => $intFieldType) {
            $objField = $this->setup->getField($strField);
            if (is_object($objField) && $objField->idSearchFieldTypes != Search::FIELD_TYPE_NONE) {
                $strValue = '';

                if (is_array($objField->getValue()) && $objField->sqlSelect != '') {
                    // The value is a list of ids on a field backed by a
                    // select statement: resolve the ids to titles.
                    $arrIds = $objField->getValue();
                    $sqlSelect = $objField->sqlSelect;

                    if (is_array($arrIds)) {
                        if (count($arrIds) > 0) {
                            $strReplaceWhere = '';
                            foreach ($arrIds as $strId) {
                                $strReplaceWhere .= $strId . ',';
                            }
                            $strReplaceWhere = trim($strReplaceWhere, ',');

                            // NOTE(review): the ids are concatenated into the
                            // SQL unescaped; confirm they are always trusted.
                            $objReplacer = new Replacer();
                            $sqlSelect = $objReplacer->sqlReplacer($sqlSelect, $this->setup->getLanguageId(), $this->setup->getRootLevelId(), ' AND tbl.id IN (' . $strReplaceWhere . ')');
                            $objCategoriesData = $this->core->dbh->query($sqlSelect)->fetchAll(Zend_Db::FETCH_OBJ);

                            if (count($objCategoriesData) > 0) {
                                foreach ($objCategoriesData as $objCategories) {
                                    $strValue .= $objCategories->title . ', ';
                                }
                                $strValue = rtrim($strValue, ', ');
                            }
                        }
                    }
                } else {
                    $strValue = $objField->getValue();
                }

                if ($strValue != '') {
                    // Add the value with the Lucene field type configured for the field.
                    switch ($objField->idSearchFieldTypes) {
                        case Search::FIELD_TYPE_KEYWORD:
                            $objDoc->addField(Zend_Search_Lucene_Field::keyword($strField, $strValue, $this->core->sysConfig->encoding->default));
                            break;
                        case Search::FIELD_TYPE_UNINDEXED:
                            $objDoc->addField(Zend_Search_Lucene_Field::unIndexed($strField, $strValue, $this->core->sysConfig->encoding->default));
                            break;
                        case Search::FIELD_TYPE_BINARY:
                            $objDoc->addField(Zend_Search_Lucene_Field::binary($strField, $strValue, $this->core->sysConfig->encoding->default));
                            break;
                        case Search::FIELD_TYPE_TEXT:
                            $objDoc->addField(Zend_Search_Lucene_Field::text($strField, $strValue, $this->core->sysConfig->encoding->default));
                            break;
                        case Search::FIELD_TYPE_UNSTORED:
                            // Unstored values are indexed with their tags stripped.
                            $objDoc->addField(Zend_Search_Lucene_Field::unStored($strField, strip_tags($strValue), $this->core->sysConfig->encoding->default));
                            break;
                    }
                }
            }
        }

        // Add document to the index.
        $this->objIndex->addDocument($objDoc);

        // NOTE(review): the index is optimized after every added document;
        // confirm this is intended when many documents are added in a row.
        $this->objIndex->optimize();
    } catch (Exception $exc) {
        $this->core->logger->err($exc);
    }
}
/**
 * Adds the page-related fields of $user to a search document.
 *
 * @param array                       $user reads userid, type, username,
 *                                          user_url, pagepic_url, page_vote,
 *                                          category, subcategory and bids
 * @param Zend_Search_Lucene_Document $doc  document to extend
 * @return Zend_Search_Lucene_Document the same document, with the fields added
 */
private function insertPageDocument($user, $doc)
{
    // Each entry: field factory method, Lucene field name, key in $user.
    $fieldSpecs = array(
        array('keyword',   'userid',      'userid'),
        array('keyword',   'type',        'type'),
        array('text',      'username',    'username'),
        array('unIndexed', 'user_url',    'user_url'),
        array('unIndexed', 'propic',      'pagepic_url'),
        array('unIndexed', 'vote',        'page_vote'),
        array('keyword',   'category',    'category'),
        array('keyword',   'subcategory', 'subcategory'),
        array('keyword',   'bids',        'bids'),
    );

    foreach ($fieldSpecs as $spec) {
        list($factory, $fieldName, $userKey) = $spec;
        $field = call_user_func(array('Zend_Search_Lucene_Field', $factory), $fieldName, $user[$userKey]);
        $doc->addField($field);
    }

    return $doc;
}
/**
 * Puts a record into the search index, replacing any previous version.
 *
 * Nothing happens when no indexable fields are configured in
 * $this->z_indexate (a ';'-separated field list) or when $data has no id.
 *
 * @param array $data record data; 'id' plus the configured fields are read
 */
protected function addToIndex($data)
{
    $fieldList = trim($this->z_indexate);
    if ($fieldList == '' || !isset($data['id'])) {
        return;
    }

    $index = Z_Search::getInstance();

    // Build the document.
    $document = new Zend_Search_Lucene_Document();
    $document->addField(Zend_Search_Lucene_Field::keyword('_id', $data['id']));
    $document->addField(Zend_Search_Lucene_Field::unIndexed('_type', $this->z_model->info('name')));

    foreach (explode(';', $fieldList) as $fieldName) {
        if (!isset($data[$fieldName])) {
            continue;
        }
        $document->addField(Zend_Search_Lucene_Field::text($fieldName, $data[$fieldName]));
    }

    // Remove the old document(s) stored under the same id.
    foreach ($index->find('_id:' . $data['id']) as $staleHit) {
        $index->delete($staleHit->id);
    }

    // Add the new document.
    $index->addDocument($document);
}
/**
 * Rebuilds the document search index and redirects to the document admin
 * page.
 *
 * All Document records are loaded, a new index is created at the
 * 'application.index' alias path and one entry is added per record. The
 * outcome is reported through a 'success' or 'error' flash message; any
 * exception raised while indexing is turned into the error flash.
 */
public function actionDoIndex()
{
    $records = Document::model()->findAll();

    Yii::import('application.vendor.*');
    require_once 'Zend/Search/Lucene.php';

    $indexPath = Yii::getPathOfAlias('application.index');
    $searchIndex = new Zend_Search_Lucene($indexPath, true);

    try {
        foreach ($records as $record) {
            // Every value is passed through CHtml::encode() before indexing.
            $title = CHtml::encode($record->title);
            $content = CHtml::encode($record->content);
            $author = CHtml::encode($record->author);
            $id = CHtml::encode($record->id);
            $url = CHtml::encode($record->url);

            $entry = new Zend_Search_Lucene_Document();
            $entry->addField(Zend_Search_Lucene_Field::text("title", $title, 'utf-8'));
            $entry->addField(Zend_Search_Lucene_Field::text("content", $content, 'utf-8'));
            $entry->addField(Zend_Search_Lucene_Field::text("author", $author, 'utf-8'));
            $entry->addField(Zend_Search_Lucene_Field::unIndexed("id", $id, 'utf-8'));
            $entry->addField(Zend_Search_Lucene_Field::keyword("url", $url, 'utf-8'));

            $searchIndex->addDocument($entry);
        }

        $searchIndex->commit();
        Yii::app()->user->setFlash('success', 'Indexing success!');
    } catch (Exception $e) {
        Yii::app()->user->setFlash('error', 'Indexing fail, try again!');
    }

    $this->redirect(array('document/admin'));
}
/**
 * index a file
 *
 * Builds a Lucene document for the file and hands it to
 * Lucene::updateFile(). Content is only extracted when the view can
 * provide a local file: text/plain is indexed as an unstored 'body'
 * field, text/html and application/pdf use their dedicated loaders, and
 * .docx/.xlsx/.pptx/.odt/.ods files are recognised by file extension.
 * Every other file is indexed with its metadata fields only.
 *
 * @author Jörn Dreyer <*****@*****.**>
 *
 * @param string      $path the path of the file
 * @param string|null $user user whose files view is used; when null the
 *                          current filesystem view and \OCP\User::getUser()
 *                          are used
 *
 * @return bool true when the file was indexed or has vanished, false when
 *              the path is invalid or empty, no view could be resolved, or
 *              the file info has no mimetype
 */
public static function indexFile($path = '', $user = null)
{
    if (!Filesystem::isValidPath($path)) {
        // Was a bare `return;` (null); return false to honour the bool contract.
        return false;
    }
    if ($path === '') {
        //ignore the empty path element
        return false;
    }

    if (is_null($user)) {
        $view = Filesystem::getView();
        $user = \OCP\User::getUser();
    } else {
        $view = new \OC\Files\View('/' . $user . '/files');
    }

    if (!$view) {
        Util::writeLog('search_lucene', 'could not resolve filesystem view', Util::WARN);
        return false;
    }

    if (!$view->file_exists($path)) {
        Util::writeLog('search_lucene', 'file vanished, ignoring', Util::DEBUG);
        return true;
    }

    $root = $view->getRoot();
    $pk = md5($root . $path);

    // the cache already knows mime and other basic stuff
    $data = $view->getFileInfo($path);
    if (!isset($data['mimetype'])) {
        Util::writeLog('search_lucene', 'need mimetype for content extraction', Util::ERROR);
        return false;
    }

    $mimeType = $data['mimetype'];

    // initialize plain lucene document
    $doc = new \Zend_Search_Lucene_Document();

    // index content for local files only
    $localFile = $view->getLocalFile($path);

    if ($localFile) {
        //try to use special lucene document types
        if ('text/plain' === $mimeType) {
            $body = $view->file_get_contents($path);
            if ($body != '') {
                $doc->addField(\Zend_Search_Lucene_Field::UnStored('body', $body));
            }
        } elseif ('text/html' === $mimeType) {
            //TODO could be indexed, even if not local
            $doc = \Zend_Search_Lucene_Document_Html::loadHTML($view->file_get_contents($path));
        } elseif ('application/pdf' === $mimeType) {
            $doc = Pdf::loadPdf($view->file_get_contents($path));
        } else {
            // Office formats are matched by extension instead of mimetype,
            // as the zend classes only understand docx and not doc files.
            // FIXME distinguish doc and docx, xls and xlsx, ppt and pptx, in oc core mimetype helper ...
            $extension = strtolower(pathinfo($data['name'], PATHINFO_EXTENSION));
            switch ($extension) {
                case 'docx':
                    $doc = \Zend_Search_Lucene_Document_Docx::loadDocxFile($localFile);
                    break;
                case 'xlsx':
                    $doc = \Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($localFile);
                    break;
                case 'pptx':
                    $doc = \Zend_Search_Lucene_Document_Pptx::loadPptxFile($localFile);
                    break;
                case 'odt':
                    $doc = Odt::loadOdtFile($localFile);
                    break;
                case 'ods':
                    $doc = Ods::loadOdsFile($localFile);
                    break;
            }
        }
    }

    // Store filecache id as unique id to lookup by when deleting
    $doc->addField(\Zend_Search_Lucene_Field::Keyword('pk', $pk));

    // Store filename
    $doc->addField(\Zend_Search_Lucene_Field::Text('filename', $data['name'], 'UTF-8'));

    // Store document path to identify it in the search results
    $doc->addField(\Zend_Search_Lucene_Field::Text('path', $path, 'UTF-8'));

    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('size', $data['size']));
    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('mimetype', $mimeType));

    //self::extractMetadata($doc, $path, $view, $mimeType);

    Lucene::updateFile($doc, $path, $user);

    return true;
}
/**
 * A refactored method to add the document to the index.
 *
 * The first 250 characters of the content are additionally stored as an
 * unindexed 'Summary' field.
 *
 * @param int    $docid      document id; indexed via PHPLuceneIndexer::longToString()
 * @param string $content    document text (treated as UTF-8)
 * @param string $discussion discussion text; searchable but not stored
 * @param string $title      document title
 * @param string $version    document version
 */
private function addDocument($docid, $content, $discussion, $title, $version)
{
    // Cut the teaser on character boundaries. A byte-wise substr() can
    // split a multi-byte UTF-8 character and leave an invalid sequence in
    // the stored summary. Fall back to substr() without mbstring.
    if (function_exists('mb_substr')) {
        $teaser = mb_substr($content, 0, 250, 'UTF-8');
    } else {
        $teaser = substr($content, 0, 250);
    }

    $doc = new Zend_Search_Lucene_Document();
    $doc->addField(Zend_Search_Lucene_Field::Text('DocumentID', PHPLuceneIndexer::longToString($docid)));
    $doc->addField(Zend_Search_Lucene_Field::Text('Content', $content, 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::unStored('Discussion', $discussion, 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('Title', $title, 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('Version', $version, 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('Summary', $teaser, 'UTF-8'));

    $this->lucene->addDocument($doc);
}
/**
 * Adds the default fields to the document.
 *
 * A searchable `docRef` keyword of the form "<class>:<key>" is added, and
 * the class and key are also stored individually as unindexed fields.
 *
 * @param string $class
 * @param mixed  $key
 */
public function __construct($class, $key)
{
    $reference = $class . ':' . $key;

    $referenceField = Zend_Search_Lucene_Field::keyword('docRef', $reference);
    $classField = Zend_Search_Lucene_Field::unIndexed('class', $class);
    $keyField = Zend_Search_Lucene_Field::unIndexed('key', $key);

    $this->addField($referenceField);
    $this->addField($classField);
    $this->addField($keyField);
}