/**
 * Re-indexes this record in the Lucene index provided by its table.
 *
 * Every document currently matching "pk:<id>" is deleted, then a fresh
 * document is added that carries the primary key, the original title and
 * one unstored field per translated column, named "<column>_<lang>"
 * (e.g. name_en, description_fi).
 */
public function updateLuceneIndex()
{
    $luceneIndex = $this->getTable()->getLuceneIndex();

    // Drop whatever is currently indexed under this primary key.
    $staleHits = $luceneIndex->find('pk:' . $this->getId());
    foreach ($staleHits as $staleHit) {
        $luceneIndex->delete($staleHit->id);
    }

    $document = new Zend_Search_Lucene_Document();

    // The primary key identifies the record in search results.
    $document->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));

    $translations = Doctrine::getTable('ProductTranslation')
        ->createQuery()
        ->from('ProductTranslation pt')
        ->where('pt.id = ?', $this->getId())
        ->execute();

    $document->addField(
        Zend_Search_Lucene_Field::UnStored('original_title', $this->getOriginalTitle(), 'utf-8')
    );

    // Columns of a translation row that are not indexed as text.
    $skippedColumns = array('lang' => true, 'id' => true, 'volume' => true, 'slug' => true);

    foreach ($translations->toArray() as $translation) {
        $language = $translation['lang'];

        foreach (array_diff_key($translation, $skippedColumns) as $column => $text) {
            $document->addField(
                Zend_Search_Lucene_Field::UnStored($column . '_' . $language, strip_tags($text), 'utf-8')
            );
        }
    }

    $luceneIndex->addDocument($document);
    $luceneIndex->commit();
}
/**
 * Adds (or replaces) the search document for a bug.
 *
 * Any existing entry is removed through RemoveBug() first. The document
 * holds the bug's own columns, all comment bodies joined into one unstored
 * blob, one keyword field per titled attribute, and a "tag" field built
 * from the untitled attribute values.
 *
 * @param object $bug Record exposing bug_id, title, reporting_user_id and reporting_date.
 */
public function IndexBug($bug)
{
    $this->RemoveBug($bug->bug_id);

    $document = new Zend_Search_Lucene_Document();
    $document->AddField(Zend_Search_Lucene_Field::Keyword('bug_id', $bug->bug_id));
    $document->AddField(Zend_Search_Lucene_Field::Text('title', $bug->title));
    $document->AddField(Zend_Search_Lucene_Field::Keyword('reporting_user_id', $bug->reporting_user_id));
    $document->AddField(Zend_Search_Lucene_Field::Keyword('reporting_date', $bug->reporting_date));

    // Hits are only ever shown as bugs, but comment text should still
    // influence matching, so all comments go into a single field.
    $commentQuery = Bugdar::$db->Prepare("SELECT body FROM " . TABLE_PREFIX . "comments WHERE bug_id = ? ORDER BY comment_id");
    $commentQuery->Execute(array($bug->bug_id));
    $allComments = '';
    while ($row = $commentQuery->FetchObject()) {
        $allComments .= $row->body . "\n\n";
    }
    $document->AddField(Zend_Search_Lucene_Field::UnStored('comments', $allComments));

    // Titled attributes become their own keyword field; untitled ones are tags.
    $attributeQuery = Bugdar::$db->Prepare("SELECT * FROM " . TABLE_PREFIX . "bug_attributes WHERE bug_id = ?");
    $attributeQuery->Execute(array($bug->bug_id));
    $tagValues = array();
    while ($attribute = $attributeQuery->FetchObject()) {
        if (!$attribute->attribute_title) {
            $tagValues[] = $attribute->value;
            continue;
        }
        $document->AddField(Zend_Search_Lucene_Field::Keyword($attribute->attribute_title, $attribute->value));
    }
    $document->AddField(Zend_Search_Lucene_Field::Text('tag', implode(' ', $tagValues)));

    $this->lucene->AddDocument($document);
}
/**
 * Builds the media search index.
 *
 * Creates the index at APPLICATION_ROOT . '/data/my-index' and adds one
 * document per media item that is not deleted, is active and belongs to
 * category 2, 3 or 4, ordered by timestamp descending.
 *
 * Text is lower-cased with mb_strtolower() because the indexed values are
 * declared as UTF-8; the byte-wise strtolower() does not lower-case
 * multibyte characters correctly.
 */
public function updateAction()
{
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive()
    );

    // Create the index
    $index = Zend_Search_Lucene::create(APPLICATION_ROOT . '/data/my-index');

    $mediaMapper = new Media_Model_Mapper_Media();
    $select = $mediaMapper->getDbTable()->select();
    $select->where('deleted != ?', 1)
        ->where('active != ?', 0)
        ->where('category_id IN(?)', array(2, 3, 4))
        ->order('timestamp DESC');
    $mediaItems = $mediaMapper->fetchAll($select);

    if (!empty($mediaItems)) {
        foreach ($mediaItems as $mediaItem) {
            $doc = new Zend_Search_Lucene_Document();

            // Store the document name so it can be identified in search results
            $doc->addField(Zend_Search_Lucene_Field::Text(
                'title',
                mb_strtolower($mediaItem->getName(), 'UTF-8'),
                'UTF-8'
            ));

            // Store the document URL so it can be identified in search results
            $doc->addField(Zend_Search_Lucene_Field::Text(
                'url',
                '/media/' . $mediaItem->getFullPath(),
                'UTF-8'
            ));

            // The description field is currently not indexed:
            // $doc->addField(Zend_Search_Lucene_Field::Text('description', strtolower($mediaItem->getSContent()),'UTF-8'));

            // Index the document keywords
            $doc->addField(Zend_Search_Lucene_Field::UnStored(
                'keyword',
                mb_strtolower($mediaItem->getMetaKeywords(), 'UTF-8'),
                'UTF-8'
            ));

            // Index the document content
            $doc->addField(Zend_Search_Lucene_Field::UnStored(
                'contents',
                mb_strtolower($mediaItem->getContent(), 'UTF-8'),
                'UTF-8'
            ));

            // Add the document to the index
            $index->addDocument($doc);
        }
    }

    // Flush explicitly instead of relying on the commit performed when the
    // index object is destroyed.
    $index->commit();
}
/**
 * Builds a search document from raw PDF data.
 *
 * The Title, Author, Subject and Keywords properties found by Zend_Pdf are
 * added as unstored fields; the text extracted by App_Search_Helper_PdfParser
 * becomes the "body" field. Any exception raised along the way is logged
 * and swallowed, leaving the document with whatever fields were added so far.
 *
 * @param string  $data         PDF contents
 * @param boolean $storeContent whether the body text is stored (Text) or only indexed (UnStored)
 */
private function __construct($data, $storeContent)
{
    // PDF property name => index field name
    $propertyFields = array(
        'Title'    => 'title',
        'Author'   => 'author',
        'Subject'  => 'subject',
        'Keywords' => 'keywords',
    );

    try {
        $pdf = \Zend_Pdf::parse($data);

        foreach ($propertyFields as $property => $fieldName) {
            if (isset($pdf->properties[$property])) {
                $this->addField(\Zend_Search_Lucene_Field::UnStored($fieldName, $pdf->properties[$property]));
            }
        }

        //TODO handle PDF 1.6 metadata Zend_Pdf::getMetadata()

        // Extract the textual content
        $parser = new \App_Search_Helper_PdfParser();
        $text = $parser->pdf2txt($pdf->render());

        if ($text != '') {
            $bodyField = $storeContent
                ? \Zend_Search_Lucene_Field::Text('body', $text, 'UTF-8')
                : \Zend_Search_Lucene_Field::UnStored('body', $text, 'UTF-8');
            $this->addField($bodyField);
        }
    } catch (\Exception $e) {
        Util::writeLog(
            'search_lucene',
            $e->getMessage() . ' Trace:\\n' . $e->getTraceAsString(),
            Util::ERROR
        );
    }
}
/**
 * Indexes one article, replacing any previously indexed version.
 *
 * @param array $article  Row with the keys PMID, Year, Journal, Title, Authors,
 *                        Reference, Abstract and MeshHeadings.
 * @param bool  $optimise When truthy, the index is optimised before the commit.
 */
function index_lucene($article, $optimise)
{
    $index = getIndex_lucene();

    // An indexed document cannot be updated in place: find the old copies
    // by exact PMID and delete them before adding the new one.
    $pmidTerm = new Zend_Search_Lucene_Index_Term($article["PMID"], 'PMID');
    $previous = $index->find(new Zend_Search_Lucene_Search_Query_Term($pmidTerm));
    if (count($previous) > 0) {
        echo "[deleting previous version]\n";
        foreach ($previous as $oldHit) {
            $index->delete($oldHit->id);
        }
    }

    $document = new Zend_Search_Lucene_Document();

    foreach (array('PMID', 'Year', 'Journal') as $key) {
        $document->addField(Zend_Search_Lucene_Field::Keyword($key, $article[$key]));
    }
    foreach (array('Title', 'Authors', 'Reference') as $key) {
        $document->addField(Zend_Search_Lucene_Field::Text($key, $article[$key], 'utf-8'));
    }
    $document->addField(Zend_Search_Lucene_Field::UnStored('Abstract', $article["Abstract"], 'utf-8'));
    $document->addField(Zend_Search_Lucene_Field::Text('MeshHeadings', $article["MeshHeadings"], 'utf-8'));

    $index->addDocument($document);

    if ($optimise) {
        echo "Optimising index\n";
        $index->optimize();
    }

    $index->commit();
    echo "The index contains " . $index->numDocs() . " documents\n";
}
/**
 * Updates the index for an object.
 *
 * Existing entries for the object are always removed. Unless $delete is
 * set, or the object has a canSearch() method that returns a falsy value,
 * a new document is then added with the identifying keys (_id, _pk,
 * _model), the display data (_title, _description, _url) and one unstored
 * field per entry of the model's "fields" configuration.
 *
 * @param Doctrine_Record $object record to (re)index
 * @param boolean         $delete when true, only remove the record from the index
 *
 * @throws Exception when no models are configured, the record's model is not
 *                   configured, or the model lacks a title, description or route
 */
public function updateIndex(Doctrine_Record $object, $delete = false)
{
    /* error checking */
    if (!array_key_exists('models', $this->config) || empty($this->config['models'])) {
        throw new Exception('No models set in search.yml');
    }

    $model = get_class($object);
    if (!array_key_exists($model, $this->config['models'])) {
        throw new Exception(sprintf('Model "%s" not defined in "%s" index in your search.yml', $model, $this->name));
    }

    $id = $this->generateId($object->getId(), $model);
    $config = $this->config['models'][$model];

    //delete existing entries
    foreach ($this->search('_id:"' . $id . '"') as $hit) {
        $this->getIndex()->delete($hit->id);
    }

    if ($delete) {
        // Persist the deletion now rather than whenever the index is next committed.
        $this->getIndex()->commit();
        return;
    }

    //only add to search if canSearch method on model returns true (search if no method exists)
    if (method_exists($object, 'canSearch') && !$object->canSearch()) {
        $this->getIndex()->commit();
        return;
    }

    $doc = new Zend_Search_Lucene_Document();

    // store a key for deleting in future
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_id', $id));

    // store primary key and model name to identify it in the search results
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_pk', $object->getId()));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_model', $model));

    // store title - used for search result title
    if (!array_key_exists('title', $config)) {
        throw new Exception(sprintf('A title must be set for model "%s" in search.yml', $model));
    }
    $titleGetter = 'get' . sfInflector::camelize($config['title']);
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('_title', $object->$titleGetter()));

    // store description - used for search result description
    if (!array_key_exists('description', $config)) {
        throw new Exception(sprintf('A description must be set for model "%s" in search.yml', $model));
    }
    $descriptionGetter = 'get' . sfInflector::camelize($config['description']);
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('_description', $object->$descriptionGetter()));

    // store url - @todo add more routing options
    if (!array_key_exists('route', $config)) {
        throw new Exception(sprintf('A route must be set for model "%s" in search.yml', $model));
    }
    sfContext::getInstance()->getConfiguration()->loadHelpers('Url');
    $url = url_for($config['route'], $object);
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('_url', $url));

    //store fields (only the field names are used; iterating the keys avoids
    //overwriting $config with the per-field options)
    if (array_key_exists('fields', $config)) {
        foreach (array_keys($config['fields']) as $field) {
            $fieldGetter = 'get' . sfInflector::camelize($field);
            $doc->addField(Zend_Search_Lucene_Field::UnStored($field, $object->$fieldGetter(), 'utf-8'));
        }
    }

    //save index
    $this->getIndex()->addDocument($doc);
    $this->getIndex()->commit();
}
/**
 * Builds a search document from a Word OpenXML (.docx) package.
 *
 * Locates the office document part through _rels/.rels, concatenates the
 * text of all w:t runs (w:br elements and paragraph ends become spaces)
 * into the "body" field, and adds the file name, the core properties
 * returned by extractMetaData() and a fallback "title".
 *
 * @param string  $fileName     path of the .docx file
 * @param boolean $storeContent whether the body is stored (Text) or only indexed (UnStored)
 *
 * @throws Zend_Search_Lucene_Exception when the package's relations or
 *                                      document part cannot be read or parsed
 */
private function __construct($fileName, $storeContent)
{
    // Document data holder
    $documentBody = array();

    // Open OpenXML package
    $package = new ZipArchive();
    $package->open($fileName);

    // Read relations and search for officeDocument. A missing entry means
    // the file is not a readable OpenXML package, so fail explicitly
    // instead of passing false on to SimpleXML.
    $relationsXml = $package->getFromName('_rels/.rels');
    $relations = ($relationsXml === false) ? false : simplexml_load_string($relationsXml);
    if ($relations === false) {
        throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
    }

    foreach ($relations->Relationship as $rel) {
        if ($rel["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
            // Found office document! Read in contents...
            $partPath = $this->absoluteZipPath(dirname($rel['Target']) . '/' . basename($rel['Target']));
            $partXml = $package->getFromName($partPath);
            $contents = ($partXml === false) ? false : simplexml_load_string($partXml);
            if ($contents === false) {
                throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
            }

            $contents->registerXPathNamespace('w', Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);
            $paragraphs = $contents->xpath('//w:body/w:p');

            foreach ($paragraphs as $paragraph) {
                $runs = $paragraph->xpath('.//w:r/*[name() = "w:t" or name() = "w:br"]');

                if ($runs === false) {
                    // Paragraph doesn't contain any text or breaks
                    continue;
                }

                foreach ($runs as $run) {
                    if ($run->getName() == 'br') {
                        // Break element
                        $documentBody[] = ' ';
                    } else {
                        $documentBody[] = (string) $run;
                    }
                }

                // Add space after each paragraph. So they are not bound together.
                $documentBody[] = ' ';
            }

            break;
        }
    }

    // Read core properties
    $coreProperties = $this->extractMetaData($package);

    // Close file
    $package->close();

    // Store filename
    $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

    // Store contents
    if ($storeContent) {
        $this->addField(Zend_Search_Lucene_Field::Text('body', implode('', $documentBody), 'UTF-8'));
    } else {
        $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode('', $documentBody), 'UTF-8'));
    }

    // Store meta data properties
    foreach ($coreProperties as $key => $value) {
        $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
    }

    // Store title (if not present in meta data)
    if (!isset($coreProperties['title'])) {
        $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
    }
}
/**
 * Converts this page into a Lucene document.
 *
 * Title, link and teaser are added as Text fields; content and search tags
 * as UnStored fields. All fields use the page's charset.
 *
 * @return Zend_Search_Lucene_Document
 */
public function asLuceneDocument()
{
    $document = new Zend_Search_Lucene_Document();

    $fields = array(
        Zend_Search_Lucene_Field::Text('page_title', $this->title, $this->_charset),
        Zend_Search_Lucene_Field::Text('page_link', $this->path, $this->_charset),
        Zend_Search_Lucene_Field::Text('page_teaser', $this->teaser, $this->_charset),
        Zend_Search_Lucene_Field::unstored('page_content', $this->content, $this->_charset),
        Zend_Search_Lucene_Field::UnStored('search_tags', $this->searchTags, $this->_charset),
    );

    foreach ($fields as $field) {
        $document->addField($field);
    }

    return $document;
}
/**
 * Builds a search document for a file.
 *
 * Adds the file name as a Text field, remembers it in $this->_filename and
 * then adds the pieces returned by getBody(), joined with spaces, as the
 * "body" field.
 *
 * @param string  $fileName     name of the file being indexed
 * @param boolean $storeContent whether the body is stored (Text) or only indexed (UnStored)
 */
public function __construct($fileName, $storeContent)
{
    $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
    $this->_filename = $fileName;

    // getBody() is only called once $this->_filename has been set.
    $bodyText = implode(' ', $this->getBody());

    $bodyField = $storeContent
        ? Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8')
        : Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8');

    $this->addField($bodyField);
}
/**
 * Checks the properties of a field created by Zend_Search_Lucene_Field::UnStored().
 *
 * The expected value is passed first and the actual value second, as
 * assertEquals() expects, so a failure message labels the two correctly.
 */
public function testUnStored()
{
    $field = Zend_Search_Lucene_Field::UnStored('field', 'value');

    $this->assertEquals(1, $field->boost);
    $this->assertEquals('', $field->encoding);
    $this->assertEquals(false, $field->isBinary);
    $this->assertEquals(true, $field->isIndexed);
    $this->assertEquals(false, $field->isStored);
    $this->assertEquals(true, $field->isTokenized);
    $this->assertEquals('field', $field->name);
    $this->assertEquals('value', $field->value);
}
/**
 * Adds a feed to the feed search index.
 *
 * The index location is read from the "search->feed" registry entry.
 * Title, site URL, feed URL, language and category are added as Text
 * fields; the description is indexed but not stored.
 *
 * @param Feed $feed feed to index
 */
public function addFeed(Feed $feed)
{
    $index = Zend_Search_Lucene::open(Zend_Registry::getInstance()->search->feed);

    $doc = new Zend_Search_Lucene_Document();
    // The "title" field used to be added a second time further down with
    // the same value; the redundant call has been removed.
    $doc->addField(Zend_Search_Lucene_Field::Text('title', $feed->title));
    $doc->addField(Zend_Search_Lucene_Field::Text('siteUrl', $feed->siteUrl));
    $doc->addField(Zend_Search_Lucene_Field::Text('feedUrl', $feed->url));
    $doc->addField(Zend_Search_Lucene_Field::Text('language', $feed->language));
    $doc->addField(Zend_Search_Lucene_Field::Text('category', $feed->category));
    $doc->addField(Zend_Search_Lucene_Field::UnStored('description', $feed->description));

    $index->addDocument($doc);

    // Flush explicitly instead of relying on the commit performed when the
    // index object is destroyed.
    $index->commit();
}
/**
 * Builds the Lucene document for one searchable item.
 *
 * @param object     $doc               source record; docid, documenttype, itemtype, contextid,
 *                                      title, author, contents, url and date are read from it
 * @param mixed      $data              module-specific data, stored serialized in the binary "data" field
 * @param mixed      $course_id         course the item belongs to
 * @param mixed      $group_id          group of the originator
 * @param mixed      $user_id           originator, if any
 * @param string     $path              where the library calls for access checks on this item are found
 * @param array|null $additional_keyset optional name => value pairs added as extra keyword fields
 */
public function __construct(&$doc, &$data, $course_id, $group_id, $user_id, $path, $additional_keyset = null)
{
    $encoding = 'UTF-8';

    // Identification keywords, in indexing order:
    //  - docid / doctype: the document and the element type that manages it
    //  - itemtype: lets complex modules subclass their information
    //  - course_id / group_id / user_id: cached course context and originator
    $identifiers = array(
        'docid'     => $doc->docid,
        'doctype'   => $doc->documenttype,
        'itemtype'  => $doc->itemtype,
        'course_id' => $course_id,
        'group_id'  => $group_id,
        'user_id'   => $user_id,
    );
    foreach ($identifiers as $name => $value) {
        $this->addField(Zend_Search_Lucene_Field::Keyword($name, $value, $encoding));
    }

    // The context in which the information is produced/attached is cached
    // to speed up access checks; it is stable for a given item.
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('context_id', $doc->contextid, $encoding));

    // Document data
    $this->addField(Zend_Search_Lucene_Field::Text('title', $doc->title, $encoding));
    $this->addField(Zend_Search_Lucene_Field::Text('author', $doc->author, $encoding));
    $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $doc->contents, $encoding));
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $doc->url, $encoding));
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('date', $doc->date, $encoding));

    // Additional data added on a per-module basis
    $this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data)));

    // The path tells the document where to find the library calls that
    // check access to a module or block content. The Lucene record only
    // has to hand back enough consistent information to perform that check.
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('path', $path, $encoding));

    /*
    // A capability set required for viewing (-1 if none) is not indexed yet.
    // It depends on the local situation of the document, so each module
    // would have to provide it, possibly as a logical combination of
    // several capabilities.
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('capabilities', $caps));
    */

    // The additional key set lets a module ask for extensible,
    // criteria-based search depending on its internal needs.
    if (empty($additional_keyset)) {
        return;
    }
    foreach ($additional_keyset as $keyname => $keyvalue) {
        $this->addField(Zend_Search_Lucene_Field::Keyword($keyname, $keyvalue, $encoding));
    }
}
/**
 * Bug-reproduction driver: ten times in a row, opens ./data/index with the
 * create flag set, runs a search, adds one document made of two getword()
 * values, commits and closes the index directory.
 */
function fill_index()
{
    $round = 0;

    while ($round < 10) {
        $index = new Zend_Search_Lucene('./data/index', true);
        $index->find("test");

        $document = new Zend_Search_Lucene_Document();
        $document->addField(Zend_Search_Lucene_Field::Text("test", getword()));
        $document->addField(Zend_Search_Lucene_Field::UnStored("contents", getword()));

        $index->addDocument($document);
        $index->commit();
        $index->getDirectory()->close(); //comment this to see another bug :-|

        $round++;
    }
}
/**
 * Adds an entry to the search index, then commits and optimizes the index.
 *
 * Does nothing when no index is configured ($this->_index is null).
 *
 * @param Zfplanet_Model_Entry $entry entry to index
 */
public function index(Zfplanet_Model_Entry $entry)
{
    if (null === $this->_index) {
        return;
    }

    $fields = array(
        Zend_Search_Lucene_Field::Keyword('id', $entry->id, 'utf-8'),
        Zend_Search_Lucene_Field::UnIndexed('publishedDate', $entry->publishedDate, 'utf-8'),
        Zend_Search_Lucene_Field::Keyword('uri', $entry->uri, 'utf-8'),
        Zend_Search_Lucene_Field::Text('title', $entry->title, 'utf-8'),
        Zend_Search_Lucene_Field::UnStored('content', $entry->content, 'utf-8'),
    );

    $document = new Zend_Search_Lucene_Document();
    foreach ($fields as $field) {
        $document->addField($field);
    }

    $this->_index->addDocument($document);
    $this->_index->commit();
    $this->_index->optimize();
}
/**
 * Builds a search document from HTML.
 *
 * Adds the page title, one Text field per named meta tag and the body
 * text, then collects the href values of anchors ($this->_links) and of
 * head link elements ($this->_headerLinks), each de-duplicated.
 *
 * @param string  $data         HTML markup, or a file name when $isFile is true
 * @param boolean $isFile       whether $data names a file to read
 * @param boolean $storeContent whether the body is stored (Text) or only indexed (UnStored)
 */
private function __construct($data, $isFile, $storeContent)
{
    $this->_doc = new DOMDocument();
    $this->_doc->substituteEntities = true;

    $markup = $isFile ? file_get_contents($data) : $data;

    // Parser warnings about malformed markup are deliberately silenced.
    @$this->_doc->loadHTML($markup);

    $xpath = new DOMXPath($this->_doc);

    // Title: normally a single node, but every match is concatenated.
    $titleText = '';
    foreach ($xpath->query('/html/head/title') as $node) {
        $titleText .= $node->nodeValue . ' ';
    }
    $this->addField(Zend_Search_Lucene_Field::Text('title', $titleText, $this->_doc->actualEncoding));

    // Named meta tags become fields of the same name.
    foreach ($xpath->query('/html/head/meta[@name]') as $meta) {
        $this->addField(Zend_Search_Lucene_Field::Text(
            $meta->getAttribute('name'),
            $meta->getAttribute('content'),
            $this->_doc->actualEncoding
        ));
    }

    // Body: normally a single node, but every match is processed.
    $bodyText = '';
    foreach ($xpath->query('/html/body') as $body) {
        $this->_retrieveNodeText($body, $bodyText);
    }
    $bodyField = $storeContent
        ? Zend_Search_Lucene_Field::Text('body', $bodyText, $this->_doc->actualEncoding)
        : Zend_Search_Lucene_Field::UnStored('body', $bodyText, $this->_doc->actualEncoding);
    $this->addField($bodyField);

    // Anchors: skip empty hrefs and, when configured, rel="nofollow" links.
    foreach ($this->_doc->getElementsByTagName('a') as $anchor) {
        $href = $anchor->getAttribute('href');
        if ($href == '') {
            continue;
        }
        if (self::$_excludeNoFollowLinks && strtolower($anchor->getAttribute('rel')) == 'nofollow') {
            continue;
        }
        $this->_links[] = $href;
    }
    $this->_links = array_unique($this->_links);

    // Header links
    foreach ($xpath->query('/html/head/link') as $headLink) {
        $href = $headLink->getAttribute('href');
        if ($href != '') {
            $this->_headerLinks[] = $href;
        }
    }
    $this->_headerLinks = array_unique($this->_headerLinks);
}
/**
 * Adds a searchable object to the index.
 *
 * @param SearchableObject $object      object whose content is indexed
 * @param bool             $commitOnEnd commit the index after adding the document
 *
 * @return bool always true
 */
static function AddToIndex(SearchableObject $object, $commitOnEnd = true)
{
    $document = new Zend_Search_Lucene_Document();

    $fields = array(
        Zend_Search_Lucene_Field::Keyword('combinedid', $object->getRelObjectManager() . $object->getRelObjectId()),
        Zend_Search_Lucene_Field::UnIndexed('objectid', $object->getRelObjectId()),
        Zend_Search_Lucene_Field::Keyword('manager', $object->getRelObjectManager()),
        Zend_Search_Lucene_Field::UnIndexed('column', $object->getColumnName()),
        Zend_Search_Lucene_Field::UnStored('text', $object->getContent()),
        // Workspace and privacy flags are indexed as text tokens with a trailing space.
        Zend_Search_Lucene_Field::Text('workspaces', "ws" . $object->getProjectId() . " "),
        Zend_Search_Lucene_Field::Text('isprivate', ($object->getIsPrivate() ? '1' : '0') . " "),
    );
    foreach ($fields as $field) {
        $document->addField($field);
    }

    self::GetIndex()->addDocument($document);

    if ($commitOnEnd) {
        self::GetIndex()->commit();
    }

    return true;
}
/**
 * Object constructor
 *
 * Builds a search document from a Word OpenXML (.docx) package: the text
 * of every w:t run, joined with spaces, becomes the "body" field; the file
 * name, the core properties returned by extractMetaData() and a fallback
 * "title" are added as Text fields.
 *
 * @param string  $fileName
 * @param boolean $storeContent whether the body is stored (Text) or only indexed (UnStored)
 *
 * @throws Zend_Search_Lucene_Exception when the package's relations or
 *                                      document part cannot be read or parsed
 */
private function __construct($fileName, $storeContent)
{
    // Document data holder
    $documentBody = array();

    // Open OpenXML package
    $package = new ZipArchive();
    $package->open($fileName);

    // Read relations and search for officeDocument. A missing entry means
    // the file is not a readable OpenXML package, so fail explicitly
    // instead of passing false on to SimpleXML.
    $relationsXml = $package->getFromName("_rels/.rels");
    $relations = ($relationsXml === false) ? false : simplexml_load_string($relationsXml);
    if ($relations === false) {
        throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
    }

    foreach ($relations->Relationship as $rel) {
        if ($rel["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
            // Found office document! Read in contents...
            $partPath = $this->absoluteZipPath(dirname($rel["Target"]) . "/" . basename($rel["Target"]));
            $partXml = $package->getFromName($partPath);
            $contents = ($partXml === false) ? false : simplexml_load_string($partXml);
            if ($contents === false) {
                throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
            }

            $contents->registerXPathNamespace("w", Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);
            $paragraphs = $contents->xpath('//w:body/w:p');

            foreach ($paragraphs as $paragraph) {
                // The leading "." keeps the query relative to this paragraph.
                // An absolute "//w:r/w:t" matches every run in the document
                // and would repeat the whole text once per paragraph.
                $runs = $paragraph->xpath('.//w:r/w:t');
                if ($runs === false) {
                    continue;
                }
                foreach ($runs as $run) {
                    $documentBody[] = (string) $run;
                }
            }

            break;
        }
    }

    // Read core properties
    $coreProperties = $this->extractMetaData($package);

    // Close file
    $package->close();

    // Store filename
    $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName));

    // Store contents. SimpleXML hands strings back as UTF-8, so the body
    // encoding is declared explicitly instead of being assumed from the locale.
    $bodyText = implode(' ', $documentBody);
    if ($storeContent) {
        $this->addField(Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8'));
    } else {
        $this->addField(Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8'));
    }

    // Store meta data properties
    foreach ($coreProperties as $key => $value) {
        $this->addField(Zend_Search_Lucene_Field::Text($key, $value));
    }

    // Store title (if not present in meta data)
    if (!isset($coreProperties['title'])) {
        $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName));
    }
}
/**
 * Object constructor
 *
 * Builds a search document from an OpenDocument spreadsheet (.ods): table
 * names go into the "sheets" field, the text of all text:p elements into
 * "body", followed by the core properties returned by extractMetaData()
 * and a fallback "title".
 *
 * @param string  $fileName
 * @param boolean $storeContent whether sheets/body are stored (Text) or only indexed (UnStored)
 * @throws \Zend_Search_Lucene_Exception when the Zip extension is not loaded or
 *                                       content.xml is missing or cannot be parsed
 */
private function __construct($fileName, $storeContent)
{
    if (!class_exists('ZipArchive', false)) {
        throw new \Zend_Search_Lucene_Exception('Open Document Spreadsheet processing functionality requires Zip extension to be loaded');
    }

    // Document data holders
    $documentTables = array();
    $documentCells = array();

    // Open the package
    $package = new \ZipArchive();
    $package->open($fileName);

    // Read the document content
    $content = $package->getFromName('content.xml');
    if ($content === false) {
        throw new \Zend_Search_Lucene_Exception('Invalid archive or corrupted .ods file.');
    }

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and emits
    // a deprecation notice there, so it is only called on older versions.
    $guardEntityLoader = \PHP_VERSION_ID < 80000;
    if ($guardEntityLoader) {
        $loadEntities = libxml_disable_entity_loader(true);
    }
    $sxe = simplexml_load_string($content, 'SimpleXMLElement', LIBXML_NOBLANKS | LIBXML_COMPACT);
    if ($guardEntityLoader) {
        libxml_disable_entity_loader($loadEntities);
    }

    if ($sxe === false) {
        throw new \Zend_Search_Lucene_Exception('Invalid archive or corrupted .ods file.');
    }

    foreach ($sxe->xpath('//table:table[@table:name]') as $table) {
        $documentTables[] = (string) $table->attributes($this::SCHEMA_ODTABLE)->name;
    }
    foreach ($sxe->xpath('//text:p') as $cell) {
        $documentCells[] = (string) $cell;
    }

    // Read core properties
    $coreProperties = $this->extractMetaData($package);

    // Close file
    $package->close();

    // Store contents
    if ($storeContent) {
        $this->addField(\Zend_Search_Lucene_Field::Text('sheets', implode(' ', $documentTables), 'UTF-8'));
        $this->addField(\Zend_Search_Lucene_Field::Text('body', implode(' ', $documentCells), 'UTF-8'));
    } else {
        $this->addField(\Zend_Search_Lucene_Field::UnStored('sheets', implode(' ', $documentTables), 'UTF-8'));
        $this->addField(\Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentCells), 'UTF-8'));
    }

    // Store meta data properties
    foreach ($coreProperties as $key => $value) {
        $this->addField(\Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
    }

    // Store title (if not present in meta data)
    if (!isset($coreProperties['title'])) {
        $this->addField(\Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
    }
}
/**
 * Indexes one page and commits the index.
 *
 * @param string $title    page title (stored and indexed)
 * @param string $content  page content (indexed only)
 * @param string $url      page URL; also the input from which the document id is derived
 * @param string $keywords page keywords (stored and indexed)
 * @param mixed  $user_id  owner of the page
 *
 * @return bool always TRUE
 */
function index($title, $content, $url, $keywords, $user_id)
{
    $this->initLuceneEngine();

    $documentId = $this->getId($url);
    $lucene = $this->zend->get_Zend_Search_Lucene();

    $document = new Zend_Search_Lucene_Document();

    $keywordFields = array('id' => $documentId, 'userid' => $user_id, 'url' => $url);
    foreach ($keywordFields as $name => $value) {
        $document->addField(Zend_Search_Lucene_Field::Keyword($name, $value));
    }

    $document->addField(Zend_Search_Lucene_Field::UnStored("content", $content, 'utf-8'));
    $document->addField(Zend_Search_Lucene_Field::text("title", $title, 'utf-8'));
    $document->addField(Zend_Search_Lucene_Field::text("keywords", $keywords, 'utf-8'));

    $lucene->addDocument($document);
    $lucene->commit();
    //$lucene->optimize();

    return TRUE;
}
/**
 * Adds all values of related category to the search document.
 *
 * The tag-stripped static block is indexed as "content" and an excerpt of
 * it is kept as "short_content". The excerpt is cut with mb_substr() in
 * self::ENCODING so that it is measured in characters and never ends in
 * the middle of a multibyte sequence, which the byte-wise substr() could do.
 *
 * @return void
 **/
protected function addAttributes()
{
    $category = $this->getSourceModel();
    $content = strip_tags($this->getStaticBlock());
    $shortContent = mb_substr($content, 0, self::SHORT_CONTENT_CHAR_COUNT, self::ENCODING);

    $this->addField(Zend_Search_Lucene_Field::UnStored('content', $content, self::ENCODING));
    $this->addField(Zend_Search_Lucene_Field::Text('name', $category->getName(), self::ENCODING));
    $this->addField(Zend_Search_Lucene_Field::Keyword('category', $category->getParentCategory()->getName(), self::ENCODING));
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('short_content', $shortContent, self::ENCODING));
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $category->getUrl(), self::ENCODING));

    if ($category->getImage()) {
        try {
            // Produce a 100x100 version of the category image and keep its URL.
            $image = Mage::getModel('catalog/product_image')
                ->setBaseFile('../category/' . $category->getImage())
                ->setHeight(100)
                ->setWidth(100)
                ->resize()
                ->saveFile()
                ->getUrl();
            $this->addField(Zend_Search_Lucene_Field::UnIndexed('image', $image, self::ENCODING));
        } catch (Exception $e) {
            /* no image for category, so none will be added to index */
        }
    }
}
/**
 * Adds one content item to the Lucene index and commits.
 *
 * The index directory comes from the "lucene_index" registry entry; the
 * index is created when that directory does not exist yet.
 *
 * @param array $indexationData Row with the keys pageID, moduleID, contentID,
 *                              languageID, title, text, link and contents.
 */
public static function indexationAdd($indexationData)
{
    $indexPath = Zend_Registry::get('lucene_index');

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8());

    $document = new Zend_Search_Lucene_Document();

    foreach (array('pageID', 'moduleID', 'contentID', 'languageID') as $key) {
        $document->addField(Zend_Search_Lucene_Field::Keyword($key, $indexationData[$key]));
    }

    foreach (array('title', 'text') as $key) {
        $plainText = Cible_FunctionsGeneral::html2text($indexationData[$key]);
        $document->addField(Zend_Search_Lucene_Field::Text($key, $plainText));
    }

    $document->addField(Zend_Search_Lucene_Field::UnIndexed('link', $indexationData['link']));

    // Searchable contents: plain text, accents removed, lower-cased.
    $searchable = Cible_FunctionsGeneral::html2text($indexationData['contents']);
    $searchable = Cible_FunctionsGeneral::removeAccents($searchable);
    $searchable = strtolower($searchable);
    $document->addField(Zend_Search_Lucene_Field::UnStored('contents', $searchable));

    $mustCreate = !is_dir($indexPath);
    $index = new Zend_Search_Lucene($indexPath, $mustCreate);
    $index->addDocument($document);
    $index->commit();
}
/**
 * Re-indexes this record in the Lucene index provided by its table.
 *
 * Documents currently matching "pk:<id>" are deleted, then a new document
 * is added with the primary key, the description and the tag-stripped
 * content, and the index is committed.
 */
public function updateLuceneIndex()
{
    $luceneIndex = $this->getTable()->getLuceneIndex();

    // Drop whatever is currently indexed under this primary key.
    $staleHits = $luceneIndex->find('pk:' . $this->getId());
    foreach ($staleHits as $staleHit) {
        $luceneIndex->delete($staleHit->id);
    }

    $document = new Zend_Search_Lucene_Document();

    // The primary key identifies the record in search results.
    $document->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));
    $document->addField(
        Zend_Search_Lucene_Field::UnStored('description', $this->getDescription(), 'utf-8')
    );
    $document->addField(
        Zend_Search_Lucene_Field::UnStored('content', strip_tags($this->getContent()), 'utf-8')
    );

    $luceneIndex->addDocument($document);
    $luceneIndex->commit();
}
/**
 * Refreshes this asso's entry in the Lucene index used for search.
 *
 * Documents currently matching "pk:<id>" are deleted, then a new document
 * is added with the primary key and the name, description and login as
 * indexed (not stored) fields, and the index is committed.
 */
public function updateLuceneIndex()
{
    $luceneIndex = AssoTable::getInstance()->getLuceneIndex();

    // Drop whatever is currently indexed under this primary key.
    $staleHits = $luceneIndex->find('pk:' . $this->getId());
    foreach ($staleHits as $staleHit) {
        $luceneIndex->delete($staleHit->id);
    }

    $document = new Zend_Search_Lucene_Document();

    // The primary key identifies the asso in search results.
    $document->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));

    // Searchable asso fields
    $document->addField(Zend_Search_Lucene_Field::UnStored('name', $this->getName(), 'utf-8'));
    $document->addField(Zend_Search_Lucene_Field::UnStored('description', $this->getDescription(), 'utf-8'));
    $document->addField(Zend_Search_Lucene_Field::UnStored('login', $this->getLogin(), 'utf-8'));

    $luceneIndex->addDocument($document);
    $luceneIndex->commit();
}
/**
 * Object constructor
 *
 * Builds a search document from an OpenDocument text file (.odt): the text
 * of all text:h elements goes into "headlines", that of all text:p elements
 * into "body", followed by the core properties returned by
 * extractMetaData() and a fallback "title".
 *
 * @param string  $fileName
 * @param boolean $storeContent whether headlines/body are stored (Text) or only indexed (UnStored)
 * @throws \Zend_Search_Lucene_Exception when the Zip extension is not loaded or
 *                                       content.xml is missing or cannot be parsed
 */
private function __construct($fileName, $storeContent)
{
    if (!class_exists('ZipArchive', false)) {
        throw new \Zend_Search_Lucene_Exception('Open Document Text processing functionality requires Zip extension to be loaded');
    }

    // Document data holders
    $documentHeadlines = array();
    $documentParagraphs = array();

    // Open the package
    $package = new \ZipArchive();
    $package->open($fileName);

    // Read the document content
    $content = $package->getFromName('content.xml');
    if ($content === false) {
        throw new \Zend_Search_Lucene_Exception('Invalid archive or corrupted .odt file.');
    }

    // The XML comes from an arbitrary file, so external entity loading is
    // switched off while parsing. libxml_disable_entity_loader() is
    // deprecated as of PHP 8.0, so it is only called on older versions.
    $guardEntityLoader = \PHP_VERSION_ID < 80000;
    if ($guardEntityLoader) {
        $loadEntities = libxml_disable_entity_loader(true);
    }
    $sxe = simplexml_load_string($content, 'SimpleXMLElement', LIBXML_NOBLANKS | LIBXML_COMPACT);
    if ($guardEntityLoader) {
        libxml_disable_entity_loader($loadEntities);
    }

    if ($sxe === false) {
        throw new \Zend_Search_Lucene_Exception('Invalid archive or corrupted .odt file.');
    }

    foreach ($sxe->xpath('//text:h') as $headline) {
        $documentHeadlines[] = (string) $headline;
    }
    foreach ($sxe->xpath('//text:p') as $paragraph) {
        $documentParagraphs[] = (string) $paragraph;
    }

    // Read core properties
    $coreProperties = $this->extractMetaData($package);

    // Close file
    $package->close();

    // Store contents
    if ($storeContent) {
        $this->addField(\Zend_Search_Lucene_Field::Text('headlines', implode(' ', $documentHeadlines), 'UTF-8'));
        $this->addField(\Zend_Search_Lucene_Field::Text('body', implode(' ', $documentParagraphs), 'UTF-8'));
    } else {
        $this->addField(\Zend_Search_Lucene_Field::UnStored('headlines', implode(' ', $documentHeadlines), 'UTF-8'));
        $this->addField(\Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentParagraphs), 'UTF-8'));
    }

    // Store meta data properties
    foreach ($coreProperties as $key => $value) {
        $this->addField(\Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
    }

    // Store title (if not present in meta data)
    if (!isset($coreProperties['title'])) {
        $this->addField(\Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
    }
}
/**
 * Build a search document describing one object.
 *
 * Adds the fields docRef ("class:key", Keyword), class, key and summary
 * (UnIndexed), title (Text), contents (UnStored), createdBy and
 * dateCreated (Keyword), followed by one Keyword field per entry of
 * $keywords whose name and value are both non-empty.
 *
 * @param string $class       Combined with $key into the docRef field
 * @param mixed  $key         Combined with $class into the docRef field
 * @param string $title       Value of the title field
 * @param string $contents    Value of the contents field
 * @param string $summary     Value of the summary field
 * @param mixed  $createdBy   Value of the createdBy field
 * @param mixed  $dateCreated Value of the dateCreated field
 * @param array  $keywords    Extra fields as name => value pairs; a
 *                            non-array value adds no keyword fields
 */
public function __construct($class, $key, $title, $contents, $summary, $createdBy, $dateCreated, $keywords = array())
{
    $this->addField(Zend_Search_Lucene_Field::Keyword('docRef', "{$class}:{$key}"));
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('class', $class));
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('key', $key));
    $this->addField(Zend_Search_Lucene_Field::Text('title', $title));
    $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $contents));
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('summary', $summary));
    $this->addField(Zend_Search_Lucene_Field::Keyword('createdBy', $createdBy));
    $this->addField(Zend_Search_Lucene_Field::Keyword('dateCreated', $dateCreated));

    // Keywords are name => value pairs, which a scalar cannot express. The
    // former explode('', ...) call used an empty separator, which PHP
    // rejects (false plus a warning before PHP 8, ValueError from PHP 8),
    // so a non-array value never produced keyword fields. Keep that outcome
    // without triggering the error.
    if (!is_array($keywords)) {
        $keywords = array();
    }

    foreach ($keywords as $name => $value) {
        if (!empty($name) && !empty($value)) {
            $this->addField(Zend_Search_Lucene_Field::Keyword($name, $value));
        }
    }
}
/**
 * Object constructor.
 *
 * Adds the file path as a Text field, its modification time as a Keyword
 * field, and the whole file content as a 'contents' field declared as
 * ISO8859-1: a Text field when $storeContent is true, UnStored otherwise.
 *
 * @param string  $fileName     Path of the file to index
 * @param boolean $storeContent Whether the content is added as a Text
 *                              (true) or UnStored (false) field
 * @throws Zend_Search_Lucene_Exception If the file does not exist or cannot be read
 * @return void
 */
public function __construct($fileName, $storeContent = false)
{
    if (!file_exists($fileName)) {
        throw new Zend_Search_Lucene_Exception("File doesn't exists. Filename: '{$fileName}'");
    }

    $this->addField(Zend_Search_Lucene_Field::Text('path', $fileName));
    $this->addField(Zend_Search_Lucene_Field::Keyword('modified', filemtime($fileName)));

    // Read the file in one call. The previous fopen()/fread() loop never
    // checked the handle and compared each block with `!= false`, so a
    // block equal to "0" was mistaken for end-of-data and dropped.
    $data = file_get_contents($fileName);
    if ($data === false) {
        throw new Zend_Search_Lucene_Exception("File can't be read. Filename: '{$fileName}'");
    }

    if ($storeContent) {
        $this->addField(Zend_Search_Lucene_Field::Text('contents', $data, 'ISO8859-1'));
    } else {
        $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $data, 'ISO8859-1'));
    }
}
/**
 * Refresh this track's document in the Lucene search index.
 *
 * Documents already indexed under this track's primary key are deleted,
 * then a new document is added with the track's name and URL, the id of
 * its sfGuard user, and the type, title and owner (id, name, French name)
 * of its playlist. The index is committed afterwards.
 */
public function updateLuceneIndex()
{
    $index = trackTable::getLuceneIndex();

    // remove existing entries
    foreach ($index->find('pk:' . $this->getId()) as $hit) {
        $index->delete($hit->id);
    }

    // Look the related records up once instead of once per field.
    $playList  = $this->getPlayList();
    $playOwner = $playList->getPlayOwner();

    $doc = new Zend_Search_Lucene_Document();

    // Track fields. 'track_name' used to be added twice with the same
    // value; a single registration is enough.
    $doc->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));
    $doc->addField(Zend_Search_Lucene_Field::Text('track_name', $this->getName(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('track_url', $this->getUrl(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('play_it_user_id', $this->getSfGuardUser()->getId()));

    // Playlist fields.
    $doc->addField(Zend_Search_Lucene_Field::Text('track_type', $playList->getObjectType(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('playlist_name', $playList->getTitle(), 'utf-8'));

    // Playlist owner fields.
    $doc->addField(Zend_Search_Lucene_Field::Keyword('play_owner_id', $playOwner->getId()));
    $doc->addField(Zend_Search_Lucene_Field::Text('play_owner_name', $playOwner->getName(), 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::UnStored('play_owner_name_fr', $playOwner->getNameFr(), 'utf-8'));

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Constructor.
 *
 * Requires both 'Filename' and 'Key' in $values; when either is missing
 * the constructor returns early and no field is added. Otherwise it
 * installs the case-insensitive TextNum analyzer as the default analyzer,
 * adds Filename and Key as Keyword fields, adds each of Title, Subject,
 * Author, Keywords, CreationDate and ModDate as a Text field when it is
 * set and not equal to '', and adds Contents as an UnStored field under
 * the same condition.
 *
 * @param array $values An associative array of values to be used
 *                      in the document.
 */
public function __construct($values)
{
    // Reject the document unless both identifying values are present. The
    // previous guard tested the lowercase 'key' and joined the checks with
    // &&, so a missing 'Key' slipped through to an undefined-index read.
    if (!isset($values['Filename']) || !isset($values['Key'])) {
        return;
    }

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive());

    // Identifying fields are added as Keyword fields.
    $this->addField(Zend_Search_Lucene_Field::Keyword('Filename', $values['Filename']));
    $this->addField(Zend_Search_Lucene_Field::Keyword('Key', $values['Key']));

    // Optional descriptive fields, added as Text fields in this order.
    $optionalTextFields = array('Title', 'Subject', 'Author', 'Keywords', 'CreationDate', 'ModDate');
    foreach ($optionalTextFields as $fieldName) {
        if (isset($values[$fieldName]) && $values[$fieldName] != '') {
            $this->addField(Zend_Search_Lucene_Field::Text($fieldName, $values[$fieldName]));
        }
    }

    // The contents are added as an UnStored field.
    if (isset($values['Contents']) && $values['Contents'] != '') {
        $this->addField(Zend_Search_Lucene_Field::UnStored('Contents', $values['Contents']));
    }
}
/**
 * Rebuild the search index from the v_einsatz and v_news tables.
 *
 * Creates a new index at $this->search_index, adds one document per row
 * with a title, a link path, the searchable content and a content type,
 * echoes a progress line for every row, and finally optimizes the index.
 *
 * NOTE(review): where('online', 1) is issued only before the first get();
 * whether it also applies to the v_news query depends on the query builder
 * - confirm.
 */
public function build_index()
{
    echo "Anfang<br>";

    // Create the index; an already existing index is discarded.
    $searchIndex = Zend_Search_Lucene::create($this->search_index);

    $this->db->where('online', 1);
    $einsatzRows = $this->db->get('v_einsatz')->result();
    foreach ($einsatzRows as $einsatz) {
        // New search index document.
        $document = new Zend_Search_Lucene_Document();

        // Title shown in the result list.
        $title = htmlentities($einsatz->name);
        $document->addField(Zend_Search_Lucene_Field::Text('title', $title));

        // Path the search result links to.
        $path = base_url('aktuelles/einsatz/' . $einsatz->einsatzID);
        $document->addField(Zend_Search_Lucene_Field::Text('path', $path));

        // Content that is indexed in addition to the title.
        $content = htmlentities($einsatz->lage . $einsatz->bericht . $einsatz->weitere_kraefte . $einsatz->ort);
        $document->addField(Zend_Search_Lucene_Field::UnStored('content', $content));
        $document->addField(Zend_Search_Lucene_Field::unIndexed('content_type', 'Einsatz'));

        // Add to the index.
        $searchIndex->addDocument($document);
        echo 'Einsatz ' . $einsatz->name . ' zum Index hinzugefügt.<br />';
    }

    $newsRows = $this->db->get('v_news')->result();
    foreach ($newsRows as $news) {
        // New search index document.
        $document = new Zend_Search_Lucene_Document();

        // Title shown in the result list.
        $title = htmlentities($news->title);
        $document->addField(Zend_Search_Lucene_Field::Text('title', $title));

        // Path the search result links to.
        $path = base_url('aktuelles/news/' . $news->newsID);
        $document->addField(Zend_Search_Lucene_Field::Text('path', $path));

        // Content that is indexed in addition to the title.
        $content = htmlentities($news->teaser . $news->text);
        $document->addField(Zend_Search_Lucene_Field::UnStored('content', $content));
        $document->addField(Zend_Search_Lucene_Field::unIndexed('content_type', 'News'));

        // Add to the index.
        $searchIndex->addDocument($document);
        echo 'News ' . $news->title . ' zum Index hinzugefügt.<br />';
    }

    // Optimize the index.
    $searchIndex->optimize();
    echo "Ende";
}
/**
 * Replace the search index document of a Doctrine record.
 *
 * Documents already indexed under the record's search primary key are
 * deleted. A new document is then built from that key plus the record's
 * search data: the result of getSearchData() when the record defines it,
 * otherwise toArray(false). Each entry becomes an UnStored field, and the
 * index is committed once the document is added.
 *
 * @param Doctrine_Record $record Record whose index entry is refreshed
 */
public function updateSearchIndex(Doctrine_Record $record)
{
    $searchIndex = $this->getIndex();

    // Remove the entries currently stored for this record.
    $staleHits = $searchIndex->find('pk:' . $this->_getRecordSearchPrimaryKey($record));
    foreach ($staleHits as $staleHit) {
        $searchIndex->delete($staleHit->id);
    }

    $document = new Zend_Search_Lucene_Document();

    // The primary key identifies the record in the search results.
    $primaryKeyField = Zend_Search_Lucene_Field::Keyword('pk', $this->_getRecordSearchPrimaryKey($record));
    $document->addField($primaryKeyField);

    // Prefer the record's own search data over its plain array form.
    $searchData = method_exists($record, 'getSearchData')
        ? $record->getSearchData()
        : $record->toArray(false);

    foreach ($searchData as $fieldName => $fieldValue) {
        $document->addField(Zend_Search_Lucene_Field::UnStored($fieldName, $fieldValue, 'utf-8'));
    }

    // Add the record to the index.
    $searchIndex->addDocument($document);
    $searchIndex->commit();
}