/**
 * Index (or re-index) one article in the Lucene index.
 *
 * A Lucene document cannot be updated in place, so every document already
 * stored under the article's PMID is deleted before the new one is added.
 *
 * @param array $article  Article data; the keys PMID, Year, Journal, Title,
 *                        Authors, Reference, Abstract and MeshHeadings are read.
 * @param mixed $optimise When truthy the index is optimised before the commit.
 * @return void
 */
function index_lucene($article, $optimise)
{
    $index = getIndex_lucene();

    // Exact term lookup on the PMID keyword to find earlier versions.
    $pmidTerm = new Zend_Search_Lucene_Index_Term($article["PMID"], 'PMID');
    $previous = $index->find(new Zend_Search_Lucene_Search_Query_Term($pmidTerm));
    if (count($previous) > 0) {
        echo "[deleting previous version]\n";
        foreach ($previous as $stale) {
            $index->delete($stale->id);
        }
    }

    $doc = new Zend_Search_Lucene_Document();

    // Untokenized identifier-like fields.
    foreach (array('PMID', 'Year', 'Journal') as $name) {
        $doc->addField(Zend_Search_Lucene_Field::Keyword($name, $article[$name]));
    }

    // Tokenized, stored text fields.
    foreach (array('Title', 'Authors', 'Reference') as $name) {
        $doc->addField(Zend_Search_Lucene_Field::Text($name, $article[$name], 'utf-8'));
    }

    // The abstract is searchable but not stored in the index.
    $doc->addField(Zend_Search_Lucene_Field::UnStored('Abstract', $article["Abstract"], 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('MeshHeadings', $article["MeshHeadings"], 'utf-8'));

    $index->addDocument($doc);

    if ($optimise) {
        echo "Optimising index\n";
        $index->optimize();
    }

    $index->commit();
    echo "The index contains " . $index->numDocs() . " documents\n";
}
/**
 * Replace the index entry for one record in '../application/searchindex'.
 *
 * Existing documents whose 'pk' matches $data['pk'] are deleted, then a new
 * document is built from $data and committed.
 *
 * @param array $data Reads the keys pk, code, type, id, title and description.
 * @return void
 * @throws Zend_Search_Exception Propagated unchanged when the index cannot be opened.
 */
public static function update($data)
{
    // Exceptions from open()/setDefault() simply propagate to the caller.
    $index = Zend_Search_Lucene::open('../application/searchindex');
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive()
    );

    // Drop whatever is currently indexed under this primary key.
    $stale = $index->find('pk:' . $data['pk']);
    foreach ($stale as $entry) {
        $index->delete($entry->id);
    }

    $doc = new Zend_Search_Lucene_Document();

    // Identifiers.
    $doc->addField(Zend_Search_Lucene_Field::Keyword('pk', $data['pk']));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('code', $data['code'], 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('u_code', strtolower($data['code']), 'UTF-8'));

    // Stored-only values returned with search hits.
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('type', $data['type'], 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('id', $data['id'], 'UTF-8'));

    // Searchable text, each paired with a convert_vi_to_en() variant.
    $doc->addField(Zend_Search_Lucene_Field::Text('title', $data['title'], 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text(
        'en_title',
        Default_Model_Functions::convert_vi_to_en($data['title']),
        'UTF-8'
    ));
    $doc->addField(Zend_Search_Lucene_Field::Text('description', $data['description'], 'UTF-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text(
        'en_description',
        Default_Model_Functions::convert_vi_to_en($data['description']),
        'UTF-8'
    ));

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Build one document from $data and add it to the index under ZY_ROOT/index.
 *
 * @param array  $needFields Map of field name => field kind. The kinds
 *                           'keywords', 'text' and 'unindexed' are translated to
 *                           the matching Zend_Search_Lucene_Field factory; any
 *                           other value is used directly as the factory name.
 * @param array  $data       Field values, looked up by the same field names.
 * @param string $charset    Encoding passed to every field factory.
 * @return bool Always true.
 */
public function edit($needFields = array(), $data = array(), $charset = 'UTF-8')
{
    $index = new Zend_Search_Lucene(ZY_ROOT . '/index');
    $doc = new Zend_Search_Lucene_Document();

    foreach ($needFields as $name => $kind) {
        // Resolve the factory method name for this kind of field.
        if ($kind == 'keywords') {
            $factory = 'Keyword';
        } elseif ($kind == 'text') {
            $factory = 'Text';
        } elseif ($kind == 'unindexed') {
            $factory = 'unindexed';
        } else {
            $factory = $kind;
        }

        $doc->addField(Zend_Search_Lucene_Field::$factory($name, $data[$name], $charset));
    }

    $index->addDocument($doc);
    $index->commit();
    $index->optimize();

    return true;
}
/**
 * Rebuild the user search index stored under PUBLIC_PATH/tmp/lucene.
 *
 * The index is opened (or created when opening fails), every existing
 * document id is deleted, and one document per user is added with the user's
 * first name ('title') and employee code ('empcode').
 *
 * Layout and view rendering are disabled; the only output is an error message
 * when the index can neither be opened nor created.
 *
 * @return void
 */
public function luceneIndexAction()
{
    $this->view->layout()->disableLayout();
    $this->_helper->viewRenderer->setNoRender(true);

    $path = PUBLIC_PATH . '/tmp/lucene';
    try {
        $index = Zend_Search_Lucene::open($path);
    } catch (Zend_Search_Lucene_Exception $e) {
        try {
            $index = Zend_Search_Lucene::create($path);
        } catch (Zend_Search_Lucene_Exception $e) {
            echo "Unable to open or create index : {$e->getMessage()}";
            // Without an index there is nothing to rebuild; stop here instead
            // of calling methods on an undefined variable.
            return;
        }
    }

    // Remove everything currently in the index.
    $maxDoc = $index->maxDoc();
    for ($i = 0; $i < $maxDoc; $i++) {
        $index->delete($i);
    }

    // The analyzer does not depend on the user, so set it once.
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive()
    );

    $userModel = new Application_Model_User();
    foreach ($userModel->fetchAll() as $_user) {
        $doc = new Zend_Search_Lucene_Document();
        $doc->addField(Zend_Search_Lucene_Field::Text('title', $_user->getFirstName()));
        $doc->addField(Zend_Search_Lucene_Field::keyword('empcode', $_user->getEmployeeCode()));
        $index->addDocument($doc);
    }

    // Commit and optimise once for the whole batch rather than once per user.
    $index->commit();
    $index->optimize();
}
/**
 * Re-index this record: delete its current index entries and add a fresh
 * document holding the original title plus one field per translated column.
 *
 * Translated fields are named "<column>_<lang>" (for example name_en,
 * description_fi) and have their HTML tags stripped.
 *
 * @return void
 */
public function updateLuceneIndex()
{
    $index = $this->getTable()->getLuceneIndex();

    // Remove entries previously indexed under this primary key.
    $stale = $index->find('pk:' . $this->getId());
    foreach ($stale as $entry) {
        $index->delete($entry->id);
    }

    $doc = new Zend_Search_Lucene_Document();

    // The primary key identifies the record in search results.
    $doc->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));

    $translations = Doctrine::getTable('ProductTranslation')
        ->createQuery()
        ->from('ProductTranslation pt')
        ->where('pt.id = ?', $this->getId())
        ->execute();

    $doc->addField(Zend_Search_Lucene_Field::UnStored('original_title', $this->getOriginalTitle(), 'utf-8'));

    // Columns that are bookkeeping rather than searchable text.
    $skipped = array_flip(array('lang', 'id', 'volume', 'slug'));

    foreach ($translations->toArray() as $translation) {
        $lang = $translation['lang'];
        $searchable = array_diff_key($translation, $skipped);

        foreach ($searchable as $column => $text) {
            $doc->addField(Zend_Search_Lucene_Field::UnStored(
                $column . '_' . $lang,
                strip_tags($text),
                'utf-8'
            ));
        }
    }

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Build the Zend_Search_Lucene_Document that represents one document row.
 *
 * Identifier columns become keyword fields, descriptive columns are passed
 * through Indexer::phonetics() and stored as text fields, and a stored-only
 * 'url' field points at the document module (openDir for '.dir' entries,
 * download otherwise).
 *
 * @global string $urlServer
 * @param object $docu Document row; id, course_id, title, description, filename,
 *                     comment, creator, subject, author, visible, public, format
 *                     and path are read.
 * @return Zend_Search_Lucene_Document
 */
protected function makeDoc($docu)
{
    global $urlServer;

    $encoding = 'utf-8';
    $doc = new Zend_Search_Lucene_Document();

    // Exact-match identifiers.
    $keywords = array(
        'pk' => 'doc_' . $docu->id,
        'pkid' => $docu->id,
        'doctype' => 'doc',
        'courseid' => $docu->course_id,
    );
    foreach ($keywords as $fieldName => $value) {
        $doc->addField(Zend_Search_Lucene_Field::Keyword($fieldName, $value, $encoding));
    }

    // Index field name => row property, run through Indexer::phonetics().
    $phoneticSources = array(
        'title' => 'title',
        'content' => 'description',
        'filename' => 'filename',
        'comment' => 'comment',
        'creator' => 'creator',
        'subject' => 'subject',
        'author' => 'author',
    );
    foreach ($phoneticSources as $fieldName => $property) {
        $doc->addField(Zend_Search_Lucene_Field::Text(
            $fieldName,
            Indexer::phonetics($docu->$property),
            $encoding
        ));
    }

    // Flags are indexed verbatim.
    foreach (array('visible', 'public') as $flag) {
        $doc->addField(Zend_Search_Lucene_Field::Text($flag, $docu->$flag, $encoding));
    }

    if ($docu->format == '.dir') {
        $urlAction = 'openDir';
    } else {
        $urlAction = 'download';
    }
    $url = $urlServer . 'modules/document/index.php?course=' . course_id_to_code($docu->course_id)
        . '&' . $urlAction . '=' . $docu->path;
    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('url', $url, $encoding));

    return $doc;
}
/**
 * Index a file.
 *
 * The file is looked up in the index by its path. When a document with the
 * same content checksum already exists nothing is done; otherwise every
 * document stored for that path is deleted and a fresh one is added,
 * committed and the index optimised.
 *
 * @param string $filePath The file path
 * @return void
 */
public function index($filePath)
{
    $content = file_get_contents($filePath);
    $modificationTime = filemtime($filePath);
    $checksum = md5($content);

    // 'path' is a keyword (untokenized) field, so it has to be matched with an
    // exact term query: feeding the raw path to the query parser would have it
    // tokenized and its special characters (':', '/', '-', ...) interpreted as
    // query syntax.
    $pathTerm = new Zend_Search_Lucene_Index_Term($filePath, 'path');
    $hits = $this->_data->find(new Zend_Search_Lucene_Search_Query_Term($pathTerm));

    if (count($hits) > 0) {
        $document = $hits[0]->getDocument();

        // If the checksums are the same, no need to update
        if ($checksum === $document->checksum) {
            return;
        }

        // Delete every stale document for this path, not only the first hit,
        // so duplicates cannot pile up.
        foreach ($hits as $hit) {
            $this->_data->delete($hit->id);
        }
    }

    // Create a new document
    $document = new Zend_Search_Lucene_Document();
    $document->addField(Zend_Search_Lucene_Field::keyword('path', $filePath));
    $document->addField(Zend_Search_Lucene_Field::keyword('modificationTime', $modificationTime));
    $document->addField(Zend_Search_Lucene_Field::keyword('checksum', $checksum));
    $document->addField(Zend_Search_Lucene_Field::unStored('content', $content, 'utf-8'));
    $this->_data->addDocument($document);

    // Commit the changes
    $this->_data->commit();
    $this->_data->optimize();
}
/**
 * Build the document from an association-rule node plus additional values.
 *
 * Each non-text, non-empty child node of $rule becomes a field named after
 * the node: numeric values are passed through JuceneHelper::prepareNumber(),
 * other values have '-' removed. A node called 'Text' is stored unindexed,
 * every other node is a text field. Each entry of $additional becomes a
 * keyword field named "service_<key>".
 *
 * @param DOMNode $rule         Node whose childNodes are indexed (assumed to be a
 *                              DOM node because childNodes/nodeName/nodeValue are used).
 * @param array   $additional   Extra key => value pairs to index as keywords.
 * @param mixed   $storeContent Not used by this constructor.
 */
private function __construct($rule, $additional, $storeContent)
{
    foreach ($rule->childNodes as $quantifier) {
        if ($quantifier->nodeName == '#text' || $quantifier->nodeValue == '') {
            continue;
        }

        $val = trim($quantifier->nodeValue);
        if (is_numeric($val)) {
            $val = JuceneHelper::prepareNumber($val);
        } else {
            $val = str_replace("-", "", (string) $val);
        }

        // The 'Text' node is only stored; everything else is searchable.
        $type = ($quantifier->nodeName == 'Text') ? 'Unindexed' : 'Text';

        $this->addField(Zend_Search_Lucene_Field::$type($quantifier->nodeName, $val, JUCENE_ENCODING));
    }

    foreach ($additional as $field => $value) {
        // Index the prepared number itself; previously the prepared value was
        // computed into a separate variable and then discarded.
        if (is_numeric($value)) {
            $value = JuceneHelper::prepareNumber($value);
        }

        $this->addField(Zend_Search_Lucene_Field::Keyword('service_' . $field, $value, JUCENE_ENCODING));
    }
}
/**
 * Create a fresh page index under APPLICATION_PATH/indexes, add one document
 * per page, optimise the index and expose the document count to the view as
 * indexSize.
 *
 * @return void
 */
public function buildAction()
{
    $index = Zend_Search_Lucene::create(APPLICATION_PATH . '/indexes');

    $pageModel = new Model_Page();
    $rows = $pageModel->fetchAll();

    if ($rows->count() > 0) {
        foreach ($rows as $row) {
            $page = new CMS_Content_Item_Page($row->id);
            $doc = new Zend_Search_Lucene_Document();

            // The id is returned with results but is not searchable.
            $doc->addField(Zend_Search_Lucene_Field::unIndexed('page_id', $page->id));

            // Content fields are both searchable and returned with results.
            foreach (array('name', 'headline', 'description', 'content') as $attribute) {
                $doc->addField(Zend_Search_Lucene_Field::text('page_' . $attribute, $page->$attribute));
            }

            $index->addDocument($doc);
        }
    }

    $index->optimize();

    // Report the resulting index size.
    $this->view->indexSize = $index->numDocs();
}
/**
 * Fetch a page, add it to the search index and recurse into its links.
 *
 * URLs without a scheme get HTTP_HOST prepended and one trailing slash is
 * removed. A URL is processed only once (tracked in $this->_indexedUrl) and
 * only when it contains HTTP_HOST. When the page contains an
 * <!--index-->...<!--/index--> section only that section is indexed; script
 * blocks and tags are stripped. Links that have no extension or end in
 * php/html/txt/htm are followed recursively.
 *
 * @param string $url Absolute or site-relative URL.
 * @return void
 */
protected function _indexate($url)
{
    // Treat both http:// and https:// URLs as already absolute; previously an
    // https link had HTTP_HOST glued in front of it.
    if (stripos($url, 'http://') === false && stripos($url, 'https://') === false) {
        $url = HTTP_HOST . $url;
    }
    $url = substr($url, -1) == '/' ? substr($url, 0, -1) : $url;

    if (in_array($url, $this->_indexedUrl, true) || stripos($url, HTTP_HOST) === false) {
        return;
    }
    $this->_indexedUrl[] = $url;

    $html = file_get_contents($url);
    if ($html === false) {
        // The page could not be fetched; do not index an empty document.
        return;
    }

    // Silence parser warnings for sloppy markup, then restore the caller's
    // libxml error mode instead of forcing it to false.
    $previousErrorMode = libxml_use_internal_errors(true);
    $doc = Zend_Search_Lucene_Document_Html::loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if (preg_match('/<\\!--index-->(.*)<\\!--\\/index-->/isu', $html, $matches)) {
        $html = $matches[1];
    }
    $html = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $html);
    $html = strip_tags($html);

    $doc->addField(Zend_Search_Lucene_Field::Text('content', $html, 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('body', '', 'utf-8'));
    $doc->addField(Zend_Search_Lucene_Field::Text('url', $url, 'utf-8'));
    $this->_indexHandle->addDocument($doc);

    Zend_Registry::get('Logger')->info('Search index is created: ' . $url, Zend_Log::INFO);

    foreach ($doc->getLinks() as $link) {
        $parts = explode('.', $link);
        $ext = end($parts);

        // $link == $ext means the link contains no dot, i.e. no extension.
        if ($link == $ext || in_array($ext, array('php', 'html', 'txt', 'htm'), true)) {
            $this->_indexate($link);
        }
    }
}
/**
 * Recreate the media search index under APPLICATION_ROOT/data/my-index.
 *
 * Selects media rows where deleted != 1, active != 0 and category_id is 2, 3
 * or 4 (newest first) and adds one document per item with its lower-cased
 * name, URL, meta keywords and content.
 *
 * @return void
 */
public function updateAction()
{
    $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);

    // Create the index.
    $index = Zend_Search_Lucene::create(APPLICATION_ROOT . '/data/my-index');

    $mediaMapper = new Media_Model_Mapper_Media();
    $select = $mediaMapper->getDbTable()->select();
    $select->where('deleted != ?', 1)
        ->where('active != ?', 0)
        ->where('category_id IN(?)', array(2, 3, 4))
        ->order('timestamp DESC');
    $mediaItems = $mediaMapper->fetchAll($select);

    if (!empty($mediaItems)) {
        foreach ($mediaItems as $mediaItem) {
            $doc = new Zend_Search_Lucene_Document();

            // Store the name so the document can be identified in search results.
            $title = strtolower($mediaItem->getName());
            $doc->addField(Zend_Search_Lucene_Field::Text('title', $title, 'UTF-8'));

            // Store the URL so the document can be identified in search results.
            $url = '/media/' . $mediaItem->getFullPath();
            $doc->addField(Zend_Search_Lucene_Field::Text('url', $url, 'UTF-8'));

            // Storing the description is currently disabled:
            // $doc->addField(Zend_Search_Lucene_Field::Text('description', strtolower($mediaItem->getSContent()),'UTF-8'));

            // Index the meta keywords of the document.
            $keywords = strtolower($mediaItem->getMetaKeywords());
            $doc->addField(Zend_Search_Lucene_Field::UnStored('keyword', $keywords, 'UTF-8'));

            // Index the document content.
            $contents = strtolower($mediaItem->getContent());
            $doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $contents, 'UTF-8'));

            // Add the document to the index.
            $index->addDocument($doc);
        }
    }
}
/**
 * Rebuild the places index at $this->_indexPath from up to 7500 rows of
 * $this->_name (columns id, name, placepic) and print the elapsed seconds.
 *
 * Raises the memory limit to 1000M and removes the execution time limit for
 * the duration of the request.
 *
 * @return void
 */
public function buildplaces()
{
    ini_set('memory_limit', '1000M');
    set_time_limit(0);
    $startedAt = time();

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
    );

    // Start from an empty index.
    $index = Zend_Search_Lucene::create($this->_indexPath);

    // Load the rows to index.
    $select = $this->_db->select()
        ->from($this->_name, array('id', 'name', 'placepic'))
        ->limit(7500);
    $rows = $this->_db->fetchAssoc($select);

    foreach ($rows as $row) {
        $doc = new Zend_Search_Lucene_Document();
        $doc->addField(Zend_Search_Lucene_Field::keyword('placeid', $row['id']));
        $doc->addField(Zend_Search_Lucene_Field::text('placename', $row['name']));
        $doc->addField(Zend_Search_Lucene_Field::unStored('placepic', $row['placepic']));
        $index->addDocument($doc);
    }

    $index->commit();

    print_r(time() - $startedAt);
}
/**
 * Object constructor
 *
 * Parses the PDF data, indexes the Title/Author/Subject/Keywords properties
 * that are present as unstored fields and indexes the extracted text as
 * 'body'. Any exception is written to the 'search_lucene' log and swallowed.
 *
 * @param string  $data         PDF data handed to \Zend_Pdf::parse()
 * @param boolean $storeContent Whether the body text is stored (Text) or only indexed (UnStored)
 */
private function __construct($data, $storeContent)
{
    // PDF property name => index field name.
    $metaFields = array(
        'Title' => 'title',
        'Author' => 'author',
        'Subject' => 'subject',
        'Keywords' => 'keywords',
    );

    try {
        $zendpdf = \Zend_Pdf::parse($data);

        // Store meta data properties
        foreach ($metaFields as $property => $fieldName) {
            if (isset($zendpdf->properties[$property])) {
                $this->addField(\Zend_Search_Lucene_Field::UnStored($fieldName, $zendpdf->properties[$property]));
            }
        }

        //TODO handle PDF 1.6 metadata Zend_Pdf::getMetadata()

        // Extract the text content.
        $pdfParse = new \App_Search_Helper_PdfParser();
        $body = $pdfParse->pdf2txt($zendpdf->render());

        if ($body != '') {
            $bodyField = $storeContent
                ? \Zend_Search_Lucene_Field::Text('body', $body, 'UTF-8')
                : \Zend_Search_Lucene_Field::UnStored('body', $body, 'UTF-8');
            $this->addField($bodyField);
        }
    } catch (\Exception $e) {
        Util::writeLog(
            'search_lucene',
            $e->getMessage() . ' Trace:\\n' . $e->getTraceAsString(),
            Util::ERROR
        );
    }
}
/**
 * (Re)index a bug: remove its current index entry and add a new document
 * with the bug's own columns, all of its comments and its attributes.
 *
 * @param object $bug Bug record; bug_id, title, reporting_user_id and
 *                    reporting_date are read.
 * @return void
 */
public function IndexBug($bug)
{
    $this->RemoveBug($bug->bug_id);

    $doc = new Zend_Search_Lucene_Document();
    $doc->AddField(Zend_Search_Lucene_Field::Keyword('bug_id', $bug->bug_id));
    $doc->AddField(Zend_Search_Lucene_Field::Text('title', $bug->title));
    $doc->AddField(Zend_Search_Lucene_Field::Keyword('reporting_user_id', $bug->reporting_user_id));
    $doc->AddField(Zend_Search_Lucene_Field::Keyword('reporting_date', $bug->reporting_date));

    // Hits are shown as bugs only, but comment text should still count, so
    // all comment bodies are merged into one unstored field.
    $commentQuery = Bugdar::$db->Prepare("SELECT body FROM " . TABLE_PREFIX . "comments WHERE bug_id = ? ORDER BY comment_id");
    $commentQuery->Execute(array($bug->bug_id));
    $bodies = array();
    while ($row = $commentQuery->FetchObject()) {
        $bodies[] = $row->body . "\n\n";
    }
    $doc->AddField(Zend_Search_Lucene_Field::UnStored('comments', implode('', $bodies)));

    // Titled attributes become their own keyword fields; untitled ones are tags.
    $attributeQuery = Bugdar::$db->Prepare("SELECT * FROM " . TABLE_PREFIX . "bug_attributes WHERE bug_id = ?");
    $attributeQuery->Execute(array($bug->bug_id));
    $tags = array();
    while ($attribute = $attributeQuery->FetchObject()) {
        if (!$attribute->attribute_title) {
            $tags[] = $attribute->value;
            continue;
        }
        $doc->AddField(Zend_Search_Lucene_Field::Keyword($attribute->attribute_title, $attribute->value));
    }
    $doc->AddField(Zend_Search_Lucene_Field::Text('tag', implode(' ', $tags)));

    $this->lucene->AddDocument($doc);
}
/**
 * Rebuild the album search index and then redirect to the article indexer.
 *
 * Every published album (publish = 1, newest album_id first) is added as one
 * document holding its id, cover image URL, title, body, public URL, creation
 * date and creator name. Values are HTML-encoded before they are indexed.
 */
public function actionIndexing()
{
    ini_set('max_execution_time', 0);
    ob_start();

    $index = new Zend_Search_Lucene(Yii::getPathOfAlias($this->_indexFilesPath), true);

    $criteria = new CDbCriteria();
    $criteria->compare('t.publish', 1);
    $criteria->order = 'album_id DESC';
    //$criteria->limit = 10;

    $albums = Albums::model()->findAll($criteria);

    foreach ($albums as $album) {
        // Albums without a cover get an empty media value.
        $cover = ($album->media_id != 0)
            ? Yii::app()->request->baseUrl . '/public/album/' . $album->album_id . '/' . $album->cover->media
            : '';

        $doc = new Zend_Search_Lucene_Document();
        $doc->addField(Zend_Search_Lucene_Field::UnIndexed('id', CHtml::encode($album->album_id), 'utf-8'));
        $doc->addField(Zend_Search_Lucene_Field::Text('media', CHtml::encode($cover), 'utf-8'));
        $doc->addField(Zend_Search_Lucene_Field::Text('title', CHtml::encode($album->title), 'utf-8'));

        $body = Utility::hardDecode(Utility::softDecode($album->body));
        $doc->addField(Zend_Search_Lucene_Field::Text('body', CHtml::encode($body), 'utf-8'));

        $viewUrl = Utility::getProtocol() . '://' . Yii::app()->request->serverName
            . Yii::app()->createUrl('album/site/view', array(
                'id' => $album->album_id,
                't' => Utility::getUrlTitle($album->title),
            ));
        $doc->addField(Zend_Search_Lucene_Field::Text('url', CHtml::encode($viewUrl), 'utf-8'));

        $created = Utility::dateFormat($album->creation_date, true) . ' WIB';
        $doc->addField(Zend_Search_Lucene_Field::UnIndexed('date', CHtml::encode($created), 'utf-8'));
        $doc->addField(Zend_Search_Lucene_Field::UnIndexed('creation', CHtml::encode($album->user->displayname), 'utf-8'));

        $index->addDocument($doc);
    }

    echo 'Album Lucene index created';
    $index->commit();

    $this->redirect(Yii::app()->createUrl('article/search/indexing'));
    ob_end_flush();
}
/**
 * addField() must return the document itself so that calls can be chained.
 */
public function testAddFieldMethodChaining()
{
    // A single call hands back a document instance.
    $document = new Zend_Search_Lucene_Document();
    $returned = $document->addField(Zend_Search_Lucene_Field::Text('title', 'Title'));
    $this->assertTrue($returned instanceof Zend_Search_Lucene_Document);

    // Several calls can be chained without an error.
    $document = new Zend_Search_Lucene_Document();
    $document
        ->addField(Zend_Search_Lucene_Field::Text('title', 'Title'))
        ->addField(Zend_Search_Lucene_Field::Text('annotation', 'Annotation'))
        ->addField(Zend_Search_Lucene_Field::Text('body', 'Document body, document body, document body...'));
}
/**
 * Import the configured RSS feeds into a newly created Lucene index and print
 * progress as HTML.
 *
 * Only items that have a link, a title and a description are indexed.
 */
function index()
{
    $this->load->library('zend', 'Zend/Feed');
    $this->load->library('zend', 'Zend/Search/Lucene');
    $this->load->library('zend');
    $this->zend->load('Zend/Feed');
    $this->zend->load('Zend/Search/Lucene');

    // Create the index.
    $index = new Zend_Search_Lucene('C:\\xampp\\xampp\\htdocs\\controle_frota\\lucene\\feeds_index', true);

    $feedUrls = array('http://oglobo.globo.com/rss.xml?limite=50');

    foreach ($feedUrls as $feedUrl) {
        $channel = Zend_Feed::import($feedUrl);
        echo $channel->title() . '<br />';

        foreach ($channel->items as $entry) {
            // Skip incomplete items.
            if (!($entry->link() && $entry->title() && $entry->description())) {
                continue;
            }

            $doc = new Zend_Search_Lucene_Document();
            $doc->addField(Zend_Search_Lucene_Field::Keyword('link', $this->sanitize($entry->link())));
            $doc->addField(Zend_Search_Lucene_Field::Text('title', $this->sanitize($entry->title())));
            $doc->addField(Zend_Search_Lucene_Field::Unstored('contents', $this->sanitize($entry->description())));

            echo "\tAdding: " . $entry->title() . '<br />';
            $index->addDocument($doc);
        }
    }

    $index->commit();
    echo $index->count() . ' Documents indexed.<br />';
}
/**
 * Populate the search document for one product.
 *
 * @param Storefront_Resource_Product_Item_Interface $item     Product; productId, name,
 *                                                             description and getPrice() are read.
 * @param mixed                                      $category Value indexed in the 'categories' field.
 */
public function __construct(Storefront_Resource_Product_Item_Interface $item, $category)
{
    $encoding = 'UTF-8';

    $fields = array(
        Zend_Search_Lucene_Field::keyword('productId', $item->productId, $encoding),
        Zend_Search_Lucene_Field::text('categories', $category, $encoding),
        Zend_Search_Lucene_Field::text('name', $item->name, $encoding),
        // The description is searchable but not stored.
        Zend_Search_Lucene_Field::unStored('description', $item->description, $encoding),
        Zend_Search_Lucene_Field::text('price', $this->_formatPrice($item->getPrice()), $encoding),
    );

    foreach ($fields as $field) {
        $this->addField($field);
    }
}
/**
 * Checks that a Text field created with the 'ISO-8859-1' encoding reports that
 * encoding, its name and its raw value unchanged, and that getUtf8Value()
 * returns the expected converted string.
 *
 * NOTE(review): the non-ASCII characters in the two string literals look as if
 * this file was re-encoded at some point; the assertions depend on the exact
 * bytes, so verify the literals against the file's real encoding before
 * touching them.
 */
public function testEncoding() { $field = Zend_Search_Lucene_Field::Text('field', 'Words with umlauts: εγό...', 'ISO-8859-1'); $this->assertEquals($field->encoding, 'ISO-8859-1'); $this->assertEquals($field->name, 'field'); $this->assertEquals($field->value, 'Words with umlauts: εγό...'); $this->assertEquals($field->getUtf8Value(), 'Words with umlauts: Γ₯ãü...'); }
/**
 * Updates the index for an object.
 *
 * Existing entries for the object are always removed. Unless $delete is set
 * (or the object has a canSearch() method that returns false) a new document
 * is then added with the bookkeeping fields _id, _pk, _model, _title,
 * _description and _url plus one unstored field per configured search field.
 *
 * @param Doctrine_Record $object Record to (re)index.
 * @param bool            $delete When true the record is only removed from the index.
 * @return void
 * @throws Exception When search.yml has no models, does not define the
 *                   object's model, or lacks its title, description or route.
 */
public function updateIndex(Doctrine_Record $object, $delete = false)
{
    /* error checking */
    if (!array_key_exists('models', $this->config) || empty($this->config['models'])) {
        // The message has no placeholder, so no sprintf() argument is needed.
        throw new Exception('No models set in search.yml');
    }

    $model = get_class($object);
    if (!array_key_exists($model, $this->config['models'])) {
        throw new Exception(sprintf('Model "%s" not defined in "%s" index in your search.yml', $model, $this->name));
    }

    $id = $this->generateId($object->getId(), $model);
    $config = $this->config['models'][$model];

    // delete existing entries
    foreach ($this->search('_id:"' . $id . '"') as $hit) {
        $this->getIndex()->delete($hit->id);
    }

    // Only add to the index when not deleting and when the model's optional
    // canSearch() method allows it (searchable when the method does not exist).
    $searchable = !method_exists($object, 'canSearch')
        || call_user_func(array($object, 'canSearch'));
    if ($delete || !$searchable) {
        // Flush the deletions explicitly, as the add path below does.
        $this->getIndex()->commit();
        return;
    }

    $doc = new Zend_Search_Lucene_Document();

    // store a key for deleting in future
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_id', $id));

    // store primary key and model name to identify it in the search results
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_pk', $object->getId()));
    $doc->addField(Zend_Search_Lucene_Field::Keyword('_model', $model));

    // store title - used for search result title
    if (!array_key_exists('title', $config)) {
        throw new Exception(sprintf('A title must be set for model "%s" in search.yml', $model));
    }
    $doc->addField(Zend_Search_Lucene_Field::unIndexed(
        '_title',
        call_user_func(array($object, 'get' . sfInflector::camelize($config['title'])))
    ));

    // store description - used for search result description
    if (!array_key_exists('description', $config)) {
        throw new Exception(sprintf('A description must be set for model "%s" in search.yml', $model));
    }
    $doc->addField(Zend_Search_Lucene_Field::unIndexed(
        '_description',
        call_user_func(array($object, 'get' . sfInflector::camelize($config['description'])))
    ));

    // store url - @todo add more routing options
    if (!array_key_exists('route', $config)) {
        throw new Exception(sprintf('A route must be set for model "%s" in search.yml', $model));
    }
    sfContext::getInstance()->getConfiguration()->loadHelpers('Url');
    $url = url_for($config['route'], $object);
    $doc->addField(Zend_Search_Lucene_Field::unIndexed('_url', $url));

    // store fields; only the field names are needed, and iterating over them
    // avoids overwriting $config inside the loop
    if (array_key_exists('fields', $config)) {
        foreach (array_keys($config['fields']) as $field) {
            $doc->addField(Zend_Search_Lucene_Field::UnStored(
                $field,
                call_user_func(array($object, 'get' . sfInflector::camelize($field))),
                'utf-8'
            ));
        }
    }

    // save index
    $this->getIndex()->addDocument($doc);
    $this->getIndex()->commit();
}
/**
 * Constructor. Creates our indexable document and adds all
 * necessary fields to it using the passed in document.
 *
 * @param object $document Source entry; getId(), getUrl(), getCreationDate(),
 *                         getName(), getDetails() and getImplodedTags() are read.
 */
public function __construct($document)
{
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('id_entry', $document->getId()));
    $this->addField(Zend_Search_Lucene_Field::Keyword('url', $document->getUrl()));
    $this->addField(Zend_Search_Lucene_Field::UnIndexed('creation_date', $document->getCreationDate()));

    // The encoding belongs to the field factory; it used to be passed to
    // addField() by a misplaced parenthesis and was therefore ignored.
    $this->addField(Zend_Search_Lucene_Field::Text('name', $document->getName(), 'utf-8'));

    $this->addField(Zend_Search_Lucene_Field::Text('content', $document->getDetails()));
    $this->addField(Zend_Search_Lucene_Field::Text('tag', $document->getImplodedTags()));
}
/**
 * Object constructor: reads a WordprocessingML (.docx) package and indexes it.
 *
 * The office document part is located through _rels/.rels, the text runs and
 * breaks of every body paragraph are collected, and the fields 'filename',
 * 'body', the core meta data properties and (when the meta data has none)
 * 'title' are added.
 *
 * @param string  $fileName     Path of the .docx file.
 * @param boolean $storeContent Whether the body is stored (Text) or only indexed (UnStored).
 * @throws Zend_Search_Lucene_Exception When the package cannot be opened or
 *                                      its relations/document part cannot be read.
 */
private function __construct($fileName, $storeContent)
{
    // Document data holder
    $documentBody = array();

    // Open OpenXML package; ZipArchive::open() returns true or an error code.
    $package = new ZipArchive();
    if ($package->open($fileName) !== true) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Unable to open OpenXML package: ' . $fileName);
    }

    // Read relations and search for officeDocument
    $relationsXml = $package->getFromName('_rels/.rels');
    $relations = ($relationsXml === false) ? false : simplexml_load_string($relationsXml);
    if ($relations === false) {
        $package->close();
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
    }

    foreach ($relations->Relationship as $rel) {
        if ($rel["Type"] != Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
            continue;
        }

        // Found office document! Read in contents...
        $partPath = $this->absoluteZipPath(dirname($rel['Target']) . '/' . basename($rel['Target']));
        $partXml = $package->getFromName($partPath);
        $contents = ($partXml === false) ? false : simplexml_load_string($partXml);
        if ($contents === false) {
            $package->close();
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
        }

        $contents->registerXPathNamespace('w', Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);
        $paragraphs = $contents->xpath('//w:body/w:p');

        foreach ($paragraphs as $paragraph) {
            $runs = $paragraph->xpath('.//w:r/*[name() = "w:t" or name() = "w:br"]');
            if ($runs === false) {
                // Paragraph doesn't contain any text or breaks
                continue;
            }

            foreach ($runs as $run) {
                // A break element becomes a space, a text run its own text.
                $documentBody[] = ($run->getName() == 'br') ? ' ' : (string) $run;
            }

            // Add space after each paragraph. So they are not bound together.
            $documentBody[] = ' ';
        }

        // Only the first office document relation is indexed.
        break;
    }

    // Read core properties
    $coreProperties = $this->extractMetaData($package);

    // Close file
    $package->close();

    // Store filename
    $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

    // Store contents
    $bodyText = implode('', $documentBody);
    if ($storeContent) {
        $this->addField(Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8'));
    } else {
        $this->addField(Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8'));
    }

    // Store meta data properties
    foreach ($coreProperties as $key => $value) {
        $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
    }

    // Store title (if not present in meta data)
    if (!isset($coreProperties['title'])) {
        $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
    }
}
/**
 * Add bookkeeping and caller-supplied fields to a document, add it to the
 * index and log the addition.
 *
 * @param object $doc    Document to index; its body and url values are read.
 * @param array  $fields Map of field name => value, each added as a text field.
 * @return void
 */
protected function _indexDocument($doc, $fields)
{
    // Stored-only bookkeeping: a checksum of the body and the indexing time.
    $bookkeeping = array(
        'checkSum' => md5($doc->body),
        'lastIndexed' => time(),
    );
    foreach ($bookkeeping as $fieldName => $fieldValue) {
        $doc->addField(Zend_Search_Lucene_Field::UnIndexed($fieldName, $fieldValue));
    }

    foreach ($fields as $fieldName => $fieldValue) {
        $doc->addField(Zend_Search_Lucene_Field::Text($fieldName, $fieldValue));
    }

    $this->_index->addDocument($doc);

    $message = 'Added ' . urldecode($doc->url) . ' to index';
    Bbx_Log::write($message, null, self::LOG);
}
/**
 * Convert this page into a Lucene document.
 *
 * Title, path and teaser are stored text fields; content and search tags are
 * indexed without being stored. All fields use $this->_charset.
 *
 * @return Zend_Search_Lucene_Document
 */
public function asLuceneDocument()
{
    $doc = new Zend_Search_Lucene_Document();
    $charset = $this->_charset;

    // Index field name => page property.
    $storedText = array(
        'page_title' => 'title',
        'page_link' => 'path',
        'page_teaser' => 'teaser',
    );
    foreach ($storedText as $fieldName => $property) {
        $doc->addField(Zend_Search_Lucene_Field::Text($fieldName, $this->$property, $charset));
    }

    $doc->addField(Zend_Search_Lucene_Field::unstored('page_content', $this->content, $charset));
    $doc->addField(Zend_Search_Lucene_Field::UnStored('search_tags', $this->searchTags, $charset));

    return $doc;
}
/**
 * Builds a document with the fields id, person, celeb_type, wikikeyword and
 * blurb and adds it to $this->search.
 *
 * NOTE(review): $row is never assigned in this method and $since is never
 * read, so as written every field value comes from an undefined variable.
 * Presumably a query that selects rows changed since $since and a loop over
 * its results are missing here - confirm against the intended data source
 * before relying on this method.
 *
 * @param mixed $since Currently unused.
 */
public function insertFromDB($since) { $doc = new Zend_Search_Lucene_Document(); $doc->addField(Zend_Search_Lucene_Field::Keyword('id', $row['id'])); $doc->addField(Zend_Search_Lucene_Field::Keyword('person', $row['person'])); $doc->addField(Zend_Search_Lucene_Field::Text('celeb_type', $row['celeb_type'])); $doc->addField(Zend_Search_Lucene_Field::Text('wikikeyword', $row['wikikeyword'])); $doc->addField(Zend_Search_Lucene_Field::Text('blurb', $row['blurb'])); $this->search->addDocument($doc); }
/**
 * Add one item to the given index and commit.
 *
 * @param object $index Index exposing addDocument() and commit().
 * @param object $item  Item; name, id, image_id and description are read.
 * @param mixed  $tags  Value indexed in the 'tag' field.
 * @return void
 */
private static function _insert($index, $item, $tags)
{
    $fields = array(
        Zend_Search_Lucene_Field::Text('title', $item->name),
        Zend_Search_Lucene_Field::Text('item_id', strval($item->id)),
        // The image id is returned with hits but is not searchable.
        Zend_Search_Lucene_Field::UnIndexed('image_id', strval($item->image_id)),
        Zend_Search_Lucene_Field::Text('description', $item->description),
        Zend_Search_Lucene_Field::Text('tag', $tags),
    );

    $doc = new Zend_Search_Lucene_Document();
    foreach ($fields as $field) {
        $doc->addField($field);
    }

    $index->addDocument($doc);
    $index->commit();
}
/**
 * Index a file: its name as the 'filename' field and the space-joined result
 * of getBody() as the 'body' field.
 *
 * @param string $fileName     File name; also kept in $this->_filename.
 * @param mixed  $storeContent When truthy the body is stored (Text), otherwise
 *                             it is only indexed (UnStored).
 */
public function __construct($fileName, $storeContent)
{
    // Remember the file name before getBody() is called below.
    $this->_filename = $fileName;
    $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

    $body = implode(' ', $this->getBody());
    $bodyField = $storeContent
        ? Zend_Search_Lucene_Field::Text('body', $body, 'UTF-8')
        : Zend_Search_Lucene_Field::UnStored('body', $body, 'UTF-8');
    $this->addField($bodyField);
}
/**
 * Index the given source model.
 *
 * Remembers the model and its id, deletes the current entry, adds the
 * doctype, entity id and store id keyword fields followed by the attributes,
 * and finally adds the document.
 *
 * @param Mage_Core_Model_Abstract $sourceModel
 *
 * @return Mage_Lucene_Model_Index_Document_Abstract
 **/
public function index($sourceModel)
{
    $this->_sourceModel = $sourceModel;
    $this->_id = $sourceModel->getId();

    // Drop the existing entry before re-adding.
    $this->delete();

    $doctypeField = Zend_Search_Lucene_Field::Keyword('doctype', $this->getDoctype());
    $this->addField($doctypeField);

    $entityField = Zend_Search_Lucene_Field::Keyword('entity_id', $this->_id);
    $this->addField($entityField);

    $storeField = Zend_Search_Lucene_Field::Keyword(self::STORE_ATTRIBUTE_CODE, $this->getStore()->getId());
    $this->addField($storeField);

    $this->addAttributes();
    $this->addDocument();

    return $this;
}
/**
 * Rebuild the index.
 *
 * For every model in $this->settings the rows are loaded (through the model's
 * find_index() when it exists, find() otherwise) and each row becomes one
 * document with a 'cake_model' keyword plus the configured fields. Progress
 * is printed per row; timings are logged when Configure::read() is truthy.
 *
 * @access public
 * @return void
 */
function build_index()
{
    $index = $this->__open(true);
    $index->setMergeFactor(2000);
    $index->setMaxBufferedDocs(500);

    $start = time();

    foreach ($this->settings as $model => $model_options) {
        App::import('Model', $model);
        $model = new $model();

        if (empty($model_options['find_options'])) {
            $model_options['find_options'] = array();
        }

        if (method_exists($model, 'find_index')) {
            $results = $model->find_index('all', $model_options['find_options']);
        } else {
            $results = $model->find('all', $model_options['find_options']);
        }

        if (Configure::read()) {
            $this->log($model->name . ' find time: ' . (time() - $start), 'searchable');
            $start = time();
        }

        $count = count($results);
        $i = 1;
        foreach ($results as $result) {
            // Percentage of this model's rows processed so far.
            printf("%.1f", $i / $count * 100);
            $this->out("");
            $i++;

            $this->out('Processing ' . $model->name . ' #' . $result[$model->name]['id']);

            $doc = new Zend_Search_Lucene_Document();

            // add the model field
            $doc->addField(Zend_Search_Lucene_Field::Keyword('cake_model', $model->name, 'utf-8'));

            foreach ($model_options['fields'] as $field_name => $options) {
                // Optional user function that prepares the raw value.
                if (!empty($options['prepare']) && function_exists($options['prepare'])) {
                    $result[$model->name][$field_name] = call_user_func(
                        $options['prepare'],
                        $result[$model->name][$field_name]
                    );
                }

                $alias = !empty($options['alias']) ? $options['alias'] : $field_name;

                // Resolve the factory name into a plain variable first:
                // Class::$options['type'](...) is parsed differently by PHP 5
                // and PHP 7+ (uniform variable syntax), where it would read a
                // static property named $options instead.
                $fieldType = $options['type'];
                $doc->addField(Zend_Search_Lucene_Field::$fieldType(
                    $alias,
                    $result[$model->name][$field_name],
                    'utf-8'
                ));
            }

            $index->addDocument($doc);
            $this->out('Processed ' . $model->name . ' #' . $result[$model->name]['id']);
        }

        if (Configure::read()) {
            $this->log($model->name . ' adding time: ' . (time() - $start), 'searchable');
            $start = time();
        }
    }

    $this->optimize($index);
    $index->commit();

    if (Configure::read()) {
        $this->log('Optimize+commit time: ' . (time() - $start));
    }
}
/**
 * Add a feed to the feed search index.
 *
 * The index location is read from the registry (search->feed). Title, site
 * URL, feed URL, language and category are stored text fields; the
 * description is indexed without being stored.
 *
 * @param Feed $feed Feed; title, siteUrl, url, language, category and
 *                   description are read.
 * @return void
 */
public function addFeed(Feed $feed)
{
    $index = Zend_Search_Lucene::open(Zend_Registry::getInstance()->search->feed);

    $doc = new Zend_Search_Lucene_Document();

    // Index field name => feed property. 'title' used to be added a second
    // time further down; the duplicate call is removed.
    $textFields = array(
        'title' => 'title',
        'siteUrl' => 'siteUrl',
        'feedUrl' => 'url',
        'language' => 'language',
        'category' => 'category',
    );
    foreach ($textFields as $fieldName => $property) {
        $doc->addField(Zend_Search_Lucene_Field::Text($fieldName, $feed->$property));
    }

    $doc->addField(Zend_Search_Lucene_Field::UnStored('description', $feed->description));

    $index->addDocument($doc);
}