/**
 * Add one article to the Lucene index, replacing any earlier version.
 *
 * Documents already indexed under the same PMID are deleted first, because an
 * indexed document cannot be modified in place. The new document is then
 * added, the index is optionally optimised, and the changes are committed.
 *
 * @param array $article  Article data; the keys PMID, Year, Journal, Title,
 *                        Authors, Reference, Abstract and MeshHeadings are read.
 * @param mixed $optimise When truthy, the index is optimised before the commit.
 */
function index_lucene($article, $optimise)
{
    $luceneIndex = getIndex_lucene();

    // Exact-match lookup on the PMID term to find earlier versions.
    $pmidTerm = new Zend_Search_Lucene_Index_Term($article["PMID"], 'PMID');
    $staleHits = $luceneIndex->find(new Zend_Search_Lucene_Search_Query_Term($pmidTerm));
    if (count($staleHits) > 0) {
        echo "[deleting previous version]\n";
        foreach ($staleHits as $staleHit) {
            $luceneIndex->delete($staleHit->id);
        }
    }

    $document = new Zend_Search_Lucene_Document();

    // Identifier-like values are added as Keyword fields.
    foreach (array('PMID', 'Year', 'Journal') as $keywordName) {
        $document->addField(Zend_Search_Lucene_Field::Keyword($keywordName, $article[$keywordName]));
    }

    // Free-text values are added as UTF-8 Text fields.
    foreach (array('Title', 'Authors', 'Reference') as $textName) {
        $document->addField(Zend_Search_Lucene_Field::Text($textName, $article[$textName], 'utf-8'));
    }

    // The abstract is the only field added as UnStored.
    $document->addField(Zend_Search_Lucene_Field::UnStored('Abstract', $article["Abstract"], 'utf-8'));
    $document->addField(Zend_Search_Lucene_Field::Text('MeshHeadings', $article["MeshHeadings"], 'utf-8'));

    $luceneIndex->addDocument($document);

    if ($optimise) {
        echo "Optimising index\n";
        $luceneIndex->optimize();
    }

    $luceneIndex->commit();
    echo "The index contains " . $luceneIndex->numDocs() . " documents\n";
}
Example #2
0
 /**
  * Re-index one record in the search index at ../application/searchindex.
  *
  * Any document already indexed under the same 'pk' is deleted, then a new
  * document is built from $data and committed.
  *
  * @param array $data Record data; the keys pk, code, type, id, title and
  *                    description are read.
  * @throws Zend_Search_Exception Propagated unchanged when the index cannot be opened.
  */
 public static function update($data)
 {
     // Failures while opening the index simply propagate to the caller; the
     // former try/catch only re-threw the same exception.
     $index = Zend_Search_Lucene::open('../application/searchindex');
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive());

     // Remove an existing entry. 'pk' is added below as a Keyword field, so it
     // is looked up with an exact term query rather than by concatenating the
     // raw value into a query-parser string, where special characters in the
     // value would be interpreted as query syntax.
     $pkTerm = new Zend_Search_Lucene_Index_Term((string) $data['pk'], 'pk');
     $hits = $index->find(new Zend_Search_Lucene_Search_Query_Term($pkTerm));
     foreach ($hits as $hit) {
         $index->delete($hit->id);
     }

     $doc = new Zend_Search_Lucene_Document();
     $doc->addField(Zend_Search_Lucene_Field::Keyword('pk', $data['pk']));
     $doc->addField(Zend_Search_Lucene_Field::Keyword('code', $data['code'], 'UTF-8'));
     // Lower-cased copy of the code, added as a second Keyword field.
     $doc->addField(Zend_Search_Lucene_Field::Keyword('u_code', strtolower($data['code']), 'UTF-8'));
     $doc->addField(Zend_Search_Lucene_Field::unIndexed('type', $data['type'], 'UTF-8'));
     $doc->addField(Zend_Search_Lucene_Field::unIndexed('id', $data['id'], 'UTF-8'));
     $doc->addField(Zend_Search_Lucene_Field::Text('title', $data['title'], 'UTF-8'));
     // The en_* fields hold the title/description passed through convert_vi_to_en().
     $doc->addField(Zend_Search_Lucene_Field::Text('en_title', Default_Model_Functions::convert_vi_to_en($data['title']), 'UTF-8'));
     $doc->addField(Zend_Search_Lucene_Field::Text('description', $data['description'], 'UTF-8'));
     $doc->addField(Zend_Search_Lucene_Field::Text('en_description', Default_Model_Functions::convert_vi_to_en($data['description']), 'UTF-8'));

     $index->addDocument($doc);
     $index->commit();
 }
 /**
  * Build a document from $data and append it to the index under ZY_ROOT/index.
  *
  * @param array  $needFields Map of data key => field kind. The kind 'keywords'
  *                           selects a Keyword field; any other kind is used as
  *                           the name of the Zend_Search_Lucene_Field factory
  *                           method to call (e.g. 'text', 'unindexed').
  * @param array  $data       Values, looked up by the keys of $needFields.
  * @param string $charset    Encoding passed to every field factory.
  * @return bool Always TRUE.
  */
 public function edit($needFields = array(), $data = array(), $charset = 'UTF-8')
 {
     $luceneIndex = new Zend_Search_Lucene(ZY_ROOT . '/index');
     $document = new Zend_Search_Lucene_Document();

     foreach ($needFields as $name => $kind) {
         // 'keywords' is the only kind whose factory has a different name;
         // the loose comparison mirrors switch/case matching.
         $factory = ($kind == 'keywords') ? 'Keyword' : $kind;
         $document->addField(Zend_Search_Lucene_Field::$factory($name, $data[$name], $charset));
     }

     $luceneIndex->addDocument($document);
     $luceneIndex->commit();
     $luceneIndex->optimize();

     return TRUE;
 }
Example #4
0
 /**
  * Rebuild the user search index stored under PUBLIC_PATH/tmp/lucene.
  *
  * Opens the index (creating it when it cannot be opened), deletes every
  * document in it, then adds one document per user with the first name as
  * 'title' and the employee code as 'empcode'. Layout and view rendering are
  * disabled; the only output is an error message when no index is available.
  */
 public function luceneIndexAction()
 {
     $this->view->layout()->disableLayout();
     $this->_helper->viewRenderer->setNoRender(true);
     $path = PUBLIC_PATH . '/tmp/lucene';
     try {
         $index = Zend_Search_Lucene::open($path);
     } catch (Zend_Search_Lucene_Exception $e) {
         try {
             $index = Zend_Search_Lucene::create($path);
         } catch (Zend_Search_Lucene_Exception $e) {
             echo "Unable to open or create index : {$e->getMessage()}";
             // There is no index to work on; stop here instead of calling
             // methods on an undefined variable below.
             return;
         }
     }

     // Wipe the existing contents before re-adding every user.
     for ($i = 0; $i < $index->maxDoc(); $i++) {
         $index->delete($i);
     }

     $users = new Application_Model_User();
     $users = $users->fetchAll();

     // The analyzer does not depend on the user, so configure it once.
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive());
     foreach ($users as $_user) {
         $doc = new Zend_Search_Lucene_Document();
         $doc->addField(Zend_Search_Lucene_Field::Text('title', $_user->getFirstName()));
         $doc->addField(Zend_Search_Lucene_Field::keyword('empcode', $_user->getEmployeeCode()));
         $index->addDocument($doc);
     }

     // Commit and optimise once for the whole batch rather than per user.
     $index->commit();
     $index->optimize();
 }
Example #5
0
 /**
  * Refresh this record's entry in the Lucene index of its table.
  *
  * Deletes the hits found for 'pk:<id>', then indexes a new document that
  * holds the primary key, the original title and one unstored field per
  * translated column and language.
  */
 public function updateLuceneIndex()
 {
     $luceneIndex = $this->getTable()->getLuceneIndex();

     // Drop whatever is currently indexed under this primary key.
     $staleHits = $luceneIndex->find('pk:' . $this->getId());
     foreach ($staleHits as $staleHit) {
         $luceneIndex->delete($staleHit->id);
     }

     $document = new Zend_Search_Lucene_Document();

     // The primary key identifies the product in search results.
     $document->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));

     $translations = Doctrine::getTable('ProductTranslation')
         ->createQuery()
         ->from('ProductTranslation pt')
         ->where('pt.id = ?', $this->getId())
         ->execute();

     $document->addField(Zend_Search_Lucene_Field::UnStored('original_title', $this->getOriginalTitle(), 'utf-8'));

     // Columns that are never indexed as translated text.
     $skippedColumns = array('lang' => true, 'id' => true, 'volume' => true, 'slug' => true);

     foreach ($translations->toArray() as $translationRow) {
         $language = $translationRow['lang'];
         // Remaining columns become fields named <column>_<lang>,
         // e.g. name_en, name_fi, description_en, description_fi.
         foreach (array_diff_key($translationRow, $skippedColumns) as $column => $text) {
             $document->addField(Zend_Search_Lucene_Field::UnStored($column . '_' . $language, strip_tags($text), 'utf-8'));
         }
     }

     $luceneIndex->addDocument($document);
     $luceneIndex->commit();
 }
    /**
     * Build the Lucene document that represents one row of the document table.
     *
     * @global string $urlServer Prefix of the URL stored in the 'url' field.
     * @param  object  $docu     Row object; reads id, course_id, title,
     *                           description, filename, comment, creator, subject,
     *                           author, visible, public, format and path.
     * @return Zend_Search_Lucene_Document
     */
    protected function makeDoc($docu) {
        global $urlServer;
        $charset = 'utf-8';

        $luceneDoc = new Zend_Search_Lucene_Document();

        // Identifier fields, added as Keyword fields.
        $keywordValues = array(
            'pk' => 'doc_' . $docu->id,
            'pkid' => $docu->id,
            'doctype' => 'doc',
            'courseid' => $docu->course_id,
        );
        foreach ($keywordValues as $fieldName => $fieldValue) {
            $luceneDoc->addField(Zend_Search_Lucene_Field::Keyword($fieldName, $fieldValue, $charset));
        }

        // Text fields whose value is passed through Indexer::phonetics():
        // index field name => row attribute.
        $phoneticSources = array(
            'title' => 'title',
            'content' => 'description',
            'filename' => 'filename',
            'comment' => 'comment',
            'creator' => 'creator',
            'subject' => 'subject',
            'author' => 'author',
        );
        foreach ($phoneticSources as $fieldName => $attribute) {
            $luceneDoc->addField(Zend_Search_Lucene_Field::Text($fieldName, Indexer::phonetics($docu->$attribute), $charset));
        }

        // Flags are added as Text fields with their raw value.
        foreach (array('visible', 'public') as $flag) {
            $luceneDoc->addField(Zend_Search_Lucene_Field::Text($flag, $docu->$flag, $charset));
        }

        // Directories are opened, everything else is downloaded.
        if ($docu->format == '.dir') {
            $urlAction = 'openDir';
        } else {
            $urlAction = 'download';
        }
        $url = $urlServer
                . 'modules/document/index.php?course=' . course_id_to_code($docu->course_id)
                . '&amp;' . $urlAction . '=' . $docu->path;
        $luceneDoc->addField(Zend_Search_Lucene_Field::UnIndexed('url', $url, $charset));

        return $luceneDoc;
    }
Example #7
0
 /**
  * Index a file, replacing its previous entry when the content has changed.
  *
  * The file is identified by its path. When a document for that path already
  * exists and its stored checksum equals the MD5 of the current content,
  * nothing is done; otherwise the old document is deleted and a new one is
  * added, committed and the index optimised.
  *
  * @param   string  $filePath   The file path
  */
 public function index($filePath)
 {
     $body = file_get_contents($filePath);
     $mtime = filemtime($filePath);
     $digest = md5($body);

     // Look for a document already indexed under this path.
     $matches = $this->_data->find('path:' . $filePath);
     if (count($matches) > 0) {
         $existing = $matches[0];
         $indexedDigest = $existing->getDocument()->checksum;
         if ($digest === $indexedDigest) {
             // Content unchanged since it was last indexed.
             return;
         }
         $this->_data->delete($existing);
     }

     $fresh = new Zend_Search_Lucene_Document();
     $fresh->addField(Zend_Search_Lucene_Field::keyword('path', $filePath));
     $fresh->addField(Zend_Search_Lucene_Field::keyword('modificationTime', $mtime));
     $fresh->addField(Zend_Search_Lucene_Field::keyword('checksum', $digest));
     $fresh->addField(Zend_Search_Lucene_Field::unStored('content', $body, 'utf-8'));
     $this->_data->addDocument($fresh);

     $this->_data->commit();
     $this->_data->optimize();
 }
Example #8
0
 /**
  * Build the document from the children of an association-rule node plus a
  * set of additional service fields.
  *
  * Each non-empty child that is not a '#text' node becomes one field named
  * after the node. Numeric values are passed through
  * JuceneHelper::prepareNumber(); other values have their hyphens removed.
  * A child named 'Text' is added as an Unindexed field, every other child as
  * a Text field.
  *
  * @param object $rule         Node whose childNodes are walked (presumably a
  *                             DOM node; confirm against the caller).
  * @param array  $additional   name => value pairs added as Keyword fields
  *                             named 'service_<name>'.
  * @param mixed  $storeContent Not used by this constructor.
  */
 private function __construct($rule, $additional, $storeContent)
 {
     foreach ($rule->childNodes as $quantifier) {
         if ($quantifier->nodeName == '#text' || $quantifier->nodeValue == '') {
             continue;
         }
         $val = trim($quantifier->nodeValue);
         if (is_numeric($val)) {
             $val = JuceneHelper::prepareNumber($val);
         } else {
             $val = str_replace("-", "", $val);
         }
         $type = ($quantifier->nodeName == 'Text') ? 'Unindexed' : 'Text';
         $this->addField(Zend_Search_Lucene_Field::$type($quantifier->nodeName, $val, JUCENE_ENCODING));
     }
     foreach ($additional as $field => $value) {
         // Index numeric values in their prepared form, as the loop above
         // does. Previously the prepared number was computed and discarded,
         // so the raw value was indexed instead.
         if (is_numeric($value)) {
             $value = JuceneHelper::prepareNumber($value);
         }
         $this->addField(Zend_Search_Lucene_Field::Keyword('service_' . $field, $value, JUCENE_ENCODING));
     }
 }
Example #9
0
 /**
  * Create a fresh search index under APPLICATION_PATH/indexes containing one
  * document per page, optimise it, and expose the document count to the view
  * as indexSize.
  */
 public function buildAction()
 {
     $searchIndex = Zend_Search_Lucene::create(APPLICATION_PATH . '/indexes');

     $pageModel = new Model_Page();
     $pageRows = $pageModel->fetchAll();

     if ($pageRows->count() > 0) {
         // Text fields are both searchable and returned with the results:
         // index field name => page property.
         $textFields = array(
             'page_name' => 'name',
             'page_headline' => 'headline',
             'page_description' => 'description',
             'page_content' => 'content',
         );

         foreach ($pageRows as $pageRow) {
             $page = new CMS_Content_Item_Page($pageRow->id);
             $document = new Zend_Search_Lucene_Document();

             // The id is returned with the results but is not searchable.
             $document->addField(Zend_Search_Lucene_Field::unIndexed('page_id', $page->id));

             foreach ($textFields as $fieldName => $property) {
                 $document->addField(Zend_Search_Lucene_Field::text($fieldName, $page->$property));
             }

             $searchIndex->addDocument($document);
         }
     }

     $searchIndex->optimize();

     // Reported by the view.
     $this->view->indexSize = $searchIndex->numDocs();
 }
Example #10
0
 /**
  * Fetch a page of this site, add it to the search index and recurse into
  * the links found on it.
  *
  * URLs without 'http://' are prefixed with HTTP_HOST and a trailing slash is
  * dropped. URLs that were already visited, or that do not contain HTTP_HOST,
  * are ignored. When the page contains index markers, only the text between
  * them is indexed as 'content'. Pages that cannot be fetched, or that come
  * back empty, are logged and skipped.
  *
  * @param string $url Absolute URL, or a path relative to HTTP_HOST.
  */
 protected function _indexate($url)
 {
     if (!stristr($url, 'http://')) {
         $url = HTTP_HOST . $url;
     }
     $url = substr($url, -1) == '/' ? substr($url, 0, -1) : $url;

     // Nothing to do for pages already visited or outside this host.
     if (in_array($url, $this->_indexedUrl, true) || !stristr($url, HTTP_HOST)) {
         return;
     }

     // Mark as visited before fetching so a failing page is not retried
     // through another link.
     array_push($this->_indexedUrl, $url);

     $html = file_get_contents($url);
     if ($html === false || $html === '') {
         // Do not hand a failed or empty download to the HTML loader.
         Zend_Registry::get('Logger')->info('Search index skipped, unable to fetch: ' . $url, Zend_Log::INFO);
         return;
     }

     // Silence markup warnings while parsing, then restore whatever error
     // mode was active before instead of forcing it off.
     $previousLibxmlMode = libxml_use_internal_errors(true);
     $doc = Zend_Search_Lucene_Document_Html::loadHTML($html);
     libxml_use_internal_errors($previousLibxmlMode);

     // Restrict the indexed text to the marked region when markers exist.
     if (preg_match('/<\\!--index-->(.*)<\\!--\\/index-->/isu', $html, $matches)) {
         $html = $matches[1];
     }
     $html = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $html);
     $html = strip_tags($html);

     $doc->addField(Zend_Search_Lucene_Field::Text('content', $html, 'utf-8'));
     $doc->addField(Zend_Search_Lucene_Field::UnIndexed('body', '', 'utf-8'));
     $doc->addField(Zend_Search_Lucene_Field::Text('url', $url, 'utf-8'));
     $this->_indexHandle->addDocument($doc);
     Zend_Registry::get('Logger')->info('Search index is created: ' . $url, Zend_Log::INFO);

     // Follow links that have no dot at all or end in a page-like extension.
     foreach ($doc->getLinks() as $link) {
         $temp = explode('.', $link);
         $ext = end($temp);
         if ($link == $ext || in_array($ext, array('php', 'html', 'txt', 'htm'), true)) {
             $this->_indexate($link);
         }
     }
 }
 /**
  * Rebuild the media search index at APPLICATION_ROOT/data/my-index.
  *
  * Indexes every media item that is not deleted, is active and belongs to
  * category 2, 3 or 4, newest first. Name, keywords and content are
  * lower-cased with a multibyte-aware function so that UTF-8 text is not
  * corrupted or left partly in upper case.
  */
 public function updateAction()
 {
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive());
     // Create the index
     $index = Zend_Search_Lucene::create(APPLICATION_ROOT . '/data/my-index');
     $mediaMapper = new Media_Model_Mapper_Media();
     $select = $mediaMapper->getDbTable()->select();
     $select->where('deleted != ?', 1)->where('active != ?', 0)->where('category_id IN(?)', array(2, 3, 4))->order('timestamp DESC');
     $mediaItems = $mediaMapper->fetchAll($select);
     if (!empty($mediaItems)) {
         foreach ($mediaItems as $mediaItem) {
             $doc = new Zend_Search_Lucene_Document();
             // Store the document name so it can be identified in the
             // search results
             $doc->addField(Zend_Search_Lucene_Field::Text('title', mb_strtolower($mediaItem->getName(), 'UTF-8'), 'UTF-8'));
             // Store the document URL so it can be identified in the
             // search results
             $doc->addField(Zend_Search_Lucene_Field::Text('url', '/media/' . $mediaItem->getFullPath(), 'UTF-8'));
             // Storing the document description is currently disabled:
             // $doc->addField(Zend_Search_Lucene_Field::Text('description', mb_strtolower($mediaItem->getSContent(), 'UTF-8'),'UTF-8'));
             // Index the document keywords
             $doc->addField(Zend_Search_Lucene_Field::UnStored('keyword', mb_strtolower($mediaItem->getMetaKeywords(), 'UTF-8'), 'UTF-8'));
             // Index the document content
             $doc->addField(Zend_Search_Lucene_Field::UnStored('contents', mb_strtolower($mediaItem->getContent(), 'UTF-8'), 'UTF-8'));
             // Add the document to the index
             $index->addDocument($doc);
         }
     }
     // Write the changes explicitly instead of relying on the index object
     // being destroyed at the end of the request.
     $index->commit();
 }
Example #12
0
 /**
  * Create the place index at $this->_indexPath from up to 7500 rows of
  * $this->_name and print the elapsed time in seconds.
  *
  * Raises the memory limit to 1000M and removes the execution time limit for
  * the duration of the request.
  */
 public function buildplaces()
 {
     ini_set('memory_limit', '1000M');
     set_time_limit(0);
     $startedAt = time();

     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive());

     // Start from an empty index.
     $placeIndex = Zend_Search_Lucene::create($this->_indexPath);

     // Load the id, name and picture of the places to index.
     $query = $this->_db->select()->from($this->_name, array('id', 'name', 'placepic'))->limit(7500);
     foreach ($this->_db->fetchAssoc($query) as $place) {
         $document = new Zend_Search_Lucene_Document();
         $document->addField(Zend_Search_Lucene_Field::keyword('placeid', $place['id']));
         $document->addField(Zend_Search_Lucene_Field::text('placename', $place['name']));
         $document->addField(Zend_Search_Lucene_Field::unStored('placepic', $place['placepic']));
         $placeIndex->addDocument($document);
     }

     $placeIndex->commit();

     print_r(time() - $startedAt);
 }
Example #13
0
 /**
  * Object constructor
  *
  * Parses the PDF, adds the Title, Author, Subject and Keywords properties
  * that are present as unstored fields, and adds the extracted text as
  * 'body'. Any exception raised on the way is written to the log and not
  * re-thrown, so the document may end up with only some of its fields.
  *
  * @param string  $data         PDF data handed to Zend_Pdf::parse()
  * @param boolean $storeContent Whether the body text is stored in the index
  *                              (Text field) or only indexed (UnStored field)
  */
 private function __construct($data, $storeContent)
 {
     try {
         $zendpdf = \Zend_Pdf::parse($data);

         // Store meta data properties: PDF property => index field name
         $metaFields = array(
             'Title' => 'title',
             'Author' => 'author',
             'Subject' => 'subject',
             'Keywords' => 'keywords',
         );
         foreach ($metaFields as $property => $fieldName) {
             if (isset($zendpdf->properties[$property])) {
                 $this->addField(\Zend_Search_Lucene_Field::UnStored($fieldName, $zendpdf->properties[$property]));
             }
         }

         //TODO handle PDF 1.6 metadata Zend_Pdf::getMetadata()
         //do the content extraction
         $pdfParse = new \App_Search_Helper_PdfParser();
         $body = $pdfParse->pdf2txt($zendpdf->render());
         if ($body != '') {
             // Store contents
             if ($storeContent) {
                 $this->addField(\Zend_Search_Lucene_Field::Text('body', $body, 'UTF-8'));
             } else {
                 $this->addField(\Zend_Search_Lucene_Field::UnStored('body', $body, 'UTF-8'));
             }
         }
     } catch (\Exception $e) {
         // Double quotes so the separator is a real line break; the
         // single-quoted form wrote a literal backslash-n into the log.
         Util::writeLog('search_lucene', $e->getMessage() . " Trace:\n" . $e->getTraceAsString(), Util::ERROR);
     }
 }
Example #14
0
 /**
  * (Re-)index a bug: remove its current entry, then add a document holding
  * the bug's core fields, all of its comments, its titled attributes and its
  * tags.
  *
  * @param object $bug Bug record; reads bug_id, title, reporting_user_id and
  *                    reporting_date.
  */
 public function IndexBug($bug)
 {
     $this->RemoveBug($bug->bug_id);

     $document = new Zend_Search_Lucene_Document();
     $document->AddField(Zend_Search_Lucene_Field::Keyword('bug_id', $bug->bug_id));
     $document->AddField(Zend_Search_Lucene_Field::Text('title', $bug->title));
     $document->AddField(Zend_Search_Lucene_Field::Keyword('reporting_user_id', $bug->reporting_user_id));
     $document->AddField(Zend_Search_Lucene_Field::Keyword('reporting_date', $bug->reporting_date));

     // Hits are only ever shown as bugs, but comment text should still count
     // towards a match, so all comments are indexed as one unstored blob.
     $commentQuery = Bugdar::$db->Prepare("SELECT body FROM " . TABLE_PREFIX . "comments WHERE bug_id = ? ORDER BY comment_id");
     $commentQuery->Execute(array($bug->bug_id));
     $commentBodies = array();
     while ($commentRow = $commentQuery->FetchObject()) {
         $commentBodies[] = $commentRow->body . "\n\n";
     }
     $document->AddField(Zend_Search_Lucene_Field::UnStored('comments', implode('', $commentBodies)));

     // Attributes with a title become their own Keyword field; untitled ones
     // are collected as tags.
     $attributeQuery = Bugdar::$db->Prepare("SELECT * FROM " . TABLE_PREFIX . "bug_attributes WHERE bug_id = ?");
     $attributeQuery->Execute(array($bug->bug_id));
     $freeTags = array();
     while ($attribute = $attributeQuery->FetchObject()) {
         if (!$attribute->attribute_title) {
             $freeTags[] = $attribute->value;
             continue;
         }
         $document->AddField(Zend_Search_Lucene_Field::Keyword($attribute->attribute_title, $attribute->value));
     }
     $document->AddField(Zend_Search_Lucene_Field::Text('tag', implode(' ', $freeTags)));

     $this->lucene->AddDocument($document);
 }
 /**
  * Rebuild the album search index from all published albums, newest album
  * first, then redirect to the article indexing action.
  *
  * Output is buffered while the index is built. Every value is passed through
  * CHtml::encode() before it is added to the index.
  */
 public function actionIndexing()
 {
     ini_set('max_execution_time', 0);
     ob_start();

     $luceneIndex = new Zend_Search_Lucene(Yii::getPathOfAlias($this->_indexFilesPath), true);

     $criteria = new CDbCriteria();
     $criteria->compare('t.publish', 1);
     $criteria->order = 'album_id DESC';

     $albums = Albums::model()->findAll($criteria);
     foreach ($albums as $album) {
         // Albums without a cover get an empty image path.
         $coverPath = ($album->media_id != 0)
             ? Yii::app()->request->baseUrl . '/public/album/' . $album->album_id . '/' . $album->cover->media
             : '';

         $document = new Zend_Search_Lucene_Document();
         $document->addField(Zend_Search_Lucene_Field::UnIndexed('id', CHtml::encode($album->album_id), 'utf-8'));
         $document->addField(Zend_Search_Lucene_Field::Text('media', CHtml::encode($coverPath), 'utf-8'));
         $document->addField(Zend_Search_Lucene_Field::Text('title', CHtml::encode($album->title), 'utf-8'));

         $plainBody = Utility::hardDecode(Utility::softDecode($album->body));
         $document->addField(Zend_Search_Lucene_Field::Text('body', CHtml::encode($plainBody), 'utf-8'));

         // Absolute URL of the album page.
         $viewRoute = Yii::app()->createUrl('album/site/view', array('id' => $album->album_id, 't' => Utility::getUrlTitle($album->title)));
         $absoluteUrl = Utility::getProtocol() . '://' . Yii::app()->request->serverName . $viewRoute;
         $document->addField(Zend_Search_Lucene_Field::Text('url', CHtml::encode($absoluteUrl), 'utf-8'));

         $createdOn = Utility::dateFormat($album->creation_date, true) . ' WIB';
         $document->addField(Zend_Search_Lucene_Field::UnIndexed('date', CHtml::encode($createdOn), 'utf-8'));
         $document->addField(Zend_Search_Lucene_Field::UnIndexed('creation', CHtml::encode($album->user->displayname), 'utf-8'));

         $luceneIndex->addDocument($document);
     }

     echo 'Album Lucene index created';
     $luceneIndex->commit();

     // Chain to the next indexing step.
     $this->redirect(Yii::app()->createUrl('article/search/indexing'));
     ob_end_flush();
 }
Example #16
0
 /**
  * addField() must return the document itself so that calls can be chained.
  */
 public function testAddFieldMethodChaining()
 {
     // A single call returns a document instance.
     $single = new Zend_Search_Lucene_Document();
     $returned = $single->addField(Zend_Search_Lucene_Field::Text('title', 'Title'));
     $this->assertTrue($returned instanceof Zend_Search_Lucene_Document);

     // Several calls can be chained without an error.
     $chained = new Zend_Search_Lucene_Document();
     $chained
         ->addField(Zend_Search_Lucene_Field::Text('title', 'Title'))
         ->addField(Zend_Search_Lucene_Field::Text('annotation', 'Annotation'))
         ->addField(Zend_Search_Lucene_Field::Text('body', 'Document body, document body, document body...'));
 }
 /**
  * Create a fresh feed index and fill it with the items of the configured
  * feeds, printing progress as HTML.
  *
  * Only items that have a link, a title and a description are indexed. Feed
  * titles come from a remote source, so they are HTML-escaped before being
  * echoed into the page.
  */
 function index()
 {
     $this->load->library('zend', 'Zend/Feed');
     $this->load->library('zend', 'Zend/Search/Lucene');
     $this->load->library('zend');
     $this->zend->load('Zend/Feed');
     $this->zend->load('Zend/Search/Lucene');
     //Create index.
     $index = new Zend_Search_Lucene('C:\\xampp\\xampp\\htdocs\\controle_frota\\lucene\\feeds_index', true);
     $feeds = array('http://oglobo.globo.com/rss.xml?limite=50');
     //grab each feed.
     foreach ($feeds as $feed) {
         $channel = Zend_Feed::import($feed);
         echo htmlspecialchars((string) $channel->title(), ENT_QUOTES, 'UTF-8') . '<br />';
         //index each item.
         foreach ($channel->items as $item) {
             if ($item->link() && $item->title() && $item->description()) {
                 //create an index doc.
                 $doc = new Zend_Search_Lucene_Document();
                 $doc->addField(Zend_Search_Lucene_Field::Keyword('link', $this->sanitize($item->link())));
                 $doc->addField(Zend_Search_Lucene_Field::Text('title', $this->sanitize($item->title())));
                 $doc->addField(Zend_Search_Lucene_Field::Unstored('contents', $this->sanitize($item->description())));
                 echo "\tAdding: " . htmlspecialchars((string) $item->title(), ENT_QUOTES, 'UTF-8') . '<br />';
                 $index->addDocument($doc);
             }
         }
     }
     $index->commit();
     echo $index->count() . ' Documents indexed.<br />';
 }
 /**
  * Build the search document for a product.
  *
  * @param Storefront_Resource_Product_Item_Interface $item     Product; reads
  *        productId, name, description and getPrice().
  * @param mixed                                      $category Value added as
  *        the 'categories' field.
  */
 public function __construct(Storefront_Resource_Product_Item_Interface $item, $category)
 {
     $charset = 'UTF-8';

     $productIdField = Zend_Search_Lucene_Field::keyword('productId', $item->productId, $charset);
     $this->addField($productIdField);

     $categoriesField = Zend_Search_Lucene_Field::text('categories', $category, $charset);
     $this->addField($categoriesField);

     $nameField = Zend_Search_Lucene_Field::text('name', $item->name, $charset);
     $this->addField($nameField);

     // The description is the only field added as unStored.
     $descriptionField = Zend_Search_Lucene_Field::unStored('description', $item->description, $charset);
     $this->addField($descriptionField);

     $displayPrice = $this->_formatPrice($item->getPrice());
     $this->addField(Zend_Search_Lucene_Field::text('price', $displayPrice, $charset));
 }
Example #19
0
 /**
  * A field created with an explicit encoding keeps its name, raw value and
  * encoding, and getUtf8Value() returns the value converted to UTF-8.
  *
  * NOTE(review): the non-ASCII characters in the two literals differ from one
  * another and look as if this file was re-encoded at some point. The test
  * depends on the exact bytes of these literals, so confirm them against the
  * upstream test before relying on it.
  */
 public function testEncoding()
 {
     // The value is declared to be ISO-8859-1 encoded.
     $field = Zend_Search_Lucene_Field::Text('field', 'Words with umlauts: εγό...', 'ISO-8859-1');
     $this->assertEquals($field->encoding, 'ISO-8859-1');
     $this->assertEquals($field->name, 'field');
     // The raw value is kept as given.
     $this->assertEquals($field->value, 'Words with umlauts: εγό...');
     // The UTF-8 view of the same value.
     $this->assertEquals($field->getUtf8Value(), 'Words with umlauts: Γ₯ãü...');
 }
 /**
  * Updates the index for an object
  *
  * Entries already indexed for the object are always removed first. Unless
  * $delete is set, or the object has a canSearch() method that returns a
  * falsy value, a new document is then built from the model's configuration
  * in search.yml and committed.
  *
  * @param Doctrine_Record $object
  * @param bool            $delete When true, only remove the object's entries.
  * @throws Exception When no models are configured, the object's model is not
  *                   configured, or its configuration lacks a title,
  *                   description or route.
  */
 public function updateIndex(Doctrine_Record $object, $delete = false)
 {
     /* error checking */
     if (!array_key_exists('models', $this->config) || empty($this->config['models'])) {
         // Plain message: there is no placeholder to fill and no $name in scope.
         throw new Exception('No models set in search.yml');
     }
     $model = get_class($object);
     if (!array_key_exists($model, $this->config['models'])) {
         throw new Exception(sprintf('Model "%s" not defined in "%s" index in your search.yml', $model, $this->name));
     }
     $id = $this->generateId($object->getId(), $model);
     $config = $this->config['models'][$model];
     //delete existing entries
     foreach ($this->search('_id:"' . $id . '"') as $hit) {
         $this->getIndex()->delete($hit->id);
     }
     if ($delete) {
         return;
     }
     //only add to search if canSearch method on model returns true (search if no method exists)
     if (method_exists($object, 'canSearch') && !call_user_func(array($object, 'canSearch'))) {
         return;
     }
     $doc = new Zend_Search_Lucene_Document();
     // store a key for deleting in future
     $doc->addField(Zend_Search_Lucene_Field::Keyword('_id', $id));
     // store job primary key and model name to identify it in the search results
     $doc->addField(Zend_Search_Lucene_Field::Keyword('_pk', $object->getId()));
     $doc->addField(Zend_Search_Lucene_Field::Keyword('_model', $model));
     // store title - used for search result title
     if (!array_key_exists('title', $config)) {
         throw new Exception(sprintf('A title must be set for model "%s" in search.yml', $model));
     }
     $doc->addField(Zend_Search_Lucene_Field::unIndexed('_title', call_user_func(array($object, 'get' . sfInflector::camelize($config['title'])))));
     // store description - used for search result description
     if (!array_key_exists('description', $config)) {
         throw new Exception(sprintf('A description must be set for model "%s" in search.yml', $model));
     }
     $doc->addField(Zend_Search_Lucene_Field::unIndexed('_description', call_user_func(array($object, 'get' . sfInflector::camelize($config['description'])))));
     // store url - @todo add more routing options
     if (!array_key_exists('route', $config)) {
         throw new Exception(sprintf('A route must be set for model "%s" in search.yml', $model));
     }
     sfContext::getInstance()->getConfiguration()->loadHelpers('Url');
     $url = url_for($config['route'], $object);
     $doc->addField(Zend_Search_Lucene_Field::unIndexed('_url', $url));
     //store fields
     if (array_key_exists('fields', $config)) {
         // Only the field names are needed; iterating the keys also avoids
         // overwriting $config with the per-field settings.
         foreach (array_keys($config['fields']) as $field) {
             $doc->addField(Zend_Search_Lucene_Field::UnStored($field, call_user_func(array($object, 'get' . sfInflector::camelize($field))), 'utf-8'));
         }
     }
     //save index
     $this->getIndex()->addDocument($doc);
     $this->getIndex()->commit();
 }
Example #21
0
 /**
  * Constructor. Creates our indexable document and adds all
  * necessary fields to it using the passed in document
  *
  * @param object $document Source entry; getId(), getUrl(), getCreationDate(),
  *                         getName(), getDetails() and getImplodedTags() are called.
  */
 public function __construct($document)
 {
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('id_entry', $document->getId()));
     $this->addField(Zend_Search_Lucene_Field::Keyword('url', $document->getUrl()));
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('creation_date', $document->getCreationDate()));
     // The encoding belongs to the field factory; it used to be passed to
     // addField() by mistake, so the name was not treated as UTF-8.
     $this->addField(Zend_Search_Lucene_Field::Text('name', $document->getName(), 'utf-8'));
     $this->addField(Zend_Search_Lucene_Field::Text('content', $document->getDetails()));
     $this->addField(Zend_Search_Lucene_Field::Text('tag', $document->getImplodedTags()));
 }
Example #22
0
 /**
  * Build the document from a word-processing OpenXML package.
  *
  * Opens the package, follows the office-document relationship listed in
  * _rels/.rels, and collects the text runs and line breaks of every body
  * paragraph. The file name, the body text, the core properties and, when the
  * properties have none, a title are added as fields.
  *
  * @param string  $fileName     Path of the package to read.
  * @param boolean $storeContent Whether the body is added as a Text field or
  *                              as an UnStored field.
  */
 private function __construct($fileName, $storeContent)
 {
     // Pieces of body text, concatenated at the end.
     $bodyParts = array();

     $zip = new ZipArchive();
     $zip->open($fileName);

     // The package relations tell us which part is the main document.
     $relations = simplexml_load_string($zip->getFromName('_rels/.rels'));
     foreach ($relations->Relationship as $relationship) {
         if ($relationship["Type"] != Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
             continue;
         }

         // Main document part found: load it.
         $target = $relationship['Target'];
         $partPath = $this->absoluteZipPath(dirname($target) . '/' . basename($target));
         $mainPart = simplexml_load_string($zip->getFromName($partPath));
         $mainPart->registerXPathNamespace('w', Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);

         foreach ($mainPart->xpath('//w:body/w:p') as $paragraph) {
             $pieces = $paragraph->xpath('.//w:r/*[name() = "w:t" or name() = "w:br"]');
             if ($pieces === false) {
                 // Neither text nor breaks in this paragraph.
                 continue;
             }
             foreach ($pieces as $piece) {
                 // A break contributes a space, a text run its content.
                 $bodyParts[] = ($piece->getName() == 'br') ? ' ' : (string) $piece;
             }
             // Keep consecutive paragraphs from running together.
             $bodyParts[] = ' ';
         }

         // Only the first office document is read.
         break;
     }

     $metaData = $this->extractMetaData($zip);
     $zip->close();

     $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

     $bodyText = implode('', $bodyParts);
     $bodyField = $storeContent
         ? Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8')
         : Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8');
     $this->addField($bodyField);

     // One Text field per core property.
     foreach ($metaData as $propertyName => $propertyValue) {
         $this->addField(Zend_Search_Lucene_Field::Text($propertyName, $propertyValue, 'UTF-8'));
     }

     // Fall back to the file name when the properties carry no title.
     if (!isset($metaData['title'])) {
         $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
     }
 }
Example #23
0
 /**
  * Complete a document with bookkeeping and caller-supplied fields, add it to
  * the index and log the addition.
  *
  * @param object $doc    Document to index; its body and url are read.
  * @param array  $fields name => value pairs added as Text fields.
  */
 protected function _indexDocument($doc, $fields)
 {
     // MD5 of the body and the time of indexing, both added as UnIndexed.
     $bodyDigest = md5($doc->body);
     $doc->addField(Zend_Search_Lucene_Field::UnIndexed('checkSum', $bodyDigest));
     $indexedAt = time();
     $doc->addField(Zend_Search_Lucene_Field::UnIndexed('lastIndexed', $indexedAt));

     foreach ($fields as $fieldName => $fieldValue) {
         $textField = Zend_Search_Lucene_Field::Text($fieldName, $fieldValue);
         $doc->addField($textField);
     }

     $this->_index->addDocument($doc);

     $message = 'Added ' . urldecode($doc->url) . ' to index';
     Bbx_Log::write($message, null, self::LOG);
 }
Example #24
0
 /**
  * Build a Lucene document describing this page.
  *
  * The title, path and teaser are added as Text fields; the content and
  * the search tags are added as UnStored fields. Every field is created
  * with $this->_charset as its encoding argument.
  *
  * @return Zend_Search_Lucene_Document
  */
 public function asLuceneDocument()
 {
     $doc = new Zend_Search_Lucene_Document();
     $doc->addField(Zend_Search_Lucene_Field::Text('page_title', $this->title, $this->_charset));
     $doc->addField(Zend_Search_Lucene_Field::Text('page_link', $this->path, $this->_charset));
     $doc->addField(Zend_Search_Lucene_Field::Text('page_teaser', $this->teaser, $this->_charset));
     // Factory name spelled with the same casing as the call below (it was
     // written "unstored", which only worked because PHP method names are
     // case-insensitive).
     $doc->addField(Zend_Search_Lucene_Field::UnStored('page_content', $this->content, $this->_charset));
     $doc->addField(Zend_Search_Lucene_Field::UnStored('search_tags', $this->searchTags, $this->_charset));
     return $doc;
 }
Example #25
0
 /**
  * Build a Lucene document from the values in $row and add it to
  * $this->search.
  *
  * NOTE(review): $row is never defined inside this method and $since is
  * never read. Presumably a query/loop fetching rows changed since $since
  * is missing from this excerpt -- confirm against the full source.
  *
  * @param mixed $since Not used by the visible code.
  *
  * @return void
  */
 public function insertFromDB($since)
 {
     $doc = new Zend_Search_Lucene_Document();
     // 'id' and 'person' go through the Keyword factory; the remaining
     // columns go through the Text factory.
     $doc->addField(Zend_Search_Lucene_Field::Keyword('id', $row['id']));
     $doc->addField(Zend_Search_Lucene_Field::Keyword('person', $row['person']));
     $doc->addField(Zend_Search_Lucene_Field::Text('celeb_type', $row['celeb_type']));
     $doc->addField(Zend_Search_Lucene_Field::Text('wikikeyword', $row['wikikeyword']));
     $doc->addField(Zend_Search_Lucene_Field::Text('blurb', $row['blurb']));
     $this->search->addDocument($doc);
 }
Example #26
0
 /**
  * Index a single item together with its tags and commit the index.
  *
  * @param mixed  $index Index object providing addDocument() and commit().
  * @param object $item  Item exposing name, id, image_id and description.
  * @param mixed  $tags  Value stored in the 'tag' field.
  *
  * @return void
  */
 private static function _insert($index, $item, $tags)
 {
     $document = new Zend_Search_Lucene_Document();

     // Fields are listed in the order they are attached to the document.
     $entries = array(
         Zend_Search_Lucene_Field::Text('title', $item->name),
         Zend_Search_Lucene_Field::Text('item_id', (string) $item->id),
         Zend_Search_Lucene_Field::UnIndexed('image_id', (string) $item->image_id),
         Zend_Search_Lucene_Field::Text('description', $item->description),
         Zend_Search_Lucene_Field::Text('tag', $tags),
     );
     foreach ($entries as $entry) {
         $document->addField($entry);
     }

     $index->addDocument($document);
     $index->commit();
 }
Example #27
0
 /**
  * Create the document for a file: records its name and its body text.
  *
  * The file name is added as a Text field and remembered in
  * $this->_filename before the body is read. The body returned by
  * getBody() is joined with single spaces and added either as a Text
  * field (when $storeContent is truthy) or as an UnStored field.
  *
  * @param string $fileName     Name of the file being indexed.
  * @param bool   $storeContent Whether the body is added as Text rather than UnStored.
  */
 public function __construct($fileName, $storeContent)
 {
     // File name
     $fileNameField = Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8');
     $this->addField($fileNameField);
     $this->_filename = $fileName;

     // Body text
     $bodyText = implode(' ', $this->getBody());
     $bodyField = $storeContent
         ? Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8')
         : Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8');
     $this->addField($bodyField);
 }
Example #28
0
 /**
  * Index the given source model as this document.
  *
  * Remembers the model and its id, calls delete(), then adds the
  * 'doctype', 'entity_id' and store Keyword fields, followed by the
  * attributes, before adding the document itself.
  *
  * @param Mage_Core_Model_Abstract $sourceModel Model to index.
  *
  * @return Mage_Lucene_Model_Index_Document_Abstract This instance.
  **/
 public function index($sourceModel)
 {
     $this->_sourceModel = $sourceModel;
     $this->_id = $sourceModel->getId();

     // Drop any earlier version before re-adding.
     $this->delete();

     $doctypeField = Zend_Search_Lucene_Field::Keyword('doctype', $this->getDoctype());
     $this->addField($doctypeField);

     $entityField = Zend_Search_Lucene_Field::Keyword('entity_id', $this->_id);
     $this->addField($entityField);

     $storeId = $this->getStore()->getId();
     $storeField = Zend_Search_Lucene_Field::Keyword(self::STORE_ATTRIBUTE_CODE, $storeId);
     $this->addField($storeField);

     $this->addAttributes();
     $this->addDocument();

     return $this;
 }
Example #29
0
 /**
  * Rebuild the search index from every model listed in $this->settings.
  *
  * For each configured model the records are loaded through the model's
  * find_index() method when it exists (find() otherwise). Each record is
  * turned into one Lucene document carrying a 'cake_model' keyword plus
  * the configured fields, and added to the index returned by
  * $this->__open(true). Once all models are processed the index is
  * optimized and committed. Timing information is written to the
  * 'searchable' log whenever Configure::read() returns a truthy value.
  *
  * @access public
  * @return void
  */
 function build_index()
 {
     $index = $this->__open(true);
     $index->setMergeFactor(2000);
     $index->setMaxBufferedDocs(500);
     $start = time();
     foreach ($this->settings as $model => $model_options) {
         App::import('Model', $model);
         // From here on $model is the model instance, not its class name.
         $model = new $model();
         if (empty($model_options['find_options'])) {
             $model_options['find_options'] = array();
         }
         if (method_exists($model, 'find_index')) {
             $results = $model->find_index('all', $model_options['find_options']);
         } else {
             $results = $model->find('all', $model_options['find_options']);
         }
         if (Configure::read()) {
             $this->log($model->name . ' find time: ' . (time() - $start), 'searchable');
             $start = time();
         }
         $count = count($results);
         $i = 1;
         foreach ($results as $result) {
             // Progress as a percentage of this model's records.
             printf("%.1f", $i / $count * 100);
             $this->out("");
             $i++;
             $this->out('Processing ' . $model->name . ' #' . $result[$model->name]['id']);
             $doc = new Zend_Search_Lucene_Document();
             // add the model field
             $doc->addField(Zend_Search_Lucene_Field::Keyword('cake_model', $model->name, 'utf-8'));
             foreach ($model_options['fields'] as $field_name => $options) {
                 if (!empty($options['prepare']) && function_exists($options['prepare'])) {
                     $result[$model->name][$field_name] = call_user_func($options['prepare'], $result[$model->name][$field_name]);
                 }
                 $alias = !empty($options['alias']) ? $options['alias'] : $field_name;
                 // Resolve the factory name into a plain variable first:
                 // "Class::$options['type'](...)" is parsed by PHP 7+ as a
                 // read of the static property $options, not as a call to
                 // the method named by $options['type'].
                 $field_type = $options['type'];
                 $doc->addField(Zend_Search_Lucene_Field::$field_type($alias, $result[$model->name][$field_name], 'utf-8'));
             }
             $index->addDocument($doc);
             $this->out('Processed ' . $model->name . ' #' . $result[$model->name]['id']);
         }
         if (Configure::read()) {
             $this->log($model->name . ' adding time: ' . (time() - $start), 'searchable');
             $start = time();
         }
     }
     $this->optimize($index);
     $index->commit();
     if (Configure::read()) {
         // Same log type as the other timing entries above.
         $this->log('Optimize+commit time: ' . (time() - $start), 'searchable');
     }
 }
Example #30
0
 /**
  * Add a feed to the feed search index.
  *
  * Opens the index whose location is held in the registry entry
  * search->feed, builds a document from the feed's properties and adds
  * it to that index.
  *
  * @param Feed $feed Feed whose title, siteUrl, url, language, category
  *                   and description are indexed.
  *
  * @return void
  */
 public function addFeed(Feed $feed)
 {
     $index = Zend_Search_Lucene::open(Zend_Registry::getInstance()->search->feed);
     $doc = new Zend_Search_Lucene_Document();
     // 'title' used to be added a second time further down; the repeated
     // add carried the same name and value, so it has been dropped.
     $doc->addField(Zend_Search_Lucene_Field::Text('title', $feed->title));
     $doc->addField(Zend_Search_Lucene_Field::Text('siteUrl', $feed->siteUrl));
     $doc->addField(Zend_Search_Lucene_Field::Text('feedUrl', $feed->url));
     $doc->addField(Zend_Search_Lucene_Field::Text('language', $feed->language));
     $doc->addField(Zend_Search_Lucene_Field::Text('category', $feed->category));
     $doc->addField(Zend_Search_Lucene_Field::UnStored('description', $feed->description));
     $index->addDocument($doc);
 }