コード例 #1
0
ファイル: Product.class.php プロジェクト: vcgato29/poff
 /**
  * Re-indexes this record in the Lucene index of its table.
  *
  * Documents already indexed under this record's primary key are removed,
  * then a new document is built from the original title and from every
  * ProductTranslation row whose id equals this record's id. Each translated
  * column is indexed as "<column>_<lang>" with HTML tags stripped.
  */
 public function updateLuceneIndex()
 {
     $luceneIndex = $this->getTable()->getLuceneIndex();

     // Remove the documents currently indexed for this primary key.
     $staleHits = $luceneIndex->find('pk:' . $this->getId());
     foreach ($staleHits as $staleHit) {
         $luceneIndex->delete($staleHit->id);
     }

     $document = new Zend_Search_Lucene_Document();
     // The primary key lets a search hit be mapped back to the record.
     $document->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));

     $translations = Doctrine::getTable('ProductTranslation')
         ->createQuery()
         ->from('ProductTranslation pt')
         ->where('pt.id = ?', $this->getId())
         ->execute();

     $document->addField(Zend_Search_Lucene_Field::UnStored('original_title', $this->getOriginalTitle(), 'utf-8'));

     // Columns of a translation row that are not indexed as text.
     $notIndexed = array('lang' => true, 'id' => true, 'volume' => true, 'slug' => true);
     foreach ($translations->toArray() as $row) {
         $suffix = '_' . $row['lang'];
         // e.g. name_en, name_fi, description_en, description_fi
         foreach (array_diff_key($row, $notIndexed) as $column => $text) {
             $document->addField(Zend_Search_Lucene_Field::UnStored($column . $suffix, strip_tags($text), 'utf-8'));
         }
     }

     $luceneIndex->addDocument($document);
     $luceneIndex->commit();
 }
コード例 #2
0
ファイル: search_engine.php プロジェクト: rsesek/Bugdar2
 /**
  * Indexes a bug, replacing whatever was previously indexed for its id.
  *
  * The document holds the bug's own columns, all of its comment bodies
  * joined into one unstored text field, one keyword field per titled
  * attribute, and a single "tag" field built from the untitled attributes.
  *
  * @param object $bug Object exposing bug_id, title, reporting_user_id and reporting_date.
  */
 public function IndexBug($bug)
 {
     $this->RemoveBug($bug->bug_id);

     $document = new Zend_Search_Lucene_Document();
     $document->addField(Zend_Search_Lucene_Field::Keyword('bug_id', $bug->bug_id));
     $document->addField(Zend_Search_Lucene_Field::Text('title', $bug->title));
     $document->addField(Zend_Search_Lucene_Field::Keyword('reporting_user_id', $bug->reporting_user_id));
     $document->addField(Zend_Search_Lucene_Field::Keyword('reporting_date', $bug->reporting_date));

     // Hits are only ever shown as bugs, but comment text should still
     // count, so all comments are folded into one blob.
     $commentQuery = Bugdar::$db->Prepare("SELECT body FROM " . TABLE_PREFIX . "comments WHERE bug_id = ? ORDER BY comment_id");
     $commentQuery->Execute(array($bug->bug_id));
     $allComments = '';
     while ($row = $commentQuery->FetchObject()) {
         $allComments .= $row->body . "\n\n";
     }
     $document->addField(Zend_Search_Lucene_Field::UnStored('comments', $allComments));

     // Attributes: titled ones become their own keyword field, untitled
     // ones are collected as tags.
     $attributeQuery = Bugdar::$db->Prepare("SELECT * FROM " . TABLE_PREFIX . "bug_attributes WHERE bug_id = ?");
     $attributeQuery->Execute(array($bug->bug_id));
     $tagValues = array();
     while ($attribute = $attributeQuery->FetchObject()) {
         if (!$attribute->attribute_title) {
             $tagValues[] = $attribute->value;
             continue;
         }
         $document->addField(Zend_Search_Lucene_Field::Keyword($attribute->attribute_title, $attribute->value));
     }
     $document->addField(Zend_Search_Lucene_Field::Text('tag', implode(' ', $tagValues)));

     $this->lucene->addDocument($document);
 }
コード例 #3
0
 /**
  * Builds the media search index under APPLICATION_ROOT/data/my-index.
  *
  * Selects media rows that are not deleted, are active and belong to
  * category 2, 3 or 4 (newest first) and adds one Lucene document per row.
  *
  * Text is lower-cased with mb_strtolower() rather than strtolower():
  * the fields are declared as UTF-8, and the byte-wise strtolower() is
  * locale-dependent on PHP < 8 and can corrupt multibyte characters.
  */
 public function updateAction()
 {
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive());
     // Create the index
     $index = Zend_Search_Lucene::create(APPLICATION_ROOT . '/data/my-index');
     $mediaMapper = new Media_Model_Mapper_Media();
     $select = $mediaMapper->getDbTable()->select();
     $select->where('deleted != ?', 1)->where('active != ?', 0)->where('category_id IN(?)', array(2, 3, 4))->order('timestamp DESC');
     $mediaItems = $mediaMapper->fetchAll($select);
     if (!empty($mediaItems)) {
         foreach ($mediaItems as $mediaItem) {
             $doc = new Zend_Search_Lucene_Document();
             // Store the document name so it can be identified in the
             // search results
             $doc->addField(Zend_Search_Lucene_Field::Text('title', mb_strtolower($mediaItem->getName(), 'UTF-8'), 'UTF-8'));
             // Store the document URL so it can be identified in the
             // search results
             $doc->addField(Zend_Search_Lucene_Field::Text('url', '/media/' . $mediaItem->getFullPath(), 'UTF-8'));
             // Store the document description so it can be identified in the
             // search results
             // $doc->addField(Zend_Search_Lucene_Field::Text('description', strtolower($mediaItem->getSContent()),'UTF-8'));
             // Index the document's keywords
             $doc->addField(Zend_Search_Lucene_Field::UnStored('keyword', mb_strtolower($mediaItem->getMetaKeywords(), 'UTF-8'), 'UTF-8'));
             // Index the document's content
             $doc->addField(Zend_Search_Lucene_Field::UnStored('contents', mb_strtolower($mediaItem->getContent(), 'UTF-8'), 'UTF-8'));
             // Add the document to the index
             $index->addDocument($doc);
         }
     }
     // Flush pending documents explicitly instead of relying on the index
     // object being destroyed at the end of the request.
     $index->commit();
 }
コード例 #4
0
ファイル: Pdf.php プロジェクト: omusico/isle-web-framework
 /**
  * Object constructor
  *
  * Parses the given PDF data, indexes the Title/Author/Subject/Keywords
  * properties that are present and the text extracted from the rendered
  * document. Any exception raised on the way is written to the log and
  * not rethrown.
  *
  * @param string  $data         PDF data handed to \Zend_Pdf::parse()
  * @param boolean $storeContent true to add the body as a Text field,
  *                              false to add it as an UnStored field
  */
 private function __construct($data, $storeContent)
 {
     // PDF property name => Lucene field name
     $metaFields = array(
         'Title'    => 'title',
         'Author'   => 'author',
         'Subject'  => 'subject',
         'Keywords' => 'keywords',
     );

     try {
         $pdf = \Zend_Pdf::parse($data);

         foreach ($metaFields as $property => $fieldName) {
             if (isset($pdf->properties[$property])) {
                 $this->addField(\Zend_Search_Lucene_Field::UnStored($fieldName, $pdf->properties[$property]));
             }
         }

         //TODO handle PDF 1.6 metadata Zend_Pdf::getMetadata()

         // Extract the text content from the rendered PDF.
         $extractor = new \App_Search_Helper_PdfParser();
         $text = $extractor->pdf2txt($pdf->render());
         if ($text == '') {
             return;
         }

         $bodyField = $storeContent
             ? \Zend_Search_Lucene_Field::Text('body', $text, 'UTF-8')
             : \Zend_Search_Lucene_Field::UnStored('body', $text, 'UTF-8');
         $this->addField($bodyField);
     } catch (\Exception $failure) {
         Util::writeLog('search_lucene', $failure->getMessage() . ' Trace:\\n' . $failure->getTraceAsString(), Util::ERROR);
     }
 }
コード例 #5
0
/**
 * Indexes one article, replacing any document already indexed for its PMID.
 *
 * Progress messages are echoed to standard output.
 *
 * @param array $article  Must provide the keys PMID, Year, Journal, Title,
 *                        Authors, Reference, Abstract and MeshHeadings.
 * @param bool  $optimise When truthy, the index is optimised before the commit.
 */
function index_lucene($article, $optimise)
{
    $index = getIndex_lucene();

    // An indexed document cannot be updated in place: look up the exact
    // PMID term, delete what is found, then index the article again.
    $pmidTerm = new Zend_Search_Lucene_Index_Term($article["PMID"], 'PMID');
    $previous = $index->find(new Zend_Search_Lucene_Search_Query_Term($pmidTerm));
    if (count($previous) > 0) {
        echo "[deleting previous version]\n";
        foreach ($previous as $oldHit) {
            $index->delete($oldHit->id);
        }
    }

    $document = new Zend_Search_Lucene_Document();
    foreach (array('PMID', 'Year', 'Journal') as $key) {
        $document->addField(Zend_Search_Lucene_Field::Keyword($key, $article[$key]));
    }
    foreach (array('Title', 'Authors', 'Reference') as $key) {
        $document->addField(Zend_Search_Lucene_Field::Text($key, $article[$key], 'utf-8'));
    }
    $document->addField(Zend_Search_Lucene_Field::UnStored('Abstract', $article["Abstract"], 'utf-8'));
    $document->addField(Zend_Search_Lucene_Field::Text('MeshHeadings', $article["MeshHeadings"], 'utf-8'));
    $index->addDocument($document);

    if ($optimise) {
        echo "Optimising index\n";
        $index->optimize();
    }
    $index->commit();

    echo "The index contains ", $index->numDocs(), " documents\n";
}
コード例 #6
0
 /**
  * Updates the index for an object
  *
  * Existing entries for the object are always removed first. Unless
  * $delete is set, or the object defines canSearch() and it returns a
  * falsy value, a new document is then added with the configured title,
  * description, route URL and fields.
  *
  * @param Doctrine_Record $object record to index
  * @param boolean         $delete when true, only remove the existing entries
  *
  * @throws Exception when search.yml defines no models, does not define the
  *                   object's model, or the model lacks a title, description
  *                   or route
  */
 public function updateIndex(Doctrine_Record $object, $delete = false)
 {
     /* error checking */
     if (!array_key_exists('models', $this->config) || empty($this->config['models'])) {
         // The message has no placeholder; the former sprintf() call only
         // served to read an undefined $name variable.
         throw new Exception('No models set in search.yml');
     }
     if (!array_key_exists($model = get_class($object), $this->config['models'])) {
         throw new Exception(sprintf('Model "%s" not defined in "%s" index in your search.yml', $model, $this->name));
     }
     $id = $this->generateId($object->getId(), $model);
     $config = $this->config['models'][$model];
     //delete existing entries
     foreach ($this->search('_id:"' . $id . '"') as $hit) {
         $this->getIndex()->delete($hit->id);
     }
     if ($delete) {
         return;
     }
     //only add to search if canSearch method on model returns true (search if no method exists)
     if (method_exists($object, 'canSearch') && !$object->canSearch()) {
         return;
     }
     $doc = new Zend_Search_Lucene_Document();
     // store a key for deleting in future
     $doc->addField(Zend_Search_Lucene_Field::Keyword('_id', $id));
     // store job primary key and model name to identify it in the search results
     $doc->addField(Zend_Search_Lucene_Field::Keyword('_pk', $object->getId()));
     $doc->addField(Zend_Search_Lucene_Field::Keyword('_model', $model));
     // store title - used for search result title
     if (!array_key_exists('title', $config)) {
         throw new Exception(sprintf('A title must be set for model "%s" in search.yml', $model));
     }
     $titleGetter = 'get' . sfInflector::camelize($config['title']);
     $doc->addField(Zend_Search_Lucene_Field::unIndexed('_title', $object->$titleGetter()));
     // store description - used for search result description
     if (!array_key_exists('description', $config)) {
         throw new Exception(sprintf('A description must be set for model "%s" in search.yml', $model));
     }
     $descriptionGetter = 'get' . sfInflector::camelize($config['description']);
     $doc->addField(Zend_Search_Lucene_Field::unIndexed('_description', $object->$descriptionGetter()));
     // store url - @todo add more routing options
     if (!array_key_exists('route', $config)) {
         throw new Exception(sprintf('A route must be set for model "%s" in search.yml', $model));
     }
     sfContext::getInstance()->getConfiguration()->loadHelpers('Url');
     $url = url_for($config['route'], $object);
     $doc->addField(Zend_Search_Lucene_Field::unIndexed('_url', $url));
     //store fields
     if (array_key_exists('fields', $config)) {
         // Only the field names are used; iterating the keys avoids
         // overwriting $config with each field's own settings.
         foreach (array_keys($config['fields']) as $field) {
             $fieldGetter = 'get' . sfInflector::camelize($field);
             $doc->addField(Zend_Search_Lucene_Field::UnStored($field, $object->$fieldGetter(), 'utf-8'));
         }
     }
     //save index
     $this->getIndex()->addDocument($doc);
     $this->getIndex()->commit();
 }
コード例 #7
0
ファイル: Docx.php プロジェクト: hackingman/TubeX
 /**
  * Object constructor
  *
  * Opens the .docx package, locates the office document part through
  * _rels/.rels, collects the text of every w:t run (w:br becomes a space,
  * and a space is appended after each paragraph), then adds the filename,
  * body, core properties and title as fields.
  *
  * @param string  $fileName     path of the .docx file
  * @param boolean $storeContent true to add the body as a Text field,
  *                              false to add it as an UnStored field
  *
  * @throws Zend_Search_Lucene_Exception when the file cannot be opened as a
  *                                      zip archive or a required part is missing
  */
 private function __construct($fileName, $storeContent)
 {
     // Document data holders
     $documentBody = array();
     $coreProperties = array();
     // Open OpenXML package; open() returns true or an error code
     $package = new ZipArchive();
     if ($package->open($fileName) !== true) {
         throw new Zend_Search_Lucene_Exception('Unable to open "' . $fileName . '" as a zip archive.');
     }
     // Read relations and search for officeDocument
     $relationsXml = $package->getFromName('_rels/.rels');
     if ($relationsXml === false) {
         $package->close();
         throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
     }
     $relations = simplexml_load_string($relationsXml);
     foreach ($relations->Relationship as $rel) {
         if ($rel["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
             // Found office document! Read in contents...
             $contentsXml = $package->getFromName($this->absoluteZipPath(dirname($rel['Target']) . '/' . basename($rel['Target'])));
             if ($contentsXml === false) {
                 $package->close();
                 throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
             }
             $contents = simplexml_load_string($contentsXml);
             $contents->registerXPathNamespace('w', Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);
             $paragraphs = $contents->xpath('//w:body/w:p');
             foreach ($paragraphs as $paragraph) {
                 // Relative path: only the runs of the current paragraph
                 $runs = $paragraph->xpath('.//w:r/*[name() = "w:t" or name() = "w:br"]');
                 if ($runs === false) {
                     // Paragraph doesn't contain any text or breaks
                     continue;
                 }
                 foreach ($runs as $run) {
                     if ($run->getName() == 'br') {
                         // Break element
                         $documentBody[] = ' ';
                     } else {
                         $documentBody[] = (string) $run;
                     }
                 }
                 // Add space after each paragraph. So they are not bound together.
                 $documentBody[] = ' ';
             }
             break;
         }
     }
     // Read core properties
     $coreProperties = $this->extractMetaData($package);
     // Close file
     $package->close();
     // Store filename
     $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
     // Store contents
     if ($storeContent) {
         $this->addField(Zend_Search_Lucene_Field::Text('body', implode('', $documentBody), 'UTF-8'));
     } else {
         $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode('', $documentBody), 'UTF-8'));
     }
     // Store meta data properties
     foreach ($coreProperties as $key => $value) {
         $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
     }
     // Store title (if not present in meta data)
     if (!isset($coreProperties['title'])) {
         $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
     }
 }
コード例 #8
0
ファイル: Page.php プロジェクト: laiello/digitalus-cms
 /**
  * Builds the Lucene document for this page.
  *
  * Title, link and teaser are added as Text fields; content and search
  * tags are added as UnStored fields. All fields use $this->_charset.
  *
  * @return Zend_Search_Lucene_Document
  */
 public function asLuceneDocument()
 {
     $charset = $this->_charset;

     $textFields = array(
         'page_title'  => $this->title,
         'page_link'   => $this->path,
         'page_teaser' => $this->teaser,
     );
     $unstoredFields = array(
         'page_content' => $this->content,
         'search_tags'  => $this->searchTags,
     );

     $document = new Zend_Search_Lucene_Document();
     foreach ($textFields as $name => $value) {
         $document->addField(Zend_Search_Lucene_Field::Text($name, $value, $charset));
     }
     foreach ($unstoredFields as $name => $value) {
         $document->addField(Zend_Search_Lucene_Field::UnStored($name, $value, $charset));
     }

     return $document;
 }
コード例 #9
0
 /**
  * Creates the document for a file: indexes its name and its body.
  *
  * The body is whatever $this->getBody() returns, joined with spaces.
  *
  * @param string  $fileName     file name, also remembered in $this->_filename
  * @param boolean $storeContent true to add the body as a Text field,
  *                              false to add it as an UnStored field
  */
 public function __construct($fileName, $storeContent)
 {
     $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
     $this->_filename = $fileName;

     $bodyText = implode(' ', $this->getBody());
     $bodyField = $storeContent
         ? Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8')
         : Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8');
     $this->addField($bodyField);
 }
コード例 #10
0
 /**
  * Checks the properties of a field created with the UnStored factory.
  *
  * Assertions are written as assertEquals(expected, actual) so that a
  * failure message reports the two values the right way round.
  */
 public function testUnStored()
 {
     $field = Zend_Search_Lucene_Field::UnStored('field', 'value');
     $this->assertEquals(1, $field->boost);
     $this->assertEquals('', $field->encoding);
     $this->assertEquals(false, $field->isBinary);
     $this->assertEquals(true, $field->isIndexed);
     $this->assertEquals(false, $field->isStored);
     $this->assertEquals(true, $field->isTokenized);
     $this->assertEquals('field', $field->name);
     $this->assertEquals('value', $field->value);
 }
コード例 #11
0
ファイル: Search.php プロジェクト: aprondak/ifphp
 /**
  * Adds a feed to the feed search index.
  *
  * The index location is read from the "search->feed" entry of the
  * registry. Title, site URL, feed URL, language and category are added
  * as Text fields; the description is added as an UnStored field.
  *
  * @param Feed $feed feed to index
  */
 public function addFeed(Feed $feed)
 {
     $index = Zend_Search_Lucene::open(Zend_Registry::getInstance()->search->feed);
     $doc = new Zend_Search_Lucene_Document();
     // 'title' was previously added twice; a single field is enough.
     $doc->addField(Zend_Search_Lucene_Field::Text('title', $feed->title));
     $doc->addField(Zend_Search_Lucene_Field::Text('siteUrl', $feed->siteUrl));
     $doc->addField(Zend_Search_Lucene_Field::Text('feedUrl', $feed->url));
     $doc->addField(Zend_Search_Lucene_Field::Text('language', $feed->language));
     $doc->addField(Zend_Search_Lucene_Field::Text('category', $feed->category));
     $doc->addField(Zend_Search_Lucene_Field::UnStored('description', $feed->description));
     $index->addDocument($doc);
     // Flush explicitly rather than relying on the index being destroyed.
     $index->commit();
 }
コード例 #12
0
ファイル: document.php プロジェクト: JackCanada/moodle-hacks
 /**
  * Builds the search document from a module-provided descriptor.
  *
  * @param object     $doc               descriptor exposing docid, documenttype, itemtype,
  *                                      contextid, title, author, contents, url and date
  * @param mixed      $data              additional per-module data, stored serialized
  * @param mixed      $course_id         course the document belongs to
  * @param mixed      $group_id          group of the originator
  * @param mixed      $user_id           originator, if any
  * @param string     $path              where to find the library calls used to check
  *                                      access to the module or block content
  * @param array|null $additional_keyset extra keyword fields (name => value)
  */
 public function __construct(&$doc, &$data, $course_id, $group_id, $user_id, $path, $additional_keyset = null)
 {
     $encoding = 'UTF-8';

     // Identification keywords, in order:
     //  docid     - document identification and indexing
     //  doctype   - name of the Moodle element that manages the document
     //  itemtype  - allows subclassing information from complex modules
     //  course_id - caches the course context
     //  group_id  - caches the originator's group
     //  user_id   - caches the originator if any
     $identifiers = array(
         'docid'     => $doc->docid,
         'doctype'   => $doc->documenttype,
         'itemtype'  => $doc->itemtype,
         'course_id' => $course_id,
         'group_id'  => $group_id,
         'user_id'   => $user_id,
     );
     foreach ($identifiers as $fieldName => $fieldValue) {
         $this->addField(Zend_Search_Lucene_Field::Keyword($fieldName, $fieldValue, $encoding));
     }

     // Caches the context in which this information is produced/attached.
     // Speeds up the "check for access" process, as the context in which
     // the information resides (a course, a module, a block, the site) is
     // stable.
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('context_id', $doc->contextid, $encoding));

     // Data for the document.
     foreach (array('title' => $doc->title, 'author' => $doc->author) as $fieldName => $fieldValue) {
         $this->addField(Zend_Search_Lucene_Field::Text($fieldName, $fieldValue, $encoding));
     }
     $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $doc->contents, $encoding));
     foreach (array('url' => $doc->url, 'date' => $doc->date) as $fieldName => $fieldValue) {
         $this->addField(Zend_Search_Lucene_Field::UnIndexed($fieldName, $fieldValue, $encoding));
     }

     // Additional data added on a per-module basis.
     $this->addField(Zend_Search_Lucene_Field::Binary('data', serialize($data)));

     // The path lets the document know where to find the specific library
     // calls for checking access to a module or block content. The Lucene
     // record only has to bring back sufficient and consistent information
     // for that call to perform the check.
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('path', $path, $encoding));

     /*
     // Disabled: capability set required for viewing (-1 if none). The
     // capability required depends on the local situation of the document,
     // and each module should provide it when pushing out the search
     // document structure. The capability model should stay flat, but
     // modules may use logical combinations of several capabilities, so
     // this possibility is left open here.
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('capabilities', $caps));
     */

     // The additional key set allows a module to ask for extensible,
     // criteria-based search depending on its internal needs.
     if (empty($additional_keyset)) {
         return;
     }
     foreach ($additional_keyset as $extraName => $extraValue) {
         $this->addField(Zend_Search_Lucene_Field::Keyword($extraName, $extraValue, $encoding));
     }
 }
コード例 #13
0
ファイル: search_test.php プロジェクト: Tony133/zf-web
/**
 * Runs ten rounds of: open ./data/index with the create flag set, search
 * for "test", add one document made of two getword() values, commit, and
 * close the index directory.
 */
function fill_index()
{
    $roundsLeft = 10;
    while ($roundsLeft-- > 0) {
        $index = new Zend_Search_Lucene('./data/index', true);
        $index->find("test");

        // getword() is called for "test" first, then for "contents".
        $testField = Zend_Search_Lucene_Field::Text("test", getword());
        $contentsField = Zend_Search_Lucene_Field::UnStored("contents", getword());

        $document = new Zend_Search_Lucene_Document();
        $document->addField($testField);
        $document->addField($contentsField);
        $index->addDocument($document);
        $index->commit();

        $index->getDirectory()->close();
        // original note: comment this to see another bug :-|
    }
}
コード例 #14
0
ファイル: LuceneIndexer.php プロジェクト: padraic/ZFPlanet
 /**
  * Adds an entry to the Lucene index, then commits and optimizes it.
  *
  * Does nothing when no index has been set on this indexer.
  *
  * @param Zfplanet_Model_Entry $entry entry providing id, publishedDate,
  *                                    uri, title and content
  */
 public function index(Zfplanet_Model_Entry $entry)
 {
     if ($this->_index === null) {
         return;
     }

     $fields = array(
         Zend_Search_Lucene_Field::Keyword('id', $entry->id, 'utf-8'),
         Zend_Search_Lucene_Field::UnIndexed('publishedDate', $entry->publishedDate, 'utf-8'),
         Zend_Search_Lucene_Field::Keyword('uri', $entry->uri, 'utf-8'),
         Zend_Search_Lucene_Field::Text('title', $entry->title, 'utf-8'),
         Zend_Search_Lucene_Field::UnStored('content', $entry->content, 'utf-8'),
     );

     $document = new Zend_Search_Lucene_Document();
     foreach ($fields as $field) {
         $document->addField($field);
     }

     $this->_index->addDocument($document);
     $this->_index->commit();
     $this->_index->optimize();
 }
コード例 #15
0
ファイル: Html.php プロジェクト: hackingman/TubeX
 /**
  * Object constructor
  *
  * Loads the HTML into a DOMDocument, indexes the title, every named
  * <meta> tag and the body text, and collects the document's links.
  *
  * @param string  $data         HTML string, or a file name when $isFile is set
  * @param boolean $isFile       true when $data is a path to read with file_get_contents()
  * @param boolean $storeContent true to add the body as a Text field,
  *                              false to add it as an UnStored field
  */
 private function __construct($data, $isFile, $storeContent)
 {
     $this->_doc = new DOMDocument();
     $this->_doc->substituteEntities = true;

     $markup = $isFile ? file_get_contents($data) : $data;
     // Parser warnings are deliberately suppressed here.
     @$this->_doc->loadHTML($markup);

     $finder = new DOMXPath($this->_doc);

     // Title: there should be only one node, but every match is processed.
     $title = '';
     foreach ($finder->query('/html/head/title') as $node) {
         $title .= $node->nodeValue . ' ';
     }
     $this->addField(Zend_Search_Lucene_Field::Text('title', $title, $this->_doc->actualEncoding));

     // One Text field per <meta name="..."> tag, named after the tag.
     foreach ($finder->query('/html/head/meta[@name]') as $meta) {
         $this->addField(Zend_Search_Lucene_Field::Text($meta->getAttribute('name'), $meta->getAttribute('content'), $this->_doc->actualEncoding));
     }

     // Body: there should be only one node, but every match is processed.
     $bodyText = '';
     foreach ($finder->query('/html/body') as $node) {
         $this->_retrieveNodeText($node, $bodyText);
     }
     $bodyField = $storeContent
         ? Zend_Search_Lucene_Field::Text('body', $bodyText, $this->_doc->actualEncoding)
         : Zend_Search_Lucene_Field::UnStored('body', $bodyText, $this->_doc->actualEncoding);
     $this->addField($bodyField);

     // Anchors: keep non-empty hrefs, skipping rel="nofollow" links when
     // that exclusion is switched on.
     foreach ($this->_doc->getElementsByTagName('a') as $anchor) {
         $href = $anchor->getAttribute('href');
         if ($href == '') {
             continue;
         }
         if (self::$_excludeNoFollowLinks && strtolower($anchor->getAttribute('rel')) == 'nofollow') {
             continue;
         }
         $this->_links[] = $href;
     }
     $this->_links = array_unique($this->_links);

     // <link> elements of the document head.
     foreach ($finder->query('/html/head/link') as $headLink) {
         $href = $headLink->getAttribute('href');
         if ($href != '') {
             $this->_headerLinks[] = $href;
         }
     }
     $this->_headerLinks = array_unique($this->_headerLinks);
 }
コード例 #16
0
 /**
  * Adds a searchable object to the index.
  *
  * @param SearchableObject $object      object to index
  * @param boolean          $commitOnEnd when true, the index is committed
  *                                      right after the document is added
  *
  * @return boolean always true
  */
 static function AddToIndex(SearchableObject $object, $commitOnEnd = true)
 {
     $fields = array(
         Zend_Search_Lucene_Field::Keyword('combinedid', $object->getRelObjectManager() . $object->getRelObjectId()),
         Zend_Search_Lucene_Field::UnIndexed('objectid', $object->getRelObjectId()),
         Zend_Search_Lucene_Field::Keyword('manager', $object->getRelObjectManager()),
         Zend_Search_Lucene_Field::UnIndexed('column', $object->getColumnName()),
         Zend_Search_Lucene_Field::UnStored('text', $object->getContent()),
         // Both values below carry a trailing space, as in the indexed data.
         Zend_Search_Lucene_Field::Text('workspaces', "ws" . $object->getProjectId() . " "),
         Zend_Search_Lucene_Field::Text('isprivate', ($object->getIsPrivate() ? '1' : '0') . " "),
     );

     $document = new Zend_Search_Lucene_Document();
     foreach ($fields as $field) {
         $document->addField($field);
     }
     self::GetIndex()->addDocument($document);

     if ($commitOnEnd) {
         self::GetIndex()->commit();
     }

     return true;
 }
コード例 #17
0
ファイル: Docx.php プロジェクト: gauravstomar/Pepool
 /**
  * Object constructor
  *
  * Opens the .docx package, locates the office document part through
  * _rels/.rels, collects the text of every w:t run paragraph by paragraph,
  * then adds the filename, body, core properties and title as fields.
  *
  * @param string  $fileName     path of the .docx file
  * @param boolean $storeContent true to add the body as a Text field,
  *                              false to add it as an UnStored field
  *
  * @throws Zend_Search_Lucene_Exception when the file cannot be opened as a
  *                                      zip archive or a required part is missing
  */
 private function __construct($fileName, $storeContent)
 {
     // Document data holders
     $documentBody = array();
     $coreProperties = array();
     // Open OpenXML package; open() returns true or an error code
     $package = new ZipArchive();
     if ($package->open($fileName) !== true) {
         throw new Zend_Search_Lucene_Exception('Unable to open "' . $fileName . '" as a zip archive.');
     }
     // Read relations and search for officeDocument
     $relationsXml = $package->getFromName("_rels/.rels");
     if ($relationsXml === false) {
         $package->close();
         throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
     }
     $relations = simplexml_load_string($relationsXml);
     foreach ($relations->Relationship as $rel) {
         if ($rel["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
             // Found office document! Read in contents...
             $contentsXml = $package->getFromName($this->absoluteZipPath(dirname($rel["Target"]) . "/" . basename($rel["Target"])));
             if ($contentsXml === false) {
                 $package->close();
                 throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
             }
             $contents = simplexml_load_string($contentsXml);
             $contents->registerXPathNamespace("w", Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);
             $paragraphs = $contents->xpath('//w:body/w:p');
             foreach ($paragraphs as $paragraph) {
                 // The leading "." keeps the query relative to this
                 // paragraph. An absolute "//w:r/w:t" matches every run of
                 // the whole document, which repeated the full text once
                 // per paragraph.
                 $runs = $paragraph->xpath('.//w:r/w:t');
                 if ($runs === false) {
                     // Paragraph doesn't contain any text
                     continue;
                 }
                 foreach ($runs as $run) {
                     $documentBody[] = (string) $run;
                 }
             }
             break;
         }
     }
     // Read core properties
     $coreProperties = $this->extractMetaData($package);
     // Close file
     $package->close();
     // Store filename
     $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName));
     // Store contents
     if ($storeContent) {
         $this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody)));
     } else {
         $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody)));
     }
     // Store meta data properties
     foreach ($coreProperties as $key => $value) {
         $this->addField(Zend_Search_Lucene_Field::Text($key, $value));
     }
     // Store title (if not present in meta data)
     if (!isset($coreProperties['title'])) {
         $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName));
     }
 }
コード例 #18
0
ファイル: Ods.php プロジェクト: WYSAC/oregon-owncloud
 /**
  * Object constructor
  *
  * Reads content.xml from the spreadsheet archive, collects the names of
  * the named tables and the text of every text:p element, then adds the
  * sheets, body, core properties and title as fields.
  *
  * @param string  $fileName     path of the .ods file
  * @param boolean $storeContent true to add sheets/body as Text fields,
  *                              false to add them as UnStored fields
  * @throws \Zend_Search_Lucene_Exception when the Zip extension is not loaded
  *                                       or content.xml cannot be read
  */
 private function __construct($fileName, $storeContent)
 {
     if (!class_exists('ZipArchive', false)) {
         throw new \Zend_Search_Lucene_Exception('Open Document Spreadsheet processing functionality requires Zip extension to be loaded');
     }

     // Open the archive and pull out the main content part.
     $archive = new \ZipArchive();
     $archive->open($fileName);
     $contentXml = $archive->getFromName('content.xml');
     if ($contentXml === false) {
         throw new \Zend_Search_Lucene_Exception('Invalid archive or corrupted .ods file.');
     }

     // Parse with the entity loader disabled, then restore the previous setting.
     $previousSetting = libxml_disable_entity_loader(true);
     $root = simplexml_load_string($contentXml, 'SimpleXMLElement', LIBXML_NOBLANKS | LIBXML_COMPACT);
     libxml_disable_entity_loader($previousSetting);

     $sheetNames = array();
     foreach ($root->xpath('//table:table[@table:name]') as $tableNode) {
         $sheetNames[] = (string) $tableNode->attributes($this::SCHEMA_ODTABLE)->name;
     }
     $cellTexts = array();
     foreach ($root->xpath('//text:p') as $paragraphNode) {
         $cellTexts[] = (string) $paragraphNode;
     }

     // Read core properties before closing the archive.
     $metaData = $this->extractMetaData($archive);
     $archive->close();

     $sheets = implode(' ', $sheetNames);
     $body = implode(' ', $cellTexts);
     if ($storeContent) {
         $this->addField(\Zend_Search_Lucene_Field::Text('sheets', $sheets, 'UTF-8'));
         $this->addField(\Zend_Search_Lucene_Field::Text('body', $body, 'UTF-8'));
     } else {
         $this->addField(\Zend_Search_Lucene_Field::UnStored('sheets', $sheets, 'UTF-8'));
         $this->addField(\Zend_Search_Lucene_Field::UnStored('body', $body, 'UTF-8'));
     }

     // Every core property becomes a Text field of the same name.
     foreach ($metaData as $property => $propertyValue) {
         $this->addField(\Zend_Search_Lucene_Field::Text($property, $propertyValue, 'UTF-8'));
     }

     // Fall back to the file name when the meta data has no title.
     if (!isset($metaData['title'])) {
         $this->addField(\Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
     }
 }
コード例 #19
0
 /**
  * Adds a page to the Lucene index and commits.
  *
  * @param string $title    stored and indexed as "title"
  * @param string $content  indexed (not stored) as "content"
  * @param string $url      stored as keyword "url"; also used to derive the id
  * @param string $keywords stored and indexed as "keywords"
  * @param mixed  $user_id  stored as keyword "userid"
  *
  * @return boolean always TRUE
  */
 function index($title, $content, $url, $keywords, $user_id)
 {
     $this->initLuceneEngine();
     $documentId = $this->getId($url);
     $lucene = $this->zend->get_Zend_Search_Lucene();

     $fields = array(
         Zend_Search_Lucene_Field::Keyword('id', $documentId),
         Zend_Search_Lucene_Field::Keyword('userid', $user_id),
         Zend_Search_Lucene_Field::Keyword('url', $url),
         Zend_Search_Lucene_Field::UnStored("content", $content, 'utf-8'),
         Zend_Search_Lucene_Field::Text("title", $title, 'utf-8'),
         Zend_Search_Lucene_Field::Text("keywords", $keywords, 'utf-8'),
     );

     $document = new Zend_Search_Lucene_Document();
     foreach ($fields as $field) {
         $document->addField($field);
     }

     $lucene->addDocument($document);
     $lucene->commit();
     //$lucene->optimize();

     return TRUE;
 }
コード例 #20
0
ファイル: Category.php プロジェクト: rickboyau/magento-lucene
 /**
  * Adds all values of related category to the search document.
  *
  * Indexed: the tag-stripped static block ("content"), the category name
  * and the parent category name. Stored only: a shortened copy of the
  * content, the category URL and, when the category has an image, the URL
  * of a 100x100 resized copy.
  *
  * @return void
  **/
 protected function addAttributes()
 {
     $content = strip_tags($this->getStaticBlock());
     // Cut by characters when mbstring is available: a byte-wise substr()
     // can split a multibyte character and store an invalid string.
     // NOTE(review): assumes self::ENCODING is a name mbstring accepts - confirm.
     $shortContent = function_exists('mb_substr')
         ? mb_substr($content, 0, self::SHORT_CONTENT_CHAR_COUNT, self::ENCODING)
         : substr($content, 0, self::SHORT_CONTENT_CHAR_COUNT);
     $this->addField(Zend_Search_Lucene_Field::UnStored('content', $content, self::ENCODING));
     $this->addField(Zend_Search_Lucene_Field::Text('name', $this->getSourceModel()->getName(), self::ENCODING));
     $this->addField(Zend_Search_Lucene_Field::Keyword('category', $this->getSourceModel()->getParentCategory()->getName(), self::ENCODING));
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('short_content', $shortContent, self::ENCODING));
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $this->getSourceModel()->getUrl(), self::ENCODING));
     if ($this->getSourceModel()->getImage()) {
         try {
             $image = Mage::getModel('catalog/product_image')->setBaseFile('../category/' . $this->getSourceModel()->getImage())->setHeight(100)->setWidth(100)->resize()->saveFile()->getUrl();
             $this->addField(Zend_Search_Lucene_Field::UnIndexed('image', $image, self::ENCODING));
         } catch (Exception $e) {
             /* no image for category, so none will be added to index */
         }
     }
 }
コード例 #21
0
 /**
  * Adds one piece of content to the Lucene index and commits.
  *
  * The index directory comes from the 'lucene_index' registry entry; the
  * index is created when that directory does not exist yet.
  *
  * @param array $indexationData Must provide pageID, moduleID, contentID,
  *                              languageID, title, text, link and contents.
  */
 public static function indexationAdd($indexationData)
 {
     $indexPath = Zend_Registry::get('lucene_index');
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8());

     $document = new Zend_Search_Lucene_Document();
     foreach (array('pageID', 'moduleID', 'contentID', 'languageID') as $idKey) {
         $document->addField(Zend_Search_Lucene_Field::Keyword($idKey, $indexationData[$idKey]));
     }
     foreach (array('title', 'text') as $textKey) {
         $document->addField(Zend_Search_Lucene_Field::Text($textKey, Cible_FunctionsGeneral::html2text($indexationData[$textKey])));
     }
     $document->addField(Zend_Search_Lucene_Field::UnIndexed('link', $indexationData['link']));

     // Contents: converted to text, accents removed, lower-cased.
     $plainContents = Cible_FunctionsGeneral::html2text($indexationData['contents']);
     $normalized = strtolower(Cible_FunctionsGeneral::removeAccents($plainContents));
     $document->addField(Zend_Search_Lucene_Field::UnStored('contents', $normalized));

     $mustCreate = !is_dir($indexPath);
     $luceneIndex = new Zend_Search_Lucene($indexPath, $mustCreate);
     $luceneIndex->addDocument($document);
     $luceneIndex->commit();
 }
コード例 #22
0
ファイル: NewItem.class.php プロジェクト: vcgato29/poff
 /**
  * Replace this record's entry in the table's Lucene index.
  *
  * Every hit matching "pk:<id>" is deleted, then a new document holding the
  * primary key, the description and the tag-stripped content is added and
  * the index is committed.
  */
 public function updateLuceneIndex()
 {
     $luceneIndex = $this->getTable()->getLuceneIndex();
     // drop whatever is currently indexed under this primary key
     $staleHits = $luceneIndex->find('pk:' . $this->getId());
     foreach ($staleHits as $staleHit) {
         $luceneIndex->delete($staleHit->id);
     }
     // assemble the replacement document
     $document = new Zend_Search_Lucene_Document();
     // the primary key identifies the record in search results
     $pkField = Zend_Search_Lucene_Field::Keyword('pk', $this->getId());
     $document->addField($pkField);
     $descriptionField = Zend_Search_Lucene_Field::UnStored('description', $this->getDescription(), 'utf-8');
     $document->addField($descriptionField);
     $plainContent = strip_tags($this->getContent());
     $document->addField(Zend_Search_Lucene_Field::UnStored('content', $plainContent, 'utf-8'));
     // write the document out
     $luceneIndex->addDocument($document);
     $luceneIndex->commit();
 }
コード例 #23
0
ファイル: Asso.class.php プロジェクト: TheoJD/portail
 /**
  * Refresh this asso's document in the Zend Lucene search index.
  *
  * Existing hits for "pk:<id>" are deleted, then a new document with the
  * primary key plus the name, description and login is added and committed.
  */
 public function updateLuceneIndex()
 {
     $searchIndex = AssoTable::getInstance()->getLuceneIndex();
     // clear out the previously indexed version of this asso
     $previousHits = $searchIndex->find('pk:' . $this->getId());
     foreach ($previousHits as $previousHit) {
         $searchIndex->delete($previousHit->id);
     }
     $document = new Zend_Search_Lucene_Document();
     // the primary key lets search results be mapped back to the asso
     $document->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));
     // searchable asso fields, all added as UnStored
     $searchableValues = array(
         'name' => $this->getName(),
         'description' => $this->getDescription(),
         'login' => $this->getLogin(),
     );
     foreach ($searchableValues as $fieldName => $fieldValue) {
         $document->addField(Zend_Search_Lucene_Field::UnStored($fieldName, $fieldValue, 'utf-8'));
     }
     // persist the new document
     $searchIndex->addDocument($document);
     $searchIndex->commit();
 }
コード例 #24
0
ファイル: Odt.php プロジェクト: omusico/isle-web-framework
 /**
  * Object constructor
  *
  * Opens the given .odt file as a zip archive, reads content.xml, collects
  * the text of all text:h and text:p elements and adds them, together with
  * the meta data returned by extractMetaData(), as fields of this document.
  *
  * @param string  $fileName     Path of the .odt file to read
  * @param boolean $storeContent When true, headlines and body are added as
  *                              Text fields; otherwise as UnStored fields
  * @throws \Zend_Search_Lucene_Exception If the Zip extension is not loaded,
  *                                       the archive cannot be opened, or
  *                                       content.xml is missing or not
  *                                       parseable as XML
  */
 private function __construct($fileName, $storeContent)
 {
     if (!class_exists('ZipArchive', false)) {
         throw new \Zend_Search_Lucene_Exception('Open Document Text processing functionality requires Zip extension to be loaded');
     }
     // Document data holders
     $documentHeadlines = array();
     $documentParagraphs = array();
     // Open the .odt zip package; open() returns true on success and an
     // error code otherwise, so anything but true means it is unusable
     $package = new \ZipArchive();
     if ($package->open($fileName) !== true) {
         throw new \Zend_Search_Lucene_Exception('Invalid archive or corrupted .odt file.');
     }
     // Read the document body
     $content = $package->getFromName('content.xml');
     if ($content === false) {
         // release the archive handle before bailing out
         $package->close();
         throw new \Zend_Search_Lucene_Exception('Invalid archive or corrupted .odt file.');
     }
     $sxe = simplexml_load_string($content, 'SimpleXMLElement', LIBXML_NOBLANKS | LIBXML_COMPACT);
     if ($sxe === false) {
         // content.xml exists but is not well-formed XML
         $package->close();
         throw new \Zend_Search_Lucene_Exception('Invalid archive or corrupted .odt file.');
     }
     // xpath() may return a non-array value on failure; treat that as "no matches"
     foreach ($sxe->xpath('//text:h') ?: array() as $headline) {
         $documentHeadlines[] = (string) $headline;
     }
     foreach ($sxe->xpath('//text:p') ?: array() as $paragraph) {
         $documentParagraphs[] = (string) $paragraph;
     }
     // Read core properties
     $coreProperties = $this->extractMetaData($package);
     // Close file
     $package->close();
     // Store contents
     if ($storeContent) {
         $this->addField(\Zend_Search_Lucene_Field::Text('headlines', implode(' ', $documentHeadlines), 'UTF-8'));
         $this->addField(\Zend_Search_Lucene_Field::Text('body', implode(' ', $documentParagraphs), 'UTF-8'));
     } else {
         $this->addField(\Zend_Search_Lucene_Field::UnStored('headlines', implode(' ', $documentHeadlines), 'UTF-8'));
         $this->addField(\Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentParagraphs), 'UTF-8'));
     }
     // Store meta data properties
     foreach ($coreProperties as $key => $value) {
         $this->addField(\Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
     }
     // Store title (if not present in meta data)
     if (!isset($coreProperties['title'])) {
         $this->addField(\Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
     }
 }
コード例 #25
0
ファイル: Document.php プロジェクト: joshauza/baseapp
 /**
  * Build a search document for one object identified by class and key.
  *
  * @param string       $class       Added as UnIndexed 'class' and as the first part of 'docRef'
  * @param mixed        $key         Added as UnIndexed 'key' and as the second part of 'docRef'
  * @param string       $title       Added as Text field 'title'
  * @param string       $contents    Added as UnStored field 'contents'
  * @param string       $summary     Added as UnIndexed field 'summary'
  * @param mixed        $createdBy   Added as Keyword field 'createdBy'
  * @param mixed        $dateCreated Added as Keyword field 'dateCreated'
  * @param array|string $keywords    Map of field name => value; each non-empty
  *                                  pair becomes a Keyword field. A non-array
  *                                  value is split on commas (see note below).
  */
 public function __construct($class, $key, $title, $contents, $summary, $createdBy, $dateCreated, $keywords = array())
 {
     $this->addField(Zend_Search_Lucene_Field::Keyword('docRef', "{$class}:{$key}"));
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('class', $class));
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('key', $key));
     $this->addField(Zend_Search_Lucene_Field::Text('title', $title));
     $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $contents));
     $this->addField(Zend_Search_Lucene_Field::UnIndexed('summary', $summary));
     $this->addField(Zend_Search_Lucene_Field::Keyword('createdBy', $createdBy));
     $this->addField(Zend_Search_Lucene_Field::Keyword('dateCreated', $dateCreated));
     if (!is_array($keywords)) {
         // explode() rejects an empty delimiter, so the previous explode('', ...)
         // could never yield keywords.
         // NOTE(review): comma is an assumed delimiter - confirm against callers.
         // Tokens are keyed from 1 so the non-empty-name check below keeps the
         // first one; the resulting field names are therefore '1', '2', ...
         $tokens = array();
         foreach (explode(',', (string) $keywords) as $token) {
             $token = trim($token);
             if ($token !== '') {
                 $tokens[count($tokens) + 1] = $token;
             }
         }
         $keywords = $tokens;
     }
     foreach ($keywords as $name => $value) {
         if (!empty($name) && !empty($value)) {
             $this->addField(Zend_Search_Lucene_Field::Keyword($name, $value));
         }
     }
 }
コード例 #26
0
 /**
  * Object constructor
  *
  * Adds the file's path, its modification time and its raw contents as
  * fields of this document.
  *
  * @param  string  $fileName     Path of the file to index
  * @param  boolean $storeContent When true the contents are added as a Text
  *                               field, otherwise as an UnStored field
  * @throws Zend_Search_Lucene_Exception If the file does not exist or cannot be read
  * @return void
  */
 public function __construct($fileName, $storeContent = false)
 {
     if (!file_exists($fileName)) {
         throw new Zend_Search_Lucene_Exception("File doesn't exists. Filename: '{$fileName}'");
     }
     $this->addField(Zend_Search_Lucene_Field::Text('path', $fileName));
     $this->addField(Zend_Search_Lucene_Field::Keyword('modified', filemtime($fileName)));
     // Read the whole file in one call. The former fread() loop compared the
     // block with a loose "!= false", which stops early on a block equal to
     // "0", and it never checked whether fopen() had succeeded.
     $data = file_get_contents($fileName);
     if ($data === false) {
         throw new Zend_Search_Lucene_Exception("Unable to read file. Filename: '{$fileName}'");
     }
     if ($storeContent) {
         $this->addField(Zend_Search_Lucene_Field::Text('contents', $data, 'ISO8859-1'));
     } else {
         $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $data, 'ISO8859-1'));
     }
 }
コード例 #27
0
ファイル: track.class.php プロジェクト: hielh/abjihproject
 /**
  * Rebuild this track's document in the track Lucene index.
  *
  * Existing hits for "pk:<id>" are deleted, then a new document is added
  * with the track's own values plus values read from its user, its playlist
  * and the playlist's owner. Each field name is added exactly once.
  */
 public function updateLuceneIndex()
 {
     $index = trackTable::getLuceneIndex();
     // remove existing entries
     foreach ($index->find('pk:' . $this->getId()) as $hit) {
         $index->delete($hit->id);
     }
     $doc = new Zend_Search_Lucene_Document();
     // track's own values
     $doc->addField(Zend_Search_Lucene_Field::Keyword('pk', $this->getId()));
     $doc->addField(Zend_Search_Lucene_Field::Text('track_name', $this->getName(), 'utf-8'));
     $doc->addField(Zend_Search_Lucene_Field::UnIndexed('track_url', $this->getUrl(), 'utf-8'));
     $doc->addField(Zend_Search_Lucene_Field::Keyword('play_it_user_id', $this->getSfGuardUser()->getId()));
     // playlist values; look the playlist up once instead of per field
     $playList = $this->getPlayList();
     $doc->addField(Zend_Search_Lucene_Field::Text('track_type', $playList->getObjectType(), 'utf-8'));
     $doc->addField(Zend_Search_Lucene_Field::Text('playlist_name', $playList->getTitle(), 'utf-8'));
     // playlist owner values
     $playOwner = $playList->getPlayOwner();
     $doc->addField(Zend_Search_Lucene_Field::Keyword('play_owner_id', $playOwner->getId()));
     $doc->addField(Zend_Search_Lucene_Field::Text('play_owner_name', $playOwner->getName(), 'utf-8'));
     $doc->addField(Zend_Search_Lucene_Field::UnStored('play_owner_name_fr', $playOwner->getNameFr(), 'utf-8'));
     $index->addDocument($doc);
     $index->commit();
 }
コード例 #28
0
ファイル: Document.php プロジェクト: philipnorton42/PDFSearch
 /**
  * Constructor.
  *
  * Requires both 'Filename' and 'Key'. When either is missing no field is
  * added and the document is left empty (a constructor cannot return a
  * value, so the caller still receives an object).
  *
  * @param array $values An associative array of values to be used
  *                      in the document. Required keys: Filename, Key.
  *                      Optional keys, skipped when unset or empty: Title,
  *                      Subject, Author, Keywords, CreationDate, ModDate,
  *                      Contents.
  */
 public function __construct($values)
 {
     // If the Filename or the Key values are not set then reject the document.
     if (!isset($values['Filename']) || !isset($values['Key'])) {
         return;
     }
     Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive());
     // Add the Filename field to the document as a Keyword field.
     $this->addField(Zend_Search_Lucene_Field::Keyword('Filename', $values['Filename']));
     // Add the Key field to the document as a Keyword.
     $this->addField(Zend_Search_Lucene_Field::Keyword('Key', $values['Key']));
     // Optional meta data, each added as a Text field when present and non-empty.
     $optionalTextFields = array('Title', 'Subject', 'Author', 'Keywords', 'CreationDate', 'ModDate');
     foreach ($optionalTextFields as $fieldName) {
         if (isset($values[$fieldName]) && $values[$fieldName] != '') {
             $this->addField(Zend_Search_Lucene_Field::Text($fieldName, $values[$fieldName]));
         }
     }
     if (isset($values['Contents']) && $values['Contents'] != '') {
         // Add the Contents field to the document as an UnStored field.
         $this->addField(Zend_Search_Lucene_Field::UnStored('Contents', $values['Contents']));
     }
 }
コード例 #29
0
 /**
  * Recreate the search index from the rows of 'v_einsatz' (filtered on
  * online = 1) and 'v_news', echoing a progress line for every row added.
  */
 public function build_index()
 {
     echo 'Anfang<br>';
     // create the index at the configured location
     $luceneIndex = Zend_Search_Lucene::create($this->search_index);

     $this->db->where('online', 1);
     $einsatzRows = $this->db->get('v_einsatz')->result();
     foreach ($einsatzRows as $einsatz) {
         // one search document per row
         $document = new Zend_Search_Lucene_Document();
         // title shown in the result list
         $titleField = Zend_Search_Lucene_Field::Text('title', htmlentities($einsatz->name));
         $document->addField($titleField);
         // path the search result links to
         $targetUrl = base_url('aktuelles/einsatz/' . $einsatz->einsatzID);
         $document->addField(Zend_Search_Lucene_Field::Text('path', $targetUrl));
         // content indexed in addition to the title
         $searchText = htmlentities($einsatz->lage . $einsatz->bericht . $einsatz->weitere_kraefte . $einsatz->ort);
         $document->addField(Zend_Search_Lucene_Field::UnStored('content', $searchText));
         $document->addField(Zend_Search_Lucene_Field::UnIndexed('content_type', 'Einsatz'));
         // add to the index
         $luceneIndex->addDocument($document);
         echo 'Einsatz ' . $einsatz->name . ' zum Index hinzugefügt.<br />';
     }

     $newsRows = $this->db->get('v_news')->result();
     foreach ($newsRows as $news) {
         // one search document per row
         $document = new Zend_Search_Lucene_Document();
         // title shown in the result list
         $titleField = Zend_Search_Lucene_Field::Text('title', htmlentities($news->title));
         $document->addField($titleField);
         // path the search result links to
         $targetUrl = base_url('aktuelles/news/' . $news->newsID);
         $document->addField(Zend_Search_Lucene_Field::Text('path', $targetUrl));
         // content indexed in addition to the title
         $searchText = htmlentities($news->teaser . $news->text);
         $document->addField(Zend_Search_Lucene_Field::UnStored('content', $searchText));
         $document->addField(Zend_Search_Lucene_Field::UnIndexed('content_type', 'News'));
         // add to the index
         $luceneIndex->addDocument($document);
         echo 'News ' . $news->title . ' zum Index hinzugefügt.<br />';
     }

     // optimize the index
     $luceneIndex->optimize();
     echo 'Ende';
 }
コード例 #30
0
 /**
  * Replace the search document of the given record.
  *
  * Hits matching the record's search primary key are deleted, then a new
  * document is added whose fields come from the record's getSearchData()
  * when that method exists, or from toArray(false) otherwise.
  *
  * @param Doctrine_Record $record Record to (re)index
  */
 public function updateSearchIndex(Doctrine_Record $record)
 {
     $searchIndex = $this->getIndex();
     // purge the previously indexed version of this record
     $staleHits = $searchIndex->find('pk:' . $this->_getRecordSearchPrimaryKey($record));
     foreach ($staleHits as $staleHit) {
         $searchIndex->delete($staleHit->id);
     }
     $document = new Zend_Search_Lucene_Document();
     // the primary key identifies the record in the search results
     $pkField = Zend_Search_Lucene_Field::Keyword('pk', $this->_getRecordSearchPrimaryKey($record));
     $document->addField($pkField);
     // prefer the record's own search data when it provides any
     $searchData = method_exists($record, 'getSearchData')
         ? $record->getSearchData()
         : $record->toArray(false);
     foreach ($searchData as $fieldName => $fieldValue) {
         $document->addField(Zend_Search_Lucene_Field::UnStored($fieldName, $fieldValue, 'utf-8'));
     }
     // write the new document out
     $searchIndex->addDocument($document);
     $searchIndex->commit();
 }