public function searchAction(Request $request)
 {
     $queryString = $request->get('query');
     // count total documents indexed
     $numDocs = $this->get('zendsearch')->numDocs();
     // parse query string and return a Query object.
     // $query = Search\QueryParser::parse($queryString, 'UTF-8');
     $queryTokens = Analyzer::getDefault()->tokenize($queryString, 'UTF-8');
     $query = new Search\Query\Boolean();
     foreach ($queryTokens as $token) {
         $query->addSubquery(new Search\Query\Fuzzy(new Index\Term($token->getTermText()), 0.5), null);
     }
     // process query
     $results = $this->get('zendsearch')->find($query);
     // sort results by score (MultiSearch does not sort the results between the differents indices)
     usort($results, create_function('$a, $b', 'return $a->score < $b->score;'));
     // // paginate results
     // $results = new \Zend\Paginator\Paginator(new \Zend\Paginator\Adapter\ArrayAdapter($results));
     // $results->setCurrentPageNumber($page);
     // $results->setItemCountPerPage($rpp);
     // // fetch results entities
     // $dataResults = array();
     // foreach ($results as $hit) {
     //     $document = $hit->getDocument();
     //     $repository = $this->get('orm.em')->getRepository( $document->getFieldValue('entityClass') );
     //     $dataResults[] = $repository->find( $document->getFieldValue('id') );
     // }
     // $results = $dataResults;
     return $this->get('twig')->render('admin/search.html.twig', array('query' => $queryString, 'numDocs' => $numDocs, 'results' => $results));
 }
 public function testSetHighlighterAnalyzer()
 {
     $this->app->instance('filterClass1', $tokenFilterMock = m::mock('ZendSearch\\Lucene\\Analysis\\TokenFilter\\TokenFilterInterface'));
     $this->analyzer->shouldReceive('addFilter')->with($tokenFilterMock)->once();
     $this->config->setHighlighterAnalyzer();
     $this->assertEquals($this->analyzer, Analyzer::getDefault());
 }
Example #3
0
 /**
  * Get index
  * @return \ZendSearch\Lucene\Index
  */
 private function index()
 {
     if (!isset(self::$index)) {
         $analyzer = new CaseInsensitive();
         if ($this->config()->exists('zend_search', 'stop_words')) {
             $stop_word_filter = new StopWords();
             $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));
             if ($words !== false) {
                 $stop_word_filter->loadFromFile($words);
             } else {
                 throw new \InvalidArgumentException('Path not found');
             }
             $analyzer->addFilter($stop_word_filter);
         }
         if ($this->config()->exists('zend_search', 'morphy_dicts')) {
             $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));
             if ($morphy_dicts !== false) {
                 $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
             } else {
                 throw new \InvalidArgumentException('Path not found');
             }
         }
         Analyzer::setDefault($analyzer);
         Lucene::setResultSetLimit($this->limit);
         QueryParser::setDefaultEncoding($this->config()->getCharset());
         $index = $this->config() - get('zend_search', 'index');
         $path = $this->getRealPath($index);
         self::$index = $path ? Lucene::open($path) : Lucene::create($index);
     }
     return self::$index;
 }
 /**
  * {@inheritdoc}
  */
 public function register(Application $app)
 {
     Analyzer::setDefault(new CaseInsensitive());
     QueryParser::setDefaultEncoding('UTF-8');
     $app['zendsearch.indices_path'] = array();
     $app['zendsearch.indices.initializer'] = $app->protect(function () use($app) {
         static $initialized = false;
         if ($initialized) {
             return;
         }
         $initialized = true;
         $indices = array();
         foreach ($app['zendsearch.indices_path'] as $name => $index) {
             $indices[$name] = file_exists($index) ? Lucene::open($index) : Lucene::create($index);
         }
         $app['zendsearch.indices_collection'] = $indices;
     });
     $app['zendsearch.indices'] = $app->share(function ($app) {
         $app['zendsearch.indices.initializer']();
         return $app['zendsearch.indices_collection'];
     });
     $app['zendsearch.multisearcher'] = $app->share(function ($app) {
         $app['zendsearch.indices.initializer']();
         $multi = new MultiSearcher();
         foreach ($app['zendsearch.indices'] as $index) {
             $multi->addIndex($index);
         }
         return $multi;
     });
     $app['zendsearch'] = $app->share(function ($app) {
         return $app['zendsearch.multisearcher'];
     });
 }
Example #5
0
 /**
  * @param DB $db
  * @param Core $core
  * @param string $indexesBasePath
  */
 public function __construct(DB $db, Core $core, $indexesBasePath)
 {
     $this->db = $db;
     $this->core = $core;
     $this->indexesBasePath = $indexesBasePath;
     $this->indexes = array();
     Analyzer::setDefault(new CaseInsensitive());
 }
 /**
  * @param string $path Path to search index
  */
 public function __construct($path, NodeTypeManagerInterface $nodeTypeManager = null, $hideDestructException = false)
 {
     $this->path = $path;
     $this->filesystem = new Filesystem();
     $this->nodeTypeManager = $nodeTypeManager;
     $this->hideDestructException = $hideDestructException;
     Analyzer::setDefault(new ExactMatchAnalyzer());
     Wildcard::setMinPrefixLength(0);
 }
 /**
  * Set analyzer for words highlighting (not for indexing).
  */
 public function setHighlighterAnalyzer()
 {
     /** @var AbstractCommon $analyzer */
     $analyzer = App::make(AbstractCommon::class);
     foreach ($this->filterClasses as $filterClass) {
         $analyzer->addFilter(App::make($filterClass));
     }
     Analyzer::setDefault($analyzer);
 }
 /**
  * Set analyzer for words highlighting (not for indexing).
  */
 public function setHighlighterAnalyzer()
 {
     /** @var AbstractCommon $analyzer */
     $analyzer = App::make('ZendSearch\\Lucene\\Analysis\\Analyzer\\Common\\AbstractCommon');
     foreach ($this->filterClasses as $filterClass) {
         $analyzer->addFilter(App::make($filterClass));
     }
     Analyzer::setDefault($analyzer);
 }
 public function init()
 {
     if ($this->caseSensitivity) {
         Analyzer::setDefault(new Utf8());
     } else {
         Analyzer::setDefault(new CaseInsensitive());
     }
     $this->indexDirectory = FileHelper::normalizePath(Yii::getAlias($this->indexDirectory));
     $this->luceneIndex = $this->getLuceneIndex($this->indexDirectory);
 }
 public function init()
 {
     QueryParser::setDefaultEncoding('UTF-8');
     if ($this->caseSensitivity) {
         Analyzer::setDefault($this->parseNumeric ? new Utf8Num() : new Utf8());
     } else {
         Analyzer::setDefault($this->parseNumeric ? new CaseInsensitiveNum() : new CaseInsensitive());
     }
     $this->indexDirectory = FileHelper::normalizePath(Yii::getAlias($this->indexDirectory));
     $this->luceneIndex = $this->getLuceneIndex($this->indexDirectory);
 }
Example #11
0
 public function testAnalyzer()
 {
     $currentAnalyzer = Analyzer::getDefault();
     $this->assertTrue($currentAnalyzer instanceof AnalyzerInterface);
     /** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num */
     $newAnalyzer = new Common\Utf8Num();
     Analyzer::setDefault($newAnalyzer);
     $this->assertTrue(Analyzer::getDefault() === $newAnalyzer);
     // Set analyzer to the default value (used in other tests)
     Analyzer::setDefault($currentAnalyzer);
 }
 public function __construct(FrontendController $frontendController, $moduleName)
 {
     parent::__construct($frontendController, $moduleName);
     $this->controllerRoutes = array('/' => array('GET' => 'showSearchResults'), '/rebuild-index' => array('GET' => 'rebuildIndex'));
     Analyzer::setDefault(new CaseInsensitive());
     QueryParser::setDefaultEncoding('UTF-8');
     $this->registerService('index', 'generateIndex');
     $cmsPage = $frontendController->getCmsPage();
     if ($cmsPage !== null) {
         $cmsPage->setLastModified(date('Y-m-d H:i:s'));
     }
 }
Example #13
0
 /**
  * Get the ZendSearch lucene index instance associated with this instance.
  *
  * @return \ZendSearch\Lucene\Index
  */
 protected function getIndex()
 {
     if (!$this->index) {
         $path = rtrim(Config::get('search.connections.zend.path'), '/') . '/' . $this->name;
         try {
             $this->index = \ZendSearch\Lucene\Lucene::open($path);
         } catch (\ZendSearch\Exception\ExceptionInterface $e) {
             $this->index = \ZendSearch\Lucene\Lucene::create($path);
         } catch (\ErrorException $e) {
             if (!file_exists($path)) {
                 throw new \Exception("'path' directory does not exist for the 'zend' search driver: '" . rtrim(Config::get('search.connections.zend.path'), '/') . "'");
             }
             throw $e;
         }
         \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive());
     }
     return $this->index;
 }
Example #14
0
 /**
  * opens or creates the given lucene index
  *
  * @throws SetUpException
  */
 public function openOrCreate()
 {
     $indexFolder = $this->files->setUpIndexFolder();
     $storage = $indexFolder->getStorage();
     $localPath = $storage->getLocalFolder($indexFolder->getInternalPath());
     //let lucene search for numbers as well as words
     Analyzer::setDefault(new CaseInsensitive());
     // can we use the index?
     if ($indexFolder->nodeExists('v0.6.0')) {
         // correct index present
         $this->index = Lucene::open($localPath);
     } else {
         $this->logger->info('recreating outdated lucene index');
         $indexFolder->delete();
         $this->index = Lucene::create($localPath);
         $indexFolder->newFile('v0.6.0');
     }
 }
Example #15
0
 public function search($expression, $page = 1, $conditions = null, $indexName = null)
 {
     if (!$expression) {
         throw new NotFoundHttpException('Empty expression');
     }
     if (!$indexName) {
         $indexName = $this->kernel->getContainer()->getParameter('symbio_fulltext_search.' . Crawler::DEFAULT_INDEX_PARAM);
     }
     if (mb_strlen($expression, 'utf-8') > 2) {
         $index = $this->indexManager->getIndex($indexName);
         Analyzer::setDefault(new CaseInsensitive());
         $query = $this->prepareQuery($expression, $conditions);
         $results = $index->find($query);
         // strankovani
         $paginator = $this->kernel->getContainer()->get('knp_paginator');
         $pagination = $paginator->paginate($results, $page, $this->kernel->getContainer()->getParameter('symbio_fulltext_search.items_on_page'));
         return array('expression' => $expression, 'pagination' => $pagination, 'pgdata' => $pagination->getPaginationData());
     }
     return false;
 }
 /**
  * Gets the index mapped by the given lucene identifier.
  *
  * @param string $identifier The lucene identifier.
  *
  * @return \ZendSearch\Lucene\Index The lucene index.
  */
 public function getIndex($identifier)
 {
     $config = $this->getConfig($identifier);
     $path = $config['path'];
     if (!$this->checkPath($path)) {
         $this->indexes[$identifier] = Lucene::create($path);
     } else {
         $this->indexes[$identifier] = Lucene::open($path);
     }
     Analyzer::setDefault(new $config['analyzer']());
     $this->indexes[$identifier]->setMaxBufferedDocs($config['max_buffered_docs']);
     $this->indexes[$identifier]->setMaxMergeDocs($config['max_merge_docs']);
     $this->indexes[$identifier]->setMergeFactor($config['merge_factor']);
     ZfFilesystem::setDefaultFilePermissions($config['permissions']);
     if ($config['auto_optimized']) {
         $this->indexes[$identifier]->optimize();
     }
     QueryParser::setDefaultEncoding($config['query_parser_encoding']);
     return $this->indexes[$identifier];
 }
Example #17
0
 /**
  * Lists all Post models.
  * @return mixed
  */
 public function actionIndex()
 {
     $searchModel = new PostSearch();
     $dataProvider = $searchModel->search(Yii::$app->request->post());
     //setlocale(LC_ALL, 'en_US.UTF-8');
     setlocale(LC_CTYPE, 'ru_RU.UTF-8');
     //Lucene\Lucene::setDefaultSearchField('contents');
     Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
     Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
     Lucene\Lucene::setResultSetLimit(10);
     // create blog posts index located in /data/posts_index ,make sure the folder is writable
     $index = Lucene\Lucene::create('data/posts_index');
     $posts = Post::find()->all();
     //var_dump($posts);die();
     // iterate through posts and build the index
     foreach ($posts as $p) {
         $doc = new Lucene\Document();
         $doc->addField(Lucene\Document\Field::UnIndexed('entry_id', $p->id));
         $doc->addField(Lucene\Document\Field::Keyword('title', $p->title));
         $doc->addField(Lucene\Document\Field::text('contents', $p->content));
         $index->addDocument($doc);
     }
     // commit the index
     $index->commit();
     //Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
     // explode the search query to individual words
     $words = explode(' ', urldecode(Yii::$app->getRequest()->getQueryParam('q')));
     // start a search query and add a term for each word to it
     $query = new Lucene\Search\Query\MultiTerm();
     foreach ($words as $w) {
         $query->addTerm(new Lucene\Index\Term($w));
     }
     // open and query the index
     $index = Lucene\Lucene::open('data/posts_index');
     $results = $index->find($query);
     // the search results
     //var_dump($results);
     return $this->render('index', ['searchModel' => $searchModel, 'dataProvider' => $dataProvider, 'search' => $results, 'query' => $query]);
 }
Example #18
0
 /**
  * インデックスファイルを生成
  */
 public static function updateIndex()
 {
     if (empty(self::$igo)) {
         self::$igo = new Tagger(array('dict_dir' => LIB_DIR . 'ipadic', 'reduce_mode' => true));
     }
     Analyzer::setDefault(new Utf8());
     // 索引の作成
     $index = Lucene::create(CACHE_DIR . self::INDEX_NAME);
     foreach (Listing::pages() as $page) {
         if (empty($page)) {
             continue;
         }
         $wiki = Factory::Wiki($page);
         // 読む権限がない場合スキップ
         if (!$wiki->isReadable() || $wiki->isHidden()) {
             continue;
         }
         /*
         			// HTML出力
         			$html[] = '<html><head>';
         			$html[] = '<meta http-equiv="Content-type" content="text/html; charset=UTF-8"/>';
         			$html[] = '<title>' . $wiki->title() . '</title>';
         			$html[] = '</head>';
         			$html[] = '<body>' . $wiki->render() . '</body>';
         			$html[] = '</html>';
         */
         $doc = new LuceneDoc();
         $doc->addField(Field::Text('title', $wiki->title()));
         // Store document URL to identify it in the search results
         $doc->addField(Field::Text('url', $wiki->uri()));
         // Index document contents
         //$contents = join(" ", self::$igo->wakati(strip_tags($wiki->render())));
         $contents = strip_tags($wiki->render());
         $doc->addField(Field::UnStored('contents', $contents));
         // 索引へ文書の登録
         $index->addDocument($doc);
     }
     $index->optimize();
 }
Example #19
0
 /**
  * Adds a document to this segment.
  *
  * @param \ZendSearch\Lucene\Document $document
  * @throws LuceneException\UnsupportedMethodCallException
  */
 public function addDocument(Document $document)
 {
     $storedFields = array();
     $docNorms = array();
     $similarity = AbstractSimilarity::getDefault();
     foreach ($document->getFieldNames() as $fieldName) {
         $field = $document->getField($fieldName);
         if ($field->storeTermVector) {
             /**
              * @todo term vector storing support
              */
             throw new LuceneException\UnsupportedMethodCallException('Store term vector functionality is not supported yet.');
         }
         if ($field->isIndexed) {
             if ($field->isTokenized) {
                 $analyzer = Analyzer\Analyzer::getDefault();
                 $analyzer->setInput($field->value, $field->encoding);
                 $position = 0;
                 $tokenCounter = 0;
                 while (($token = $analyzer->nextToken()) !== null) {
                     $tokenCounter++;
                     $term = new Index\Term($token->getTermText(), $field->name);
                     $termKey = $term->key();
                     if (!isset($this->_termDictionary[$termKey])) {
                         // New term
                         $this->_termDictionary[$termKey] = $term;
                         $this->_termDocs[$termKey] = array();
                         $this->_termDocs[$termKey][$this->_docCount] = array();
                     } elseif (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                         // Existing term, but new term entry
                         $this->_termDocs[$termKey][$this->_docCount] = array();
                     }
                     $position += $token->getPositionIncrement();
                     $this->_termDocs[$termKey][$this->_docCount][] = $position;
                 }
                 if ($tokenCounter == 0) {
                     // Field contains empty value. Treat it as non-indexed and non-tokenized
                     $field = clone $field;
                     $field->isIndexed = $field->isTokenized = false;
                 } else {
                     $docNorms[$field->name] = chr($similarity->encodeNorm($similarity->lengthNorm($field->name, $tokenCounter) * $document->boost * $field->boost));
                 }
             } elseif (($fieldUtf8Value = $field->getUtf8Value()) == '') {
                 // Field contains empty value. Treat it as non-indexed and non-tokenized
                 $field = clone $field;
                 $field->isIndexed = $field->isTokenized = false;
             } else {
                 $term = new Index\Term($fieldUtf8Value, $field->name);
                 $termKey = $term->key();
                 if (!isset($this->_termDictionary[$termKey])) {
                     // New term
                     $this->_termDictionary[$termKey] = $term;
                     $this->_termDocs[$termKey] = array();
                     $this->_termDocs[$termKey][$this->_docCount] = array();
                 } elseif (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                     // Existing term, but new term entry
                     $this->_termDocs[$termKey][$this->_docCount] = array();
                 }
                 $this->_termDocs[$termKey][$this->_docCount][] = 0;
                 // position
                 $docNorms[$field->name] = chr($similarity->encodeNorm($similarity->lengthNorm($field->name, 1) * $document->boost * $field->boost));
             }
         }
         if ($field->isStored) {
             $storedFields[] = $field;
         }
         $this->addField($field);
     }
     foreach ($this->_fields as $fieldName => $field) {
         if (!$field->isIndexed) {
             continue;
         }
         if (!isset($this->_norms[$fieldName])) {
             $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0))), $this->_docCount);
         }
         if (isset($docNorms[$fieldName])) {
             $this->_norms[$fieldName] .= $docNorms[$fieldName];
         } else {
             $this->_norms[$fieldName] .= chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
         }
     }
     $this->addStoredFields($storedFields);
 }
Example #20
0
 public function testFilteredTokensQueryParserProcessing()
 {
     $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');
     $this->assertEquals(count(\ZendSearch\Lucene\Analysis\Analyzer\Analyzer::getDefault()->tokenize('123456787654321')), 0);
     $hits = $index->find('"PEAR developers" AND Home AND 123456787654321');
     $this->assertEquals(count($hits), 1);
     $expectedResultset = array(array(1, 0.16827, 'IndexSource/contributing.wishlist.html'));
     foreach ($hits as $resId => $hit) {
         $this->assertEquals($hit->id, $expectedResultset[$resId][0]);
         $this->assertTrue(abs($hit->score - $expectedResultset[$resId][1]) < 1.0E-6);
         $this->assertEquals($hit->path, $expectedResultset[$resId][2]);
     }
 }
Example #21
0
 /**
  * Query specific matches highlighting
  *
  * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
  */
 protected function _highlightMatches(Highlighter $highlighter)
 {
     $words = array();
     $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
     $tokens = Lucene\Analysis\Analyzer\Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
     $lowerTermText = $this->_lowerTerm !== null ? $this->_lowerTerm->text : null;
     $upperTermText = $this->_upperTerm !== null ? $this->_upperTerm->text : null;
     if ($this->_inclusive) {
         foreach ($tokens as $token) {
             $termText = $token->getTermText();
             if (($lowerTermText == null || $lowerTermText <= $termText) && ($upperTermText == null || $termText <= $upperTermText)) {
                 $words[] = $termText;
             }
         }
     } else {
         foreach ($tokens as $token) {
             $termText = $token->getTermText();
             if (($lowerTermText == null || $lowerTermText < $termText) && ($upperTermText == null || $termText < $upperTermText)) {
                 $words[] = $termText;
             }
         }
     }
     $highlighter->highlight($words);
 }
Example #22
0
 /**
  * Query specific matches highlighting
  *
  * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
  */
 protected function _highlightMatches(Highlighter $highlighter)
 {
     $words = array();
     $prefix = Index\Term::getPrefix($this->_term->text, $this->_prefixLength);
     $prefixByteLength = strlen($prefix);
     $prefixUtf8Length = Index\Term::getLength($prefix);
     $termLength = Index\Term::getLength($this->_term->text);
     $termRest = substr($this->_term->text, $prefixByteLength);
     // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
     $termRestLength = strlen($termRest);
     $scaleFactor = 1 / (1 - $this->_minimumSimilarity);
     $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
     $tokens = Lucene\Analysis\Analyzer\Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
     foreach ($tokens as $token) {
         $termText = $token->getTermText();
         if (substr($termText, 0, $prefixByteLength) == $prefix) {
             // Calculate similarity
             $target = substr($termText, $prefixByteLength);
             $maxDistance = isset($this->_maxDistances[strlen($target)]) ? $this->_maxDistances[strlen($target)] : $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, strlen($target));
             if ($termRestLength == 0) {
                 // we don't have anything to compare.  That means if we just add
                 // the letters for current term we get the new word
                 $similarity = $prefixUtf8Length == 0 ? 0 : 1 - strlen($target) / $prefixUtf8Length;
             } elseif (strlen($target) == 0) {
                 $similarity = $prefixUtf8Length == 0 ? 0 : 1 - $termRestLength / $prefixUtf8Length;
             } elseif ($maxDistance < abs($termRestLength - strlen($target))) {
                 //just adding the characters of term to target or vice-versa results in too many edits
                 //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
                 //given this optimal circumstance, the edit distance cannot be less than 5.
                 //which is 8-3 or more precisesly abs(3-8).
                 //if our maximum edit distance is 4, then we can discard this word
                 //without looking at it.
                 $similarity = 0;
             } else {
                 $similarity = 1 - levenshtein($termRest, $target) / ($prefixUtf8Length + min($termRestLength, strlen($target)));
             }
             if ($similarity > $this->_minimumSimilarity) {
                 $words[] = $termText;
             }
         }
     }
     $highlighter->highlight($words);
 }
Example #23
0
 /**
  * Highlight text using specified View helper or callback function.
  *
  * @param string|array $words  Words to highlight. Words could be organized using the array or string.
  * @param callback $callback   Callback method, used to transform (highlighting) text.
  * @param array    $params     Array of additionall callback parameters passed through into it
  *                             (first non-optional parameter is an HTML fragment for highlighting)
  * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
  * @return string
  */
 public function highlightExtended($words, $callback, $params = array())
 {
     if (!is_array($words)) {
         $words = array($words);
     }
     $wordsToHighlightList = array();
     $analyzer = Analyzer\Analyzer::getDefault();
     foreach ($words as $wordString) {
         $wordsToHighlightList[] = $analyzer->tokenize($wordString, $this->_doc->encoding);
     }
     $wordsToHighlight = call_user_func_array('array_merge', $wordsToHighlightList);
     if (count($wordsToHighlight) == 0) {
         return $this->_doc->saveHTML();
     }
     $wordsToHighlightFlipped = array();
     foreach ($wordsToHighlight as $id => $token) {
         $wordsToHighlightFlipped[$token->getTermText()] = $id;
     }
     if (!is_callable($callback)) {
         throw new InvalidArgumentException('$viewHelper parameter mast be a View Helper name, View Helper object or callback.');
     }
     $xpath = new \DOMXPath($this->_doc);
     $matchedNodes = $xpath->query("/html/body");
     foreach ($matchedNodes as $matchedNode) {
         $this->_highlightNodeRecursive($matchedNode, $wordsToHighlightFlipped, $callback, $params);
     }
 }
Example #24
0
 protected function getIndex()
 {
     if ($this->index != null) {
         return $this->index;
     }
     \ZendSearch\Lucene\Search\QueryParser::setDefaultEncoding('utf-8');
     \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive());
     \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(\ZendSearch\Lucene\Search\QueryParser::B_AND);
     try {
         $index = \ZendSearch\Lucene\Lucene::open($this->getIndexPath());
     } catch (\ZendSearch\Lucene\Exception\RuntimeException $ex) {
         $index = \ZendSearch\Lucene\Lucene::create($this->getIndexPath());
     }
     $this->index = $index;
     return $index;
 }
 private function getIndex() : SearchIndexInterface
 {
     $path = $this->getIndexPath();
     if (!$this->checkIndexPath($path)) {
         $index = Lucene::create($path);
     } else {
         $index = Lucene::open($path);
     }
     Analyzer::setDefault(new CaseInsensitive());
     LuceneFilesystem::setDefaultFilePermissions(0775);
     QueryParser::setDefaultEncoding('UTF-8');
     $index->setMaxBufferedDocs($this->options['max_buffered_docs']);
     $index->setMaxMergeDocs($this->options['max_merge_docs']);
     $index->setMergeFactor($this->options['merge_factor']);
     $index->optimize();
     return $index;
 }
 public function sortOutHlCoords()
 {
     //Lucene operators
     $operators = array("and", "or", "not");
     $config = $this->getServiceLocator()->get('config');
     $paramInfo = $this->sortOutParams($config);
     //collect building blocks
     $resLoc = $paramInfo['resLoc'];
     $site = $paramInfo['site'];
     $collection = $paramInfo['collection'];
     $container = $paramInfo['container'];
     $reel = $paramInfo['reel'];
     $page = $paramInfo['page'];
     //the all important query
     $hl = $this->params()->fromRoute('hl', '');
     //coordinates to pass back
     $coords = [];
     //pass back empty coordinate set if any of these parameters
     //are missing
     if ($this->isNullOrEmpty($reel) || $this->isNullOrEmpty($page) || $this->isNullOrEmpty($hl)) {
         return array("imgloc" => '', "indloc" => '', "coords" => $coords);
     }
     //if
     //location of files - ODW file layout
     $resLoc .= '/' . $site . '/' . $collection . '/' . $container . '/' . $reel . '/odw/' . $page . '/';
     $imgLoc = $resLoc . '../../' . $page . '.jpg';
     $iaLoc = $resLoc . 'ia/' . $page . '.jpg';
     //not all images will have IA derivative
     if (file_exists($iaLoc) !== false) {
         $imgLoc = $iaLoc;
     }
     $indLoc = $resLoc . 'index/imgworks';
     //need index directory and segments file to be valid lucene layout
     if (!file_exists($indLoc . '/segments.gen')) {
         return array("imgloc" => $imgLoc, "indloc" => $indLoc, "coords" => $coords);
     }
     //get coordinates from Lucene index
     $searchText = '';
     //use Lucene tokens for searching
     $queryTokens = Analyzer\Analyzer::getDefault()->tokenize($hl);
     foreach ($queryTokens as $token) {
         $searchTerm = $token->getTermText();
         if (!in_array($searchTerm, $operators)) {
             //no snowball analyzer or other stemming option
             //in Lucene 2.x, so create stem seperately
             $searchText .= stem_english($searchTerm);
             //Lucene dropped this limitation after 2.x
             //but this version won't wildcard without
             //at least 3 characters in term
             if (strlen($searchTerm) >= 3) {
                 $searchText .= "* ";
             }
             //if strlen
         }
         //if
     }
     //foreach
     //now do search
     $index = Lucene\Lucene::open($indLoc);
     $searchResults = $index->find($searchText);
     //assemble results
     foreach ($searchResults as $searchResult) {
         array_push($coords, [$searchResult->x1, $searchResult->y1, $searchResult->x2, $searchResult->y2]);
     }
     //foreach
     //pass back image and index location in addition to results
     return array("imgloc" => $imgLoc, "indloc" => $indLoc, "coords" => $coords);
 }
Example #27
0
 /**
  * Process last range query term (closed interval)
  *
  * @throws \ZendSearch\Lucene\Search\Exception\QueryParserException
  */
 public function closedRQLastTerm()
 {
     $tokens = Analyzer\Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
     if (count($tokens) > 1) {
         throw new QueryParserException('Range query boundary terms must be non-multiple word terms');
     } elseif (count($tokens) == 1) {
         $from = new Index\Term(reset($tokens)->getTermText(), $this->_context->getField());
     } else {
         $from = null;
     }
     $tokens = Analyzer\Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
     if (count($tokens) > 1) {
         throw new QueryParserException('Range query boundary terms must be non-multiple word terms');
     } elseif (count($tokens) == 1) {
         $to = new Index\Term(reset($tokens)->getTermText(), $this->_context->getField());
     } else {
         $to = null;
     }
     if ($from === null && $to === null) {
         throw new QueryParserException('At least one range query boundary term must be non-empty term');
     }
     $rangeQuery = new Query\Range($from, $to, true);
     $entry = new QueryEntry\Subquery($rangeQuery);
     $this->_context->addEntry($entry);
 }
 public function luceneSearchAetCommunications($index, $searchKeyWord)
 {
     \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(new \ZendSearch\Lucene\Analysis\Analyzer\Common\Text\CaseInsensitive());
     $dbIds = array();
     $searchValue = SearchHelper::utf8_to_ascii(mb_strtolower($searchKeyWord, "UTF-8"));
     $em = $this->getDoctrine()->getManager();
     /*
     $term1 = new \ZendSearch\Lucene\Index\Term($searchValue, 'firstname');
     //$subquery1 = new \ZendSearch\Lucene\Search\Query\Term($term1);
     
     $term2 = new \ZendSearch\Lucene\Index\Term($searchValue, 'title');
     //$subquery2 = new \ZendSearch\Lucene\Search\Query\Term($term2);
     
     $term3 = new \ZendSearch\Lucene\Index\Term($searchValue, 'shortdesc');
     //$subquery3 = new \ZendSearch\Lucene\Search\Query\Term($term3);
     
     $term4 = new \ZendSearch\Lucene\Index\Term($searchValue, 'body');
     //$subquery4 = new \ZendSearch\Lucene\Search\Query\Term($term4);
     
     $term5 = new \ZendSearch\Lucene\Index\Term($searchValue, 'author');
     //$subquery5 = new \ZendSearch\Lucene\Search\Query\Term($term5);
     
     
     $terms = array($term1, $term2, $term3, $term4, $term5);
     //$subqueries = array($subquery1, $subquery2, $subquery3, $subquery4, $subquery5);
     $signs = array(null, null, null, null, null);
     
     $termsQuery = new \ZendSearch\Lucene\Search\Query\MultiTerm($terms,$signs);
     */
     \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(\ZendSearch\Lucene\Search\QueryParser::B_OR);
     $query = \ZendSearch\Lucene\Search\QueryParser::parse($searchValue, 'UTF-8');
     $foundDocuments = $index->find($query);
     //$docNum = count($foundDocuments);
     foreach ($foundDocuments as $foundDoc) {
         $dbIds[] = $foundDoc->dbId;
     }
     $results = $em->getRepository('AetCommunicationBundle:Communication')->findById($dbIds);
     return $results;
 }
Example #29
0
 /**
  * Query specific matches highlighting
  *
  * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
  */
 protected function _highlightMatches(Highlighter $highlighter)
 {
     $words = array();
     $matchExpression = '/^' . str_replace(array('\\?', '\\*'), array('.', '.*'), preg_quote($this->_pattern->text, '/')) . '$/';
     ErrorHandler::start(E_WARNING);
     $result = preg_match('/\\pL/u', 'a');
     ErrorHandler::stop();
     if ($result == 1) {
         // PCRE unicode support is turned on
         // add Unicode modifier to the match expression
         $matchExpression .= 'u';
     }
     $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
     $tokens = Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
     foreach ($tokens as $token) {
         if (preg_match($matchExpression, $token->getTermText()) === 1) {
             $words[] = $token->getTermText();
         }
     }
     $highlighter->highlight($words);
 }
Example #30
0
 /**
  * Подсветка результата поиска в html-фрагменте
  *
  * @param string $inputHTMLFragment исходный фрагмента html
  * @param string $inputEncoding Кодировка исходного фрагмента html
  * @param string $outputEncoding Кодировка резульрирующего фрагмента html
  * @return string html фрагмент с подсвеченными результатами поиска
  */
 public function highlightMatches($inputHTMLFragment, $inputEncoding = 'utf-8', $outputEncoding = 'utf-8')
 {
     $highlightedHTMLFragment = '';
     if (!empty($this->lastQuery)) {
         $queryParser = QueryParser::parse($this->lastQuery);
         /**
          * Убираем фильтры стоп-слов для подсветки слов с псевдокорнями типа 'под' и т.п.
          */
         Analyzer::setDefault($this->analyzerForHighlighter);
         $highlightedHTMLFragment = $queryParser->htmlFragmentHighlightMatches($inputHTMLFragment, $inputEncoding, new Highlighter());
         Analyzer::setDefault($this->defaultAnalyzer);
         $highlightedHTMLFragment = mb_convert_encoding($highlightedHTMLFragment, $outputEncoding, 'utf-8');
     }
     return $highlightedHTMLFragment;
 }