/**
 * Stores the injected analyzer configuration on the instance and sets the
 * query parser's default encoding to UTF-8.
 *
 * @param array         $filterClasses          Kept verbatim on the instance.
 * @param array         $stopWordFiles          Kept verbatim on the instance.
 * @param FilterFactory $stopwordsFilterFactory Kept verbatim on the instance.
 */
public function __construct(array $filterClasses, array $stopWordFiles, FilterFactory $stopwordsFilterFactory)
 {
     $this->stopwordsFilterFactory = $stopwordsFilterFactory;
     $this->stopWordFiles = $stopWordFiles;
     $this->filterClasses = $filterClasses;
     // Static setter on QueryParser, not per-instance state.
     QueryParser::setDefaultEncoding('utf-8');
 }
Exemple #2
0
 /**
  * Get index
  *
  * Builds the shared index on first call and reuses it afterwards. The first
  * call configures a case-insensitive analyzer (optionally with stop-word and
  * Morphy filters taken from the `zend_search` config section), installs it
  * as the default analyzer, applies the result-set limit and query encoding,
  * and then opens the index at the configured location, creating it when the
  * location does not resolve to an existing path.
  *
  * @return \ZendSearch\Lucene\Index
  * @throws \InvalidArgumentException When a configured stop-word file or
  *                                   Morphy dictionary path cannot be resolved.
  */
 private function index()
 {
     if (!isset(self::$index)) {
         $analyzer = new CaseInsensitive();
         if ($this->config()->exists('zend_search', 'stop_words')) {
             $stop_word_filter = new StopWords();
             $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));
             if ($words !== false) {
                 $stop_word_filter->loadFromFile($words);
             } else {
                 throw new \InvalidArgumentException('Path not found');
             }
             $analyzer->addFilter($stop_word_filter);
         }
         if ($this->config()->exists('zend_search', 'morphy_dicts')) {
             $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));
             if ($morphy_dicts !== false) {
                 $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
             } else {
                 throw new \InvalidArgumentException('Path not found');
             }
         }
         Analyzer::setDefault($analyzer);
         Lucene::setResultSetLimit($this->limit);
         QueryParser::setDefaultEncoding($this->config()->getCharset());
         // Fixed: this used to read `$this->config() - get(...)`, i.e. a
         // subtraction plus a call to an undefined global get() function.
         $index = $this->config()->get('zend_search', 'index');
         $path = $this->getRealPath($index);
         // A falsy resolved path means the index does not exist yet: create it
         // at the configured (unresolved) location.
         self::$index = $path ? Lucene::open($path) : Lucene::create($index);
     }
     return self::$index;
 }
 /**
  * {@inheritdoc}
  *
  * Sets the default analyzer and query encoding, then registers the
  * `zendsearch.*` container entries: the (initially empty) index path map, a
  * run-once initializer that opens or creates each configured index, the
  * resulting index collection, and a multi-searcher spanning all of them.
  */
 public function register(Application $app)
 {
     Analyzer::setDefault(new CaseInsensitive());
     QueryParser::setDefaultEncoding('UTF-8');
     $app['zendsearch.indices_path'] = array();
     $app['zendsearch.indices.initializer'] = $app->protect(function () use ($app) {
         // The static flag makes every call after the first one a no-op.
         static $alreadyBuilt = false;
         if (!$alreadyBuilt) {
             $alreadyBuilt = true;
             $opened = array();
             foreach ($app['zendsearch.indices_path'] as $indexName => $indexPath) {
                 if (file_exists($indexPath)) {
                     $opened[$indexName] = Lucene::open($indexPath);
                 } else {
                     $opened[$indexName] = Lucene::create($indexPath);
                 }
             }
             $app['zendsearch.indices_collection'] = $opened;
         }
     });
     $app['zendsearch.indices'] = $app->share(function ($container) {
         $initialize = $container['zendsearch.indices.initializer'];
         $initialize();
         return $container['zendsearch.indices_collection'];
     });
     $app['zendsearch.multisearcher'] = $app->share(function ($container) {
         $initialize = $container['zendsearch.indices.initializer'];
         $initialize();
         $searcher = new MultiSearcher();
         foreach ($container['zendsearch.indices'] as $luceneIndex) {
             $searcher->addIndex($luceneIndex);
         }
         return $searcher;
     });
     // 'zendsearch' is an alias for the multi-searcher.
     $app['zendsearch'] = $app->share(function ($container) {
         return $container['zendsearch.multisearcher'];
     });
 }
Exemple #4
0
 /**
  * Home page with optional exam search.
  *
  * Without a `q` request parameter the page is rendered with an empty result
  * set. With one, the `exams` Lucene index is searched using a prefix query
  * (`q` followed by `*`), the hit primary keys are de-duplicated, and the
  * matching exams are loaded through the Exam repository, which receives the
  * current user id (or -1 for admins) plus the session's courses and their
  * faculties, so only exams visible to the current user are paginated.
  *
  * @param Request $request
  *
  * @return \Symfony\Component\HttpFoundation\Response
  *
  * @Route("/", name="ubc_exam_main_homepage")
  */
 public function indexAction(Request $request)
 {
     $pagination = array();
     $q = $request->get('q');
     // empty() already covers null, so no separate is_null() check is needed.
     if (!empty($q)) {
         // search the index
         QueryParser::setDefaultOperator(QueryParser::B_AND);
         Wildcard::setMinPrefixLength(1);
         $hits = $this->get('ivory_lucene_search')->getIndex('exams')->find($q . '*');
         $ids = array();
         foreach ($hits as $hit) {
             $ids[] = $hit->pk;
         }
         $ids = array_unique($ids);
         // search the db by ids, because we need to get the exams only visible for current user
         if (!empty($ids)) {
             // find out the current user registered courses and faculty
             $em = $this->getDoctrine()->getManager();
             $coursesWithKeys = $request->getSession()->get('courses') ?: array();
             $courses = array_keys($coursesWithKeys);
             $faculties = array_values($em->getRepository('UBCExamMainBundle:SubjectFaculty')->getFacultiesByCourses($courses));
             // Admins are passed -1 instead of their own user id.
             $userId = $this->get('security.authorization_checker')->isGranted('ROLE_ADMIN') ? -1 : $this->getCurrentUserId();
             $qb = $this->getDoctrine()->getRepository('UBCExamMainBundle:Exam')->queryExamsByIds($ids, $userId, $faculties, $courses);
             $paginator = $this->get('knp_paginator');
             $pagination = $paginator->paginate($qb, $request->query->get('page', 1), 20);
         }
     }
     return $this->render('UBCExamMainBundle:Default:index.html.twig', array('pagination' => $pagination, 'q' => $q, 'subjectCode' => '', 'subjectCodeLabel' => ''));
 }
 /**
  * Configures the query encoding and default analyzer from the
  * `caseSensitivity` / `parseNumeric` flags, then resolves the index
  * directory alias and loads the Lucene index from it.
  */
 public function init()
 {
     QueryParser::setDefaultEncoding('UTF-8');
     // Pick one of four analyzers: case-(in)sensitive, with or without numbers.
     if ($this->caseSensitivity) {
         $analyzer = $this->parseNumeric ? new Utf8Num() : new Utf8();
     } else {
         $analyzer = $this->parseNumeric ? new CaseInsensitiveNum() : new CaseInsensitive();
     }
     Analyzer::setDefault($analyzer);
     $resolvedDirectory = Yii::getAlias($this->indexDirectory);
     $this->indexDirectory = FileHelper::normalizePath($resolvedDirectory);
     $this->luceneIndex = $this->getLuceneIndex($this->indexDirectory);
 }
 /**
  * Builds a search query from a free-text expression plus extra conditions.
  *
  * Punctuation is stripped from the expression, and an ASCII transliteration
  * is searched as well when it differs from the original. A multi-word
  * expression contributes the whole phrase plus a prefix query for every
  * distinct word longer than two characters; a single word contributes one
  * prefix query.
  *
  * @param string $expressionOrigin Raw search expression.
  * @param mixed  $conditions       Array of condition strings; anything that
  *                                 is not a non-empty array is ignored.
  *                                 Conditions starting with `url:` are
  *                                 rewritten to match on Page::URIS_KEY.
  *
  * @return mixed The Boolean query object when no condition was appended;
  *               otherwise the result of string-concatenating onto it.
  */
 public function prepareQuery($expressionOrigin, $conditions)
 {
     setlocale(LC_ALL, "cs_CZ.UTF-8");
     $expressionOrigin = strtr($expressionOrigin, array(',' => '', ';' => '', "'" => '', '"' => '', '-' => '', '_' => '', '/' => '', '\\' => '', '+' => '', '=' => '', '?' => '', '.' => '', '!' => ''));
     // Transliteration can introduce apostrophes, so strip them again.
     $expressionTranslit = str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $expressionOrigin));
     $expressions = array($expressionOrigin);
     if ($expressionOrigin != $expressionTranslit) {
         $expressions[] = $expressionTranslit;
     }
     $queryWords = array();
     $query = new Boolean();
     foreach ($expressions as $expression) {
         // more words in expression
         if (count($expressionWords = explode(' ', $expression)) > 1) {
             // whole expression
             $query->addSubquery(QueryParser::parse('"' . $expression . '"', 'utf-8'));
             // expression words
             foreach ($expressionWords as $expressionWord) {
                 // Strict comparison: a loose in_array() would treat numeric-looking
                 // words such as "1e3" and "1000" as the same word.
                 if (mb_strlen($expressionWord, 'utf-8') > 2 && !in_array($expressionWord, $queryWords, true)) {
                     $queryWords[] = $expressionWord;
                     $query->addSubquery(QueryParser::parse($expressionWord . '*', 'utf-8'));
                 }
             }
         } else {
             $query->addSubquery(QueryParser::parse($expression . '*', 'utf-8'));
         }
     }
     // query-specific conditions
     if (is_array($conditions) && count($conditions)) {
         foreach ($conditions as $condition) {
             // TODO - how to find items containing a URL in Lucene? A wildcard cannot be used...
             // Offset is an explicit 0: passing null is deprecated since PHP 8.1.
             if (mb_strpos($condition, 'url:', 0, 'utf-8') === 0) {
                 $uri = trim(substr($condition, 4), '"');
                 if (strpos($uri, '://') !== false) {
                     // Drop scheme and host, keeping the path from its first slash.
                     $uri = substr($uri, strpos($uri, '://') + 3);
                     $uri = substr($uri, strpos($uri, '/'));
                 }
                 // NOTE(review): `.=` on the Boolean object turns $query into a
                 // string via its string conversion - confirm this is intended.
                 $query .= ' AND ' . Page::URIS_KEY . ':"' . $uri . '"';
             } else {
                 $query .= ' AND ' . $condition;
             }
         }
     }
     // In the dev environment the final query is stored in the session.
     if ($this->kernel->getEnvironment() == 'dev') {
         $session = new Session();
         $session->set(self::QUERY_HANDLER, $query);
     }
     return $query;
 }
 /**
  * Search::rawQuery() is exercised with a query string, a closure returning
  * a query string, and a closure returning a query object; each form is
  * checked through its hit count.
  */
 public function testSearchRawQuery()
 {
     $fromString = Search::rawQuery('description:big');
     $this->assertEquals(2, $fromString->count());

     $fromStringClosure = Search::rawQuery(function () {
         return 'description:big';
     });
     $this->assertEquals(2, $fromStringClosure->count());

     $fromObjectClosure = Search::rawQuery(function () {
         $boolean = new Boolean();
         $boolean->addSubquery(QueryParser::parse('description:big OR name:monitor'));
         return $boolean;
     });
     $this->assertEquals(3, $fromObjectClosure->count());
 }
 /**
  * Parses a query string
  *
  * The string is tokenized by the lexer and each token type is fed through
  * the parser instance's process() method. An empty token list yields an
  * Insignificant query. When parsing fails with a QueryParserException and
  * exception suppression is enabled on the instance, the string is instead
  * tokenized with the default analyzer and returned as a MultiTerm query.
  *
  * @param string $strQuery
  * @param string $encoding Encoding of $strQuery; the instance's default
  *                         encoding is used when null.
  * @throws \ZendSearch\Lucene\Search\Exception\QueryParserException
  * @throws \ZendSearch\Lucene\Exception\RuntimeException
  * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
  */
 public static function parse($strQuery, $encoding = null)
 {
     self::_getInstance();
     // Reset FSM if previous parse operation didn't return it into a correct state
     self::$_instance->reset();
     try {
         // Start from a clean per-parse state on the shared instance.
         self::$_instance->_encoding = $encoding !== null ? $encoding : self::$_instance->_defaultEncoding;
         self::$_instance->_lastToken = null;
         self::$_instance->_context = new QueryParserContext(self::$_instance->_encoding);
         self::$_instance->_contextStack = array();
         self::$_instance->_tokens = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
         // Empty query
         if (count(self::$_instance->_tokens) == 0) {
             return new Query\Insignificant();
         }
         foreach (self::$_instance->_tokens as $token) {
             try {
                 self::$_instance->_currentToken = $token;
                 self::$_instance->process($token->type);
                 self::$_instance->_lastToken = $token;
             } catch (\Exception $e) {
                 // A "no rule" failure is identified by its message text and
                 // reported as a syntax error at the offending token's position.
                 if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
                     throw new QueryParserException('Syntax error at char position ' . $token->position . '.', 0, $e);
                 }
                 // Any other failure is rethrown as a RuntimeException, chaining the original.
                 throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
             }
         }
         // A non-empty context stack after the last token means an unclosed parenthesis.
         if (count(self::$_instance->_contextStack) != 0) {
             throw new QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.');
         }
         return self::$_instance->_context->getQuery();
     } catch (QueryParserException $e) {
         if (self::$_instance->_suppressQueryParsingExceptions) {
             // Fallback: treat the input as plain text and build a MultiTerm
             // query from the default analyzer's tokens.
             $queryTokens = Analyzer\Analyzer::getDefault()->tokenize($strQuery, self::$_instance->_encoding);
             $query = new Query\MultiTerm();
             // Sign true is passed when the default operator is AND, null otherwise.
             $termsSign = self::$_instance->_defaultOperator == self::B_AND ? true : null;
             foreach ($queryTokens as $token) {
                 $query->addTerm(new Index\Term($token->getTermText()), $termsSign);
             }
             return $query;
         } else {
             throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
         }
     }
 }
Exemple #9
0
 /**
  * Lists all Post models and runs a Lucene search over them.
  *
  * Every call recreates the index at `data/posts_index`, indexes all posts,
  * commits, then reopens the index and searches it for the words in the `q`
  * query parameter. Both the data provider and the search hits are passed
  * to the `index` view.
  *
  * @return mixed
  */
 public function actionIndex()
 {
     $searchModel = new PostSearch();
     $dataProvider = $searchModel->search(Yii::$app->request->post());
     //setlocale(LC_ALL, 'en_US.UTF-8');
     setlocale(LC_CTYPE, 'ru_RU.UTF-8');
     //Lucene\Lucene::setDefaultSearchField('contents');
     Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
     Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
     Lucene\Lucene::setResultSetLimit(10);
     // create blog posts index located in /data/posts_index ,make sure the folder is writable
     // NOTE(review): the index is created and fully rebuilt on every request - confirm this is intended.
     $index = Lucene\Lucene::create('data/posts_index');
     $posts = Post::find()->all();
     //var_dump($posts);die();
     // iterate through posts and build the index
     foreach ($posts as $p) {
         $doc = new Lucene\Document();
         $doc->addField(Lucene\Document\Field::UnIndexed('entry_id', $p->id));
         $doc->addField(Lucene\Document\Field::Keyword('title', $p->title));
         $doc->addField(Lucene\Document\Field::text('contents', $p->content));
         $index->addDocument($doc);
     }
     // commit the index
     $index->commit();
     //Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
     // explode the search query to individual words
     // NOTE(review): the query parameter is presumably already URL-decoded by
     // the framework, so urldecode() here may decode twice - verify.
     $words = explode(' ', urldecode(Yii::$app->getRequest()->getQueryParam('q')));
     // start a search query and add a term for each word to it
     // Terms are added as typed: the words are not passed through the analyzer here.
     $query = new Lucene\Search\Query\MultiTerm();
     foreach ($words as $w) {
         $query->addTerm(new Lucene\Index\Term($w));
     }
     // open and query the index
     $index = Lucene\Lucene::open('data/posts_index');
     $results = $index->find($query);
     // the search results
     //var_dump($results);
     return $this->render('index', ['searchModel' => $searchModel, 'dataProvider' => $dataProvider, 'search' => $results, 'query' => $query]);
 }
 /**
  * A non-inclusive range query `{business TO by}` must highlight words
  * strictly between the bounds ("buss", "but") and leave the bounds
  * themselves and "bus" untouched.
  */
 public function testHighlightRangeNonInclusive()
 {
     $markup = '<HTML>' . '<HEAD><TITLE>Page title</TITLE></HEAD>' . '<BODY>' . 'Test of text using range query. ' . 'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"' . '</BODY>' . '</HTML>';
     $rangeQuery = Search\QueryParser::parse('{business TO by}');
     $highlighted = $rangeQuery->highlightMatches($markup);

     $this->assertTrue(false !== strpos($highlighted, '<b style="color:black;background-color:#66ffff">buss</b>'));
     $this->assertTrue(false !== strpos($highlighted, '<b style="color:black;background-color:#66ffff">but</b>'));
     // The skipped words must still appear without any highlighting markup.
     $this->assertTrue(false !== strpos($highlighted, 'has to skip "business", "by" and "bus"'));
 }
Exemple #11
0
 /**
  * A boolean query whose required phrase subquery matches nothing must
  * parse, rewrite against the sample index into per-field subqueries, and
  * finally optimize to an empty query.
  */
 public function testBooleanQueryWithNonExistingPhraseSubquery()
 {
     $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');
     $query = Search\QueryParser::parse('"Non-existing phrase" AND Home');
     // assertEquals() takes (expected, actual); the arguments used to be
     // swapped, which inverted the expected/actual labels in failure output.
     $this->assertEquals('+("Non-existing phrase") +(Home)', $query->__toString());
     $this->assertEquals('+((pathkeyword:"non existing phrase") (path:"non existing phrase") (modified:"non existing phrase") (contents:"non existing phrase")) +(pathkeyword:home path:home modified:home contents:home)', $query->rewrite($index)->__toString());
     $this->assertEquals('<EmptyQuery>', $query->rewrite($index)->optimize($index)->__toString());
 }
 /**
  * Handles an omitted (default) operator that is followed by NOT.
  *
  * Unless the default operator is AND, the OR action is applied first; the
  * NOT action is always applied afterwards.
  */
 public function emptyNotOperatorAction()
 {
     $defaultIsAnd = QueryParser::getDefaultOperator() == QueryParser::B_AND;
     if (!$defaultIsAnd) {
         $this->orOperatorAction();
     }
     // Process NOT operator
     $this->notOperatorAction();
 }
Exemple #13
0
 /**
  * Runs a search against the wrapped index.
  *
  * @param mixed $query Passed to the index unchanged unless $parse is truthy.
  * @param bool  $parse When truthy, $query is prefixed with `first_name:`
  *                     and parsed into a query object first.
  *
  * @return mixed Whatever the index's find() returns.
  */
 public function find($query, $parse = FALSE)
 {
     $criteria = $parse
         ? \ZendSearch\Lucene\Search\QueryParser::parse('first_name:' . $query)
         : $query;
     return $this->index->find($criteria);
 }
 /**
  * Turns a query string into a query object by delegating to
  * QueryParser::parse().
  *
  * @param string $strQuery
  * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
  */
 public function parse($strQuery)
 {
     $parsedQuery = QueryParser::parse($strQuery);
     return $parsedQuery;
 }
 /**
  * Searches the per-language Lucene index and groups the hits by document type.
  *
  * Each hit is resolved to a document through SearchModel using the hit's
  * `ID` field; hits whose document has no ID are skipped. Title and
  * description are run through the highlighter. Results are grouped under
  * the document type: for types other than `core_page` the group title is
  * looked up in the module manifest (falling back to the type name), while
  * `core_page` hits use a fixed group title and chunked description
  * highlighting.
  *
  * @param string $keywords
  * @param string $language
  *
  * @return \stdClass[] Keyed by document type; each entry has `title` and `results`.
  */
 protected function getCmsSearchResults($keywords, $language)
 {
     $searchModel = new SearchModel($this->cmsController->getDB());
     // The index lives in <site root>index/<language>.
     $searchIndex = Lucene::open($this->cmsController->getCore()->getSiteRoot() . 'index' . DIRECTORY_SEPARATOR . $language);
     /*$query = new Boolean(); // new Fuzzy()
     		$query->addSubquery(QueryParser::parse(
     			$keywords
     		), true);*/
     // Suppression is switched on before parsing the raw keywords.
     QueryParser::suppressQueryParsingExceptions();
     $query = QueryParser::parse($keywords);
     //$hits = $searchIndex->find($query, 'score', SORT_NUMERIC, SORT_DESC);
     $hits = $searchIndex->find($query);
     //echo'<pre>'; var_dump(/*$hits, */$indexSize, $documents);
     $searchResultsArr = array();
     $highlighter = new CmsSearchHighlighter($keywords);
     //$highlighter = new DefaultHighlighter();
     foreach ($hits as $hit) {
         /** @var QueryHit $hit */
         $searchResult = new \stdClass();
         // Returns the Zend_Search_Lucene_Document object for this hit
         /** @var Document $document */
         $document = $hit->getDocument();
         $doc = $searchModel->getDocumentByID($document->getFieldUtf8Value('ID'));
         // Skip hits whose resolved document carries no ID.
         if ($doc->getID() === null) {
             continue;
         }
         $fldType = $doc->getType();
         if ($fldType !== 'core_page') {
             $contentChunks = $highlighter->highlightMatches(strip_tags($doc->getDescription()), 'UTF-8');
             if ($contentChunks == '') {
                 $contentChunks = null;
             }
             // Title, description and URL are taken from the resolved
             // document, not from the Lucene hit's stored fields.
             $searchResult->title = $highlighter->highlightMatches(strip_tags($doc->getTitle()), 'UTF-8');
             $searchResult->description = $contentChunks;
             $searchResult->url = $doc->getPath();
             // First hit of this type: create its group and resolve a display name.
             if (isset($searchResultsArr[$fldType]) === false) {
                 $stmntModName = $this->cmsController->getDB()->prepare("\n\t\t\t\t\t\tSELECT manifest_content FROM cms_mod_available WHERE name = ?\n\t\t\t\t\t");
                 $resModName = $this->cmsController->getDB()->select($stmntModName, array($fldType));
                 $displayName = $fldType;
                 try {
                     // NOTE(review): $resModName[0] is read without checking that a row was returned - confirm.
                     $manifestObj = JsonUtils::decode($resModName[0]->manifest_content);
                     // Prefer the name in the requested language, then English.
                     if (isset($manifestObj->name->{$language})) {
                         $displayName = $manifestObj->name->{$language};
                     } elseif (isset($manifestObj->name->en)) {
                         $displayName = $manifestObj->name->en;
                     }
                 } catch (\Exception $e) {
                     // Deliberately ignored: the type name stays as the display name.
                 }
                 $searchResultsArr[$fldType] = new \stdClass();
                 $searchResultsArr[$fldType]->title = $displayName;
                 $searchResultsArr[$fldType]->results = array();
             }
             $searchResultsArr[$doc->getType()]->results[] = $searchResult;
         } else {
             // Page descriptions additionally go through chunked highlighting.
             $contentChunks = $this->createChunkedHighlighting($highlighter->highlightMatches(strip_tags($doc->getDescription()), 'UTF-8'));
             if ($contentChunks == '') {
                 $contentChunks = null;
             }
             // Title, description and URL are taken from the resolved
             // document, not from the Lucene hit's stored fields.
             $searchResult->title = $highlighter->highlightMatches(strip_tags($doc->getTitle()), 'UTF-8');
             $searchResult->description = $contentChunks;
             $searchResult->url = $doc->getPath();
             if (isset($searchResultsArr[$fldType]) === false) {
                 $searchResultsArr[$fldType] = new \stdClass();
                 $searchResultsArr[$fldType]->title = 'Andere Suchresultate';
                 $searchResultsArr[$fldType]->results = array();
             }
             $searchResultsArr[$doc->getType()]->results[] = $searchResult;
         }
     }
     return $searchResultsArr;
 }
 /**
  * Returns the Lucene index, building it on first use.
  *
  * The first call sets the parser encoding, the default analyzer and the AND
  * default operator, then opens the index at getIndexPath(), creating it
  * when opening raises a Lucene RuntimeException.
  */
 protected function getIndex()
 {
     if ($this->index == null) {
         \ZendSearch\Lucene\Search\QueryParser::setDefaultEncoding('utf-8');
         \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive());
         \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(\ZendSearch\Lucene\Search\QueryParser::B_AND);
         try {
             $this->index = \ZendSearch\Lucene\Lucene::open($this->getIndexPath());
         } catch (\ZendSearch\Lucene\Exception\RuntimeException $openFailure) {
             // Opening failed: fall back to creating the index at the same path.
             $this->index = \ZendSearch\Lucene\Lucene::create($this->getIndexPath());
         }
     }
     return $this->index;
 }
 /**
  * Searches the given Lucene index for a keyword and loads the matching
  * Communication entities.
  *
  * The keyword is lower-cased and converted to ASCII, parsed with OR as the
  * default operator, and the `dbId` of every hit is used to fetch entities
  * from the Communication repository.
  *
  * @param mixed  $index         Object exposing find(); receives the parsed query.
  * @param string $searchKeyWord Raw keyword.
  *
  * @return mixed Result of the repository's findById() call.
  */
 public function luceneSearchAetCommunications($index, $searchKeyWord)
 {
     \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(new \ZendSearch\Lucene\Analysis\Analyzer\Common\Text\CaseInsensitive());
     $matchedIds = array();
     $normalizedKeyword = SearchHelper::utf8_to_ascii(mb_strtolower($searchKeyWord, "UTF-8"));
     $entityManager = $this->getDoctrine()->getManager();
     \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(\ZendSearch\Lucene\Search\QueryParser::B_OR);
     $parsedQuery = \ZendSearch\Lucene\Search\QueryParser::parse($normalizedKeyword, 'UTF-8');
     foreach ($index->find($parsedQuery) as $hit) {
         $matchedIds[] = $hit->dbId;
     }
     return $entityManager->getRepository('AetCommunicationBundle:Communication')->findById($matchedIds);
 }
Exemple #18
0
 /**
  * Highlights the matches of the last search query in an HTML fragment.
  *
  * Returns an empty string when no last query is recorded. While
  * highlighting, the default analyzer is temporarily replaced by the
  * highlighter analyzer and restored afterwards, even if highlighting throws.
  *
  * @param string $inputHTMLFragment Source HTML fragment
  * @param string $inputEncoding Encoding of the source HTML fragment
  * @param string $outputEncoding Encoding of the resulting HTML fragment
  * @return string HTML fragment with the search matches highlighted
  */
 public function highlightMatches($inputHTMLFragment, $inputEncoding = 'utf-8', $outputEncoding = 'utf-8')
 {
     $highlightedHTMLFragment = '';
     if (!empty($this->lastQuery)) {
         // The query is parsed while the regular default analyzer is still active.
         $queryParser = QueryParser::parse($this->lastQuery);
         /**
          * Drop the stop-word filters so that words with pseudo-roots such as
          * 'под' etc. get highlighted.
          */
         Analyzer::setDefault($this->analyzerForHighlighter);
         try {
             $highlightedHTMLFragment = $queryParser->htmlFragmentHighlightMatches($inputHTMLFragment, $inputEncoding, new Highlighter());
         } finally {
             // Always put the regular analyzer back; otherwise an exception
             // here would leave the highlighter analyzer installed as default.
             Analyzer::setDefault($this->defaultAnalyzer);
         }
         $highlightedHTMLFragment = mb_convert_encoding($highlightedHTMLFragment, $outputEncoding, 'utf-8');
     }
     return $highlightedHTMLFragment;
 }
 /**
  * Returns the Lucene index registered under the given identifier.
  *
  * The index is opened when checkPath() accepts its configured path and
  * created otherwise. Every call re-applies the configured analyzer, buffer
  * and merge settings, file permissions and query parser encoding, and
  * optimizes the index when `auto_optimized` is set.
  *
  * @param string $identifier The lucene identifier.
  *
  * @return \ZendSearch\Lucene\Index The lucene index.
  */
 public function getIndex($identifier)
 {
     $settings = $this->getConfig($identifier);
     $indexPath = $settings['path'];
     if ($this->checkPath($indexPath)) {
         $this->indexes[$identifier] = Lucene::open($indexPath);
     } else {
         $this->indexes[$identifier] = Lucene::create($indexPath);
     }
     $luceneIndex = $this->indexes[$identifier];
     $analyzerClass = $settings['analyzer'];
     Analyzer::setDefault(new $analyzerClass());
     $luceneIndex->setMaxBufferedDocs($settings['max_buffered_docs']);
     $luceneIndex->setMaxMergeDocs($settings['max_merge_docs']);
     $luceneIndex->setMergeFactor($settings['merge_factor']);
     ZfFilesystem::setDefaultFilePermissions($settings['permissions']);
     if ($settings['auto_optimized']) {
         $luceneIndex->optimize();
     }
     QueryParser::setDefaultEncoding($settings['query_parser_encoding']);
     return $luceneIndex;
 }
Exemple #20
0
<?php

require 'vendor/autoload.php';
use ZendSearch\Lucene\Lucene;
use ZendSearch\Lucene\MultiSearcher;
use ZendSearch\Lucene\Search\QueryParser;

// JSON search endpoint: stems the words of ?q=, searches the Lucene index at
// ../_index and emits one {href, name, preview} entry per hit.

// Stems one word with the Porter stemmer.
$stem = function ($e) {
    return \Porter::Stem($e);
};
// Accept only a string: a missing parameter or an array (?q[]=x) becomes ''
// instead of reaching htmlentities() as null or as an array.
$q = isset($_GET['q']) && is_string($_GET['q']) ? $_GET['q'] : '';
$q = htmlentities($q);
$q = implode('+', array_map($stem, explode(' ', $q)));
header('Content-Type: application/json');
$output = array();
if ($q) {
    $indexer = Lucene::open('../_index');
    $search = new MultiSearcher(array($indexer));
    $query = QueryParser::parse($q);
    $result = $search->find($query);
    foreach ($result as $hit) {
        $title = strtolower(str_replace('-', ' ', $hit->name));
        $resultUrl = '../' . $hit->fileName;
        // The preview collapses whitespace and strips the title from the body.
        // The title is escaped so regex metacharacters (or a '/') in it cannot
        // alter or break the pattern, and it is left out when empty because an
        // empty alternative would match at every offset of the body.
        $pattern = $title === '' ? '/\\s+/i' : '/\\s+|' . preg_quote($title, '/') . '/i';
        $preview = substr(preg_replace($pattern, ' ', $hit->body), 0, 300) . '...';
        $output[] = array('href' => $resultUrl, 'name' => ucfirst($title), 'preview' => $query->htmlFragmentHighlightMatches($preview));
    }
}
echo json_encode($output);
 /**
  * Searches the given contexts (index names) and returns hits sorted by
  * descending score.
  *
  * A RuntimeException whose message mentions "non-wildcard characters" is
  * treated as "no results"; any other RuntimeException is rethrown.
  *
  * @param string $query    Query string, parsed with the Lucene query parser.
  * @param array  $contexts Names of the indices to search.
  *
  * @return array List of Hit objects carrying score, hash and every
  *               document field as metadata.
  */
 public function search($query, $contexts = array())
 {
     $searcher = new Lucene\MultiSearcher();
     foreach ($contexts as $indexName) {
         $searcher->addIndex($this->getLuceneIndex($indexName));
     }
     $query = Lucene\Search\QueryParser::parse($query);
     try {
         $luceneHits = $searcher->find($query);
     } catch (\RuntimeException $e) {
         if (!preg_match('&non-wildcard characters&', $e->getMessage())) {
             throw $e;
         }
         $luceneHits = array();
     }
     $hits = array();
     foreach ($luceneHits as $luceneHit) {
         /* @var Lucene\Search\QueryHit $luceneHit */
         $luceneDocument = $luceneHit->getDocument();
         $hit = new Hit();
         $hit->setScore($luceneHit->score);
         $hit->setHash($luceneDocument->getFieldValue(self::HASH_FIELDNAME));
         foreach ($luceneDocument->getFieldNames() as $fieldName) {
             $hit->addMetadata($fieldName, $luceneDocument->getFieldValue($fieldName));
         }
         $hits[] = $hit;
     }
     // The MultiSearcher does not support sorting, so we do it here.
     // usort() needs a negative/zero/positive integer. The previous comparator
     // returned a boolean, which never signals "A before B" and is deprecated
     // in PHP 8. Higher scores sort first.
     usort($hits, function (HitInterface $documentA, HitInterface $documentB) {
         $scoreA = $documentA->getScore();
         $scoreB = $documentB->getScore();
         if ($scoreA == $scoreB) {
             return 0;
         }
         return $scoreA < $scoreB ? 1 : -1;
     });
     return $hits;
 }
 /**
  * Builds a 'signs style' boolean query from the context entries, e.g.
  * '+term1 term2 -term3 +(<subquery1>) ...'
  *
  * Entries without an explicit sign inherit the default one: required when
  * the parser's default operator is AND, optional otherwise.
  *
  * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
  */
 public function _signStyleExpressionQuery()
 {
     // true = required, null = optional
     $fallbackSign = QueryParser::getDefaultOperator() == QueryParser::B_AND ? true : null;
     $booleanQuery = new Query\Boolean();
     foreach ($this->_entries as $position => $contextEntry) {
         if ($this->_signs[$position] !== null) {
             $entrySign = $this->_signs[$position];
         } else {
             $entrySign = $fallbackSign;
         }
         $booleanQuery->addSubquery($contextEntry->getQuery($this->_encoding), $entrySign);
     }
     return $booleanQuery;
 }
 /**
  * Opens the index at getIndexPath() when checkIndexPath() accepts it,
  * creates it otherwise, then applies the analyzer, file permissions, query
  * encoding and buffer/merge options and optimizes the index.
  */
 private function getIndex() : SearchIndexInterface
 {
     $indexPath = $this->getIndexPath();
     $luceneIndex = $this->checkIndexPath($indexPath)
         ? Lucene::open($indexPath)
         : Lucene::create($indexPath);

     // Static, library-wide settings.
     Analyzer::setDefault(new CaseInsensitive());
     LuceneFilesystem::setDefaultFilePermissions(0775);
     QueryParser::setDefaultEncoding('UTF-8');

     // Per-index tuning taken from the options array.
     $luceneIndex->setMaxBufferedDocs($this->options['max_buffered_docs']);
     $luceneIndex->setMaxMergeDocs($this->options['max_merge_docs']);
     $luceneIndex->setMergeFactor($this->options['merge_factor']);
     $luceneIndex->optimize();

     return $luceneIndex;
 }
 /**
  * Searches the given Lucene index for a keyword and loads the matching
  * User entities.
  *
  * The keyword is lower-cased and converted to ASCII, parsed with OR as the
  * default operator, and the `dbId` of every hit is used to fetch entities
  * from the User repository.
  *
  * @param mixed  $index         Object exposing find(); receives the parsed query.
  * @param string $searchKeyWord Raw keyword.
  *
  * @return mixed Result of the repository's findById() call.
  */
 public function luceneSearchAetUsers($index, $searchKeyWord)
 {
     \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(new \ZendSearch\Lucene\Analysis\Analyzer\Common\Text\CaseInsensitive());
     $matchedIds = array();
     $normalizedKeyword = SearchHelper::utf8_to_ascii(mb_strtolower($searchKeyWord, "UTF-8"));
     $entityManager = $this->getDoctrine()->getManager();
     \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(\ZendSearch\Lucene\Search\QueryParser::B_OR);
     $parsedQuery = \ZendSearch\Lucene\Search\QueryParser::parse($normalizedKeyword, 'UTF-8');
     foreach ($index->find($parsedQuery) as $hit) {
         $matchedIds[] = $hit->dbId;
     }
     return $entityManager->getRepository('AetAnnuaireBundle:User')->findById($matchedIds);
 }
 /**
  * The builder's parse() must return a query equal to the one
  * QueryParser::parse() produces, for an empty and a one-word query.
  */
 public function testParse()
 {
     foreach (array('', 'test') as $queryString) {
         $this->assertEquals(QueryParser::parse($queryString), $this->builder->parse($queryString));
     }
 }
Exemple #26
0
 /**
  * Translates one condition into a Lucene subquery and adds it to $query.
  * The same $query instance is returned. Conditions carrying a truthy
  * 'lat' entry are skipped without touching the query.
  *
  * @param \ZendSearch\Lucene\Search\Query\Boolean $query
  * @param array $condition - field      : name of the field (or list of names)
  *                         - value      : value to match
  *                         - required   : must match
  *                         - prohibited : must not match
  *                         - phrase     : match as a phrase
  *                         - filter     : filter results on value
  *                         - fuzzy      : fuzziness value (0 - 1)
  *
  * @return \ZendSearch\Lucene\Search\Query\Boolean
  */
 public function addConditionToQuery($query, array $condition)
 {
     if (array_get($condition, 'lat')) {
         return $query;
     }

     $expression = trim($this->escape(array_get($condition, 'value')));

     // Phrase and filter conditions are matched as a quoted phrase.
     if (array_get($condition, 'phrase') || array_get($condition, 'filter')) {
         $expression = '"' . $expression . '"';
     }

     // Fuzzy matching: append '~' (plus the fuzziness when it is a number in
     // [0, 1]) to every space-separated word.
     if (isset($condition['fuzzy']) && false !== $condition['fuzzy']) {
         $fuzzyValue = $condition['fuzzy'];
         $inRange = is_numeric($fuzzyValue) && $fuzzyValue >= 0 && $fuzzyValue <= 1;
         $suffix = '~' . ($inRange ? $fuzzyValue : '');
         $fuzzyWords = array_map(function ($word) use ($suffix) {
             return $word . $suffix;
         }, explode(' ', $expression));
         $expression = implode(' ', $fuzzyWords);
     }

     // true = required, false = prohibited, null = optional; 'required' wins.
     if (!empty($condition['required'])) {
         $sign = true;
     } elseif (!empty($condition['prohibited'])) {
         $sign = false;
     } else {
         $sign = null;
     }

     // An empty field or '*' means no field restriction.
     $field = array_get($condition, 'field');
     if (empty($field) || '*' === $field) {
         $field = null;
     }

     if (is_array($field)) {
         $perField = array();
         foreach ($field as $fieldName) {
             $perField[] = trim($fieldName) . ':(' . $expression . ')';
         }
         $expression = implode(' OR ', $perField);
     } elseif ($field) {
         $expression = trim($field) . ':(' . $expression . ')';
     }

     $query->addSubquery(\ZendSearch\Lucene\Search\QueryParser::parse($expression), $sign);
     return $query;
 }
Exemple #27
0
 /**
  * Runs a query against this index and returns the matching hits.
  *
  * A string query is parsed with Search\QueryParser::parse(); any other value
  * must be a Search\Query\AbstractQuery instance. Pending changes are committed
  * before the query is rewritten, optimized and executed.
  *
  * Documents scoring 0 are dropped, and collection stops once
  * Lucene::getResultSetLimit() hits have been gathered (a limit of 0 disables
  * the cap). If the best score is greater than 1, every hit's score is divided
  * by it.
  *
  * Called with a single argument, hits are ordered by score descending and then
  * by document id ascending. Any further arguments are read via func_get_args()
  * as sort criteria: a field name (or 'score'), optionally followed by up to two
  * integer flags that are forwarded to array_multisort() in the order given.
  * Omitted flags default to SORT_ASC and SORT_REGULAR. Document id ascending is
  * always appended as the final tie-breaker.
  *
  * @param \ZendSearch\Lucene\Search\Query\AbstractQuery|string $query
  * @return \ZendSearch\Lucene\Search\QueryHit[] empty array when nothing matched
  * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException if $query is neither a string nor a query object
  * @throws \ZendSearch\Lucene\Exception\RuntimeException if a sort field name is not a string, is not a known
  *                                                        field, or reading its value fails
  */
 public function find($query)
 {
     if (is_string($query)) {
         $query = Search\QueryParser::parse($query);
     } elseif (!$query instanceof Search\Query\AbstractQuery) {
         throw new InvalidArgumentException('Query must be a string or ZendSearch\\Lucene\\Search\\Query object');
     }
     $this->commit();
     // $hits, $scores and $ids are parallel arrays: index i in each refers to the same hit.
     $hits = array();
     $scores = array();
     $ids = array();
     $query = $query->rewrite($this)->optimize($this);
     $query->execute($this);
     $topScore = 0;
     $resultSetLimit = Lucene::getResultSetLimit();
     foreach ($query->matchedDocs() as $id => $num) {
         $docScore = $query->score($id, $this);
         if ($docScore != 0) {
             $hit = new Search\QueryHit($this);
             $hit->document_id = $hit->id = $id;
             $hit->score = $docScore;
             $hits[] = $hit;
             $ids[] = $id;
             $scores[] = $docScore;
             if ($docScore > $topScore) {
                 $topScore = $docScore;
             }
         }
         // A limit of 0 means "unlimited"; otherwise stop as soon as enough hits are collected.
         if ($resultSetLimit != 0 && count($hits) >= $resultSetLimit) {
             break;
         }
     }
     if (count($hits) == 0) {
         // skip sorting, which may cause an error on an empty index
         return array();
     }
     // Normalize hit scores only when the best one exceeds 1. $scores keeps the raw
     // values; dividing every score by the same positive number does not change ordering.
     if ($topScore > 1) {
         foreach ($hits as $hit) {
             $hit->score /= $topScore;
         }
     }
     if (func_num_args() == 1) {
         // sort by scores
         array_multisort($scores, SORT_DESC, SORT_NUMERIC, $ids, SORT_ASC, SORT_NUMERIC, $hits);
     } else {
         // sort by given field names
         $argList = func_get_args();
         $fieldNames = $this->getFieldNames();
         $sortArgs = array();
         // PHP 5.3 now expects all arguments to array_multisort be passed by
         // reference (if it's invoked through call_user_func_array());
         // since constants can't be passed by reference, create some placeholder variables.
         $sortReg = SORT_REGULAR;
         $sortAsc = SORT_ASC;
         $sortNum = SORT_NUMERIC;
         $sortFieldValues = array();
         // Index 0 of $argList is the query itself, so sort criteria start at 1.
         for ($count = 1; $count < count($argList); $count++) {
             $fieldName = $argList[$count];
             if (!is_string($fieldName)) {
                 throw new RuntimeException('Field name must be a string.');
             }
             if (strtolower($fieldName) == 'score') {
                 $sortArgs[] =& $scores;
             } else {
                 if (!in_array($fieldName, $fieldNames)) {
                     throw new RuntimeException('Wrong field name.');
                 }
                 if (!isset($sortFieldValues[$fieldName])) {
                     $valuesArray = array();
                     foreach ($hits as $hit) {
                         try {
                             $value = $hit->getDocument()->getFieldValue($fieldName);
                         } catch (\Exception $e) {
                             // An exception whose message contains 'not found' is treated as a
                             // missing field value (null); any other failure is rethrown.
                             if (strpos($e->getMessage(), 'not found') === false) {
                                 throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
                             } else {
                                 $value = null;
                             }
                         }
                         $valuesArray[] = $value;
                     }
                     // Collect loaded values in $sortFieldValues
                     // Required for PHP 5.3 which translates references into values when source
                     // variable is destroyed
                     $sortFieldValues[$fieldName] = $valuesArray;
                 }
                 $sortArgs[] =& $sortFieldValues[$fieldName];
             }
             // Consume up to two integer flags following the field name and fill in
             // whichever of the two was not supplied.
             if ($count + 1 < count($argList) && is_integer($argList[$count + 1])) {
                 $count++;
                 $sortArgs[] =& $argList[$count];
                 if ($count + 1 < count($argList) && is_integer($argList[$count + 1])) {
                     $count++;
                     $sortArgs[] =& $argList[$count];
                 } else {
                     // Only one flag given: if it was a sort order, add the default sort
                     // type; otherwise add the default sort order.
                     if ($argList[$count] == SORT_ASC || $argList[$count] == SORT_DESC) {
                         $sortArgs[] =& $sortReg;
                     } else {
                         $sortArgs[] =& $sortAsc;
                     }
                 }
             } else {
                 // No flags given: default to ascending, regular comparison.
                 $sortArgs[] =& $sortAsc;
                 $sortArgs[] =& $sortReg;
             }
         }
         // Sort by id's if values are equal
         $sortArgs[] =& $ids;
         $sortArgs[] =& $sortAsc;
         $sortArgs[] =& $sortNum;
         // Array to be sorted
         $sortArgs[] =& $hits;
         // Do sort
         call_user_func_array('array_multisort', $sortArgs);
     }
     return $hits;
 }