/**
 * Stores the filter configuration and switches the query parser to UTF-8.
 *
 * @param array         $filterClasses          Filter classes, stored as given.
 * @param array         $stopWordFiles          Stop-word files, stored as given.
 * @param FilterFactory $stopwordsFilterFactory Factory kept for later use.
 */
public function __construct(
    array $filterClasses,
    array $stopWordFiles,
    FilterFactory $stopwordsFilterFactory
) {
    // Process-wide parser setting: query strings are read as UTF-8.
    QueryParser::setDefaultEncoding('utf-8');

    $this->filterClasses          = $filterClasses;
    $this->stopWordFiles          = $stopWordFiles;
    $this->stopwordsFilterFactory = $stopwordsFilterFactory;
}
/**
 * Get index
 *
 * Lazily configures the default analyzer (optional stop-word and Morphy
 * filters taken from the "zend_search" config section), the result-set limit
 * and the query-parser encoding, then opens the index — or creates it when
 * the configured location does not resolve to an existing path. The index is
 * cached in self::$index, so the setup runs once per process.
 *
 * @return \ZendSearch\Lucene\Index
 *
 * @throws \InvalidArgumentException when a configured stop-word or Morphy
 *                                   dictionary path cannot be resolved
 */
private function index()
{
    if (!isset(self::$index)) {
        $analyzer = new CaseInsensitive();

        if ($this->config()->exists('zend_search', 'stop_words')) {
            $stop_word_filter = new StopWords();
            $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));
            if ($words === false) {
                throw new \InvalidArgumentException('Path not found');
            }
            $stop_word_filter->loadFromFile($words);
            $analyzer->addFilter($stop_word_filter);
        }

        if ($this->config()->exists('zend_search', 'morphy_dicts')) {
            $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));
            if ($morphy_dicts === false) {
                throw new \InvalidArgumentException('Path not found');
            }
            $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
        }

        Analyzer::setDefault($analyzer);
        Lucene::setResultSetLimit($this->limit);
        QueryParser::setDefaultEncoding($this->config()->getCharset());

        // Fixed: this used to read "$this->config() - get(...)", i.e. a
        // subtraction plus a call to an undefined global function get().
        $index = $this->config()->get('zend_search', 'index');
        $path = $this->getRealPath($index);

        // An unresolved path means the index does not exist yet: create it
        // at the configured (unresolved) location.
        self::$index = $path ? Lucene::open($path) : Lucene::create($index);
    }

    return self::$index;
}
/**
 * {@inheritdoc}
 *
 * Registers the "zendsearch.*" services: a one-shot initializer that opens
 * (or creates) every index listed in "zendsearch.indices_path", the resulting
 * index collection, and a MultiSearcher spanning all of them.
 */
public function register(Application $app)
{
    Analyzer::setDefault(new CaseInsensitive());
    QueryParser::setDefaultEncoding('UTF-8');

    $app['zendsearch.indices_path'] = array();

    $app['zendsearch.indices.initializer'] = $app->protect(function () use ($app) {
        // The static flag makes every call after the first a no-op.
        static $initialized = false;

        if (!$initialized) {
            $initialized = true;

            $opened = array();
            foreach ($app['zendsearch.indices_path'] as $name => $path) {
                if (file_exists($path)) {
                    $opened[$name] = Lucene::open($path);
                } else {
                    $opened[$name] = Lucene::create($path);
                }
            }

            $app['zendsearch.indices_collection'] = $opened;
        }
    });

    $app['zendsearch.indices'] = $app->share(function ($app) {
        $app['zendsearch.indices.initializer']();

        return $app['zendsearch.indices_collection'];
    });

    $app['zendsearch.multisearcher'] = $app->share(function ($app) {
        $app['zendsearch.indices.initializer']();

        $searcher = new MultiSearcher();
        foreach ($app['zendsearch.indices'] as $luceneIndex) {
            $searcher->addIndex($luceneIndex);
        }

        return $searcher;
    });

    // "zendsearch" is an alias for the multi-index searcher.
    $app['zendsearch'] = $app->share(function ($app) {
        return $app['zendsearch.multisearcher'];
    });
}
/**
 * Home page. Without a "q" request parameter it renders an empty result list;
 * with one it runs a prefix search against the "exams" Lucene index and
 * paginates the matching exams that the repository query returns for the
 * current user.
 *
 * @param Request $request
 *
 * @return \Symfony\Component\HttpFoundation\Response
 *
 * @Route("/", name="ubc_exam_main_homepage")
 */
public function indexAction(Request $request)
{
    $pagination = array();
    $q = $request->get('q');

    // empty() already covers null; the is_string() guard keeps an array-valued
    // "q" (e.g. ?q[]=x) from being concatenated into the query as "Array".
    if (is_string($q) && !empty($q)) {
        // search the index
        QueryParser::setDefaultOperator(QueryParser::B_AND);
        Wildcard::setMinPrefixLength(1);
        $hits = $this->get('ivory_lucene_search')->getIndex('exams')->find($q . '*');

        $ids = array();
        foreach ($hits as $hit) {
            $ids[] = $hit->pk;
        }
        $ids = array_unique($ids);

        // search the db by ids, because we need to get the exams only visible for current user
        if (!empty($ids)) {
            // find out the current user registered courses and faculty
            $em = $this->getDoctrine()->getManager();
            $coursesWithKeys = $request->getSession()->get('courses') ?: array();
            $courses = array_keys($coursesWithKeys);
            $faculties = array_values(
                $em->getRepository('UBCExamMainBundle:SubjectFaculty')->getFacultiesByCourses($courses)
            );

            // Admins are passed the user id -1 instead of their own id.
            $userId = $this->get('security.authorization_checker')->isGranted('ROLE_ADMIN')
                ? -1
                : $this->getCurrentUserId();

            $qb = $this->getDoctrine()
                ->getRepository('UBCExamMainBundle:Exam')
                ->queryExamsByIds($ids, $userId, $faculties, $courses);

            $paginator = $this->get('knp_paginator');
            $pagination = $paginator->paginate($qb, $request->query->get('page', 1), 20);
        }
    }

    return $this->render('UBCExamMainBundle:Default:index.html.twig', array(
        'pagination' => $pagination,
        'q' => $q,
        'subjectCode' => '',
        'subjectCodeLabel' => '',
    ));
}
/**
 * Configures the query-parser encoding and the default analyzer from the
 * component's flags, then resolves the index directory and loads the index.
 */
public function init()
{
    QueryParser::setDefaultEncoding('UTF-8');

    // Pick the analyzer variant: case-sensitive or not, with or without
    // numeric token support.
    if ($this->caseSensitivity) {
        $analyzer = $this->parseNumeric ? new Utf8Num() : new Utf8();
    } else {
        $analyzer = $this->parseNumeric ? new CaseInsensitiveNum() : new CaseInsensitive();
    }
    Analyzer::setDefault($analyzer);

    $resolvedDirectory = Yii::getAlias($this->indexDirectory);
    $this->indexDirectory = FileHelper::normalizePath($resolvedDirectory);
    $this->luceneIndex = $this->getLuceneIndex($this->indexDirectory);
}
/**
 * Builds the search query for a user-entered expression.
 *
 * Punctuation is stripped from the expression, and an ASCII-transliterated
 * variant is searched as well when it differs from the original. A
 * multi-word expression is added as a phrase plus a prefix query for every
 * distinct word longer than two characters; a single word is added as one
 * prefix query.
 *
 * When $conditions is a non-empty array, each condition is appended as
 * " AND <condition>" text. NOTE: the first append converts the Boolean query
 * to its string form, so in that case a string is returned instead of a
 * query object.
 *
 * @param string      $expressionOrigin Raw search expression.
 * @param array|mixed $conditions       Extra conditions; "url:<value>" entries
 *                                      are rewritten to match on the URI path.
 *
 * @return Boolean|string
 */
public function prepareQuery($expressionOrigin, $conditions)
{
    // iconv's //TRANSLIT output depends on the active locale.
    setlocale(LC_ALL, "cs_CZ.UTF-8");

    $expressionOrigin = strtr($expressionOrigin, array(
        ',' => '', ';' => '', "'" => '', '"' => '', '-' => '', '_' => '', '/' => '',
        '\\' => '', '+' => '', '=' => '', '?' => '', '.' => '', '!' => '',
    ));

    $expressions = array($expressionOrigin);

    // iconv() returns false on failure; an empty transliteration would turn
    // into a bare "*" query below, so both cases are skipped.
    $transliterated = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $expressionOrigin);
    if ($transliterated !== false) {
        $expressionTranslit = str_replace("'", '', $transliterated);
        if ($expressionTranslit !== '' && $expressionOrigin !== $expressionTranslit) {
            $expressions[] = $expressionTranslit;
        }
    }

    $queryWords = array();
    $query = new Boolean();
    foreach ($expressions as $expression) {
        $expressionWords = explode(' ', $expression);

        // more words in expression
        if (count($expressionWords) > 1) {
            // whole expression
            $query->addSubquery(QueryParser::parse('"' . $expression . '"', 'utf-8'));

            // expression words
            foreach ($expressionWords as $expressionWord) {
                // Strict comparison: loose in_array() treats numeric strings
                // such as "1e3" and "1000" as equal.
                if (mb_strlen($expressionWord, 'utf-8') > 2 && !in_array($expressionWord, $queryWords, true)) {
                    $queryWords[] = $expressionWord;
                    $query->addSubquery(QueryParser::parse($expressionWord . '*', 'utf-8'));
                }
            }
        } else {
            $query->addSubquery(QueryParser::parse($expression . '*', 'utf-8'));
        }
    }

    // query-specific conditions
    if (is_array($conditions) && count($conditions)) {
        foreach ($conditions as $condition) {
            // TODO - how to find items containing a URL in Lucene? Wildcard cannot be used...
            // Offset is 0, not null: null for an int parameter is deprecated since PHP 8.1.
            if (mb_strpos($condition, 'url:', 0, 'utf-8') === 0) {
                $uri = trim(substr($condition, 4), '"');

                $schemeEnd = strpos($uri, '://');
                if ($schemeEnd !== false) {
                    // Drop scheme and host, keep the path. A URL without any
                    // path maps to the root path instead of leaking the host
                    // name through substr($uri, false).
                    $uri = substr($uri, $schemeEnd + 3);
                    $pathStart = strpos($uri, '/');
                    $uri = $pathStart === false ? '/' : substr($uri, $pathStart);
                }

                $query .= ' AND ' . Page::URIS_KEY . ':"' . $uri . '"';
            } else {
                $query .= ' AND ' . $condition;
            }
        }
    }

    // In dev, keep the last query in the session for inspection.
    if ($this->kernel->getEnvironment() == 'dev') {
        $session = new Session();
        $session->set(self::QUERY_HANDLER, $query);
    }

    return $query;
}
/**
 * rawQuery() accepts a query string, a closure returning a string, and a
 * closure returning a query object.
 */
public function testSearchRawQuery()
{
    // Plain query string.
    $fromString = Search::rawQuery('description:big');
    $this->assertEquals(2, $fromString->count());

    // Closure that yields a query string.
    $fromStringClosure = Search::rawQuery(function () {
        return 'description:big';
    });
    $this->assertEquals(2, $fromStringClosure->count());

    // Closure that yields a ready-made Boolean query.
    $fromObjectClosure = Search::rawQuery(function () {
        $boolean = new Boolean();
        $boolean->addSubquery(QueryParser::parse('description:big OR name:monitor'));

        return $boolean;
    });
    $this->assertEquals(3, $fromObjectClosure->count());
}
/**
 * Parses a query string
 *
 * Tokenizes $strQuery and feeds the tokens through the parser state machine.
 * An empty token list yields an Insignificant query. When parsing fails and
 * exception suppression is enabled, the string is instead tokenized with the
 * default analyzer and returned as a MultiTerm query.
 *
 * @param string $strQuery
 * @param string $encoding
 * @throws \ZendSearch\Lucene\Search\Exception\QueryParserException
 * @throws \ZendSearch\Lucene\Exception\RuntimeException
 * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 */
public static function parse($strQuery, $encoding = null)
{
    self::_getInstance();
    $parser = self::$_instance;

    // Reset FSM if previous parse operation didn't return it into a correct state
    $parser->reset();

    try {
        $parser->_encoding     = ($encoding === null) ? $parser->_defaultEncoding : $encoding;
        $parser->_lastToken    = null;
        $parser->_context      = new QueryParserContext($parser->_encoding);
        $parser->_contextStack = array();
        $parser->_tokens       = $parser->_lexer->tokenize($strQuery, $parser->_encoding);

        // Empty query
        if (count($parser->_tokens) == 0) {
            return new Query\Insignificant();
        }

        foreach ($parser->_tokens as $token) {
            try {
                $parser->_currentToken = $token;
                $parser->process($token->type);
                $parser->_lastToken = $token;
            } catch (\Exception $e) {
                // A missing FSM rule means the token is not valid at this
                // position: report it as a syntax error.
                if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
                    throw new QueryParserException('Syntax error at char position ' . $token->position . '.', 0, $e);
                }

                throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
            }
        }

        if (count($parser->_contextStack) != 0) {
            throw new QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.');
        }

        return $parser->_context->getQuery();
    } catch (QueryParserException $e) {
        if (!$parser->_suppressQueryParsingExceptions) {
            throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
        }

        // Suppressed: fall back to a plain multi-term query built from the
        // analyzer's tokens; terms are required when the default operator is AND.
        $queryTokens = Analyzer\Analyzer::getDefault()->tokenize($strQuery, $parser->_encoding);
        $termsSign   = ($parser->_defaultOperator == self::B_AND) ? true : null;

        $query = new Query\MultiTerm();
        foreach ($queryTokens as $token) {
            $query->addTerm(new Index\Term($token->getTermText()), $termsSign);
        }

        return $query;
    }
}
/**
 * Lists all Post models and runs a Lucene search for the "q" query parameter.
 *
 * NOTE(review): the index is rebuilt from every Post on each request; that is
 * kept as-is here but is probably worth moving out of the request path.
 *
 * @return mixed
 */
public function actionIndex()
{
    $searchModel = new PostSearch();
    $dataProvider = $searchModel->search(Yii::$app->request->post());

    setlocale(LC_CTYPE, 'ru_RU.UTF-8');
    Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
    Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
    Lucene\Lucene::setResultSetLimit(10);

    // create blog posts index located in /data/posts_index ,make sure the folder is writable
    $index = Lucene\Lucene::create('data/posts_index');

    // iterate through posts and build the index
    foreach (Post::find()->all() as $p) {
        $doc = new Lucene\Document();
        $doc->addField(Lucene\Document\Field::UnIndexed('entry_id', $p->id));
        $doc->addField(Lucene\Document\Field::Keyword('title', $p->title));
        $doc->addField(Lucene\Document\Field::text('contents', $p->content));
        $index->addDocument($doc);
    }

    // commit the index
    $index->commit();

    // The query parameter arrives already URL-decoded; decoding it a second
    // time corrupted literal "%" and "+" characters. A missing or array-valued
    // "q" is treated as an empty search.
    $rawQuery = Yii::$app->getRequest()->getQueryParam('q');
    if (!is_string($rawQuery)) {
        $rawQuery = '';
    }

    // explode the search query to individual words, dropping the empty
    // strings produced by repeated, leading or trailing spaces
    $words = array_filter(explode(' ', $rawQuery), 'strlen');

    // start a search query and add a term for each word to it
    $query = new Lucene\Search\Query\MultiTerm();
    foreach ($words as $w) {
        $query->addTerm(new Lucene\Index\Term($w));
    }

    // open and query the index
    $index = Lucene\Lucene::open('data/posts_index');
    $results = $index->find($query);

    return $this->render('index', [
        'searchModel' => $searchModel,
        'dataProvider' => $dataProvider,
        'search' => $results,
        'query' => $query,
    ]);
}
/**
 * An exclusive range query highlights the words strictly inside the range
 * and leaves the boundary words and out-of-range words untouched.
 */
public function testHighlightRangeNonInclusive()
{
    $rangeQuery = Search\QueryParser::parse('{business TO by}');

    $sourceHtml = implode('', array(
        '<HTML>',
        '<HEAD><TITLE>Page title</TITLE></HEAD>',
        '<BODY>',
        'Test of text using range query. ',
        'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"',
        '</BODY>',
        '</HTML>',
    ));

    $highlightedHTML = $rangeQuery->highlightMatches($sourceHtml);

    $expectedHighlights = array(
        '<b style="color:black;background-color:#66ffff">buss</b>',
        '<b style="color:black;background-color:#66ffff">but</b>',
    );
    foreach ($expectedHighlights as $fragment) {
        $this->assertTrue(strpos($highlightedHTML, $fragment) !== false);
    }

    // Check that "bus" word is skipped
    $untouchedTail = 'has to skip "business", "by" and "bus"';
    $this->assertTrue(strpos($highlightedHTML, $untouchedTail) !== false);
}
/**
 * A required phrase that matches nothing makes the whole boolean query
 * optimize down to an empty query.
 */
public function testBooleanQueryWithNonExistingPhraseSubquery()
{
    $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');
    $query = Search\QueryParser::parse('"Non-existing phrase" AND Home');

    $parsedForm    = '+("Non-existing phrase") +(Home)';
    $rewrittenForm = '+((pathkeyword:"non existing phrase") (path:"non existing phrase") (modified:"non existing phrase") (contents:"non existing phrase")) +(pathkeyword:home path:home modified:home contents:home)';
    $optimizedForm = '<EmptyQuery>';

    // As parsed.
    $this->assertEquals($query->__toString(), $parsedForm);

    // After expansion over the index fields.
    $rewritten = $query->rewrite($index);
    $this->assertEquals($rewritten->__toString(), $rewrittenForm);

    // After optimization against the index.
    $optimized = $query->rewrite($index)->optimize($index);
    $this->assertEquals($optimized->__toString(), $optimizedForm);
}
/**
 * Handles an omitted (default) operator directly followed by NOT.
 *
 * The OR action runs first unless the default operator is AND; the NOT
 * action always runs afterwards.
 */
public function emptyNotOperatorAction()
{
    // With AND as the default operator there is nothing to do for the
    // omitted operator.
    if (QueryParser::getDefaultOperator() != QueryParser::B_AND) {
        $this->orOperatorAction();
    }

    // Process NOT operator
    $this->notOperatorAction();
}
/**
 * Runs a search against the wrapped index.
 *
 * @param mixed $query Query passed to the index; with $parse enabled it is
 *                     treated as a value for the "first_name" field.
 * @param bool  $parse Whether to turn $query into a parsed "first_name:" query first.
 *
 * @return mixed Whatever the index's find() returns.
 */
public function find($query, $parse = false)
{
    if (!$parse) {
        return $this->index->find($query);
    }

    $fieldQuery = \ZendSearch\Lucene\Search\QueryParser::parse('first_name:' . $query);

    return $this->index->find($fieldQuery);
}
/**
 * Turns a query string into a query object by delegating to the Lucene
 * query parser.
 *
 * @param string $strQuery
 * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 */
public function parse($strQuery)
{
    $parsedQuery = QueryParser::parse($strQuery);

    return $parsedQuery;
}
/**
 * Searches the per-language Lucene index and returns the hits grouped by
 * document type, with keyword matches highlighted in title and description.
 *
 * Each group is a stdClass with ->title (display name of the group) and
 * ->results (list of stdClass hits carrying ->title, ->description, ->url).
 * Hits whose document cannot be loaded from the search model are skipped.
 *
 * @param string $keywords
 * @param string $language
 *
 * @return \stdClass[] groups keyed by document type
 */
protected function getCmsSearchResults($keywords, $language)
{
    $searchModel = new SearchModel($this->cmsController->getDB());
    $searchIndex = Lucene::open(
        $this->cmsController->getCore()->getSiteRoot() . 'index' . DIRECTORY_SEPARATOR . $language
    );

    // Keywords are free-form input: ask the parser not to throw on
    // malformed query syntax.
    QueryParser::suppressQueryParsingExceptions();
    $query = QueryParser::parse($keywords);
    $hits = $searchIndex->find($query);

    $searchResultsArr = array();
    $highlighter = new CmsSearchHighlighter($keywords);

    foreach ($hits as $hit) {
        /** @var QueryHit $hit */
        // Returns the Lucene document object for this hit
        /** @var Document $document */
        $document = $hit->getDocument();
        $doc = $searchModel->getDocumentByID($document->getFieldUtf8Value('ID'));

        if ($doc->getID() === null) {
            continue;
        }

        $fldType = $doc->getType();
        $isCorePage = ($fldType === 'core_page');

        // Core pages additionally get their highlighted description chunked.
        $contentChunks = $highlighter->highlightMatches(strip_tags($doc->getDescription()), 'UTF-8');
        if ($isCorePage) {
            $contentChunks = $this->createChunkedHighlighting($contentChunks);
        }
        if ($contentChunks == '') {
            $contentChunks = null;
        }

        $searchResult = new \stdClass();
        $searchResult->title = $highlighter->highlightMatches(strip_tags($doc->getTitle()), 'UTF-8');
        $searchResult->description = $contentChunks;
        $searchResult->url = $doc->getPath();

        // First hit of this type: create its group.
        if (isset($searchResultsArr[$fldType]) === false) {
            $group = new \stdClass();
            $group->title = $isCorePage
                ? 'Andere Suchresultate'
                : $this->resolveSearchGroupTitle($fldType, $language);
            $group->results = array();
            $searchResultsArr[$fldType] = $group;
        }

        $searchResultsArr[$fldType]->results[] = $searchResult;
    }

    return $searchResultsArr;
}

/**
 * Looks up the display name of a module from its stored manifest: the name
 * in $language if present, otherwise the English name, otherwise the raw
 * module name.
 *
 * @param string $moduleName
 * @param string $language
 *
 * @return string
 */
private function resolveSearchGroupTitle($moduleName, $language)
{
    $db = $this->cmsController->getDB();
    $stmntModName = $db->prepare("\n\t\t\t\t\t\tSELECT manifest_content FROM cms_mod_available WHERE name = ?\n\t\t\t\t\t");
    $resModName = $db->select($stmntModName, array($moduleName));

    // No manifest row for this module: previously this dereferenced a
    // missing array element.
    if (!isset($resModName[0])) {
        return $moduleName;
    }

    try {
        $manifestObj = JsonUtils::decode($resModName[0]->manifest_content);

        if (isset($manifestObj->name->{$language})) {
            return $manifestObj->name->{$language};
        }
        if (isset($manifestObj->name->en)) {
            return $manifestObj->name->en;
        }
    } catch (\Exception $e) {
        // Deliberate best effort: an unreadable manifest must not break the
        // search page, the raw module name is used instead.
    }

    return $moduleName;
}
/**
 * Returns the Lucene index, opening it on first use.
 *
 * The first call also sets the process-wide Lucene defaults (UTF-8 parser
 * encoding, case-insensitive UTF-8 analyzer with numbers, AND as default
 * operator). If the index cannot be opened it is created instead.
 *
 * @return mixed The opened or newly created index.
 */
protected function getIndex()
{
    if ($this->index == null) {
        \ZendSearch\Lucene\Search\QueryParser::setDefaultEncoding('utf-8');
        \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
            new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive()
        );
        \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(
            \ZendSearch\Lucene\Search\QueryParser::B_AND
        );

        try {
            $this->index = \ZendSearch\Lucene\Lucene::open($this->getIndexPath());
        } catch (\ZendSearch\Lucene\Exception\RuntimeException $ex) {
            // Opening failed: start a fresh index at the same path.
            $this->index = \ZendSearch\Lucene\Lucene::create($this->getIndexPath());
        }
    }

    return $this->index;
}
/**
 * Searches the given Lucene index for a keyword and loads the matching
 * Communication entities.
 *
 * The keyword is lower-cased and converted to ASCII before parsing; the
 * query's terms are combined with OR by default.
 *
 * @param mixed  $index         Index object exposing find().
 * @param string $searchKeyWord Raw keyword.
 *
 * @return mixed Result of the repository's findById() for the hit ids.
 */
public function luceneSearchAetCommunications($index, $searchKeyWord)
{
    \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
        new \ZendSearch\Lucene\Analysis\Analyzer\Common\Text\CaseInsensitive()
    );

    $normalizedKeyword = SearchHelper::utf8_to_ascii(mb_strtolower($searchKeyWord, "UTF-8"));
    $entityManager = $this->getDoctrine()->getManager();

    \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(
        \ZendSearch\Lucene\Search\QueryParser::B_OR
    );
    $parsedQuery = \ZendSearch\Lucene\Search\QueryParser::parse($normalizedKeyword, 'UTF-8');

    // Collect the database ids stored on each hit.
    $matchedIds = array();
    foreach ($index->find($parsedQuery) as $hit) {
        $matchedIds[] = $hit->dbId;
    }

    return $entityManager
        ->getRepository('AetCommunicationBundle:Communication')
        ->findById($matchedIds);
}
/**
 * Highlights the matches of the last search query in an HTML fragment.
 *
 * Returns an empty string when no query has been run yet.
 *
 * @param string $inputHTMLFragment Source HTML fragment
 * @param string $inputEncoding     Encoding of the source HTML fragment
 * @param string $outputEncoding    Encoding of the resulting HTML fragment
 * @return string HTML fragment with the search matches highlighted
 */
public function highlightMatches($inputHTMLFragment, $inputEncoding = 'utf-8', $outputEncoding = 'utf-8')
{
    if (empty($this->lastQuery)) {
        return '';
    }

    $query = QueryParser::parse($this->lastQuery);

    // Drop the stop-word filters while highlighting, so that words with
    // pseudo-roots that look like stop words are still highlighted.
    Analyzer::setDefault($this->analyzerForHighlighter);
    try {
        $highlightedHTMLFragment = $query->htmlFragmentHighlightMatches(
            $inputHTMLFragment,
            $inputEncoding,
            new Highlighter()
        );
    } finally {
        // Always restore the regular analyzer: the default analyzer is global
        // state, and a highlighting failure must not leave the highlighter's
        // analyzer active for later indexing or searching.
        Analyzer::setDefault($this->defaultAnalyzer);
    }

    return mb_convert_encoding($highlightedHTMLFragment, $outputEncoding, 'utf-8');
}
/**
 * Opens (or creates, when the path check fails) the index configured for the
 * given identifier, applies its configured analyzer, tuning values, file
 * permissions and parser encoding, and returns it.
 *
 * @param string $identifier The lucene identifier.
 *
 * @return \ZendSearch\Lucene\Index The lucene index.
 */
public function getIndex($identifier)
{
    $config = $this->getConfig($identifier);
    $path = $config['path'];

    $this->indexes[$identifier] = $this->checkPath($path)
        ? Lucene::open($path)
        : Lucene::create($path);

    // The analyzer is configured as a class name.
    $analyzerClass = $config['analyzer'];
    Analyzer::setDefault(new $analyzerClass());

    $index = $this->indexes[$identifier];
    $index->setMaxBufferedDocs($config['max_buffered_docs']);
    $index->setMaxMergeDocs($config['max_merge_docs']);
    $index->setMergeFactor($config['merge_factor']);

    ZfFilesystem::setDefaultFilePermissions($config['permissions']);

    if ($config['auto_optimized']) {
        $index->optimize();
    }

    QueryParser::setDefaultEncoding($config['query_parser_encoding']);

    return $this->indexes[$identifier];
}
<?php

require 'vendor/autoload.php';

use ZendSearch\Lucene\Lucene;
use ZendSearch\Lucene\MultiSearcher;
use ZendSearch\Lucene\Search\QueryParser;

// JSON search endpoint: stems the words of ?q=..., searches the index in
// ../_index and prints a list of {href, name, preview} objects.

// Reduces a single word to its Porter stem.
$stem = function ($e) {
    return \Porter::Stem($e);
};

// Only a plain string is a usable query; a missing parameter or an array
// (?q[]=...) counts as "no query" instead of reaching htmlentities().
$q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
$q = htmlentities($q);
$q = implode('+', array_map($stem, explode(' ', $q)));

header('Content-Type: application/json');

$output = array();

if ($q) {
    $indexer = Lucene::open('../_index');
    $search = new MultiSearcher(array($indexer));
    $query = QueryParser::parse($q);
    $result = $search->find($query);

    foreach ($result as $hit) {
        $title = strtolower(str_replace('-', ' ', $hit->name));
        $resultUrl = '../' . $hit->fileName;

        // Collapse whitespace and strip the title out of the body. The title
        // is quoted so regex metacharacters in it are matched literally, and
        // it is left out when empty: an empty alternative would match at
        // every position and scatter spaces through the whole body.
        $cleanupPattern = '/\s+/i';
        if ($title !== '') {
            $cleanupPattern = '/\s+|' . preg_quote($title, '/') . '/i';
        }
        $excerpt = substr(preg_replace($cleanupPattern, " ", $hit->body), 0, 300) . '...';

        $output[] = array(
            'href' => $resultUrl,
            'name' => ucfirst($title),
            'preview' => $query->htmlFragmentHighlightMatches($excerpt),
        );
    }
}

echo json_encode($output);
/**
 * Searches the given contexts (index names) and returns the hits ordered by
 * descending score.
 *
 * A search rejected because the pattern has too few non-wildcard characters
 * is treated as "no results"; any other runtime error is rethrown.
 *
 * @param string $query    Query string in Lucene syntax.
 * @param array  $contexts Names of the indexes to search.
 *
 * @return Hit[]
 *
 * @throws \RuntimeException
 */
public function search($query, $contexts = array())
{
    $searcher = new Lucene\MultiSearcher();
    foreach ($contexts as $indexName) {
        $searcher->addIndex($this->getLuceneIndex($indexName));
    }

    $query = Lucene\Search\QueryParser::parse($query);

    try {
        $luceneHits = $searcher->find($query);
    } catch (\RuntimeException $e) {
        if (!preg_match('&non-wildcard characters&', $e->getMessage())) {
            throw $e;
        }
        $luceneHits = array();
    }

    $hits = array();
    foreach ($luceneHits as $luceneHit) {
        /* @var Lucene\Search\QueryHit $luceneHit */
        $luceneDocument = $luceneHit->getDocument();

        $hit = new Hit();
        $hit->setScore($luceneHit->score);
        $hit->setHash($luceneDocument->getFieldValue(self::HASH_FIELDNAME));

        // Copy every stored field of the document as metadata.
        foreach ($luceneDocument->getFieldNames() as $fieldName) {
            $hit->addMetadata($fieldName, $luceneDocument->getFieldValue($fieldName));
        }

        $hits[] = $hit;
    }

    // The MultiSearcher does not support sorting, so we do it here.
    // The comparator must return an integer (<0, 0, >0). The previous
    // boolean return never signalled "A before B", which is deprecated in
    // PHP 8 and gives an unreliable order.
    usort($hits, function (HitInterface $documentA, HitInterface $documentB) {
        $scoreA = $documentA->getScore();
        $scoreB = $documentB->getScore();

        if ($scoreA == $scoreB) {
            return 0;
        }

        // Higher score first.
        return ($scoreA < $scoreB) ? 1 : -1;
    });

    return $hits;
}
/**
 * Builds the boolean query for a 'signs style' expression:
 * '+term1 term2 -term3 +(<subquery1>) ...'
 *
 * Entries without an explicit sign get the default one: required when the
 * default operator is AND, optional otherwise.
 *
 * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 */
public function _signStyleExpressionQuery()
{
    $query = new Query\Boolean();

    // true = required, null = optional
    $defaultSign = (QueryParser::getDefaultOperator() == QueryParser::B_AND) ? true : null;

    foreach ($this->_entries as $entryId => $entry) {
        $sign = $this->_signs[$entryId];
        if ($sign === null) {
            $sign = $defaultSign;
        }

        $query->addSubquery($entry->getQuery($this->_encoding), $sign);
    }

    return $query;
}
/**
 * Opens the index at the configured path (creating it when the path check
 * fails), applies the Lucene defaults and the tuning options, optimizes the
 * index and returns it.
 */
private function getIndex() : SearchIndexInterface
{
    $path = $this->getIndexPath();
    $index = $this->checkIndexPath($path)
        ? Lucene::open($path)
        : Lucene::create($path);

    // Process-wide Lucene defaults.
    Analyzer::setDefault(new CaseInsensitive());
    LuceneFilesystem::setDefaultFilePermissions(0775);
    QueryParser::setDefaultEncoding('UTF-8');

    // Per-index tuning taken from the options.
    $index->setMaxBufferedDocs($this->options['max_buffered_docs']);
    $index->setMaxMergeDocs($this->options['max_merge_docs']);
    $index->setMergeFactor($this->options['merge_factor']);

    $index->optimize();

    return $index;
}
/**
 * Searches the given Lucene index for a keyword and loads the matching User
 * entities.
 *
 * The keyword is lower-cased and converted to ASCII before parsing; the
 * query's terms are combined with OR by default.
 *
 * @param mixed  $index         Index object exposing find().
 * @param string $searchKeyWord Raw keyword.
 *
 * @return mixed Result of the repository's findById() for the hit ids.
 */
public function luceneSearchAetUsers($index, $searchKeyWord)
{
    \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
        new \ZendSearch\Lucene\Analysis\Analyzer\Common\Text\CaseInsensitive()
    );

    $normalizedKeyword = SearchHelper::utf8_to_ascii(mb_strtolower($searchKeyWord, "UTF-8"));
    $entityManager = $this->getDoctrine()->getManager();

    \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(
        \ZendSearch\Lucene\Search\QueryParser::B_OR
    );
    $parsedQuery = \ZendSearch\Lucene\Search\QueryParser::parse($normalizedKeyword, 'UTF-8');

    // Collect the database ids stored on each hit.
    $matchedIds = array();
    foreach ($index->find($parsedQuery) as $hit) {
        $matchedIds[] = $hit->dbId;
    }

    return $entityManager
        ->getRepository('AetAnnuaireBundle:User')
        ->findById($matchedIds);
}
/**
 * The builder's parse() must produce the same query object as the Lucene
 * query parser, for an empty and a non-empty query string.
 */
public function testParse()
{
    foreach (array('', 'test') as $queryString) {
        $expected = QueryParser::parse($queryString);
        $actual = $this->builder->parse($queryString);

        $this->assertEquals($expected, $actual);
    }
}
/**
 * Adds one search condition to the given boolean query as a parsed subquery
 * and returns the same query instance. Conditions carrying a truthy 'lat'
 * key are skipped here and leave the query untouched.
 *
 * @param \ZendSearch\Lucene\Search\Query\Boolean $query
 * @param array $condition - field      : name of the field (string, list of names, or empty / '*' for any)
 *                         - value      : value to match
 *                         - required   : must match
 *                         - prohibited : must not match
 *                         - phrase     : match as a phrase
 *                         - filter     : filter results on value
 *                         - fuzzy      : fuzziness value (0 - 1)
 *
 * @return \ZendSearch\Lucene\Search\Query\Boolean
 */
public function addConditionToQuery($query, array $condition)
{
    if (array_get($condition, 'lat')) {
        return $query;
    }

    $value = trim($this->escape(array_get($condition, 'value')));

    // Phrases and filters are matched as a quoted string.
    if (array_get($condition, 'phrase') || array_get($condition, 'filter')) {
        $value = '"' . $value . '"';
    }

    if (isset($condition['fuzzy']) && false !== $condition['fuzzy']) {
        // Only a numeric fuzziness within [0, 1] is appended; anything else
        // leaves a bare "~".
        $fuzzy = $condition['fuzzy'];
        $fuzziness = (is_numeric($fuzzy) && $fuzzy >= 0 && $fuzzy <= 1) ? $fuzzy : '';

        $fuzzyWords = array_map(
            function ($word) use ($fuzziness) {
                return $word . '~' . $fuzziness;
            },
            explode(' ', $value)
        );
        $value = implode(' ', $fuzzyWords);
    }

    // true = required, false = prohibited, null = optional
    if (!empty($condition['required'])) {
        $sign = true;
    } elseif (!empty($condition['prohibited'])) {
        $sign = false;
    } else {
        $sign = null;
    }

    $field = array_get($condition, 'field');
    if (empty($field) || '*' === $field) {
        $field = null;
    }

    if (is_array($field)) {
        // Several fields: the value may match in any of them.
        $perField = array();
        foreach ($field as $fieldName) {
            $perField[] = trim($fieldName) . ':(' . $value . ')';
        }
        $value = implode(' OR ', $perField);
    } elseif ($field) {
        $value = trim($field) . ':(' . $value . ')';
    }

    $query->addSubquery(\ZendSearch\Lucene\Search\QueryParser::parse($value), $sign);

    return $query;
}
/**
 * Performs a query against the index and returns an array
 * of QueryHit objects.
 * Input is a query string or a query object.
 *
 * Pending changes are committed before searching. Documents with a zero
 * score are dropped, collection stops once the global result-set limit
 * (0 = unlimited) is reached, and scores are divided by the top score when
 * that exceeds 1.
 *
 * Called with a single argument, hits are sorted by descending score, ties
 * by ascending document id. Any further arguments (read via func_get_args())
 * define a custom sort: a field name (or 'score'), optionally followed by up
 * to two integer sort flags as accepted by array_multisort(); this group may
 * be repeated.
 *
 * @param \ZendSearch\Lucene\Search\Query\AbstractQuery|string $query
 * @return \ZendSearch\Lucene\Search\QueryHit[]
 * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
 * @throws \ZendSearch\Lucene\Exception\RuntimeException
 */
public function find($query)
{
    if (is_string($query)) {
        $query = Search\QueryParser::parse($query);
    } elseif (!$query instanceof Search\Query\AbstractQuery) {
        throw new InvalidArgumentException('Query must be a string or ZendSearch\\Lucene\\Search\\Query object');
    }
    $this->commit();
    // $hits, $scores and $ids are parallel arrays; they are sorted together below.
    $hits = array();
    $scores = array();
    $ids = array();
    $query = $query->rewrite($this)->optimize($this);
    $query->execute($this);
    $topScore = 0;
    $resultSetLimit = Lucene::getResultSetLimit();
    foreach ($query->matchedDocs() as $id => $num) {
        $docScore = $query->score($id, $this);
        if ($docScore != 0) {
            $hit = new Search\QueryHit($this);
            $hit->document_id = $hit->id = $id;
            $hit->score = $docScore;
            $hits[] = $hit;
            $ids[] = $id;
            $scores[] = $docScore;
            if ($docScore > $topScore) {
                $topScore = $docScore;
            }
        }
        // A limit of 0 means "no limit".
        if ($resultSetLimit != 0 && count($hits) >= $resultSetLimit) {
            break;
        }
    }
    if (count($hits) == 0) {
        // skip sorting, which may cause a error on empty index
        return array();
    }
    // Normalize hit scores by the top score. Only the hit objects are
    // updated; $scores keeps the raw values that the sort below uses.
    if ($topScore > 1) {
        foreach ($hits as $hit) {
            $hit->score /= $topScore;
        }
    }
    if (func_num_args() == 1) {
        // sort by scores
        array_multisort($scores, SORT_DESC, SORT_NUMERIC, $ids, SORT_ASC, SORT_NUMERIC, $hits);
    } else {
        // sort by given field names
        $argList = func_get_args();
        $fieldNames = $this->getFieldNames();
        $sortArgs = array();
        // PHP 5.3 now expects all arguments to array_multisort be passed by
        // reference (if it's invoked through call_user_func_array());
        // since constants can't be passed by reference, create some placeholder variables.
        $sortReg = SORT_REGULAR;
        $sortAsc = SORT_ASC;
        $sortNum = SORT_NUMERIC;
        $sortFieldValues = array();
        // Index 0 is the query itself; sort arguments start at index 1.
        for ($count = 1; $count < count($argList); $count++) {
            $fieldName = $argList[$count];
            if (!is_string($fieldName)) {
                throw new RuntimeException('Field name must be a string.');
            }
            if (strtolower($fieldName) == 'score') {
                $sortArgs[] =& $scores;
            } else {
                if (!in_array($fieldName, $fieldNames)) {
                    throw new RuntimeException('Wrong field name.');
                }
                if (!isset($sortFieldValues[$fieldName])) {
                    $valuesArray = array();
                    foreach ($hits as $hit) {
                        try {
                            $value = $hit->getDocument()->getFieldValue($fieldName);
                        } catch (\Exception $e) {
                            // A document lacking the field sorts as null; any
                            // other failure is rethrown.
                            if (strpos($e->getMessage(), 'not found') === false) {
                                throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
                            } else {
                                $value = null;
                            }
                        }
                        $valuesArray[] = $value;
                    }
                    // Collect loaded values in $sortFieldValues
                    // Required for PHP 5.3 which translates references into values when source
                    // variable is destroyed
                    $sortFieldValues[$fieldName] = $valuesArray;
                }
                $sortArgs[] =& $sortFieldValues[$fieldName];
            }
            // Consume up to two integer flags after the field name; whatever
            // is missing is filled in with a default.
            if ($count + 1 < count($argList) && is_integer($argList[$count + 1])) {
                $count++;
                $sortArgs[] =& $argList[$count];
                if ($count + 1 < count($argList) && is_integer($argList[$count + 1])) {
                    $count++;
                    $sortArgs[] =& $argList[$count];
                } else {
                    // Only one flag given: if it was an order, default the
                    // type to regular; otherwise default the order to ascending.
                    if ($argList[$count] == SORT_ASC || $argList[$count] == SORT_DESC) {
                        $sortArgs[] =& $sortReg;
                    } else {
                        $sortArgs[] =& $sortAsc;
                    }
                }
            } else {
                $sortArgs[] =& $sortAsc;
                $sortArgs[] =& $sortReg;
            }
        }
        // Sort by id's if values are equal
        $sortArgs[] =& $ids;
        $sortArgs[] =& $sortAsc;
        $sortArgs[] =& $sortNum;
        // Array to be sorted
        $sortArgs[] =& $hits;
        // Do sort
        call_user_func_array('array_multisort', $sortArgs);
    }
    return $hits;
}