/**
 * Builds a Lucene query for a free-text expression plus optional extra conditions.
 *
 * Punctuation is stripped from the expression. When the ASCII transliteration
 * differs from the stripped expression, it is searched as well. A multi-word
 * expression is added once as a quoted phrase and once per word (prefix match,
 * only words longer than two characters, each distinct word once). A single
 * word is added as a prefix match.
 *
 * NOTE: when $conditions is a non-empty array, the Boolean query is converted
 * to its string form by the ".=" operator and the conditions are appended as
 * " AND ..." clauses, so the return value is a string in that case.
 *
 * @param string     $expressionOrigin Raw search expression.
 * @param array|null $conditions       Extra raw query conditions. Entries starting
 *                                     with "url:" are rewritten to a Page::URIS_KEY
 *                                     phrase containing only the path part.
 *
 * @return Boolean|string
 */
public function prepareQuery($expressionOrigin, $conditions)
{
    setlocale(LC_ALL, "cs_CZ.UTF-8");

    $expressionOrigin = strtr($expressionOrigin, array(',' => '', ';' => '', "'" => '', '"' => '', '-' => '', '_' => '', '/' => '', '\\' => '', '+' => '', '=' => '', '?' => '', '.' => '', '!' => ''));

    $expressions = array($expressionOrigin);

    // iconv() returns false on failure; in that case only the original expression is searched
    // instead of adding an empty extra expression.
    $transliterated = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $expressionOrigin);
    if ($transliterated !== false) {
        $expressionTranslit = str_replace("'", '', $transliterated);
        if ($expressionOrigin !== $expressionTranslit) {
            $expressions[] = $expressionTranslit;
        }
    }

    $queryWords = array();
    $query = new Boolean();

    foreach ($expressions as $expression) {
        $expressionWords = explode(' ', $expression);

        if (count($expressionWords) > 1) {
            // whole expression as a phrase
            $query->addSubquery(QueryParser::parse('"' . $expression . '"', 'utf-8'));

            // individual words as prefix queries
            foreach ($expressionWords as $expressionWord) {
                // strict comparison so that numeric-looking words ("1e3" vs "1000") stay distinct
                if (mb_strlen($expressionWord, 'utf-8') > 2 && !in_array($expressionWord, $queryWords, true)) {
                    $queryWords[] = $expressionWord;
                    $query->addSubquery(QueryParser::parse($expressionWord . '*', 'utf-8'));
                }
            }
        } else {
            $query->addSubquery(QueryParser::parse($expression . '*', 'utf-8'));
        }
    }

    // query-specific conditions
    if (is_array($conditions) && count($conditions)) {
        foreach ($conditions as $condition) {
            // TODO: how to find items containing a URL in Lucene? A wildcard cannot be used...
            // The offset must be an int; passing null is deprecated since PHP 8.1.
            if (mb_strpos($condition, 'url:', 0, 'utf-8') === 0) {
                $uri = trim(substr($condition, 4), '"');

                if (strpos($uri, '://') !== false) {
                    // drop the scheme, then everything before the first slash (the host)
                    $uri = substr($uri, strpos($uri, '://') + 3);
                    $slashPosition = strpos($uri, '/');
                    if ($slashPosition !== false) {
                        $uri = substr($uri, $slashPosition);
                    }
                }

                $query .= ' AND ' . Page::URIS_KEY . ':"' . $uri . '"';
            } else {
                $query .= ' AND ' . $condition;
            }
        }
    }

    // in the dev environment the final query is stored in the session
    if ($this->kernel->getEnvironment() == 'dev') {
        $session = new Session();
        $session->set(self::QUERY_HANDLER, $query);
    }

    return $query;
}
/**
 * Checks that Search::rawQuery() accepts a query string, a closure returning
 * a query string, and a closure returning a Boolean query object.
 */
public function testSearchRawQuery()
{
    // plain query string
    $fromString = Search::rawQuery('description:big');
    $this->assertEquals(2, $fromString->count());

    // closure producing a query string
    $stringFactory = function () {
        return 'description:big';
    };
    $fromStringClosure = Search::rawQuery($stringFactory);
    $this->assertEquals(2, $fromStringClosure->count());

    // closure producing a query object
    $objectFactory = function () {
        $boolean = new Boolean();
        $boolean->addSubquery(QueryParser::parse('description:big OR name:monitor'));

        return $boolean;
    };
    $fromObjectClosure = Search::rawQuery($objectFactory);
    $this->assertEquals(3, $fromObjectClosure->count());
}
/**
 * Checks highlighting for the non-inclusive range query "{business TO by}":
 * "buss" and "but" are wrapped in highlight markup, while the trailing text
 * naming "business", "by" and "bus" stays unmodified.
 */
public function testHighlightRangeNonInclusive()
{
    $rangeQuery = Search\QueryParser::parse('{business TO by}');

    $document = implode('', array(
        '<HTML>',
        '<HEAD><TITLE>Page title</TITLE></HEAD>',
        '<BODY>',
        'Test of text using range query. ',
        'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"',
        '</BODY>',
        '</HTML>',
    ));

    $highlightedHTML = $rangeQuery->highlightMatches($document);

    $bussPosition = strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">buss</b>');
    $this->assertTrue($bussPosition !== false);

    $butPosition = strpos($highlightedHTML, '<b style="color:black;background-color:#66ffff">but</b>');
    $this->assertTrue($butPosition !== false);

    // Check that "bus" word is skipped
    $untouchedPosition = strpos($highlightedHTML, 'has to skip "business", "by" and "bus"');
    $this->assertTrue($untouchedPosition !== false);
}
/**
 * Checks the string form of a boolean query with a phrase that is not in the
 * sample index: as parsed, after rewrite(), and after rewrite() + optimize().
 */
public function testBooleanQueryWithNonExistingPhraseSubquery()
{
    $sampleIndex = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');
    $query = Search\QueryParser::parse('"Non-existing phrase" AND Home');

    $parsedForm = '+("Non-existing phrase") +(Home)';
    $this->assertEquals($query->__toString(), $parsedForm);

    $rewrittenForm = '+((pathkeyword:"non existing phrase") (path:"non existing phrase") (modified:"non existing phrase") (contents:"non existing phrase")) +(pathkeyword:home path:home modified:home contents:home)';
    $this->assertEquals($query->rewrite($sampleIndex)->__toString(), $rewrittenForm);

    $optimizedForm = '<EmptyQuery>';
    $this->assertEquals($query->rewrite($sampleIndex)->optimize($sampleIndex)->__toString(), $optimizedForm);
}
/**
 * Turn a query string into a query object by delegating to QueryParser::parse().
 *
 * @param string $strQuery Query string to parse.
 * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 */
public function parse($strQuery)
{
    $parsedQuery = QueryParser::parse($strQuery);

    return $parsedQuery;
}
/**
 * Runs a search against $this->index.
 *
 * @param mixed $query Passed to the index unchanged unless $parse is truthy.
 * @param bool  $parse When truthy, $query is prefixed with "first_name:" and
 *                     parsed into a query object before searching.
 *
 * @return mixed Whatever $this->index->find() returns.
 */
public function find($query, $parse = FALSE)
{
    if (!$parse) {
        return $this->index->find($query);
    }

    $firstNameQuery = \ZendSearch\Lucene\Search\QueryParser::parse('first_name:' . $query);

    return $this->index->find($firstNameQuery);
}
/**
 * Checks that the builder's parse() yields the same query object as
 * QueryParser::parse() for an empty string and for a single term.
 */
public function testParse()
{
    foreach (array('', 'test') as $queryString) {
        $this->assertEquals(QueryParser::parse($queryString), $this->builder->parse($queryString));
    }
}
/**
 * Searches the given Lucene index for a keyword and loads the matching
 * Communication entities by the "dbId" stored on each hit.
 *
 * Side effects: replaces the process-wide default analyzer with the
 * case-insensitive text analyzer and sets the parser's default operator to OR.
 *
 * @param mixed  $index         Index object exposing find().
 * @param string $searchKeyWord Raw keyword; lower-cased and converted to ASCII before parsing.
 *
 * @return mixed Result of the repository's findById() call.
 */
public function luceneSearchAetCommunications($index, $searchKeyWord)
{
    \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
        new \ZendSearch\Lucene\Analysis\Analyzer\Common\Text\CaseInsensitive()
    );

    $matchedIds = array();
    $normalizedKeyword = SearchHelper::utf8_to_ascii(mb_strtolower($searchKeyWord, "UTF-8"));
    $entityManager = $this->getDoctrine()->getManager();

    // An earlier, disabled variant built a MultiTerm query over the fields
    // firstname, title, shortdesc, body and author; the parser-based query
    // below is what actually runs.
    \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(\ZendSearch\Lucene\Search\QueryParser::B_OR);
    $parsedQuery = \ZendSearch\Lucene\Search\QueryParser::parse($normalizedKeyword, 'UTF-8');

    $hits = $index->find($parsedQuery);
    foreach ($hits as $hit) {
        $matchedIds[] = $hit->dbId;
    }

    return $entityManager->getRepository('AetCommunicationBundle:Communication')->findById($matchedIds);
}
/**
 * Searches the language-specific Lucene index and groups the hits by document type.
 *
 * Each group is an object with a "title" and a "results" list; each result has
 * "title", "description" (null when the highlighted description is empty) and "url".
 * Hits whose database document has no ID are skipped.
 *
 * @param string $keywords
 * @param string $language
 *
 * @return \stdClass[] Groups keyed by document type.
 */
protected function getCmsSearchResults($keywords, $language)
{
    $searchModel = new SearchModel($this->cmsController->getDB());
    $searchIndex = Lucene::open($this->cmsController->getCore()->getSiteRoot() . 'index' . DIRECTORY_SEPARATOR . $language);

    // Malformed user input must not raise a parser exception.
    QueryParser::suppressQueryParsingExceptions();
    $query = QueryParser::parse($keywords);
    $hits = $searchIndex->find($query);

    $searchResultsArr = array();
    $highlighter = new CmsSearchHighlighter($keywords);

    foreach ($hits as $hit) {
        /** @var QueryHit $hit */
        /** @var Document $document */
        $document = $hit->getDocument();
        $doc = $searchModel->getDocumentByID($document->getFieldUtf8Value('ID'));

        if ($doc->getID() === null) {
            continue;
        }

        $fldType = $doc->getType();
        $isCorePage = ($fldType === 'core_page');

        // Core pages additionally get their highlighted description chunked.
        $contentChunks = $highlighter->highlightMatches(strip_tags($doc->getDescription()), 'UTF-8');
        if ($isCorePage) {
            $contentChunks = $this->createChunkedHighlighting($contentChunks);
        }
        if ($contentChunks == '') {
            $contentChunks = null;
        }

        $searchResult = new \stdClass();
        $searchResult->title = $highlighter->highlightMatches(strip_tags($doc->getTitle()), 'UTF-8');
        $searchResult->description = $contentChunks;
        $searchResult->url = $doc->getPath();

        // Create the group the first time a document type is seen.
        if (isset($searchResultsArr[$fldType]) === false) {
            $group = new \stdClass();
            $group->title = $isCorePage
                ? 'Andere Suchresultate'
                : $this->resolveSearchGroupTitle($fldType, $language);
            $group->results = array();

            $searchResultsArr[$fldType] = $group;
        }

        $searchResultsArr[$fldType]->results[] = $searchResult;
    }

    return $searchResultsArr;
}

/**
 * Looks up the display name of a module from its stored manifest.
 *
 * Falls back to the technical module name when no manifest row exists, the
 * manifest cannot be decoded, or it has no name for $language or for "en".
 *
 * @param string $moduleName
 * @param string $language
 *
 * @return string
 */
private function resolveSearchGroupTitle($moduleName, $language)
{
    $db = $this->cmsController->getDB();
    $stmntModName = $db->prepare("\n\t\t\t\t\t\tSELECT manifest_content FROM cms_mod_available WHERE name = ?\n\t\t\t\t\t");
    $resModName = $db->select($stmntModName, array($moduleName));

    // No row (or no manifest) for this module: avoid dereferencing a missing result.
    if (!isset($resModName[0]->manifest_content)) {
        return $moduleName;
    }

    try {
        $manifestObj = JsonUtils::decode($resModName[0]->manifest_content);
    } catch (\Exception $e) {
        // An unreadable manifest is not fatal for the search page.
        return $moduleName;
    }

    if (isset($manifestObj->name->{$language})) {
        return $manifestObj->name->{$language};
    }

    if (isset($manifestObj->name->en)) {
        return $manifestObj->name->en;
    }

    return $moduleName;
}
/**
 * Add a search/where clause to the given query based on the given condition.
 * Return the given $query instance when finished.
 *
 * A condition with a truthy "lat" entry is ignored and the query is returned unchanged.
 *
 * @param \ZendSearch\Lucene\Search\Query\Boolean $query
 * @param array $condition - field      : name of the field, a list of field names,
 *                                        or empty / "*" for no field restriction
 *                         - value      : value to match
 *                         - required   : must match
 *                         - prohibited : must not match
 *                         - phrase     : match as a phrase
 *                         - filter     : filter results on value
 *                         - fuzzy      : fuzziness value (0 - 1); any other non-false
 *                                        value enables fuzzy matching without an explicit value
 *
 * @return \ZendSearch\Lucene\Search\Query\Boolean
 */
public function addConditionToQuery($query, array $condition)
{
    if (array_get($condition, 'lat')) {
        return $query;
    }

    $value = trim($this->escape(array_get($condition, 'value')));

    if (array_get($condition, 'phrase') || array_get($condition, 'filter')) {
        $value = '"' . $value . '"';
    }

    if (isset($condition['fuzzy']) && false !== $condition['fuzzy']) {
        $fuzziness = '';
        if (is_numeric($condition['fuzzy']) && $condition['fuzzy'] >= 0 && $condition['fuzzy'] <= 1) {
            $fuzziness = $condition['fuzzy'];
        }

        $words = array();
        foreach (explode(' ', $value) as $word) {
            // Consecutive spaces yield empty tokens; a bare "~" is not valid query syntax.
            if ($word === '') {
                continue;
            }
            $words[] = $word . '~' . $fuzziness;
        }
        $value = implode(' ', $words);
    }

    // true = required, false = prohibited, null = optional
    $sign = null;
    if (!empty($condition['required'])) {
        $sign = true;
    } elseif (!empty($condition['prohibited'])) {
        $sign = false;
    }

    $field = array_get($condition, 'field');
    if (empty($field) || '*' === $field) {
        $field = null;
    }

    if (is_array($field)) {
        // Several fields: match the value in any of them.
        $values = array();
        foreach ($field as $f) {
            $values[] = trim($f) . ':(' . $value . ')';
        }
        $value = implode(' OR ', $values);
    } elseif ($field) {
        $value = trim($field) . ':(' . $value . ')';
    }

    $query->addSubquery(\ZendSearch\Lucene\Search\QueryParser::parse($value), $sign);

    return $query;
}
/**
 * Performs a query against the index and returns an array of QueryHit objects.
 *
 * A string query is parsed with Search\QueryParser::parse(); any other value
 * must be a Search\Query\AbstractQuery instance. Pending changes are committed
 * before the query is rewritten, optimized and executed.
 *
 * Only documents with a non-zero score become hits. Collection stops once
 * Lucene::getResultSetLimit() hits were gathered (a limit of 0 means no limit).
 * If the best score is greater than 1, all hit scores are divided by it.
 *
 * With a single argument the hits are sorted by score (descending), then by
 * document id (ascending). Additional arguments, read via func_get_args(),
 * define a custom sort: each sort key is a field name (or 'score'), optionally
 * followed by one or two integer array_multisort() flags. Missing flags
 * default to SORT_ASC / SORT_REGULAR. Ties are always broken by document id.
 *
 * @param \ZendSearch\Lucene\Search\Query\AbstractQuery|string $query
 * @return \ZendSearch\Lucene\Search\QueryHit[] Empty array when nothing matched.
 * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException When $query is neither a string nor a query object.
 * @throws \ZendSearch\Lucene\Exception\RuntimeException When a sort field name is not a string or is unknown,
 *                                                       or when reading a field value fails for a reason
 *                                                       other than the field not being found.
 */
public function find($query)
{
    if (is_string($query)) {
        $query = Search\QueryParser::parse($query);
    } elseif (!$query instanceof Search\Query\AbstractQuery) {
        throw new InvalidArgumentException('Query must be a string or ZendSearch\\Lucene\\Search\\Query object');
    }

    // Make pending index changes visible to the search.
    $this->commit();

    // Parallel arrays: $hits[i], $scores[i] and $ids[i] describe the same document,
    // which is what array_multisort() needs further down.
    $hits = array();
    $scores = array();
    $ids = array();

    $query = $query->rewrite($this)->optimize($this);
    $query->execute($this);

    $topScore = 0;
    $resultSetLimit = Lucene::getResultSetLimit();

    foreach ($query->matchedDocs() as $id => $num) {
        $docScore = $query->score($id, $this);
        if ($docScore != 0) {
            $hit = new Search\QueryHit($this);
            $hit->document_id = $hit->id = $id;
            $hit->score = $docScore;
            $hits[] = $hit;
            $ids[] = $id;
            $scores[] = $docScore;
            if ($docScore > $topScore) {
                $topScore = $docScore;
            }
        }
        // A limit of 0 disables the cut-off.
        if ($resultSetLimit != 0 && count($hits) >= $resultSetLimit) {
            break;
        }
    }

    if (count($hits) == 0) {
        // skip sorting, which may cause a error on empty index
        return array();
    }

    // Normalize hit scores so the best hit has score 1. Only the hit objects are
    // rescaled; $scores keeps the raw values, which does not change the sort order.
    if ($topScore > 1) {
        foreach ($hits as $hit) {
            $hit->score /= $topScore;
        }
    }

    if (func_num_args() == 1) {
        // sort by scores
        array_multisort($scores, SORT_DESC, SORT_NUMERIC, $ids, SORT_ASC, SORT_NUMERIC, $hits);
    } else {
        // sort by given field names
        $argList = func_get_args();
        $fieldNames = $this->getFieldNames();
        $sortArgs = array();

        // PHP 5.3 now expects all arguments to array_multisort be passed by
        // reference (if it's invoked through call_user_func_array());
        // since constants can't be passed by reference, create some placeholder variables.
        $sortReg = SORT_REGULAR;
        $sortAsc = SORT_ASC;
        $sortNum = SORT_NUMERIC;

        $sortFieldValues = array();

        // Index 0 is the query itself; sort definitions start at index 1.
        for ($count = 1; $count < count($argList); $count++) {
            $fieldName = $argList[$count];

            if (!is_string($fieldName)) {
                throw new RuntimeException('Field name must be a string.');
            }

            if (strtolower($fieldName) == 'score') {
                $sortArgs[] =& $scores;
            } else {
                if (!in_array($fieldName, $fieldNames)) {
                    throw new RuntimeException('Wrong field name.');
                }

                if (!isset($sortFieldValues[$fieldName])) {
                    $valuesArray = array();
                    foreach ($hits as $hit) {
                        try {
                            $value = $hit->getDocument()->getFieldValue($fieldName);
                        } catch (\Exception $e) {
                            // A document lacking the field sorts as null; anything else is an error.
                            if (strpos($e->getMessage(), 'not found') === false) {
                                throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
                            } else {
                                $value = null;
                            }
                        }

                        $valuesArray[] = $value;
                    }

                    // Collect loaded values in $sortFieldValues
                    // Required for PHP 5.3 which translates references into values when source
                    // variable is destroyed
                    $sortFieldValues[$fieldName] = $valuesArray;
                }

                $sortArgs[] =& $sortFieldValues[$fieldName];
            }

            // Consume up to two integer flags following the field name and fill in
            // whichever of them was not supplied.
            if ($count + 1 < count($argList) && is_integer($argList[$count + 1])) {
                $count++;
                $sortArgs[] =& $argList[$count];

                if ($count + 1 < count($argList) && is_integer($argList[$count + 1])) {
                    $count++;
                    $sortArgs[] =& $argList[$count];
                } else {
                    // Only one flag given: if it was a sort order, add the default
                    // sort type; otherwise add the default sort order.
                    if ($argList[$count] == SORT_ASC || $argList[$count] == SORT_DESC) {
                        $sortArgs[] =& $sortReg;
                    } else {
                        $sortArgs[] =& $sortAsc;
                    }
                }
            } else {
                // No flags given: ascending, regular comparison.
                $sortArgs[] =& $sortAsc;
                $sortArgs[] =& $sortReg;
            }
        }

        // Sort by id's if values are equal
        $sortArgs[] =& $ids;
        $sortArgs[] =& $sortAsc;
        $sortArgs[] =& $sortNum;

        // Array to be sorted
        $sortArgs[] =& $hits;

        // Do sort
        call_user_func_array('array_multisort', $sortArgs);
    }

    return $hits;
}
/**
 * Highlights the matches of the last query inside an HTML fragment.
 *
 * While highlighting, the default analyzer is temporarily replaced by
 * $this->analyzerForHighlighter and restored to $this->defaultAnalyzer
 * afterwards, including when highlighting throws.
 *
 * @param string $inputHTMLFragment Source HTML fragment.
 * @param string $inputEncoding     Encoding of the source HTML fragment.
 * @param string $outputEncoding    Encoding of the resulting HTML fragment.
 * @return string HTML fragment with highlighted search matches, or an empty
 *                string when there is no last query.
 */
public function highlightMatches($inputHTMLFragment, $inputEncoding = 'utf-8', $outputEncoding = 'utf-8')
{
    if (empty($this->lastQuery)) {
        return '';
    }

    $query = QueryParser::parse($this->lastQuery);

    /**
     * Drop the stop-word filters while highlighting, so that words with
     * pseudo-roots such as 'под' are highlighted as well.
     */
    Analyzer::setDefault($this->analyzerForHighlighter);

    try {
        $highlightedHTMLFragment = $query->htmlFragmentHighlightMatches($inputHTMLFragment, $inputEncoding, new Highlighter());
    } catch (\Exception $e) {
        // The default analyzer is process-wide state; never leave the highlighter's one installed.
        Analyzer::setDefault($this->defaultAnalyzer);

        throw $e;
    }

    Analyzer::setDefault($this->defaultAnalyzer);

    return mb_convert_encoding($highlightedHTMLFragment, $outputEncoding, 'utf-8');
}
<?php
// JSON search endpoint: stems the words of ?q=..., searches the Lucene index in
// ../_index and prints a list of {href, name, preview} objects.
require 'vendor/autoload.php';

use ZendSearch\Lucene\Lucene;
use ZendSearch\Lucene\MultiSearcher;
use ZendSearch\Lucene\Search\QueryParser;

// Reduces a single word to its stem with the Porter stemmer.
$stem = function ($e) {
    return \Porter::Stem($e);
};

// Accept only a string parameter: "?q[]=x" would hand an array to htmlentities(),
// and a missing parameter would hand it null (deprecated since PHP 8.1).
$q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
$q = htmlentities($q);
$q = implode('+', array_map($stem, explode(' ', $q)));

header('Content-Type: application/json');

$output = array();

if ($q) {
    $indexer = Lucene::open('../_index');
    $search = new MultiSearcher(array($indexer));
    $query = QueryParser::parse($q);
    $result = $search->find($query);

    foreach ($result as $hit) {
        $title = strtolower(str_replace('-', ' ', $hit->name));
        $resultUrl = '../' . $hit->fileName;

        // Collapse whitespace runs and remove occurrences of the title from the body.
        // The title is quoted so its characters are matched literally, and it is left
        // out when empty because an empty alternative matches at every position.
        $cleanupPattern = ($title === '')
            ? '/\\s+/'
            : '/\\s+|' . preg_quote($title, '/') . '/i';
        $excerpt = substr(preg_replace($cleanupPattern, ' ', $hit->body), 0, 300) . '...';

        $output[] = array(
            'href' => $resultUrl,
            'name' => ucfirst($title),
            'preview' => $query->htmlFragmentHighlightMatches($excerpt),
        );
    }
}

echo json_encode($output);
/**
 * Searches the given Lucene index for a keyword and loads the matching
 * User entities by the "dbId" stored on each hit.
 *
 * Side effects: replaces the process-wide default analyzer with the
 * case-insensitive text analyzer and sets the parser's default operator to OR.
 *
 * @param mixed  $index         Index object exposing find().
 * @param string $searchKeyWord Raw keyword; lower-cased and converted to ASCII before parsing.
 *
 * @return mixed Result of the repository's findById() call.
 */
public function luceneSearchAetUsers($index, $searchKeyWord)
{
    \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
        new \ZendSearch\Lucene\Analysis\Analyzer\Common\Text\CaseInsensitive()
    );

    $matchedIds = array();
    $normalizedKeyword = SearchHelper::utf8_to_ascii(mb_strtolower($searchKeyWord, "UTF-8"));
    $entityManager = $this->getDoctrine()->getManager();

    // An earlier, disabled variant built a MultiTerm query over the fields
    // firstname, lastname, activiteprincipale, codepostal, email, matricule,
    // pays, promotion, telephone, ville and whoami; the parser-based query
    // below is what actually runs.
    \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(\ZendSearch\Lucene\Search\QueryParser::B_OR);
    $parsedQuery = \ZendSearch\Lucene\Search\QueryParser::parse($normalizedKeyword, 'UTF-8');

    $hits = $index->find($parsedQuery);
    foreach ($hits as $hit) {
        $matchedIds[] = $hit->dbId;
    }

    return $entityManager->getRepository('AetAnnuaireBundle:User')->findById($matchedIds);
}
/**
 * Searches the given contexts (index names) and returns the hits ordered by
 * score, best first.
 *
 * A \RuntimeException from the searcher whose message mentions
 * "non-wildcard characters" is treated as "no results"; any other
 * \RuntimeException is rethrown.
 *
 * @param string   $query    Query string to parse.
 * @param string[] $contexts Names of the indexes to search.
 *
 * @return Hit[] Each hit carries the score, the hash field and all document
 *               fields as metadata.
 */
public function search($query, $contexts = array())
{
    $searcher = new Lucene\MultiSearcher();
    foreach ($contexts as $indexName) {
        $searcher->addIndex($this->getLuceneIndex($indexName));
    }

    $query = Lucene\Search\QueryParser::parse($query);

    try {
        $luceneHits = $searcher->find($query);
    } catch (\RuntimeException $e) {
        if (!preg_match('&non-wildcard characters&', $e->getMessage())) {
            throw $e;
        }
        $luceneHits = array();
    }

    $hits = array();
    foreach ($luceneHits as $luceneHit) {
        /* @var Lucene\Search\QueryHit $luceneHit */
        $luceneDocument = $luceneHit->getDocument();

        $hit = new Hit();
        $hit->setScore($luceneHit->score);
        $hit->setHash($luceneDocument->getFieldValue(self::HASH_FIELDNAME));

        foreach ($luceneDocument->getFieldNames() as $fieldName) {
            $hit->addMetadata($fieldName, $luceneDocument->getFieldValue($fieldName));
        }

        $hits[] = $hit;
    }

    // The MultiSearcher does not support sorting, so we do it here.
    // usort() needs a negative / zero / positive integer; a boolean result can
    // never say "A comes first" and is deprecated as of PHP 8.
    usort($hits, function (HitInterface $documentA, HitInterface $documentB) {
        $scoreA = $documentA->getScore();
        $scoreB = $documentB->getScore();

        if ($scoreA == $scoreB) {
            return 0;
        }

        // Higher scores first.
        return ($scoreA < $scoreB) ? 1 : -1;
    });

    return $hits;
}