/**
 * Runs the fuzzy query "tesd~0.4" against the bundled sample index and checks
 * that the hits come back in the expected order with the expected ids, scores
 * and paths.
 *
 * The global default fuzzy prefix length is forced to 0 for the duration of the
 * test and is put back afterwards, including when the search or an assertion
 * fails, so that a failure here cannot change the outcome of later tests.
 */
public function testFuzzyQuery()
{
    $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index23Sample/_files');

    $defaultPrefixLength = Zend_Search_Lucene_Search_Query_Fuzzy::getDefaultPrefixLength();
    Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(0);

    try {
        $hits = $index->find('tesd~0.4');

        // PHPUnit convention: expected value first, actual value second.
        $this->assertEquals(9, count($hits));

        // Each row is: document id, score, stored "path" field.
        $expectedResultset = array(
            array(2, 0.037139, 'IndexSource/contributing.patches.html'),
            array(0, 0.008735, 'IndexSource/contributing.documentation.html'),
            array(7, 0.002449, 'IndexSource/contributing.bugs.html'),
            array(1, 0.000483, 'IndexSource/contributing.wishlist.html'),
            array(3, 0.000483, 'IndexSource/about-pear.html'),
            array(9, 0.000483, 'IndexSource/core.html'),
            array(5, 0.000414, 'IndexSource/authors.html'),
            array(8, 0.000414, 'IndexSource/contributing.html'),
            array(4, 0.000345, 'IndexSource/copyright.html'),
        );

        foreach ($hits as $resId => $hit) {
            list($expectedId, $expectedScore, $expectedPath) = $expectedResultset[$resId];

            $this->assertEquals($expectedId, $hit->id);
            // Scores are floats; compare with a tolerance instead of exact equality.
            $this->assertTrue(abs($hit->score - $expectedScore) < 1.0E-6);
            $this->assertEquals($expectedPath, $hit->path);
        }
    } catch (Exception $e) {
        // Restore the global setting before letting the failure propagate.
        Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($defaultPrefixLength);
        throw $e;
    }

    Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($defaultPrefixLength);
}
/**
 * Resets the global wildcard minimum prefix length and the global fuzzy
 * default prefix length to the values held in $this->_wildcardMinPrefix and
 * $this->_defaultPrefixLength (presumably captured during set-up).
 */
public function tearDown()
{
    $wildcardMinPrefix = $this->_wildcardMinPrefix;
    $fuzzyPrefixLength = $this->_defaultPrefixLength;

    Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength($wildcardMinPrefix);
    Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($fuzzyPrefixLength);
}
/**
 * Returns the Lucene index for $this->location, opening or creating it on
 * first use and caching it in $this->index for subsequent calls.
 *
 * On first use the index is tuned (buffered docs, merge factor) and the global
 * default fuzzy prefix length is set to 1.
 *
 * @return Zend_Search_Lucene_Interface
 */
private function getLuceneIndex()
{
    if (empty($this->index)) {
        // Open the index if the location already exists, otherwise create a new one.
        $this->index = file_exists($this->location)
            ? new Zend_Search_Lucene($this->location)
            : Zend_Search_Lucene::create($this->location);

        $this->index->setMaxBufferedDocs(64);
        // NOTE: a setMaxMergeDocs(50) call was disabled here; the index keeps its default.
        Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(1);
        $this->index->setMergeFactor(5);
    }

    return $this->index;
}
/**
 * Opens the Lucene index at the configured index path, creating it when it
 * does not exist yet, then applies the configured search settings.
 *
 * Besides the per-index options, this changes library-wide state: result set
 * limit, terms-per-query limit, default analyzer, and the fuzzy / wildcard
 * prefix lengths (both set to 0).
 *
 * @throws nc_search_exception when Zend_Search_Lucene raises an exception
 *                             while opening or creating the index
 */
protected function open_index()
{
    $index_path = $this->get_index_path();

    try {
        $this->index = $this->index_exists($index_path)
            ? Zend_Search_Lucene::open($index_path)
            : Zend_Search_Lucene::create($index_path);
    } catch (Zend_Search_Lucene_Exception $lucene_error) {
        throw new nc_search_exception('Cannot open Lucene index: ' . $lucene_error->getMessage());
    }

    $this->is_opened = true;

    // Library-wide limits.
    Zend_Search_Lucene::setResultSetLimit($this->get_setting('ZendSearchLucene_ResultSetLimit'));
    Zend_Search_Lucene::setTermsPerQueryLimit($this->get_setting('MaxTermsPerQuery'));

    // Per-index tuning: option "X" is read from setting "ZendSearchLucene_X" and passed to setX().
    foreach (array('MaxBufferedDocs', 'MaxMergeDocs', 'MergeFactor') as $option) {
        $method = 'set' . $option;
        $this->index->$method($this->get_setting('ZendSearchLucene_' . $option));
    }

    // Use the project's own analyzer for indexing and query parsing.
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new nc_search_provider_zend_analyzer());

    // No required prefix, so fuzzy searches match a wider array of possibilities.
    Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(0);

    // No required prefix, so wildcards match a wider array of possibilities.
    Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(0);

    // @todo set default search fields
}
/**
 * Runs a frontend search and hands the paged results to the view.
 *
 * Reads "query" and "cat" from $_REQUEST and the remaining options
 * (perPage, page, fuzzy, field, viewscript, ...) from the action parameters.
 * The user query is combined with optional language and category term
 * queries, the hits are optionally restricted to the current host, and one
 * page of results is assigned to the view together with paging information.
 *
 * When $this->fuzzySearch is enabled and the search produced no results,
 * similar indexed terms that do yield hits are assigned as
 * $this->view->suggestions.
 *
 * Any Exception raised while searching is logged and results in an empty
 * $this->view->searchResults.
 */
public function findAction()
{
    // Either request key may be missing; read them without an undefined-index notice.
    $queryFromRequest = $this->cleanRequestString(isset($_REQUEST["query"]) ? $_REQUEST["query"] : null);
    $categoryFromRequest = $this->cleanRequestString(isset($_REQUEST["cat"]) ? $_REQUEST["cat"] : null);

    $searcher = new SearchPhp_Frontend_Searcher();

    $this->view->groupByCategory = $this->_getParam("groupByCategory");
    $this->view->omitSearchForm = $this->_getParam("omitSearchForm");
    $this->view->categoryOrder = $this->_getParam("categoryOrder");
    $this->view->omitJsIncludes = $this->_getParam("omitJsIncludes");

    // Paging values come from the request: force them to positive integers so the
    // offset arithmetic below cannot go negative or divide by zero.
    $perPage = (int) $this->_getParam("perPage");
    if ($perPage < 1) {
        $perPage = 10;
    }
    $page = (int) $this->_getParam("page");
    if ($page < 1) {
        $page = 1;
    }

    $queryStr = strtolower($queryFromRequest);

    $this->view->category = $categoryFromRequest;
    $category = !empty($this->view->category) ? $this->view->category : null;

    $pluginConf = SearchPhp_Plugin::getSearchConfigArray();
    if (!empty($pluginConf["search"]["frontend"]["categories"])) {
        $this->view->availableCategories = explode(",", $pluginConf["search"]["frontend"]["categories"]);
    }

    $doFuzzy = $this->_getParam("fuzzy");

    try {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();

        $field = $this->_getParam("field");
        if (!empty($field)) {
            Zend_Search_Lucene::setDefaultSearchField($field);
        }

        $searchResults = array();
        // Must exist even when no query was submitted; it is counted below.
        $validHits = array();

        if (!empty($queryStr)) {
            if ($doFuzzy) {
                // Append "~" to every word so each one is parsed as a fuzzy term.
                $queryStr = str_replace(" ", "~ ", $queryStr);
                $queryStr .= "~";
                Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(3);
            }

            $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($queryStr, 'utf-8');
            $query->addSubquery($userQuery, true);

            if (!empty($this->searchLanguage)) {
                if (is_object($this->searchLanguage)) {
                    $lang = $this->searchLanguage->toString();
                } else {
                    $lang = $this->searchLanguage;
                }
                // The "lang" field is matched without "_" / "-" separators.
                $lang = str_replace(array("_", "-"), "", $lang);
                $languageTerm = new Zend_Search_Lucene_Index_Term($lang, 'lang');
                $languageQuery = new Zend_Search_Lucene_Search_Query_Term($languageTerm);
                $query->addSubquery($languageQuery, true);
            }

            if (!empty($category)) {
                $categoryTerm = new Zend_Search_Lucene_Index_Term($category, 'cat');
                $categoryQuery = new Zend_Search_Lucene_Search_Query_Term($categoryTerm);
                $query->addSubquery($categoryQuery, true);
            }

            $validHits = $this->filterHitsByCurrentHost($this->frontendIndex->find($query));

            // Inclusive index range of the requested page, clamped to the last hit.
            $start = $perPage * ($page - 1);
            $end = $start + ($perPage - 1);
            if ($end > count($validHits) - 1) {
                $end = count($validHits) - 1;
            }

            for ($i = $start; $i <= $end; $i++) {
                $hit = $validHits[$i];
                $url = $hit->getDocument()->getField("url");
                $title = $hit->getDocument()->getField("title");

                $searchResult = array();
                $searchResult['boost'] = $hit->getDocument()->boost;
                $searchResult['title'] = $title->value;
                $searchResult['url'] = $url->value;
                $searchResult['sumary'] = $searcher->getSumaryForUrl($url->value, $queryStr);

                // "h1" is optional: a missing field raises a Lucene exception, which is ignored.
                try {
                    if ($hit->getDocument()->getField("h1")) {
                        $searchResult['h1'] = $hit->getDocument()->getField("h1")->value;
                    }
                } catch (Zend_Search_Lucene_Exception $e) {
                }

                // NOTE(review): this appends the hit's single "cat" value once per
                // configured category; kept as-is because views may rely on it.
                // The loop variable is deliberately not named $category so the
                // category filter used for suggestions below is not overwritten.
                foreach ($this->categories as $configuredCategory) {
                    try {
                        $searchResult['categories'][] = $hit->getDocument()->getField("cat")->value;
                    } catch (Zend_Search_Lucene_Exception $e) {
                    }
                }

                $searchResults[] = $searchResult;
            }
        }

        if (count($validHits) < 1) {
            $this->view->pages = 0;
        } else {
            $this->view->pages = ceil(count($validHits) / $perPage);
        }

        $this->view->perPage = $perPage;
        $this->view->page = $page;
        $this->view->total = count($validHits);
        $this->view->query = $queryStr;
        $this->view->searchResults = $searchResults;

        // Nothing found: look for similar indexed terms that do produce hits.
        if ($this->fuzzySearch and !empty($queryStr) and empty($searchResults)) {
            // Try a stricter match first (3-character prefix), then fall back to no prefix.
            $terms = SearchPhp_Plugin::fuzzyFindTerms($queryStr, $this->frontendIndex, 3);
            if (empty($terms)) {
                $terms = SearchPhp_Plugin::fuzzyFindTerms($queryStr, $this->frontendIndex, 0);
            }

            $suggestions = array();
            if (is_array($terms)) {
                // Same for every term, so resolve the language restriction once.
                $language = null;
                if ($this->searchLanguage != null) {
                    if (is_object($this->searchLanguage)) {
                        $language = $this->searchLanguage->toString();
                    } else {
                        $language = $this->searchLanguage;
                    }
                    $language = str_replace(array("_", "-"), "", $language);
                }

                $counter = 0;
                foreach ($terms as $term) {
                    $t = $term->text;

                    // Only suggest a term if it yields hits for the current language/category.
                    $termQuery = new Zend_Search_Lucene_Search_Query_Boolean();
                    if ($language != null) {
                        $languageTerm = new Zend_Search_Lucene_Index_Term($language, 'lang');
                        $languageQuery = new Zend_Search_Lucene_Search_Query_Term($languageTerm);
                        $termQuery->addSubquery($languageQuery, true);
                    }
                    if (!empty($category)) {
                        $categoryTerm = new Zend_Search_Lucene_Index_Term($category, 'cat');
                        $categoryQuery = new Zend_Search_Lucene_Search_Query_Term($categoryTerm);
                        $termQuery->addSubquery($categoryQuery, true);
                    }
                    $userQuery = Zend_Search_Lucene_Search_QueryParser::parse($t, 'utf-8');
                    $termQuery->addSubquery($userQuery, true);

                    $termHits = $this->filterHitsByCurrentHost($this->frontendIndex->find($termQuery));

                    if (count($termHits) > 0 and !in_array($t, $suggestions, true)) {
                        $suggestions[] = $t;
                        if ($counter >= 20) {
                            break;
                        }
                        $counter++;
                    }
                }
            }
            $this->view->suggestions = $suggestions;
        }
    } catch (Exception $e) {
        Logger::log("An Exception occured during search:", Zend_Log::ERR);
        Logger::log($e, Zend_Log::ERR);
        $this->view->searchResults = array();
    }

    if ($this->_getParam("viewscript")) {
        $this->renderScript($this->_getParam("viewscript"));
    }
}

/**
 * Restricts hits to the current host when $this->ownHostOnly is set.
 *
 * A hit is kept when its "url" field contains "http://<host>" or
 * "https://<host>", where <host> is $_SERVER['HTTP_HOST']. When the
 * restriction is disabled, or there are no hits, the hits are returned
 * unchanged (an empty array if none were given).
 *
 * NOTE(review): the host is matched as a substring anywhere in the URL, as in
 * the original check; confirm whether a prefix match is what is wanted.
 *
 * @param array|null $hits hits as returned by the index's find()
 * @return array hits that pass the host restriction
 */
private function filterHitsByCurrentHost($hits)
{
    if ($hits == null) {
        return array();
    }
    if (!$this->ownHostOnly) {
        return $hits;
    }

    $currentHost = $_SERVER['HTTP_HOST'];
    $validHits = array();
    foreach ($hits as $hit) {
        $url = $hit->getDocument()->getField("url");
        if (strpos($url->value, "http://" . $currentHost) !== false
            || strpos($url->value, "https://" . $currentHost) !== false
        ) {
            $validHits[] = $hit;
        }
    }

    return $validHits;
}
/**
 * Finds indexed terms that are similar to the given search string.
 *
 * Builds a fuzzy query for $queryStr, runs it against $index and returns the
 * terms the query reports via getQueryTerms().
 *
 * NOTE: this sets the global default fuzzy prefix length to $prefixLength and
 * does not restore the previous value; fuzzy queries created afterwards that
 * rely on the default are affected.
 *
 * @param string $queryStr search string to find similar terms for
 * @param \Zend_Search_Lucene_Interface|null $index index to search
 * @param integer $prefixLength optionally specify prefix length, default 0
 * @param float $similarity optionally specify similarity, default 0.5
 * @return \Zend_Search_Lucene_Index_Term[]|null term objects (callers read
 *         their ->text), or null when no index was given
 */
public static function fuzzyFindTerms($queryStr, $index, $prefixLength = 0, $similarity = 0.5)
{
    if ($index == null) {
        return null;
    }

    \Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength($prefixLength);

    $term = new \Zend_Search_Lucene_Index_Term($queryStr);
    $fuzzyQuery = new \Zend_Search_Lucene_Search_Query_Fuzzy($term, $similarity);

    // The hits themselves are not needed; the search is run before the query's
    // terms are read (presumably it is what populates them - confirm against
    // the Zend_Search_Lucene fuzzy query implementation).
    $index->find($fuzzyQuery);

    return $fuzzyQuery->getQueryTerms();
}
/**
 * Executes the frontend search and renders the result view.
 *
 * Builds a boolean Lucene query from $this->query plus the language, country,
 * category and restriction sub-queries, collects the current page of valid
 * hits and assigns the results, paging data and (when fuzzy search is enabled)
 * suggestions to the view. If any exception is raised while searching, it is
 * logged at debug level and an empty result set is assigned instead.
 *
 * Renders the script named by the "viewScript" parameter, falling back to
 * "/search/find.php".
 */
public function findAction()
{
    $this->disableViewAutoRender();
    $summaryBuilder = new Searcher();

    try {
        $booleanQuery = new \Zend_Search_Lucene_Search_Query_Boolean();

        $defaultField = $this->getParam('field');
        if (!empty($defaultField)) {
            \Zend_Search_Lucene::setDefaultSearchField($defaultField);
        }

        $results = [];
        $matches = [];

        if (!empty($this->query)) {
            if ($this->fuzzySearch) {
                // Append "~" to every word so each one is parsed as a fuzzy term.
                $this->query = str_replace(' ', '~ ', $this->query);
                $this->query .= '~';
                \Zend_Search_Lucene_Search_Query_Fuzzy::setDefaultPrefixLength(3);
            }

            $parsedQuery = \Zend_Search_Lucene_Search_QueryParser::parse($this->query, 'utf-8');
            $booleanQuery->addSubquery($parsedQuery, true);

            $this->addLanguageQuery($booleanQuery);
            $this->addCountryQuery($booleanQuery);
            $this->addCategoryQuery($booleanQuery);
            $this->addRestrictionQuery($booleanQuery);

            $matches = $this->getValidHits($this->frontendIndex->find($booleanQuery));

            // Inclusive index range of the current page, clamped to the last hit.
            $firstIndex = $this->perPage * ($this->currentPage - 1);
            $lastIndex = min($firstIndex + ($this->perPage - 1), count($matches) - 1);

            for ($position = $firstIndex; $position <= $lastIndex; $position++) {
                $match = $matches[$position];

                /** @var \Zend_Search_Lucene_Document $document */
                $document = $match->getDocument();
                $urlField = $document->getField('url');
                $titleField = $document->getField('title');
                $contentField = $document->getField('content');

                $entry = [
                    'boost' => $document->boost,
                    'title' => $titleField->value,
                    'url' => $urlField->value,
                    'summary' => $summaryBuilder->getSummaryForUrl($contentField->value, $this->untouchedQuery),
                ];

                // H1, description and imageTags are not available in pdf files;
                // a missing field raises a Lucene exception, which is ignored.
                try {
                    if ($document->getField('h1')) {
                        $entry['h1'] = $document->getField('h1')->value;
                    }
                    if ($document->getField('description')) {
                        $entry['description'] = $summaryBuilder->getSummaryForUrl(
                            $document->getField('description')->value,
                            $this->untouchedQuery
                        );
                    }
                    if ($document->getField('imageTags')) {
                        $entry['imageTags'] = $document->getField('imageTags')->value;
                    }
                } catch (\Zend_Search_Lucene_Exception $missingField) {
                }

                // Appends the hit's "cat" value once per configured category.
                foreach ($this->categories as $configuredCategory) {
                    try {
                        $entry['categories'][] = $match->getDocument()->getField('cat')->value;
                    } catch (\Zend_Search_Lucene_Exception $missingField) {
                    }
                }

                $results[] = $entry;
            }
        }

        $suggestions = false;
        if ($this->fuzzySearch) {
            $suggestions = $this->getFuzzySuggestions($results);
        }

        // Range shown on the current page: start is an offset, end is clamped to the hit count.
        $pageStart = $this->perPage * ($this->currentPage - 1);
        $pageEnd = min($pageStart + $this->perPage, count($matches));

        $pageCount = count($matches) > 0 ? ceil(count($matches) / $this->perPage) : 0;

        $this->view->assign([
            'searchCurrentPage' => $this->currentPage,
            'searchAllPages' => $pageCount,
            'searchCategory' => $this->category,
            'searchAvailableCategories' => $this->categories,
            'searchSuggestions' => $suggestions,
            'searchLanguage' => $this->searchLanguage,
            'searchCountry' => $this->searchCountry,
            'searchPerPage' => $this->perPage,
            'searchTotalHits' => count($matches),
            'searchQuery' => $this->untouchedQuery,
            'searchHasResults' => count($results) > 0,
            'searchResults' => $results,
            'searchCurrentPageResultStart' => $pageStart + 1,
            'searchCurrentPageResultEnd' => $pageEnd,
        ]);
    } catch (\Exception $searchError) {
        \Pimcore\Logger::debug('An Exception occurred during search: ' . $searchError->getMessage());
        $this->view->assign(['searchResults' => [], 'searchHasResults' => false]);
    }

    $script = $this->getParam('viewScript')
        ? $this->_getParam('viewScript')
        : '/search/find.php';
    $this->renderScript($script);
}