/**
 * Get index
 *
 * Builds the shared index on first use and caches it in self::$index.
 * A CaseInsensitive analyzer is configured (optionally with a stop-word
 * filter and a Morphy filter, both driven by the "zend_search" config
 * section) and installed as the default analyzer, the result-set limit and
 * query-parser encoding are applied, and finally the index is opened when
 * getRealPath() resolves the configured location, or created otherwise.
 *
 * @return \ZendSearch\Lucene\Index
 * @throws \InvalidArgumentException when a configured stop-words or morphy
 *                                   dictionary path cannot be resolved
 */
private function index()
{
    if (!isset(self::$index)) {
        $analyzer = new CaseInsensitive();

        if ($this->config()->exists('zend_search', 'stop_words')) {
            $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));
            if ($words === false) {
                throw new \InvalidArgumentException('Path not found');
            }
            $stop_word_filter = new StopWords();
            $stop_word_filter->loadFromFile($words);
            $analyzer->addFilter($stop_word_filter);
        }

        if ($this->config()->exists('zend_search', 'morphy_dicts')) {
            $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));
            if ($morphy_dicts === false) {
                throw new \InvalidArgumentException('Path not found');
            }
            $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
        }

        Analyzer::setDefault($analyzer);
        Lucene::setResultSetLimit($this->limit);
        QueryParser::setDefaultEncoding($this->config()->getCharset());

        // Fixed: this used to read "$this->config() - get(...)", i.e. a
        // subtraction followed by a call to an undefined global function.
        $index = $this->config()->get('zend_search', 'index');
        $path = $this->getRealPath($index);

        self::$index = $path ? Lucene::open($path) : Lucene::create($index);
    }

    return self::$index;
}
/**
 * {@inheritdoc}
 *
 * Sets the default analyzer and query encoding, then registers the
 * "zendsearch.*" entries on the container: the list of index paths, a
 * run-once initializer that opens (or creates) every configured index, the
 * resulting index collection, a MultiSearcher spanning all of them, and the
 * "zendsearch" alias for that searcher.
 */
public function register(Application $app)
{
    Analyzer::setDefault(new CaseInsensitive());
    QueryParser::setDefaultEncoding('UTF-8');

    $app['zendsearch.indices_path'] = array();

    $app['zendsearch.indices.initializer'] = $app->protect(function () use ($app) {
        // The static flag makes every call after the first a no-op.
        static $initialized = false;

        if (!$initialized) {
            $initialized = true;

            $opened = array();
            foreach ($app['zendsearch.indices_path'] as $name => $location) {
                if (file_exists($location)) {
                    $opened[$name] = Lucene::open($location);
                } else {
                    $opened[$name] = Lucene::create($location);
                }
            }

            $app['zendsearch.indices_collection'] = $opened;
        }
    });

    $app['zendsearch.indices'] = $app->share(function ($app) {
        $app['zendsearch.indices.initializer']();

        return $app['zendsearch.indices_collection'];
    });

    $app['zendsearch.multisearcher'] = $app->share(function ($app) {
        $app['zendsearch.indices.initializer']();

        $searcher = new MultiSearcher();
        foreach ($app['zendsearch.indices'] as $singleIndex) {
            $searcher->addIndex($singleIndex);
        }

        return $searcher;
    });

    $app['zendsearch'] = $app->share(function ($app) {
        return $app['zendsearch.multisearcher'];
    });
}
/**
 * Opens the search index at the given path, creating it when opening fails.
 *
 * Any exception raised while opening is treated as "index not available"
 * and answered by creating a new index at the same path.
 *
 * @param string $indexPath Path to the index
 *
 * @return SearchIndexInterface
 */
public static function openOrCreate($indexPath)
{
    try {
        $index = Lucene::open($indexPath);
    } catch (\Exception $e) {
        $index = Lucene::create($indexPath);
    }

    return $index;
}
/**
 * Searches the same sample index twice through a MultiSearcher and checks
 * that the hits of both copies are returned and resolve to documents.
 *
 * @covers ZendSearch\Lucene\MultiSearcher::find
 * @covers ZendSearch\Lucene\Search\QueryHit::getDocument
 */
public function testFind()
{
    $index = new Lucene\MultiSearcher(array(
        Lucene\Lucene::open(__DIR__ . '/_indexSample/_files'),
        Lucene\Lucene::open(__DIR__ . '/_indexSample/_files'),
    ));

    $hits = $index->find('submitting');

    // Two copies of the index, three hits each. assertCount() puts the
    // expected value first, so a failure message reads the right way round.
    $this->assertCount(2 * 3, $hits);

    foreach ($hits as $hit) {
        $document = $hit->getDocument();
        $this->assertTrue($document instanceof Lucene\Document);
    }
}
/**
 * Returns the index registered under the given name, creating it under
 * the indexes base path on first request and caching it afterwards.
 *
 * @param string $indexName The name of the index
 * @return SearchIndexInterface
 */
public function getIndex($indexName)
{
    if (!isset($this->indexes[$indexName])) {
        $location = $this->indexesBasePath . $indexName;

        // Trace output emitted every time a new index is allocated.
        echo "+++Allocated index: ", $indexName, "\n";

        $this->indexes[$indexName] = Lucene::create($location);
    }

    return $this->indexes[$indexName];
}
/**
 * Create connection to index
 *
 * Tries to open the index at $path. When opening raises an
 * ExceptionInterface a new index is created at that path instead. Any
 * other exception is reported as a missing directory when $path does not
 * exist, and re-thrown unchanged otherwise.
 *
 * @param string $path Location of the index directory
 * @throws \Exception
 */
public function __construct($path)
{
    $this->indexPath = $path;

    try {
        $this->index = Lucene::open($path);
    } catch (ExceptionInterface $e) {
        $this->index = Lucene::create($path);
    } catch (\Exception $e) {
        if (!file_exists($path)) {
            // Stray trailing quote removed from the message; the original
            // exception is chained so the root cause is not lost.
            throw new \Exception(
                "Couldn't connect to index of Zend Lucene. Directory '{$path}' doesn't exist.",
                0,
                $e
            );
        }

        throw $e;
    }
}
/**
 * Opens the lucene index, or rebuilds it from scratch when the index
 * folder does not carry the expected 'v0.6.0' marker node.
 *
 * @throws SetUpException
 */
public function openOrCreate()
{
    $folder = $this->files->setUpIndexFolder();
    $localPath = $folder->getStorage()->getLocalFolder($folder->getInternalPath());

    // The default analyzer has to be in place before the index is touched.
    Analyzer::setDefault(new CaseInsensitive());

    if (!$folder->nodeExists('v0.6.0')) {
        // Marker missing: drop whatever is there and start a fresh index.
        $this->logger->info('recreating outdated lucene index');
        $folder->delete();
        $this->index = Lucene::create($localPath);
        $folder->newFile('v0.6.0');

        return;
    }

    // Marker present: the existing index can be used as is.
    $this->index = Lucene::open($localPath);
}
/**
 * Get the ZendSearch lucene index instance associated with this instance.
 *
 * The index lives in "<search.connections.zend.path>/<name>". It is opened
 * on first access, created when opening raises a ZendSearch exception, and
 * cached on the instance afterwards.
 *
 * @return \ZendSearch\Lucene\Index
 */
protected function getIndex()
{
    if ($this->index) {
        return $this->index;
    }

    $basePath = rtrim(Config::get('search.connections.zend.path'), '/');
    $path = $basePath . '/' . $this->name;

    try {
        $this->index = \ZendSearch\Lucene\Lucene::open($path);
    } catch (\ZendSearch\Exception\ExceptionInterface $e) {
        $this->index = \ZendSearch\Lucene\Lucene::create($path);
    } catch (\ErrorException $e) {
        if (!file_exists($path)) {
            throw new \Exception(
                "'path' directory does not exist for the 'zend' search driver: '" . $basePath . "'"
            );
        }

        throw $e;
    }

    \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
        new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive()
    );

    return $this->index;
}
/**
 * Optimizes every index directory found below "<site root>/index/" and
 * renders a page reporting how long the run took.
 *
 * Directories that cannot be opened or optimized are skipped silently.
 *
 * @return mixed result of renderModuleContent()
 */
public function optimizeSearchIndex()
{
    $startedAt = microtime(true);

    $root = $this->cmsController->getCore()->getSiteRoot() . 'index' . DIRECTORY_SEPARATOR;

    foreach (scandir($root) as $entry) {
        // Only real sub-directories are candidates; "." and ".." are not.
        if (is_dir($root . $entry) !== false && !in_array($entry, array('.', '..'))) {
            try {
                $searchIndex = Lucene::open($root . $entry);
                $searchIndex->optimize();
                $searchIndex->commit();
            } catch (\Exception $e) {
                // Best effort: move on to the next directory.
                continue;
            }
        }
    }

    $finishedAt = microtime(true);

    return $this->renderModuleContent('mod-search-optimize', array(
        'siteTitle' => 'Optimize search index',
        'duration' => round($finishedAt - $startedAt, 3),
    ));
}
/**
 * Gets the index mapped by the given lucene identifier.
 *
 * The index is created when checkPath() rejects the configured path and
 * opened otherwise. The identifier's configuration is then applied:
 * default analyzer, buffering and merge settings, default file
 * permissions, an optional optimize run and the query parser encoding.
 *
 * @param string $identifier The lucene identifier.
 *
 * @return \ZendSearch\Lucene\Index The lucene index.
 */
public function getIndex($identifier)
{
    $config = $this->getConfig($identifier);
    $path = $config['path'];

    $index = $this->checkPath($path) ? Lucene::open($path) : Lucene::create($path);
    $this->indexes[$identifier] = $index;

    $analyzerClass = $config['analyzer'];
    Analyzer::setDefault(new $analyzerClass());

    $index->setMaxBufferedDocs($config['max_buffered_docs']);
    $index->setMaxMergeDocs($config['max_merge_docs']);
    $index->setMergeFactor($config['merge_factor']);

    ZfFilesystem::setDefaultFilePermissions($config['permissions']);

    if ($config['auto_optimized']) {
        $index->optimize();
    }

    QueryParser::setDefaultEncoding($config['query_parser_encoding']);

    return $this->indexes[$identifier];
}
/**
 * Rebuilds the search index from every stored upload.
 *
 * A new index is created at the configured index location. Each upload
 * becomes one Lucene document: .xlsx and .docx files are parsed through
 * the matching Lucene document loader, every other file gets an empty
 * document. The upload id, label and owner name are added as fields.
 *
 * @return mixed the response object, with its content set to "Index Ok"
 */
public function generateIndexAction()
{
    $searchIndexLocation = $this->getIndexLocation();
    $index = Lucene\Lucene::create($searchIndexLocation);

    $userTable = $this->getServiceLocator()->get('UserTable');
    $uploadTable = $this->getServiceLocator()->get('UploadTable');
    $allUploads = $uploadTable->fetchAll();

    // The upload directory is the same for every file.
    $uploadPath = $this->getFileUploadLocation();

    foreach ($allUploads as $fileUpload) {
        $uploadOwner = $userTable->getById($fileUpload->getUserId());

        // Build the lucene fields
        $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->getId());
        $label = Document\Field::Text('label', $fileUpload->getLabel());
        $owner = Document\Field::Text('owner', $uploadOwner->getName());

        $fileName = $fileUpload->getFilename();
        $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;

        if (substr_compare($fileName, ".xlsx", strlen($fileName) - strlen(".xlsx"), strlen(".xlsx")) === 0) {
            // Index an Excel spreadsheet
            $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
        } elseif (substr_compare($fileName, ".docx", strlen($fileName) - strlen(".docx"), strlen(".docx")) === 0) {
            // Index a Word document
            $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
        } else {
            $indexDoc = new Lucene\Document();
        }

        // Fixed: the document chosen above used to be replaced by a fresh
        // empty Lucene\Document here, which discarded the parsed file
        // contents. The fields are now added to the loaded document.
        $indexDoc->addField($label);
        $indexDoc->addField($owner);
        $indexDoc->addField($fileUploadId);

        $index->addDocument($indexDoc);
    }

    $index->commit();

    $response = $this->getResponse();
    $response->setContent("Index Ok");

    return $response;
}
/**
 * Lists all Post models.
 *
 * On every call the Lucene index under data/posts_index is also rebuilt
 * from all posts, and a multi-term query made of the space-separated words
 * of the "q" request parameter is run against it.
 *
 * @return mixed
 */
public function actionIndex()
{
    $searchModel = new PostSearch();
    $dataProvider = $searchModel->search(Yii::$app->request->post());

    // Process-wide locale and Lucene defaults used for indexing and search.
    setlocale(LC_CTYPE, 'ru_RU.UTF-8');
    Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
    Lucene\Analysis\Analyzer\Analyzer::setDefault(
        new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive()
    );
    Lucene\Lucene::setResultSetLimit(10);

    // Create the blog posts index; the folder has to be writable.
    $index = Lucene\Lucene::create('data/posts_index');

    // One document per post.
    foreach (Post::find()->all() as $post) {
        $document = new Lucene\Document();
        $document->addField(Lucene\Document\Field::UnIndexed('entry_id', $post->id));
        $document->addField(Lucene\Document\Field::Keyword('title', $post->title));
        $document->addField(Lucene\Document\Field::text('contents', $post->content));
        $index->addDocument($document);
    }

    $index->commit();

    // Split the search string into words and add one term per word.
    $searchString = urldecode(Yii::$app->getRequest()->getQueryParam('q'));
    $words = explode(' ', $searchString);

    $query = new Lucene\Search\Query\MultiTerm();
    foreach ($words as $word) {
        $query->addTerm(new Lucene\Index\Term($word));
    }

    // Re-open the index and run the query.
    $index = Lucene\Lucene::open('data/posts_index');
    $results = $index->find($query);

    return $this->render('index', [
        'searchModel' => $searchModel,
        'dataProvider' => $dataProvider,
        'search' => $results,
        'query' => $query,
    ]);
}
/**
 * Rebuilds the search index from every stored upload.
 *
 * A new index is created at the configured index location. Each upload
 * becomes one Lucene document: .xlsx and .docx files are parsed through
 * the matching Lucene document loader, every other file gets an empty
 * document. The upload id, label and owner name are added as fields.
 */
public function generateIndexAction()
{
    $searchIndexLocation = $this->getIndexLocation();
    $index = Lucene\Lucene::create($searchIndexLocation);

    $userTable = $this->getServiceLocator()->get('UserTable');
    $uploadTable = $this->getServiceLocator()->get('UploadTable');
    $allUploads = $uploadTable->fetchAll();

    foreach ($allUploads as $fileUpload) {
        // Fixed: the owner has to be loaded before "$uploadOwner->name" is
        // read below; in the source this lookup sat behind a "//" marker.
        $uploadOwner = $userTable->getUser($fileUpload->user_id);

        // id field
        $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->id);
        // label field
        $label = Document\Field::Text('label', $fileUpload->label);
        // owner field
        $owner = Document\Field::Text('owner', $uploadOwner->name);

        $fileName = $fileUpload->filename;

        if (substr_compare($fileName, ".xlsx", strlen($fileName) - strlen(".xlsx"), strlen(".xlsx")) === 0) {
            // index excel sheet
            $uploadPath = $this->getFileUploadLocation();
            $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($uploadPath . "/" . $fileName);
        } elseif (substr_compare($fileName, ".docx", strlen($fileName) - strlen(".docx"), strlen(".docx")) === 0) {
            // index word doc
            $uploadPath = $this->getFileUploadLocation();
            $indexDoc = Lucene\Document\Docx::loadDocxFile($uploadPath . "/" . $fileName);
        } else {
            $indexDoc = new Lucene\Document();
        }

        $indexDoc->addField($label);
        $indexDoc->addField($owner);
        $indexDoc->addField($fileUploadId);

        $index->addDocument($indexDoc);
    }

    $index->commit();
}
/**
 * Answers an autocomplete request with the title matches from the search
 * index at ./data/search/.
 *
 * Only hits scoring at least 0.8 are returned. Each entry carries the
 * title, a display label for the hit type and a link to the item; unknown
 * types get the label "?" and the link "#".
 *
 * @return JsonModel
 */
public function autocompleteAction()
{
    // hit type => array(route name, display label)
    $targets = array(
        'article' => array('greinar/index', "Grein"),
        'event' => array('vidburdir/index', "Viðburður"),
        'group' => array('hopur/index', "Hópur"),
        'news' => array('frettir/index', "Frétt"),
    );

    $index = Lucene\Lucene::open('./data/search/');
    $hits = $index->find('title:' . $this->params()->fromRoute('term'));

    $suggestions = array();
    foreach ($hits as $hit) {
        if (!($hit->score >= 0.8)) {
            continue;
        }

        $kind = $hit->type;
        if (isset($targets[$kind])) {
            $href = $this->url()->fromRoute($targets[$kind][0], array('id' => $hit->identifier));
            $type = $targets[$kind][1];
        } else {
            $href = "#";
            $type = "?";
        }

        $suggestions[] = (object) array(
            'value' => $hit->title,
            'type' => $type,
            'href' => $href,
        );
    }

    return new JsonModel($suggestions);
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Walks the term stream of every searched field and scores each candidate
 * term against the query term with a levenshtein()-based similarity. Terms
 * scoring above the minimum similarity are collected together with their
 * key and a score scaled by 1 / (1 - minimum similarity).
 *
 * The result is an EmptyResult for no match, a Term query for exactly one
 * match, and otherwise a Boolean query holding the best-scored matches
 * (at most MAX_CLAUSE_COUNT), each boosted by its score.
 *
 * @param \ZendSearch\Lucene\SearchIndexInterface $index
 * @throws \ZendSearch\Lucene\Exception\OutOfBoundsException
 * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 */
public function rewrite(Lucene\SearchIndexInterface $index)
{
    $this->_matches  = array();
    $this->_scores   = array();
    $this->_termKeys = array();

    // Without an explicit field the search runs through all fields.
    $fields = ($this->_term->field === null)
        ? $index->getFieldNames(true)
        : array($this->_term->field);

    $prefix           = Index\Term::getPrefix($this->_term->text, $this->_prefixLength);
    $prefixByteLength = strlen($prefix);
    $prefixUtf8Length = Index\Term::getLength($prefix);
    $termLength       = Index\Term::getLength($this->_term->text);

    // The length of the rest is taken in bytes since levenshtein() is not
    // UTF-8 compatible.
    $termRest       = substr($this->_term->text, $prefixByteLength);
    $termRestLength = strlen($termRest);

    $scaleFactor = 1 / (1 - $this->_minimumSimilarity);
    $maxTerms    = Lucene\Lucene::getTermsPerQueryLimit();

    foreach ($fields as $field) {
        $index->resetTermsStream();

        if ($prefix != '') {
            $index->skipTo(new Index\Term($prefix, $field));

            while ($index->currentTerm() !== null
                && $index->currentTerm()->field == $field
                && substr($index->currentTerm()->text, 0, $prefixByteLength) == $prefix
            ) {
                $candidate    = $index->currentTerm();
                $target       = substr($candidate->text, $prefixByteLength);
                $targetLength = strlen($target);

                if (isset($this->_maxDistances[$targetLength])) {
                    $maxDistance = $this->_maxDistances[$targetLength];
                } else {
                    $maxDistance = $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);
                }

                if ($termRestLength == 0) {
                    // Nothing left to compare: adding the letters of the
                    // current term to the prefix yields the new word.
                    $similarity = ($prefixUtf8Length == 0) ? 0 : 1 - $targetLength / $prefixUtf8Length;
                } elseif ($targetLength == 0) {
                    $similarity = ($prefixUtf8Length == 0) ? 0 : 1 - $termRestLength / $prefixUtf8Length;
                } elseif ($maxDistance < abs($termRestLength - $targetLength)) {
                    // The length difference alone already needs more edits
                    // than allowed ("pre" vs "prefixes" cannot be closer
                    // than 5 edits), so the term is discarded unseen.
                    $similarity = 0;
                } else {
                    $similarity = 1 - levenshtein($termRest, $target)
                        / ($prefixUtf8Length + min($termRestLength, $targetLength));
                }

                if ($similarity > $this->_minimumSimilarity) {
                    $this->_matches[]  = $candidate;
                    $this->_termKeys[] = $candidate->key();
                    $this->_scores[]   = ($similarity - $this->_minimumSimilarity) * $scaleFactor;

                    if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                        throw new OutOfBoundsException('Terms per query limit is reached.');
                    }
                }

                $index->nextTerm();
            }
        } else {
            $index->skipTo(new Index\Term('', $field));

            while ($index->currentTerm() !== null && $index->currentTerm()->field == $field) {
                $candidate    = $index->currentTerm();
                $target       = $candidate->text;
                $targetLength = strlen($target);

                if (isset($this->_maxDistances[$targetLength])) {
                    $maxDistance = $this->_maxDistances[$targetLength];
                } else {
                    $maxDistance = $this->_calculateMaxDistance(0, $termRestLength, $targetLength);
                }

                if ($maxDistance < abs($termRestLength - $targetLength)) {
                    // Same shortcut as above: the length difference alone
                    // exceeds the allowed edit distance.
                    $similarity = 0;
                } else {
                    $similarity = 1 - levenshtein($termRest, $target) / min($termRestLength, $targetLength);
                }

                if ($similarity > $this->_minimumSimilarity) {
                    $this->_matches[]  = $candidate;
                    $this->_termKeys[] = $candidate->key();
                    $this->_scores[]   = ($similarity - $this->_minimumSimilarity) * $scaleFactor;

                    if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                        throw new OutOfBoundsException('Terms per query limit is reached.');
                    }
                }

                $index->nextTerm();
            }
        }

        $index->closeTermsStream();
    }

    $matchCount = count($this->_matches);

    if ($matchCount == 0) {
        return new EmptyResult();
    }

    if ($matchCount == 1) {
        return new Term(reset($this->_matches));
    }

    $rewrittenQuery = new Boolean();

    // Best score first; ties are ordered by term key.
    array_multisort(
        $this->_scores, SORT_DESC, SORT_NUMERIC,
        $this->_termKeys, SORT_ASC, SORT_STRING,
        $this->_matches
    );

    $added = 0;
    foreach ($this->_matches as $id => $matchedTerm) {
        $subquery = new Term($matchedTerm);
        $subquery->setBoost($this->_scores[$id]);
        $rewrittenQuery->addSubquery($subquery);

        if (++$added >= self::MAX_CLAUSE_COUNT) {
            break;
        }
    }

    return $rewrittenQuery;
}
/**
 * Returns whatever Lucene::getDefaultSearchField() currently reports.
 *
 * @return mixed
 */
public function DefaultSearchField()
{
    $defaultField = Lucene::getDefaultSearchField();

    return $defaultField;
}
/**
 * Returns the lucene index of this instance, initialising it on first use.
 *
 * Initialisation sets the query parser encoding, the default analyzer and
 * the default boolean operator, then opens the index at getIndexPath() or
 * creates it when opening raises a Lucene RuntimeException.
 *
 * @return mixed the opened or newly created index
 */
protected function getIndex()
{
    if ($this->index == null) {
        \ZendSearch\Lucene\Search\QueryParser::setDefaultEncoding('utf-8');
        \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
            new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive()
        );
        \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(
            \ZendSearch\Lucene\Search\QueryParser::B_AND
        );

        try {
            $opened = \ZendSearch\Lucene\Lucene::open($this->getIndexPath());
        } catch (\ZendSearch\Lucene\Exception\RuntimeException $ex) {
            $opened = \ZendSearch\Lucene\Lucene::create($this->getIndexPath());
        }

        $this->index = $opened;

        return $opened;
    }

    return $this->index;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Translates the wildcard pattern into a regular expression ("?" becomes
 * ".", "*" becomes ".*"), walks the term stream of every searched field
 * starting at the pattern's non-wildcard prefix and collects all matching
 * terms.
 *
 * The result is an EmptyResult for no match, a Term query for exactly one
 * match and a MultiTerm query otherwise.
 *
 * @param \ZendSearch\Lucene\SearchIndexInterface $index
 * @throws \ZendSearch\Lucene\Exception\RuntimeException
 * @throws \ZendSearch\Lucene\Exception\OutOfBoundsException
 * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 */
public function rewrite(Lucene\SearchIndexInterface $index)
{
    $this->_matches = array();

    // Without an explicit field the search runs through all fields.
    $fields = ($this->_pattern->field === null)
        ? $index->getFieldNames(true)
        : array($this->_pattern->field);

    $prefix       = self::_getPrefix($this->_pattern->text);
    $prefixLength = strlen($prefix);

    $quotedPattern   = preg_quote($this->_pattern->text, '/');
    $matchExpression = '/^' . str_replace(array('\\?', '\\*'), array('.', '.*'), $quotedPattern) . '$/';

    if ($prefixLength < self::$_minPrefixLength) {
        throw new RuntimeException(
            'At least ' . self::$_minPrefixLength . ' non-wildcard characters are required at the beginning of pattern.'
        );
    }

    /**
     * @todo check for PCRE unicode support may be performed through Zend_Environment in some future
     */
    ErrorHandler::start(E_WARNING);
    $result = preg_match('/\\pL/u', 'a');
    ErrorHandler::stop();

    if ($result == 1) {
        // PCRE unicode support is available: add the Unicode modifier.
        $matchExpression .= 'u';
    }

    $maxTerms  = Lucene\Lucene::getTermsPerQueryLimit();
    $hasPrefix = ($prefix != '');

    foreach ($fields as $field) {
        $index->resetTermsStream();

        // With a prefix the stream starts at the prefix and stops at the
        // first term not sharing it; without one the whole field is read.
        $index->skipTo(new Index\Term($hasPrefix ? $prefix : '', $field));

        while ($index->currentTerm() !== null
            && $index->currentTerm()->field == $field
            && (!$hasPrefix || substr($index->currentTerm()->text, 0, $prefixLength) == $prefix)
        ) {
            if (preg_match($matchExpression, $index->currentTerm()->text) === 1) {
                $this->_matches[] = $index->currentTerm();

                if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                    throw new OutOfBoundsException('Terms per query limit is reached.');
                }
            }

            $index->nextTerm();
        }

        $index->closeTermsStream();
    }

    $matchCount = count($this->_matches);

    if ($matchCount == 0) {
        return new EmptyResult();
    }

    if ($matchCount == 1) {
        return new Term(reset($this->_matches));
    }

    $rewrittenQuery = new MultiTerm();
    foreach ($this->_matches as $matchedTerm) {
        $rewrittenQuery->addTerm($matchedTerm);
    }

    return $rewrittenQuery;
}
/**
 * Returns the lucene index stored at getLuceneIndexFile(): the existing
 * index is opened when that location exists, a new one is created
 * otherwise.
 *
 * @return mixed the opened or newly created index
 */
public static function getLuceneIndex()
{
    $location = self::getLuceneIndexFile();

    if (!file_exists($location)) {
        return Lucene::create($location);
    }

    return Lucene::open($location);
}
/**
 * Indexes an HTML document and checks which concatenated words become
 * searchable: "ZendFramework" must be found once, "FooBar" must not be
 * found.
 *
 * @group ZF-4252
 */
public function testHtmlInlineTagsIndexing()
{
    $index = Lucene\Lucene::create(__DIR__ . '/_index/_files');

    $htmlString = '<html><head><title>Hello World</title></head>'
        . '<body><b>Zend</b>Framework' . "\n"
        . ' <div>Foo</div>Bar ' . "\n"
        . ' <strong>Test</strong></body></html>';

    $doc = Document\Html::loadHTML($htmlString);
    $index->addDocument($doc);

    // assertCount() takes the expected value first, so a failure message
    // reports expected and actual the right way round.
    $hits = $index->find('FooBar');
    $this->assertCount(0, $hits);

    $hits = $index->find('ZendFramework');
    $this->assertCount(1, $hits);

    // Drop the index object before its files are removed.
    unset($index);
    $this->_clearDirectory(__DIR__ . '/_index/_files');
}
/**
 * Deletes an upload: the file on disk, its documents in the search index
 * and its database row, then redirects to the uploads list.
 *
 * @return mixed redirect to the "uploads" route, action "index"
 */
public function deleteAction()
{
    $fileId = $this->params("id");

    $uploadTable = $this->getServiceLocator()->get('UploadTable');
    $upload = $uploadTable->getById($fileId);

    $fileName = $upload->getFileName();
    $fileNameP = $this->getFileUploadLocation() . DIRECTORY_SEPARATOR . $fileName;
    if (file_exists($fileNameP)) {
        unlink($fileNameP);
    }

    // Remove the upload from the Lucene index.
    //
    // Fixed: the index is now opened. The previous Lucene::create() call
    // started a brand-new index at this location on every delete.
    $searchIndexLocation = $this->getIndexLocation();
    try {
        $index = Lucene\Lucene::open($searchIndexLocation);
    } catch (\Exception $e) {
        // No usable index means there is nothing to remove from it.
        $index = null;
    }

    if ($index !== null) {
        // Fixed: find() yields a list of hits, not a single document, so
        // every hit is inspected. Only documents whose stored upload_id
        // matches are deleted, addressed by the hit's own id.
        $removed = false;
        foreach ($index->find($fileId) as $hit) {
            if ((string) $hit->upload_id === (string) $fileId) {
                $index->delete($hit->id);
                $removed = true;
            }
        }

        if ($removed) {
            $index->commit();
        }
    }

    $uploadTable->deleteById($fileId);

    return $this->redirect()->toRoute('uploads', array('action' => 'index'));
}
/**
 * Checks that a result-set limit of 3 caps the hits of a phrase query and
 * that the returned hits carry the expected ids, scores and paths.
 *
 * The previous limit is restored afterwards, also when an assertion fails,
 * so the static setting cannot leak into other tests.
 */
public function testLimitingResult()
{
    $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');

    $storedResultSetLimit = Lucene\Lucene::getResultSetLimit();
    Lucene\Lucene::setResultSetLimit(3);

    try {
        $hits = $index->find('"reporting bugs"', 'path');

        $this->assertCount(3, $hits);

        // array(id, score, path) per expected hit
        $expectedResultset = array(
            array(7, 0.212395, 'IndexSource/contributing.bugs.html'),
            array(0, 0.247795, 'IndexSource/contributing.documentation.html'),
            array(2, 0.176996, 'IndexSource/contributing.patches.html'),
        );

        foreach ($hits as $resId => $hit) {
            // Expected value first, actual value second.
            $this->assertEquals($expectedResultset[$resId][0], $hit->id);
            $this->assertTrue(abs($hit->score - $expectedResultset[$resId][1]) < 1.0E-6);
            $this->assertEquals($expectedResultset[$resId][2], $hit->path);
        }
    } catch (\Exception $e) {
        Lucene\Lucene::setResultSetLimit($storedResultSetLimit);

        throw $e;
    }

    Lucene\Lucene::setResultSetLimit($storedResultSetLimit);
}
/**
 * Shows the search form and, for POST requests, the hits for the
 * submitted query.
 *
 * The view receives "form" and "searchResults"; the latter is null when no
 * search was run.
 *
 * @return ViewModel
 */
public function searchAction()
{
    // Fixed: defined up front. For non-POST requests the variable used to
    // be undefined when it was handed to the view model.
    $searchResult = null;

    $request = $this->getRequest();
    if ($request->isPost()) {
        $queryText = $request->getPost()->get('query');
        $searchIndexLocation = $this->getIndexLocation();
        $index = Lucene\Lucene::open($searchIndexLocation);
        $searchResult = $index->find($queryText);
    }

    $form = new \Zend\Form\Form();
    $form->add(array(
        'name' => 'query',
        'attributes' => array('type' => 'text', 'id' => 'queryText'),
        'options' => array('label' => 'Search String'),
    ));
    $form->add(array(
        'name' => 'submit',
        'attributes' => array('type' => 'submit', 'value' => 'Search'),
    ));

    $viewModel = new ViewModel(array('form' => $form, 'searchResults' => $searchResult));

    return $viewModel;
}
/**
 * Object constructor
 *
 * @param mixed $text  Stored as the term text.
 * @param mixed $field Stored as the term field; when null, the value of
 *                     Lucene::getDefaultSearchField() is used instead.
 */
public function __construct($text, $field = null)
{
    if ($field === null) {
        $field = Lucene\Lucene::getDefaultSearchField();
    }

    $this->field = $field;
    $this->text = $text;
}
/**
 * Creates a new lucene index at the path of the file source configured
 * under the 'fileSystem' option.
 */
public function flushIndex()
{
    $sourceService = tao_models_classes_FileSourceService::singleton();
    $fileSystem = $sourceService->getFileSource($this->getOption('fileSystem'));
    $indexPath = $fileSystem->getPath();

    Lucene::create($indexPath);
}
/**
 * Looks up the highlight coordinates for the "hl" route parameter in the
 * per-page Lucene index of the requested page.
 *
 * The page's resource directory is assembled from the site, collection,
 * container, reel and page parameters. When reel, page or hl is missing,
 * empty locations and no coordinates are returned. When the index
 * directory has no segments.gen file, the locations are returned without
 * coordinates.
 *
 * @return array "imgloc" (image path), "indloc" (index path) and "coords"
 *               (list of [x1, y1, x2, y2] entries, one per hit)
 */
public function sortOutHlCoords()
{
    //Lucene operators
    $operators = array("and", "or", "not");

    $config = $this->getServiceLocator()->get('config');
    $paramInfo = $this->sortOutParams($config);

    //collect building blocks
    $resLoc = $paramInfo['resLoc'];
    $site = $paramInfo['site'];
    $collection = $paramInfo['collection'];
    $container = $paramInfo['container'];
    $reel = $paramInfo['reel'];
    $page = $paramInfo['page'];

    //the all important query
    $hl = $this->params()->fromRoute('hl', '');

    //coordinates to pass back
    $coords = [];

    //pass back empty coordinate set if any of these parameters are missing
    if ($this->isNullOrEmpty($reel) || $this->isNullOrEmpty($page) || $this->isNullOrEmpty($hl)) {
        return array("imgloc" => '', "indloc" => '', "coords" => $coords);
    }

    //location of files - ODW file layout
    $resLoc .= '/' . $site . '/' . $collection . '/' . $container . '/' . $reel . '/odw/' . $page . '/';
    $imgLoc = $resLoc . '../../' . $page . '.jpg';

    //not all images will have IA derivative
    $iaLoc = $resLoc . 'ia/' . $page . '.jpg';
    if (file_exists($iaLoc) !== false) {
        $imgLoc = $iaLoc;
    }

    //need index directory and segments file to be valid lucene layout
    $indLoc = $resLoc . 'index/imgworks';
    if (!file_exists($indLoc . '/segments.gen')) {
        return array("imgloc" => $imgLoc, "indloc" => $indLoc, "coords" => $coords);
    }

    //build the search text from the analyzer's tokens
    $searchText = '';
    $queryTokens = Analyzer\Analyzer::getDefault()->tokenize($hl);
    foreach ($queryTokens as $token) {
        $searchTerm = $token->getTermText();
        if (in_array($searchTerm, $operators, true)) {
            continue;
        }

        //the stem is created separately with stem_english()
        $stem = stem_english($searchTerm);

        //Fixed: the wildcard is only appended when the stem itself (the
        //text that precedes the "*") has at least 3 characters, and every
        //stem is followed by a space. Short terms used to be appended
        //without a separator and fused with the term that followed.
        $searchText .= (strlen($stem) >= 3) ? $stem . "* " : $stem . " ";
    }

    //now do search
    $index = Lucene\Lucene::open($indLoc);
    $searchResults = $index->find($searchText);

    //assemble results
    foreach ($searchResults as $searchResult) {
        $coords[] = [$searchResult->x1, $searchResult->y1, $searchResult->x2, $searchResult->y2];
    }

    //pass back image and index location in addition to results
    return array("imgloc" => $imgLoc, "indloc" => $indLoc, "coords" => $coords);
}
/**
 * Adds one document to a new index and checks, without committing first,
 * that document 0 is not reported as deleted.
 *
 * @group ZF-9680
 */
public function testIsDeletedWithoutExplicitCommit()
{
    $index = Lucene\Lucene::create(__DIR__ . '/_index/_files');

    $document = new Document();

    $idField = Document\Field::Keyword('_id', 'myId');
    $document->addField($idField);

    $blaField = Document\Field::Keyword('bla', 'blubb');
    $document->addField($blaField);

    $index->addDocument($document);

    $deleted = $index->isDeleted(0);
    $this->assertFalse($deleted);
}
/**
 * Class constructor. Create a new multi-term query object.
 *
 * if $signs array is omitted then all terms are required
 * it differs from addTerm() behavior, but should never be used
 *
 * The signs are only kept when at least one of them is not strictly true;
 * otherwise they are stored as null, which stands for "all terms
 * required".
 *
 * @param array $terms Array of \ZendSearch\Lucene\Index\Term objects
 * @param array $signs Array of signs. Sign is boolean|null.
 * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
 */
public function __construct($terms = null, $signs = null)
{
    if (!is_array($terms)) {
        return;
    }

    // Fixed: a limit of 0 means "no limit", the same way the rewrite()
    // methods treat it ("$maxTerms != 0 && ..."). It used to reject every
    // non-empty term list.
    $limit = Lucene\Lucene::getTermsPerQueryLimit();
    if ($limit != 0 && count($terms) > $limit) {
        throw new InvalidArgumentException('Terms per query limit is reached.');
    }

    $this->_terms = $terms;
    $this->_signs = null;

    // Check if all terms are required
    if (is_array($signs)) {
        foreach ($signs as $sign) {
            if ($sign !== true) {
                $this->_signs = $signs;
                break;
            }
        }
    }
}
/**
 * Indexes the data found in the JSON files.
 *
 * Every regular file in "data/json" next to this source file is decoded
 * and gets its own index under "data/index", named after the file name
 * minus its last five characters. Each decoded entry becomes a document
 * with a "url" text field (taken from the entry's title) and an unstored
 * "contents" field (taken from the entry's text).
 */
public function index()
{
    $dir = realpath(dirname(__FILE__)) . DIRECTORY_SEPARATOR . "data" . DIRECTORY_SEPARATOR;
    $jsonDir = $dir . "json";
    $indexDir = $dir . "index";

    // Read the JSON files
    foreach (scandir($jsonDir) as $file) {
        if ($file == '.' || $file == '..') {
            continue;
        }

        $jsonFile = $jsonDir . DIRECTORY_SEPARATOR . $file;

        // Anything that is not a regular file is ignored
        if (!is_file($jsonFile)) {
            continue;
        }

        $entries = json_decode(file_get_contents($jsonFile));
        $indexName = substr($file, 0, -5);

        // Create the index
        $index = Lucene\Lucene::create($indexDir . DIRECTORY_SEPARATOR . $indexName);

        // Create one document per entry and define the fields to index
        foreach ($entries as $entry) {
            $document = new Lucene\Document();
            $document->addField(Lucene\Document\Field::Text('url', $entry->title));
            $document->addField(Lucene\Document\Field::UnStored('contents', $entry->text));
            $index->addDocument($document);
        }
    }
}
/**
 * Runs a search against the created indexes.
 *
 * For every entry of "data/json" next to this source file, the index of
 * the same name (file name minus its last five characters) is opened under
 * "data/index", the query is run, and the "url" field of each hit is
 * echoed as an <h3> heading.
 *
 * @param $query
 */
public function query($query)
{
    $dir = realpath(dirname(__FILE__)) . DIRECTORY_SEPARATOR . "data" . DIRECTORY_SEPARATOR;
    $jsonDir = $dir . "json";
    $indexDir = $dir . "index";

    // Walk through the indexes
    foreach (scandir($jsonDir) as $file) {
        if ($file != '.' && $file != '..') {
            $indexName = substr($file, 0, -5);

            // Open the index
            $index = Lucene\Lucene::open($indexDir . DIRECTORY_SEPARATOR . $indexName);

            // Run the query
            $hits = $index->find($query);

            // List the results
            foreach ($hits as $hit) {
                $document = $hit->getDocument();
                $url = $document->getFieldValue('url');

                echo "<h3>" . $url . "</h3><br />";
            }
        }
    }
}