/**
 * Lazily builds and returns the shared Lucene index.
 *
 * On first use this configures the default analyzer (adding the stop-word
 * and morphology filters when the "zend_search" config section defines
 * them), the result-set limit and the query parser encoding. It then opens
 * the index when the configured location resolves to a real path, or creates
 * it at the configured location otherwise. The instance is cached in
 * self::$index and reused by later calls.
 *
 * @return \ZendSearch\Lucene\Index
 *
 * @throws \InvalidArgumentException When a configured "stop_words" or
 *                                   "morphy_dicts" path cannot be resolved.
 */
private function index()
{
    if (!isset(self::$index)) {
        $analyzer = new CaseInsensitive();

        if ($this->config()->exists('zend_search', 'stop_words')) {
            $stop_word_filter = new StopWords();
            $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));
            if ($words === false) {
                throw new \InvalidArgumentException('Path not found');
            }
            $stop_word_filter->loadFromFile($words);
            $analyzer->addFilter($stop_word_filter);
        }

        if ($this->config()->exists('zend_search', 'morphy_dicts')) {
            $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));
            if ($morphy_dicts === false) {
                throw new \InvalidArgumentException('Path not found');
            }
            $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
        }

        Analyzer::setDefault($analyzer);
        Lucene::setResultSetLimit($this->limit);
        QueryParser::setDefaultEncoding($this->config()->getCharset());

        // Must be a method call on the config object; the previous
        // "config() - get(...)" was a subtraction of an undefined function.
        $index = $this->config()->get('zend_search', 'index');
        $path = $this->getRealPath($index);

        // A falsy resolved path means there is nothing to open yet.
        self::$index = $path ? Lucene::open($path) : Lucene::create($index);
    }

    return self::$index;
}
/**
 * {@inheritdoc}
 *
 * Registers the "zendsearch.*" services: the map of index paths, a one-shot
 * initializer that opens or creates every configured index, the resulting
 * index collection, and a multi-searcher spanning all of them.
 */
public function register(Application $app)
{
    // Process-wide Lucene defaults.
    Analyzer::setDefault(new CaseInsensitive());
    QueryParser::setDefaultEncoding('UTF-8');

    $app['zendsearch.indices_path'] = array();

    // Protected so the container hands back the closure itself; the static
    // flag makes repeated invocations a no-op.
    $app['zendsearch.indices.initializer'] = $app->protect(function () use ($app) {
        static $initialized = false;

        if (!$initialized) {
            $initialized = true;

            $opened = array();
            foreach ($app['zendsearch.indices_path'] as $name => $location) {
                if (file_exists($location)) {
                    $opened[$name] = Lucene::open($location);
                } else {
                    $opened[$name] = Lucene::create($location);
                }
            }

            $app['zendsearch.indices_collection'] = $opened;
        }
    });

    $app['zendsearch.indices'] = $app->share(function ($app) {
        $app['zendsearch.indices.initializer']();

        return $app['zendsearch.indices_collection'];
    });

    $app['zendsearch.multisearcher'] = $app->share(function ($app) {
        $app['zendsearch.indices.initializer']();

        $searcher = new MultiSearcher();
        foreach ($app['zendsearch.indices'] as $luceneIndex) {
            $searcher->addIndex($luceneIndex);
        }

        return $searcher;
    });

    // Convenience alias for the multi-searcher.
    $app['zendsearch'] = $app->share(function ($app) {
        return $app['zendsearch.multisearcher'];
    });
}
/**
 * Opens the Lucene index in the given directory, or creates one there when
 * the directory holds no "segments.gen" file.
 *
 * @param string $directory
 * @return \ZendSearch\Lucene\SearchIndexInterface
 */
protected function getLuceneIndex($directory)
{
    $segmentsFile = $directory . DIRECTORY_SEPARATOR . 'segments.gen';

    return file_exists($segmentsFile)
        ? Lucene::open($directory)
        : Lucene::create($directory);
}
/**
 * Returns the Zend search index at the given path, creating it when opening
 * fails with any exception.
 *
 * @param string $indexPath Path to the index
 *
 * @return SearchIndexInterface
 */
public static function openOrCreate($indexPath)
{
    try {
        $index = Lucene::open($indexPath);
    } catch (\Exception $e) {
        // Any failure to open is answered by creating a new index.
        $index = Lucene::create($indexPath);
    }

    return $index;
}
/**
 * Returns the index registered under the given name, creating it under the
 * configured base path (and announcing the allocation on standard output)
 * the first time the name is requested.
 *
 * @param string $indexName The name of the index
 * @return SearchIndexInterface
 */
public function getIndex($indexName)
{
    if (isset($this->indexes[$indexName])) {
        return $this->indexes[$indexName];
    }

    $location = $this->indexesBasePath . $indexName;

    echo "+++Allocated index: " . $indexName . "\n";
    $this->indexes[$indexName] = Lucene::create($location);

    return $this->indexes[$indexName];
}
/**
 * Connects to the Lucene index stored at the given path.
 *
 * The index is opened when possible. When opening fails with an exception
 * implementing ExceptionInterface, a new index is created at the same path
 * instead. Any other exception is propagated; if the path does not exist,
 * it is replaced by a descriptive error that keeps the original as its
 * previous exception.
 *
 * @param string $path Location of the index.
 *
 * @throws \Exception When the index can neither be opened nor created.
 */
public function __construct($path)
{
    $this->indexPath = $path;

    try {
        $this->index = Lucene::open($path);
    } catch (ExceptionInterface $e) {
        $this->index = Lucene::create($path);
    } catch (\Exception $e) {
        if (!file_exists($path)) {
            // Keep the root cause attached for debugging.
            throw new \Exception(
                "Couldn't connect to index of Zend Lucene. Directory '{$path}' doesn't exist.",
                0,
                $e
            );
        }

        throw $e;
    }
}
/**
 * Returns the ZendSearch Lucene index for this instance, opening or creating
 * it under the configured "search.connections.zend.path" on first access.
 *
 * @return \ZendSearch\Lucene\Index
 */
protected function getIndex()
{
    if ($this->index) {
        return $this->index;
    }

    $basePath = rtrim(Config::get('search.connections.zend.path'), '/');
    $indexPath = $basePath . '/' . $this->name;

    try {
        $this->index = \ZendSearch\Lucene\Lucene::open($indexPath);
    } catch (\ZendSearch\Exception\ExceptionInterface $e) {
        // Could not be opened as an index: start a new one.
        $this->index = \ZendSearch\Lucene\Lucene::create($indexPath);
    } catch (\ErrorException $e) {
        if (file_exists($indexPath)) {
            throw $e;
        }

        throw new \Exception(
            "'path' directory does not exist for the 'zend' search driver: '" . $basePath . "'"
        );
    }

    \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
        new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive()
    );

    return $this->index;
}
/**
 * Opens the Lucene index in the local index folder, or rebuilds it from
 * scratch when the folder lacks the "v0.6.0" marker node.
 *
 * @throws SetUpException Declared by the existing contract; presumably raised
 *                        while setting up the index folder — TODO confirm.
 */
public function openOrCreate()
{
    $folder = $this->files->setUpIndexFolder();
    $localPath = $folder->getStorage()->getLocalFolder($folder->getInternalPath());

    // Analyzer intended to make numbers searchable as well as words.
    Analyzer::setDefault(new CaseInsensitive());

    if ($folder->nodeExists('v0.6.0')) {
        // Marker present: the existing index can be used as is.
        $this->index = Lucene::open($localPath);

        return;
    }

    // Marker missing: drop whatever is there and start over.
    $this->logger->info('recreating outdated lucene index');
    $folder->delete();
    $this->index = Lucene::create($localPath);
    $folder->newFile('v0.6.0');
}
/**
 * Returns the Lucene index configured for the given identifier.
 *
 * The index is created when checkPath() rejects the configured path and
 * opened otherwise. Every call also applies the configured analyzer, buffer
 * and merge settings, default file permissions and query parser encoding,
 * and optimizes the index when "auto_optimized" is set.
 *
 * @param string $identifier The lucene identifier.
 *
 * @return \ZendSearch\Lucene\Index The lucene index.
 */
public function getIndex($identifier)
{
    $config = $this->getConfig($identifier);
    $path = $config['path'];

    $index = $this->checkPath($path) ? Lucene::open($path) : Lucene::create($path);
    // Register right away so the index is tracked even if a later step fails.
    $this->indexes[$identifier] = $index;

    $analyzerClass = $config['analyzer'];
    Analyzer::setDefault(new $analyzerClass());

    $index->setMaxBufferedDocs($config['max_buffered_docs']);
    $index->setMaxMergeDocs($config['max_merge_docs']);
    $index->setMergeFactor($config['merge_factor']);

    ZfFilesystem::setDefaultFilePermissions($config['permissions']);

    if ($config['auto_optimized']) {
        $index->optimize();
    }

    QueryParser::setDefaultEncoding($config['query_parser_encoding']);

    return $this->indexes[$identifier];
}
/**
 * Lists all Post models and runs a Lucene search over them.
 *
 * Every call rebuilds the index at "data/posts_index" from all posts, then
 * searches it for the space-separated words of the "q" query parameter.
 *
 * @return mixed
 */
public function actionIndex()
{
    $searchModel = new PostSearch();
    $dataProvider = $searchModel->search(Yii::$app->request->post());

    // Lucene environment: locale, encoding, analyzer and result cap.
    setlocale(LC_CTYPE, 'ru_RU.UTF-8');
    Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
    Lucene\Analysis\Analyzer\Analyzer::setDefault(
        new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive()
    );
    Lucene\Lucene::setResultSetLimit(10);

    // Build the posts index; the target folder must be writable.
    $index = Lucene\Lucene::create('data/posts_index');

    foreach (Post::find()->all() as $post) {
        $document = new Lucene\Document();
        $document->addField(Lucene\Document\Field::UnIndexed('entry_id', $post->id));
        $document->addField(Lucene\Document\Field::Keyword('title', $post->title));
        $document->addField(Lucene\Document\Field::text('contents', $post->content));
        $index->addDocument($document);
    }

    $index->commit();

    // One query term per space-separated word of the request's "q" value.
    $rawQuery = urldecode(Yii::$app->getRequest()->getQueryParam('q'));
    $query = new Lucene\Search\Query\MultiTerm();
    foreach (explode(' ', $rawQuery) as $word) {
        $query->addTerm(new Lucene\Index\Term($word));
    }

    // Reopen the index and run the query against it.
    $index = Lucene\Lucene::open('data/posts_index');
    $results = $index->find($query);

    return $this->render('index', [
        'searchModel' => $searchModel,
        'dataProvider' => $dataProvider,
        'search' => $results,
        'query' => $query,
    ]);
}
/**
 * Generates the index file.
 *
 * Creates a new Lucene index under CACHE_DIR and adds one document (title,
 * url, tag-stripped rendered contents) for every listed page that is
 * readable and not hidden, then optimizes the index.
 */
public static function updateIndex()
{
    if (empty(self::$igo)) {
        self::$igo = new Tagger(array('dict_dir' => LIB_DIR . 'ipadic', 'reduce_mode' => true));
    }

    Analyzer::setDefault(new Utf8());

    // Create the index.
    $index = Lucene::create(CACHE_DIR . self::INDEX_NAME);

    foreach (Listing::pages() as $page) {
        if (empty($page)) {
            continue;
        }

        $wiki = Factory::Wiki($page);

        // Only pages that may be read and are not hidden get indexed.
        if ($wiki->isReadable() && !$wiki->isHidden()) {
            $doc = new LuceneDoc();
            $doc->addField(Field::Text('title', $wiki->title()));
            // The URL identifies the document in the search results.
            $doc->addField(Field::Text('url', $wiki->uri()));
            // Contents are indexed but not stored.
            $doc->addField(Field::UnStored('contents', strip_tags($wiki->render())));

            // Register the document in the index.
            $index->addDocument($doc);
        }
    }

    $index->optimize();
}
/**
 * Rebuilds the upload search index from scratch.
 *
 * Creates a new Lucene index at the configured location and adds one
 * document per upload. Files ending in ".xlsx" or ".docx" are parsed through
 * the matching Lucene document loader, and the label, owner and upload id
 * fields are added to that parsed document; other files get a plain document
 * carrying only those three fields.
 *
 * @return mixed The response object, with its content set to "Index Ok".
 */
public function generateIndexAction()
{
    $searchIndexLocation = $this->getIndexLocation();
    $index = Lucene\Lucene::create($searchIndexLocation);

    $userTable = $this->getServiceLocator()->get('UserTable');
    $uploadTable = $this->getServiceLocator()->get('UploadTable');
    $allUploads = $uploadTable->fetchAll();

    foreach ($allUploads as $fileUpload) {
        $uploadOwner = $userTable->getById($fileUpload->getUserId());

        // Create the Lucene fields describing the upload.
        $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->getId());
        $label = Document\Field::Text('label', $fileUpload->getLabel());
        $owner = Document\Field::Text('owner', $uploadOwner->getName());

        $uploadPath = $this->getFileUploadLocation();
        $fileName = $fileUpload->getFilename();
        $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;

        if (substr_compare($fileName, ".xlsx", strlen($fileName) - strlen(".xlsx"), strlen(".xlsx")) === 0) {
            // Index the Excel spreadsheet.
            $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
        } elseif (substr_compare($fileName, ".docx", strlen($fileName) - strlen(".docx"), strlen(".docx")) === 0) {
            // Index the Word document.
            $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
        } else {
            $indexDoc = new Lucene\Document();
        }

        // Add the metadata to the document selected above. The document must
        // not be re-created here, or the parsed file contents are discarded.
        $indexDoc->addField($label);
        $indexDoc->addField($owner);
        $indexDoc->addField($fileUploadId);
        $index->addDocument($indexDoc);
    }

    $index->commit();

    $response = $this->getResponse();
    $response->setContent("Index Ok");

    return $response;
}
/**
 * Rebuilds the upload search index from scratch.
 *
 * Creates a new Lucene index at the configured location and adds one
 * document per upload. ".xlsx" and ".docx" files are loaded through the
 * matching Lucene document loader, anything else gets an empty document;
 * label, owner and upload id fields are then added to whichever document
 * was chosen.
 */
public function generateIndexAction()
{
    $index = Lucene\Lucene::create($this->getIndexLocation());

    $userTable = $this->getServiceLocator()->get('UserTable');
    $uploadTable = $this->getServiceLocator()->get('UploadTable');

    foreach ($uploadTable->fetchAll() as $fileUpload) {
        $uploadOwner = $userTable->getUser($fileUpload->user_id);

        // Metadata fields: stored-only id, searchable label and owner.
        $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->id);
        $label = Document\Field::Text('label', $fileUpload->label);
        $owner = Document\Field::Text('owner', $uploadOwner->name);

        $isXlsx = substr_compare(
            $fileUpload->filename,
            ".xlsx",
            strlen($fileUpload->filename) - strlen(".xlsx"),
            strlen(".xlsx")
        ) === 0;

        if ($isXlsx) {
            // Excel sheet.
            $uploadPath = $this->getFileUploadLocation();
            $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($uploadPath . "/" . $fileUpload->filename);
        } elseif (substr_compare(
            $fileUpload->filename,
            ".docx",
            strlen($fileUpload->filename) - strlen(".docx"),
            strlen(".docx")
        ) === 0) {
            // Word document.
            $uploadPath = $this->getFileUploadLocation();
            $indexDoc = Lucene\Document\Docx::loadDocxFile($uploadPath . "/" . $fileUpload->filename);
        } else {
            $indexDoc = new Lucene\Document();
        }

        $indexDoc->addField($label);
        $indexDoc->addField($owner);
        $indexDoc->addField($fileUploadId);
        $index->addDocument($indexDoc);
    }

    $index->commit();
}
/**
 * Returns the Lucene index, initialising it on first use.
 *
 * Initialisation sets the query parser encoding to "utf-8", installs the
 * Utf8Num case-insensitive analyzer, makes AND the default query operator,
 * and then opens the index at getIndexPath(), creating it when opening
 * raises a Lucene RuntimeException.
 *
 * @return mixed The index instance returned by Lucene::open() or Lucene::create().
 */
protected function getIndex()
{
    if ($this->index == null) {
        \ZendSearch\Lucene\Search\QueryParser::setDefaultEncoding('utf-8');
        \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
            new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive()
        );
        \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(
            \ZendSearch\Lucene\Search\QueryParser::B_AND
        );

        try {
            $this->index = \ZendSearch\Lucene\Lucene::open($this->getIndexPath());
        } catch (\ZendSearch\Lucene\Exception\RuntimeException $ex) {
            $this->index = \ZendSearch\Lucene\Lucene::create($this->getIndexPath());
        }
    }

    return $this->index;
}
/**
 * Rebuilds the user search index from scratch.
 *
 * Creates a new Lucene index at the configured location and adds one
 * document per user with the fields userId, firstName, lastName, email,
 * role and activated, then commits.
 */
public function generateSearchAction()
{
    $index = Lucene\Lucene::create($this->getIndexLocation());

    foreach ($this->getUserTable()->fetchAll(false) as $user) {
        // userId and activated are keyword fields, the rest are text fields.
        $fields = array(
            Document\Field::keyword('userId', $user->userId),
            Document\Field::text('firstName', $user->firstName),
            Document\Field::text('lastName', $user->lastName),
            Document\Field::text('email', $user->email),
            Document\Field::text('role', $user->role),
            Document\Field::keyword('activated', $user->activated),
        );

        $indexDoc = new Lucene\Document();
        foreach ($fields as $field) {
            $indexDoc->addField($field);
        }

        $index->addDocument($indexDoc);
    }

    $index->commit();
}
/**
 * Opens (or creates, when checkIndexPath() rejects the path) the Lucene
 * index, applies the analyzer, file-permission, encoding and merge settings,
 * optimizes the index and returns it.
 */
private function getIndex() : SearchIndexInterface
{
    $path = $this->getIndexPath();

    $index = $this->checkIndexPath($path)
        ? Lucene::open($path)
        : Lucene::create($path);

    // Process-wide Lucene defaults.
    Analyzer::setDefault(new CaseInsensitive());
    LuceneFilesystem::setDefaultFilePermissions(0775);
    QueryParser::setDefaultEncoding('UTF-8');

    // Per-index tuning taken from the configured options.
    $options = $this->options;
    $index->setMaxBufferedDocs($options['max_buffered_docs']);
    $index->setMaxMergeDocs($options['max_merge_docs']);
    $index->setMergeFactor($options['merge_factor']);

    $index->optimize();

    return $index;
}
/**
 * Inline HTML tags must not split a word, while block-level tags must:
 * "<b>Zend</b>Framework" is expected to be found as "ZendFramework", but
 * "<div>Foo</div>Bar" must not be found as "FooBar".
 *
 * @group ZF-4252
 */
public function testHtmlInlineTagsIndexing()
{
    $indexDir = __DIR__ . '/_index/_files';
    $index = Lucene\Lucene::create($indexDir);

    try {
        $htmlString = '<html><head><title>Hello World</title></head>' . '<body><b>Zend</b>Framework' . "\n" . ' <div>Foo</div>Bar ' . "\n" . ' <strong>Test</strong></body></html>';

        $doc = Document\Html::loadHTML($htmlString);
        $index->addDocument($doc);

        $hits = $index->find('FooBar');
        $this->assertCount(0, $hits);

        $hits = $index->find('ZendFramework');
        $this->assertCount(1, $hits);
    } finally {
        // Always drop the on-disk index, even when an assertion fails, so a
        // failing run cannot leave files behind for later tests.
        unset($index);
        $this->_clearDirectory($indexDir);
    }
}
/**
 * Creates a new Lucene index in the given sub-directory of the index
 * directory and keeps it as the current index.
 *
 * @param string $dir Directory name appended to the configured index directory.
 * @return $this
 */
public function create($dir)
{
    $location = $this->index_dir . $dir;
    $this->index = Lucene::create($location);

    return $this;
}
/**
 * Returns the Lucene index at the configured index file location, opening
 * it when the location exists and creating it otherwise.
 */
public static function getLuceneIndex()
{
    $index = self::getLuceneIndexFile();

    return file_exists($index)
        ? Lucene::open($index)
        : Lucene::create($index);
}
/**
 * Deletes an upload: removes the file from disk, purges its document from
 * the Lucene index, deletes the database row and redirects to the uploads
 * list.
 *
 * The index is opened, not created, because creating it here would wipe
 * every indexed upload on each delete. The indexers in this file store
 * "upload_id" as an un-indexed field, so it cannot be queried; the matching
 * document is located by comparing the stored value instead.
 *
 * @return mixed The redirect response to the "uploads" route.
 */
public function deleteAction()
{
    $fileId = $this->params("id");
    $uploadTable = $this->getServiceLocator()->get('UploadTable');
    $upload = $uploadTable->getById($fileId);

    $fileName = $upload->getFileName();
    $fileNameP = $this->getFileUploadLocation() . DIRECTORY_SEPARATOR . $fileName;
    if (file_exists($fileNameP)) {
        unlink($fileNameP);
    }

    // Remove the upload from Lucene.
    $searchIndexLocation = $this->getIndexLocation();
    try {
        $index = Lucene\Lucene::open($searchIndexLocation);
    } catch (Lucene\Exception\ExceptionInterface $e) {
        // No usable index yet, so there is nothing to purge; the delete
        // itself must still go through.
        $index = null;
    }

    if ($index !== null) {
        $removed = false;
        $maxDoc = $index->maxDoc();

        for ($docId = 0; $docId < $maxDoc; $docId++) {
            if ($index->isDeleted($docId)) {
                continue;
            }

            $document = $index->getDocument($docId);
            if (!in_array('upload_id', $document->getFieldNames(), true)) {
                continue;
            }

            // Compare as strings: the route parameter is a string, while
            // the stored id may have been indexed from an integer.
            if ((string) $document->getFieldValue('upload_id') === (string) $fileId) {
                $index->delete($docId);
                $removed = true;
            }
        }

        if ($removed) {
            $index->commit();
        }
    }

    $uploadTable->deleteById($fileId);

    return $this->redirect()->toRoute('uploads', array('action' => 'index'));
}
/**
 * Initialises Zend Lucene.
 *
 * Normalises and filters the model classes, installs the default analyzer
 * and the "utf-8" query encoding, then opens the index at the given location
 * or creates it when the location does not exist or cannot be opened.
 *
 * @param string[]|string $modelClasses
 * @param string $indexLocation
 * @throws SearchCanNotConnectException When no connection could be established.
 */
public function __construct($modelClasses, $indexLocation)
{
    $classList = is_array($modelClasses) ? $modelClasses : array($modelClasses);

    $this->modelClasses = $this->filterModelClasses($classList);
    $this->defaultAnalyzer = $this->getDefaultAnalyzer();
    $this->analyzerForHighlighter = $this->getAnalyzerForHighlighter();

    Analyzer::setDefault($this->defaultAnalyzer);
    QueryParser::setDefaultEncoding('utf-8');

    // Open the existing index, or create a new one.
    if (!file_exists($indexLocation)) {
        $this->connection = Lucene::create($indexLocation);
    } else {
        try {
            $this->connection = Lucene::open($indexLocation);
        } catch (\Exception $ex) {
            // The location exists but cannot be opened as an index.
            $this->connection = Lucene::create($indexLocation);
        }
    }

    if (!$this->connection) {
        throw new SearchCanNotConnectException($indexLocation);
    }
}
/**
 * A document added without an explicit commit must not be reported as
 * deleted.
 *
 * @group ZF-9680
 */
public function testIsDeletedWithoutExplicitCommit()
{
    $index = Lucene\Lucene::create(__DIR__ . '/_index/_files');

    $doc = new Document();
    $idField = Document\Field::Keyword('_id', 'myId');
    $doc->addField($idField);
    $payloadField = Document\Field::Keyword('bla', 'blubb');
    $doc->addField($payloadField);

    $index->addDocument($doc);

    // Document number 0 is the one just added.
    $this->assertFalse($index->isDeleted(0));
}
/**
 * Flushes the search index by creating a new Lucene index at the path of
 * the file source named by the "fileSystem" option.
 */
public function flushIndex()
{
    $fileSourceService = tao_models_classes_FileSourceService::singleton();
    $fileSystem = $fileSourceService->getFileSource($this->getOption('fileSystem'));
    $indexPath = $fileSystem->getPath();

    Lucene::create($indexPath);
}
/**
 * Indexes the data found in the JSON files.
 *
 * For every regular file in "data/json" (next to this source file) a new
 * Lucene index is created under "data/index", named after the file with its
 * last five characters removed (the ".json" suffix, presumably). Each
 * decoded entry becomes one document with a "url" text field and an
 * un-stored "contents" field.
 */
public function index()
{
    $dir = realpath(dirname(__FILE__)) . DIRECTORY_SEPARATOR . "data" . DIRECTORY_SEPARATOR;
    $jsonDir = $dir . "json";
    $indexDir = $dir . "index";

    // Read the JSON files.
    $files = scandir($jsonDir);
    foreach ($files as $file) {
        if ($file == '.' || $file == '..') {
            continue;
        }

        $jsonPath = $jsonDir . DIRECTORY_SEPARATOR . $file;

        // Only regular files are indexed.
        if (!is_file($jsonPath)) {
            continue;
        }

        $json = json_decode(file_get_contents($jsonPath));
        $indexName = substr($file, 0, -5);

        // Create the index.
        $index = Lucene\Lucene::create($indexDir . DIRECTORY_SEPARATOR . $indexName);

        // Undecodable JSON yields null; leave the index empty in that case
        // instead of iterating a non-iterable value.
        if (is_array($json) || is_object($json)) {
            // Create a document per entry and define the fields to index.
            foreach ($json as $entry) {
                $doc = new Lucene\Document();
                // NOTE(review): the "url" field is filled from the entry title — confirm this is intended.
                $doc->addField(Lucene\Document\Field::Text('url', $entry->title));
                $doc->addField(Lucene\Document\Field::UnStored('contents', $entry->text));
                $index->addDocument($doc);
            }
        }

        // Flush explicitly instead of relying on the implicit commit that
        // happens when the index object is released.
        $index->commit();
    }
}