Ejemplo n.º 1
0
 /**
  * Lazily build and return the shared Lucene index.
  *
  * On the first call this configures the default analyzer (adding a stop-word
  * filter and a Morphy filter when the matching 'zend_search' config keys
  * exist), the result set limit and the query parser encoding, and then opens
  * the index at the configured location, or creates it when getRealPath()
  * yields a falsy value for that location. Later calls return the cached
  * instance stored in self::$index.
  *
  * @return \ZendSearch\Lucene\Index
  * @throws \InvalidArgumentException when a configured stop-word or Morphy
  *                                   dictionary path cannot be resolved
  */
 private function index()
 {
     if (!isset(self::$index)) {
         $analyzer = new CaseInsensitive();
         if ($this->config()->exists('zend_search', 'stop_words')) {
             $stop_word_filter = new StopWords();
             $words = $this->getRealPath($this->config()->get('zend_search', 'stop_words'));
             if ($words !== false) {
                 $stop_word_filter->loadFromFile($words);
             } else {
                 throw new \InvalidArgumentException('Path not found');
             }
             $analyzer->addFilter($stop_word_filter);
         }
         if ($this->config()->exists('zend_search', 'morphy_dicts')) {
             $morphy_dicts = $this->getRealPath($this->config()->get('zend_search', 'morphy_dicts'));
             if ($morphy_dicts !== false) {
                 $analyzer->addFilter(new Morphy($morphy_dicts, $this->config()->getCharset()));
             } else {
                 throw new \InvalidArgumentException('Path not found');
             }
         }
         Analyzer::setDefault($analyzer);
         Lucene::setResultSetLimit($this->limit);
         QueryParser::setDefaultEncoding($this->config()->getCharset());
         // Fixed: this used to read "$this->config() - get(...)", i.e. an
         // arithmetic subtraction calling an undefined global get() function,
         // instead of the intended method call on the config object.
         $index = $this->config()->get('zend_search', 'index');
         $path = $this->getRealPath($index);
         // Open the index when its path resolves, otherwise create it at the
         // configured (unresolved) location.
         self::$index = $path ? Lucene::open($path) : Lucene::create($index);
     }
     return self::$index;
 }
 /**
  * {@inheritdoc}
  *
  * Sets the default analyzer and query encoding, then registers the
  * 'zendsearch.*' services on the application container.
  */
 public function register(Application $app)
 {
     Analyzer::setDefault(new CaseInsensitive());
     QueryParser::setDefaultEncoding('UTF-8');

     // Map of index name => filesystem path, to be filled in by the application.
     $app['zendsearch.indices_path'] = array();

     // Protected callable that opens (or creates) every configured index once.
     $app['zendsearch.indices.initializer'] = $app->protect(function () use($app) {
         static $initialized = false;
         if (!$initialized) {
             // Flag is raised before the indices are touched, as in the original.
             $initialized = true;
             $opened = array();
             foreach ($app['zendsearch.indices_path'] as $name => $location) {
                 if (file_exists($location)) {
                     $opened[$name] = Lucene::open($location);
                 } else {
                     $opened[$name] = Lucene::create($location);
                 }
             }
             $app['zendsearch.indices_collection'] = $opened;
         }
     });

     // Shared service exposing the collection of opened indices.
     $app['zendsearch.indices'] = $app->share(function ($app) {
         $app['zendsearch.indices.initializer']();
         return $app['zendsearch.indices_collection'];
     });

     // Shared service aggregating all indices into one MultiSearcher.
     $app['zendsearch.multisearcher'] = $app->share(function ($app) {
         $app['zendsearch.indices.initializer']();
         $searcher = new MultiSearcher();
         foreach ($app['zendsearch.indices'] as $luceneIndex) {
             $searcher->addIndex($luceneIndex);
         }
         return $searcher;
     });

     // 'zendsearch' resolves to the multi-searcher service.
     $app['zendsearch'] = $app->share(function ($app) {
         return $app['zendsearch.multisearcher'];
     });
 }
Ejemplo n.º 3
0
 /**
  * Open the Lucene index in the given directory, or create it there.
  *
  * The index is opened when a 'segments.gen' file is present in the
  * directory; otherwise a new index is created.
  *
  * @param string $directory
  * @return \ZendSearch\Lucene\SearchIndexInterface
  */
 protected function getLuceneIndex($directory)
 {
     $segmentsFile = $directory . DIRECTORY_SEPARATOR . 'segments.gen';

     return file_exists($segmentsFile)
         ? Lucene::open($directory)
         : Lucene::create($directory);
 }
Ejemplo n.º 4
0
 /**
  * Open the Lucene index at the given path, creating it when opening fails.
  *
  * Any exception thrown while opening is treated as "index not available"
  * and triggers creation of a new index at the same path.
  *
  * @param string $indexPath Path to the index
  *
  * @return SearchIndexInterface
  */
 public static function openOrCreate($indexPath)
 {
     try {
         $luceneIndex = Lucene::open($indexPath);
     } catch (\Exception $openFailure) {
         $luceneIndex = Lucene::create($indexPath);
     }

     return $luceneIndex;
 }
Ejemplo n.º 5
0
 /**
  * Return the index registered under the given name, creating it on first use.
  *
  * Created indexes are cached in $this->indexes, so each name is only
  * created once per instance. A diagnostic line is echoed on creation.
  *
  * @param string $indexName The name of the index
  * @return SearchIndexInterface
  */
 public function getIndex($indexName)
 {
     if (isset($this->indexes[$indexName])) {
         return $this->indexes[$indexName];
     }

     $location = $this->indexesBasePath . $indexName;
     echo "+++Allocated index: " . $indexName . "\n";
     // NOTE(review): this always calls Lucene::create(), never open() - confirm
     // that starting from a fresh index on first access is intended.
     $this->indexes[$indexName] = Lucene::create($location);

     return $this->indexes[$indexName];
 }
Ejemplo n.º 6
0
 /**
  * Create a connection to the Lucene index at the given path.
  *
  * The index is opened; when opening fails with an exception implementing
  * ExceptionInterface, a new index is created at the same path instead.
  *
  * @param string $path Path to the index directory
  * @throws \Exception when opening fails with any other exception; if the
  *                    directory does not exist, a descriptive exception is
  *                    thrown with the original failure attached as "previous"
  */
 public function __construct($path)
 {
     $this->indexPath = $path;
     try {
         $this->index = Lucene::open($path);
     } catch (ExceptionInterface $e) {
         // Library-level failure to open: fall back to creating the index.
         $this->index = Lucene::create($path);
     } catch (\Exception $e) {
         if (!file_exists($path)) {
             // Fixed: removed the stray trailing quote from the message and
             // chained the original exception so the root cause is not lost.
             throw new \Exception(
                 "Couldn't connect to index of Zend Lucene. Directory '{$path}' doesn't exist.",
                 0,
                 $e
             );
         }
         throw $e;
     }
 }
Ejemplo n.º 7
0
 /**
  * Get the ZendSearch lucene index instance associated with this instance.
  *
  * The index lives under the configured 'search.connections.zend.path'
  * directory in a sub-directory named after $this->name. It is opened on
  * first use (or created when opening raises a ZendSearch exception), and
  * the default analyzer is set after that.
  *
  * @return \ZendSearch\Lucene\Index
  * @throws \Exception when an ErrorException occurs and the index path does not exist
  */
 protected function getIndex()
 {
     if ($this->index) {
         return $this->index;
     }

     $baseDir = rtrim(Config::get('search.connections.zend.path'), '/');
     $indexDir = $baseDir . '/' . $this->name;

     try {
         $this->index = \ZendSearch\Lucene\Lucene::open($indexDir);
     } catch (\ZendSearch\Exception\ExceptionInterface $openFailure) {
         $this->index = \ZendSearch\Lucene\Lucene::create($indexDir);
     } catch (\ErrorException $phpError) {
         if (file_exists($indexDir)) {
             throw $phpError;
         }
         throw new \Exception("'path' directory does not exist for the 'zend' search driver: '" . $baseDir . "'");
     }

     \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
         new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive()
     );

     return $this->index;
 }
Ejemplo n.º 8
0
 /**
  * Open the lucene index, or recreate it when it is outdated.
  *
  * The index folder is considered current when it contains a node named
  * 'v0.6.0'. Otherwise the folder is deleted, a new index is created in its
  * local path and the marker file is written.
  *
  * @throws SetUpException
  */
 public function openOrCreate()
 {
     $versionMarker = 'v0.6.0';

     $indexFolder = $this->files->setUpIndexFolder();
     $localPath = $indexFolder->getStorage()->getLocalFolder($indexFolder->getInternalPath());

     // Let lucene search for numbers as well as words.
     Analyzer::setDefault(new CaseInsensitive());

     if (!$indexFolder->nodeExists($versionMarker)) {
         // Marker missing: treat the index as outdated and rebuild it.
         $this->logger->info('recreating outdated lucene index');
         $indexFolder->delete();
         $this->index = Lucene::create($localPath);
         $indexFolder->newFile($versionMarker);
     } else {
         // Correct index present.
         $this->index = Lucene::open($localPath);
     }
 }
 /**
  * Get the index mapped by the given lucene identifier.
  *
  * The index is opened when checkPath() accepts the configured path and
  * created otherwise, stored in $this->indexes, and then tuned with the
  * settings from the identifier's configuration (analyzer, buffering and
  * merge limits, file permissions, optional optimize, query encoding).
  *
  * @param string $identifier The lucene identifier.
  *
  * @return \ZendSearch\Lucene\Index The lucene index.
  */
 public function getIndex($identifier)
 {
     $config = $this->getConfig($identifier);
     $path = $config['path'];

     $index = $this->checkPath($path) ? Lucene::open($path) : Lucene::create($path);
     // Register the index before configuring it, as the original did.
     $this->indexes[$identifier] = $index;

     $analyzerClass = $config['analyzer'];
     Analyzer::setDefault(new $analyzerClass());

     $index->setMaxBufferedDocs($config['max_buffered_docs']);
     $index->setMaxMergeDocs($config['max_merge_docs']);
     $index->setMergeFactor($config['merge_factor']);
     ZfFilesystem::setDefaultFilePermissions($config['permissions']);

     if ($config['auto_optimized']) {
         $index->optimize();
     }

     QueryParser::setDefaultEncoding($config['query_parser_encoding']);

     return $index;
 }
Ejemplo n.º 10
0
 /**
  * Lists all Post models.
  *
  * Besides building the data provider, this rebuilds the Lucene index under
  * 'data/posts_index' from every Post and then runs a multi-term query built
  * from the 'q' query parameter against it.
  *
  * @return mixed
  */
 public function actionIndex()
 {
     $searchModel = new PostSearch();
     $dataProvider = $searchModel->search(Yii::$app->request->post());

     // Global Lucene / locale configuration.
     setlocale(LC_CTYPE, 'ru_RU.UTF-8');
     Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
     Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
     Lucene\Lucene::setResultSetLimit(10);

     // Create the blog posts index; the folder must be writable.
     $index = Lucene\Lucene::create('data/posts_index');

     // One Lucene document per post.
     foreach (Post::find()->all() as $post) {
         $document = new Lucene\Document();
         $document->addField(Lucene\Document\Field::UnIndexed('entry_id', $post->id));
         $document->addField(Lucene\Document\Field::Keyword('title', $post->title));
         $document->addField(Lucene\Document\Field::text('contents', $post->content));
         $index->addDocument($document);
     }
     $index->commit();

     // Split the search string into words and add one term per word.
     $searchString = urldecode(Yii::$app->getRequest()->getQueryParam('q'));
     $query = new Lucene\Search\Query\MultiTerm();
     foreach (explode(' ', $searchString) as $word) {
         $query->addTerm(new Lucene\Index\Term($word));
     }

     // Re-open the index (same variable, so the writer handle is released
     // at this point exactly as before) and run the query.
     $index = Lucene\Lucene::open('data/posts_index');
     $results = $index->find($query);

     return $this->render('index', [
         'searchModel' => $searchModel,
         'dataProvider' => $dataProvider,
         'search' => $results,
         'query' => $query,
     ]);
 }
Ejemplo n.º 11
0
 /**
  * Generate the search index file.
  *
  * Creates a new Lucene index at CACHE_DIR . self::INDEX_NAME and adds one
  * document (title, url, tag-stripped rendered contents) for every listed
  * page that is readable and not hidden, then optimizes the index.
  */
 public static function updateIndex()
 {
     if (empty(self::$igo)) {
         self::$igo = new Tagger(array('dict_dir' => LIB_DIR . 'ipadic', 'reduce_mode' => true));
     }
     Analyzer::setDefault(new Utf8());

     // Create the index.
     $index = Lucene::create(CACHE_DIR . self::INDEX_NAME);

     foreach (Listing::pages() as $page) {
         if (empty($page)) {
             continue;
         }

         $wiki = Factory::Wiki($page);
         // Only index pages that can be read and are not hidden.
         $indexable = $wiki->isReadable() && !$wiki->isHidden();
         if (!$indexable) {
             continue;
         }

         $document = new LuceneDoc();
         $document->addField(Field::Text('title', $wiki->title()));
         // Store the document URL to identify it in the search results.
         $document->addField(Field::Text('url', $wiki->uri()));
         // Index the rendered page with markup stripped; the text is not
         // passed through the self::$igo tagger here.
         $plainText = strip_tags($wiki->render());
         $document->addField(Field::UnStored('contents', $plainText));

         // Register the document in the index.
         $index->addDocument($document);
     }

     $index->optimize();
 }
Ejemplo n.º 12
0
 /**
  * Rebuild the Lucene index from all uploads.
  *
  * For each upload a document is built from the file itself when it is an
  * .xlsx or .docx file (an empty document otherwise), and the label, owner
  * name and upload id are added as fields.
  *
  * @return mixed The response object with its content set to "Index Ok"
  */
 public function generateIndexAction()
 {
     $searchIndexLocation = $this->getIndexLocation();
     $index = Lucene\Lucene::create($searchIndexLocation);
     $userTable = $this->getServiceLocator()->get('UserTable');
     $uploadTable = $this->getServiceLocator()->get('UploadTable');
     $allUploads = $uploadTable->fetchAll();
     foreach ($allUploads as $fileUpload) {
         $uploadOwner = $userTable->getById($fileUpload->getUserId());
         // Build the lucene fields.
         $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->getId());
         $label = Document\Field::Text('label', $fileUpload->getLabel());
         $owner = Document\Field::Text('owner', $uploadOwner->getName());
         $uploadPath = $this->getFileUploadLocation();
         $fileName = $fileUpload->getFilename();
         $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;
         // substr() with a negative start is safe for names shorter than the
         // suffix, unlike substr_compare() with an out-of-range offset.
         $suffix = substr($fileName, -5);
         if ($suffix === ".xlsx") {
             // Index the Excel spreadsheet.
             $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
         } elseif ($suffix === ".docx") {
             // Index the Word document.
             $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
         } else {
             $indexDoc = new Lucene\Document();
         }
         // Fixed: the document loaded above used to be overwritten here by a
         // fresh empty Lucene\Document, so file contents were never indexed.
         $indexDoc->addField($label);
         $indexDoc->addField($owner);
         $indexDoc->addField($fileUploadId);
         $index->addDocument($indexDoc);
     }
     $index->commit();
     $response = $this->getResponse();
     $response->setContent("Index Ok");
     return $response;
 }
 /**
  * Rebuild the Lucene index from all uploads.
  *
  * .xlsx and .docx uploads are loaded through the matching Lucene document
  * loader; any other upload gets an empty document. Label, owner name and
  * upload id are then attached as fields.
  */
 public function generateIndexAction()
 {
     $index = Lucene\Lucene::create($this->getIndexLocation());
     $userTable = $this->getServiceLocator()->get('UserTable');
     $uploadTable = $this->getServiceLocator()->get('UploadTable');

     foreach ($uploadTable->fetchAll() as $fileUpload) {
         $uploadOwner = $userTable->getUser($fileUpload->user_id);

         // Fields shared by every document: id (stored only), label, owner.
         $idField = Document\Field::unIndexed('upload_id', $fileUpload->id);
         $labelField = Document\Field::Text('label', $fileUpload->label);
         $ownerField = Document\Field::Text('owner', $uploadOwner->name);

         $isXlsx = substr_compare($fileUpload->filename, ".xlsx", strlen($fileUpload->filename) - strlen(".xlsx"), strlen(".xlsx")) === 0;
         if ($isXlsx) {
             // Index an Excel sheet.
             $uploadPath = $this->getFileUploadLocation();
             $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($uploadPath . "/" . $fileUpload->filename);
         } elseif (substr_compare($fileUpload->filename, ".docx", strlen($fileUpload->filename) - strlen(".docx"), strlen(".docx")) === 0) {
             // Index a Word document.
             $uploadPath = $this->getFileUploadLocation();
             $indexDoc = Lucene\Document\Docx::loadDocxFile($uploadPath . "/" . $fileUpload->filename);
         } else {
             $indexDoc = new Lucene\Document();
         }

         $indexDoc->addField($labelField);
         $indexDoc->addField($ownerField);
         $indexDoc->addField($idField);
         $index->addDocument($indexDoc);
     }

     $index->commit();
 }
Ejemplo n.º 14
0
 /**
  * Return the Lucene index, opening or creating it on first use.
  *
  * Before the index is touched, the query parser encoding, the default
  * analyzer and the default boolean operator (AND) are configured. The index
  * is created when opening raises a Lucene RuntimeException.
  */
 protected function getIndex()
 {
     if ($this->index == null) {
         \ZendSearch\Lucene\Search\QueryParser::setDefaultEncoding('utf-8');
         \ZendSearch\Lucene\Analysis\Analyzer\Analyzer::setDefault(
             new \ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num\CaseInsensitive()
         );
         \ZendSearch\Lucene\Search\QueryParser::setDefaultOperator(
             \ZendSearch\Lucene\Search\QueryParser::B_AND
         );

         try {
             $opened = \ZendSearch\Lucene\Lucene::open($this->getIndexPath());
         } catch (\ZendSearch\Lucene\Exception\RuntimeException $openFailure) {
             $opened = \ZendSearch\Lucene\Lucene::create($this->getIndexPath());
         }

         $this->index = $opened;
         return $opened;
     }

     return $this->index;
 }
Ejemplo n.º 15
0
 /**
  * Rebuild the Lucene user index.
  *
  * Creates the index at the configured location and adds one document per
  * user, with userId and activated stored as keywords and the name, email
  * and role fields stored as text.
  */
 public function generateSearchAction()
 {
     $index = Lucene\Lucene::create($this->getIndexLocation());

     foreach ($this->getUserTable()->fetchAll(false) as $user) {
         // Fields are built in the same order in which they are added.
         $fields = array(
             Document\Field::keyword('userId', $user->userId),
             Document\Field::text('firstName', $user->firstName),
             Document\Field::text('lastName', $user->lastName),
             Document\Field::text('email', $user->email),
             Document\Field::text('role', $user->role),
             Document\Field::keyword('activated', $user->activated),
         );

         $indexDoc = new Lucene\Document();
         foreach ($fields as $field) {
             $indexDoc->addField($field);
         }
         $index->addDocument($indexDoc);
     }

     $index->commit();
 }
Ejemplo n.º 16
0
 /**
  * Open (or create) the Lucene index, apply the configured tuning options
  * and optimize it.
  *
  * The index is opened when checkIndexPath() accepts the path and created
  * otherwise. Note that optimize() runs on every call.
  */
 private function getIndex() : SearchIndexInterface
 {
     $path = $this->getIndexPath();
     $index = $this->checkIndexPath($path)
         ? Lucene::open($path)
         : Lucene::create($path);

     // Global Lucene defaults.
     Analyzer::setDefault(new CaseInsensitive());
     LuceneFilesystem::setDefaultFilePermissions(0775);
     QueryParser::setDefaultEncoding('UTF-8');

     // Per-index tuning taken from $this->options.
     $options = $this->options;
     $index->setMaxBufferedDocs($options['max_buffered_docs']);
     $index->setMaxMergeDocs($options['max_merge_docs']);
     $index->setMergeFactor($options['merge_factor']);
     $index->optimize();

     return $index;
 }
Ejemplo n.º 17
0
 /**
  * Checks how text around inline and block HTML tags is tokenized: the test
  * expects no hit for 'FooBar' (split by a div) and one hit for
  * 'ZendFramework' (joined across a b tag).
  *
  * @group ZF-4252
  */
 public function testHtmlInlineTagsIndexing()
 {
     $indexDir = __DIR__ . '/_index/_files';
     $index = Lucene\Lucene::create($indexDir);

     $htmlString = '<html><head><title>Hello World</title></head>'
         . '<body><b>Zend</b>Framework' . "\n"
         . ' <div>Foo</div>Bar ' . "\n"
         . ' <strong>Test</strong></body></html>';
     $index->addDocument(Document\Html::loadHTML($htmlString));

     $fooBarHits = $index->find('FooBar');
     $this->assertEquals(count($fooBarHits), 0);

     $zendFrameworkHits = $index->find('ZendFramework');
     $this->assertEquals(count($zendFrameworkHits), 1);

     // Release the index handle before removing its files.
     unset($index);
     $this->_clearDirectory($indexDir);
 }
Ejemplo n.º 18
0
 /**
  * Create a Lucene index in the given sub-directory of $this->index_dir and
  * keep it as the current index.
  *
  * @param string $dir Directory name appended to $this->index_dir
  * @return $this
  */
 public function create($dir)
 {
     $location = $this->index_dir . $dir;
     $this->index = Lucene::create($location);

     return $this;
 }
Ejemplo n.º 19
0
 /**
  * Open the Lucene index at the location given by getLuceneIndexFile(), or
  * create it there when that location does not exist yet.
  */
 public static function getLuceneIndex()
 {
     $location = self::getLuceneIndexFile();

     return file_exists($location)
         ? Lucene::open($location)
         : Lucene::create($location);
 }
 /**
  * Delete an upload: remove its file from disk, remove matching documents
  * from the Lucene index, delete the database row and redirect to the
  * uploads list.
  *
  * @return mixed Redirect response to the 'uploads' route
  */
 public function deleteAction()
 {
     $fileId = $this->params("id");
     $uploadTable = $this->getServiceLocator()->get('UploadTable');
     $upload = $uploadTable->getById($fileId);
     $fileName = $upload->getFileName();
     $fileNameP = $this->getFileUploadLocation() . DIRECTORY_SEPARATOR . $fileName;
     if (file_exists($fileNameP)) {
         unlink($fileNameP);
     }

     // Remove from Lucene.
     // Fixed: the index used to be obtained with Lucene::create(), which
     // builds a brand-new index and so discarded every indexed document on
     // each delete. Open the existing index instead; if there is none,
     // there is nothing to remove.
     $index = null;
     try {
         $index = Lucene\Lucene::open($this->getIndexLocation());
     } catch (\ZendSearch\Lucene\Exception\RuntimeException $e) {
         $index = null;
     }

     if ($index !== null) {
         // Fixed: find() returns a list of hits, not a single document, so
         // each hit is deleted by its internal document id.
         // NOTE(review): this matches whatever the default search returns for
         // the id string - confirm the id is stored in a searchable field.
         $hits = $index->find($fileId);
         foreach ($hits as $hit) {
             $index->delete($hit->id);
         }
         if ($hits) {
             $index->commit();
         }
     }

     $uploadTable->deleteById($fileId);
     return $this->redirect()->toRoute('uploads', array('action' => 'index'));
 }
Ejemplo n.º 21
0
 /**
  * Initialize Zend Lucene.
  *
  * Stores the filtered model classes and the analyzers, sets the default
  * analyzer and query encoding, then opens the index at $indexLocation, or
  * creates it when the location does not exist or opening fails.
  *
  * @param string[]|string $modelClasses
  * @param string $indexLocation
  * @throws SearchCanNotConnectException
  */
 public function __construct($modelClasses, $indexLocation)
 {
     // Accept a single class name as well as a list of them.
     $classList = is_array($modelClasses) ? $modelClasses : array($modelClasses);

     $this->modelClasses = $this->filterModelClasses($classList);
     $this->defaultAnalyzer = $this->getDefaultAnalyzer();
     $this->analyzerForHighlighter = $this->getAnalyzerForHighlighter();
     Analyzer::setDefault($this->defaultAnalyzer);
     QueryParser::setDefaultEncoding('utf-8');

     // Open the existing index, or create a new one.
     if (!file_exists($indexLocation)) {
         $this->connection = Lucene::create($indexLocation);
     } else {
         try {
             $this->connection = Lucene::open($indexLocation);
         } catch (\Exception $openFailure) {
             // Location exists but could not be opened as an index.
             $this->connection = Lucene::create($indexLocation);
         }
     }

     if (!$this->connection) {
         throw new SearchCanNotConnectException($indexLocation);
     }
 }
Ejemplo n.º 22
0
 /**
  * A document that was just added, with no commit() call, must not be
  * reported as deleted.
  *
  * @group ZF-9680
  */
 public function testIsDeletedWithoutExplicitCommit()
 {
     $indexDir = __DIR__ . '/_index/_files';
     $index = Lucene\Lucene::create($indexDir);

     $doc = new Document();
     $doc->addField(Document\Field::Keyword('_id', 'myId'));
     $doc->addField(Document\Field::Keyword('bla', 'blubb'));
     $index->addDocument($doc);

     $firstDocDeleted = $index->isDeleted(0);
     $this->assertFalse($firstDocDeleted);
 }
Ejemplo n.º 23
0
 /**
  * Flush the index by creating a new Lucene index at the path of the file
  * source named by the 'fileSystem' option.
  */
 public function flushIndex()
 {
     $fileSourceService = tao_models_classes_FileSourceService::singleton();
     $fileSource = $fileSourceService->getFileSource($this->getOption('fileSystem'));

     Lucene::create($fileSource->getPath());
 }
 /**
  * Index the data found in the JSON files.
  *
  * Every regular file in the "data/json" directory next to this source file
  * is decoded, and a Lucene index named after the file (its name minus the
  * last five characters) is created under "data/index" with one document
  * per decoded entry.
  */
 public function index()
 {
     $dataDir = realpath(dirname(__FILE__)) . DIRECTORY_SEPARATOR . "data" . DIRECTORY_SEPARATOR;
     $jsonDir = $dataDir . "json";
     $indexDir = $dataDir . "index";

     // Read the JSON files.
     foreach (scandir($jsonDir) as $file) {
         if ($file == '.' || $file == '..') {
             continue;
         }

         // Skip anything that is not a regular file.
         $jsonPath = $jsonDir . DIRECTORY_SEPARATOR . $file;
         if (!is_file($jsonPath)) {
             continue;
         }

         $entries = json_decode(file_get_contents($jsonPath));
         $indexName = substr($file, 0, -5);

         // Create the index for this file.
         $index = Lucene\Lucene::create($indexDir . DIRECTORY_SEPARATOR . $indexName);

         // Create one document per entry and define the fields to index.
         // NOTE(review): the 'url' field is filled from the entry title - confirm.
         foreach ($entries as $entry) {
             $document = new Lucene\Document();
             $document->addField(Lucene\Document\Field::Text('url', $entry->title));
             $document->addField(Lucene\Document\Field::UnStored('contents', $entry->text));
             $index->addDocument($document);
         }
     }
 }