/**
  * Indexes a single document in a dummy index, wraps a query hit for it in a
  * LuceneResult and verifies that the result exposes the expected values.
  *
  * @dataProvider searchResultDataProvider
  */
 function testSearchLuceneResultContent($fileId, $name, $path, $size, $score, $mimeType, $modified, $container)
 {
     require_once __DIR__ . '/util/dummyindex.php';
     $dummyIndex = new DummyIndex();

     // Assemble the indexed document; the path is stored below '/test/files'.
     $document = new Document();
     $documentFields = [
         Document\Field::Keyword('fileId', $fileId),
         Document\Field::Text('path', '/test/files' . $path, 'UTF-8'),
         Document\Field::unIndexed('mtime', $modified),
         Document\Field::unIndexed('size', $size),
         Document\Field::unIndexed('mimetype', $mimeType),
     ];
     foreach ($documentFields as $documentField) {
         $document->addField($documentField);
     }
     $dummyIndex->addDocument($document);

     // Hand-craft a hit that points at the first (and only) document.
     $queryHit = new QueryHit($dummyIndex);
     $queryHit->score = $score;
     $queryHit->id = 0;
     $queryHit->document_id = 0;

     $result = new \OCA\Search_Lucene\Search\LuceneResult($queryHit);

     $this->assertInstanceOf('OCA\\Search_Lucene\\Search\\LuceneResult', $result);
     $this->assertEquals($fileId, $result->id);
     $this->assertEquals('lucene', $result->type);
     // The result path is expected without the '/test/files' prefix.
     $this->assertEquals($path, $result->path);
     $this->assertEquals($name, $result->name);
     $this->assertEquals($mimeType, $result->mime_type);
     $this->assertEquals($size, $result->size);
     $this->assertEquals($score, $result->score);
     $this->assertEquals($modified, $result->modified);
 }
 /**
  * update() is expected to search for the model's existing document, delete
  * the hit that is found and add one freshly built document to the index.
  */
 public function testUpdate()
 {
     $luceneIndex = m::mock();
     $this->connection->shouldReceive('getIndex')->andReturn($luceneIndex);

     // Matcher: the added document must equal the one assembled here.
     $isExpectedDocument = function ($actualDocument) {
         $expectedDocument = new Document();
         $expectedDocument->addField(Field::keyword('primary_key', 1));
         $expectedDocument->addField(Field::Keyword('class_uid', '12345'));

         $unstoredFields = [
             'name' => 'test name',
             'optional_attribute1' => 'optional value',
         ];
         foreach ($unstoredFields as $fieldName => $fieldValue) {
             $unstored = Field::unStored($fieldName, $fieldValue);
             $unstored->boost = 1;
             $expectedDocument->addField($unstored);
         }

         $this->assertEquals($expectedDocument, $actualDocument);

         return true;
     };
     $luceneIndex->shouldReceive('addDocument')->with(m::on($isExpectedDocument))->once();

     // Matcher: the lookup must require both the primary key and the class UID.
     $isExpectedQuery = function ($actualQuery) {
         $expectedQuery = new MultiTerm();
         $expectedQuery->addTerm(new Term(1, 'primary_key'), true);
         $expectedQuery->addTerm(new Term('12345', 'class_uid'), true);

         $this->assertEquals($expectedQuery, $actualQuery);

         return true;
     };
     // The lookup yields a single hit with id 10 ...
     $singleHit = function () {
         $hit = m::mock();
         $hit->id = 10;

         return [$hit];
     };
     $luceneIndex->shouldReceive('find')->with(m::on($isExpectedQuery))->andReturnUsing($singleHit)->once();
     // ... which must then be deleted by that id.
     $luceneIndex->shouldReceive('delete')->with(10)->once();

     $searchIndex = $this->createIndex();
     $searchIndex->update($this->model);
 }
Example #3
0
 /**
  * Field::Keyword() must produce a field that is stored, indexed, not
  * tokenized and not binary, encoded as UTF-8 and carrying a boost of 1.
  */
 public function testKeyword()
 {
     $field = Document\Field::Keyword('field', 'value');

     // PHPUnit's signature is assertEquals($expected, $actual): the expected
     // literal goes first so that failure messages label both sides correctly.
     $this->assertEquals(1, $field->boost);
     $this->assertEquals('UTF-8', $field->encoding);
     $this->assertEquals(false, $field->isBinary);
     $this->assertEquals(true, $field->isIndexed);
     $this->assertEquals(true, $field->isStored);
     $this->assertEquals(false, $field->isTokenized);
     $this->assertEquals('field', $field->name);
     $this->assertEquals('value', $field->value);
 }
Example #4
0
 /**
  * Re-indexes a group: the previous entry is unindexed first, then a new
  * document describing the group is added to the given search index.
  *
  * @param $data                       Group record; its id, url, name_short
  *                                    and description properties are read.
  * @param SearchIndexInterface $index Index receiving the document.
  *
  * @return IndexInterface This indexer, for chaining.
  */
 public function index($data, SearchIndexInterface $index)
 {
     $this->unindex($data, $index);

     $document = new Document();
     $documentFields = [
         Field::Keyword('group_id', $data->id),
         Field::UnIndexed('type', "group"),
         Field::UnIndexed('identifier', $data->url),
         // Both timestamps reflect the moment of indexing.
         Field::UnIndexed('date_time', date('c')),
         Field::UnIndexed('date', date('j. M. Y')),
         Field::Text('title', $data->name_short, 'utf-8'),
         Field::Text('body', $data->description, 'utf-8'),
     ];
     foreach ($documentFields as $documentField) {
         $document->addField($documentField);
     }

     $index->addDocument($document);

     return $this;
 }
Example #5
0
 /**
  * Re-indexes a news item: the previous entry is unindexed first, then a new
  * document describing the item is added to the given search index.
  *
  * @param $data                       News record; its id, created_date,
  *                                    title and body properties are read.
  * @param SearchIndexInterface $index Index receiving the document.
  *
  * @return IndexInterface This indexer, for chaining.
  */
 public function index($data, SearchIndexInterface $index)
 {
     $this->unindex($data, $index);

     $document = new Document();
     $documentFields = [
         Field::Keyword('news_id', $data->id),
         Field::UnIndexed('type', "news"),
         Field::UnIndexed('identifier', $data->id),
         // Both timestamps are derived from the item's creation date.
         Field::UnIndexed('date_time', $data->created_date->format('c')),
         Field::UnIndexed('date', $data->created_date->format('j. M. Y')),
         Field::Text('title', $data->title, 'utf-8'),
         Field::Text('body', $data->body, 'utf-8'),
     ];
     foreach ($documentFields as $documentField) {
         $document->addField($documentField);
     }

     $index->addDocument($document);

     return $this;
 }
Example #6
0
 /**
  * Round trip: index a document, find it by fileId, change its 'users'
  * field, write it back through updateFile() and check that a second
  * lookup returns the changed value.
  */
 function testUpdate()
 {
     $application = new Application();
     /** @var Index $index */
     $index = $application->getContainer()->query('Index');

     // Builds a boolean query with a single term subquery on 'fileId'.
     $queryForFileId = function ($fileId) {
         $booleanQuery = new Query\Boolean();
         $booleanQuery->addSubquery(new Query\Term(new Term($fileId, 'fileId')));

         return $booleanQuery;
     };

     // Seed the index with one document shared with alice.
     $original = new Document();
     $original->addField(Document\Field::Keyword('fileId', '1'));
     $original->addField(Document\Field::Text('path', '/somewhere/deep/down/the/rabbit/hole', 'UTF-8'));
     $original->addField(Document\Field::Text('users', 'alice', 'UTF-8'));
     $index->index->addDocument($original);
     $index->commit();

     // First lookup: the stored document still belongs to alice.
     $firstHits = $index->find($queryForFileId('1'));
     $foundDoc = $firstHits[0]->getDocument();
     $this->assertEquals('alice', $foundDoc->getFieldValue('users'));

     // Re-adding the 'users' field changes the value on the in-memory document.
     $foundDoc->addField(Document\Field::Text('users', 'bob', 'UTF-8'));
     $this->assertEquals('bob', $foundDoc->getFieldValue('users'));

     // Write the changed document back for fileId '1'.
     $index->updateFile($foundDoc, '1');

     // Second lookup: the index must now return bob.
     $secondHits = $index->find($queryForFileId('1'));
     $foundDoc2 = $secondHits[0]->getDocument();
     $this->assertEquals('bob', $foundDoc2->getFieldValue('users'));
 }
Example #7
0
 /**
  * Lists all Post models and runs a Lucene search over them.
  *
  * The index in data/posts_index is recreated from every Post on each call
  * and then queried with the space-separated words of the "q" query
  * parameter. The folder must be writable.
  *
  * @return mixed The rendered 'index' view.
  */
 public function actionIndex()
 {
     $searchModel = new PostSearch();
     $dataProvider = $searchModel->search(Yii::$app->request->post());

     // Lucene configuration: UTF-8 queries, case-insensitive UTF-8 analysis,
     // at most 10 results.
     setlocale(LC_CTYPE, 'ru_RU.UTF-8');
     Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
     Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
     Lucene\Lucene::setResultSetLimit(10);

     // Recreate the posts index and add one document per post.
     $index = Lucene\Lucene::create('data/posts_index');
     foreach (Post::find()->all() as $post) {
         $doc = new Lucene\Document();
         $doc->addField(Lucene\Document\Field::UnIndexed('entry_id', $post->id));
         $doc->addField(Lucene\Document\Field::Keyword('title', $post->title));
         $doc->addField(Lucene\Document\Field::text('contents', $post->content));
         $index->addDocument($doc);
     }
     $index->commit();

     // Query parameters arrive already URL-decoded, so they must not be passed
     // through urldecode() again: that would turn a literal '+' into a space
     // and mangle '%xx' sequences. The cast covers a missing "q" (null).
     $rawQuery = (string) Yii::$app->getRequest()->getQueryParam('q');

     // Split into words, dropping the empty tokens produced by a blank query
     // or by consecutive spaces so they do not become empty search terms.
     $words = array_filter(explode(' ', $rawQuery), function ($word) {
         return $word !== '';
     });

     // One term per word.
     $query = new Lucene\Search\Query\MultiTerm();
     foreach ($words as $word) {
         $query->addTerm(new Lucene\Index\Term($word));
     }

     // Reopen the index and run the query.
     $index = Lucene\Lucene::open('data/posts_index');
     $results = $index->find($query);

     return $this->render('index', ['searchModel' => $searchModel, 'dataProvider' => $dataProvider, 'search' => $results, 'query' => $query]);
 }
Example #8
0
 /**
  * A document that was just added must not be reported as deleted, even
  * though commit() is never called explicitly.
  *
  * @group ZF-9680
  */
 public function testIsDeletedWithoutExplicitCommit()
 {
     $index = Lucene\Lucene::create(__DIR__ . '/_index/_files');

     $document = new Document();
     $keywords = ['_id' => 'myId', 'bla' => 'blubb'];
     foreach ($keywords as $fieldName => $fieldValue) {
         $document->addField(Document\Field::Keyword($fieldName, $fieldValue));
     }
     $index->addDocument($document);

     // Document number 0 is the only document added above.
     $deleted = $index->isDeleted(0);
     $this->assertFalse($deleted);
 }
 /**
  * Replace the index document of a model.
  *
  * Deletes whatever is currently indexed for the model, then adds a new
  * document holding its primary key, its class UID, the configured fields
  * and the optional attributes, each with its configured boost.
  *
  * @param Model $model
  */
 public function update(Model $model)
 {
     // Start from a clean slate for this model.
     $this->delete($model);

     $document = new Document();

     // Identification: primary key ...
     list($keyName, $keyValue) = $this->config->primaryKeyPair($model);
     $document->addField(Field::keyword($keyName, $keyValue));

     // ... and the UID of the model's class.
     list($uidName, $uidValue) = $this->config->classUidPair($model);
     $document->addField(Field::Keyword($uidName, $uidValue));

     // Adds a searchable (unStored) field with tags stripped and the given boost.
     $addUnstored = function ($name, $rawValue, $boost) use ($document) {
         $unstored = Field::unStored(trim($name), strip_tags(trim($rawValue)));
         $unstored->boost = $boost;
         $document->addField($unstored);
     };

     // Base fields take their value from the model attribute of the same name.
     foreach ($this->config->fields($model) as $attributeName => $settings) {
         $addUnstored($attributeName, $model->{trim($attributeName)}, array_get($settings, 'boost'));
     }

     // Optional attributes carry their value inside their options.
     foreach ($this->config->optionalAttributes($model) as $attributeName => $settings) {
         $addUnstored($attributeName, array_get($settings, "value"), array_get($settings, "boost"));
     }

     // Document-level boost for the model as a whole.
     $document->boost = $this->config->boost($model);

     $this->index()->addDocument($document);
 }
Example #10
0
 /**
  * Refreshes this job's entry in the Lucene index: existing hits for its
  * primary key are deleted, and a new document is added and committed
  * unless the job is expired or not activated.
  *
  * @ORM\PostPersist
  */
 public function updateLuceneIndex()
 {
     $index = self::getLuceneIndex();

     // Drop every entry currently indexed under this job's primary key.
     $staleHits = $index->find('pk:' . $this->getId());
     foreach ($staleHits as $staleHit) {
         $index->delete($staleHit->id);
     }

     // Only jobs that are both unexpired and activated are indexed.
     $indexable = !$this->isExpired() && $this->getIsActivated();
     if (!$indexable) {
         return;
     }

     $document = new Document();
     // The primary key identifies the job in search results.
     $document->addField(Document\Field::Keyword('pk', $this->getId()));

     // Job fields, added as unStored fields.
     $searchable = [
         'position' => $this->getPosition(),
         'company' => $this->getCompany(),
         'location' => $this->getLocation(),
         'description' => $this->getDescription(),
     ];
     foreach ($searchable as $fieldName => $fieldValue) {
         $document->addField(Document\Field::UnStored($fieldName, $fieldValue, 'utf-8'));
     }

     $index->addDocument($document);
     $index->commit();
 }
 /**
  * Replace the index document of a model.
  *
  * Deletes whatever is currently indexed for the model, then adds a new
  * document holding its private key, its class UID, the configured fields
  * and the optional attributes.
  *
  * @param Model $model
  */
 public function update(Model $model)
 {
     // Start from a clean slate for this model.
     $this->delete($model);

     $document = new Document();

     // Identification: private key ...
     list($keyName, $keyValue) = $this->config->privateKeyPair($model);
     $document->addField(Field::keyword($keyName, $keyValue));

     // ... and the UID of the model's class.
     list($uidName, $uidValue) = $this->config->classUidPair($model);
     $document->addField(Field::Keyword($uidName, $uidValue));

     // Configured fields: the value is the model attribute of the same
     // (trimmed) name, with tags stripped.
     foreach ($this->config->fields($model) as $configuredField) {
         $attribute = trim($configuredField);
         $text = strip_tags(trim($model->{$attribute}));
         $document->addField(Field::unStored($attribute, $text));
     }

     // Optional attributes arrive as name => value pairs.
     foreach ($this->config->optionalAttributes($model) as $attribute => $rawValue) {
         $text = strip_tags(trim($rawValue));
         $document->addField(Field::unStored(trim($attribute), $text));
     }

     $this->index()->addDocument($document);
 }
Example #12
0
 /**
  * Index a file.
  *
  * For files on local storage the content is indexed according to mime type
  * or file extension; in every case the file's id, path, mtime, size and
  * mime type are added and the document is handed to the index.
  *
  * @param File $file the file to be indexed
  * @param bool $commit passed through to the index's updateFile()
  *
  * @return bool true once the document has been handed to the index
  * @throws NotIndexedException when a local file has an unsupported type
  */
 public function indexFile(File $file, $commit = true)
 {
     // The indexing strategy depends on mime type or file extension.
     $mimeType = $file->getMimeType();
     $fileExtension = strtolower(pathinfo($file->getName(), PATHINFO_EXTENSION));

     // Plain document by default; specialised loaders below may replace it.
     $document = new Document();

     // Content is only indexed for files on local storage.
     $storage = $file->getStorage();
     if ($storage->isLocal()) {
         $localPath = $storage->getLocalFile($file->getInternalPath());

         if ($mimeType === 'text/html') {
             //TODO could be indexed, even if not local
             $document = HTML::loadHTML($file->getContent());
         } elseif (substr($mimeType, 0, 5) === 'text/' || $mimeType === 'application/x-tex') {
             // Plain text: add the body only when there is something to add.
             $content = $file->getContent();
             if ($content != '') {
                 $document->addField(Document\Field::UnStored('body', $content));
             }
         } elseif ($mimeType === 'application/pdf') {
             $document = Pdf::loadPdf($file->getContent());
         } elseif ($fileExtension === 'docx') {
             // the zend classes only understand docx and not doc files
             $document = Document\Docx::loadDocxFile($localPath);
         } elseif ($fileExtension === 'xlsx') {
             $document = Document\Xlsx::loadXlsxFile($localPath);
         } elseif ($fileExtension === 'pptx') {
             $document = Document\Pptx::loadPptxFile($localPath);
         } elseif ($fileExtension === 'odt') {
             $document = Odt::loadOdtFile($localPath);
         } elseif ($fileExtension === 'ods') {
             $document = Ods::loadOdsFile($localPath);
         } else {
             throw new NotIndexedException();
         }
     }

     // The filecache id is the unique key used to look the document up when deleting.
     $document->addField(Document\Field::Keyword('fileId', $file->getId()));
     // The path is kept for the search results.
     $document->addField(Document\Field::Text('path', $file->getPath(), 'UTF-8'));
     $document->addField(Document\Field::unIndexed('mtime', $file->getMTime()));
     $document->addField(Document\Field::unIndexed('size', $file->getSize()));
     $document->addField(Document\Field::unIndexed('mimetype', $mimeType));

     $this->index->updateFile($document, $file->getId(), $commit);

     return true;
 }
 /**
  * Adds one field to the document for every index declared on a property.
  *
  * The values of the property on $this->resource are tokenized by each
  * index; indexes that yield no tokens are skipped.
  *
  * @param Document $document document receiving the fields
  * @param \core_kernel_classes_Property $property property whose indexes are processed
  */
 protected function indexProperty(Document $document, \core_kernel_classes_Property $property)
 {
     $indexes = $property->getPropertyValues(new \core_kernel_classes_Property('http://www.tao.lu/Ontologies/TAO.rdf#PropertyIndex'));
     foreach ($indexes as $indexUri) {
         $index = new Index($indexUri);
         // Fetched once and reused as the field name below (previously the
         // result was discarded and the identifier looked up again).
         $id = $index->getIdentifier();
         $strings = $index->tokenize($this->resource->getPropertyValues($property));
         if (empty($strings)) {
             continue;
         }

         if ($index->isFuzzyMatching()) {
             // cannot store multiple fuzzy strings, so they are merged into one
             $field = Document\Field::Text($id, implode(' ', $strings));
             $field->isStored = $index->isStored();
         } else {
             // A single token is passed as a scalar, several tokens as an array.
             $value = count($strings) > 1 ? $strings : reset($strings);
             $field = Document\Field::Keyword($id, $value);
             // storage of arrays not supported
             $field->isStored = $index->isStored() && !is_array($value);
         }
         $document->addField($field);
     }
 }
 /**
  * {@inheritDoc}
  *
  * Builds one document for the node: keyword fields for its path, node
  * name, local node name and parent path, a keyword field for the internal
  * UUID, and one text field per property of a supported type. Multiple
  * values of a property are joined with self::MULTIVALUE_SEPARATOR.
  * Properties of any other type are not indexed.
  */
 public function index($workspace, $path, Node $node)
 {
     $index = $this->getIndex($workspace);
     $document = new Document();

     $nodeName = PathHelper::getNodeName($path);
     // The local node name is currently identical to the node name.
     $localNodeName = $nodeName;
     $parentPath = PathHelper::getParentPath($path);

     $document->addField(Field::Keyword(self::IDX_PATH, $path));
     $document->addField(Field::Keyword(self::IDX_NODENAME, $nodeName));
     $document->addField(Field::Keyword(self::IDX_NODELOCALNAME, $localNodeName));
     $document->addField(Field::Keyword(self::IDX_PARENTPATH, $parentPath));

     foreach ($node->getProperties() as $propertyName => $property) {
         $propertyValue = $property['value'];
         $propertyType = $property['type'];

         // The internal UUID is indexed verbatim, whatever its declared type.
         if ($propertyName === Storage::INTERNAL_UUID) {
             $document->addField(Field::Keyword(Storage::INTERNAL_UUID, $propertyValue));
             continue;
         }

         switch ($propertyType) {
             case PropertyType::TYPENAME_STRING:
             case PropertyType::TYPENAME_NAME:
             case PropertyType::TYPENAME_PATH:
             case PropertyType::TYPENAME_URI:
                 $values = (array) $propertyValue;
                 break;
             case PropertyType::TYPENAME_DATE:
                 // Casting a lone DateTime object with (array) would expand it
                 // into its internal properties, so wrap it explicitly.
                 $values = $propertyValue instanceof \DateTime ? [$propertyValue] : (array) $propertyValue;
                 foreach ($values as $i => $value) {
                     if ($value instanceof \DateTime) {
                         $values[$i] = $value->format('c');
                     }
                 }
                 break;
             case PropertyType::TYPENAME_DECIMAL:
             case PropertyType::TYPENAME_LONG:
             case PropertyType::TYPENAME_DOUBLE:
                 // Left-pad with zeros to the digit count of PHP_INT_MAX.
                 $padFormat = '%0' . strlen((string) PHP_INT_MAX) . 's';
                 $values = (array) $propertyValue;
                 // Keyed assignment instead of a by-reference loop: no
                 // reference to the last element is left behind.
                 foreach ($values as $i => $value) {
                     $values[$i] = sprintf($padFormat, $value);
                 }
                 break;
             case PropertyType::TYPENAME_BOOLEAN:
                 $values = (array) $propertyValue;
                 foreach ($values as $i => $value) {
                     // Test each value on its own; comparing the whole property
                     // value marked every entry of a multi-valued property true.
                     $values[$i] = $value === 'false' ? self::VALUE_BOOLEAN_FALSE : 1;
                 }
                 break;
             default:
                 // Unsupported property types are skipped.
                 continue 2;
         }

         $document->addField(Field::Text($propertyName, join(self::MULTIVALUE_SEPARATOR, $values)));
     }

     $index->addDocument($document);
 }
Example #15
0
 /**
  * Refresh the index entry of an ISearchable object.
  *
  * The previous entry is removed first; a new document is added and
  * committed only when the item reports itself as available for indexing.
  *
  * @param ISearchable $item
  */
 public function updateIndex(ISearchable $item)
 {
     // The connection is the index itself.
     $luceneIndex = $this->connection;

     // Remove the old entry.
     $this->deleteIndex($item);

     // Unavailable items are not indexed.
     if (!$item->isAvailableForIndexing()) {
         return;
     }

     $document = new Document();
     // Keep the model's primary key to identify it in search results.
     $document->addField(Field::Keyword('pk', $item->getId()));
     // Ids of different models may collide (e.g. products and services),
     // so the class name is added for unambiguous identification.
     $document->addField(Field::Keyword('model', get_class($item)));

     // Index the model's attributes with tags stripped.
     foreach ($item->getAttributesForIndexing() as $attribute) {
         $fieldName = $attribute->getFieldName();
         $plainText = strip_tags($attribute->getValue());
         $document->addField(Field::unStored($fieldName, $plainText));
     }

     // Add the entry to the index and persist it.
     $luceneIndex->addDocument($document);
     $luceneIndex->commit();
 }