/**
 * Checks that a LuceneResult built from a query hit exposes the data of the indexed file.
 *
 * One document is added to a dummy index, a QueryHit pointing at it is created
 * by hand, and the properties of the resulting LuceneResult are compared with
 * the values supplied by the data provider.
 *
 * @dataProvider searchResultDataProvider
 */
function testSearchLuceneResultContent($fileId, $name, $path, $size, $score, $mimeType, $modified, $container)
{
    require_once __DIR__ . '/util/dummyindex.php';

    $dummyIndex = new DummyIndex();

    // The path is indexed with a '/test/files' prefix; the result is asserted to expose it without one.
    $storedPath = '/test/files' . $path;

    $document = new Document();
    $document->addField(Document\Field::Keyword('fileId', $fileId));
    $document->addField(Document\Field::Text('path', $storedPath, 'UTF-8'));
    $document->addField(Document\Field::unIndexed('mtime', $modified));
    $document->addField(Document\Field::unIndexed('size', $size));
    $document->addField(Document\Field::unIndexed('mimetype', $mimeType));
    $dummyIndex->addDocument($document);

    // Hand-crafted hit referring to the only document in the index.
    $queryHit = new QueryHit($dummyIndex);
    $queryHit->score = $score;
    $queryHit->id = 0;
    $queryHit->document_id = 0;

    $result = new \OCA\Search_Lucene\Search\LuceneResult($queryHit);

    $this->assertInstanceOf('OCA\\Search_Lucene\\Search\\LuceneResult', $result);
    $this->assertEquals($fileId, $result->id);
    $this->assertEquals('lucene', $result->type);
    $this->assertEquals($path, $result->path);
    $this->assertEquals($name, $result->name);
    $this->assertEquals($mimeType, $result->mime_type);
    $this->assertEquals($size, $result->size);
    $this->assertEquals($score, $result->score);
    $this->assertEquals($modified, $result->modified);
}
/**
 * Expects update() to look up the model's existing hit, delete it, and add a
 * freshly built document to the Lucene index.
 */
public function testUpdate()
{
    $luceneIndex = m::mock();
    $this->connection->shouldReceive('getIndex')->andReturn($luceneIndex);

    // Document the index is expected to receive.
    $expectedDocument = new Document();
    $expectedDocument->addField(Field::keyword('primary_key', 1));
    $expectedDocument->addField(Field::Keyword('class_uid', '12345'));

    $nameField = Field::unStored('name', 'test name');
    $nameField->boost = 1;
    $expectedDocument->addField($nameField);

    $optionalField = Field::unStored('optional_attribute1', 'optional value');
    $optionalField->boost = 1;
    $expectedDocument->addField($optionalField);

    $luceneIndex->shouldReceive('addDocument')
        ->with(m::on(function ($actualDocument) use ($expectedDocument) {
            $this->assertEquals($expectedDocument, $actualDocument);

            return true;
        }))
        ->once();

    // Query expected to be used for locating the model's current entry.
    $expectedQuery = new MultiTerm();
    $expectedQuery->addTerm(new Term(1, 'primary_key'), true);
    $expectedQuery->addTerm(new Term('12345', 'class_uid'), true);

    $luceneIndex->shouldReceive('find')
        ->with(m::on(function ($actualQuery) use ($expectedQuery) {
            $this->assertEquals($expectedQuery, $actualQuery);

            return true;
        }))
        ->andReturnUsing(function () {
            $existingHit = m::mock();
            $existingHit->id = 10;

            return [$existingHit];
        })
        ->once();

    // The hit returned by find() must be removed by its id.
    $luceneIndex->shouldReceive('delete')->with(10)->once();

    $subject = $this->createIndex();
    $subject->update($this->model);
}
/**
 * Checks the flags and values of a field created through Field::Keyword().
 *
 * assertEquals() takes the expected value first and the actual value second;
 * passing them in that order keeps PHPUnit's failure messages accurate.
 */
public function testKeyword()
{
    $field = Document\Field::Keyword('field', 'value');

    $this->assertEquals(1, $field->boost);
    $this->assertEquals('UTF-8', $field->encoding);
    $this->assertEquals(false, $field->isBinary);
    $this->assertEquals(true, $field->isIndexed);
    $this->assertEquals(true, $field->isStored);
    $this->assertEquals(false, $field->isTokenized);
    $this->assertEquals('field', $field->name);
    $this->assertEquals('value', $field->value);
}
/**
 * Replaces the search document of a group in the given index.
 *
 * Any existing entry is removed through unindex() before the new document is added.
 *
 * @param                      $data  object providing id, url, name_short and description
 * @param SearchIndexInterface $index index that receives the document
 *
 * @return IndexInterface this instance
 */
public function index($data, SearchIndexInterface $index)
{
    $this->unindex($data, $index);

    // Fields are created in the order in which they are added to the document.
    $fields = array(
        Field::Keyword('group_id', $data->id),
        Field::UnIndexed('type', 'group'),
        Field::UnIndexed('identifier', $data->url),
        Field::UnIndexed('date_time', date('c')),
        Field::UnIndexed('date', date('j. M. Y')),
        Field::Text('title', $data->name_short, 'utf-8'),
        Field::Text('body', $data->description, 'utf-8'),
    );

    $document = new Document();
    foreach ($fields as $field) {
        $document->addField($field);
    }

    $index->addDocument($document);

    return $this;
}
/**
 * Replaces the search document of a news entry in the given index.
 *
 * Any existing entry is removed through unindex() before the new document is added.
 *
 * @param                      $data  object providing id, created_date, title and body
 * @param SearchIndexInterface $index index that receives the document
 *
 * @return IndexInterface this instance
 */
public function index($data, SearchIndexInterface $index)
{
    $this->unindex($data, $index);

    // Fields are created in the order in which they are added to the document.
    $fields = array(
        Field::Keyword('news_id', $data->id),
        Field::UnIndexed('type', 'news'),
        Field::UnIndexed('identifier', $data->id),
        Field::UnIndexed('date_time', $data->created_date->format('c')),
        Field::UnIndexed('date', $data->created_date->format('j. M. Y')),
        Field::Text('title', $data->title, 'utf-8'),
        Field::Text('body', $data->body, 'utf-8'),
    );

    $document = new Document();
    foreach ($fields as $field) {
        $document->addField($field);
    }

    $index->addDocument($document);

    return $this;
}
/**
 * Verifies that updateFile() replaces an indexed document: after changing the
 * 'users' field from 'alice' to 'bob' and updating, a search by fileId must
 * return the document carrying 'bob'.
 */
function testUpdate()
{
    // preparation
    $app = new Application();
    $container = $app->getContainer();

    /** @var Index $index */
    $index = $container->query('Index');

    // add a document
    $doc = new Document();
    $doc->addField(Document\Field::Keyword('fileId', '1'));
    $doc->addField(Document\Field::Text('path', '/somewhere/deep/down/the/rabbit/hole', 'UTF-8'));
    $doc->addField(Document\Field::Text('users', 'alice', 'UTF-8'));
    $index->index->addDocument($doc);
    $index->commit();

    // search for it
    $idTerm = new Term('1', 'fileId');
    $idQuery = new Query\Term($idTerm);
    $query = new Query\Boolean();
    $query->addSubquery($idQuery);

    /** @var QueryHit[] $hits */
    $hits = $index->find($query);

    // Fail with a readable message instead of an undefined-offset error below.
    $this->assertNotEmpty($hits, 'The freshly indexed document was not found.');

    // get the document from the query hit
    $foundDoc = $hits[0]->getDocument();
    $this->assertEquals('alice', $foundDoc->getFieldValue('users'));

    // change the 'users' key of the document
    $foundDoc->addField(Document\Field::Text('users', 'bob', 'UTF-8'));
    $this->assertEquals('bob', $foundDoc->getFieldValue('users'));

    // add the document back to the index
    $index->updateFile($foundDoc, '1');

    // search again with an identical, freshly built query
    $idTerm2 = new Term('1', 'fileId');
    $idQuery2 = new Query\Term($idTerm2);
    $query2 = new Query\Boolean();
    $query2->addSubquery($idQuery2);

    /** @var QueryHit[] $hits2 */
    $hits2 = $index->find($query2);
    $this->assertNotEmpty($hits2, 'The updated document was not found.');

    // get the document from the query hit
    $foundDoc2 = $hits2[0]->getDocument();
    $this->assertEquals('bob', $foundDoc2->getFieldValue('users'));
}
/**
 * Lists all Post models and runs a Lucene search over them.
 *
 * The Lucene index in 'data/posts_index' is created and filled with all posts,
 * then queried with one term per word of the 'q' query parameter. The search
 * model, the data provider, the hits and the query are handed to the 'index' view.
 *
 * @return mixed
 */
public function actionIndex()
{
    $searchModel = new PostSearch();
    $dataProvider = $searchModel->search(Yii::$app->request->post());

    setlocale(LC_CTYPE, 'ru_RU.UTF-8');
    Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
    Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
    Lucene\Lucene::setResultSetLimit(10);

    // Create the blog posts index in data/posts_index; the folder must be writable.
    // NOTE(review): the index is rebuilt from all posts on every request.
    $index = Lucene\Lucene::create('data/posts_index');

    foreach (Post::find()->all() as $post) {
        $doc = new Lucene\Document();
        $doc->addField(Lucene\Document\Field::UnIndexed('entry_id', $post->id));
        $doc->addField(Lucene\Document\Field::Keyword('title', $post->title));
        $doc->addField(Lucene\Document\Field::text('contents', $post->content));
        $index->addDocument($doc);
    }

    $index->commit();

    // Request parameters arrive already URL-decoded, so decoding them again would
    // corrupt input containing '%' or '+'. A missing or non-string 'q' is treated
    // as an empty search.
    $rawQuery = Yii::$app->getRequest()->getQueryParam('q');
    if (!is_string($rawQuery)) {
        $rawQuery = '';
    }

    // One term per word; empty pieces produced by repeated spaces are skipped.
    $query = new Lucene\Search\Query\MultiTerm();
    foreach (explode(' ', $rawQuery) as $word) {
        if ($word === '') {
            continue;
        }
        $query->addTerm(new Lucene\Index\Term($word));
    }

    // open and query the index
    $index = Lucene\Lucene::open('data/posts_index');
    $results = $index->find($query);

    return $this->render('index', [
        'searchModel' => $searchModel,
        'dataProvider' => $dataProvider,
        'search' => $results,
        'query' => $query,
    ]);
}
/**
 * A document added without an explicit commit must not be reported as deleted.
 *
 * @group ZF-9680
 */
public function testIsDeletedWithoutExplicitCommit()
{
    $indexDirectory = __DIR__ . '/_index/_files';
    $luceneIndex = Lucene\Lucene::create($indexDirectory);

    $doc = new Document();
    $doc->addField(Document\Field::Keyword('_id', 'myId'));
    $doc->addField(Document\Field::Keyword('bla', 'blubb'));
    $luceneIndex->addDocument($doc);

    // The first (and only) document has id 0.
    $deleted = $luceneIndex->isDeleted(0);
    $this->assertFalse($deleted);
}
/**
 * Rebuild the index document of a model.
 *
 * Existing documents of the model are deleted first; afterwards a new document
 * is assembled from the primary key, the class UID, the configured fields and
 * the optional attributes, and added to the index.
 *
 * @param Model $model
 */
public function update(Model $model)
{
    // Drop whatever is currently indexed for this model.
    $this->delete($model);

    $document = new Document();

    // Primary key of the model.
    list($keyName, $keyValue) = $this->config->primaryKeyPair($model);
    $document->addField(Field::keyword($keyName, $keyValue));

    // Class UID, used to tell which class the model belongs to.
    list($uidName, $uidValue) = $this->config->classUidPair($model);
    $document->addField(Field::Keyword($uidName, $uidValue));

    // Base fields: indexed but not stored, each with its configured boost.
    $baseFields = $this->config->fields($model);
    foreach ($baseFields as $attribute => $settings) {
        $attributeName = trim($attribute);
        $rawValue = $model->{$attributeName};

        $indexField = Field::unStored(trim($attribute), strip_tags(trim($rawValue)));
        $indexField->boost = array_get($settings, 'boost');
        $document->addField($indexField);
    }

    // Optional (dynamic) attributes: value and boost both come from the options.
    $dynamicFields = $this->config->optionalAttributes($model);
    foreach ($dynamicFields as $attribute => $settings) {
        $rawValue = array_get($settings, 'value');

        $indexField = Field::unStored(trim($attribute), strip_tags(trim($rawValue)));
        $indexField->boost = array_get($settings, 'boost');
        $document->addField($indexField);
    }

    // Boost of the document as a whole.
    $document->boost = $this->config->boost($model);

    $this->index()->addDocument($document);
}
/**
 * Refreshes this job's entry in the Lucene index.
 *
 * Existing hits for the job's primary key are deleted. Expired or
 * non-activated jobs are not indexed again; for those the method returns
 * right after the deletions.
 *
 * @ORM\PostPersist
 */
public function updateLuceneIndex()
{
    $luceneIndex = self::getLuceneIndex();

    // Purge entries that already exist for this job.
    $existingHits = $luceneIndex->find('pk:' . $this->getId());
    foreach ($existingHits as $existingHit) {
        $luceneIndex->delete($existingHit->id);
    }

    // Expired and non-activated jobs stay out of the index.
    if ($this->isExpired() || !$this->getIsActivated()) {
        return;
    }

    $document = new Document();

    // The primary key identifies the job in search results.
    $document->addField(Document\Field::Keyword('pk', $this->getId()));

    // Searchable job fields: indexed, not stored.
    $searchableValues = array(
        'position' => $this->getPosition(),
        'company' => $this->getCompany(),
        'location' => $this->getLocation(),
        'description' => $this->getDescription(),
    );
    foreach ($searchableValues as $fieldName => $fieldValue) {
        $document->addField(Document\Field::UnStored($fieldName, $fieldValue, 'utf-8'));
    }

    $luceneIndex->addDocument($document);
    $luceneIndex->commit();
}
/**
 * Rebuild the index document of a model.
 *
 * Existing documents of the model are deleted first; afterwards a new document
 * is assembled from the private key, the class UID, the configured fields and
 * the optional attributes, and added to the index.
 *
 * @param Model $model
 */
public function update(Model $model)
{
    // Drop whatever is currently indexed for this model.
    $this->delete($model);

    $document = new Document();

    // Private key of the model.
    list($keyName, $keyValue) = $this->config->privateKeyPair($model);
    $document->addField(Field::keyword($keyName, $keyValue));

    // Class UID, used to tell which class the model belongs to.
    list($uidName, $uidValue) = $this->config->classUidPair($model);
    $document->addField(Field::Keyword($uidName, $uidValue));

    // Configured model attributes: indexed but not stored.
    $attributes = $this->config->fields($model);
    foreach ($attributes as $attribute) {
        $rawValue = $model->{trim($attribute)};
        $cleanValue = strip_tags(trim($rawValue));
        $document->addField(Field::unStored(trim($attribute), $cleanValue));
    }

    // Optional attributes are given as name => value pairs.
    $extras = $this->config->optionalAttributes($model);
    foreach ($extras as $extraName => $extraValue) {
        $cleanValue = strip_tags(trim($extraValue));
        $document->addField(Field::unStored(trim($extraName), $cleanValue));
    }

    $this->index()->addDocument($document);
}
/**
 * Index a file.
 *
 * For files on local storage the content is turned into a Lucene document
 * according to the mime type or the file extension. Files on non-local storage
 * get a document that only carries the metadata fields added at the end.
 *
 * @param File $file   the file to be indexed
 * @param bool $commit passed through to the index's updateFile()
 *
 * @return bool true once the document has been handed to the index; the code shown has no path returning false
 * @throws NotIndexedException when a local file has an unsupported type
 */
public function indexFile(File $file, $commit = true)
{
    // The indexing strategy is chosen by mime type or by file extension.
    $mimeType = $file->getMimeType();
    $extension = strtolower(pathinfo($file->getName(), PATHINFO_EXTENSION));

    // Start with a plain Lucene document; specialised loaders may replace it.
    $document = new Document();

    // Content is indexed for local files only.
    $storage = $file->getStorage();
    if ($storage->isLocal()) {
        $localPath = $storage->getLocalFile($file->getInternalPath());

        if ($mimeType === 'text/html') {
            //TODO could be indexed, even if not local
            $document = HTML::loadHTML($file->getContent());
        } elseif (substr($mimeType, 0, 5) === 'text/' || $mimeType === 'application/x-tex') {
            $content = $file->getContent();
            if ($content != '') {
                $document->addField(Document\Field::UnStored('body', $content));
            }
        } elseif ($mimeType === 'application/pdf') {
            $document = Pdf::loadPdf($file->getContent());
        } elseif ($extension === 'docx') {
            // the zend classes only understand docx and not doc files
            $document = Document\Docx::loadDocxFile($localPath);
        } elseif ($extension === 'xlsx') {
            $document = Document\Xlsx::loadXlsxFile($localPath);
        } elseif ($extension === 'pptx') {
            $document = Document\Pptx::loadPptxFile($localPath);
        } elseif ($extension === 'odt') {
            $document = Odt::loadOdtFile($localPath);
        } elseif ($extension === 'ods') {
            $document = Ods::loadOdsFile($localPath);
        } else {
            throw new NotIndexedException();
        }
    }

    // Store filecache id as unique id to lookup by when deleting
    $document->addField(Document\Field::Keyword('fileId', $file->getId()));

    // Store document path and file metadata for the search results
    $document->addField(Document\Field::Text('path', $file->getPath(), 'UTF-8'));
    $document->addField(Document\Field::unIndexed('mtime', $file->getMTime()));
    $document->addField(Document\Field::unIndexed('size', $file->getSize()));
    $document->addField(Document\Field::unIndexed('mimetype', $mimeType));

    $this->index->updateFile($document, $file->getId(), $commit);

    return true;
}
/**
 * Adds one field per index configured on the property to the document.
 *
 * The property's values on the current resource are tokenized by each index.
 * Indexes that yield no tokens are skipped. Fuzzy-matching indexes get a
 * single text field holding all tokens joined by spaces; other indexes get a
 * keyword field holding the single token or the array of tokens.
 *
 * @param Document                     $document document receiving the fields
 * @param \core_kernel_classes_Property $property property whose indexes are processed
 */
protected function indexProperty(Document $document, \core_kernel_classes_Property $property)
{
    $indexes = $property->getPropertyValues(new \core_kernel_classes_Property('http://www.tao.lu/Ontologies/TAO.rdf#PropertyIndex'));

    foreach ($indexes as $indexUri) {
        $index = new Index($indexUri);
        // Looked up once and reused as the field name in both branches.
        $id = $index->getIdentifier();
        $strings = $index->tokenize($this->resource->getPropertyValues($property));

        if (empty($strings)) {
            continue;
        }

        if ($index->isFuzzyMatching()) {
            // cannot store multiple fuzzy strings
            $field = Document\Field::Text($id, implode(' ', $strings));
            $field->isStored = $index->isStored();
        } else {
            $value = count($strings) > 1 ? $strings : reset($strings);
            $field = Document\Field::Keyword($id, $value);
            // storage of arrays not supported
            $field->isStored = $index->isStored() && !is_array($value);
        }

        $document->addField($field);
    }
}
/**
 * {@inheritDoc}
 *
 * Builds a Lucene document for the node and adds it to the workspace's index.
 * Path, node name, local node name and parent path are added as keyword
 * fields, as is the internal UUID property. Other properties are added as
 * text fields depending on their type; multiple values are joined with
 * MULTIVALUE_SEPARATOR. Property types without a case below are not indexed.
 */
public function index($workspace, $path, Node $node)
{
    $index = $this->getIndex($workspace);
    $document = new Document();

    $nodeName = PathHelper::getNodeName($path);
    $localNodeName = $nodeName; // PathHelper::getLocalNodeName($path);
    $parentPath = PathHelper::getParentPath($path);

    $document->addField(Field::Keyword(self::IDX_PATH, $path));
    $document->addField(Field::Keyword(self::IDX_NODENAME, $nodeName));
    $document->addField(Field::Keyword(self::IDX_NODELOCALNAME, $localNodeName));
    $document->addField(Field::Keyword(self::IDX_PARENTPATH, $parentPath));

    foreach ($node->getProperties() as $propertyName => $property) {
        $propertyValue = $property['value'];
        $propertyType = $property['type'];

        if ($propertyName === Storage::INTERNAL_UUID) {
            $document->addField(Field::Keyword(Storage::INTERNAL_UUID, $propertyValue));
            continue;
        }

        // Single values are treated as one-element lists. Elements are rewritten
        // by key rather than by reference so that no reference outlives a loop.
        switch ($propertyType) {
            case PropertyType::TYPENAME_STRING:
            case PropertyType::TYPENAME_NAME:
            case PropertyType::TYPENAME_PATH:
            case PropertyType::TYPENAME_URI:
                $values = (array) $propertyValue;
                $document->addField(Field::Text($propertyName, join(self::MULTIVALUE_SEPARATOR, $values)));
                break;

            case PropertyType::TYPENAME_DATE:
                $values = (array) $propertyValue;
                foreach ($values as $i => $value) {
                    if ($value instanceof \DateTime) {
                        $values[$i] = $value->format('c');
                    }
                }
                $document->addField(Field::Text($propertyName, join(self::MULTIVALUE_SEPARATOR, $values)));
                break;

            case PropertyType::TYPENAME_DECIMAL:
            case PropertyType::TYPENAME_LONG:
            case PropertyType::TYPENAME_DOUBLE:
                // Left-pad with zeros to the number of digits of PHP_INT_MAX.
                $format = '%0' . strlen((string) PHP_INT_MAX) . 's';
                $values = (array) $propertyValue;
                foreach ($values as $i => $value) {
                    $values[$i] = sprintf($format, $value);
                }
                $document->addField(Field::Text($propertyName, join(self::MULTIVALUE_SEPARATOR, $values)));
                break;

            case PropertyType::TYPENAME_BOOLEAN:
                $values = (array) $propertyValue;
                foreach ($values as $i => $value) {
                    // Each element is checked on its own; comparing the whole property
                    // value would index every element of a multi-valued property as true.
                    $values[$i] = $value === 'false' ? self::VALUE_BOOLEAN_FALSE : 1;
                }
                $document->addField(Field::Text($propertyName, join(self::MULTIVALUE_SEPARATOR, $values)));
                break;
        }
    }

    $index->addDocument($document);
}
/**
 * Refresh the index entry of an ISearchable object.
 *
 * The item's previous entry is always removed. A new document is added and
 * committed only when the item reports itself as available for indexing.
 *
 * @param ISearchable $item
 */
public function updateIndex(ISearchable $item)
{
    // Handle to the index itself.
    $connection = $this->connection;

    // Remove the old entry.
    $this->deleteIndex($item);

    // Unavailable items are not indexed.
    if (!$item->isAvailableForIndexing()) {
        return;
    }

    $doc = new Document();

    // Keep the model's primary key so it can be identified in search results.
    $doc->addField(Field::Keyword('pk', $item->getId()));

    // Model ids may overlap (for example between products and services),
    // so a second field is added for unambiguous identification.
    $doc->addField(Field::Keyword('model', get_class($item)));

    // Index the model's attributes with markup stripped.
    foreach ($item->getAttributesForIndexing() as $indexable) {
        $fieldName = $indexable->getFieldName();
        $rawValue = $indexable->getValue();
        $doc->addField(Field::unStored($fieldName, strip_tags($rawValue)));
    }

    // Add the record to the index and persist it.
    $connection->addDocument($doc);
    $connection->commit();
}