Пример #1
0
 /**
  * Checks that Document::addField() hands back a Document instance so that
  * several calls can be chained.
  */
 public function testAddFieldMethodChaining()
 {
     // A single call must return a Document.
     $single = new Document();
     $returned = $single->addField(Document\Field::Text('title', 'Title'));
     $this->assertTrue($returned instanceof Document);

     // Three chained calls must run through without an error.
     $chained = new Document();
     $chained
         ->addField(Document\Field::Text('title', 'Title'))
         ->addField(Document\Field::Text('annotation', 'Annotation'))
         ->addField(Document\Field::Text('body', 'Document body, document body, document body...'));
 }
 /**
  * Builds a search document for the given entity.
  *
  * The entity id is added as an un-indexed 'identifier' field, followed by one
  * text field for every entry the context's fields collection passes to the
  * map() callback.
  *
  * @param EntityInterface $entity
  *
  * @return Document
  */
 public function createDocument(EntityInterface $entity) : Document
 {
     $searchDocument = new Document();
     $searchFields = $this->createContext($entity)->getFieldsCollection();

     $searchDocument->addField(Field::unIndexed('identifier', $entity->getId()));

     // map() is used purely for its side effect on the document; its result is ignored.
     $appendTextField = function (SearchField $searchField) use ($searchDocument) {
         $searchDocument->addField(Field::text($searchField->getName(), $searchField->getValue()));
     };
     $searchFields->map($appendTextField);

     return $searchDocument;
 }
 /**
  * Adds one value to the document, picking the field type from the value's PHP type.
  *
  * - string:    keyword field holding utf8_decode($value)
  * - \DateTime: keyword field holding the Unix timestamp
  * - array:     text field holding the elements joined with ','
  * - otherwise: binary field holding the value as given
  *
  * @param Lucene\Document $document
  * @param string          $name
  * @param mixed           $value
  */
 private function appendToDocument(Lucene\Document $document, $name, $value)
 {
     if (is_string($value)) {
         $field = Lucene\Document\Field::keyword($name, utf8_decode($value));
     } elseif ($value instanceof \DateTime) {
         $field = Lucene\Document\Field::keyword($name, $value->getTimestamp());
     } elseif (is_array($value)) {
         $field = Lucene\Document\Field::text($name, implode(',', $value));
     } else {
         $field = Lucene\Document\Field::binary($name, $value);
     }

     $document->addField($field);
 }
Пример #4
0
 /**
  * Indexes a news item: calls unindex() for it first, then adds a freshly
  * built document to the given index.
  *
  * @param object               $data  news item; ->id, ->created_date, ->title and ->body are read
  * @param SearchIndexInterface $index index that receives the document
  *
  * @return IndexInterface
  */
 public function index($data, SearchIndexInterface $index)
 {
     $this->unindex($data, $index);

     // Fields in the order they are attached to the document.
     $newsFields = array(
         Field::Keyword('news_id', $data->id),
         Field::UnIndexed('type', "news"),
         Field::UnIndexed('identifier', $data->id),
         Field::UnIndexed('date_time', $data->created_date->format('c')),
         Field::UnIndexed('date', $data->created_date->format('j. M. Y')),
         Field::Text('title', $data->title, 'utf-8'),
         Field::Text('body', $data->body, 'utf-8'),
     );

     $newsDocument = new Document();
     foreach ($newsFields as $newsField) {
         $newsDocument->addField($newsField);
     }
     $index->addDocument($newsDocument);

     return $this;
 }
Пример #5
0
 /**
  * Checks that a LuceneResult built from a query hit exposes the data of the
  * document stored in the index.
  *
  * @dataProvider searchResultDataProvider
  */
 function testSearchLuceneResultContent($fileId, $name, $path, $size, $score, $mimeType, $modified, $container)
 {
     require_once __DIR__ . '/util/dummyindex.php';

     // Put one document describing the file into a dummy index.
     $fileDocument = new Document();
     $fileDocument->addField(Document\Field::Keyword('fileId', $fileId));
     $fileDocument->addField(Document\Field::Text('path', '/test/files' . $path, 'UTF-8'));
     $fileDocument->addField(Document\Field::unIndexed('mtime', $modified));
     $fileDocument->addField(Document\Field::unIndexed('size', $size));
     $fileDocument->addField(Document\Field::unIndexed('mimetype', $mimeType));
     $dummyIndex = new DummyIndex();
     $dummyIndex->addDocument($fileDocument);

     // Hand-craft a hit that points at that document.
     $queryHit = new QueryHit($dummyIndex);
     $queryHit->score = $score;
     $queryHit->id = 0;
     $queryHit->document_id = 0;

     $result = new \OCA\Search_Lucene\Search\LuceneResult($queryHit);

     $this->assertInstanceOf('OCA\\Search_Lucene\\Search\\LuceneResult', $result);
     $this->assertEquals($fileId, $result->id);
     $this->assertEquals('lucene', $result->type);
     $this->assertEquals($path, $result->path);
     $this->assertEquals($name, $result->name);
     $this->assertEquals($mimeType, $result->mime_type);
     $this->assertEquals($size, $result->size);
     $this->assertEquals($score, $result->score);
     $this->assertEquals($modified, $result->modified);
 }
Пример #6
0
 /**
  * Indexes a group: calls unindex() for it first, then adds a freshly built
  * document to the given index.
  *
  * @param object               $data  group; ->id, ->url, ->name_short and ->description are read
  * @param SearchIndexInterface $index index that receives the document
  *
  * @return IndexInterface
  */
 public function index($data, SearchIndexInterface $index)
 {
     $this->unindex($data, $index);
     // Read the clock once so that 'date_time' and 'date' always describe the
     // same instant, even when indexing happens right at a second or day boundary.
     $indexedAt = time();
     $indexDoc = new Document();
     $indexDoc->addField(Field::Keyword('group_id', $data->id));
     $indexDoc->addField(Field::UnIndexed('type', "group"));
     $indexDoc->addField(Field::UnIndexed('identifier', $data->url));
     $indexDoc->addField(Field::UnIndexed('date_time', date('c', $indexedAt)));
     $indexDoc->addField(Field::UnIndexed('date', date('j. M. Y', $indexedAt)));
     $indexDoc->addField(Field::Text('title', $data->name_short, 'utf-8'));
     $indexDoc->addField(Field::Text('body', $data->description, 'utf-8'));
     $index->addDocument($indexDoc);
     return $this;
 }
Пример #7
0
 /**
  * Create or update an indexed document
  *
  * Hits found for the object's id are deleted, then a new document is built
  * from the object's 'id', its class name and every property that carries a
  * DocumentField annotation. The document is added and the index committed.
  *
  * @param object $object
  */
 public function index($object)
 {
     // create property accessor
     $accessor = PropertyAccess::createPropertyAccessor();
     // read the id once; it drives both the lookup and the new document
     $id = $accessor->getValue($object, 'id');
     // delete existing documents with same id
     foreach ($this->index->find('id:' . $id) as $hit) {
         $this->index->delete($hit->id);
     }
     // create new Lucene document
     $doc = new Document();
     // add primary key to identify it in the search results
     $doc->addField(Field::keyword('id', $id));
     // add entity class reference to identify it in the search results
     $doc->addField(Field::unIndexed('entityClass', get_class($object)));
     // analyze each property's annotations to see which ones must be added to the document;
     // the class name is fully qualified so it resolves from inside any namespace
     $reflClass = new \ReflectionClass($object);
     foreach ($reflClass->getProperties() as $property) {
         // getProperties() already yields ReflectionProperty instances, so they go to the reader as-is
         $annotation = $this->reader->getPropertyAnnotation($property, '\\Keratine\\Lucene\\Mapping\\Annotation\\DocumentField');
         if (!$annotation) {
             continue;
         }
         $value = $accessor->getValue($object, $property->name);
         $value = $this->ensureString($value);
         // use the appropriate indexing strategy for the field
         switch ($annotation->type) {
             case 'keyword':
                 $doc->addField(Field::keyword($property->name, $value, 'UTF-8'));
                 break;
             case 'unIndexed':
                 $doc->addField(Field::unIndexed($property->name, $value, 'UTF-8'));
                 break;
             case 'binary':
                 $doc->addField(Field::binary($property->name, $value));
                 break;
             case 'text':
                 $doc->addField(Field::text($property->name, $value, 'UTF-8'));
                 break;
             case 'unStored':
             default:
                 // unknown types fall back to the unStored strategy
                 $doc->addField(Field::unStored($property->name, $value, 'UTF-8'));
                 break;
         }
     }
     // add the document to the index and commit it
     $this->index->addDocument($doc);
     $this->index->commit();
 }
Пример #8
0
 /**
  * Indexes a document, replaces its 'users' field on the retrieved copy,
  * writes it back through updateFile() and checks that a second search
  * returns the new value.
  */
 function testUpdate()
 {
     // fetch the index service from the application container
     $application = new Application();
     /** @var Index $index */
     $index = $application->getContainer()->query('Index');

     // index a document for file id '1' with user 'alice'
     $initialDoc = new Document();
     $initialDoc->addField(Document\Field::Keyword('fileId', '1'));
     $initialDoc->addField(Document\Field::Text('path', '/somewhere/deep/down/the/rabbit/hole', 'UTF-8'));
     $initialDoc->addField(Document\Field::Text('users', 'alice', 'UTF-8'));
     $index->index->addDocument($initialDoc);
     $index->commit();

     // look the document up by its file id
     $firstQuery = new Query\Boolean();
     $firstQuery->addSubquery(new Query\Term(new Term('1', 'fileId')));
     $firstHits = $index->find($firstQuery);
     $storedDoc = $firstHits[0]->getDocument();
     $this->assertEquals('alice', $storedDoc->getFieldValue('users'));

     // overwrite the 'users' field on the retrieved document
     $storedDoc->addField(Document\Field::Text('users', 'bob', 'UTF-8'));
     $this->assertEquals('bob', $storedDoc->getFieldValue('users'));

     // write the changed document back to the index
     $index->updateFile($storedDoc, '1');

     // search again with a newly built, equivalent query
     $secondQuery = new Query\Boolean();
     $secondQuery->addSubquery(new Query\Term(new Term('1', 'fileId')));
     $secondHits = $index->find($secondQuery);
     $updatedDoc = $secondHits[0]->getDocument();
     $this->assertEquals('bob', $updatedDoc->getFieldValue('users'));
 }
 /**
  * update() is expected to search for the model's existing hit, delete it by
  * id and add one freshly built document to the Lucene index.
  */
 public function testUpdate()
 {
     $luceneIndex = m::mock();
     $this->connection->shouldReceive('getIndex')->andReturn($luceneIndex);

     // addDocument() must be called once with a document equal to this one
     $isExpectedDocument = function ($actualDocument) {
         $expected = new Document();
         $expected->addField(Field::keyword('primary_key', 1));
         $expected->addField(Field::Keyword('class_uid', '12345'));

         $nameField = Field::unStored('name', 'test name');
         $nameField->boost = 1;
         $expected->addField($nameField);

         $optionalField = Field::unStored('optional_attribute1', 'optional value');
         $optionalField->boost = 1;
         $expected->addField($optionalField);

         $this->assertEquals($expected, $actualDocument);
         return true;
     };
     $luceneIndex->shouldReceive('addDocument')->with(m::on($isExpectedDocument))->once();

     // find() must be called once with a query on primary key and class uid
     $isExpectedQuery = function ($actualQuery) {
         $expected = new MultiTerm();
         $expected->addTerm(new Term(1, 'primary_key'), true);
         $expected->addTerm(new Term('12345', 'class_uid'), true);
         $this->assertEquals($expected, $actualQuery);
         return true;
     };
     // find() answers with a single hit whose id is 10
     $singleHit = function () {
         $hit = m::mock();
         $hit->id = 10;
         return [$hit];
     };
     $luceneIndex->shouldReceive('find')->with(m::on($isExpectedQuery))->andReturnUsing($singleHit)->once();

     // that hit must then be deleted by its id
     $luceneIndex->shouldReceive('delete')->with(10)->once();

     $index = $this->createIndex();
     $index->update($this->model);
 }
Пример #10
0
 /**
  * Lists all Post models and runs a Lucene search for the words of the 'q'
  * query parameter.
  *
  * The posts index at data/posts_index is rebuilt from all posts before the
  * search is executed.
  *
  * @return mixed
  */
 public function actionIndex()
 {
     $searchModel = new PostSearch();
     $dataProvider = $searchModel->search(Yii::$app->request->post());

     setlocale(LC_CTYPE, 'ru_RU.UTF-8');
     Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
     Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
     Lucene\Lucene::setResultSetLimit(10);

     // create blog posts index located in data/posts_index, make sure the folder is writable
     // NOTE(review): this recreates the whole index on every request - confirm that is intended
     $index = Lucene\Lucene::create('data/posts_index');
     // iterate through posts and build the index
     foreach (Post::find()->all() as $post) {
         $doc = new Lucene\Document();
         $doc->addField(Lucene\Document\Field::UnIndexed('entry_id', $post->id));
         $doc->addField(Lucene\Document\Field::Keyword('title', $post->title));
         $doc->addField(Lucene\Document\Field::text('contents', $post->content));
         $index->addDocument($doc);
     }
     // commit the index
     $index->commit();

     // Query parameters arrive already URL-decoded; decoding them a second time
     // would turn a literal '+' into a space and mangle '%xx' sequences.
     // The cast covers a missing 'q' parameter.
     $rawQuery = (string) Yii::$app->getRequest()->getQueryParam('q');
     // split into individual words, dropping the empty pieces that a missing
     // query or repeated spaces would otherwise produce
     $words = array_filter(explode(' ', $rawQuery), function ($word) {
         return $word !== '';
     });

     // start a search query and add a term for each word to it
     $query = new Lucene\Search\Query\MultiTerm();
     foreach ($words as $word) {
         $query->addTerm(new Lucene\Index\Term($word));
     }

     // open and query the index
     $index = Lucene\Lucene::open('data/posts_index');
     $results = $index->find($query);

     return $this->render('index', ['searchModel' => $searchModel, 'dataProvider' => $dataProvider, 'search' => $results, 'query' => $query]);
 }
Пример #11
0
 /**
  * Generate the index file.
  *
  * Creates the index under CACHE_DIR . self::INDEX_NAME and adds one document
  * (title, url, tag-stripped contents) for every listed page that is readable
  * and not hidden.
  */
 public static function updateIndex()
 {
     // The tagger is created lazily here; nothing in this method uses it afterwards.
     if (empty(self::$igo)) {
         self::$igo = new Tagger(array('dict_dir' => LIB_DIR . 'ipadic', 'reduce_mode' => true));
     }
     Analyzer::setDefault(new Utf8());

     // Create the index
     $index = Lucene::create(CACHE_DIR . self::INDEX_NAME);

     foreach (Listing::pages() as $pageName) {
         if (empty($pageName)) {
             continue;
         }
         $wiki = Factory::Wiki($pageName);
         // Only pages that can be read and are not hidden get indexed
         if ($wiki->isReadable() && !$wiki->isHidden()) {
             $pageDoc = new LuceneDoc();
             $pageDoc->addField(Field::Text('title', $wiki->title()));
             // Store document URL to identify it in the search results
             $pageDoc->addField(Field::Text('url', $wiki->uri()));
             // Index document contents with the markup stripped from the rendered page
             $plainText = strip_tags($wiki->render());
             $pageDoc->addField(Field::UnStored('contents', $plainText));
             // Register the document in the index
             $index->addDocument($pageDoc);
         }
     }

     $index->optimize();
 }
Пример #12
0
 /**
  * Rebuilds the upload search index.
  *
  * One document is added per upload: .xlsx and .docx files are loaded through
  * the matching Lucene document loader, any other file gets an empty document.
  * The 'label', 'owner' and 'upload_id' fields are then added to that same
  * document so the loaded file contents are indexed together with them.
  *
  * @return mixed the response object with its content set to "Index Ok"
  */
 public function generateIndexAction()
 {
     $searchIndexLocation = $this->getIndexLocation();
     $index = Lucene\Lucene::create($searchIndexLocation);
     $userTable = $this->getServiceLocator()->get('UserTable');
     $uploadTable = $this->getServiceLocator()->get('UploadTable');
     $allUploads = $uploadTable->fetchAll();
     foreach ($allUploads as $fileUpload) {
         $uploadOwner = $userTable->getById($fileUpload->getUserId());
         // create the lucene fields
         $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->getId());
         $label = Document\Field::Text('label', $fileUpload->getLabel());
         $owner = Document\Field::Text('owner', $uploadOwner->getName());
         $uploadPath = $this->getFileUploadLocation();
         $fileName = $fileUpload->getFilename();
         $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;
         if (substr_compare($fileName, ".xlsx", -strlen(".xlsx"), strlen(".xlsx")) === 0) {
             // index the excel spreadsheet
             $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
         } elseif (substr_compare($fileName, ".docx", -strlen(".docx"), strlen(".docx")) === 0) {
             // index the Word document
             $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
         } else {
             $indexDoc = new Lucene\Document();
         }
         // add the metadata fields to the document chosen above; it must not be
         // replaced by a new one here or the loaded file contents would be lost
         $indexDoc->addField($label);
         $indexDoc->addField($owner);
         $indexDoc->addField($fileUploadId);
         $index->addDocument($indexDoc);
     }
     $index->commit();
     $response = $this->getResponse();
     $response->setContent("Index Ok");
     return $response;
 }
Пример #13
0
 /**
  * {@inheritdoc}
  *
  * Adds a document with the product's identifier, name, short description and
  * description to the index named by $indexName and commits the index.
  */
 public function addProduct(ProductInterface $product, $indexName = ProductIndexerInterface::DEFAULT_INDEX_NAME)
 {
     // Use the index the caller asked for; it defaults to DEFAULT_INDEX_NAME.
     $index = $this->searchIndexManager->getIndex($indexName);
     $document = new Document();
     $document->addField(Field::unIndexed('identifier', $product->getId()));
     // NOTE(review): the name is read from the 'en' translation while both
     // descriptions use translate() without a locale - confirm this is intended.
     $document->addField(Field::text('name', $product->translate('en')->getName()));
     $document->addField(Field::text('shortDescription', $product->translate()->getShortDescription()));
     $document->addField(Field::text('description', $product->translate()->getDescription()));
     $index->addDocument($document);
     $index->commit();
 }
Пример #14
0
 /**
  * Update document in index for model
  *
  * Calls delete() for the model, then builds a new document from the
  * configured primary key pair, class UID pair, fields and optional
  * attributes and adds it to the index.
  *
  * @param Model $model
  */
 public function update(Model $model)
 {
     // Drop whatever is currently indexed for this model.
     $this->delete($model);

     $document = new Document();

     // Primary key, stored as a keyword.
     list($keyName, $keyValue) = $this->config->primaryKeyPair($model);
     $document->addField(Field::keyword($keyName, $keyValue));

     // Class UID, stored as a keyword to identify the model's class.
     list($uidName, $uidValue) = $this->config->classUidPair($model);
     $document->addField(Field::Keyword($uidName, $uidValue));

     // Base fields: value read from the model attribute, indexed but not stored.
     foreach ($this->config->fields($model) as $rawName => $options) {
         $attribute = trim($rawName);
         $baseField = Field::unStored($attribute, strip_tags(trim($model->{$attribute})));
         $baseField->boost = array_get($options, 'boost');
         $document->addField($baseField);
     }

     // Optional attributes: value taken from the options, indexed but not stored.
     foreach ($this->config->optionalAttributes($model) as $rawName => $options) {
         $optionalValue = array_get($options, "value");
         $optionalField = Field::unStored(trim($rawName), strip_tags(trim($optionalValue)));
         $optionalField->boost = array_get($options, "boost");
         $document->addField($optionalField);
     }

     // Document-level boost for the model.
     $document->boost = $this->config->boost($model);

     $this->index()->addDocument($document);
 }
 /**
  * Adds a communication to the search index, then commits and optimizes the index.
  *
  * The id is added as a keyword; title, short description, body and author
  * name are added as un-stored fields. The body has its tags stripped and its
  * HTML entities decoded before it is indexed.
  *
  * @param mixed         $index            index that receives the document
  * @param Communication $aetCommunication
  */
 public function addAetCommunicationToSearchIndex($index, Communication $aetCommunication)
 {
     // ENT_QUOTES is needed so &quot; and &#039; are decoded too; passing
     // ENT_SUBSTITUTE on its own leaves both quote entities encoded.
     $plainBody = html_entity_decode(strip_tags($aetCommunication->getBody()), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
     $author = $aetCommunication->getUser();

     // Create a new document
     $document = new Document();
     $document->addField(Field::keyword('dbId', $aetCommunication->getId(), 'utf-8'));
     $document->addField(Field::unStored('title', $aetCommunication->getTitle(), 'utf-8'));
     $document->addField(Field::unStored('shortdesc', $aetCommunication->getShortDesc(), 'utf-8'));
     $document->addField(Field::unStored('body', $plainBody, 'utf-8'));
     $document->addField(Field::unStored('author', $author->getFirstname() . " " . $author->getLastname(), 'utf-8'));
     // Add the document to the index
     $index->addDocument($document);
     // Commit the change
     $index->commit();
     $index->optimize();
 }
Пример #16
0
 /**
  * Adds a document to this segment.
  *
  * For every field of the document this updates the term dictionary and the
  * per-term position lists, computes the field's norm byte for this document,
  * collects the stored fields and registers the field with the segment.
  * Afterwards the norm string of every indexed field known to the segment is
  * extended by one byte for this document.
  *
  * @param \ZendSearch\Lucene\Document $document
  * @throws LuceneException\UnsupportedMethodCallException if a field asks for term vector storage
  */
 public function addDocument(Document $document)
 {
     $storedFields = array();
     // Norm byte of each indexed field of this document, keyed by field name.
     $docNorms = array();
     $similarity = AbstractSimilarity::getDefault();
     foreach ($document->getFieldNames() as $fieldName) {
         $field = $document->getField($fieldName);
         if ($field->storeTermVector) {
             /**
              * @todo term vector storing support
              */
             throw new LuceneException\UnsupportedMethodCallException('Store term vector functionality is not supported yet.');
         }
         if ($field->isIndexed) {
             if ($field->isTokenized) {
                 // Tokenized field: run the default analyzer over the value and
                 // record the position of every token under its term.
                 $analyzer = Analyzer\Analyzer::getDefault();
                 $analyzer->setInput($field->value, $field->encoding);
                 $position = 0;
                 $tokenCounter = 0;
                 while (($token = $analyzer->nextToken()) !== null) {
                     $tokenCounter++;
                     $term = new Index\Term($token->getTermText(), $field->name);
                     $termKey = $term->key();
                     if (!isset($this->_termDictionary[$termKey])) {
                         // New term
                         $this->_termDictionary[$termKey] = $term;
                         $this->_termDocs[$termKey] = array();
                         $this->_termDocs[$termKey][$this->_docCount] = array();
                     } elseif (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                         // Existing term, but new term entry
                         $this->_termDocs[$termKey][$this->_docCount] = array();
                     }
                     // The position advances by the token's own increment before it is recorded.
                     $position += $token->getPositionIncrement();
                     $this->_termDocs[$termKey][$this->_docCount][] = $position;
                 }
                 if ($tokenCounter == 0) {
                     // Field contains empty value. Treat it as non-indexed and non-tokenized
                     // The flags are changed on a clone, so the document's own field object stays untouched.
                     $field = clone $field;
                     $field->isIndexed = $field->isTokenized = false;
                 } else {
                     // Norm = length norm for the token count, scaled by document and field boost,
                     // encoded by the similarity and kept as a single character.
                     $docNorms[$field->name] = chr($similarity->encodeNorm($similarity->lengthNorm($field->name, $tokenCounter) * $document->boost * $field->boost));
                 }
             } elseif (($fieldUtf8Value = $field->getUtf8Value()) == '') {
                 // Field contains empty value. Treat it as non-indexed and non-tokenized
                 // Again done on a clone rather than on the document's field object.
                 $field = clone $field;
                 $field->isIndexed = $field->isTokenized = false;
             } else {
                 // Indexed but not tokenized: the whole UTF-8 value becomes one term.
                 $term = new Index\Term($fieldUtf8Value, $field->name);
                 $termKey = $term->key();
                 if (!isset($this->_termDictionary[$termKey])) {
                     // New term
                     $this->_termDictionary[$termKey] = $term;
                     $this->_termDocs[$termKey] = array();
                     $this->_termDocs[$termKey][$this->_docCount] = array();
                 } elseif (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                     // Existing term, but new term entry
                     $this->_termDocs[$termKey][$this->_docCount] = array();
                 }
                 $this->_termDocs[$termKey][$this->_docCount][] = 0;
                 // position
                 // Norm computed with a term count of 1.
                 $docNorms[$field->name] = chr($similarity->encodeNorm($similarity->lengthNorm($field->name, 1) * $document->boost * $field->boost));
             }
         }
         if ($field->isStored) {
             $storedFields[] = $field;
         }
         // Register the field (possibly the adjusted clone) with the segment.
         $this->addField($field);
     }
     // Extend the norm string of every indexed field of the segment by one byte for this document.
     foreach ($this->_fields as $fieldName => $field) {
         if (!$field->isIndexed) {
             continue;
         }
         if (!isset($this->_norms[$fieldName])) {
             // No norm string yet for this field: start with one default
             // (zero-term) norm byte per $this->_docCount.
             $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0))), $this->_docCount);
         }
         if (isset($docNorms[$fieldName])) {
             $this->_norms[$fieldName] .= $docNorms[$fieldName];
         } else {
             // This document has no indexed value for the field: append the default norm.
             $this->_norms[$fieldName] .= chr($similarity->encodeNorm($similarity->lengthNorm($fieldName, 0)));
         }
     }
     // NOTE(review): $this->_docCount is not changed in this method - presumably
     // addStoredFields() advances it; confirm.
     $this->addStoredFields($storedFields);
 }
 /**
  * Rebuilds the upload search index.
  *
  * One document is added per upload: .xlsx and .docx files are loaded through
  * the matching Lucene document loader, any other file starts from an empty
  * document. The 'label', 'owner' and 'upload_id' fields are added to it
  * before it goes into the index.
  */
 public function generateIndexAction()
 {
     $index = Lucene\Lucene::create($this->getIndexLocation());
     $userTable = $this->getServiceLocator()->get('UserTable');
     $uploadTable = $this->getServiceLocator()->get('UploadTable');

     foreach ($uploadTable->fetchAll() as $upload) {
         $ownerRecord = $userTable->getUser($upload->user_id);

         // metadata fields attached to every document
         $idField = Document\Field::unIndexed('upload_id', $upload->id);
         $labelField = Document\Field::Text('label', $upload->label);
         $ownerField = Document\Field::Text('owner', $ownerRecord->name);

         // choose the base document from the file name suffix
         if (substr_compare($upload->filename, ".xlsx", -strlen(".xlsx"), strlen(".xlsx")) === 0) {
             // excel sheet
             $baseDir = $this->getFileUploadLocation();
             $uploadDoc = Lucene\Document\Xlsx::loadXlsxFile($baseDir . "/" . $upload->filename);
         } elseif (substr_compare($upload->filename, ".docx", -strlen(".docx"), strlen(".docx")) === 0) {
             // word document
             $baseDir = $this->getFileUploadLocation();
             $uploadDoc = Lucene\Document\Docx::loadDocxFile($baseDir . "/" . $upload->filename);
         } else {
             $uploadDoc = new Lucene\Document();
         }

         $uploadDoc->addField($labelField);
         $uploadDoc->addField($ownerField);
         $uploadDoc->addField($idField);
         $index->addDocument($uploadDoc);
     }

     $index->commit();
 }
Пример #18
0
 /**
  * Add an entry to the index.
  *
  * Nothing happens unless the 'zend_search' / 'index' configuration entry
  * exists. Otherwise a document holding the id as keyword plus every field of
  * the collection is added and the index is committed.
  *
  * @param integer $id
  * @param SearchCollection $index fields to put into the document
  */
 public function add($id, SearchCollection $index)
 {
     if (!$this->config()->exists('zend_search', 'index')) {
         return;
     }

     $entry = new Document();
     $entry->addField(Field::keyword('id', $id));
     foreach ($index as $collectionField) {
         $entry->addField($collectionField);
     }

     $this->index()->addDocument($entry);
     $this->index()->commit();
 }
Пример #19
0
 /**
  * isDeleted() must report false for a document that was added but not yet
  * explicitly committed.
  *
  * @group ZF-9680
  */
 public function testIsDeletedWithoutExplicitCommit()
 {
     $uncommitted = new Document();
     $uncommitted->addField(Document\Field::Keyword('_id', 'myId'));
     $uncommitted->addField(Document\Field::Keyword('bla', 'blubb'));

     $index = Lucene\Lucene::create(__DIR__ . '/_index/_files');
     $index->addDocument($uncommitted);

     $this->assertFalse($index->isDeleted(0));
 }
Пример #20
0
 /**
  * Update the index for an ISearchable object.
  *
  * Calls deleteIndex() for the item first; an item that is not available for
  * indexing is then left out of the index.
  *
  * @param ISearchable $item
  */
 public function updateIndex(ISearchable $item)
 {
     // remove the old index entry
     $this->deleteIndex($item);

     // unavailable items are not indexed
     if (!$item->isAvailableForIndexing()) {
         return;
     }

     $entry = new Document();
     // store the model's primary key to identify it in the search results
     $entry->addField(Field::Keyword('pk', $item->getId()));
     // model ids may overlap (for example between products and services),
     // so a second field is added for unambiguous identification
     $entry->addField(Field::Keyword('model', get_class($item)));

     // index the model's attributes with markup stripped from the values
     foreach ($item->getAttributesForIndexing() as $attribute) {
         $fieldName = $attribute->getFieldName();
         $plainValue = strip_tags($attribute->getValue());
         $entry->addField(Field::unStored($fieldName, $plainValue));
     }

     // add the entry to the index held in the connection and commit
     $luceneIndex = $this->connection;
     $luceneIndex->addDocument($entry);
     $luceneIndex->commit();
 }
Пример #21
0
 /**
  * Adds this object to the given indexer as a document.
  *
  * @param mixed $indexer  index that receives the document
  * @param bool  $commit   commit the indexer after adding
  * @param bool  $optimize optimize the indexer after adding
  */
 public function index($indexer, $commit = true, $optimize = true)
 {
     // separators in the cross-listed value are turned into spaces
     $crossListed = str_replace(array(';', ',', '|'), ' ', $this->getCrossListed());

     $entry = new Document();
     $entry->addField(Field::keyword('pk', $this->getId()));
     $entry->addField(Field::Text('course', $this->getSubjectcode()));
     $entry->addField(Field::Text('cross-listed', $crossListed));
     $entry->addField(Field::Text('instructor', $this->getLegalContentOwner()));
     $entry->addField(Field::unStored('comments', $this->getComments()));
     $indexer->addDocument($entry);

     if ($commit) {
         $indexer->commit();
     }
     if ($optimize) {
         $indexer->optimize();
     }
 }
Пример #22
0
 /**
  * Refreshes this job's entry in the Lucene index.
  *
  * Hits found for the job's primary key are deleted; a new document is only
  * added when the job is activated and not expired.
  *
  * @ORM\PostPersist
  */
 public function updateLuceneIndex()
 {
     $luceneIndex = self::getLuceneIndex();

     // remove existing entries
     $existingHits = $luceneIndex->find('pk:' . $this->getId());
     foreach ($existingHits as $existingHit) {
         $luceneIndex->delete($existingHit->id);
     }

     // don't index expired and non-activated jobs
     if ($this->isExpired() || !$this->getIsActivated()) {
         return;
     }

     $jobDocument = new Document();
     // store job primary key to identify it in the search results
     $jobDocument->addField(Document\Field::Keyword('pk', $this->getId()));

     // index job fields without storing them
     $searchable = array(
         'position' => $this->getPosition(),
         'company' => $this->getCompany(),
         'location' => $this->getLocation(),
         'description' => $this->getDescription(),
     );
     foreach ($searchable as $fieldName => $fieldValue) {
         $jobDocument->addField(Document\Field::UnStored($fieldName, $fieldValue, 'utf-8'));
     }

     // add job to the index
     $luceneIndex->addDocument($jobDocument);
     $luceneIndex->commit();
 }
Пример #23
0
 /**
  * Rebuilds the user search index: one document per user with the user id and
  * activation flag as keywords and the name, email and role as text fields.
  */
 public function generateSearchAction()
 {
     $index = Lucene\Lucene::create($this->getIndexLocation());

     foreach ($this->getUserTable()->fetchAll(false) as $user) {
         // fields in the order they are attached to the document
         $userFields = array(
             Document\Field::keyword('userId', $user->userId),
             Document\Field::text('firstName', $user->firstName),
             Document\Field::text('lastName', $user->lastName),
             Document\Field::text('email', $user->email),
             Document\Field::text('role', $user->role),
             Document\Field::keyword('activated', $user->activated),
         );

         $userDocument = new Lucene\Document();
         foreach ($userFields as $userField) {
             $userDocument->addField($userField);
         }
         $index->addDocument($userDocument);
     }

     $index->commit();
 }
Пример #24
0
 /**
  * Create a document from the configured fields within the extracted page data.
  *
  * The document receives the URL, the URL path together with its shortened
  * ancestor paths, the title/description/body/image fields, the configured
  * headline fields and, when configured, the page id and route name. Text
  * fields also get an ASCII-transliterated '_translit' companion field.
  *
  * @param string $url
  * @param array $page
  * @return Document
  */
 protected function createDocument($url, $page)
 {
     $document = new Document();
     // temporary default for a missing status code
     if (!isset($page['status_code'])) {
         $page['status_code'] = 0;
     }
     setlocale(LC_ALL, "cs_CZ.UTF-8");

     // ASCII transliteration with apostrophes removed, used for the translit values
     $toAscii = function ($text) {
         return str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $text));
     };
     // configured boosts by field name; empty when none are configured
     $boosts = isset($this->parameters[self::BOOST_PARAM]) ? $this->parameters[self::BOOST_PARAM] : array();

     $document->addField(Field::keyword('url', $url));

     // ancestor URLs to search by URL
     $parsedUrl = parse_url($url);
     if (isset($parsedUrl['path']) && $parsedUrl['path'] && strlen($parsedUrl['path']) > 1) {
         $currentPath = $parsedUrl['path'];
         $pathChain = array($currentPath);
         // cut the path back to its last '/' until that slash sits at offset 1 or lower (or is gone)
         do {
             $currentPath = substr($currentPath, 0, strrpos($currentPath, '/'));
             $pathChain[] = $currentPath;
         } while (strrpos($currentPath, '/') > 1);
         $document->addField(Field::text(Page::URIS_KEY, implode(' ', $pathChain)));
     }

     $pageKeys = array(Page::TITLE_KEY, Page::DESCRIPTION_KEY, Page::BODY_KEY, Page::IMAGE_KEY);
     foreach ($pageKeys as $fieldName) {
         $fieldValue = '';
         if (isset($page[$fieldName])) {
             $fieldValue = $page[$fieldName];
         }
         // the same boost applies to the field and to its translit companion
         $boost = isset($boosts[$fieldName]) ? $boosts[$fieldName] : 1.25;
         switch ($fieldName) {
             case Page::TITLE_KEY:
             case Page::DESCRIPTION_KEY:
             case Page::BODY_KEY:
                 $field = Field::text($fieldName, $fieldValue);
                 // the translit companion is added before the field itself
                 $fieldTranslit = Field::text($fieldName . '_translit', $toAscii($fieldValue));
                 $fieldTranslit->boost = $boost;
                 $document->addField($fieldTranslit);
                 break;
             case Page::IMAGE_KEY:
                 $field = Field::unIndexed($fieldName, $fieldValue);
                 break;
             default:
                 // value and, when different, its transliteration share one field
                 $translitValue = $toAscii($fieldValue);
                 $field = Field::text($fieldName, $fieldValue . ($translitValue != $fieldValue ? ' ' . $translitValue : ''));
         }
         $field->boost = $boost;
         $document->addField($field);
     }

     // title tags as configured i.e. h1, h2, ...
     foreach ($this->parameters[self::TITLE_TAGS_PARAM] as $fieldName) {
         $fieldValue = '';
         if (Page::hasHeadlineType($page, $fieldName)) {
             $fieldValue = Page::getHeadline($page, $fieldName);
         }
         // the headline defaults to boost 1, its translit companion to 1.25
         $headline = Field::text($fieldName, $fieldValue);
         $headline->boost = isset($boosts[$fieldName]) ? $boosts[$fieldName] : 1;
         $document->addField($headline);
         $headlineTranslit = Field::text($fieldName . '_translit', $toAscii($fieldValue));
         $headlineTranslit->boost = isset($boosts[$fieldName]) ? $boosts[$fieldName] : 1.25;
         $document->addField($headlineTranslit);
     }

     // page ID if selector defined
     if ($this->parameters[self::PAGE_ID_PARAM]) {
         $pageId = isset($page[Page::PAGE_ID_KEY]) ? $page[Page::PAGE_ID_KEY] : '';
         $document->addField(Field::unIndexed(Page::PAGE_ID_KEY, $pageId));
     }

     // route name if selector defined
     if ($this->parameters[self::ROUTE_NAME_PARAM]) {
         $routeName = isset($page[Page::ROUTE_NAME_KEY]) ? $page[Page::ROUTE_NAME_KEY] : '';
         $document->addField(Field::unIndexed(Page::ROUTE_NAME_KEY, $routeName));
     }

     return $document;
 }
 /**
  * Adds one field to the document for each search index attached to a property.
  *
  * The index URIs are read from the property's PropertyIndex values. Each
  * index tokenizes the resource's values for the property; an index that
  * produces no tokens adds nothing to the document.
  *
  * @param Document                      $document document receiving the fields
  * @param \core_kernel_classes_Property $property property whose values are indexed
  */
 protected function indexProperty(Document $document, \core_kernel_classes_Property $property)
 {
     $indexes = $property->getPropertyValues(new \core_kernel_classes_Property('http://www.tao.lu/Ontologies/TAO.rdf#PropertyIndex'));
     foreach ($indexes as $indexUri) {
         $index = new Index($indexUri);
         // Fetched once and reused as the field name in both branches below.
         $id = $index->getIdentifier();
         $strings = $index->tokenize($this->resource->getPropertyValues($property));
         if (empty($strings)) {
             continue;
         }
         if ($index->isFuzzyMatching()) {
             // cannot store multiple fuzzy strings, so collapse them into one text value
             $field = Document\Field::Text($id, implode(' ', $strings));
             $field->isStored = $index->isStored();
         } else {
             // A single token is passed as a scalar, several tokens as an array.
             $value = count($strings) > 1 ? $strings : reset($strings);
             $field = Document\Field::Keyword($id, $value);
             // storage of arrays not supported
             $field->isStored = $index->isStored() && !is_array($value);
         }
         $document->addField($field);
     }
 }
Пример #26
0
 /**
  * Re-index a model: remove its existing document(s) and add a fresh one.
  *
  * @param Model $model
  */
 public function update(Model $model)
 {
     // Drop whatever is currently indexed for this model.
     $this->delete($model);

     $document = new Document();

     // Private key of the model.
     list($keyName, $keyValue) = $this->config->privateKeyPair($model);
     $document->addField(Field::keyword($keyName, $keyValue));

     // Class UID, used to identify the model's class.
     list($uidName, $uidValue) = $this->config->classUidPair($model);
     $document->addField(Field::Keyword($uidName, $uidValue));

     // Configured model attributes: indexed but not stored, tags stripped.
     foreach ($this->config->fields($model) as $attribute) {
         $attributeName = trim($attribute);
         $attributeText = strip_tags(trim($model->{$attributeName}));
         $document->addField(Field::unStored($attributeName, $attributeText));
     }

     // Optional attributes: indexed but not stored, tags stripped.
     foreach ($this->config->optionalAttributes($model) as $optionalName => $optionalValue) {
         $cleanName = trim($optionalName);
         $cleanValue = strip_tags(trim($optionalValue));
         $document->addField(Field::unStored($cleanName, $cleanValue));
     }

     // Hand the finished document over to the index.
     $this->index()->addDocument($document);
 }
 /**
  * Handles a file upload: stores the file, records it in the upload table
  * and adds it to the Lucene search index.
  *
  * When no user e-mail is found in the auth storage, a "not authorized"
  * response is returned. Every other request ends with a redirect to the
  * "uploads" route (action "index"), whether or not a file was processed.
  *
  * @return mixed the response object for unauthorized requests, otherwise
  *               the result of the redirect plugin
  */
 public function processAction()
 {
     $userEmail = $this->getAuthService()->getStorage()->read();
     if (!$userEmail) {
         $this->flashMessenger()->addErrorMessage("not authorized");
         return $this->getResponse()->setContent("not authorized");
     }
     $request = $this->getRequest();
     $form = new UploadForm();
     $uploadFile = $this->params()->fromFiles('fileupload');
     if ($request->isPost()) {
         $form->setData($request->getPost());
         if ($form->isValid()) {
             // Upload directory, taken from the module configuration.
             $uploadPath = $this->getFileUploadLocation();
             // Save the uploaded file.
             $adapter = new \Zend\File\Transfer\Adapter\Http();
             $adapter->setDestination($uploadPath);
             if ($adapter->receive($uploadFile['name'])) {
                 $userTable = $this->getServiceLocator()->get('UserTable');
                 $user = $userTable->getUserByEmail($userEmail);
                 $upload = new \Users\Model\Upload();
                 // The file was received successfully: persist the upload record.
                 $exchange_data = array();
                 $exchange_data['label'] = $request->getPost()->get('label');
                 $exchange_data['filename'] = $uploadFile['name'];
                 $exchange_data['user_id'] = $user->getId();
                 $upload->exchangeArray($exchange_data);
                 $uploadTable = $this->getServiceLocator()->get('UploadTable');
                 $uploadTable->save($upload);
                 $upload->setId($uploadTable->getLastInsertValue());
                 // Add the upload to Lucene.
                 // NOTE(review): Lucene::create() presumably starts a new index on
                 // every upload, dropping earlier documents - confirm whether
                 // opening the existing index is intended here.
                 $searchIndexLocation = $this->getIndexLocation();
                 $index = Lucene\Lucene::create($searchIndexLocation);
                 // Build the Lucene metadata fields.
                 $fileUploadId = Document\Field::unIndexed('upload_id', $upload->getId());
                 $label = Document\Field::Text('label', $upload->getLabel());
                 $owner = Document\Field::Text('owner', $user->getName());
                 $fileName = $upload->getFilename();
                 $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;
                 // Pick the document loader from the file extension (case-insensitive).
                 $extension = strtolower(pathinfo($fileName, PATHINFO_EXTENSION));
                 if ($extension === 'xlsx') {
                     // Index the Excel spreadsheet.
                     $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
                 } elseif ($extension === 'docx') {
                     // Index the Word document.
                     $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
                 } else {
                     // Any other file type is indexed by its metadata only.
                     $indexDoc = new Lucene\Document();
                 }
                 // Add the metadata to the document chosen above. It must not be
                 // replaced by a fresh Document here, or the content loaded from
                 // the .xlsx/.docx file would be thrown away.
                 $indexDoc->addField($label);
                 $indexDoc->addField($owner);
                 $indexDoc->addField($fileUploadId);
                 $index->addDocument($indexDoc);
                 $index->commit();
             }
         }
     }
     return $this->redirect()->toRoute('uploads', array('action' => 'index'));
 }
Пример #28
0
 /**
  * Builds a search document from a title, a body and a URL.
  *
  * @param        $title value of the "title" text field
  * @param string $body  value of the "body" text field; tags are stripped first
  * @param        $url   value of the "url" keyword field
  *
  * @return Document
  */
 protected function createDocument($title, $body, $url)
 {
     $result = new Document();

     $titleField = Field::text('title', $title);
     $result->addField($titleField);

     // The body is indexed without its markup.
     $plainBody = strip_tags($body);
     $bodyField = Field::text('body', $plainBody);
     $result->addField($bodyField);

     $urlField = Field::keyword('url', $url);
     $result->addField($urlField);

     return $result;
 }
Пример #29
0
 /**
  * {@inheritDoc}
  *
  * Builds one document for the node: keyword fields for its path, node name,
  * local node name and parent path, plus one field per property whose type
  * is handled in the switch below. Multi-valued properties are flattened
  * into a single string joined with self::MULTIVALUE_SEPARATOR. Properties
  * of any other type are not indexed.
  */
 public function index($workspace, $path, Node $node)
 {
     $index = $this->getIndex($workspace);
     $document = new Document();
     $nodeName = PathHelper::getNodeName($path);
     // The local name currently mirrors the node name; the dedicated lookup
     // (PathHelper::getLocalNodeName($path)) is disabled.
     $localNodeName = $nodeName;
     $parentPath = PathHelper::getParentPath($path);
     $document->addField(Field::Keyword(self::IDX_PATH, $path));
     $document->addField(Field::Keyword(self::IDX_NODENAME, $nodeName));
     $document->addField(Field::Keyword(self::IDX_NODELOCALNAME, $localNodeName));
     $document->addField(Field::Keyword(self::IDX_PARENTPATH, $parentPath));
     foreach ($node->getProperties() as $propertyName => $property) {
         $propertyValue = $property['value'];
         $propertyType = $property['type'];
         if ($propertyName === Storage::INTERNAL_UUID) {
             // The internal UUID is indexed verbatim as a keyword, whatever its type.
             $document->addField(Field::Keyword(Storage::INTERNAL_UUID, $propertyValue));
             continue;
         }
         // Normalise the value to a list so single and multi-valued properties
         // share one code path.
         $values = (array) $propertyValue;
         switch ($propertyType) {
             case PropertyType::TYPENAME_STRING:
             case PropertyType::TYPENAME_NAME:
             case PropertyType::TYPENAME_PATH:
             case PropertyType::TYPENAME_URI:
                 break;
             case PropertyType::TYPENAME_DATE:
                 foreach ($values as $i => $value) {
                     if ($value instanceof \DateTime) {
                         $values[$i] = $value->format('c');
                     }
                 }
                 break;
             case PropertyType::TYPENAME_DECIMAL:
             case PropertyType::TYPENAME_LONG:
             case PropertyType::TYPENAME_DOUBLE:
                 // Left-pad with zeros to the digit count of PHP_INT_MAX
                 // (presumably so values compare correctly as strings - confirm).
                 $width = strlen((string) PHP_INT_MAX);
                 foreach ($values as $i => $value) {
                     $values[$i] = sprintf('%0' . $width . 's', $value);
                 }
                 break;
             case PropertyType::TYPENAME_BOOLEAN:
                 foreach ($values as $i => $value) {
                     // Test each element rather than the whole property value, so
                     // the 'false' entries of a multi-valued boolean are kept.
                     $values[$i] = $value === 'false' ? self::VALUE_BOOLEAN_FALSE : 1;
                 }
                 break;
             default:
                 // Unhandled property type: nothing is indexed for it.
                 continue 2;
         }
         $document->addField(Field::Text($propertyName, join(self::MULTIVALUE_SEPARATOR, $values)));
     }
     $index->addDocument($document);
 }
 /**
  * Indexes the entries of every JSON file found in the "data/json" directory
  * next to this source file.
  *
  * One Lucene index is created per file under "data/index", named after the
  * file with its last five characters removed (presumably the ".json"
  * suffix). Files whose content does not decode to an array or object add
  * no documents.
  */
 public function index()
 {
     $dir = realpath(dirname(__FILE__)) . DIRECTORY_SEPARATOR . "data" . DIRECTORY_SEPARATOR;
     $jsonDir = $dir . "json";
     $indexDir = $dir . "index";
     // Read the JSON files.
     $files = scandir($jsonDir);
     if ($files === false) {
         // The directory is missing or unreadable: there is nothing to index.
         return;
     }
     foreach ($files as $file) {
         if ($file === '.' || $file === '..') {
             continue;
         }
         $jsonPath = $jsonDir . DIRECTORY_SEPARATOR . $file;
         // Only regular files are processed.
         if (!is_file($jsonPath)) {
             continue;
         }
         $json = json_decode(file_get_contents($jsonPath));
         $indexName = substr($file, 0, -5);
         // Create the index.
         $index = Lucene\Lucene::create($indexDir . DIRECTORY_SEPARATOR . $indexName);
         if (!is_array($json) && !is_object($json)) {
             // Unreadable or invalid JSON: no entries to iterate over.
             continue;
         }
         // Create one document per entry and define the fields to index.
         foreach ($json as $entry) {
             $doc = new Lucene\Document();
             // NOTE(review): the 'url' field is filled from the entry's title - confirm this is intended.
             $doc->addField(Lucene\Document\Field::Text('url', $entry->title));
             $doc->addField(Lucene\Document\Field::UnStored('contents', $entry->text));
             $index->addDocument($doc);
         }
     }
 }