Пример #1
0
 /**
  * Verifies that a LuceneResult built from a query hit exposes the indexed
  * file metadata (id, type, path, name, mime type, size, score, mtime).
  *
  * The document is indexed under '/test/files' . $path while the result is
  * expected to report the bare $path. $container is supplied by the data
  * provider but is not used by this test.
  *
  * @dataProvider searchResultDataProvider
  */
 function testSearchLuceneResultContent($fileId, $name, $path, $size, $score, $mimeType, $modified, $container)
 {
     require_once __DIR__ . '/util/dummyindex.php';

     // Describe the indexed file as a single Lucene document.
     $document = new Document();
     $documentFields = array(
         Document\Field::Keyword('fileId', $fileId),
         Document\Field::Text('path', '/test/files' . $path, 'UTF-8'),
         Document\Field::unIndexed('mtime', $modified),
         Document\Field::unIndexed('size', $size),
         Document\Field::unIndexed('mimetype', $mimeType),
     );
     foreach ($documentFields as $documentField) {
         $document->addField($documentField);
     }

     $dummyIndex = new DummyIndex();
     $dummyIndex->addDocument($document);

     // Hand-craft a hit that refers to document 0 of the dummy index.
     $queryHit = new QueryHit($dummyIndex);
     $queryHit->score = $score;
     $queryHit->id = 0;
     $queryHit->document_id = 0;

     $searchResult = new \OCA\Search_Lucene\Search\LuceneResult($queryHit);
     $this->assertInstanceOf('OCA\\Search_Lucene\\Search\\LuceneResult', $searchResult);

     // Result property => value it must carry.
     $expectedProperties = array(
         'id' => $fileId,
         'type' => 'lucene',
         'path' => $path,
         'name' => $name,
         'mime_type' => $mimeType,
         'size' => $size,
         'score' => $score,
         'modified' => $modified,
     );
     foreach ($expectedProperties as $property => $expected) {
         $this->assertEquals($expected, $searchResult->{$property});
     }
 }
 /**
  * Builds a Lucene document for the given entity.
  *
  * The entity id is stored (not indexed) as 'identifier'; every search field
  * from the entity's context is added as a text field under its own name.
  *
  * @param EntityInterface $entity
  *
  * @return Document
  */
 public function createDocument(EntityInterface $entity) : Document
 {
     $luceneDocument = new Document();
     $searchFields = $this->createContext($entity)->getFieldsCollection();

     $luceneDocument->addField(Field::unIndexed('identifier', $entity->getId()));

     // map() is used purely to visit each field; its result is discarded.
     $appendField = function (SearchField $searchField) use ($luceneDocument) {
         $fieldName = $searchField->getName();
         $fieldValue = $searchField->getValue();
         $luceneDocument->addField(Field::text($fieldName, $fieldValue));
     };
     $searchFields->map($appendField);

     return $luceneDocument;
 }
Пример #3
0
 /**
  * {@inheritdoc}
  *
  * Stores the product id as 'identifier' and indexes its name, short
  * description and description as text fields in the index named by
  * $indexName, then commits the index.
  *
  * @param ProductInterface $product
  * @param string           $indexName Index to write to
  */
 public function addProduct(ProductInterface $product, $indexName = ProductIndexerInterface::DEFAULT_INDEX_NAME)
 {
     // Honour the caller-supplied index name instead of always using the default.
     $index = $this->searchIndexManager->getIndex($indexName);

     $document = new Document();
     $document->addField(Field::unIndexed('identifier', $product->getId()));
     // NOTE(review): the name is read from the 'en' translation while the two
     // descriptions use the default translation - confirm this is intended.
     $document->addField(Field::text('name', $product->translate('en')->getName()));
     $document->addField(Field::text('shortDescription', $product->translate()->getShortDescription()));
     $document->addField(Field::text('description', $product->translate()->getDescription()));

     $index->addDocument($document);
     $index->commit();
 }
Пример #4
0
 /**
  * Create or update an indexed document
  *
  * Documents already indexed under the object's id are deleted first. The new
  * document carries the id (keyword), the object's class name (stored only)
  * and one field per property annotated with DocumentField, using the field
  * type named by the annotation. The index is committed afterwards.
  *
  * @param object $object
  */
 public function index($object)
 {
     // create property accessor and read the primary key once
     $accessor = PropertyAccess::createPropertyAccessor();
     $id = $accessor->getValue($object, 'id');

     // delete existing documents with same id
     // NOTE(review): the id is concatenated into the query string as-is.
     foreach ($this->index->find('id:' . $id) as $hit) {
         $this->index->delete($hit->id);
     }

     // create new Lucene document
     $doc = new Document();
     // add primary key to identify it in the search results
     $doc->addField(Field::keyword('id', $id));
     // add entity class reference to identify it in the search results
     $doc->addField(Field::unIndexed('entityClass', get_class($object)));

     // analyze each property's annotations to see which ones must be added to the document
     // (fully qualified so it resolves from inside a namespace, like \ReflectionProperty did)
     $reflClass = new \ReflectionClass($object);
     foreach ($reflClass->getProperties() as $property) {
         // getProperties() already yields ReflectionProperty instances
         $annotation = $this->reader->getPropertyAnnotation($property, '\\Keratine\\Lucene\\Mapping\\Annotation\\DocumentField');
         if (!$annotation) {
             continue;
         }

         $value = $accessor->getValue($object, $property->name);
         $value = $this->ensureString($value);

         // use the appropriate indexing strategy for the field
         switch ($annotation->type) {
             case 'keyword':
                 $doc->addField(Field::keyword($property->name, $value, 'UTF-8'));
                 break;
             case 'unIndexed':
                 $doc->addField(Field::unIndexed($property->name, $value, 'UTF-8'));
                 break;
             case 'binary':
                 $doc->addField(Field::binary($property->name, $value));
                 break;
             case 'text':
                 $doc->addField(Field::text($property->name, $value, 'UTF-8'));
                 break;
             case 'unStored':
             default:
                 // unknown types fall back to unStored
                 $doc->addField(Field::unStored($property->name, $value, 'UTF-8'));
                 break;
         }
     }

     // add the document to the index and commit it
     $this->index->addDocument($doc);
     $this->index->commit();
 }
Пример #5
0
 /**
  * Rebuilds the search index from every stored upload.
  *
  * A new index is created at the configured location. Each upload becomes one
  * document: .xlsx and .docx files are loaded through the matching Lucene
  * document class, any other file starts from an empty document. The label,
  * owner name and upload id fields are then added to that document.
  *
  * @return mixed the response object with its content set to "Index Ok"
  */
 public function generateIndexAction()
 {
     $searchIndexLocation = $this->getIndexLocation();
     $index = Lucene\Lucene::create($searchIndexLocation);
     $userTable = $this->getServiceLocator()->get('UserTable');
     $uploadTable = $this->getServiceLocator()->get('UploadTable');
     $allUploads = $uploadTable->fetchAll();
     foreach ($allUploads as $fileUpload) {
         $uploadOwner = $userTable->getById($fileUpload->getUserId());
         // build the Lucene fields
         $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->getId());
         $label = Document\Field::Text('label', $fileUpload->getLabel());
         $owner = Document\Field::Text('owner', $uploadOwner->getName());
         $uploadPath = $this->getFileUploadLocation();
         $fileName = $fileUpload->getFilename();
         $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;
         // Pick the document type from the (case-sensitive) file extension.
         switch (pathinfo($fileName, PATHINFO_EXTENSION)) {
             case 'xlsx':
                 // index the Excel spreadsheet
                 $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
                 break;
             case 'docx':
                 // index the Word document
                 $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
                 break;
             default:
                 $indexDoc = new Lucene\Document();
                 break;
         }
         // Add the metadata fields to the document chosen above; it must not
         // be replaced by a fresh one or the loaded file content is lost.
         $indexDoc->addField($label);
         $indexDoc->addField($owner);
         $indexDoc->addField($fileUploadId);
         $index->addDocument($indexDoc);
     }
     $index->commit();
     $response = $this->getResponse();
     $response->setContent("Index Ok");
     return $response;
 }
 /**
  * Rebuilds the search index from every stored upload.
  *
  * A new index is created at the configured location. Each upload becomes one
  * document: .xlsx and .docx files are loaded through the matching Lucene
  * document class, any other file starts from an empty document. Label, owner
  * name and upload id are added to it before it is put into the index.
  */
 public function generateIndexAction()
 {
     $index = Lucene\Lucene::create($this->getIndexLocation());
     $userTable = $this->getServiceLocator()->get('UserTable');
     $uploadTable = $this->getServiceLocator()->get('UploadTable');

     foreach ($uploadTable->fetchAll() as $upload) {
         $uploader = $userTable->getUser($upload->user_id);

         // Metadata fields, in the order they are appended to the document:
         // label and owner as text, the upload id stored without indexing.
         $idField = Document\Field::unIndexed('upload_id', $upload->id);
         $metadataFields = array(
             Document\Field::Text('label', $upload->label),
             Document\Field::Text('owner', $uploader->name),
             $idField,
         );

         $filename = $upload->filename;
         $suffixOffset = strlen($filename) - 5;
         if (substr_compare($filename, ".xlsx", $suffixOffset, 5) === 0) {
             // Excel sheet
             $document = Lucene\Document\Xlsx::loadXlsxFile($this->getFileUploadLocation() . "/" . $filename);
         } elseif (substr_compare($filename, ".docx", $suffixOffset, 5) === 0) {
             // Word document
             $document = Lucene\Document\Docx::loadDocxFile($this->getFileUploadLocation() . "/" . $filename);
         } else {
             // any other file: metadata only
             $document = new Lucene\Document();
         }

         foreach ($metadataFields as $metadataField) {
             $document->addField($metadataField);
         }
         $index->addDocument($document);
     }

     $index->commit();
 }
Пример #7
0
 /**
  * Create a document from the configured fields within the extracted data.
  *
  * Adds, in this order: the URL (keyword), the URL path and its ancestor
  * paths (text), title/description/body each as an ASCII-transliterated
  * "_translit" field followed by the plain field, the image (stored only),
  * every configured headline tag as a plain field followed by its
  * "_translit" field, and - when configured - the page id and route name
  * (stored only).
  *
  * Sets the process locale to cs_CZ.UTF-8 before transliterating.
  *
  * @param string $url
  * @param array $page
  * @return Document
  */
 protected function createDocument($url, $page)
 {
     $doc = new Document();

     if (!isset($page['status_code'])) {
         //tmp
         $page['status_code'] = 0;
     }

     setlocale(LC_ALL, "cs_CZ.UTF-8");

     // Configured boost for a field name, or the given fallback.
     $parameters = $this->parameters;
     $boostParam = self::BOOST_PARAM;
     $boostFor = function ($name, $fallback) use ($parameters, $boostParam) {
         if (isset($parameters[$boostParam][$name])) {
             return $parameters[$boostParam][$name];
         }
         return $fallback;
     };

     // ASCII transliteration with the apostrophes iconv may insert removed.
     $toAscii = function ($text) {
         return str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $text));
     };

     $doc->addField(Field::keyword('url', $url));

     // ancestor URLs to search by URL
     $urlParts = parse_url($url);
     if (!empty($urlParts['path']) && strlen($urlParts['path']) > 1) {
         $current = $urlParts['path'];
         $ancestors = array($current);
         do {
             // cut off the last path segment
             $current = substr($current, 0, strrpos($current, '/'));
             $ancestors[] = $current;
         } while (strrpos($current, '/') > 1);
         $doc->addField(Field::text(Page::URIS_KEY, implode(' ', $ancestors)));
     }

     $textKeys = array(Page::TITLE_KEY, Page::DESCRIPTION_KEY, Page::BODY_KEY);
     foreach (array_merge($textKeys, array(Page::IMAGE_KEY)) as $key) {
         $value = isset($page[$key]) ? $page[$key] : '';

         if (in_array($key, $textKeys)) {
             // the transliterated variant goes into the document first
             $asciiField = Field::text($key . '_translit', $toAscii($value));
             $asciiField->boost = $boostFor($key, 1.25);
             $doc->addField($asciiField);

             $mainField = Field::text($key, $value);
         } else {
             // image: stored only
             $mainField = Field::unIndexed($key, $value);
         }

         $mainField->boost = $boostFor($key, 1.25);
         $doc->addField($mainField);
     }

     // title tags as configured i.e. h1, h2, ...
     foreach ($this->parameters[self::TITLE_TAGS_PARAM] as $tag) {
         $headline = Page::hasHeadlineType($page, $tag) ? Page::getHeadline($page, $tag) : '';

         // plain headline defaults to boost 1, its transliteration to 1.25
         $headlineField = Field::text($tag, $headline);
         $headlineField->boost = $boostFor($tag, 1);
         $doc->addField($headlineField);

         $headlineAscii = Field::text($tag . '_translit', $toAscii($headline));
         $headlineAscii->boost = $boostFor($tag, 1.25);
         $doc->addField($headlineAscii);
     }

     // page ID and route name, each only if its selector is defined
     $optionalFields = array(
         array(self::PAGE_ID_PARAM, Page::PAGE_ID_KEY),
         array(self::ROUTE_NAME_PARAM, Page::ROUTE_NAME_KEY),
     );
     foreach ($optionalFields as $optionalField) {
         $param = $optionalField[0];
         $pageKey = $optionalField[1];
         if ($this->parameters[$param]) {
             $stored = isset($page[$pageKey]) ? $page[$pageKey] : '';
             $doc->addField(Field::unIndexed($pageKey, $stored));
         }
     }

     return $doc;
 }
Пример #8
0
 /**
  * index a file
  *
  * Content is only read for files on local storage; the document type is
  * chosen from the mime type first and the file extension second. Files on
  * other storages are indexed with their metadata fields only.
  *
  * @param File $file the file to be indexed
  * @param bool $commit passed through to the index's updateFile()
  *
  * @return bool always true when the method returns
  * @throws NotIndexedException when a local file has an unsupported type
  */
 public function indexFile(File $file, $commit = true)
 {
     // the mime type is checked first, the lower-cased extension second
     $mimeType = $file->getMimeType();
     $fileExtension = strtolower(pathinfo($file->getName(), PATHINFO_EXTENSION));

     // start from a plain lucene document; content loaders may replace it
     $doc = new Document();

     $storage = $file->getStorage();
     if ($storage->isLocal()) {
         $path = $storage->getLocalFile($file->getInternalPath());

         if ('text/html' === $mimeType) {
             //TODO could be indexed, even if not local
             $doc = HTML::loadHTML($file->getContent());
         } elseif ('text/' === substr($mimeType, 0, 5) || 'application/x-tex' === $mimeType) {
             // plain text: index the body without storing it
             $body = $file->getContent();
             if ($body != '') {
                 $doc->addField(Document\Field::UnStored('body', $body));
             }
         } elseif ('application/pdf' === $mimeType) {
             $doc = Pdf::loadPdf($file->getContent());
         } else {
             // office formats are recognised by extension;
             // the zend classes only understand docx and not doc files
             switch ($fileExtension) {
                 case 'docx':
                     $doc = Document\Docx::loadDocxFile($path);
                     break;
                 case 'xlsx':
                     $doc = Document\Xlsx::loadXlsxFile($path);
                     break;
                 case 'pptx':
                     $doc = Document\Pptx::loadPptxFile($path);
                     break;
                 case 'odt':
                     $doc = Odt::loadOdtFile($path);
                     break;
                 case 'ods':
                     $doc = Ods::loadOdsFile($path);
                     break;
                 default:
                     throw new NotIndexedException();
             }
         }
     }

     // Store filecache id as unique id to lookup by when deleting
     $fileId = $file->getId();
     $doc->addField(Document\Field::Keyword('fileId', $fileId));
     // Store document path for the search results
     $doc->addField(Document\Field::Text('path', $file->getPath(), 'UTF-8'));
     // Remaining metadata is stored but not indexed
     $doc->addField(Document\Field::unIndexed('mtime', $file->getMTime()));
     $doc->addField(Document\Field::unIndexed('size', $file->getSize()));
     $doc->addField(Document\Field::unIndexed('mimetype', $mimeType));

     $this->index->updateFile($doc, $file->getId(), $commit);

     return true;
 }
 /**
  * Handles a file upload: stores the file, records it in the upload table
  * and adds it to the search index.
  *
  * Unauthenticated requests get a "not authorized" response. For a valid
  * POST the file is received into the upload directory, an Upload row is
  * saved for the current user, and a Lucene document (loaded from the file
  * for .xlsx/.docx, empty otherwise) with label, owner and upload id is
  * added to the index. Every other path redirects to the uploads index.
  *
  * @return mixed the "not authorized" response or a redirect to the uploads index
  */
 public function processAction()
 {
     $userEmail = $this->getAuthService()->getStorage()->read();
     if (!$userEmail) {
         $this->flashMessenger()->addErrorMessage("not authorized");
         return $this->getResponse()->setContent("not authorized");
     }
     $request = $this->getRequest();
     $form = new UploadForm();
     $uploadFile = $this->params()->fromFiles('fileupload');
     if ($request->isPost()) {
         $form->setData($request->getPost());
         if ($form->isValid()) {
             // Upload directory comes from the module configuration
             $uploadPath = $this->getFileUploadLocation();
             // Save the uploaded file
             $adapter = new \Zend\File\Transfer\Adapter\Http();
             $adapter->setDestination($uploadPath);
             if ($adapter->receive($uploadFile['name'])) {
                 $userTable = $this->getServiceLocator()->get('UserTable');
                 $user = $userTable->getUserByEmail($userEmail);
                 $upload = new \Users\Model\Upload();
                 // File was received successfully: record it
                 $exchange_data = array();
                 $exchange_data['label'] = $request->getPost()->get('label');
                 $exchange_data['filename'] = $uploadFile['name'];
                 $exchange_data['user_id'] = $user->getId();
                 $upload->exchangeArray($exchange_data);
                 $uploadTable = $this->getServiceLocator()->get('UploadTable');
                 $uploadTable->save($upload);
                 $upload->setId($uploadTable->getLastInsertValue());

                 // Add the upload to Lucene. Open the existing index so earlier
                 // documents are kept; create() starts a new, empty index and
                 // is only the fallback when the index cannot be opened.
                 $searchIndexLocation = $this->getIndexLocation();
                 try {
                     $index = Lucene\Lucene::open($searchIndexLocation);
                 } catch (\Exception $e) {
                     $index = Lucene\Lucene::create($searchIndexLocation);
                 }

                 // build the Lucene fields
                 $fileUploadId = Document\Field::unIndexed('upload_id', $upload->getId());
                 $label = Document\Field::Text('label', $upload->getLabel());
                 $owner = Document\Field::Text('owner', $user->getName());
                 $fileName = $upload->getFilename();
                 $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;
                 // Pick the document type from the (case-sensitive) file extension.
                 switch (pathinfo($fileName, PATHINFO_EXTENSION)) {
                     case 'xlsx':
                         // index the Excel spreadsheet
                         $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
                         break;
                     case 'docx':
                         // index the Word document
                         $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
                         break;
                     default:
                         $indexDoc = new Lucene\Document();
                         break;
                 }
                 // Add the metadata fields to the document chosen above; it must
                 // not be replaced by a fresh one or the file content is lost.
                 $indexDoc->addField($label);
                 $indexDoc->addField($owner);
                 $indexDoc->addField($fileUploadId);
                 $index->addDocument($indexDoc);
                 $index->commit();
             }
         }
     }
     return $this->redirect()->toRoute('uploads', array('action' => 'index'));
 }
Пример #10
0
 /**
  * Add a new document to the index.
  * Any existing document with the given $id is deleted first.
  * $fields are added as text fields; array values are joined with spaces.
  * $parameters are JSON-encoded, base64-encoded and stored (not indexed)
  * in the '_parameters' field.
  *
  * @param mixed $id
  * @param array $fields
  * @param array $parameters
  *
  * @return bool always true
  */
 public function insert($id, array $fields, array $parameters = array())
 {
     // Drop whatever is currently indexed under this id.
     $this->delete($id);

     $document = new \ZendSearch\Lucene\Document();

     // The id is kept as a keyword field.
     $idField = \ZendSearch\Lucene\Document\Field::keyword('xref_id', $id);
     $document->addField($idField);

     // One text field per entry; arrays are flattened to a single string.
     foreach ($fields as $name => $content) {
         $text = is_array($content) ? implode(' ', $content) : $content;
         $textField = \ZendSearch\Lucene\Document\Field::text(trim($name), trim($text));
         $document->addField($textField);
     }

     // Parameters travel with the document as an opaque stored blob.
     $encodedParameters = base64_encode(json_encode($parameters));
     $document->addField(\ZendSearch\Lucene\Document\Field::unIndexed('_parameters', $encodedParameters));

     $this->getIndex()->addDocument($document);

     return true;
 }
 /**
  * Adds the resource's URI to the document as a stored, non-indexed field.
  *
  * @param Document $document
  */
 protected function addUri(Document $document)
 {
     $resourceUri = $this->resource->getUri();
     $uriField = Document\Field::unIndexed('uri', $resourceUri);
     $document->addField($uriField);
 }