/**
 * Checks that a LuceneResult built from a query hit exposes the metadata of
 * the indexed file.
 *
 * A single document is added to a DummyIndex (loaded from util/dummyindex.php)
 * and a QueryHit pointing at document 0 is wrapped in a LuceneResult. The
 * stored path carries a '/test/files' prefix, while the result is expected to
 * report the provider's bare $path.
 *
 * @dataProvider searchResultDataProvider
 */
function testSearchLuceneResultContent($fileId, $name, $path, $size, $score, $mimeType, $modified, $container)
{
    require_once __DIR__ . '/util/dummyindex.php';

    $dummyIndex = new DummyIndex();

    // Describe the indexed file with the same fields the indexer stores.
    $indexedFile = new Document();
    $fileFields = array(
        Document\Field::Keyword('fileId', $fileId),
        Document\Field::Text('path', '/test/files' . $path, 'UTF-8'),
        Document\Field::unIndexed('mtime', $modified),
        Document\Field::unIndexed('size', $size),
        Document\Field::unIndexed('mimetype', $mimeType),
    );
    foreach ($fileFields as $fileField) {
        $indexedFile->addField($fileField);
    }
    $dummyIndex->addDocument($indexedFile);

    // Fake a search hit that refers to the only document in the index.
    $queryHit = new QueryHit($dummyIndex);
    $queryHit->score = $score;
    $queryHit->id = 0;
    $queryHit->document_id = 0;

    $searchResult = new \OCA\Search_Lucene\Search\LuceneResult($queryHit);

    $this->assertInstanceOf('OCA\\Search_Lucene\\Search\\LuceneResult', $searchResult);

    // Result property => value it must carry, checked in this order.
    $expectedProperties = array(
        'id' => $fileId,
        'type' => 'lucene',
        'path' => $path,
        'name' => $name,
        'mime_type' => $mimeType,
        'size' => $size,
        'score' => $score,
        'modified' => $modified,
    );
    foreach ($expectedProperties as $property => $expectedValue) {
        $this->assertEquals($expectedValue, $searchResult->{$property});
    }
}
/**
 * Builds the Lucene document for an entity.
 *
 * The entity id is stored as an un-indexed 'identifier' field; every search
 * field from the entity's context is added as a text field under the search
 * field's own name.
 *
 * @param EntityInterface $entity
 *
 * @return Document
 */
public function createDocument(EntityInterface $entity) : Document
{
    $luceneDocument = new Document();
    $searchFields = $this->createContext($entity)->getFieldsCollection();

    $luceneDocument->addField(Field::unIndexed('identifier', $entity->getId()));

    // Copies one search field into the document being built.
    $appendField = function (SearchField $searchField) use ($luceneDocument) {
        $textField = Field::text($searchField->getName(), $searchField->getValue());
        $luceneDocument->addField($textField);
    };
    $searchFields->map($appendField);

    return $luceneDocument;
}
/**
 * {@inheritdoc}
 *
 * Adds the product to the search index identified by $indexName and commits
 * the index right away.
 *
 * @param ProductInterface $product   Product to add to the index.
 * @param string           $indexName Name of the index to write to; defaults
 *                                    to ProductIndexerInterface::DEFAULT_INDEX_NAME.
 */
public function addProduct(ProductInterface $product, $indexName = ProductIndexerInterface::DEFAULT_INDEX_NAME)
{
    // Use the index the caller asked for; the argument was previously
    // ignored and the default index was always used.
    $index = $this->searchIndexManager->getIndex($indexName);

    $document = new Document();
    // The identifier is stored for retrieval only, it is not searchable.
    $document->addField(Field::unIndexed('identifier', $product->getId()));
    // NOTE(review): the name is read from the 'en' translation while both
    // descriptions use translate() without a locale - confirm this is intended.
    $document->addField(Field::text('name', $product->translate('en')->getName()));
    $document->addField(Field::text('shortDescription', $product->translate()->getShortDescription()));
    $document->addField(Field::text('description', $product->translate()->getDescription()));

    $index->addDocument($document);
    $index->commit();
}
/**
 * Create or update an indexed document
 *
 * Documents already indexed under the object's id are deleted first. The new
 * document carries the id as a keyword field, the object's class name as an
 * un-indexed 'entityClass' field, and one field per property that has a
 * DocumentField annotation. The index is committed before returning.
 *
 * @param object $object
 */
public function index($object)
{
    $accessor = PropertyAccess::createPropertyAccessor();

    // Remove whatever is currently indexed for this id.
    $staleHits = $this->index->find('id:' . $accessor->getValue($object, 'id'));
    foreach ($staleHits as $staleHit) {
        $this->index->delete($staleHit->id);
    }

    $document = new Document();
    // Primary key and entity class let search results be mapped back to objects.
    $document->addField(Field::keyword('id', $accessor->getValue($object, 'id')));
    $document->addField(Field::unIndexed('entityClass', get_class($object)));

    $classInfo = new ReflectionClass($object);
    foreach ($classInfo->getProperties() as $property) {
        $propertyName = $property->name;

        $annotation = $this->reader->getPropertyAnnotation(
            new \ReflectionProperty($object, $propertyName),
            '\\Keratine\\Lucene\\Mapping\\Annotation\\DocumentField'
        );
        if (!$annotation) {
            // Properties without the annotation are not indexed.
            continue;
        }

        $text = $this->ensureString($accessor->getValue($object, $propertyName));

        // The annotation type selects the indexing strategy; anything
        // unrecognised is handled like 'unStored'.
        switch ($annotation->type) {
            case 'keyword':
                $field = Field::keyword($propertyName, $text, 'UTF-8');
                break;
            case 'unIndexed':
                $field = Field::unIndexed($propertyName, $text, 'UTF-8');
                break;
            case 'binary':
                $field = Field::binary($propertyName, $text);
                break;
            case 'text':
                $field = Field::text($propertyName, $text, 'UTF-8');
                break;
            case 'unStored':
            default:
                $field = Field::unStored($propertyName, $text, 'UTF-8');
                break;
        }

        $document->addField($field);
    }

    $this->index->addDocument($document);
    $this->index->commit();
}
/**
 * Builds the Lucene search index from every stored upload.
 *
 * An index is created at the configured index location. For each upload a
 * document is built: .xlsx and .docx files go through the matching Lucene
 * document loader, any other file gets an empty document. The upload label,
 * the owner's name and the upload id are then attached and the document is
 * added to the index.
 *
 * @return mixed The controller response with its content set to "Index Ok".
 */
public function generateIndexAction()
{
    $searchIndexLocation = $this->getIndexLocation();
    $index = Lucene\Lucene::create($searchIndexLocation);

    $userTable = $this->getServiceLocator()->get('UserTable');
    $uploadTable = $this->getServiceLocator()->get('UploadTable');
    $allUploads = $uploadTable->fetchAll();

    foreach ($allUploads as $fileUpload) {
        $uploadOwner = $userTable->getById($fileUpload->getUserId());

        // Build the Lucene metadata fields.
        $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->getId());
        $label = Document\Field::Text('label', $fileUpload->getLabel());
        $owner = Document\Field::Text('owner', $uploadOwner->getName());

        $uploadPath = $this->getFileUploadLocation();
        $fileName = $fileUpload->getFilename();
        $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;

        if (substr_compare($fileName, ".xlsx", strlen($fileName) - strlen(".xlsx"), strlen(".xlsx")) === 0) {
            // Index the Excel spreadsheet.
            $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
        } elseif (substr_compare($fileName, ".docx", strlen($fileName) - strlen(".docx"), strlen(".docx")) === 0) {
            // Index the Word document.
            $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
        } else {
            // No loader for this file type: index the metadata only.
            $indexDoc = new Lucene\Document();
        }

        // Attach the metadata to the document selected above. The document
        // must not be re-created here, otherwise the content parsed from the
        // file would be thrown away.
        $indexDoc->addField($label);
        $indexDoc->addField($owner);
        $indexDoc->addField($fileUploadId);
        $index->addDocument($indexDoc);
    }

    $index->commit();

    $response = $this->getResponse();
    $response->setContent("Index Ok");

    return $response;
}
/**
 * Builds the Lucene search index from every stored upload.
 *
 * An index is created at the configured index location. For each upload a
 * document is built: .xlsx and .docx files go through the matching Lucene
 * document loader, any other file gets an empty document. The upload label,
 * the owner's name and the upload id are then attached and the document is
 * added to the index. Nothing is returned.
 */
public function generateIndexAction()
{
    $searchIndexLocation = $this->getIndexLocation();
    $index = Lucene\Lucene::create($searchIndexLocation);

    $userTable = $this->getServiceLocator()->get('UserTable');
    $uploadTable = $this->getServiceLocator()->get('UploadTable');
    $allUploads = $uploadTable->fetchAll();

    foreach ($allUploads as $fileUpload) {
        // The owner has to be looked up here: its name is indexed below.
        $uploadOwner = $userTable->getUser($fileUpload->user_id);

        // id field
        $fileUploadId = Document\Field::unIndexed('upload_id', $fileUpload->id);
        // label field
        $label = Document\Field::Text('label', $fileUpload->label);
        // owner field
        $owner = Document\Field::Text('owner', $uploadOwner->name);

        $fileName = $fileUpload->filename;

        if (substr_compare($fileName, ".xlsx", strlen($fileName) - strlen(".xlsx"), strlen(".xlsx")) === 0) {
            // index excel sheet
            $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($this->getFileUploadLocation() . "/" . $fileName);
        } elseif (substr_compare($fileName, ".docx", strlen($fileName) - strlen(".docx"), strlen(".docx")) === 0) {
            // index word doc
            $indexDoc = Lucene\Document\Docx::loadDocxFile($this->getFileUploadLocation() . "/" . $fileName);
        } else {
            // No loader for this file type: index the metadata only.
            $indexDoc = new Lucene\Document();
        }

        $indexDoc->addField($label);
        $indexDoc->addField($owner);
        $indexDoc->addField($fileUploadId);
        $index->addDocument($indexDoc);
    }

    $index->commit();
}
/**
 * Build the Lucene document for one extracted page.
 *
 * The document receives, in this order: the URL as a keyword, the ancestor
 * paths of the URL (when the path is longer than one character), the
 * title/description/body/image values from $page, one field pair per
 * configured title tag, and - when the matching parameters are set - the
 * page id and the route name.
 *
 * @param string $url
 * @param array $page
 * @return Document
 */
protected function createDocument($url, $page)
{
    $document = new Document();

    if (!isset($page['status_code'])) {
        $page['status_code'] = 00; // temporary default
    }

    // NOTE(review): presumably set so the iconv //TRANSLIT calls below
    // transliterate according to the Czech locale - confirm.
    setlocale(LC_ALL, "cs_CZ.UTF-8");

    // Configured per-field boosts; empty when none are configured.
    $boosts = isset($this->parameters[self::BOOST_PARAM]) ? $this->parameters[self::BOOST_PARAM] : array();

    $document->addField(Field::keyword('url', $url));

    // Ancestor URLs, so that pages can be searched by URL prefix.
    $parsedUrl = parse_url($url);
    if (!empty($parsedUrl['path']) && strlen($parsedUrl['path']) > 1) {
        $currentPath = $parsedUrl['path'];
        $ancestors = array($currentPath);
        do {
            // Cut off the last path segment and remember what is left.
            $currentPath = substr($currentPath, 0, strrpos($currentPath, '/'));
            $ancestors[] = $currentPath;
        } while (strrpos($currentPath, '/') > 1);
        $document->addField(Field::text(Page::URIS_KEY, implode(' ', $ancestors)));
    }

    // Keys that are indexed as text together with a transliterated twin.
    $textKeys = array(Page::TITLE_KEY, Page::DESCRIPTION_KEY, Page::BODY_KEY);

    foreach (array(Page::TITLE_KEY, Page::DESCRIPTION_KEY, Page::BODY_KEY, Page::IMAGE_KEY) as $key) {
        $value = isset($page[$key]) ? $page[$key] : '';
        $boost = isset($boosts[$key]) ? $boosts[$key] : 1.25;
        $ascii = null;

        // Loose comparisons on purpose: they mirror switch semantics.
        if (in_array($key, $textKeys)) {
            $field = Field::text($key, $value);

            // The transliterated copy goes into the document first.
            $ascii = str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $value));
            $asciiField = Field::text($key . '_translit', $ascii);
            $asciiField->boost = $boost;
            $document->addField($asciiField);
        } elseif ($key == Page::IMAGE_KEY) {
            $field = Field::unIndexed($key, $value);
        } else {
            // Any other key: append the transliteration when it differs.
            $ascii = str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $value));
            $suffix = '';
            if ($ascii != $value) {
                $suffix = ' ' . $ascii;
            }
            $field = Field::text($key, $value . $suffix);
        }

        $field->boost = $boost;
        $document->addField($field);
    }

    // Title tags as configured, i.e. h1, h2, ...
    foreach ($this->parameters[self::TITLE_TAGS_PARAM] as $tag) {
        if (Page::hasHeadlineType($page, $tag)) {
            $headline = Page::getHeadline($page, $tag);
        } else {
            $headline = '';
        }
        $hasBoost = isset($boosts[$tag]);

        // Without a configured boost the plain field defaults to 1 ...
        $headlineField = Field::text($tag, $headline);
        $headlineField->boost = $hasBoost ? $boosts[$tag] : 1;
        $document->addField($headlineField);

        // ... while its transliterated twin defaults to 1.25.
        $asciiHeadline = str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $headline));
        $asciiHeadlineField = Field::text($tag . '_translit', $asciiHeadline);
        $asciiHeadlineField->boost = $hasBoost ? $boosts[$tag] : 1.25;
        $document->addField($asciiHeadlineField);
    }

    // Page ID, if a selector is defined.
    if ($this->parameters[self::PAGE_ID_PARAM]) {
        $pageId = isset($page[Page::PAGE_ID_KEY]) ? $page[Page::PAGE_ID_KEY] : '';
        $document->addField(Field::unIndexed(Page::PAGE_ID_KEY, $pageId));
    }

    // Route name, if a selector is defined.
    if ($this->parameters[self::ROUTE_NAME_PARAM]) {
        $routeName = isset($page[Page::ROUTE_NAME_KEY]) ? $page[Page::ROUTE_NAME_KEY] : '';
        $document->addField(Field::unIndexed(Page::ROUTE_NAME_KEY, $routeName));
    }

    return $document;
}
/**
 * Index a file.
 *
 * For files on local storage the content is loaded with a loader chosen by
 * mime type (HTML, text, PDF) or file extension (docx, xlsx, pptx, odt, ods).
 * Files on non-local storage are indexed with their metadata only. In both
 * cases the file id, path, mtime, size and mime type are added before the
 * document is passed to the index.
 *
 * @param File $file the file to be indexed
 * @param bool $commit passed through to the index update
 *
 * @return bool always true when the method returns normally
 * @throws NotIndexedException when a local file matches none of the handled
 *                             mime types or extensions
 */
public function indexFile(File $file, $commit = true)
{
    // The loader is picked by mime type first, then by file extension.
    $mimeType = $file->getMimeType();
    $extension = strtolower(pathinfo($file->getName(), PATHINFO_EXTENSION));

    // Start from a plain document; a specialised loader may replace it.
    $luceneDoc = new Document();

    // Content is indexed for local files only.
    $storage = $file->getStorage();
    if ($storage->isLocal()) {
        $localPath = $storage->getLocalFile($file->getInternalPath());

        if ('text/html' === $mimeType) {
            //TODO could be indexed, even if not local
            $luceneDoc = HTML::loadHTML($file->getContent());
        } elseif ('text/' === substr($mimeType, 0, 5) || 'application/x-tex' === $mimeType) {
            $body = $file->getContent();
            if ($body != '') {
                $luceneDoc->addField(Document\Field::UnStored('body', $body));
            }
        } elseif ('application/pdf' === $mimeType) {
            $luceneDoc = Pdf::loadPdf($file->getContent());
        } elseif ($extension === 'docx') {
            // the zend classes only understand docx and not doc files
            $luceneDoc = Document\Docx::loadDocxFile($localPath);
        } elseif ($extension === 'xlsx') {
            $luceneDoc = Document\Xlsx::loadXlsxFile($localPath);
        } elseif ($extension === 'pptx') {
            $luceneDoc = Document\Pptx::loadPptxFile($localPath);
        } elseif ($extension === 'odt') {
            $luceneDoc = Odt::loadOdtFile($localPath);
        } elseif ($extension === 'ods') {
            $luceneDoc = Ods::loadOdsFile($localPath);
        } else {
            throw new NotIndexedException();
        }
    }

    // Store filecache id as unique id to lookup by when deleting
    $luceneDoc->addField(Document\Field::Keyword('fileId', $file->getId()));

    // Store document path and file metadata for the search results
    $luceneDoc->addField(Document\Field::Text('path', $file->getPath(), 'UTF-8'));
    $luceneDoc->addField(Document\Field::unIndexed('mtime', $file->getMTime()));
    $luceneDoc->addField(Document\Field::unIndexed('size', $file->getSize()));
    $luceneDoc->addField(Document\Field::unIndexed('mimetype', $mimeType));

    $this->index->updateFile($luceneDoc, $file->getId(), $commit);

    return true;
}
/**
 * Handles a submitted upload form.
 *
 * Without an authenticated user the response content is set to
 * "not authorized" and returned. Otherwise, for a valid POST, the uploaded
 * file is received into the upload location, an upload record is saved, and
 * the upload is added to the Lucene index (.xlsx and .docx files through the
 * matching document loader, other files with metadata only).
 *
 * @return mixed The "not authorized" response, or a redirect to the uploads
 *               index route.
 */
public function processAction()
{
    $userEmail = $this->getAuthService()->getStorage()->read();
    if (!$userEmail) {
        $this->flashMessenger()->addErrorMessage("not authorized");
        return $this->getResponse()->setContent("not authorized");
    }

    $request = $this->getRequest();
    $form = new UploadForm();
    $uploadFile = $this->params()->fromFiles('fileupload');

    if ($request->isPost()) {
        $form->setData($request->getPost());
        if ($form->isValid()) {
            // Read the upload location from the module configuration.
            $uploadPath = $this->getFileUploadLocation();

            // Store the uploaded file.
            $adapter = new \Zend\File\Transfer\Adapter\Http();
            $adapter->setDestination($uploadPath);

            if ($adapter->receive($uploadFile['name'])) {
                $userTable = $this->getServiceLocator()->get('UserTable');
                $user = $userTable->getUserByEmail($userEmail);

                // The file was received: persist the upload record.
                $upload = new \Users\Model\Upload();
                $exchange_data = array();
                $exchange_data['label'] = $request->getPost()->get('label');
                $exchange_data['filename'] = $uploadFile['name'];
                $exchange_data['user_id'] = $user->getId();
                $upload->exchangeArray($exchange_data);

                $uploadTable = $this->getServiceLocator()->get('UploadTable');
                $uploadTable->save($upload);
                $upload->setId($uploadTable->getLastInsertValue());

                // Add the upload to the Lucene index. Open the existing
                // index so earlier uploads stay searchable; create() starts
                // a new index, so it is only used when none can be opened.
                $searchIndexLocation = $this->getIndexLocation();
                try {
                    $index = Lucene\Lucene::open($searchIndexLocation);
                } catch (Lucene\Exception\ExceptionInterface $e) {
                    $index = Lucene\Lucene::create($searchIndexLocation);
                }

                // Build the Lucene metadata fields.
                $fileUploadId = Document\Field::unIndexed('upload_id', $upload->getId());
                $label = Document\Field::Text('label', $upload->getLabel());
                $owner = Document\Field::Text('owner', $user->getName());

                $fileName = $upload->getFilename();
                $filePath = $uploadPath . DIRECTORY_SEPARATOR . $fileName;

                if (substr_compare($fileName, ".xlsx", strlen($fileName) - strlen(".xlsx"), strlen(".xlsx")) === 0) {
                    // Index the Excel spreadsheet.
                    $indexDoc = Lucene\Document\Xlsx::loadXlsxFile($filePath);
                } elseif (substr_compare($fileName, ".docx", strlen($fileName) - strlen(".docx"), strlen(".docx")) === 0) {
                    // Index the Word document.
                    $indexDoc = Lucene\Document\Docx::loadDocxFile($filePath);
                } else {
                    // No loader for this file type: index the metadata only.
                    $indexDoc = new Lucene\Document();
                }

                // Attach the metadata to the document selected above. The
                // document must not be re-created here, otherwise the
                // content parsed from the file would be thrown away.
                $indexDoc->addField($label);
                $indexDoc->addField($owner);
                $indexDoc->addField($fileUploadId);
                $index->addDocument($indexDoc);
                $index->commit();
            }
        }
    }

    return $this->redirect()->toRoute('uploads', array('action' => 'index'));
}
/**
 * Add a new document to the index.
 *
 * Any document already stored under $id is deleted first. The id is kept as
 * the keyword field 'xref_id'. Each entry of $fields becomes a text field
 * (array values are joined with spaces; names and values are trimmed).
 * $parameters are JSON-encoded, base64-encoded and stored, un-indexed, in the
 * '_parameters' field.
 *
 * @param mixed $id
 * @param array $fields
 * @param array $parameters
 *
 * @return bool always true
 */
public function insert($id, array $fields, array $parameters = array())
{
    // Make room: drop whatever is indexed under this id.
    $this->delete($id);

    $document = new \ZendSearch\Lucene\Document();
    $document->addField(\ZendSearch\Lucene\Document\Field::keyword('xref_id', $id));

    // Searchable content.
    foreach ($fields as $name => $content) {
        $text = is_array($content) ? implode(' ', $content) : $content;
        $textField = \ZendSearch\Lucene\Document\Field::text(trim($name), trim($text));
        $document->addField($textField);
    }

    // Stored-only payload.
    $encodedParameters = base64_encode(json_encode($parameters));
    $document->addField(\ZendSearch\Lucene\Document\Field::unIndexed('_parameters', $encodedParameters));

    $this->getIndex()->addDocument($document);

    return true;
}
/**
 * Adds the resource's URI to the document as an un-indexed 'uri' field, so
 * it is stored but not searchable.
 *
 * @param Document $document
 */
protected function addUri(Document $document)
{
    $uri = $this->resource->getUri();
    $uriField = Document\Field::unIndexed('uri', $uri);

    $document->addField($uriField);
}