/**
 * Build a text search field for the given name.
 *
 * When no explicit value is passed (null), the value is read from the
 * current entity via getValue($name). The field is created with the
 * charset returned by $this->config()->getCharset().
 *
 * @param string      $name  Field name.
 * @param string|null $value Explicit value, or null to read it from the entity.
 *
 * @return Field
 */
public function text($name, $value = null)
{
    $fieldValue = $value !== null ? $value : $this->entity->getValue($name);
    $charset = $this->config()->getCharset();

    return Field::text($name, $fieldValue, $charset);
}
/**
 * Build a search document for the given entity.
 *
 * The entity id is stored under "identifier" as an un-indexed field; every
 * field of the entity's context fields collection is added as a text field
 * using the field's own name and value.
 *
 * @param EntityInterface $entity Entity to convert.
 *
 * @return Document
 */
public function createDocument(EntityInterface $entity) : Document
{
    $searchDocument = new Document();
    $searchFields = $this->createContext($entity)->getFieldsCollection();

    $searchDocument->addField(Field::unIndexed('identifier', $entity->getId()));

    $searchFields->map(function (SearchField $searchField) use ($searchDocument) {
        $textField = Field::text($searchField->getName(), $searchField->getValue());
        $searchDocument->addField($textField);
    });

    return $searchDocument;
}
/**
 * {@inheritdoc}
 *
 * Adds one document for the product to the index named by $indexName and
 * commits the index. The document holds the product id (stored, not
 * indexed) plus the translated name, short description and description as
 * text fields.
 *
 * @param ProductInterface $product   Product to index.
 * @param string           $indexName Name of the target index.
 */
public function addProduct(ProductInterface $product, $indexName = ProductIndexerInterface::DEFAULT_INDEX_NAME)
{
    // Use the index the caller asked for. The default argument keeps the
    // previous behaviour for callers that do not pass an index name.
    $index = $this->searchIndexManager->getIndex($indexName);

    $document = new Document();
    $document->addField(Field::unIndexed('identifier', $product->getId()));
    // NOTE(review): the name is always read from the 'en' translation while
    // the descriptions use the default translation - confirm this is intended.
    $document->addField(Field::text('name', $product->translate('en')->getName()));
    $document->addField(Field::text('shortDescription', $product->translate()->getShortDescription()));
    $document->addField(Field::text('description', $product->translate()->getDescription()));

    $index->addDocument($document);
    $index->commit();
}
/**
 * Create or update an indexed document.
 *
 * Any document already indexed under the object's id is deleted first. A new
 * document is then built from the object's id, its class name and every
 * property carrying a DocumentField annotation, added to the index and
 * committed.
 *
 * @param object $object Object exposing an "id" property readable through
 *                       the property accessor.
 */
public function index($object)
{
    $accessor = PropertyAccess::createPropertyAccessor();

    // Read the primary key once; it is used both for the lookup and the new document.
    $id = $accessor->getValue($object, 'id');

    // Delete existing documents with the same id.
    foreach ($this->index->find('id:' . $id) as $hit) {
        $this->index->delete($hit->id);
    }

    $doc = new Document();

    // Primary key, used to identify the document in search results.
    $doc->addField(Field::keyword('id', $id));

    // Entity class reference, stored so results can be mapped back to a class.
    $doc->addField(Field::unIndexed('entityClass', get_class($object)));

    // Fully qualified so the class resolves regardless of the file's namespace imports.
    $reflClass = new \ReflectionClass($object);

    foreach ($reflClass->getProperties() as $property) {
        // getProperties() already yields ReflectionProperty instances, so they
        // can be handed to the annotation reader directly.
        $annotation = $this->reader->getPropertyAnnotation(
            $property,
            '\\Keratine\\Lucene\\Mapping\\Annotation\\DocumentField'
        );

        if (!$annotation) {
            continue;
        }

        $value = $this->ensureString($accessor->getValue($object, $property->name));

        // Use the indexing strategy requested by the annotation.
        switch ($annotation->type) {
            case 'keyword':
                $doc->addField(Field::keyword($property->name, $value, 'UTF-8'));
                break;
            case 'unIndexed':
                $doc->addField(Field::unIndexed($property->name, $value, 'UTF-8'));
                break;
            case 'binary':
                $doc->addField(Field::binary($property->name, $value));
                break;
            case 'text':
                $doc->addField(Field::text($property->name, $value, 'UTF-8'));
                break;
            case 'unStored':
            default:
                // Unknown types fall back to the unStored strategy.
                $doc->addField(Field::unStored($property->name, $value, 'UTF-8'));
                break;
        }
    }

    // Add the document to the index and commit it.
    $this->index->addDocument($doc);
    $this->index->commit();
}
/**
 * Add a WAV file to the index using the output of the xmpParse.py script.
 *
 * The script is run with the file's base name as its only argument. The
 * first output line becomes the "title" field; every following line is
 * added as a text field whose name and value are both that line. The file
 * name itself is stored in the "filename" field.
 *
 * @param object $index        Index receiving the document (addDocument() is called on it).
 * @param object $doc          Document the fields are added to.
 * @param string $documentPath Path of the WAV file; only its base name is used.
 */
function wavProcess($index, $doc, $documentPath)
{
    $fileName = basename($documentPath);
    $fileNameField = \ZendSearch\Lucene\Document\Field::text('filename', $fileName);

    // Call the Python script. The file name is escaped so that spaces or
    // shell metacharacters in it cannot split or alter the command.
    $res = array();
    exec('python ../xmpParse.py ' . escapeshellarg($fileName), $res);

    // Title: first output line, or null when the script printed nothing.
    $title = isset($res[0]) ? $res[0] : null;
    $titleField = \ZendSearch\Lucene\Document\Field::text('title', $title);

    // Subjects: all remaining output lines.
    // NOTE(review): each subject is used as both the field name and the
    // field value - confirm this is intended rather than a fixed field name.
    $lineCount = count($res);
    for ($i = 1; $i < $lineCount; $i++) {
        $subject = $res[$i];
        $doc->addField(\ZendSearch\Lucene\Document\Field::text($subject, $subject));
    }

    $doc->addField($fileNameField);
    $doc->addField($titleField);
    $index->addDocument($doc);
}
/**
 * Lists all Post models and runs a Lucene search for the "q" query parameter.
 *
 * The posts index at data/posts_index is re-created from all Post records on
 * every call, then queried with one term per word of "q".
 *
 * @return mixed Rendered "index" view.
 */
public function actionIndex()
{
    $searchModel = new PostSearch();
    $dataProvider = $searchModel->search(Yii::$app->request->post());

    setlocale(LC_CTYPE, 'ru_RU.UTF-8');
    Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
    Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
    Lucene\Lucene::setResultSetLimit(10);

    // Create the blog posts index in data/posts_index; the folder must be writable.
    $index = Lucene\Lucene::create('data/posts_index');

    // Iterate through the posts and build the index.
    foreach (Post::find()->all() as $post) {
        $doc = new Lucene\Document();
        $doc->addField(Lucene\Document\Field::unIndexed('entry_id', $post->id));
        $doc->addField(Lucene\Document\Field::keyword('title', $post->title));
        $doc->addField(Lucene\Document\Field::text('contents', $post->content));
        $index->addDocument($doc);
    }

    $index->commit();

    // Query parameters arrive already URL-decoded, so decoding again would
    // corrupt literal '+' and '%' characters. A missing or non-string "q"
    // is treated as an empty search.
    $rawQuery = Yii::$app->getRequest()->getQueryParam('q');
    if (!is_string($rawQuery)) {
        $rawQuery = '';
    }

    // Build a multi-term query with one term per non-empty word.
    // NOTE(review): terms are added as typed, without going through the
    // analyzer - confirm whether they should be case-folded first.
    $query = new Lucene\Search\Query\MultiTerm();
    foreach (explode(' ', $rawQuery) as $word) {
        if ($word === '') {
            // Consecutive or leading/trailing spaces produce empty pieces.
            continue;
        }
        $query->addTerm(new Lucene\Index\Term($word));
    }

    // Open and query the index.
    $index = Lucene\Lucene::open('data/posts_index');
    $results = $index->find($query);

    return $this->render('index', [
        'searchModel' => $searchModel,
        'dataProvider' => $dataProvider,
        'search' => $results,
        'query' => $query,
    ]);
}
/**
 * Rebuild the user search index.
 *
 * Creates the index at the configured location, adds one document per user
 * returned by the user table and commits. userId and activated are keyword
 * fields; firstName, lastName, email and role are text fields.
 */
public function generateSearchAction()
{
    $index = Lucene\Lucene::create($this->getIndexLocation());
    $users = $this->getUserTable()->fetchAll(false);

    foreach ($users as $user) {
        $userDocument = new Lucene\Document();

        $userDocument->addField(Document\Field::keyword('userId', $user->userId));
        $userDocument->addField(Document\Field::text('firstName', $user->firstName));
        $userDocument->addField(Document\Field::text('lastName', $user->lastName));
        $userDocument->addField(Document\Field::text('email', $user->email));
        $userDocument->addField(Document\Field::text('role', $user->role));
        $userDocument->addField(Document\Field::keyword('activated', $user->activated));

        $index->addDocument($userDocument);
    }

    $index->commit();
}
/**
 * Create a document from the configured fields within extracted page data.
 *
 * Fields added, in order: the URL (keyword); the path and its ancestor paths
 * (text) when the URL has a path longer than one character; title,
 * description and body (each preceded by an ASCII-transliterated
 * "<name>_translit" twin); the image (stored only); each configured title
 * tag with its "_translit" twin; and, when configured, the page id and
 * route name (stored only).
 *
 * @param string $url  Page URL.
 * @param array  $page Extracted page data keyed by the Page::*_KEY constants.
 *
 * @return Document
 */
protected function createDocument($url, $page)
{
    $document = new Document();

    if (!isset($page['status_code'])) {
        // 00 is an octal integer literal, i.e. 0. The value is not read in
        // this method; $page is later passed to the Page helpers - TODO
        // confirm whether they need it.
        $page['status_code'] = 00; //tmp
    }

    // The locale is set before the iconv //TRANSLIT calls below.
    setlocale(LC_ALL, "cs_CZ.UTF-8");

    // Configured per-field boosts; empty when none are configured.
    $boosts = isset($this->parameters[self::BOOST_PARAM]) ? $this->parameters[self::BOOST_PARAM] : array();

    // Boost configured for a field, or $default when none is set.
    $boostFor = function ($fieldName, $default) use ($boosts) {
        return isset($boosts[$fieldName]) ? $boosts[$fieldName] : $default;
    };

    // ASCII transliteration of a value with apostrophes removed.
    $transliterate = function ($text) {
        return str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $text));
    };

    $document->addField(Field::keyword('url', $url));

    // Ancestor URLs, to allow searching by URL.
    $urlParts = parse_url($url);
    if (isset($urlParts['path']) && $urlParts['path'] && strlen($urlParts['path']) > 1) {
        $uri = $urlParts['path'];
        $uris = array($uri);
        do {
            // Strip the last path segment.
            $uri = substr($uri, 0, strrpos($uri, '/'));
            $uris[] = $uri;
        } while (strrpos($uri, '/') > 1);
        $document->addField(Field::text(Page::URIS_KEY, implode(' ', $uris)));
    }

    foreach (array(Page::TITLE_KEY, Page::DESCRIPTION_KEY, Page::BODY_KEY, Page::IMAGE_KEY) as $fieldName) {
        $fieldValue = isset($page[$fieldName]) ? $page[$fieldName] : '';

        // Every key iterated above is handled by one of the cases.
        switch ($fieldName) {
            case Page::TITLE_KEY:
            case Page::DESCRIPTION_KEY:
            case Page::BODY_KEY:
                $field = Field::text($fieldName, $fieldValue);

                // The transliterated twin is added before the original field.
                $fieldTranslit = Field::text($fieldName . '_translit', $transliterate($fieldValue));
                $fieldTranslit->boost = $boostFor($fieldName, 1.25);
                $document->addField($fieldTranslit);
                break;
            case Page::IMAGE_KEY:
                $field = Field::unIndexed($fieldName, $fieldValue);
                break;
        }

        $field->boost = $boostFor($fieldName, 1.25);
        $document->addField($field);
    }

    // Title tags as configured, i.e. h1, h2, ...
    foreach ($this->parameters[self::TITLE_TAGS_PARAM] as $fieldName) {
        $fieldValue = Page::hasHeadlineType($page, $fieldName) ? Page::getHeadline($page, $fieldName) : '';

        $field = Field::text($fieldName, $fieldValue);
        // The headline field defaults to boost 1, its translit twin to 1.25.
        $field->boost = $boostFor($fieldName, 1);
        $document->addField($field);

        $fieldTranslit = Field::text($fieldName . '_translit', $transliterate($fieldValue));
        $fieldTranslit->boost = $boostFor($fieldName, 1.25);
        $document->addField($fieldTranslit);
    }

    // Page ID, if a selector is defined.
    if ($this->parameters[self::PAGE_ID_PARAM]) {
        $fieldValue = isset($page[Page::PAGE_ID_KEY]) ? $page[Page::PAGE_ID_KEY] : '';
        $document->addField(Field::unIndexed(Page::PAGE_ID_KEY, $fieldValue));
    }

    // Route name, if a selector is defined.
    if ($this->parameters[self::ROUTE_NAME_PARAM]) {
        $fieldValue = isset($page[Page::ROUTE_NAME_KEY]) ? $page[Page::ROUTE_NAME_KEY] : '';
        $document->addField(Field::unIndexed(Page::ROUTE_NAME_KEY, $fieldValue));
    }

    return $document;
}
/**
 * Add a new document to the index.
 *
 * Any existing document with the given $id is deleted first. Each entry of
 * $fields is added as a text field (array values are joined with spaces,
 * names and values are trimmed). $parameters are JSON-encoded, then
 * base64-encoded, and kept in the un-indexed "_parameters" field.
 *
 * @param mixed $id         Document identifier, stored in the "xref_id" keyword field.
 * @param array $fields     Field name => value (string or array of strings).
 * @param array $parameters Extra data stored with the document.
 *
 * @return bool Always true.
 */
public function insert($id, array $fields, array $parameters = array())
{
    // Drop whatever is currently indexed under this id.
    $this->delete($id);

    $document = new \ZendSearch\Lucene\Document();
    $document->addField(\ZendSearch\Lucene\Document\Field::keyword('xref_id', $id));

    foreach ($fields as $name => $content) {
        // Flatten multi-valued fields into one space-separated string.
        $text = is_array($content) ? implode(' ', $content) : $content;
        $document->addField(\ZendSearch\Lucene\Document\Field::text(trim($name), trim($text)));
    }

    $encodedParameters = base64_encode(json_encode($parameters));
    $document->addField(\ZendSearch\Lucene\Document\Field::unIndexed('_parameters', $encodedParameters));

    $this->getIndex()->addDocument($document);

    return true;
}
/**
 * Build a document with "title" and "body" text fields and a "url" keyword field.
 *
 * HTML/PHP tags are stripped from the body before it is added.
 *
 * @param string $title
 * @param string $body
 * @param string $url
 *
 * @return Document
 */
protected function createDocument($title, $body, $url)
{
    $result = new Document();

    $fields = array(
        Field::text('title', $title),
        Field::text('body', strip_tags($body)),
        Field::keyword('url', $url),
    );

    foreach ($fields as $field) {
        $result->addField($field);
    }

    return $result;
}
/**
 * Add $value to the document under $name, choosing the field type from the
 * value's PHP type.
 *
 * - string: keyword field holding the utf8_decode()d value
 * - date/time object: keyword field holding its Unix timestamp
 * - array: text field holding the elements joined by commas
 * - anything else: binary field
 *
 * @param Lucene\Document $document Document to extend.
 * @param string          $name     Field name.
 * @param mixed           $value    Value to store.
 */
private function appendToDocument(Lucene\Document $document, $name, $value)
{
    if (is_string($value)) {
        // NOTE(review): utf8_decode() converts to ISO-8859-1 and is
        // deprecated as of PHP 8.2 - confirm the index really expects Latin-1.
        $field = Lucene\Document\Field::keyword($name, utf8_decode($value));
    } elseif ($value instanceof \DateTimeInterface) {
        // Accept any date/time implementation (e.g. DateTimeImmutable), not
        // only \DateTime, so such values do not fall into the binary branch.
        $field = Lucene\Document\Field::keyword($name, $value->getTimestamp());
    } elseif (is_array($value)) {
        $field = Lucene\Document\Field::text($name, implode(',', $value));
    } else {
        $field = Lucene\Document\Field::binary($name, $value);
    }

    $document->addField($field);
}