Example #1
0
 /**
  * Build a text search field for the given name.
  *
  * If no explicit value is supplied (null), the value is looked up on the
  * entity held by this object through its getValue() accessor. The field is
  * created with the charset reported by this object's configuration.
  *
  * @param string      $name  Field name; also the key used for the entity lookup
  * @param string|null $value Explicit field value, or null to read it from the entity
  * @return Field
  */
 public function text($name, $value = null)
 {
     // Only a strict null triggers the entity lookup; '' and 0 are kept as given.
     $fieldValue = $value !== null ? $value : $this->entity->getValue($name);

     return Field::text($name, $fieldValue, $this->config()->getCharset());
 }
 /**
  * Build a search document for an entity.
  *
  * The entity id is added as an un-indexed "identifier" field, and every
  * search field in the entity context's fields collection is added as a
  * text field under its own name.
  *
  * @param EntityInterface $entity
  * @return Document
  */
 public function createDocument(EntityInterface $entity) : Document
 {
     $doc = new Document();
     $searchFields = $this->createContext($entity)->getFieldsCollection();

     $doc->addField(Field::unIndexed('identifier', $entity->getId()));

     // map() is used purely for its side effect here; its return value is discarded.
     $appendField = function (SearchField $searchField) use ($doc) {
         $doc->addField(Field::text($searchField->getName(), $searchField->getValue()));
     };
     $searchFields->map($appendField);

     return $doc;
 }
 /**
  * {@inheritdoc}
  *
  * Builds a document from the product's id, name, short description and
  * description, adds it to the index identified by $indexName and commits.
  */
 public function addProduct(ProductInterface $product, $indexName = ProductIndexerInterface::DEFAULT_INDEX_NAME)
 {
     // Use the index the caller asked for; the default argument keeps the
     // previous behaviour for callers that pass nothing.
     $index = $this->searchIndexManager->getIndex($indexName);

     $document = new Document();
     $document->addField(Field::unIndexed('identifier', $product->getId()));
     // NOTE(review): the name is read from the 'en' translation while both
     // descriptions use translate() without an argument; confirm this is intended.
     $document->addField(Field::text('name', $product->translate('en')->getName()));
     $document->addField(Field::text('shortDescription', $product->translate()->getShortDescription()));
     $document->addField(Field::text('description', $product->translate()->getDescription()));

     $index->addDocument($document);
     $index->commit();
 }
Example #4
0
 /**
  * Create or update an indexed document
  *
  * Documents already found under the object's id are deleted first. A new
  * document is then built from the id, the object's class name and every
  * property carrying a DocumentField annotation, added to the index and
  * committed.
  *
  * @param object $object Object whose "id" is readable through the property accessor
  */
 public function index($object)
 {
     // create property accessor
     $accessor = PropertyAccess::createPropertyAccessor();
     // read the id once: it drives both the lookup of stale documents and the new document's key
     $id = $accessor->getValue($object, 'id');
     // delete existing documents with same id
     foreach ($this->index->find('id:' . $id) as $hit) {
         $this->index->delete($hit->id);
     }
     // create new Lucene document
     $doc = new Document();
     // add primary key to identify it in the search results
     $doc->addField(Field::keyword('id', $id));
     // add entity class reference to identify it in the search results
     $doc->addField(Field::unIndexed('entityClass', get_class($object)));
     // analyze each property's annotations to see which ones must be added to the document
     $reflClass = new \ReflectionClass($object);
     foreach ($reflClass->getProperties() as $property) {
         // getProperties() already yields ReflectionProperty instances, so they can be handed to the reader directly
         $annotation = $this->reader->getPropertyAnnotation($property, '\\Keratine\\Lucene\\Mapping\\Annotation\\DocumentField');
         if (!$annotation) {
             continue;
         }
         $name = $property->name;
         $value = $this->ensureString($accessor->getValue($object, $name));
         // use the appropriate indexing strategy for the field
         switch ($annotation->type) {
             case 'keyword':
                 $doc->addField(Field::keyword($name, $value, 'UTF-8'));
                 break;
             case 'unIndexed':
                 $doc->addField(Field::unIndexed($name, $value, 'UTF-8'));
                 break;
             case 'binary':
                 $doc->addField(Field::binary($name, $value));
                 break;
             case 'text':
                 $doc->addField(Field::text($name, $value, 'UTF-8'));
                 break;
             case 'unStored':
             default:
                 // unknown or missing types fall back to the unStored strategy
                 $doc->addField(Field::unStored($name, $value, 'UTF-8'));
                 break;
         }
     }
     // add the document to the index and commit it
     $this->index->addDocument($doc);
     $this->index->commit();
 }
/**
 * Add a WAV file's metadata to a Lucene document and index it.
 *
 * The external script ../xmpParse.py is run on the file's base name. Its first
 * output line is indexed as the "title" field; every following line is indexed
 * as a text field whose name and value are both that line. The file name is
 * indexed as "filename". The document is then added to the index (no commit
 * is performed here).
 *
 * @param object $index        Index exposing addDocument()
 * @param object $doc          Document exposing addField()
 * @param string $documentPath Path of the file; only its base name is used
 */
function wavProcess($index, $doc, $documentPath)
{
    $fileName = basename($documentPath);
    $fileNameField = \ZendSearch\Lucene\Document\Field::text('filename', $fileName);

    // Call the Python script. The file name is quoted because it is placed on
    // a shell command line: spaces or shell metacharacters in it must not be
    // interpreted by the shell.
    $res = array();
    exec('python ../xmpParse.py ' . escapeshellarg($fileName), $res, $retcode);

    // Title: first output line. The script may have failed or printed nothing,
    // in which case an empty title is indexed instead of reading a missing offset.
    $title = isset($res[0]) ? $res[0] : '';
    $titleField = \ZendSearch\Lucene\Document\Field::text('title', $title);

    // Subjects: every remaining output line.
    foreach (array_slice($res, 1) as $subject) {
        $doc->addField(\ZendSearch\Lucene\Document\Field::text($subject, $subject));
    }

    $doc->addField($fileNameField);
    $doc->addField($titleField);
    $index->addDocument($doc);
}
Example #6
0
 /**
  * Lists all Post models.
  *
  * Also builds a Lucene index of all posts under data/posts_index and runs a
  * multi-term query made of the whitespace-separated words of the "q" query
  * parameter against it. The hits and the query are passed to the view.
  *
  * @return mixed
  */
 public function actionIndex()
 {
     $searchModel = new PostSearch();
     $dataProvider = $searchModel->search(Yii::$app->request->post());

     setlocale(LC_CTYPE, 'ru_RU.UTF-8');
     Lucene\Search\QueryParser::setDefaultEncoding('UTF-8');
     Lucene\Analysis\Analyzer\Analyzer::setDefault(new Lucene\Analysis\Analyzer\Common\Utf8\CaseInsensitive());
     Lucene\Lucene::setResultSetLimit(10);

     // create blog posts index located in data/posts_index, make sure the folder is writable
     // NOTE(review): the index is recreated from all posts on every request; consider building it elsewhere.
     $index = Lucene\Lucene::create('data/posts_index');
     // iterate through posts and build the index
     foreach (Post::find()->all() as $post) {
         $doc = new Lucene\Document();
         $doc->addField(Lucene\Document\Field::unIndexed('entry_id', $post->id));
         $doc->addField(Lucene\Document\Field::keyword('title', $post->title));
         $doc->addField(Lucene\Document\Field::text('contents', $post->content));
         $index->addDocument($doc);
     }
     // commit the index
     $index->commit();

     // Query parameters arrive already URL-decoded, so decoding them again would
     // turn a literal "+" into a space and mangle "%xx" sequences. A missing or
     // non-string "q" is treated as an empty search.
     $rawQuery = Yii::$app->getRequest()->getQueryParam('q');
     if (!is_string($rawQuery)) {
         $rawQuery = '';
     }
     // explode the search query to individual words, dropping the empty
     // tokens produced by repeated or leading/trailing spaces
     $words = array_filter(explode(' ', $rawQuery), function ($word) {
         return $word !== '';
     });
     // start a search query and add a term for each word to it
     $query = new Lucene\Search\Query\MultiTerm();
     foreach ($words as $word) {
         $query->addTerm(new Lucene\Index\Term($word));
     }

     // open and query the index
     $index = Lucene\Lucene::open('data/posts_index');
     $results = $index->find($query);

     return $this->render('index', ['searchModel' => $searchModel, 'dataProvider' => $dataProvider, 'search' => $results, 'query' => $query]);
 }
 /**
  * Create the search index at the configured location and fill it with one
  * document per user returned by the user table, then commit.
  *
  * userId and activated are added as keyword fields; firstName, lastName,
  * email and role are added as text fields.
  */
 public function generateSearchAction()
 {
     $index = Lucene\Lucene::create($this->getIndexLocation());

     foreach ($this->getUserTable()->fetchAll(false) as $user) {
         $userDocument = new Lucene\Document();
         $userDocument->addField(Document\Field::keyword('userId', $user->userId));
         $userDocument->addField(Document\Field::text('firstName', $user->firstName));
         $userDocument->addField(Document\Field::text('lastName', $user->lastName));
         $userDocument->addField(Document\Field::text('email', $user->email));
         $userDocument->addField(Document\Field::text('role', $user->role));
         $userDocument->addField(Document\Field::keyword('activated', $user->activated));
         $index->addDocument($userDocument);
     }

     $index->commit();
 }
 /**
  * Create a document from the configured fields within the extracted page data.
  *
  * The document receives, in this order: the URL as a keyword; the URL path
  * and its ancestor paths as one space-separated text field; title,
  * description, body (each preceded by an ASCII-transliterated "_translit"
  * twin) and the image; the configured title tags with their "_translit"
  * twins; and, when enabled in the parameters, the page id and route name as
  * un-indexed fields.
  *
  * @param string $url
  * @param array $page
  * @return Document
  */
 protected function createDocument($url, $page)
 {
     $document = new Document();
     if (!isset($page['status_code'])) {
         // tmp: the value is not read again inside this method
         $page['status_code'] = 0;
     }
     setlocale(LC_ALL, "cs_CZ.UTF-8");
     $document->addField(Field::keyword('url', $url));

     // ancestor URLs to search by URL
     $urlParts = parse_url($url);
     if (isset($urlParts['path']) && $urlParts['path'] && strlen($urlParts['path']) > 1) {
         $currentPath = $urlParts['path'];
         $paths = array($currentPath);
         do {
             // cut everything from the last slash onwards
             $currentPath = substr($currentPath, 0, strrpos($currentPath, '/'));
             $paths[] = $currentPath;
         } while (strrpos($currentPath, '/') > 1);
         $document->addField(Field::text(Page::URIS_KEY, implode(' ', $paths)));
     }

     foreach (array(Page::TITLE_KEY, Page::DESCRIPTION_KEY, Page::BODY_KEY, Page::IMAGE_KEY) as $fieldName) {
         $fieldValue = isset($page[$fieldName]) ? $page[$fieldName] : '';
         // configured boost for this field, 1.25 when none is configured
         $boost = isset($this->parameters[self::BOOST_PARAM][$fieldName]) ? $this->parameters[self::BOOST_PARAM][$fieldName] : 1.25;

         if ($fieldName == Page::TITLE_KEY || $fieldName == Page::DESCRIPTION_KEY || $fieldName == Page::BODY_KEY) {
             $field = Field::text($fieldName, $fieldValue);
             // translit: ASCII transliteration with apostrophes removed, added before the original field
             $asciiValue = str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $fieldValue));
             $fieldTranslit = Field::text($fieldName . '_translit', $asciiValue);
             $fieldTranslit->boost = $boost;
             $document->addField($fieldTranslit);
         } elseif ($fieldName == Page::IMAGE_KEY) {
             $field = Field::unIndexed($fieldName, $fieldValue);
         } else {
             // any other field: original value, followed by its transliteration when that differs
             $translitValue = str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $fieldValue));
             $suffix = $translitValue != $fieldValue ? ' ' . $translitValue : '';
             $field = Field::text($fieldName, $fieldValue . $suffix);
         }

         $field->boost = $boost;
         $document->addField($field);
     }

     // title tags as configured i.e. h1, h2, ...
     foreach ($this->parameters[self::TITLE_TAGS_PARAM] as $fieldName) {
         $fieldValue = '';
         if (Page::hasHeadlineType($page, $fieldName)) {
             $fieldValue = Page::getHeadline($page, $fieldName);
         }
         $hasBoost = isset($this->parameters[self::BOOST_PARAM][$fieldName]);

         // the headline itself defaults to boost 1 ...
         $field = Field::text($fieldName, $fieldValue);
         $field->boost = $hasBoost ? $this->parameters[self::BOOST_PARAM][$fieldName] : 1;
         $document->addField($field);

         // ... while its transliterated twin defaults to 1.25
         $asciiValue = str_replace("'", '', iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $fieldValue));
         $fieldTranslit = Field::text($fieldName . '_translit', $asciiValue);
         $fieldTranslit->boost = $hasBoost ? $this->parameters[self::BOOST_PARAM][$fieldName] : 1.25;
         $document->addField($fieldTranslit);
     }

     // page ID and route name, each only if its selector is defined
     $optionalFields = array(
         array(self::PAGE_ID_PARAM, Page::PAGE_ID_KEY),
         array(self::ROUTE_NAME_PARAM, Page::ROUTE_NAME_KEY),
     );
     foreach ($optionalFields as $pair) {
         $parameterName = $pair[0];
         $pageKey = $pair[1];
         if ($this->parameters[$parameterName]) {
             $fieldValue = isset($page[$pageKey]) ? $page[$pageKey] : '';
             $document->addField(Field::unIndexed($pageKey, $fieldValue));
         }
     }

     return $document;
 }
Example #9
0
 /**
  * Add a new document to the index.
  *
  * Any existing document with the given $id is deleted first. The id is
  * added as the keyword field "xref_id". Each entry of $fields becomes a text
  * field (array values are joined with single spaces; names and values are
  * trimmed). $parameters are JSON-encoded, then base64-encoded, and added as
  * the un-indexed "_parameters" field.
  *
  * @param mixed $id
  * @param array $fields     Field name => string or array of strings
  * @param array $parameters
  *
  * @return bool Always true
  */
 public function insert($id, array $fields, array $parameters = array())
 {
     // Drop whatever is currently indexed under this id.
     $this->delete($id);

     $document = new \ZendSearch\Lucene\Document();
     $document->addField(\ZendSearch\Lucene\Document\Field::keyword('xref_id', $id));

     // Indexed and stored fields.
     foreach ($fields as $fieldName => $content) {
         $text = is_array($content) ? implode(' ', $content) : $content;
         $document->addField(\ZendSearch\Lucene\Document\Field::text(trim($fieldName), trim($text)));
     }

     // Parameters are stored only, in an encoded form.
     $encodedParameters = base64_encode(json_encode($parameters));
     $document->addField(\ZendSearch\Lucene\Document\Field::unIndexed('_parameters', $encodedParameters));

     $this->getIndex()->addDocument($document);

     return true;
 }
 /**
  * Build a document with text fields "title" and "body" and a keyword field "url".
  *
  * The body is passed through strip_tags() before being added.
  *
  * @param string $title
  * @param string $body
  * @param string $url
  * @return Document
  */
 protected function createDocument($title, $body, $url)
 {
     $fields = array(
         Field::text('title', $title),
         Field::text('body', strip_tags($body)),
         Field::keyword('url', $url),
     );

     $document = new Document();
     foreach ($fields as $field) {
         $document->addField($field);
     }

     return $document;
 }
 /**
  * Add $value to the document under $name, choosing the field type from the
  * value's PHP type:
  *  - string:    keyword field holding utf8_decode($value)
  *  - \DateTime: keyword field holding the Unix timestamp
  *  - array:     text field holding the elements joined with commas
  *  - otherwise: binary field holding the value as is
  *
  * @param Lucene\Document $document
  * @param string          $name
  * @param mixed           $value
  */
 private function appendToDocument(Lucene\Document $document, $name, $value)
 {
     if (is_string($value)) {
         // NOTE(review): utf8_decode() converts to ISO-8859-1; confirm this is the encoding the index expects.
         $field = Lucene\Document\Field::keyword($name, utf8_decode($value));
     } elseif ($value instanceof \DateTime) {
         $field = Lucene\Document\Field::keyword($name, $value->getTimestamp());
     } elseif (is_array($value)) {
         $field = Lucene\Document\Field::text($name, implode(',', $value));
     } else {
         $field = Lucene\Document\Field::binary($name, $value);
     }

     $document->addField($field);
 }