Пример #1
0
 /**
  * {@inheritdoc}
  *
  * Compiles the query string, sends the resulting search (with paging and,
  * when applicable, highlight and aggregation sections) to the client, then
  * wraps the hydrated hits and the facets into a SearchEngineResult.
  */
 public function query($string, $offset, $perPage, SearchEngineOptions $options = null)
 {
     if (!$options) {
         $options = new SearchEngineOptions();
     }
     $context = $this->context_factory->createContext($options);

     /** @var QueryCompiler $compiler */
     $compiler = $this->app['query_compiler'];
     $recordQuery = $compiler->compile($string, $context);

     // Base request, then paging window.
     $searchParams = $this->createRecordQueryParams($recordQuery, $options, null);
     $searchParams['body']['from'] = $offset;
     $searchParams['body']['size'] = $perPage;

     // The highlight flag is read from $this->options, not from the $options argument.
     if ($this->options->getHighlight()) {
         $searchParams['body']['highlight'] = $this->buildHighlightRules($context);
     }

     // Aggregations are attached only when the helper returns something truthy.
     $aggregations = $this->getAggregationQueryParams($options);
     if ($aggregations) {
         $searchParams['body']['aggs'] = $aggregations;
     }

     $response = $this->client->search($searchParams);

     // Hydrate each hit, passing its zero-based position in the returned page.
     $records = new ArrayCollection();
     $position = 0;
     foreach ($response['hits']['hits'] as $hit) {
         $records[] = ElasticsearchRecordHydrator::hydrate($hit, $position);
         $position++;
     }

     /** @var FacetsResponse $facets */
     $facets = $this->facetsResponseFactory->__invoke($response);

     // Debug payload returned (JSON-encoded) alongside the results.
     $queryInfo = [
         'ast'          => $compiler->parse($string)->dump(),
         'query_main'   => $recordQuery,
         'query'        => $searchParams['body'],
         'query_string' => json_encode($searchParams['body']),
     ];

     $total = $response['hits']['total'];

     return new SearchEngineResult(
         $records,
         json_encode($queryInfo),
         $response['took'],
         $offset,
         $total,
         $total,
         null,
         null,
         $facets->getAsSuggestions(),
         [],
         $this->indexName,
         $facets
     );
 }
Пример #2
0
 /**
  * Sends a single setting to the index named by the options, using the
  * client's indices put-settings call.
  *
  * @param  string $name  Key placed in the request body
  * @param  mixed  $value Value stored under that key
  * @return mixed         The 'acknowledged' entry of the response (null when absent)
  */
 private function setSetting($name, $value)
 {
     $request = [
         'index' => $this->options->getIndexName(),
         'body'  => [$name => $value],
     ];

     $response = $this->client->indices()->putSettings($request);

     return igorw\get_in($response, ['acknowledged']);
 }
 /**
  * Stores the given options, converted to an array, under the
  * main / search-engine / options configuration path.
  *
  * @param ElasticsearchOptions $configuration Options to store
  * @return void
  */
 private function saveElasticSearchOptions(ElasticsearchOptions $configuration)
 {
     $conf = $this->getConf();
     $path = ['main', 'search-engine', 'options'];

     $conf->set($path, $configuration->toArray());
 }
Пример #4
0
 /**
  * Look up the concepts whose indexed terms match the given Term.
  *
  * When strict mode is on, the term context must match as well:
  *   `orange (color)` will *not* match `orange` in the index
  *
  * @param  Term|string $term   Term object, or a plain string holding the term's value
  * @param  string|null $lang   Input language ("fr", "en", ...) used to pick the field and filter results
  * @param  Filter|null $filter Optional filter restricting the search to a subset
  * @param  boolean     $strict Whether strict matching is enabled
  * @return Concept[]           Matching concepts
  */
 public function findConcepts($term, $lang = null, Filter $filter = null, $strict = false)
 {
     if (!$term instanceof TermInterface) {
         $term = new Term($term);
     }

     $this->logger->info(sprintf('Searching for term %s', $term), array('strict' => $strict, 'lang' => $lang));

     // Pick the sub-field to search: strict wins over language, no suffix otherwise.
     $suffix = '';
     if ($strict) {
         $suffix = '.strict';
     } elseif ($lang) {
         $suffix = sprintf('.%s', $lang);
     }

     // All tokens of the term's value must match ("and" operator).
     // A `minimum_should_match` of '-25%' (tolerating 25% of non-matching
     // tokens) was considered here but is not enabled.
     $valueField = sprintf('value%s', $suffix);
     $query = [
         'match' => [
             $valueField => [
                 'query'    => $term->getValue(),
                 'operator' => 'and',
             ],
         ],
     ];

     if ($term->hasContext()) {
         // Both the value and the context must match.
         $contextField = sprintf('context%s', $suffix);
         $contextQuery = [
             'match' => [
                 $contextField => [
                     'query'    => $term->getContext(),
                     'operator' => 'and',
                 ],
             ],
         ];
         $query = [
             'bool' => [
                 'must' => [
                     0 => $query,
                     1 => $contextQuery,
                 ],
             ],
         ];
     } elseif ($strict) {
         // Strict search on a context-less term: apply a `missing` filter on the context field.
         $query = self::applyQueryFilter($query, ['missing' => ['field' => 'context']]);
     }

     if ($lang) {
         $query = self::applyQueryFilter($query, ['term' => ['lang' => $lang]]);
     }

     if ($filter) {
         $this->logger->debug('Using filter', array('filter' => Filter::dump($filter)));
         $query = self::applyQueryFilter($query, $filter->getQueryFilter());
     }

     // Search request. Paths are deduplicated through a terms aggregation,
     // and no hits are requested (size 0) since the data is read from the aggs.
     // The min_score threshold is an arbitrary low limit; a more granular way
     // to drop inexact concepts is still needed (disable TF/IDF on terms, and
     // boost the score only when nearly all tokens of the value field match).
     $params = [
         'index' => $this->options->getIndexName(),
         'type'  => TermIndexer::TYPE_NAME,
         'body'  => [
             'query'     => $query,
             'aggs'      => ['dedup' => ['terms' => ['field' => 'path.raw']]],
             'min_score' => $this->options->getMinScore(),
             'size'      => 0,
         ],
     ];

     $this->logger->debug('Sending search', $params['body']);
     $response = $this->client->search($params);

     // Turn every keyed aggregation bucket into a Concept.
     $paths = [];
     $concepts = [];
     foreach (\igorw\get_in($response, ['aggregations', 'dedup', 'buckets'], []) as $bucket) {
         if (!isset($bucket['key'])) {
             continue;
         }
         $paths[] = $bucket['key'];
         $concepts[] = new Concept($bucket['key']);
     }

     $this->logger->info(sprintf('Found %d matching concepts', count($concepts)), array('concepts' => $paths));

     return $concepts;
 }
 /**
  * Registers the search-engine related services on the application
  * container: the engine itself and its logger, the indexer services, the
  * low-level Elasticsearch client and options, and the query parsing /
  * compiling helpers.
  *
  * NOTE(review): presumably the service provider `register` hook; confirm
  * against the enclosing class declaration.
  *
  * @param Application $app Container receiving the service definitions
  */
 public function register(Application $app)
 {
     // Alias resolving to the 'search_engine' service.
     $app['phraseanet.SE'] = function ($app) {
         return $app['search_engine'];
     };
     $app['phraseanet.SE.logger'] = $app->share(function (Application $app) {
         return new SearchEngineLogger($app);
     });
     $app['search_engine'] = $app->share(function ($app) {
         $type = $app['conf']->get(['main', 'search-engine', 'type']);
         // Only the Elasticsearch engine type is accepted here.
         if ($type !== SearchEngineInterface::TYPE_ELASTICSEARCH) {
             throw new InvalidArgumentException(sprintf('Invalid search engine type "%s".', $type));
         }
         /** @var ElasticsearchOptions $options */
         $options = $app['elasticsearch.options'];
         return new ElasticSearchEngine($app, $app['search_engine.structure'], $app['elasticsearch.client'], $options->getIndexName(), $app['query_context.factory'], $app['elasticsearch.facets_response.factory'], $options);
     });
     $app['search_engine.structure'] = $app->share(function (\Alchemy\Phrasea\Application $app) {
         $databoxes = $app->getDataboxes();
         return GlobalStructure::createFromDataboxes($databoxes);
     });
     // Protected: the closure itself is the service, called with a raw response array.
     $app['elasticsearch.facets_response.factory'] = $app->protect(function (array $response) use($app) {
         return new FacetsResponse(new Escaper(), $response, $app['search_engine.structure']);
     });
     /* Indexer related services */
     $app['elasticsearch.indexer'] = $app->share(function ($app) {
         return new Indexer($app['elasticsearch.client'], $app['elasticsearch.options'], $app['elasticsearch.indexer.term_indexer'], $app['elasticsearch.indexer.record_indexer'], $app['phraseanet.appbox']);
     });
     $app['elasticsearch.indexer.term_indexer'] = $app->share(function ($app) {
         return new TermIndexer($app['phraseanet.appbox'], array_keys($app['locales.available']));
     });
     $app['elasticsearch.indexer.record_indexer'] = $app->share(function ($app) {
         // TODO Use upcoming monolog factory
         $logger = new \Monolog\Logger('indexer');
         $logger->pushHandler(new \Monolog\Handler\ErrorLogHandler());
         return new RecordIndexer($app['search_engine.structure'], $app['elasticsearch.record_helper'], $app['thesaurus'], $app['phraseanet.appbox'], array_keys($app['locales.available']), $logger);
     });
     $app['elasticsearch.record_helper'] = $app->share(function ($app) {
         return new RecordHelper($app['phraseanet.appbox']);
     });
     $app['dispatcher'] = $app->share($app->extend('dispatcher', function (EventDispatcherInterface $dispatcher, $app) {
         $subscriber = new IndexerSubscriber(new LazyLocator($app, 'elasticsearch.indexer'));
         $dispatcher->addSubscriber($subscriber);
         $listener = array($subscriber, 'flushQueue');
         // Add synchronous flush when used in CLI.
         if (isset($app['console'])) {
             foreach (array_keys($subscriber->getSubscribedEvents()) as $eventName) {
                 $dispatcher->addListener($eventName, $listener, -10);
             }
             return $dispatcher;
         }
         // Otherwise flush the queue once, on kernel termination.
         $dispatcher->addListener(KernelEvents::TERMINATE, $listener);
         return $dispatcher;
     }));
     /* Low-level elasticsearch services */
     $app['elasticsearch.client'] = $app->share(function ($app) {
         /** @var ElasticsearchOptions $options */
         $options = $app['elasticsearch.options'];
         $clientParams = ['hosts' => [sprintf('%s:%s', $options->getHost(), $options->getPort())]];
         // Create file logger for debug
         if ($app['debug']) {
             /** @var Logger $logger */
             $logger = new $app['monolog.logger.class']('search logger');
             // The minimum level belongs to the handler constructor; passing it
             // to pushHandler() (which only takes the handler) silently drops it.
             $logger->pushHandler(new RotatingFileHandler($app['log.path'] . DIRECTORY_SEPARATOR . 'elasticsearch.log', 2, Logger::INFO));
             $clientParams['logObject'] = $logger;
             $clientParams['logging'] = true;
         }
         return new Client($clientParams);
     });
     $app['elasticsearch.options'] = $app->share(function ($app) {
         $options = ElasticsearchOptions::fromArray($app['conf']->get(['main', 'search-engine', 'options'], []));
         // No index name configured: derive one from the instance key,
         // stripping '/' and '.' and lower-casing the result.
         if (empty($options->getIndexName())) {
             $options->setIndexName(strtolower(sprintf('phraseanet_%s', str_replace(array('/', '.'), array('', ''), $app['conf']->get(['main', 'key'])))));
         }
         return $options;
     });
     /* Querying helper services */
     $app['thesaurus'] = $app->share(function ($app) {
         // TODO Use upcoming monolog factory
         $logger = new \Monolog\Logger('thesaurus');
         $logger->pushHandler(new \Monolog\Handler\ErrorLogHandler());
         return new Thesaurus($app['elasticsearch.client'], $app['elasticsearch.options'], $logger);
     });
     $app['query_context.factory'] = $app->share(function ($app) {
         return new QueryContextFactory($app['search_engine.structure'], array_keys($app['locales.available']), $app['locale']);
     });
     $app['query_parser.grammar_path'] = function ($app) {
         $configPath = ['registry', 'searchengine', 'query-grammar-path'];
         $grammarPath = $app['conf']->get($configPath, 'grammar/query.pp');
         // The grammar path is resolved relative to a directory five levels above this file.
         $projectRoot = '../../../../..';
         return realpath(implode('/', [__DIR__, $projectRoot, $grammarPath]));
     };
     $app['query_parser'] = $app->share(function ($app) {
         $grammarPath = $app['query_parser.grammar_path'];
         return Compiler\Llk\Llk::load(new File\Read($grammarPath));
     });
     // Protected: the compiler receives the factory closure, not a visitor instance.
     $app['query_visitor.factory'] = $app->protect(function () use($app) {
         return new QueryVisitor($app['search_engine.structure']);
     });
     $app['query_compiler'] = $app->share(function ($app) {
         return new QueryCompiler($app['query_parser'], $app['query_visitor.factory'], $app['thesaurus']);
     });
 }