Пример #1
0
 /**
  * Attach thesaurus concept paths to a record, field by field.
  *
  * For every entry of $fields, the values are read from $record at the
  * dot-separated path found in $index_fields under the same key. All values
  * are then resolved with a single bulk thesaurus lookup. Paths of matching
  * concepts are appended to $record['concept_path'][<field name>]; values
  * without any matching concept are handed over to the candidate terms
  * collection.
  *
  * @param  array $record       Record to enrich (modified in place), must contain "databox_id"
  * @param  array $fields       Map of field name => root concepts (may be empty)
  * @param  array $index_fields Map of field name => dot-separated path inside $record
  * @throws Exception           When the record has no "databox_id" key set
  */
 private function hydrate(array &$record, array $fields, array $index_fields)
 {
     if (!isset($record['databox_id'])) {
         throw new Exception('Expected a record with the "databox_id" key set.');
     }
     $databox_id = $record['databox_id'];

     // Parallel lists sharing the same offsets: one entry per value to look up.
     $raw_values = [];
     $lookup_terms = [];
     $lookup_filters = [];
     $owning_fields = [];

     foreach ($fields as $name => $root_concepts) {
         $path = explode('.', $index_fields[$name]);
         $field_values = \igorw\get_in($record, $path);
         if ($field_values === null) {
             // Nothing stored for this field in the record
             continue;
         }

         // Concepts are specific to a databox: when no root concepts are
         // provided, the lookup still has to be limited to the record's databox.
         if ($root_concepts) {
             $filter = Filter::childOfConcepts($databox_id, $root_concepts);
         } else {
             $filter = Filter::byDatabox($databox_id);
         }

         foreach ($field_values as $value) {
             $raw_values[] = $value;
             $lookup_terms[] = Term::parse($value);
             $lookup_filters[] = $filter;
             $owning_fields[] = $name;
         }
     }

     // Last argument is presumably the strict flag of the bulk lookup
     // (NOTE(review): confirm against findConceptsBulk's signature).
     $results = $this->thesaurus->findConceptsBulk($lookup_terms, null, $lookup_filters, true);

     foreach ($results as $offset => $found) {
         $field_name = $owning_fields[$offset];

         if (!$found || !is_array($found)) {
             // No concept matched: keep the raw value as a candidate term
             $this->candidate_terms->insert($field_name, $raw_values[$offset]);
             continue;
         }

         foreach ($found as $concept) {
             $record['concept_path'][$field_name][] = $concept->getPath();
         }
     }
 }
Пример #2
0
 /**
  * Look up the concepts linked to a given term.
  *
  * When strict mode is enabled, the term context has to match too:
  *   `orange (color)` will *not* match `orange` in the index
  *
  * @param  Term|string $term   Term object, or a plain string holding the term's value
  * @param  string|null $lang   Input language ("fr", "en", ...), improves result quality
  * @param  Filter|null $filter Optional filter narrowing the search to a subset
  * @param  boolean     $strict Whether strict search is enabled
  * @return Concept[]           Concepts matching the term
  */
 public function findConcepts($term, $lang = null, Filter $filter = null, $strict = false)
 {
     if (!$term instanceof TermInterface) {
         $term = new Term($term);
     }
     $this->logger->info(sprintf('Searching for term %s', $term), array('strict' => $strict, 'lang' => $lang));

     // Sub-field selection: strict mode wins over the language specific field.
     $suffix = '';
     if ($strict) {
         $suffix = '.strict';
     } elseif ($lang) {
         $suffix = '.' . $lang;
     }

     // Every token of the term's value has to match.
     // A relaxed variant tolerating 25% of non-matching tokens
     // (minimum_should_match set to '-25%', which is not exactly the same
     // as 75% of matching tokens) is intentionally left disabled.
     $value_field = 'value' . $suffix;
     $query = [
         'match' => [
             $value_field => [
                 'query' => $term->getValue(),
                 'operator' => 'and',
             ],
         ],
     ];

     if ($term->hasContext()) {
         // Both the value and the context have to match
         $context_field = 'context' . $suffix;
         $value_query = $query;
         $context_query = [
             'match' => [
                 $context_field => [
                     'query' => $term->getContext(),
                     'operator' => 'and',
                 ],
             ],
         ];
         $query = [
             'bool' => [
                 'must' => [$value_query, $context_query],
             ],
         ];
     } elseif ($strict) {
         // Strict search of a context-less term: skip entries having a context
         $query = self::applyQueryFilter($query, ['missing' => ['field' => 'context']]);
     }

     if ($lang) {
         $query = self::applyQueryFilter($query, ['term' => ['lang' => $lang]]);
     }

     if ($filter) {
         $this->logger->debug('Using filter', array('filter' => Filter::dump($filter)));
         $query = self::applyQueryFilter($query, $filter->getQueryFilter());
     }

     // Search request. Paths are deduplicated through a terms aggregation and
     // the result is read from its buckets, hence no hit is requested (size 0).
     // The minimum score is an arbitrary low limit: a more granular way to
     // discard inexact concepts is still needed, as well as disabling TF/IDF
     // on terms and boosting the score only when nearly all tokens of the
     // term's value field match.
     $params = [
         'index' => $this->options->getIndexName(),
         'type' => TermIndexer::TYPE_NAME,
         'body' => [
             'query' => $query,
             'aggs' => [
                 'dedup' => [
                     'terms' => ['field' => 'path.raw'],
                 ],
             ],
             'min_score' => $this->options->getMinScore(),
             'size' => 0,
         ],
     ];

     $this->logger->debug('Sending search', $params['body']);
     $response = $this->client->search($params);

     // Turn each aggregation bucket key into a concept
     $paths = [];
     $concepts = [];
     foreach (\igorw\get_in($response, ['aggregations', 'dedup', 'buckets'], []) as $bucket) {
         if (!isset($bucket['key'])) {
             continue;
         }
         $paths[] = $bucket['key'];
         $concepts[] = new Concept($bucket['key']);
     }

     $this->logger->info(sprintf('Found %d matching concepts', count($concepts)), array('concepts' => $paths));

     return $concepts;
 }