setFieldType() public method

Sets the match type for the given field (for example `phrase` or `phrase_prefix`).
public setFieldType ( string $field, string $type )
$field string
$type string
コード例 #1
0
 /**
  * Add a match query on a single field and return it.
  *
  * The match type decides how the analyzed text is turned into a query and
  * can be either phrase or phrase_prefix:
  *
  *  - phrase analyzes the text and creates a phrase query out of it.
  *  - phrase_prefix behaves like phrase, but additionally allows prefix
  *    matches on the last term in the text.
  *
  * The query that is built is appended to $this->query before it is returned.
  *
  * @link https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
  *
  * @param string $field The field to search in the index
  * @param string $query The values to search for
  * @param string $type The match type
  * @param bool $fuzzy Set whether the match should be fuzzy
  * @return Query
  */
 public function match($field, $query, $type = 'phrase', $fuzzy = false)
 {
     $matchQuery = new Match();
     $matchQuery->setFieldQuery($field, $query);
     $matchQuery->setFieldType($field, $type);

     // Fuzziness is opt-in; when requested it is always set to 'AUTO'.
     if ($fuzzy) {
         $matchQuery->setFieldFuzziness($field, 'AUTO');
     }

     // Register the wrapped query on this object and hand the same instance back.
     return $this->query[] = $this->newQuery($matchQuery);
 }
コード例 #2
0
 /**
  * A phrase_prefix match for "New" on the name field must hit exactly the
  * two documents whose name starts with that word.
  */
 public function testMatchPhrasePrefix()
 {
     $index = $this->_getClient()->getIndex('test');
     $index->create(array(), true);
     $docType = $index->getType('test');

     // Document id => name; only ids 2 and 3 start with "New".
     $names = array(
         1 => 'Basel-Stadt',
         2 => 'New York',
         3 => 'New Hampshire',
         4 => 'Basel Land',
     );
     foreach ($names as $id => $name) {
         $docType->addDocument(new Document($id, array('name' => $name)));
     }

     // Refresh before searching so the documents added above are searchable.
     $index->refresh();

     $matchQuery = new Match();
     $matchQuery->setFieldQuery('name', 'New');
     $matchQuery->setFieldType('name', 'phrase_prefix');

     $this->assertEquals(2, $index->search($matchQuery)->count());
 }
コード例 #3
0
    /**
     * Search articles with provided term.
     *
     * The term is processed in stages:
     *  1. "prefix:", "prefer-recent:", "local:", "insource:/regex/" and
     *     "key:value" special syntax (boost-templates, hastemplate, linksto,
     *     incategory, insource, intitle) is extracted from the term and turned
     *     into filters or settings on this object.
     *  2. Quoted phrases and wildcard terms in what remains are rewritten.
     *  3. The resulting query string is sent to search(); if Elasticsearch
     *     reports a parse error the search is retried once as a
     *     simple_query_string query.
     *
     * @param string $term term to search
     * @param bool $showSuggestion should this search suggest alternative searches that might be better?
     * @return Status(mixed) status containing results defined by resultsType on success
     */
    public function searchText($term, $showSuggestion)
    {
        $checkLengthStatus = self::checkTextSearchRequestLength($term);
        if (!$checkLengthStatus->isOk()) {
            return $checkLengthStatus;
        }
        // Transform Mediawiki specific syntax to filters and extra (pre-escaped) query string
        // $searcher is the handle the closures below use instead of $this.
        $searcher = $this;
        $originalTerm = $term;
        $searchContainedSyntax = false;
        $this->term = $term;
        $this->boostLinks = $this->config->get('CirrusSearchBoostLinks');
        $searchType = 'full_text';
        // Handle title prefix notation
        $prefixPos = strpos($this->term, 'prefix:');
        if ($prefixPos !== false) {
            // 7 === strlen('prefix:'); everything after the keyword is the prefix value.
            $value = substr($this->term, 7 + $prefixPos);
            // Trim quotes in case the user wanted to quote the prefix
            $value = trim($value, '"');
            if (strlen($value) > 0) {
                $searchContainedSyntax = true;
                // Keep only the text before "prefix:", dropping the character just before it.
                $this->term = substr($this->term, 0, max(0, $prefixPos - 1));
                $this->suggestSuffixes[] = ' prefix:' . $value;
                // Suck namespaces out of $value
                $cirrusSearchEngine = new CirrusSearch();
                $cirrusSearchEngine->setConnection($this->connection);
                $value = trim($cirrusSearchEngine->replacePrefixes($value));
                $this->namespaces = $cirrusSearchEngine->namespaces;
                // If the namespace prefix wasn't the entire prefix filter then add a filter for the title
                if (strpos($value, ':') !== strlen($value) - 1) {
                    $value = str_replace('_', ' ', $value);
                    $prefixQuery = new \Elastica\Query\Match();
                    $prefixQuery->setFieldQuery('title.prefix', $value);
                    $this->filters[] = new \Elastica\Filter\Query($prefixQuery);
                }
            }
        }
        $preferRecentDecayPortion = $this->config->get('CirrusSearchPreferRecentDefaultDecayPortion');
        $preferRecentHalfLife = $this->config->get('CirrusSearchPreferRecentDefaultHalfLife');
        $unspecifiedDecayPortion = $this->config->get('CirrusSearchPreferRecentUnspecifiedDecayPortion');
        // Matches "prefer-recent:" and then an optional floating point number <= 1 but >= 0 (decay
        // portion) and then an optional comma followed by another floating point number >= 0 (half life)
        $this->extractSpecialSyntaxFromTerm('/prefer-recent:(1|0?(?:\\.\\d+)?)?(?:,(\\d*\\.?\\d+))? ?/', function ($matches) use($unspecifiedDecayPortion, &$preferRecentDecayPortion, &$preferRecentHalfLife, &$searchContainedSyntax) {
            if (isset($matches[1]) && strlen($matches[1])) {
                $preferRecentDecayPortion = floatval($matches[1]);
            } else {
                $preferRecentDecayPortion = $unspecifiedDecayPortion;
            }
            if (isset($matches[2])) {
                $preferRecentHalfLife = floatval($matches[2]);
            }
            $searchContainedSyntax = true;
            return '';
        });
        $this->preferRecentDecayPortion = $preferRecentDecayPortion;
        $this->preferRecentHalfLife = $preferRecentHalfLife;
        // A leading "local:" restricts the search to the local wiki.
        $this->extractSpecialSyntaxFromTerm('/^\\s*local:/', function ($matches) use($searcher) {
            $searcher->limitSearchToLocalWiki(true);
            return '';
        });
        // Handle other filters
        // Work on local copies so the closures can modify them by reference; they are
        // written back to $this once all special syntax has been processed.
        $filters = $this->filters;
        $notFilters = $this->notFilters;
        $boostTemplates = self::getDefaultBoostTemplates();
        $highlightSource = array();
        // Regex source search: insource:/pattern/ with optional leading "-" (negate) and trailing "i" (case insensitive).
        $this->extractSpecialSyntaxFromTerm('/(?<not>-)?insource:\\/(?<pattern>(?:[^\\\\\\/]|\\\\.)+)\\/(?<insensitive>i)? ?/', function ($matches) use($searcher, &$filters, &$notFilters, &$searchContainedSyntax, &$searchType, &$highlightSource) {
            if (!$searcher->config->get('CirrusSearchEnableRegex')) {
                return;
            }
            $searchContainedSyntax = true;
            $searchType = 'regex';
            $insensitive = !empty($matches['insensitive']);
            $filterDestination =& $filters;
            if (!empty($matches['not'])) {
                $filterDestination =& $notFilters;
            } else {
                // Only non-negated patterns are recorded for highlighting.
                $highlightSource[] = array('pattern' => $matches['pattern'], 'locale' => $searcher->config->get('LanguageCode'), 'insensitive' => $insensitive);
            }
            $regex = $searcher->config->getElement('CirrusSearchWikimediaExtraPlugin', 'regex');
            // Strict comparison: the option array mixes string flags with numeric settings
            // (e.g. max_inspect), and a loose comparison could match 'use' against those.
            if ($regex && in_array('use', $regex, true)) {
                $filter = new SourceRegex($matches['pattern'], 'source_text', 'source_text.trigram');
                if (isset($regex['max_inspect'])) {
                    $filter->setMaxInspect($regex['max_inspect']);
                } else {
                    $filter->setMaxInspect(10000);
                }
                $filter->setMaxDeterminizedStates($searcher->config->get('CirrusSearchRegexMaxDeterminizedStates'));
                if (isset($regex['max_ngrams_extracted'])) {
                    $filter->setMaxNgramExtracted($regex['max_ngrams_extracted']);
                }
                $filter->setCaseSensitive(!$insensitive);
                $filter->setLocale($searcher->config->get('LanguageCode'));
                $filterDestination[] = $filter;
            } else {
                // Without the extra plugin we need to use groovy to attempt the regex.
                // Its less good but its something.
                $script = <<<GROOVY
import org.apache.lucene.util.automaton.*;
sourceText = _source.get("source_text");
if (sourceText == null) {
\tfalse;
} else {
\tif (automaton == null) {
\t\tif (insensitive) {
\t\t\tlocale = new Locale(language);
\t\t\tpattern = pattern.toLowerCase(locale);
\t\t}
\t\tregexp = new RegExp(pattern, RegExp.ALL ^ RegExp.AUTOMATON);
\t\tautomaton = new CharacterRunAutomaton(regexp.toAutomaton());
\t}
\tif (insensitive) {
\t\tsourceText = sourceText.toLowerCase(locale);
\t}
\tautomaton.run(sourceText);
}

GROOVY;
                $filterDestination[] = new \Elastica\Filter\Script(new \Elastica\Script($script, array('pattern' => '.*(' . $matches['pattern'] . ').*', 'insensitive' => $insensitive, 'language' => $searcher->config->get('LanguageCode'), 'automaton' => null, 'locale' => null), 'groovy'));
            }
        });
        // Match filters that look like foobar:thing or foobar:"thing thing"
        // The {7,15} keeps this from having horrible performance on big strings
        $escaper = $this->escaper;
        $fuzzyQuery = $this->fuzzyQuery;
        $isEmptyQuery = false;
        $this->extractSpecialSyntaxFromTerm('/(?<key>[a-z\\-]{7,15}):\\s*(?<value>"(?<quoted>(?:[^"]|(?<=\\\\)")+)"|(?<unquoted>\\S+)) ?/', function ($matches) use($searcher, $escaper, &$filters, &$notFilters, &$boostTemplates, &$searchContainedSyntax, &$fuzzyQuery, &$highlightSource, &$isEmptyQuery) {
            $key = $matches['key'];
            $quotedValue = $matches['value'];
            // Prefer the quoted form (with escaped quotes unescaped), otherwise the bare word.
            $value = $matches['quoted'] !== '' ? str_replace('\\"', '"', $matches['quoted']) : $matches['unquoted'];
            $filterDestination =& $filters;
            $keepText = true;
            // A leading "-" negates the filter and routes it to the not-filters.
            if ($key[0] === '-') {
                $key = substr($key, 1);
                $filterDestination =& $notFilters;
                $keepText = false;
            }
            switch ($key) {
                case 'boost-templates':
                    $boostTemplates = Searcher::parseBoostTemplates($value);
                    if ($boostTemplates === null) {
                        $boostTemplates = Searcher::getDefaultBoostTemplates();
                    }
                    $searchContainedSyntax = true;
                    return '';
                case 'hastemplate':
                    // We emulate template syntax here as best as possible,
                    // so things in NS_MAIN are prefixed with ":" and things
                    // in NS_TEMPLATE don't have a prefix at all. Since we
                    // don't actually index templates like that, munge the
                    // query here
                    if (strpos($value, ':') === 0) {
                        $value = substr($value, 1);
                    } else {
                        $title = Title::newFromText($value);
                        if ($title && $title->getNamespace() == NS_MAIN) {
                            $value = Title::makeTitle(NS_TEMPLATE, $title->getDBkey())->getPrefixedText();
                        }
                    }
                    $filterDestination[] = $searcher->matchPage('template', $value);
                    $searchContainedSyntax = true;
                    return '';
                case 'linksto':
                    $filterDestination[] = $searcher->matchPage('outgoing_link', $value, true);
                    $searchContainedSyntax = true;
                    return '';
                case 'incategory':
                    // Categories are "|" separated; the number honoured is capped by configuration.
                    $categories = array_slice(explode('|', $value), 0, $searcher->config->get('CirrusSearchMaxIncategoryOptions'));
                    $categoryFilters = $searcher->matchPageCategories($categories);
                    if ($categoryFilters === null) {
                        $isEmptyQuery = true;
                    } else {
                        $filterDestination[] = $categoryFilters;
                    }
                    $searchContainedSyntax = true;
                    return '';
                case 'insource':
                    $updateReferences = Filters::insource($escaper, $searcher->getSearchContext(), $quotedValue);
                    $updateReferences($fuzzyQuery, $filterDestination, $highlightSource, $searchContainedSyntax);
                    return '';
                case 'intitle':
                    $updateReferences = Filters::intitle($escaper, $searcher->getSearchContext(), $quotedValue);
                    $updateReferences($fuzzyQuery, $filterDestination, $highlightSource, $searchContainedSyntax);
                    // Non-negated intitle keeps its value in the term.
                    return $keepText ? "{$quotedValue} " : '';
                default:
                    // Unknown key: leave the text in the term untouched.
                    return $matches[0];
            }
        });
        if ($isEmptyQuery) {
            return Status::newGood(new SearchResultSet(true));
        }
        // Publish the state collected by the closures above.
        $this->filters = $filters;
        $this->notFilters = $notFilters;
        $this->boostTemplates = $boostTemplates;
        $this->searchContext->setSearchContainedSyntax($searchContainedSyntax);
        $this->fuzzyQuery = $fuzzyQuery;
        $this->highlightSource = $highlightSource;
        $this->term = $this->escaper->escapeQuotes($this->term);
        $this->term = trim($this->term);
        // Match quoted phrases including those containing escaped quotes
        // Those phrases can optionally be followed by ~ then a number (this is the phrase slop)
        // That can optionally be followed by a ~ (this matches stemmed words in phrases)
        // The following all match: "a", "a boat", "a\"boat", "a boat"~, "a boat"~9, "a boat"~9~, -"a boat", -"a boat"~9~
        $slop = $this->config->get('CirrusSearchPhraseSlop');
        $query = self::replacePartsOfQuery($this->term, '/(?<![\\]])(?<negate>-|!)?(?<main>"((?:[^"]|(?<=\\\\)")+)"(?<slop>~\\d+)?)(?<fuzzy>~)?/', function ($matches) use($searcher, $escaper, $slop) {
            $negate = $matches['negate'][0] ? 'NOT ' : '';
            $main = $escaper->fixupQueryStringPart($matches['main'][0]);
            // A plain quoted phrase ending in "*" becomes a phrase_prefix match. The inner
            // match uses its own variable so the callback's $matches stays intact for the
            // checks further down.
            if (!$negate && !isset($matches['fuzzy']) && !isset($matches['slop']) && preg_match('/^"([^"*]+)[*]"/', $main, $prefixMatches)) {
                $phraseMatch = new \Elastica\Query\Match();
                $phraseMatch->setFieldQuery("all.plain", $prefixMatches[1]);
                $phraseMatch->setFieldType("all.plain", "phrase_prefix");
                $searcher->nonTextQueries[] = $phraseMatch;
                $phraseHighlightMatch = new \Elastica\Query\QueryString();
                $phraseHighlightMatch->setQuery($prefixMatches[1] . '*');
                $phraseHighlightMatch->setFields(array('all.plain'));
                $searcher->nonTextHighlightQueries[] = $phraseHighlightMatch;
                return array();
            }
            if (!isset($matches['fuzzy'])) {
                if (!isset($matches['slop'])) {
                    $main = $main . '~' . $slop['precise'];
                }
                // Got to collect phrases that don't use the all field so we can highlight them.
                // The highlighter locks phrases to the fields that specify them.  It doesn't do
                // that with terms.
                return array('escaped' => $negate . $searcher->switchSearchToExact($main, true), 'nonAll' => $negate . $searcher->switchSearchToExact($main, false));
            }
            return array('escaped' => $negate . $main);
        });
        // Find prefix matches and force them to only match against the plain analyzed fields.  This
        // prevents prefix matches from getting confused by stemming.  Users really don't expect stemming
        // in prefix queries.
        $query = self::replaceAllPartsOfQuery($query, '/\\w+\\*(?:\\w*\\*?)*/u', function ($matches) use($searcher, $escaper) {
            $term = $escaper->fixupQueryStringPart($matches[0][0]);
            return array('escaped' => $searcher->switchSearchToExactForWildcards($term), 'nonAll' => $searcher->switchSearchToExactForWildcards($term));
        });
        // Flatten the query parts into three parallel lists: the escaped query, the
        // variant that avoids the all field, and the raw text used for near match.
        $escapedQuery = array();
        $nonAllQuery = array();
        $nearMatchQuery = array();
        foreach ($query as $queryPart) {
            if (isset($queryPart['escaped'])) {
                $escapedQuery[] = $queryPart['escaped'];
                if (isset($queryPart['nonAll'])) {
                    $nonAllQuery[] = $queryPart['nonAll'];
                } else {
                    $nonAllQuery[] = $queryPart['escaped'];
                }
                continue;
            }
            if (isset($queryPart['raw'])) {
                $fixed = $this->escaper->fixupQueryStringPart($queryPart['raw']);
                $escapedQuery[] = $fixed;
                $nonAllQuery[] = $fixed;
                $nearMatchQuery[] = $queryPart['raw'];
                continue;
            }
            LoggerFactory::getInstance('CirrusSearch')->warning('Unknown query part: {queryPart}', array('queryPart' => serialize($queryPart)));
        }
        // Actual text query
        list($queryStringQueryString, $this->fuzzyQuery) = $escaper->fixupWholeQueryString(implode(' ', $escapedQuery));
        // Note that no escaping is required for near_match's match query.
        $nearMatchQuery = implode(' ', $nearMatchQuery);
        if ($queryStringQueryString !== '') {
            if (preg_match('/(?<!\\\\)[?*+~"!|-]|AND|OR|NOT/', $queryStringQueryString)) {
                $this->searchContext->setSearchContainedSyntax(true);
                // We're unlikely to make good suggestions for query string with special syntax in them....
                $showSuggestion = false;
            }
            $fields = array_merge($this->buildFullTextSearchFields(1, '.plain', true), $this->buildFullTextSearchFields($this->config->get('CirrusSearchStemmedWeight'), '', true));
            $nearMatchFields = $this->buildFullTextSearchFields($this->config->get('CirrusSearchNearMatchWeight'), '.near_match', true);
            $this->query = $this->buildSearchTextQuery($fields, $nearMatchFields, $queryStringQueryString, $nearMatchQuery);
            // The highlighter doesn't know about the weighting from the all fields so we have to send
            // it a query without the all fields.  This swaps one in.
            if ($this->config->getElement('CirrusSearchAllFields', 'use')) {
                $nonAllFields = array_merge($this->buildFullTextSearchFields(1, '.plain', false), $this->buildFullTextSearchFields($this->config->get('CirrusSearchStemmedWeight'), '', false));
                list($nonAllQueryString, ) = $escaper->fixupWholeQueryString(implode(' ', $nonAllQuery));
                $this->highlightQuery = $this->buildSearchTextQueryForFields($nonAllFields, $nonAllQueryString, 1, false, true);
            } else {
                $nonAllFields = $fields;
            }
            // Only do a phrase match rescore if the query doesn't include any quotes and has a space.
            // Queries without spaces are either single term or have a phrase query generated.
            // Queries with the quote already contain a phrase query and we can't build phrase queries
            // out of phrase queries at this point.
            if ($this->config->get('CirrusSearchPhraseRescoreBoost') > 1.0 && $this->config->get('CirrusSearchPhraseRescoreWindowSize') && !$this->searchContext->isSearchContainedSyntax() && strpos($queryStringQueryString, '"') === false && strpos($queryStringQueryString, ' ') !== false) {
                $rescoreFields = $fields;
                if (!$this->config->get('CirrusSearchAllFieldsForRescore')) {
                    $rescoreFields = $nonAllFields;
                }
                $this->rescore[] = array('window_size' => $this->config->get('CirrusSearchPhraseRescoreWindowSize'), 'query' => array('rescore_query' => $this->buildSearchTextQueryForFields($rescoreFields, '"' . $queryStringQueryString . '"', $this->config->getElement('CirrusSearchPhraseSlop', 'boost'), true), 'query_weight' => 1.0, 'rescore_query_weight' => $this->config->get('CirrusSearchPhraseRescoreBoost')));
            }
            // Suggestions are only requested for the first page of results.
            $showSuggestion = $showSuggestion && $this->offset == 0;
            if ($showSuggestion) {
                $this->suggest = array('text' => $this->term, 'suggest' => $this->buildSuggestConfig('suggest'));
            }
            $result = $this->search($searchType, $originalTerm);
            if (!$result->isOK() && $this->isParseError($result)) {
                // Elasticsearch has reported a parse error and we've already logged it when we built the status
                // so at this point all we can do is retry the query as a simple query string query.
                $this->query = new \Elastica\Query\Simple(array('simple_query_string' => array('fields' => $fields, 'query' => $queryStringQueryString, 'default_operator' => 'AND')));
                $this->rescore = array();
                // Not worth trying in this state.
                $result = $this->search('degraded_full_text', $originalTerm);
                // If that doesn't work we're out of luck but it should.  There no guarantee it'll work properly
                // with the syntax we've built above but it'll do _something_ and we'll still work on fixing all
                // the parse errors that come in.
            }
        } else {
            $result = $this->search($searchType, $originalTerm);
            // No need to check for a parse error here because we don't actually create a query for
            // Elasticsearch to parse
        }
        return $result;
    }