/**
 * Set the search index type to search in.
 * false is allowed (means we'll search *all* types)
 *
 * Validation is strict: only the literal boolean false or a value that is
 * identical to one of the entries of Connection::getAllTypes() is accepted.
 *
 * @param string|false $type
 * @throws InvalidInputException When $type is neither false nor a known type
 */
public function setType($type) {
    // Compare strictly: false is part of the allowed values, so a loose
    // in_array() would also accept other values that merely compare equal
    // to false (0, '', null) or to a non-empty string (true).
    $isAllowed = $type === false || in_array($type, Connection::getAllTypes(), true);

    if (!$isAllowed) {
        throw new InvalidInputException('Invalid search type requested', 'invalid-input');
    }

    $this->type = $type;
}
/**
 * We want to retrieve the total amount of search word hits
 * (static::termsAggregation) but our search terms may not be how
 * ElasticSearch stores the words in its index.
 * Elastic will "analyze" text (perform stemming, etc) and store
 * the terms in a normalized way.
 * AFAICT, there is not really a way to get to that information
 * from within a search query.
 *
 * Luckily, since 1.0, Elastic supports _termvector, which gives
 * you statistics about the terms in your document.
 * Since 1.4, Elastic supports feeding _termvector documents to
 * analyze.
 * We're going to (ab)use this by letting it respond with term
 * information on a bogus document that contains only our current
 * search terms.
 * So we'll give it a document with just our keywords for the
 * column that we're searching in (revisions.text) and Elastic will
 * use that column's configuration to analyze the text we feed it.
 * It will then respond with the normalized terms & their stats.
 *
 * @param string $terms Whitespace-separated search words
 * @return array Normalized terms; empty when there are no words to
 *  analyze or the response contains no terms for revisions.text
 */
protected function getTerms($terms) {
    // Drop empty chunks caused by leading/trailing whitespace; they carry
    // no words to analyze.
    $terms = preg_split('/\\s+/', $terms, -1, PREG_SPLIT_NO_EMPTY);
    if (!$terms) {
        // Nothing to analyze (empty input, or preg_split failed): skip the
        // request altogether.
        return array();
    }

    // _termvectors only works on a type, but our types are
    // configured exactly the same so it doesn't matter which
    $types = Connection::getAllTypes();
    $searchable = Connection::getFlowIndex($this->indexBaseName);
    $searchable = $searchable->getType(array_pop($types));

    $query = array(
        'doc' => array('revisions' => array('text' => $terms)),
        "fields" => array("revisions.text"),
    );

    // Elastica has no abstraction over _termvector like it has
    // for _query, so just do the request ourselves
    $response = $searchable->request('_termvector', Request::POST, $query, array());
    $data = $response->getData();

    // The terms entry may be absent from the response (e.g. when analysis
    // yields no terms); don't index into a missing key.
    if (!isset($data['term_vectors']['revisions.text']['terms'])) {
        return array();
    }

    return array_keys($data['term_vectors']['revisions.text']['terms']);
}
/**
 * Describe the parameters accepted by this API module.
 *
 * The title/pageid descriptions mention each other using this module's
 * parameter prefix, and the type description lists every type returned
 * by Connection::getAllTypes(), separated by "|".
 *
 * @return array Map of parameter name to its description
 */
public function getParamDescription() {
    $prefix = $this->getModulePrefix();
    $typeList = implode('|', Connection::getAllTypes());

    $descriptions = array();
    $descriptions['term'] = 'Search term';
    $descriptions['title'] = "Title of the boards to search in. Cannot be used together with {$prefix}pageid";
    $descriptions['pageid'] = "ID of the boards to search in. Cannot be used together with {$prefix}title";
    $descriptions['namespaces'] = 'Namespaces to search in';
    $descriptions['moderationState'] = 'Search for revisions in (a) particular moderation state(s)';
    $descriptions['sort'] = 'What to order the search results by';
    $descriptions['type'] = 'Desired type of results (' . $typeList . ')';
    $descriptions['offset'] = 'Offset value to start fetching results at';
    $descriptions['limit'] = 'Amount of results to fetch';

    return $descriptions;
}