Beispiel #1
0
    /**
     * Builds the argument list for a Lucene search from a generic Query object.
     *
     * The first element of the returned array is a Boolean query. When the
     * given Query carries a query string, the default search field and the
     * parser's default operator are set from it (both are static, i.e. global,
     * settings), and the keyword is added to the Boolean query with sign
     * `true`: either parsed by QueryParser, or - for the bare "*" keyword -
     * wrapped in a Wildcard query with a minimum prefix length of 0.
     *
     * Every sort entry then contributes three further elements: the entry's
     * current key, SORT_REGULAR, and SORT_ASC when the entry's current value
     * equals 'asc' (SORT_DESC for anything else).
     *
     * @param Query $query
     * @return array Boolean query followed by (key, sort type, sort order) triples
     */
    public function mapQuery(Query $query)
    {
        $booleanQuery = new Boolean();

        if ($query->hasQueryString()) {
            $queryString = $query->getQueryString();

            Lucene::setDefaultSearchField($queryString->getDefaultField());

            if ($queryString->getDefaultOperator() == Query::OPERATOR_AND) {
                $defaultOperator = QueryParser::B_AND;
            } else {
                $defaultOperator = QueryParser::B_OR;
            }
            QueryParser::setDefaultOperator($defaultOperator);

            $keyword = $queryString->getQuery();

            if ("*" !== $keyword) {
                $parsed = QueryParser::parse($keyword);
            } else {
                // A lone "*" is not handed to the parser; it becomes an
                // unrestricted wildcard term instead.
                $parsed = new Wildcard(new Term($keyword));
                $parsed->setMinPrefixLength(0);
            }

            $booleanQuery->addSubquery($parsed, true);
        }

        $result = array($booleanQuery);

        foreach ($query->getSort() as $sort) {
            $direction = SORT_DESC;
            if (current($sort) == 'asc') {
                $direction = SORT_ASC;
            }

            array_push($result, key($sort), SORT_REGULAR, $direction);
        }

        return $result;
    }
Beispiel #2
0
    /**
     * Parses a query string into a query object.
     *
     * The shared parser instance is reset, the string is tokenized with the
     * given encoding (or the instance's default encoding when $encoding is
     * null) and every token is fed through the parser's state machine.
     * An input that yields no tokens produces a Query\Insignificant.
     *
     * A QueryParserException raised while parsing (syntax error at a token,
     * or unbalanced parentheses) is handled in one of two ways:
     *  - if exception suppression is enabled on the instance, the raw string
     *    is tokenized with the default analyzer and returned as a
     *    Query\MultiTerm whose terms are all required (default operator
     *    B_AND) or all optional (any other default operator);
     *  - otherwise it is rethrown wrapped in a RuntimeException.
     *
     * @param string $strQuery
     * @param string $encoding
     * @throws \Zend\Search\Lucene\Exception\RuntimeException
     * @return \Zend\Search\Lucene\Search\Query\AbstractQuery
     */
    public static function parse($strQuery, $encoding = null)
    {
        self::_getInstance();
        $parser = self::$_instance;

        // Bring the FSM back to a known state in case a previous parse
        // operation left it half-way.
        $parser->reset();

        try {
            if ($encoding !== null) {
                $parser->_encoding = $encoding;
            } else {
                $parser->_encoding = $parser->_defaultEncoding;
            }
            $parser->_lastToken    = null;
            $parser->_context      = new QueryParserContext($parser->_encoding);
            $parser->_contextStack = array();
            $parser->_tokens       = $parser->_lexer->tokenize($strQuery, $parser->_encoding);

            // Nothing to parse
            if (count($parser->_tokens) == 0) {
                return new Query\Insignificant();
            }

            foreach ($parser->_tokens as $token) {
                try {
                    $parser->_currentToken = $token;
                    $parser->process($token->type);

                    $parser->_lastToken = $token;
                } catch (\Exception $e) {
                    // Only the "no rule" failure is reported as a syntax
                    // error; every other failure is passed on as a
                    // RuntimeException.
                    if (strpos($e->getMessage(), 'There is no any rule for') === false) {
                        throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
                    }

                    throw new QueryParserException( 'Syntax error at char position ' . $token->position . '.', 0, $e);
                }
            }

            // A non-empty context stack means a '(' was never closed
            if (count($parser->_contextStack) != 0) {
                throw new QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
            }

            return $parser->_context->getQuery();
        } catch (QueryParserException $e) {
            if (!$parser->_suppressQueryParsingExceptions) {
                throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
            }

            // Fallback: treat the input as a plain list of terms
            $fallbackTokens = Analyzer\Analyzer::getDefault()->tokenize($strQuery, $parser->_encoding);

            // true => required term, null => optional term
            $termsSign = null;
            if ($parser->_defaultOperator == self::B_AND) {
                $termsSign = true;
            }

            $fallbackQuery = new Query\MultiTerm();
            foreach ($fallbackTokens as $fallbackToken) {
                $fallbackQuery->addTerm(new Index\Term($fallbackToken->getTermText()), $termsSign);
            }

            return $fallbackQuery;
        }
    }
Beispiel #3
0
 /**
  * A boolean query whose required phrase subquery matches nothing must be
  * reduced to an empty query once it has been rewritten and optimized
  * against the index.
  *
  * The three assertions pin the string form of the query as parsed, after
  * rewrite() and after rewrite()->optimize().
  */
 public function testBooleanQueryWithNonExistingPhraseSubquery()
 {
     $index = Lucene\Lucene::open(__DIR__ . '/_index23Sample/_files');
     $query = Search\QueryParser::parse('"Non-existing phrase" AND Home');

     // assertEquals() takes the expected value first; keeping that order
     // makes failure messages label the two values correctly.
     $this->assertEquals('+("Non-existing phrase") +(Home)', $query->__toString());
     $this->assertEquals(
         '+((pathkeyword:"non existing phrase") (path:"non existing phrase") (modified:"non existing phrase") (contents:"non existing phrase")) +(pathkeyword:home path:home modified:home contents:home)',
         $query->rewrite($index)->__toString()
     );
     $this->assertEquals('<EmptyQuery>', $query->rewrite($index)->optimize($index)->__toString());
 }
Beispiel #4
0
 /**
  * Builds a 'signs style' boolean query from the entries of this context:
  * '+term1 term2 -term3 +(<subquery1>) ...'
  *
  * Each entry's query is added as a subquery with the sign stored for that
  * entry in $this->_signs. An entry whose stored sign is null gets the
  * default sign instead: true (required) when the parser's default operator
  * is B_AND, null (optional) otherwise.
  *
  * @return \Zend\Search\Lucene\Search\Query\AbstractQuery
  */
 public function _signStyleExpressionQuery()
 {
     $booleanQuery = new Query\Boolean();

     // true => required, null => optional
     $fallbackSign = (QueryParser::getDefaultOperator() == QueryParser::B_AND) ? true : null;

     foreach ($this->_entries as $entryId => $entry) {
         $sign = $this->_signs[$entryId];
         if ($sign === null) {
             $sign = $fallbackSign;
         }

         $booleanQuery->addSubquery($entry->getQuery($this->_encoding), $sign);
     }

     return $booleanQuery;
 }
 /**
  * Highlighting with the non-inclusive range query '{business TO by}' must
  * mark "buss" and "but", while the sentence listing "business", "by" and
  * "bus" has to come through unchanged.
  */
 public function testHighlightRangeNonInclusive()
 {
     $query = Search\QueryParser::parse('{business TO by}');

     $html = '<HTML>'
           . '<HEAD><TITLE>Page title</TITLE></HEAD>'
           . '<BODY>'
           . 'Test of text using range query. '
           . 'It has to match "buss" and "but" words, but has to skip "business", "by" and "bus"'
           . '</BODY>'
           . '</HTML>';

     $highlightedHTML = $query->highlightMatches($html);

     // Words inside the open range are wrapped in the highlight markup
     foreach (array('buss', 'but') as $matchedWord) {
         $marked = '<b style="color:black;background-color:#66ffff">' . $matchedWord . '</b>';
         $this->assertTrue(strpos($highlightedHTML, $marked) !== false);
     }

     // Check that "bus" word is skipped (together with both range bounds)
     $untouched = 'has to skip "business", "by" and "bus"';
     $this->assertTrue(strpos($highlightedHTML, $untouched) !== false);
 }
Beispiel #6
0
    /**
     * Performs a query against the index and returns an array of
     * Search\QueryHit objects.
     *
     * A string query is first parsed with Search\QueryParser::parse(); the
     * resulting value must be a Search\Query\AbstractQuery.
     *
     * Scores: every matched document with a non-zero score becomes a hit.
     * When the highest score is greater than 1, each hit's score is divided
     * by it. Collection stops once Lucene::getResultSetLimit() hits have been
     * gathered (a limit of 0 means no limit).
     *
     * Sorting: when called with the query only, hits are ordered by score
     * (descending) and then by document id (ascending). Any further arguments
     * are read through func_get_args() as a sequence of
     *     fieldName [, int [, int]]
     * groups. fieldName is either 'score' (case-insensitive) or a name
     * returned by getFieldNames(); the optional integers are handed to
     * array_multisort() as its sort order / sort type flags. Omitted flags
     * default to SORT_ASC and SORT_REGULAR. Document id (ascending, numeric)
     * is always appended as the last sort key.
     *
     * @param \Zend\Search\Lucene\Search\Query\AbstractQuery|string $query
     * @return array \Zend\Search\Lucene\Search\QueryHit
     * @throws \Zend\Search\Lucene\Exception\InvalidArgumentException if $query is neither
     *         a string nor an AbstractQuery
     * @throws \Zend\Search\Lucene\Exception\RuntimeException on a non-string or unknown
     *         sort field name, or when reading a sort field value fails
     */
    public function find($query)
    {
        if (is_string($query)) {
            $query = Search\QueryParser::parse($query);
        }

        if (!$query instanceof Search\Query\AbstractQuery) {
            throw new InvalidArgumentException('Query must be a string or Zend\Search\Lucene\Search\Query object');
        }

        // NOTE(review): presumably makes pending index changes visible to the
        // search - confirm against commit()
        $this->commit();

        // Three parallel arrays: array_multisort() reorders them together
        $hits   = array();
        $scores = array();
        $ids    = array();

        $query = $query->rewrite($this)->optimize($this);

        $query->execute($this);

        $topScore = 0;

        $resultSetLimit = Lucene::getResultSetLimit();
        foreach ($query->matchedDocs() as $id => $num) {
            $docScore = $query->score($id, $this);
            // Documents scoring exactly 0 are not reported as hits
            if( $docScore != 0 ) {
                $hit = new Search\QueryHit($this);
                $hit->id = $id;
                $hit->score = $docScore;

                $hits[]   = $hit;
                $ids[]    = $id;
                $scores[] = $docScore;

                if ($docScore > $topScore) {
                    $topScore = $docScore;
                }
            }

            // A limit of 0 disables the cap
            if ($resultSetLimit != 0  &&  count($hits) >= $resultSetLimit) {
                break;
            }
        }

        if (count($hits) == 0) {
            // skip sorting, which may cause a error on empty index
            return array();
        }

        // Normalize only when the best score exceeds 1. $scores keeps the raw
        // values; dividing by a positive constant does not change the order.
        if ($topScore > 1) {
            foreach ($hits as $hit) {
                $hit->score /= $topScore;
            }
        }

        if (func_num_args() == 1) {
            // sort by scores
            array_multisort($scores, SORT_DESC, SORT_NUMERIC,
                            $ids,    SORT_ASC,  SORT_NUMERIC,
                            $hits);
        } else {
            // sort by given field names

            $argList    = func_get_args();
            $fieldNames = $this->getFieldNames();
            $sortArgs   = array();

            // PHP 5.3 now expects all arguments to array_multisort be passed by
            // reference (if it's invoked through call_user_func_array());
            // since constants can't be passed by reference, create some placeholder variables.
            $sortReg    = SORT_REGULAR;
            $sortAsc    = SORT_ASC;
            $sortNum    = SORT_NUMERIC;

            $sortFieldValues = array();

            // Index 0 is the query itself; sort specifications start at 1.
            // $count is also advanced inside the loop when flags are consumed.
            for ($count = 1; $count < count($argList); $count++) {
                $fieldName = $argList[$count];

                if (!is_string($fieldName)) {
                    throw new RuntimeException('Field name must be a string.');
                }

                if (strtolower($fieldName) == 'score') {
                    $sortArgs[] = &$scores;
                } else {
                    if (!in_array($fieldName, $fieldNames)) {
                        throw new RuntimeException('Wrong field name.');
                    }

                    // Load the field's values once, even if it is named twice
                    if (!isset($sortFieldValues[$fieldName])) {
                        $valuesArray = array();
                        foreach ($hits as $hit) {
                            try {
                                $value = $hit->getDocument()->getFieldValue($fieldName);
                            } catch (\Exception $e) {
                                // An exception whose message contains 'not found' is
                                // treated as "document has no such field" and sorts
                                // as null; anything else is rethrown.
                                if (strpos($e->getMessage(), 'not found') === false) {
                                    throw new RuntimeException($e->getMessage(), $e->getCode(), $e);
                                } else {
                                    $value = null;
                                }
                            }

                            $valuesArray[] = $value;
                        }

                        // Collect loaded values in $sortFieldValues
                        // Required for PHP 5.3 which translates references into values when source
                        // variable is destroyed
                        $sortFieldValues[$fieldName] = $valuesArray;
                    }

                    $sortArgs[] = &$sortFieldValues[$fieldName];
                }

                // Up to two integer flags may follow a field name
                if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
                    $count++;
                    $sortArgs[] = &$argList[$count];

                    if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
                        $count++;
                        $sortArgs[] = &$argList[$count];
                    } else {
                        // Only one flag was given: supply the missing one. If the
                        // given flag is a sort order, add the default sort type;
                        // otherwise it is taken to be a sort type and the default
                        // order is added.
                        if ($argList[$count] == SORT_ASC  || $argList[$count] == SORT_DESC) {
                            $sortArgs[] = &$sortReg;
                        } else {
                            $sortArgs[] = &$sortAsc;
                        }
                    }
                } else {
                    // No flags given: ascending, regular comparison
                    $sortArgs[] = &$sortAsc;
                    $sortArgs[] = &$sortReg;
                }
            }

            // Sort by id's if values are equal
            $sortArgs[] = &$ids;
            $sortArgs[] = &$sortAsc;
            $sortArgs[] = &$sortNum;

            // Array to be sorted
            $sortArgs[] = &$hits;

            // Do sort
            call_user_func_array('array_multisort', $sortArgs);
        }

        return $hits;
    }
 /**
  * Processes a NOT operator that follows an omitted (default) operator.
  *
  * Unless the parser's default operator is B_AND, the OR operator action
  * runs first; the NOT operator action then runs in every case.
  */
 public function emptyNotOperatorAction()
 {
     // With B_AND as the default there is nothing to do for the omitted operator
     if (QueryParser::getDefaultOperator() != QueryParser::B_AND) {
         $this->orOperatorAction();
     }

     // Process NOT operator
     $this->notOperatorAction();
 }