Example #1
0
    /**
     * Optimize query in the context of specified index
     *
     * @param \Zend\Search\Lucene\SearchIndex $index
     * @return \Zend\Search\Lucene\Search\Query\AbstractQuery
     */
    public function optimize(Lucene\SearchIndex $index)
    {
        $subqueries = array();
        $signs      = array();

        // Optimize all subqueries
        foreach ($this->_subqueries as $id => $subquery) {
            $subqueries[] = $subquery->optimize($index);
            $signs[]      = ($this->_signs === null)? true : $this->_signs[$id];
        }

        // Remove insignificant subqueries
        foreach ($subqueries as $id => $subquery) {
            if ($subquery instanceof Insignificant) {
                // Insignificant subquery has to be removed anyway
                unset($subqueries[$id]);
                unset($signs[$id]);
            }
        }
        if (count($subqueries) == 0) {
            // Boolean query doesn't has non-insignificant subqueries
            return new Insignificant();
        }
        // Check if all non-insignificant subqueries are prohibited
        $allProhibited = true;
        foreach ($signs as $sign) {
            if ($sign !== false) {
                $allProhibited = false;
                break;
            }
        }
        if ($allProhibited) {
            return new Insignificant();
        }


        // Check for empty subqueries
        foreach ($subqueries as $id => $subquery) {
            if ($subquery instanceof EmptyResult) {
                if ($signs[$id] === true) {
                    // Matching is required, but is actually empty
                    return new EmptyResult();
                } else {
                    // Matching is optional or prohibited, but is empty
                    // Remove it from subqueries and signs list
                    unset($subqueries[$id]);
                    unset($signs[$id]);
                }
            }
        }

        // Check, if reduced subqueries list is empty
        if (count($subqueries) == 0) {
            return new EmptyResult();
        }

        // Check if all non-empty subqueries are prohibited
        $allProhibited = true;
        foreach ($signs as $sign) {
            if ($sign !== false) {
                $allProhibited = false;
                break;
            }
        }
        if ($allProhibited) {
            return new EmptyResult();
        }


        // Check, if reduced subqueries list has only one entry
        if (count($subqueries) == 1) {
            // It's a query with only one required or optional clause
            // (it's already checked, that it's not a prohibited clause)

            if ($this->getBoost() == 1) {
                return reset($subqueries);
            }

            $optimizedQuery = clone reset($subqueries);
            $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());

            return $optimizedQuery;
        }


        // Prepare first candidate for optimized query
        $optimizedQuery = new self($subqueries, $signs);
        $optimizedQuery->setBoost($this->getBoost());


        $terms        = array();
        $tsigns       = array();
        $boostFactors = array();

        // Try to decompose term and multi-term subqueries
        foreach ($subqueries as $id => $subquery) {
            if ($subquery instanceof Term) {
                $terms[]        = $subquery->getTerm();
                $tsigns[]       = $signs[$id];
                $boostFactors[] = $subquery->getBoost();

                // remove subquery from a subqueries list
                unset($subqueries[$id]);
                unset($signs[$id]);
           } else if ($subquery instanceof MultiTerm) {
                $subTerms = $subquery->getTerms();
                $subSigns = $subquery->getSigns();

                if ($signs[$id] === true) {
                    // It's a required multi-term subquery.
                    // Something like '... +(+term1 -term2 term3 ...) ...'

                    // Multi-term required subquery can be decomposed only if it contains
                    // required terms and doesn't contain prohibited terms:
                    // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
                    //
                    // Check this
                    $hasRequired   = false;
                    $hasProhibited = false;
                    if ($subSigns === null) {
                        // All subterms are required
                        $hasRequired = true;
                    } else {
                        foreach ($subSigns as $sign) {
                            if ($sign === true) {
                                $hasRequired   = true;
                            } else if ($sign === false) {
                                $hasProhibited = true;
                                break;
                            }
                        }
                    }
                    // Continue if subquery has prohibited terms or doesn't have required terms
                    if ($hasProhibited  ||  !$hasRequired) {
                        continue;
                    }

                    foreach ($subTerms as $termId => $term) {
                        $terms[]        = $term;
                        $tsigns[]       = ($subSigns === null)? true : $subSigns[$termId];
                        $boostFactors[] = $subquery->getBoost();
                    }

                    // remove subquery from a subqueries list
                    unset($subqueries[$id]);
                    unset($signs[$id]);

                } else { // $signs[$id] === null  ||  $signs[$id] === false
                    // It's an optional or prohibited multi-term subquery.
                    // Something like '... (+term1 -term2 term3 ...) ...'
                    // or
                    // something like '... -(+term1 -term2 term3 ...) ...'

                    // Multi-term optional and required subqueries can be decomposed
                    // only if all terms are optional.
                    //
                    // Check if all terms are optional.
                    $onlyOptional = true;
                    if ($subSigns === null) {
                        // All subterms are required
                        $onlyOptional = false;
                    } else {
                        foreach ($subSigns as $sign) {
                            if ($sign !== null) {
                                $onlyOptional = false;
                                break;
                            }
                        }
                    }

                    // Continue if non-optional terms are presented in this multi-term subquery
                    if (!$onlyOptional) {
                        continue;
                    }

                    foreach ($subTerms as $termId => $term) {
                        $terms[]  = $term;
                        $tsigns[] = ($signs[$id] === null)? null  /* optional */ :
                                                            false /* prohibited */;
                        $boostFactors[] = $subquery->getBoost();
                    }

                    // remove subquery from a subqueries list
                    unset($subqueries[$id]);
                    unset($signs[$id]);
                }
            }
        }


        // Check, if there are no decomposed subqueries
        if (count($terms) == 0 ) {
            // return prepared candidate
            return $optimizedQuery;
        }


        // Check, if all subqueries have been decomposed and all terms has the same boost factor
        if (count($subqueries) == 0  &&  count(array_unique($boostFactors)) == 1) {
            $optimizedQuery = new MultiTerm($terms, $tsigns);
            $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());

            return $optimizedQuery;
        }


        // This boolean query can't be transformed to Term/MultiTerm query and still contains
        // several subqueries

        // Separate prohibited terms
        $prohibitedTerms        = array();
        foreach ($terms as $id => $term) {
            if ($tsigns[$id] === false) {
                $prohibitedTerms[]        = $term;

                unset($terms[$id]);
                unset($tsigns[$id]);
                unset($boostFactors[$id]);
            }
        }

        if (count($terms) == 1) {
            $clause = new Term(reset($terms));
            $clause->setBoost(reset($boostFactors));

            $subqueries[] = $clause;
            $signs[]      = reset($tsigns);

            // Clear terms list
            $terms = array();
        } else if (count($terms) > 1  &&  count(array_unique($boostFactors)) == 1) {
            $clause = new MultiTerm($terms, $tsigns);
            $clause->setBoost(reset($boostFactors));

            $subqueries[] = $clause;
            // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
            $signs[]      = (in_array(true, $tsigns))? true : null;

            // Clear terms list
            $terms = array();
        }

        if (count($prohibitedTerms) == 1) {
            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new Term(reset($prohibitedTerms));
            $signs[]      = false;

            // Clear prohibited terms list
            $prohibitedTerms = array();
        } else if (count($prohibitedTerms) > 1) {
            // prepare signs array
            $prohibitedSigns = array();
            foreach ($prohibitedTerms as $id => $term) {
                // all prohibited term are grouped as optional into multi-term query
                $prohibitedSigns[$id] = null;
            }

            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new MultiTerm($prohibitedTerms, $prohibitedSigns);
            // Clause sign is 'prohibited'
            $signs[]      = false;

            // Clear terms list
            $prohibitedTerms = array();
        }

        /** @todo Group terms with the same boost factors together */

        // Check, that all terms are processed
        // Replace candidate for optimized query
        if (count($terms) == 0  &&  count($prohibitedTerms) == 0) {
            $optimizedQuery = new self($subqueries, $signs);
            $optimizedQuery->setBoost($this->getBoost());
        }

        return $optimizedQuery;
    }