Ejemplo n.º 1
0
Archivo: query.php Proyecto: 01J/topm
 /**
  * Method to process the query input string and extract required, optional,
  * and excluded tokens; taxonomy filters; and date filters.
  *
  * @param   string  $input  The query input string.
  * @param   string  $lang   The query input language.
  * @param   string  $mode   The query matching mode.
  *
  * @return  boolean  True on success.
  *
  * @since   2.5
  * @throws  Exception on database error.
  */
 protected function processString($input, $lang, $mode)
 {
     /*
      * Overview: the input is normalised, then consumed in four passes:
      *   1. "modifier:value" filters (before/after dates, taxonomy branches)
      *   2. double-quoted phrases
      *   3. boolean operators (AND / OR / NOT) between terms
      *   4. whatever is left, tokenized with the standard mechanism
      * Results are written to $this->date1, $this->when1, $this->filters,
      * $this->included, $this->excluded, $this->ignored, $this->operators
      * and $this->highlight.
      */

     // Clean up the input string: decode entities, lowercase, collapse whitespace.
     $input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');
     $input = JString::strtolower($input);
     $input = preg_replace('#\\s+#mi', ' ', $input);
     $input = JString::trim($input);
     $debug = JFactory::getConfig()->get('debug_lang');
     /*
      * First, we need to handle string based modifiers. String based
      * modifiers could potentially include things like "category:blah" or
      * "before:2009-10-21" or "type:article", etc.
      */
     $patterns = array('before' => JText::_('COM_FINDER_FILTER_WHEN_BEFORE'), 'after' => JText::_('COM_FINDER_FILTER_WHEN_AFTER'));
     // Add the taxonomy branch titles to the possible patterns.
     foreach (FinderIndexerTaxonomy::getBranchTitles() as $branch) {
         // Map the branch title to its lowercased, translated singular label.
         $patterns[$branch] = JString::strtolower(JText::_(FinderHelperLanguage::branchSingular($branch)));
     }
     // Container for search terms and phrases.
     $terms = array();
     $phrases = array();
     // Cleared filter branches.
     $cleared = array();
     /*
      * Compile the suffix pattern. This is used to match the values of the
      * filter input string. Single words can be input directly, multi-word
      * values have to be wrapped in double quotes.
      *
      * The typographic and straight single quotes are written as HTML
      * entities and decoded here so the source literal stays valid PHP.
      */
     $quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');
     $suffix = '(([\\w\\d' . $quotes . '-]+)|\\"([\\w\\d\\s' . $quotes . '-]+)\\")';
     /*
      * Iterate through the possible filter patterns and search for matches.
      * We need to match the key, colon, and a value pattern for the match
      * to be valid.
      */
     foreach ($patterns as $modifier => $pattern) {
         $matches = array();
         if ($debug) {
             // Drop the first and last two characters of the label.
             // NOTE(review): presumably these are the language-debug markers
             // (compare the '**' / '??' handling further down) - confirm.
             $pattern = substr($pattern, 2, -2);
         }
         // The label is literal text, so escape it before embedding it in the regex.
         $quoted = preg_quote($pattern, '#');
         // Check if the filter pattern is in the input string.
         if (preg_match('#' . $quoted . '\\s*:\\s*' . $suffix . '#mi', $input, $matches)) {
             // Group 3 only exists for a double-quoted value; otherwise use
             // group 1, which holds the bare word.
             $value = isset($matches[3]) ? $matches[3] : $matches[1];
             // Now we have to handle the filter string.
             switch ($modifier) {
                 // Handle the before and after date filters.
                 case 'before':
                 case 'after':
                     // Get the time offset.
                     $offset = JFactory::getApplication()->get('offset');
                     // Array of allowed when values.
                     $whens = array('before', 'after', 'exact');
                     // The value of 'today' is a special case that we need to handle.
                     if ($value === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY'))) {
                         $today = JFactory::getDate('now', $offset);
                         // NOTE(review): '%Y-%m-%d' looks like strftime() syntax;
                         // if the date object's format() follows date() syntax
                         // this should be 'Y-m-d' - confirm against the date class.
                         $value = $today->format('%Y-%m-%d');
                     }
                     // Try to parse the date string.
                     $date = JFactory::getDate($value, $offset);
                     // Check if the date was parsed successfully.
                     if ($date->toUnix() !== null) {
                         // Set the date filter.
                         $this->date1 = $date->toSQL();
                         $this->when1 = in_array($modifier, $whens) ? $modifier : 'before';
                     }
                     break;
                 // Handle a taxonomy branch filter.
                 default:
                     // Try to find the node id.
                     $return = FinderIndexerTaxonomy::getNodeByTitle($modifier, $value);
                     // Check if the node id was found.
                     if ($return) {
                         // The first hit for a branch replaces any filter
                         // already stored for that branch.
                         if (!in_array($modifier, $cleared)) {
                             // Clear the branch.
                             $this->filters[$modifier] = array();
                             // Add the branch to the cleared list.
                             $cleared[] = $modifier;
                         }
                         // Add the filter to the list.
                         $this->filters[$modifier][$return->title] = (int) $return->id;
                     }
                     break;
             }
             // Remove the consumed "modifier:value" text and clean up again.
             $input = str_replace($matches[0], '', $input);
             $input = preg_replace('#\\s+#mi', ' ', $input);
             $input = JString::trim($input);
         }
     }
     /*
      * Extract the tokens enclosed in double quotes so that we can handle
      * them as phrases.
      */
     if (JString::strpos($input, '"') !== false) {
         $matches = array();
         // Extract the tokens enclosed in double quotes.
         if (preg_match_all('#\\"([^"]+)\\"#mi', $input, $matches)) {
             /*
              * One or more phrases were found so we need to iterate through
              * them, tokenize them as phrases, and remove them from the raw
              * input string before we move on to the next processing step.
              */
             foreach ($matches[1] as $key => $match) {
                 // Find the complete phrase in the input string.
                 $pos = JString::strpos($input, $matches[0][$key]);
                 $len = JString::strlen($matches[0][$key]);
                 // Add any terms that are before this phrase to the stack.
                 if (JString::trim(JString::substr($input, 0, $pos))) {
                     $terms = array_merge($terms, explode(' ', JString::trim(JString::substr($input, 0, $pos))));
                 }
                 // Strip out everything up to and including the phrase.
                 $input = JString::substr($input, $pos + $len);
                 // Clean up the input string again.
                 $input = preg_replace('#\\s+#mi', ' ', $input);
                 $input = JString::trim($input);
                 // Split the phrase into its words.
                 $parts = explode(' ', $match);
                 // Check if the phrase is longer than three words.
                 if (count($parts) > 3) {
                     /*
                      * If the phrase is longer than three words, we need to
                      * break it down into smaller chunks of phrases that
                      * are less than or equal to three words. We overlap
                      * the chunks so that we can ensure that a match is
                      * found for the complete phrase and not just portions
                      * of it.
                      */
                     for ($i = 0, $c = count($parts); $i < $c; $i += 2) {
                         // Set up the chunk.
                         $chunk = array();
                         // The chunk has to be assembled based on how many
                         // pieces are available to use.
                         switch ($c - $i) {
                             /*
                              * If only one word is left, we can break from
                              * the switch and loop because the last word
                              * was already used at the end of the last
                              * chunk.
                              */
                             case 1:
                                 break 2;
                             // If two words are left, we use them both as
                             // the last chunk of the phrase and we're done.
                             case 2:
                                 $chunk[] = $parts[$i];
                                 $chunk[] = $parts[$i + 1];
                                 break;
                             // If there are three or more words left, we
                             // build a three word chunk and continue on.
                             default:
                                 $chunk[] = $parts[$i];
                                 $chunk[] = $parts[$i + 1];
                                 $chunk[] = $parts[$i + 2];
                                 break;
                         }
                         // If the chunk is not empty, add it as a phrase.
                         if (count($chunk)) {
                             $phrases[] = implode(' ', $chunk);
                             $terms[] = implode(' ', $chunk);
                         }
                     }
                 } else {
                     // The phrase is <= 3 words so we can use it as is.
                     $phrases[] = $match;
                     $terms[] = $match;
                 }
             }
         }
     }
     // Add the remaining terms if present.
     if (!empty($input)) {
         $terms = array_merge($terms, explode(' ', $input));
     }
     // An array of our boolean operators. $operator => $translation
     $operators = array('AND' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_AND')), 'OR' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_OR')), 'NOT' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_NOT')));
     // When JDEBUG is on, strip the debug marker strings from the translated operators.
     if (JDEBUG) {
         $debugStrings = array('**', '??');
         $operators = str_replace($debugStrings, '', $operators);
     }
     /*
      * Iterate through the terms and perform any sorting that needs to be
      * done based on boolean search operators. Terms that are before an
      * and/or/not modifier have to be handled in relation to their operator.
      *
      * Processed entries are unset() from $terms and $phrases, so both
      * arrays may have gaps in their keys after this loop.
      */
     for ($i = 0, $c = count($terms); $i < $c; $i++) {
         // Check if the term is followed by an operator that we understand.
         if (isset($terms[$i + 1]) && in_array($terms[$i + 1], $operators)) {
             // Get the operator mode.
             $op = array_search($terms[$i + 1], $operators);
             // Handle the AND operator.
             if ($op === 'AND' && isset($terms[$i + 2])) {
                 // Tokenize the current term.
                 $token = FinderIndexerHelper::tokenize($terms[$i], $lang, true);
                 $token = $this->getTokenData($token);
                 // Set the required flag.
                 $token->required = true;
                 // Add the current token to the stack.
                 $this->included[] = $token;
                 $this->highlight = array_merge($this->highlight, array_keys($token->matches));
                 // Record the operator that sits between the two terms.
                 $this->operators[] = $terms[$i + 1];
                 // Tokenize the term after the next term (current plus two).
                 $other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true);
                 $other = $this->getTokenData($other);
                 // Set the required flag.
                 $other->required = true;
                 // Add the token after the next token to the stack.
                 $this->included[] = $other;
                 $this->highlight = array_merge($this->highlight, array_keys($other->matches));
                 // Remove the processed phrases if possible.
                 if (($pk = array_search($terms[$i], $phrases)) !== false) {
                     unset($phrases[$pk]);
                 }
                 if (($pk = array_search($terms[$i + 2], $phrases)) !== false) {
                     unset($phrases[$pk]);
                 }
                 // Remove the processed terms.
                 unset($terms[$i]);
                 unset($terms[$i + 1]);
                 unset($terms[$i + 2]);
                 // Skip over the operator and the second operand.
                 $i += 2;
                 continue;
             } elseif ($op === 'OR' && isset($terms[$i + 2])) {
                 // Tokenize the current term.
                 $token = FinderIndexerHelper::tokenize($terms[$i], $lang, true);
                 $token = $this->getTokenData($token);
                 // Set the required flag.
                 $token->required = false;
                 // Add the current token to the stack.
                 if (count($token->matches)) {
                     $this->included[] = $token;
                     $this->highlight = array_merge($this->highlight, array_keys($token->matches));
                 } else {
                     $this->ignored[] = $token;
                 }
                 // Record the operator that sits between the two terms.
                 $this->operators[] = $terms[$i + 1];
                 // Tokenize the term after the next term (current plus two).
                 $other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true);
                 $other = $this->getTokenData($other);
                 // Set the required flag.
                 $other->required = false;
                 // Add the token after the next token to the stack.
                 if (count($other->matches)) {
                     $this->included[] = $other;
                     $this->highlight = array_merge($this->highlight, array_keys($other->matches));
                 } else {
                     $this->ignored[] = $other;
                 }
                 // Remove the processed phrases if possible.
                 if (($pk = array_search($terms[$i], $phrases)) !== false) {
                     unset($phrases[$pk]);
                 }
                 if (($pk = array_search($terms[$i + 2], $phrases)) !== false) {
                     unset($phrases[$pk]);
                 }
                 // Remove the processed terms.
                 unset($terms[$i]);
                 unset($terms[$i + 1]);
                 unset($terms[$i + 2]);
                 // Skip over the operator and the second operand.
                 $i += 2;
                 continue;
             }
         } elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators) === 'OR') {
             // The current term is a leading OR operator; record it.
             $this->operators[] = $terms[$i];
             // Tokenize the next term (current plus one).
             $other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true);
             $other = $this->getTokenData($other);
             // Set the required flag.
             $other->required = false;
             // Add the next token to the stack.
             if (count($other->matches)) {
                 $this->included[] = $other;
                 $this->highlight = array_merge($this->highlight, array_keys($other->matches));
             } else {
                 $this->ignored[] = $other;
             }
             // Remove the processed phrase if possible.
             if (($pk = array_search($terms[$i + 1], $phrases)) !== false) {
                 unset($phrases[$pk]);
             }
             // Remove the processed terms.
             unset($terms[$i]);
             unset($terms[$i + 1]);
             // Skip over the operand.
             $i += 1;
             continue;
         } elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators) === 'NOT') {
             // The current term is a leading NOT operator; record it.
             $this->operators[] = $terms[$i];
             // Tokenize the next term (current plus one).
             $other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true);
             $other = $this->getTokenData($other);
             // Set the required flag.
             $other->required = false;
             // Add the next token to the excluded stack.
             if (count($other->matches)) {
                 $this->excluded[] = $other;
             } else {
                 $this->ignored[] = $other;
             }
             // Remove the processed phrase if possible.
             if (($pk = array_search($terms[$i + 1], $phrases)) !== false) {
                 unset($phrases[$pk]);
             }
             // Remove the processed terms.
             unset($terms[$i]);
             unset($terms[$i + 1]);
             // Skip over the operand.
             $i += 1;
             continue;
         }
     }
     /*
      * Iterate through any search phrases and tokenize them. We handle
      * phrases as autonomous units and do not break them down into two and
      * three word combinations.
      *
      * foreach is used instead of a 0..count-1 index loop because the
      * operator pass above may have unset() entries, leaving gaps in the
      * keys of $phrases.
      */
     foreach ($phrases as $phraseKey => $phrase) {
         // Tokenize the phrase.
         $token = FinderIndexerHelper::tokenize($phrase, $lang, true);
         $token = $this->getTokenData($token);
         // Set the required flag.
         $token->required = true;
         // Add the current token to the stack.
         $this->included[] = $token;
         $this->highlight = array_merge($this->highlight, array_keys($token->matches));
         // Remove the processed term if possible.
         if (($pk = array_search($phrase, $terms)) !== false) {
             unset($terms[$pk]);
         }
         // Remove the processed phrase.
         unset($phrases[$phraseKey]);
     }
     /*
      * Handle any remaining tokens using the standard processing mechanism.
      */
     if (!empty($terms)) {
         // Tokenize the terms.
         $terms = implode(' ', $terms);
         $tokens = FinderIndexerHelper::tokenize($terms, $lang, false);
         // Make sure we are working with an array.
         $tokens = is_array($tokens) ? $tokens : array($tokens);
         // Get the token data and required state for all the tokens.
         foreach ($tokens as $token) {
             // Get the token data.
             $token = $this->getTokenData($token);
             // A token is required only in AND mode and only when it is not a phrase.
             $token->required = $mode === 'AND' ? ($token->phrase ? false : true) : false;
             // Add the token to the appropriate stack.
             if (count($token->matches) || $token->required) {
                 $this->included[] = $token;
                 $this->highlight = array_merge($this->highlight, array_keys($token->matches));
             } else {
                 $this->ignored[] = $token;
             }
         }
     }
     return true;
 }
Ejemplo n.º 2
0
 /**
  * Method to optimize the index. We use this method to remove unused terms
  * and any other optimizations that might be necessary.
  *
  * @return  boolean  True on success.
  *
  * @since   3.1
  * @throws  Exception on database error.
  */
 public function optimize()
 {
     // Obtain the database connector and an empty query builder.
     $dbo = JFactory::getDbo();
     $purge = $dbo->getQuery(true);

     // Quote the identifiers used by the clean-up statement.
     $termsTable = $dbo->quoteName('#__finder_terms');
     $linksColumn = $dbo->quoteName('links');

     // Delete every term whose "links" value is zero or negative.
     $purge->delete($termsTable)->where($linksColumn . ' <= 0');
     $dbo->setQuery($purge);
     $dbo->execute();

     // Let the taxonomy helper drop its orphaned nodes as well.
     FinderIndexerTaxonomy::removeOrphanNodes();

     return true;
 }
Ejemplo n.º 3
0
 /**
  * Method to optimize the index. We use this method to remove unused terms
  * and any other optimizations that might be necessary.
  *
  * @return  boolean  True on success.
  *
  * @since   2.5
  * @throws  Exception on database error.
  */
 public function optimize()
 {
     // Obtain the database connector and an empty query builder.
     $dbo = JFactory::getDbo();
     $purge = $dbo->getQuery(true);

     // Delete every term whose "links" value is zero or negative.
     $purge->delete($dbo->quoteName('#__finder_terms'))->where($dbo->quoteName('links') . ' <= 0');
     $dbo->setQuery($purge);
     $dbo->execute();

     // Tables that receive a VACUUM followed by a REINDEX: the links table
     // first, then the sixteen terms mapping tables suffixed 0-9 and a-f.
     $maintained = array('#__finder_links');
     foreach (str_split('0123456789abcdef') as $hex) {
         $maintained[] = '#__finder_links_terms' . $hex;
     }

     foreach ($maintained as $table) {
         foreach (array('VACUUM ', 'REINDEX TABLE ') as $command) {
             $dbo->setQuery($command . $dbo->quoteName($table));
             $dbo->execute();
         }
     }

     // The unsuffixed terms mapping table is only reindexed.
     $dbo->setQuery('REINDEX TABLE ' . $dbo->quoteName('#__finder_links_terms'));
     $dbo->execute();

     // Let the taxonomy helper drop its orphaned nodes.
     FinderIndexerTaxonomy::removeOrphanNodes();

     // Reindex the taxonomy mapping table once the orphans are gone.
     $dbo->setQuery('REINDEX TABLE ' . $dbo->quoteName('#__finder_taxonomy_map'));
     $dbo->execute();

     return true;
 }
Ejemplo n.º 4
0
 /**
  * Method to optimize the index. We use this method to remove unused terms
  * and any other optimizations that might be necessary.
  *
  * @return  boolean  True on success.
  *
  * @since   2.5
  * @throws  Exception on database error.
  */
 public static function optimize()
 {
     // Fetch the indexer state. The returned value is not used in this
     // method; the call itself is kept in case it has side effects.
     FinderIndexer::getState();

     // Obtain the database connector and an empty query builder.
     $dbo = JFactory::getDBO();
     $purge = $dbo->getQuery(true);

     // Delete every term whose "links" value is zero or negative.
     $purge->delete();
     $purge->from($dbo->quoteName('#__finder_terms'));
     $purge->where($dbo->quoteName('links') . ' <= 0');
     $dbo->setQuery($purge);
     $dbo->query();

     // Surface a database failure as an exception.
     if ($dbo->getErrorNum()) {
         throw new Exception($dbo->getErrorMsg(), 500);
     }

     // OPTIMIZE TABLE is only issued when the driver name starts with
     // "mysql"; other drivers skip every optimisation statement below.
     //@TODO: PostgreSQL doesn't support OPTIMIZE TABLE
     $isMysql = strpos($dbo->name, 'mysql') === 0;

     if ($isMysql) {
         // Order matters: links table, the sixteen hex-suffixed terms
         // mapping tables, then the unsuffixed terms mapping table.
         $tables = array('#__finder_links');
         for ($n = 0; $n < 16; $n++) {
             $tables[] = '#__finder_links_terms' . dechex($n);
         }
         $tables[] = '#__finder_links_terms';

         foreach ($tables as $table) {
             $dbo->setQuery('OPTIMIZE TABLE ' . $dbo->quoteName($table));
             $dbo->query();

             // Surface a database failure as an exception.
             if ($dbo->getErrorNum()) {
                 throw new Exception($dbo->getErrorMsg(), 500);
             }
         }
     }

     // Let the taxonomy helper drop its orphaned nodes (runs for every driver).
     FinderIndexerTaxonomy::removeOrphanNodes();

     if ($isMysql) {
         // Optimize the taxonomy mapping table once the orphans are gone.
         $dbo->setQuery('OPTIMIZE TABLE ' . $dbo->quoteName('#__finder_taxonomy_map'));
         $dbo->query();

         // Surface a database failure as an exception.
         if ($dbo->getErrorNum()) {
             throw new Exception($dbo->getErrorMsg(), 500);
         }
     }

     return true;
 }