Exemple #1
0
 /**
  * Parses a search string into search terms, supports quoted phrases and removes extra punctuation
  *
  * The input is split on whitespace, except that text wrapped in a pair of
  * double quotes is kept together as one phrase (with the quotes removed).
  * Unquoted terms have leading and trailing punctuation, symbols, separators
  * and control characters stripped; a term made up solely of such characters
  * therefore becomes an empty string and is still returned.
  *
  * @internal
  *
  * @param  string  $terms              A text string from a form input to parse into search terms
  * @param  boolean $ignore_stop_words  If stop words should be ignored, this setting will be ignored if all words are stop words
  * @return array  The search terms, in the order they appeared in the input
  */
 public static function parseSearchTerms($terms, $ignore_stop_words = FALSE)
 {
     $stop_words = array('i', 'a', 'an', 'are', 'as', 'at', 'be', 'by', 'de', 'en', 'for', 'from', 'how', 'in', 'is', 'it', 'la', 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when', 'where', 'who', 'will');

     // Either a double-quoted phrase or a run of non-whitespace characters
     preg_match_all('#(?:"[^"]+"|[^\\s]+)#', $terms, $matches);

     $good_terms = array();
     $ignored_terms = array();

     foreach ($matches[0] as $match) {
         // Only strip quotes when the term both starts AND ends with one. An
         // unterminated quote (e.g. `"foo`) falls through to the punctuation
         // trimming below instead of losing its last character.
         if (strlen($match) > 1 && $match[0] == '"' && substr($match, -1) == '"') {
             $match = substr($match, 1, -1);

         // Trim any punctuation off of the beginning and end of terms
         } else {
             // Lazily detect (once per class) whether PCRE was built with
             // unicode character property support
             if (self::$pcre_supports_unicode_character_properties === NULL) {
                 fCore::startErrorCapture();
                 preg_match('#\\pC#u', 'test');
                 self::$pcre_supports_unicode_character_properties = !(bool) fCore::stopErrorCapture();
             }

             if (self::$pcre_supports_unicode_character_properties) {
                 $trimmable = '\\pC\\pM\\pP\\pS\\pZ';
             } else {
                 // This just removes ascii non-alphanumeric characters, plus the unicode punctuation and supplemental punctuation blocks
                 $trimmable = '\\x21-\\x2F\\x3A-\\x40\\x5B-\\x60\\x7B-\\x7F\\x{2000}-\\x{206F}\\x{2E00}-\\x{2E7F}\\x{00A1}-\\x{00A9}\\x{00AB}-\\x{00B1}\\x{00B4}\\x{00B6}-\\x{00B8}\\x{00BB}\\x{00BF}\\x{00D7}\\x{00F7}';
             }

             $trimmed = preg_replace('#(^[' . $trimmable . ']+|[' . $trimmable . ']+$)#iDu', '', $match);

             // preg_replace() returns NULL on failure (for instance when the
             // term is not valid UTF-8); keep the untrimmed term in that case
             // rather than returning NULL as a search term
             if ($trimmed !== NULL) {
                 $match = $trimmed;
             }
         }

         if ($ignore_stop_words && in_array(strtolower($match), $stop_words, TRUE)) {
             $ignored_terms[] = $match;
             continue;
         }

         $good_terms[] = $match;
     }

     // If no terms were parsed, that means all words were stop words
     if ($ignored_terms && !$good_terms) {
         $good_terms = $ignored_terms;
     }

     return $good_terms;
 }