Beispiel #1
0
 /**
  * Tests that search_simplify() does the right thing with punctuation.
  */
 function testSearchSimplifyPunctuation()
 {
     // Each scenario is a triple: raw input, expected simplified text, and
     // the message reported with the assertion.
     $scenarios = array(
         array('20.03/94-28,876', '20039428876', 'Punctuation removed from numbers'),
         array('great...drupal--module', 'great drupal module', 'Multiple dot and dashes are word boundaries'),
         array('very_great-drupal.module', 'verygreatdrupalmodule', 'Single dot, dash, underscore are removed'),
         array('regular,punctuation;word', 'regular punctuation word', 'Punctuation is a word boundary'),
     );
     foreach ($scenarios as $scenario) {
         list($input, $expected, $message) = $scenario;
         // Surrounding whitespace is trimmed before comparing, so only the
         // interior of the simplified string is checked.
         $simplified = trim(search_simplify($input));
         $this->assertEqual($simplified, $expected, $message);
     }
 }
Beispiel #2
0
 /**
  * Verifies that strings of non-CJK characters are not tokenized.
  *
  * This is just a sanity check - it verifies that strings of letters are
  * not tokenized.
  */
 function testNoTokenizer()
 {
     // Configure the index with a minimum word size of 1 and CJK overlap
     // enabled, then refresh so the saved settings are picked up.
     $this->config('search.settings')
         ->set('index.minimum_word_size', 1)
         ->set('index.overlap_cjk', TRUE)
         ->save();
     $this->refreshVariables();

     // A run of plain Latin letters must come back from simplification
     // unchanged (apart from surrounding whitespace, which is trimmed).
     $alphabet = 'abcdefghijklmnopqrstuvwxyz';
     $simplified = trim(search_simplify($alphabet));
     $this->assertEqual($alphabet, $simplified, 'Letters are not CJK tokenized');
 }
Beispiel #3
0
 /**
  * Parses the search query into SQL conditions.
  *
  * Sets up the following variables:
  * - $this->keys
  * - $this->words
  * - $this->conditions
  * - $this->simple
  * - $this->matches
  */
 protected function parseSearchExpression()
 {
     // Tokenize the expression. Each token is either a double-quoted phrase or
     // a run of characters containing no space or quote, optionally prefixed
     // by a - sign. A space is prepended so the first token also matches the
     // leading-space pattern.
     preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);
     if (count($keywords) === 0) {
         return;
     }

     // Classify tokens.
     // $in_or is TRUE only while the previous token was an OR operator, in
     // which case the next plain keyword joins the last positive group.
     $in_or = FALSE;
     $limit_combinations = \Drupal::config('search.settings')->get('and_or_limit');
     // The first search expression does not count as AND.
     $and_count = -1;
     $or_count = 0;
     foreach ($keywords as $match) {
         if ($or_count && $and_count + $or_count >= $limit_combinations) {
             // Ignore all further search expressions to prevent Denial-of-Service
             // attacks using a high number of AND/OR combinations.
             $this->status |= SearchQuery::EXPRESSIONS_IGNORED;
             break;
         }

         // Strip off phrase quotes. A quoted phrase makes the query non-simple.
         $phrase = FALSE;
         if ($match[2][0] === '"') {
             $match[2] = substr($match[2], 1, -1);
             $phrase = TRUE;
             $this->simple = FALSE;
         }

         // Simplify keyword according to indexing rules and external
         // preprocessors. Use same process as during search indexing, so it
         // will match search index.
         $words = search_simplify($match[2]);
         // Re-explode in case simplification added more words, except when
         // matching a phrase, which is kept as one unit.
         $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

         if ($match[1] === '-') {
             // Negative match.
             $this->keys['negative'] = array_merge($this->keys['negative'], $words);
         } elseif ($match[2] === 'OR' && count($this->keys['positive'])) {
             // OR operator: turn the previous positive entry into a group
             // (array) so the following keyword can be appended to it.
             $last = array_pop($this->keys['positive']);
             // Starting a new OR?
             if (!is_array($last)) {
                 $last = array($last);
             }
             $this->keys['positive'][] = $last;
             $in_or = TRUE;
             $or_count++;
             // Skip the flag reset at the bottom of the loop.
             continue;
         } elseif ($match[2] === 'AND' || $match[2] === 'and') {
             // AND is implied between keywords, so the operator is dropped.
             // The OR flag is deliberately left untouched here.
             continue;
         } else {
             // Plain keyword.
             if ($match[2] === 'or') {
                 // Lower-case "or" instead of "OR" is a warning condition.
                 $this->status |= SearchQuery::LOWER_CASE_OR;
             }
             if ($in_or) {
                 // Add to last element (which is an array).
                 $last_index = count($this->keys['positive']) - 1;
                 $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
             } else {
                 $this->keys['positive'] = array_merge($this->keys['positive'], $words);
                 $and_count++;
             }
         }
         $in_or = FALSE;
     }

     // Convert keywords into SQL statements.
     $has_and = FALSE;
     $has_or = FALSE;
     // Positive matches.
     foreach ($this->keys['positive'] as $key) {
         if (is_array($key) && count($key)) {
             // Group of ORed terms.
             // Each OR group contributes at most one required match, so with
             // two or more groups a match count alone cannot show that every
             // group was satisfied. Such a query is therefore not simple.
             // NOTE(review): relies on non-simple queries applying
             // $this->conditions elsewhere in the class - confirm.
             if ($has_or) {
                 $this->simple = FALSE;
             }
             $has_or = TRUE;
             $has_new_scores = FALSE;
             $queryor = db_or();
             foreach ($key as $or_word) {
                 list($num_new_scores) = $this->parseWord($or_word);
                 if ($num_new_scores > 0) {
                     $has_new_scores = TRUE;
                 }
                 $queryor->condition('d.data', "% {$or_word} %", 'LIKE');
             }
             if (count($queryor)) {
                 $this->conditions->condition($queryor);
                 // A group of OR keywords only needs to match once.
                 if ($has_new_scores) {
                     $this->matches += 1;
                 }
             }
         } else {
             // Single ANDed term.
             $has_and = TRUE;
             list($num_new_scores, $num_valid_words) = $this->parseWord($key);
             $this->conditions->condition('d.data', "% {$key} %", 'LIKE');
             if (!$num_valid_words) {
                 $this->simple = FALSE;
             }
             // Each AND keyword needs to match at least once.
             $this->matches += $num_new_scores;
         }
     }
     // Mixing ANDed terms with an OR group is not a simple query either.
     if ($has_and && $has_or) {
         $this->simple = FALSE;
     }

     // Negative matches always require the LIKE conditions.
     foreach ($this->keys['negative'] as $key) {
         $this->conditions->condition('d.data', "% {$key} %", 'NOT LIKE');
         $this->simple = FALSE;
     }
 }