/**
 * Split a boolean search query into categorised term lists.
 *
 * The query is converted to Latin-1 (utf8_decode(), then html_to_latin1()),
 * quoted phrases are extracted first, and the remaining text is lower-cased
 * and split on single spaces. A leading "-" excludes a phrase or word; a
 * leading "+" or no prefix includes a word.
 *
 * Keys of the returned array (each key exists only if it received entries):
 *   '+s'      included phrases, exactly as they appeared between the quotes
 *   '-s'      excluded phrases, exactly as they appeared between the quotes
 *   '+'       included words (plain words and the lower-cased words of
 *             included phrases) for which ignoreWord() returns false
 *   '-'       excluded words, without the leading "-"
 *   'ignore'  included words for which ignoreWord() returns true
 *   'hilight' included phrases, non-ignored included plain words and, when
 *             the global $stem_words equals 1, the stem() of each such word
 *
 * @param string $a Raw query string.
 * @return array Map of list name to list of strings, as described above.
 */
function makeboollist($a)
{
    global $stem_words;

    // If words are passed as UTF-8, translate them to Latin-1.
    // NOTE(review): utf8_decode() is deprecated as of PHP 8.2; kept here so
    // the function does not gain a dependency on the mbstring extension.
    $a = utf8_decode($a);
    // If any words are passed as HTML entities, translate them to Latin-1.
    $a = html_to_latin1($a);
    $a = trim($a);
    // Turn any remaining &quot; entity into a literal double quote so that
    // the phrase matcher below can see it.
    $a = preg_replace("/&quot;/i", "\"", $a);

    $returnWords = array();

    // Extract every quoted phrase, optionally prefixed by "-".
    $regs = array();
    while (preg_match("/([-]?)\"([^\"]+)\"/", $a, $regs)) {
        if ($regs[1] == '') {
            $returnWords['+s'][] = $regs[2];
            $returnWords['hilight'][] = $regs[2];
        } else {
            $returnWords['-s'][] = $regs[2];
        }
        // Remove the phrase from the search string; this is also what
        // guarantees the loop terminates.
        $a = str_replace($regs[0], "", $a);
    }

    // Collapse runs of spaces, lower-case, and drop leading/trailing spaces.
    $a = trim(strtolower(preg_replace("/[ ]+/", " ", $a)));

    // explode() on an empty string yields one empty element, so treat an
    // empty remainder as "no words".
    $words = ($a == "") ? array() : explode(' ', $a);

    // Classify the single words (both include and exclude).
    $includeWords = array();
    foreach ($words as $word) {
        $prefix = substr($word, 0, 1);
        if ($prefix == '-') {
            $returnWords['-'][] = substr($word, 1);
            continue;
        }
        $term = ($prefix == '+') ? substr($word, 1) : $word;
        $includeWords[] = $term;
        if (!ignoreWord($term)) {
            $returnWords['hilight'][] = $term;
            if ($stem_words == 1) {
                $returnWords['hilight'][] = stem($term);
            }
        }
    }

    // Add the individual words of included phrases to the includes.
    if (isset($returnWords['+s'])) {
        foreach ($returnWords['+s'] as $phrase) {
            $phrase = trim(strtolower(preg_replace("/[ ]+/", " ", $phrase)));
            foreach (explode(' ', $phrase) as $w) {
                $includeWords[] = $w;
            }
        }
    }

    // Sort every non-empty include word into 'ignore' or '+'.
    foreach ($includeWords as $word) {
        if ($word == '') {
            continue;
        }
        if (ignoreWord($word)) {
            $returnWords['ignore'][] = $word;
        } else {
            $returnWords['+'][] = $word;
        }
    }

    return $returnWords;
}
Example #2
0
/**
 * Split a boolean search query into categorised term lists.
 *
 * Every key of the global $entities map is first replaced
 * (case-insensitively, as a regular expression) by its value. Quoted phrases
 * are then extracted, and the remaining text is lower-cased and split on
 * single spaces. A leading "-" excludes a phrase or word; a leading "+" or
 * no prefix includes a word.
 *
 * Keys of the returned array (each key exists only if it received entries):
 *   '+s'      included phrases, exactly as they appeared between the quotes
 *   '-s'      excluded phrases, exactly as they appeared between the quotes
 *   '+'       included words (plain words and the lower-cased words of
 *             included phrases) for which ignoreWord() returns false
 *   '-'       excluded words, without the leading "-"
 *   'ignore'  included words for which ignoreWord() returns true
 *   'hilight' included phrases, non-ignored included plain words and, when
 *             the global $stem_words equals 1, the stem() of each such word
 *
 * @param string $a Raw query string.
 * @return array Map of list name to list of strings, as described above.
 */
function makeboollist($a)
{
    global $entities, $stem_words;

    // foreach replaces the former each() loop: each() no longer exists in
    // PHP 8 and, because it never rewound the array pointer of the global,
    // only the first call of this function actually translated entities.
    // NOTE(review): keys are used as regex fragments unescaped, as before;
    // presumably they are plain entity strings - confirm where $entities is
    // defined.
    if (is_array($entities)) {
        foreach ($entities as $entity => $replacement) {
            $a = preg_replace("/" . $entity . "/i", $replacement, $a);
        }
    }
    $a = trim($a);
    // Turn &quot; into a literal double quote so phrases can be detected.
    $a = preg_replace("/&quot;/i", "\"", $a);

    $returnWords = array();

    // Extract every quoted phrase, optionally prefixed by "-".
    $regs = array();
    while (preg_match("/([-]?)\"([^\"]+)\"/", $a, $regs)) {
        if ($regs[1] == '') {
            $returnWords['+s'][] = $regs[2];
            $returnWords['hilight'][] = $regs[2];
        } else {
            $returnWords['-s'][] = $regs[2];
        }
        // Removing the phrase from the query is what ends the loop.
        $a = str_replace($regs[0], "", $a);
    }

    // Collapse runs of spaces, lower-case, and drop leading/trailing spaces.
    $a = trim(strtolower(preg_replace("/[ ]+/", " ", $a)));

    // explode() on an empty string yields one empty element, so treat an
    // empty remainder as "no words".
    $words = ($a == "") ? array() : explode(' ', $a);

    // Classify the single words (both include and exclude).
    $includeWords = array();
    foreach ($words as $word) {
        $prefix = substr($word, 0, 1);
        if ($prefix == '-') {
            $returnWords['-'][] = substr($word, 1);
            continue;
        }
        $term = ($prefix == '+') ? substr($word, 1) : $word;
        $includeWords[] = $term;
        if (!ignoreWord($term)) {
            $returnWords['hilight'][] = $term;
            if ($stem_words == 1) {
                $returnWords['hilight'][] = stem($term);
            }
        }
    }

    // Add the individual words of included phrases to the includes.
    if (isset($returnWords['+s'])) {
        foreach ($returnWords['+s'] as $phrase) {
            $phrase = trim(strtolower(preg_replace("/[ ]+/", " ", $phrase)));
            foreach (explode(' ', $phrase) as $w) {
                $includeWords[] = $w;
            }
        }
    }

    // Sort every non-empty include word into 'ignore' or '+'.
    foreach ($includeWords as $word) {
        if ($word == '') {
            continue;
        }
        if (ignoreWord($word)) {
            $returnWords['ignore'][] = $word;
        } else {
            $returnWords['+'][] = $word;
        }
    }

    return $returnWords;
}
Example #3
0
 /**
  * Split a boolean search query into categorised term lists.
  *
  * Every key of $this->entities is first replaced (case-insensitively, as a
  * regular expression) by its value. Quoted phrases are then extracted, and
  * the remaining text is split on single spaces. Words are folded with
  * lower_case() unless Configure::read('case_sensitive') equals 1. A leading
  * "-" excludes a phrase or word; a leading "+" or no prefix includes a word.
  *
  * Keys of the returned array (each key exists only if it received entries):
  *   '+s'      included phrases, exactly as they appeared between the quotes
  *   '-s'      excluded phrases, exactly as they appeared between the quotes
  *   '+'       included words (plain words and the words of included
  *             phrases) for which $this->ignoreWord() returns false
  *   '-'       excluded words, without the leading "-"
  *   'ignore'  included words for which $this->ignoreWord() returns true
  *   'hilight' included phrases, non-ignored included plain words and, when
  *             Configure::read('stem_words') equals 1, the stem() of each
  *             such word
  *
  * @param string $a Raw query string.
  * @return array Map of list name to list of strings, as described above.
  */
 function makeboollist($a)
 {
     // foreach replaces the former each() loop: each() no longer exists in
     // PHP 8 and never rewound the array pointer, so only the first call
     // on an instance actually translated entities.
     // NOTE(review): keys are used as regex fragments unescaped (they were
     // POSIX patterns under eregi_replace); presumably they are plain entity
     // strings - confirm where $this->entities is populated.
     if (is_array($this->entities)) {
         foreach ($this->entities as $entity => $replacement) {
             $a = preg_replace("/" . $entity . "/i", $replacement, $a);
         }
     }
     $a = trim($a);
     // Turn &quot; into a literal double quote so phrases can be detected.
     $a = preg_replace("/&quot;/i", "\"", $a);

     $returnWords = array();

     // Extract every quoted phrase, optionally prefixed by "-".
     $regs = array();
     while (preg_match("/([-]?)\"([^\"]+)\"/", $a, $regs)) {
         if ($regs[1] == '') {
             $returnWords['+s'][] = $regs[2];
             $returnWords['hilight'][] = $regs[2];
         } else {
             $returnWords['-s'][] = $regs[2];
         }
         // Removing the phrase from the query is what ends the loop.
         $a = str_replace($regs[0], "", $a);
     }

     // One test for both the plain words and the phrase words below, so the
     // two are always folded (or not folded) the same way.
     $caseSensitive = (Configure::read('case_sensitive') == 1);

     // Collapse runs of spaces and fold case unless searches are case sensitive.
     $a = preg_replace("/[ ]+/", " ", $a);
     if (!$caseSensitive) {
         $a = lower_case($a);
     }
     $a = trim($a);

     // explode() on an empty string yields one empty element, so treat an
     // empty remainder as "no words".
     $words = ($a == "") ? array() : explode(' ', $a);

     // Classify the single words (both include and exclude).
     $includeWords = array();
     foreach ($words as $word) {
         $prefix = substr($word, 0, 1);
         if ($prefix == '-') {
             $returnWords['-'][] = substr($word, 1);
             continue;
         }
         $term = ($prefix == '+') ? substr($word, 1) : $word;
         $includeWords[] = $term;
         // Uses the method, like every other ignore check in this function.
         if (!$this->ignoreWord($term)) {
             $returnWords['hilight'][] = $term;
             if (Configure::read('stem_words') == 1) {
                 $returnWords['hilight'][] = stem($term);
             }
         }
     }

     // Add the individual words of included phrases to the includes.
     if (isset($returnWords['+s'])) {
         foreach ($returnWords['+s'] as $phrase) {
             $phrase = preg_replace("/[ ]+/", " ", $phrase);
             if (!$caseSensitive) {
                 $phrase = lower_case($phrase);
             }
             $phrase = trim($phrase);
             foreach (explode(' ', $phrase) as $w) {
                 $includeWords[] = $w;
             }
         }
     }

     // Sort every non-empty include word into 'ignore' or '+'.
     foreach ($includeWords as $word) {
         if ($word == '') {
             continue;
         }
         if ($this->ignoreWord($word)) {
             $returnWords['ignore'][] = $word;
         } else {
             $returnWords['+'][] = $word;
         }
     }

     return $returnWords;
 }