示例#1
0
 /**
  * Converts the HTML entities &lt;, &gt;, &quot; and &amp; in a string back
  * to the literal characters they stand for.
  *
  * When $doUniCode is true, decimal numeric entities (&#NNN;) are first
  * replaced with whatever vB5_String::convertIntToUtf8() returns for the
  * captured number.
  *
  * @param	string	Text containing the entities to decode
  * @param	boolean	Also decode decimal numeric entities (&#NNN;)?
  *
  * @return	string	The decoded text
  */
 public static function unHtmlSpecialChars($text, $doUniCode = false)
 {
     // str_replace() works through the pairs in order; '&amp;' comes last so
     // that double-escaped input such as '&amp;lt;' ends up as '&lt;', not '<'.
     $entities = array('&lt;', '&gt;', '&quot;', '&amp;');
     $characters = array('<', '>', '"', '&');

     if (!$doUniCode) {
         return str_replace($entities, $characters, $text);
     }

     // Hand the captured decimal code point of each numeric entity to the converter.
     $decodeNumericEntity = function ($hit) {
         return vB5_String::convertIntToUtf8($hit[1]);
     };
     $decoded = preg_replace_callback('/&#([0-9]+);/siU', $decodeNumericEntity, $text);

     return str_replace($entities, $characters, $decoded);
 }
示例#2
0
 /**
  * Break the keyword search string into word records.
  *
  * Punctuation (except the ASCII hyphen) is replaced by spaces, the result
  * is split into tokens and lower-cased, and each token is then treated
  * either as a joiner ('and', 'or', 'not', or a leading '-' meaning NOT) or
  * as a word. Words rejected by vB_Api_Search::is_index_word() are appended
  * to $this->ignored_keywords instead of being returned. A '-' prefixed
  * token that directly follows a joiner registers an
  * 'invalid_search_syntax' error via $this->add_error().
  *
  * @param string keywords -- keyword string as entered by the user
  * @return array -- array of word records
  *  array('word' => $word,  'joiner' => {'', 'NOT', 'AND', 'OR'})
  *  The search implementation is expected to use these to build the search
  *  query.
  */
 private function get_words($keywords)
 {
     // Multibyte handling is enabled when the input contains a decimal
     // numeric HTML entity or any byte outside the ASCII range.
     $is_mb = preg_match('/&#([0-9]+);|[^\\x00-\\x7F]/siU', $keywords);
     // @todo handling for thousand and decimal separators for numbers
     $origKeywords = $keywords;
     // Replace punctuation with spaces. The (?!-) lookahead spares the ASCII
     // hyphen so it can still act as the "not" prefix below.
     // NOTE(review): \p{Po} appears to cover the ASCII double quote as well,
     // so the quoted-phrase alternative of the tokenizer may never match --
     // confirm whether that is intended.
     $keywords = preg_replace('#(?!-)[\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]#' . ($is_mb ? 'u' : ''), ' ', $keywords);
     if ($keywords === null) {
         // preg_replace() returns null on failure (e.g. the 'u' modifier on
         // input that is not valid UTF-8). Continue with an empty string so
         // null is not passed on to preg_match_all(); the original keywords
         // are then reported as ignored at the end of this method.
         $keywords = '';
     }
     // a tokenizing based approach to building a search query
     preg_match_all('#("[^"]*"|[^\\s]+)#', $keywords, $matches, PREG_SET_ORDER);
     $token_joiner = null;
     $words = array();
     foreach ($matches as $match) {
         if ($is_mb) {
             // Decode numeric entities in every element of the match, then
             // lower-case with the multibyte-aware helper.
             $match = preg_replace_callback('/&#([0-9]+);/siU', function ($matches) {
                 return vB5_String::convertIntToUtf8($matches[1]);
             }, $match);
             $token = vB_String::vBStrToLower($match[1]);
         } else {
             $token = strtolower($match[1]);
         }
         // A leading '-' means that we implicitly have a not joiner.
         if (isset($token[0]) && $token[0] === '-') {
             // this effectively means two joiners, which is bad.
             if ($token_joiner) {
                 $this->add_error('invalid_search_syntax');
             } else {
                 $token = substr($token, 1);
                 $token_joiner = 'NOT';
             }
         }
         switch ($token) {
             case 'or':
             case 'and':
             case 'not':
                 // this isn't a searchable word, but a joiner
                 $token_joiner = strtoupper($token);
                 break;
             default:
                 if (vB_Api_Search::is_index_word($token, true)) {
                     // The joiner is null when none preceded this word; cast
                     // it so strtoupper() always receives a string.
                     $words[] = array('word' => $token, 'joiner' => strtoupper((string) $token_joiner));
                 } else {
                     $this->ignored_keywords[] = $match[1];
                 }
                 // A joiner only applies to the word that follows it.
                 $token_joiner = null;
                 break;
         }
     }
     // Nothing could be tokenized at all: report the whole input as ignored.
     if (empty($matches) && !empty($origKeywords)) {
         $this->ignored_keywords[] = $origKeywords;
     }
     return $words;
 }