/**
 * Returns a string where HTML entities have been converted back to their original characters.
 *
 * Only the four entities &lt;, &gt;, &quot; and &amp; are decoded. When
 * $doUniCode is true, decimal numeric entities (&#NNN;) are expanded first by
 * passing the captured number to vB5_String::convertIntToUtf8().
 *
 * @param string  $text      String to be parsed
 * @param boolean $doUniCode Convert unicode characters back from HTML entities?
 *
 * @return string
 */
public static function unHtmlSpecialChars($text, $doUniCode = false)
{
	if ($doUniCode)
	{
		$text = preg_replace_callback(
			'/&#([0-9]+);/siU',
			function ($entity)
			{
				// $entity[1] is the decimal code point captured between "&#" and ";".
				return vB5_String::convertIntToUtf8($entity[1]);
			},
			$text
		);
	}

	// str_replace applies the pairs in order over the whole string, so &amp;
	// must stay last: "&amp;lt;" then decodes to "&lt;" rather than to "<".
	return str_replace(
		array('&lt;', '&gt;', '&quot;', '&amp;'),
		array('<', '>', '"', '&'),
		$text
	);
}
/**
 * Break the keyword search into words.
 *
 * The keyword string is stripped of punctuation (a leading '-' is kept, it
 * acts as an implicit NOT), split into whitespace-delimited tokens and
 * lower-cased. The tokens 'and', 'or' and 'not' are treated as joiners for
 * the next word instead of being searched for. Tokens rejected by
 * vB_Api_Search::is_index_word() are appended to $this->ignored_keywords.
 *
 * @param string $keywords Keyword string as entered by the user
 *
 * @return array Array of word records:
 *               array('word' => $word, 'joiner' => {'', 'NOT', 'AND', 'OR'})
 *               The search implementation is expected to use these to build
 *               the search query.
 */
private function get_words($keywords)
{
	// Non-zero when the input holds numeric entities or any non-ASCII byte.
	$is_mb = preg_match('/&#([0-9]+);|[^\\x00-\\x7F]/siU', $keywords);

	// @todo handling for thousand and decimal separators for numbers
	$origKeywords = $keywords;

	// Removing punctuation (everything in the listed Unicode punctuation
	// categories except a literal '-').
	// NOTE(review): '&', '#', ';' and '"' look like they fall into \p{Po} and
	// would therefore be replaced here, before the entity decoding and the
	// quoted-phrase branch below get to see them -- confirm whether intended.
	$keywords = preg_replace(
		'#(?!-)[\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]#' . ($is_mb ? 'u' : ''),
		' ',
		$keywords
	);

	// preg_replace() yields null on a PCRE error (e.g. malformed UTF-8 with the
	// 'u' modifier). Normalise to an empty string so preg_match_all() is not
	// handed null; the outcome is unchanged: no tokens are produced and the
	// original keywords are reported as ignored below.
	if ($keywords === null)
	{
		$keywords = '';
	}

	// A tokenizing based approach to building a search query.
	preg_match_all('#("[^"]*"|[^\\s]+)#', $keywords, $matches, PREG_SET_ORDER);

	$token_joiner = null;
	$words = array();

	foreach ($matches as $match)
	{
		if ($is_mb)
		{
			// $match is an array; preg_replace_callback processes every element.
			$match = preg_replace_callback(
				'/&#([0-9]+);/siU',
				function ($entity)
				{
					return vB5_String::convertIntToUtf8($entity[1]);
				},
				$match
			);
			$token = vB_String::vBStrToLower($match[1]);
		}
		else
		{
			$token = strtolower($match[1]);
		}

		// This means that we implicitly have a not joiner.
		if ($token[0] == '-')
		{
			if ($token_joiner)
			{
				// This effectively means two joiners, which is bad.
				$this->add_error('invalid_search_syntax');
			}
			else
			{
				$token = substr($token, 1);
				$token_joiner = 'not';
			}
		}

		switch ($token)
		{
			case 'or':
			case 'and':
			case 'not':
				// This isn't a searchable word, but a joiner for the next one.
				$token_joiner = strtoupper($token);
				break;

			default:
				if (vB_Api_Search::is_index_word($token, true))
				{
					$words[] = array(
						'word'   => $token,
						// Cast first: $token_joiner is null when no joiner
						// precedes the word, and passing null to strtoupper()
						// is deprecated as of PHP 8.1. The result is still ''.
						'joiner' => strtoupper((string) $token_joiner),
					);
				}
				else
				{
					$this->ignored_keywords[] = $match[1];
				}
				$token_joiner = null;
				break;
		}
	}

	// Nothing could be tokenized at all: report the whole input as ignored.
	if (empty($matches) and !empty($origKeywords))
	{
		$this->ignored_keywords[] = $origKeywords;
	}

	return $words;
}