/**
 * Extracts the search parameters (engine name, keyphrase, SERP rank) from a raw, not encoded referer URL.
 *
 * Parameters are only extracted if a known search engine has been detected
 * by is_search_referer(). The returned keyphrase is:
 * - converted from the engine's charset to $evo_charset via convert_charset()
 * - lowercased: "QUErY test!" will return "query test!"
 * - trimmed: extra spaces before and after are removed
 *
 * A list of supported search engines can be found in /inc/sessions/model/_search_engines.php
 * Each entry of $search_engine_params is read here as:
 *   [0] engine name, [1] keyword param(s) (a param starting with "/" is treated as a regexp
 *   applied to the whole referer), [3] input charset(s), [4] SERP rank param(s).
 *
 * The function returns false when a keyphrase couldn't be found,
 * eg. if the url is "http://www.google.com/partners.html" this will return false,
 * as the google keyword parameter couldn't be found. An URL where the keyword param
 * is present but empty (e.g. "/search?q=&other_param=text") is still accepted as a
 * search referer, with an empty keyphrase.
 *
 * Side effects:
 * - sets $this->_search_params_tried
 * - on success sets $this->_search_engine, $this->_keyphrase and $this->_serprank
 * - on failure downgrades $this->referer_type from 'search' to 'special'
 *
 * @param string URL referer
 * @return array|false false if a keyphrase couldn't be extracted,
 * 		or array(
 * 			'engine_name' => 'Google',
 * 			'keyphrase' => 'my searched keywords',
 * 			'serprank' => 4 )   (serprank is NULL when no numeric rank param was found)
 */
function extract_params_from_referer($ref)
{
	global $Debuglog, $search_engine_params, $evo_charset, $current_charset;

	// Make sure we don't try params extraction twice
	$this->_search_params_tried = true;

	// "@" silences the notices raised when is_search_referer() does not return a 4 items array
	@(list($ref_host, $ref_path, $query, $fragment) = $this->is_search_referer($ref, true));

	if (empty($ref_host)) {
		// Not a search referer
		return false;
	}

	$search_engine_name = $search_engine_params[$ref_host][0];

	$keyword_param = NULL;
	if (!empty($search_engine_params[$ref_host][1])) {
		$keyword_param = $search_engine_params[$ref_host][1];
	}
	if (is_null($keyword_param)) {
		// Get settings from first item in group
		// ($url stays defined below and is used as fallback key for charset and serp settings)
		$search_engine_names = $this->get_search_engine_names();
		$url = $search_engine_names[$search_engine_name];
		$keyword_param = $search_engine_params[$url][1];
	}
	if (!is_array($keyword_param)) {
		$keyword_param = array($keyword_param);
	}

	// Defined up front so that none of the paths below reads an undefined variable
	$key = NULL;
	$serprank = NULL;

	if ($search_engine_name == 'Google Images'
		|| ($search_engine_name == 'Google' && strpos($ref, '/imgres') !== false)) {
		// Google image search: the real search query is embedded in the "prev" param
		$search_engine_name = 'Google Images';
		$query = urldecode(trim($this->get_param_from_string($query, 'prev')));
		// NOTE(review): search and replacement strings are identical, so this replace is a no-op
		// as written (only the strstr() part has an effect) -- confirm the intended replacement.
		$query = str_replace('&', '&', strstr($query, '?'));
	} elseif ($search_engine_name == 'Google'
		&& (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)) {
		// Google with "as_" param (advanced search): rebuild the keyphrase from its parts
		$keys = array();
		if ($key = $this->get_param_from_string($query, 'as_q')) {
			array_push($keys, $key);
		}
		if ($key = $this->get_param_from_string($query, 'as_oq')) {
			array_push($keys, str_replace('+', ' OR ', $key));
		}
		if ($key = $this->get_param_from_string($query, 'as_epq')) {
			array_push($keys, "\"{$key}\"");
		}
		if ($key = $this->get_param_from_string($query, 'as_eq')) {
			array_push($keys, "-{$key}");
		}
		$key = trim(urldecode(implode(' ', $keys)));
	}

	if (empty($key)) {
		// we haven't extracted a search key with the special cases above...
		foreach ($keyword_param as $param) {
			// Guard the offset access: an empty or non-string param must not raise a warning
			$is_regexp = is_string($param) && $param !== '' && $param[0] == '/';

			if ($is_regexp) {
				// regular expression match ("@": the pattern comes from configuration and may be invalid)
				if (@preg_match($param, $ref, $matches)) {
					// A pattern without capture group yields an empty key instead of a notice
					$key = isset($matches[1]) ? trim(urldecode($matches[1])) : '';
					break;
				}
			} else {
				// search for keywords now &vname=keyword
				if ($key = $this->get_param_from_string($query, $param)) {
					$key = trim(urldecode($key));
					if (!empty($key)) {
						break;
					}
				}
			}
		}
	}

	$key_param_in_query = false;
	if (empty($key) && !empty($keyword_param)) {
		// Check if empty key param exists in query, e.g. "/search?q=&other_param=text"
		foreach ($keyword_param as $k_param) {
			if (strpos($query, '&' . $k_param . '=') !== false || strpos($query, $k_param . '=') === 0) {
				// Key param with empty value exists in query,
				// We can decide this referer url as from search engine
				$key_param_in_query = true;
				break;
			}
		}
	}

	if (empty($key) && !$key_param_in_query) {
		// Not a search referer
		if ($this->referer_type == 'search') {
			// If the referer was detected as 'search' we need to change it to 'special'
			// to keep search stats clean.
			$this->referer_type = 'special';
			$Debuglog->add('Hit: extract_params_from_referer() overrides referer type set by detect_referer(): "search" -> "special"', 'request');
		}
		return false;
	}

	// Convert encoding
	if (!empty($search_engine_params[$ref_host][3])) {
		$ie = $search_engine_params[$ref_host][3];
	} elseif (isset($url) && !empty($search_engine_params[$url][3])) {
		$ie = $search_engine_params[$url][3];
	} else {
		// Fallback to default encoding
		$ie = array('utf-8', 'iso-8859-15');
	}

	if (is_array($ie)) {
		if (can_check_encoding()) {
			// Use the first candidate charset the key validates against
			// NOTE(review): if no candidate validates, $ie is passed on as an array --
			// confirm that convert_charset() accepts a list of source charsets.
			foreach ($ie as $test_encoding) {
				if (check_encoding($key, $test_encoding)) {
					$ie = $test_encoding;
					break;
				}
			}
		} else {
			$ie = $ie[0];
		}
	}

	$key = convert_charset($key, $evo_charset, $ie);

	// convert to lower string but keep in evo_charset
	// ($current_charset is switched temporarily and restored right after the call)
	$saved_charset = $current_charset;
	$current_charset = $evo_charset;
	$key = utf8_strtolower($key);
	$current_charset = $saved_charset;

	// Extract the "serp rank"
	// Typically http://google.com?s=keyphraz&start=18 returns 18
	if (!empty($search_engine_params[$ref_host][4])) {
		$serp_param = $search_engine_params[$ref_host][4];
	} elseif (isset($url) && !empty($search_engine_params[$url][4])) {
		$serp_param = $search_engine_params[$url][4];
	} else {
		// Fallback to default params
		$serp_param = array('offset', 'page', 'start');
	}
	if (!is_array($serp_param)) {
		$serp_param = array($serp_param);
	}

	if (strpos($search_engine_name, 'Google') !== false) {
		// Append fragment which Google uses in instant search
		$query .= '&' . $fragment;
	}

	foreach ($serp_param as $param) {
		if ($var = $this->get_param_from_string($query, $param)) {
			// Only a purely numeric value is accepted as rank
			if (ctype_digit($var)) {
				$serprank = $var;
				break;
			}
		}
	}

	$this->_search_engine = $search_engine_name;
	$this->_keyphrase = $key;
	$this->_serprank = $serprank;

	return array(
		'engine_name' => $this->_search_engine,
		'keyphrase' => $this->_keyphrase,
		'serprank' => $this->_serprank,
	);
}
/**
 * Converts the encoding of a string from the charset of the old (source) table
 * to the charset of the new (destination) table, as best as we can.
 *
 * Despite the function name, the target encoding is whatever charset the new table
 * declares (as mapped by fetch_iconv_encoding()), not necessarily UTF-8.
 *
 * The charsets are read once per table from the "CHARSET=..." option of the
 * SHOW CREATE TABLE output and cached in $import_session['table_charset_old'] and
 * $import_session['table_charset_new'], keyed by the prefixed table name.
 *
 * The text is returned unchanged when:
 * - $import_session['encode_to_utf8'] is unset or 0
 * - one of the two charsets could not be determined
 * - both charsets are equal and the text already validates against that charset
 * - iconv is not available and the text can't be handled by utf8_encode()
 * - the final iconv() call fails
 *
 * @param string The text to convert
 * @param string The old table (e.x. vB's user table), without table prefix
 * @param string The new table (e.x. MyBB's user table), without table prefix
 * @return string The converted text
 */
function encode_to_utf8($text, $old_table_name, $new_table_name)
{
	global $import_session, $db, $module;

	// Conversion disabled (a missing setting counts as disabled)
	if (!isset($import_session['encode_to_utf8']) || $import_session['encode_to_utf8'] == 0) {
		return $text;
	}

	$old_table_name = OLD_TABLE_PREFIX . $old_table_name;
	$new_table_name = TABLE_PREFIX . $new_table_name;

	// Get the character set if needed
	if (empty($import_session['table_charset_old'][$old_table_name]) || empty($import_session['table_charset_new'][$new_table_name])) {
		// The table names above are already prefixed; the prefixes of both connections are
		// cleared for the lookup (presumably so that they are not prepended a second time)
		// and restored afterwards.
		$old_table_prefix = $db->table_prefix;
		$db->set_table_prefix('');
		$old_old_db_table_prefix = $module->old_db->table_prefix;
		$module->old_db->set_table_prefix('');

		// Capture only the charset token itself: a greedy "(.*)" would also swallow
		// whatever follows on the same line (e.g. " COLLATE=utf8_general_ci").
		$charset_pattern = "#CHARSET=([a-z0-9_]+)#i";

		$table = $module->old_db->show_create_table($old_table_name);
		$old_charset = array();
		preg_match($charset_pattern, $table, $old_charset);

		$table = $db->show_create_table($new_table_name);
		$new_charset = array();
		preg_match($charset_pattern, $table, $new_charset);

		$db->set_table_prefix($old_table_prefix);
		$module->old_db->set_table_prefix($old_old_db_table_prefix);

		// No CHARSET option found => store an empty string, which disables the conversion below
		$import_session['table_charset_old'][$old_table_name] = isset($old_charset[1]) ? $old_charset[1] : '';
		$import_session['table_charset_new'][$new_table_name] = isset($new_charset[1]) ? $new_charset[1] : '';
	}

	$old_charset_name = $import_session['table_charset_old'][$old_table_name];
	$new_charset_name = $import_session['table_charset_new'][$new_table_name];

	// Convert as needed: the charsets differ, or they are equal but the text
	// does not validate against the declared charset
	if (($new_charset_name != $old_charset_name || check_encoding($text, fetch_iconv_encoding($new_charset_name)) === false)
		&& $old_charset_name != '' && $new_charset_name != '') {
		if (!function_exists('iconv')) {
			// Without iconv only iso-8859-1 input can be converted, through utf8_encode()
			if (fetch_iconv_encoding($old_charset_name) != 'iso-8859-1' || !function_exists("utf8_encode")) {
				return $text;
			}

			return utf8_encode($text);
		}

		$from_encoding = fetch_iconv_encoding($old_charset_name);
		$to_encoding = fetch_iconv_encoding($new_charset_name);

		$converted_str = iconv($from_encoding, $to_encoding . '//TRANSLIT', $text);

		// Do we have bad characters? (i.e. db/table encoding set to UTF-8 but string is actually ISO)
		// A failed iconv() (false) is caught by this length comparison as well.
		if (my_strlen($converted_str) < my_strlen($text)) {
			// Was our database/tables set to UTF-8 encoding and the data actually in iso encoding?
			// Stop trying to confuse us!!
			$converted_str = iconv("iso-8859-1", $to_encoding . '//IGNORE', $text);
			if (my_strlen($converted_str) >= my_strlen($text)) {
				return $converted_str;
			}
		}

		// Try to convert, but don't stop when a character cannot be converted
		$converted_str = iconv($from_encoding, $to_encoding . '//IGNORE', $text);

		// iconv() returns false on failure: hand back the original text rather than a boolean
		return ($converted_str === false) ? $text : $converted_str;
	}

	return $text;
}