Пример #1
0
 /**
  * Extracts the search keyphrase (plus engine name and SERP rank) from a raw, not encoded referer URL.
  *
  * Extraction is only attempted when $this->is_search_referer() recognizes the referer host.
  * The extracted keyphrase is:
  * - converted to $evo_charset from the charset configured for the engine (or a detected fallback)
  * - lowercased: "QUErY test!" will become "query test!"
  * - trimmed: extra spaces before and after are removed
  *
  * A list of supported search engines can be found in /inc/sessions/model/_search_engines.php
  * The function returns false when neither a keyphrase nor an (empty) keyword parameter could be found,
  * 	 eg. if the url is "http://www.google.com/partners.html" this will return false,
  *       as the google keyword parameter couldn't be found.
  * A referer such as "/search?q=&other_param=text" (keyword parameter present but empty) is still
  * treated as a search referer and yields an empty keyphrase.
  *
  * Side effects:
  * - sets $this->_search_params_tried to true
  * - on success sets $this->_search_engine, $this->_keyphrase and $this->_serprank
  * - on failure downgrades $this->referer_type from 'search' to 'special'
  *
  * @param string URL referer
  * @return array|false false if no search params could be extracted,
  * 						or array(
  * 							'engine_name' => 'Google',
  * 							'keyphrase' => 'my searched keywords',
  *							'serprank' => 4)   // NULL when no numeric rank param was found
  */
 function extract_params_from_referer($ref)
 {
     global $Debuglog, $search_engine_params, $evo_charset, $current_charset;
     // Make sure we don't try params extraction twice
     $this->_search_params_tried = true;
     // The @ silences notices when fewer than four parts are returned; missing parts end up NULL
     @(list($ref_host, $ref_path, $query, $fragment) = $this->is_search_referer($ref, true));
     if (empty($ref_host)) {
         // Not a search referer
         return false;
     }
     $search_engine_name = $search_engine_params[$ref_host][0];
     $keyword_param = NULL;
     if (!empty($search_engine_params[$ref_host][1])) {
         $keyword_param = $search_engine_params[$ref_host][1];
     }
     if (is_null($keyword_param)) {
         // Get settings from first item in group.
         // $url stays unset when the group is unknown, so the isset($url) fallbacks below are skipped.
         $search_engine_names = $this->get_search_engine_names();
         if (isset($search_engine_names[$search_engine_name])) {
             $url = $search_engine_names[$search_engine_name];
             if (isset($search_engine_params[$url][1])) {
                 $keyword_param = $search_engine_params[$url][1];
             }
         }
     }
     if (!is_array($keyword_param)) {
         $keyword_param = array($keyword_param);
     }
     // Always defined, even when none of the branches below assigns it
     $key = NULL;
     if ($search_engine_name == 'Google Images' || ($search_engine_name == 'Google' && strpos($ref, '/imgres') !== false)) {
         // Google image search: the real search URL is carried in the "prev" param
         $search_engine_name = 'Google Images';
         $query = urldecode(trim($this->get_param_from_string($query, 'prev')));
         // Keep only the query part of that URL and normalize HTML-encoded separators
         $query = str_replace('&amp;', '&', strstr($query, '?'));
     } elseif ($search_engine_name == 'Google' && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)) {
         // Google with "as_" param (advanced search): rebuild a single keyphrase from the parts
         $keys = array();
         if ($as_part = $this->get_param_from_string($query, 'as_q')) {
             array_push($keys, $as_part);
         }
         if ($as_part = $this->get_param_from_string($query, 'as_oq')) {
             array_push($keys, str_replace('+', ' OR ', $as_part));
         }
         if ($as_part = $this->get_param_from_string($query, 'as_epq')) {
             array_push($keys, "\"{$as_part}\"");
         }
         if ($as_part = $this->get_param_from_string($query, 'as_eq')) {
             array_push($keys, "-{$as_part}");
         }
         $key = trim(urldecode(implode(' ', $keys)));
     }
     if (empty($key)) {
         // we haven't extracted a search key with the special cases above...
         foreach ($keyword_param as $param) {
             if (is_string($param) && $param !== '' && $param[0] == '/') {
                 // regular expression match against the whole referer
                 if (@preg_match($param, $ref, $matches)) {
                     $key = trim(urldecode($matches[1]));
                     break;
                 }
             } else {
                 // search for keywords now &vname=keyword
                 if ($key = $this->get_param_from_string($query, $param)) {
                     $key = trim(urldecode($key));
                     if (!empty($key)) {
                         break;
                     }
                 }
             }
         }
     }
     $key_param_in_query = false;
     if (empty($key) && !empty($keyword_param)) {
         // Check if empty key param exists in query, e.g. "/search?q=&other_param=text"
         foreach ($keyword_param as $k_param) {
             if (strpos($query, '&' . $k_param . '=') !== false || strpos($query, $k_param . '=') === 0) {
                 // Key param with empty value exists in query, We can decide this referer url as from search engine
                 $key_param_in_query = true;
                 break;
             }
         }
     }
     if (empty($key) && !$key_param_in_query) {
         // Not a search referer
         if ($this->referer_type == 'search') {
             // If the referer was detected as 'search' we need to change it to 'special'
             // to keep search stats clean.
             $this->referer_type = 'special';
             $Debuglog->add('Hit: extract_params_from_referer() overrides referer type set by detect_referer(): "search" -> "special"', 'request');
         }
         return false;
     }
     // Convert encoding: engine specific setting first, then the group's first engine, then defaults
     if (!empty($search_engine_params[$ref_host][3])) {
         $ie = $search_engine_params[$ref_host][3];
     } elseif (isset($url) && !empty($search_engine_params[$url][3])) {
         $ie = $search_engine_params[$url][3];
     } else {
         // Fallback to default encoding
         $ie = array('utf-8', 'iso-8859-15');
     }
     if (is_array($ie)) {
         $candidate_encodings = $ie;
         // Default to the first candidate so that $ie is never left as an array
         // (neither when detection is unavailable nor when no candidate matches)
         $ie = reset($candidate_encodings);
         if (can_check_encoding()) {
             foreach ($candidate_encodings as $test_encoding) {
                 if (check_encoding($key, $test_encoding)) {
                     $ie = $test_encoding;
                     break;
                 }
             }
         }
     }
     $key = convert_charset($key, $evo_charset, $ie);
     // convert to lower string but keep in evo_charset
     // (utf8_strtolower() presumably relies on $current_charset, hence the temporary switch)
     $saved_charset = $current_charset;
     $current_charset = $evo_charset;
     $key = utf8_strtolower($key);
     $current_charset = $saved_charset;
     // Extract the "serp rank"
     // Typically http://google.com?s=keyphraz&start=18 returns 18
     if (!empty($search_engine_params[$ref_host][4])) {
         $serp_param = $search_engine_params[$ref_host][4];
     } elseif (isset($url) && !empty($search_engine_params[$url][4])) {
         $serp_param = $search_engine_params[$url][4];
     } else {
         // Fallback to default params
         $serp_param = array('offset', 'page', 'start');
     }
     if (!is_array($serp_param)) {
         $serp_param = array($serp_param);
     }
     if (strpos($search_engine_name, 'Google') !== false) {
         // Append fragment which Google uses in instant search
         $query .= '&' . $fragment;
     }
     $serprank = NULL;
     foreach ($serp_param as $param) {
         if ($var = $this->get_param_from_string($query, $param)) {
             // Only accept purely numeric values as a rank
             if (ctype_digit($var)) {
                 $serprank = $var;
                 break;
             }
         }
     }
     $this->_search_engine = $search_engine_name;
     $this->_keyphrase = $key;
     $this->_serprank = $serprank;
     return array('engine_name' => $this->_search_engine, 'keyphrase' => $this->_keyphrase, 'serprank' => $this->_serprank);
 }
Пример #2
0
/**
 * Converts the encoding of a string from the old table's character set to the new table's
 * character set (normally utf8), as best as we can.
 *
 * The character sets are read once per table from the "CHARSET=..." option of SHOW CREATE TABLE
 * and cached in $import_session['table_charset_old'] / $import_session['table_charset_new'].
 * The text is returned unchanged when conversion is disabled ($import_session['encode_to_utf8'] == 0),
 * when either charset is unknown, when no conversion is needed, or when the conversion fails.
 *
 * @param string The text to convert
 * @param string The old table (e.x. vB's user table), without prefix
 * @param string The new table (e.x. MyBB's user table), without prefix
 * @return string The converted text (or the original text if it could not / need not be converted)
 */
function encode_to_utf8($text, $old_table_name, $new_table_name)
{
    global $import_session, $db, $module;
    if ($import_session['encode_to_utf8'] == 0) {
        return $text;
    }
    $old_table_name = OLD_TABLE_PREFIX . $old_table_name;
    $new_table_name = TABLE_PREFIX . $new_table_name;
    // Get the character set if needed
    if (empty($import_session['table_charset_old'][$old_table_name]) || empty($import_session['table_charset_new'][$new_table_name])) {
        // The table names above already carry their prefix, so temporarily clear the handlers' prefixes
        $old_table_prefix = $db->table_prefix;
        $db->set_table_prefix('');
        $old_old_db_table_prefix = $module->old_db->table_prefix;
        $module->old_db->set_table_prefix('');
        // Capture only the charset token: the option is usually followed by others
        // (e.g. "DEFAULT CHARSET=utf8 COLLATE=utf8_general_ci"), which a greedy ".*" would swallow.
        $table = $module->old_db->show_create_table($old_table_name);
        $old_charset = preg_match('#CHARSET=([a-z0-9_]+)#i', (string) $table, $match) ? $match[1] : '';
        $table = $db->show_create_table($new_table_name);
        $new_charset = preg_match('#CHARSET=([a-z0-9_]+)#i', (string) $table, $match) ? $match[1] : '';
        $db->set_table_prefix($old_table_prefix);
        $module->old_db->set_table_prefix($old_old_db_table_prefix);
        $import_session['table_charset_old'][$old_table_name] = $old_charset;
        $import_session['table_charset_new'][$new_table_name] = $new_charset;
    }
    $old_charset = $import_session['table_charset_old'][$old_table_name];
    $new_charset = $import_session['table_charset_new'][$new_table_name];
    // Without both charsets we cannot know how to convert
    if ($old_charset == '' || $new_charset == '') {
        return $text;
    }
    $to_encoding = fetch_iconv_encoding($new_charset);
    // Same charset on both sides and the text is already valid in it: nothing to do
    if ($new_charset == $old_charset && check_encoding($text, $to_encoding) !== false) {
        return $text;
    }
    $from_encoding = fetch_iconv_encoding($old_charset);
    if (!function_exists('iconv')) {
        // Without iconv only iso-8859-1 input can be converted, via utf8_encode()
        if ($from_encoding != 'iso-8859-1' || !function_exists("utf8_encode")) {
            return $text;
        }
        return utf8_encode($text);
    }
    $converted_str = iconv($from_encoding, $to_encoding . '//TRANSLIT', $text);
    // Do we have bad characters? (i.e. db/table encoding set to UTF-8 but string is actually ISO)
    if ($converted_str === false || my_strlen($converted_str) < my_strlen($text)) {
        // Was our database/tables set to UTF-8 encoding and the data actually in iso encoding?
        // Stop trying to confuse us!!
        $converted_str = iconv("iso-8859-1", $to_encoding . '//IGNORE', $text);
        if ($converted_str !== false && my_strlen($converted_str) >= my_strlen($text)) {
            return $converted_str;
        }
    }
    // Try to convert, but don't stop when a character cannot be converted
    $converted_str = iconv($from_encoding, $to_encoding . '//IGNORE', $text);
    // iconv() returns false on failure; keep the original text rather than losing it
    return $converted_str === false ? $text : $converted_str;
}