示例#1
0
/**
 * Builds the URL of a search engine from its name.
 *
 * The name is used as a key into the map returned by
 * Common::getSearchEngineNames(); when an entry exists, its value is
 * prefixed with "http://".
 *
 * @see core/DataFiles/SearchEngines.php
 *
 * @param string $name Search engine name used as the lookup key
 * @return string "http://" followed by the mapped value, or the literal
 *                string 'URL unknown!' when the name has no entry
 */
function getSearchEngineUrlFromName($name)
{
    $knownEngines = Common::getSearchEngineNames();

    // Guard clause: unknown names get the placeholder text, not a URL.
    if (!isset($knownEngines[$name])) {
        return 'URL unknown!';
    }

    return 'http://' . $knownEngines[$name];
}
示例#2
0
 /**
  * Extracts the search engine name and the searched keyword from a raw
  * (not encoded) referrer URL.
  *
  * A result is only produced when the URL matches a known search engine
  * definition. The keyword, when present, is:
  * - in UTF8: converted with iconv when the definition lists charsets
  * - lowercased: "QUErY test!" will return "query test!"
  * - trimmed: extra spaces before and after are removed
  *
  * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php
  *
  * The function returns false when no search engine is detected or when no
  * keyword could be found,
  *     eg. if the url is "http://www.google.com/partners.html" this will return false,
  *       as the google keyword parameter couldn't be found.
  * For a few special cases (see the keyword loop below) the search engine is
  * still reported and 'keywords' is set to false, meaning "search engine
  * matched, but no keyword was provided".
  *
  * @see unit tests in /tests/core/Common.test.php
  * @param string $referrerUrl Referrer URL, eg. $_SERVER['HTTP_REFERER']
  * @return array|bool   false if nothing could be extracted,
  *                        or array(
  *                            'name' => 'Google',
  *                            'keywords' => 'my searched keywords')
  *                        where 'keywords' may be false (no keyword provided)
  */
 public static function extractSearchEngineInformationFromUrl($referrerUrl)
 {
     // parse_url() warnings on malformed input are deliberately silenced;
     // a missing host simply means "not a search engine referrer".
     $referrerParsed = @parse_url($referrerUrl);
     $referrerHost = '';
     if (isset($referrerParsed['host'])) {
         $referrerHost = $referrerParsed['host'];
     }
     if (empty($referrerHost)) {
         return false;
     }

     // some search engines (eg. Bing Images) use the same domain
     // as an existing search engine (eg. Bing), we must also use the url path
     $referrerPath = '';
     if (isset($referrerParsed['path'])) {
         $referrerPath = $referrerParsed['path'];
     }

     // an absent query string is treated as an empty one
     $query = isset($referrerParsed['query']) ? $referrerParsed['query'] : '';

     // Google Referrers URLs sometimes have the fragment which contains the keyword
     if (!empty($referrerParsed['fragment'])) {
         $query .= '&' . $referrerParsed['fragment'];
     }

     $searchEngines = Common::getSearchEngineUrls();
     $hostPattern = self::getLossyUrl($referrerHost);

     /*
      * Try to get the best matching 'host' in definitions
      * 1. check if host + path matches a definition
      * 2. check if host only matches
      * 3. check if host pattern + path matches
      * 4. check if host pattern matches
      * 5. special handling
      */
     if (array_key_exists($referrerHost . $referrerPath, $searchEngines)) {
         $referrerHost = $referrerHost . $referrerPath;
     } elseif (array_key_exists($referrerHost, $searchEngines)) {
         // no need to change host
     } elseif (array_key_exists($hostPattern . $referrerPath, $searchEngines)) {
         $referrerHost = $hostPattern . $referrerPath;
     } elseif (array_key_exists($hostPattern, $searchEngines)) {
         $referrerHost = $hostPattern;
     } elseif (strncmp($query, 'cx=partner-pub-', 15) === 0) {
         // Google custom search engine
         $referrerHost = 'google.com/cse';
     } elseif (strncmp($referrerPath, '/pemonitorhosted/ws/results/', 28) === 0) {
         // private-label search powered by InfoSpace Metasearch
         $referrerHost = 'wsdsold.infospace.com';
     } elseif ((int) strpos($referrerHost, '.images.search.yahoo.com') > 0) {
         // Yahoo! Images; the suffix must be preceded by at least one
         // character, so a match at offset 0 does not count
         $referrerHost = 'images.search.yahoo.com';
     } elseif ((int) strpos($referrerHost, '.search.yahoo.com') > 0) {
         // Yahoo! (same offset rule as above)
         $referrerHost = 'search.yahoo.com';
     } else {
         return false;
     }

     // Definition layout as read below: [0] engine name, [1] keyword
     // variable name(s), [3] optional charset(s).
     $searchEngineName = $searchEngines[$referrerHost][0];
     $variableNames = null;
     if (isset($searchEngines[$referrerHost][1])) {
         $variableNames = $searchEngines[$referrerHost][1];
     }
     if (!$variableNames) {
         // Fall back to the variable names of the definition registered under
         // the engine's name; guarded so that an incomplete definition leaves
         // $variableNames null instead of raising undefined-index notices.
         $variableNames = null;
         $searchEngineNames = Common::getSearchEngineNames();
         if (isset($searchEngineNames[$searchEngineName])) {
             $url = $searchEngineNames[$searchEngineName];
             if (isset($searchEngines[$url][1])) {
                 $variableNames = $searchEngines[$url][1];
             }
         }
     }
     if (!is_array($variableNames)) {
         $variableNames = array($variableNames);
     }

     $key = null;
     if ($searchEngineName === 'Google Images'
         || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false)
     ) {
         if (strpos($query, '&prev') !== false) {
             // The real search query is carried, URL-encoded, in the "prev"
             // parameter: keep everything from its '?' onwards.
             $query = urldecode(trim(self::getParameterFromQueryString($query, 'prev')));
             $queryStart = strstr($query, '?');
             // Decode HTML-escaped ampersands so the parameters can be split;
             // no '?' at all leaves an empty query.
             $query = $queryStart === false ? '' : str_replace('&amp;', '&', $queryStart);
         }
         $searchEngineName = 'Google Images';
     } elseif ($searchEngineName === 'Google'
         && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)
     ) {
         // Google "as_*" parameters: combine the individual fields into a
         // single keyword string.
         $keys = array();
         $key = self::getParameterFromQueryString($query, 'as_q');
         if (!empty($key)) {
             array_push($keys, $key);
         }
         $key = self::getParameterFromQueryString($query, 'as_oq');
         if (!empty($key)) {
             array_push($keys, str_replace('+', ' OR ', $key));
         }
         $key = self::getParameterFromQueryString($query, 'as_epq');
         if (!empty($key)) {
             array_push($keys, "\"{$key}\"");
         }
         $key = self::getParameterFromQueryString($query, 'as_eq');
         if (!empty($key)) {
             array_push($keys, "-{$key}");
         }
         $key = trim(urldecode(implode(' ', $keys)));
     }

     if ($searchEngineName === 'Google') {
         // top bar menu
         $tbm = self::getParameterFromQueryString($query, 'tbm');
         switch ($tbm) {
             case 'isch':
                 $searchEngineName = 'Google Images';
                 break;
             case 'vid':
                 $searchEngineName = 'Google Video';
                 break;
             case 'shop':
                 $searchEngineName = 'Google Shopping';
                 break;
         }
     }

     if (empty($key)) {
         foreach ($variableNames as $variableName) {
             // A variable name starting with '/' is a regular expression that
             // is applied to the whole referrer URL.
             $isPattern = is_string($variableName)
                 && $variableName !== ''
                 && $variableName[0] === '/';

             if ($isPattern) {
                 // regular expression match
                 if (preg_match($variableName, $referrerUrl, $matches)) {
                     $key = trim(urldecode($matches[1]));
                     break;
                 }
             } else {
                 // search for keywords now &vname=keyword
                 $key = self::getParameterFromQueryString($query, $variableName);
                 $key = trim(urldecode($key));

                 // Special cases: empty or no keywords
                 if (empty($key)) {
                     // Google URL with no query, no fragment and no path
                     $isBareGoogleUrl = $searchEngineName === 'Google'
                         && empty($query)
                         && (empty($referrerPath) || $referrerPath === '/')
                         && empty($referrerParsed['fragment']);
                     $isYahooRedirect = $searchEngineName === 'Yahoo!'
                         && $referrerParsed['host'] === 'r.search.yahoo.com';
                     // the keyword parameter is present in the query but empty
                     $hasEmptyParameter = strpos($query, sprintf('&%s=', $variableName)) !== false
                         || strpos($query, sprintf('?%s=', $variableName)) !== false;
                     // engines that are always reported, even without a keyword
                     $alwaysReported = $searchEngineName === 'Ixquick'
                         || $searchEngineName === 'Google Images'
                         || $searchEngineName === 'DuckDuckGo';

                     if ($isBareGoogleUrl || $isYahooRedirect || $hasEmptyParameter || $alwaysReported) {
                         $key = false;
                     }
                 }
                 if (!empty($key) || $key === false) {
                     break;
                 }
             }
         }
     }

     // $key === false is the special case "No keyword provided" which is a Search engine match
     if ($key === null || $key === '') {
         return false;
     }

     if (!empty($key)) {
         if (function_exists('iconv') && isset($searchEngines[$referrerHost][3])) {
             // accepts string, array, or comma-separated list string in preferred order
             $charsets = $searchEngines[$referrerHost][3];
             if (!is_array($charsets)) {
                 $charsets = explode(',', $charsets);
             }
             if (!empty($charsets)) {
                 $charset = $charsets[0];
                 if (count($charsets) > 1 && function_exists('mb_detect_encoding')) {
                     $charset = mb_detect_encoding($key, $charsets);
                     if ($charset === false) {
                         $charset = $charsets[0];
                     }
                 }
                 // Best-effort conversion: on failure the original keyword is kept.
                 $newkey = @iconv($charset, 'UTF-8//IGNORE', $key);
                 if (!empty($newkey)) {
                     $key = $newkey;
                 }
             }
         }
         $key = Common::mb_strtolower($key);
     }

     return array('name' => $searchEngineName, 'keywords' => $key);
 }