Exemplo n.º 1
0
 /**
  * Detects the search engine behind a raw (not encoded) referrer URL and extracts
  * the searched keyword from it.
  *
  * Information is only extracted when the URL's host can be resolved to a known
  * search engine (via getEngineHostFromUrl() / getDefinitionByHost()).
  *
  * A non-empty keyword in the result is:
  * - converted with convertCharset() when the engine definition lists 'charsets'
  *   (presumably to UTF-8 -- confirm against convertCharset())
  * - lowercased through Common::mb_strtolower(): "QUErY test!" becomes "query test!"
  * - trimmed: extra spaces before and after are removed
  *
  * The function returns false when no keyword could be found,
  *     eg. if the url is "http://www.google.com/partners.html" this will return false,
  *       as the google keyword parameter couldn't be found.
  *
  * In a few special cases (see the inline comments in the parameter loop) the search
  * engine is still reported although no keyword is available; 'keywords' is then
  * the boolean false instead of a string.
  *
  * @see unit tests in /tests/core/Common.test.php
  * @param string $referrerUrl URL referrer URL, eg. $_SERVER['HTTP_REFERER']
  * @return array|bool   false if the URL has no host, the search engine is unknown,
  *                        or a keyword couldn't be extracted; otherwise
  *                        array(
  *                            'name' => 'Google',
  *                            'keywords' => 'my searched keywords')
  *                        where 'keywords' may be false ("no keyword provided")
  */
 public function extractInformationFromUrl($referrerUrl)
 {
     // parse_url() returns false for seriously malformed URLs; the isset() checks
     // below then simply fail and the missing host makes us return false.
     $referrerParsed = @parse_url($referrerUrl);
     $referrerHost = '';
     if (isset($referrerParsed['host'])) {
         $referrerHost = $referrerParsed['host'];
     }
     // without a host there is nothing to match against the known search engines
     if (empty($referrerHost)) {
         return false;
     }
     // some search engines (eg. Bing Images) use the same domain
     // as an existing search engine (eg. Bing), we must also use the url path
     $referrerPath = '';
     if (isset($referrerParsed['path'])) {
         $referrerPath = $referrerParsed['path'];
     }
     $query = '';
     if (isset($referrerParsed['query'])) {
         $query = $referrerParsed['query'];
     }
     // Google Referrers URLs sometimes have the fragment which contains the keyword;
     // it is appended with '&' so it is searched like additional query parameters
     if (!empty($referrerParsed['fragment'])) {
         $query .= '&' . $referrerParsed['fragment'];
     }
     // From here on $referrerHost holds whatever getEngineHostFromUrl() resolved
     // (presumably the key used by the engine definitions -- confirm in that method);
     // an empty result means the URL does not belong to a known search engine.
     $referrerHost = $this->getEngineHostFromUrl($referrerHost, $referrerPath, $query);
     if (empty($referrerHost)) {
         return false;
     }
     $definitions = $this->getDefinitionByHost($referrerHost);
     $searchEngineName = $definitions['name'];
     // list of query parameter names (or regular expressions) that may hold the keyword
     $variableNames = $definitions['params'];
     // $key is tri-state: null = nothing looked up yet, string = extracted keyword,
     // false = search engine matched but no keyword was provided
     $key = null;
     // Precedence note: && binds tighter than ||, so this reads
     // "Google Images" OR ("Google" AND the URL contains '/imgres').
     if ($searchEngineName === 'Google Images' || $searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false) {
         // The 'prev' parameter carries the url-encoded previous search URL: continue
         // with the part of it starting at its first '?' as the query to inspect.
         // Only detected when 'prev' is not the very first parameter ('&prev').
         if (strpos($query, '&prev') !== false) {
             $query = urldecode(trim(UrlHelper::getParameterFromQueryString($query, 'prev')));
             // NOTE(review): as written this str_replace() swaps '&' for '&' and is a
             // no-op -- one of the arguments was probably '&amp;' originally; confirm
             // the intended direction before changing it. strstr() yields false when
             // the decoded value contains no '?'.
             $query = str_replace('&', '&', strstr($query, '?'));
         }
         $searchEngineName = 'Google Images';
     } elseif ($searchEngineName === 'Google' && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)) {
         // Google query using the as_* parameters (anywhere in the query, or as its
         // first parameter): rebuild one keyword string from the individual parts
         $keys = array();
         // as_q: added as is
         $key = UrlHelper::getParameterFromQueryString($query, 'as_q');
         if (!empty($key)) {
             array_push($keys, $key);
         }
         // as_oq: every '+' is turned into ' OR '
         $key = UrlHelper::getParameterFromQueryString($query, 'as_oq');
         if (!empty($key)) {
             array_push($keys, str_replace('+', ' OR ', $key));
         }
         // as_epq: wrapped in double quotes
         $key = UrlHelper::getParameterFromQueryString($query, 'as_epq');
         if (!empty($key)) {
             array_push($keys, "\"{$key}\"");
         }
         // as_eq: prefixed with a minus sign
         $key = UrlHelper::getParameterFromQueryString($query, 'as_eq');
         if (!empty($key)) {
             array_push($keys, "-{$key}");
         }
         // the parts are joined with single spaces; the result is '' when none was set,
         // in which case the generic parameter lookup below still runs
         $key = trim(urldecode(implode(' ', $keys)));
     }
     // Only reached with the name still being 'Google', i.e. not for URLs that were
     // re-labelled as 'Google Images' above.
     if ($searchEngineName === 'Google') {
         // top bar menu
         $tbm = UrlHelper::getParameterFromQueryString($query, 'tbm');
         // any other (or missing) tbm value keeps the plain 'Google' name
         switch ($tbm) {
             case 'isch':
                 $searchEngineName = 'Google Images';
                 break;
             case 'vid':
                 $searchEngineName = 'Google Video';
                 break;
             case 'shop':
                 $searchEngineName = 'Google Shopping';
                 break;
         }
     }
     // Generic lookup, skipped when the as_* handling already produced a keyword.
     // Note that empty() is also true for the string '0'.
     if (empty($key)) {
         foreach ($variableNames as $variableName) {
             // an entry starting with '/' is a regular expression, anything else is
             // the name of a query parameter
             if ($variableName[0] == '/') {
                 // regular expression match against the complete referrer URL,
                 // the first capture group is taken as the keyword
                 if (preg_match($variableName, $referrerUrl, $matches)) {
                     $key = trim(urldecode($matches[1]));
                     // stops at the first matching expression, even if the captured
                     // keyword is empty (which ends in "return false" below)
                     break;
                 }
             } else {
                 // search for keywords now &vname=keyword
                 $key = UrlHelper::getParameterFromQueryString($query, $variableName);
                 $key = trim(urldecode($key));
                 // Special cases: empty or no keywords
                 // With an empty keyword, the engine is still reported (keyword = false)
                 // when at least one of the following holds:
                 //  - Google with an empty query, an empty or '/' path and no fragment
                 //  - Yahoo! with the original URL host 'r.search.yahoo.com'
                 //  - the query contains '&<name>=' or '?<name>=' for this parameter
                 //  - the engine is Ixquick, Google Images or DuckDuckGo
                 // (&& binds tighter than ||, which is what groups the terms this way)
                 if (empty($key) && ($searchEngineName == 'Google' && (empty($query) && (empty($referrerPath) || $referrerPath == '/') && empty($referrerParsed['fragment'])) || $searchEngineName == 'Yahoo!' && $referrerParsed['host'] == 'r.search.yahoo.com' || strpos($query, sprintf('&%s=', $variableName)) !== false || strpos($query, sprintf('?%s=', $variableName)) !== false || $searchEngineName == 'Ixquick' || $searchEngineName == 'Google Images' || $searchEngineName == 'DuckDuckGo')) {
                     $key = false;
                 }
                 // the first parameter yielding a keyword or the "no keyword" marker wins
                 if (!empty($key) || $key === false) {
                     break;
                 }
             }
         }
     }
     // $key === false is the special case "No keyword provided" which is a Search engine match
     // null (nothing was looked up or matched) and '' (lookups came back empty) both
     // mean that no keyword could be extracted
     if ($key === null || $key === '') {
         return false;
     }
     // normalise real keywords only; the false marker is returned untouched
     if (!empty($key)) {
         if (!empty($definitions['charsets'])) {
             $key = $this->convertCharset($key, $definitions['charsets']);
         }
         $key = Common::mb_strtolower($key);
     }
     return array('name' => $searchEngineName, 'keywords' => $key);
 }