/**
 * Looks up the URL of a search engine by its name.
 *
 * The name is resolved through the map returned by Common::getSearchEngineNames();
 * the value found there is prefixed with 'http://'.
 *
 * @see core/DataFiles/SearchEngines.php
 *
 * @param string $name Search engine name used as key in the name map
 * @return string 'http://' followed by the mapped value, or the literal
 *                'URL unknown!' when the name is not present in the map
 */
function getSearchEngineUrlFromName($name)
{
    $knownEngines = Common::getSearchEngineNames();

    // Unknown engine: hand back the placeholder text instead of a URL.
    if (!isset($knownEngines[$name])) {
        return 'URL unknown!';
    }

    return 'http://' . $knownEngines[$name];
}
/**
 * Extracts the search engine name and the searched keyword from a raw (not encoded) URL.
 * A keyword is only extracted when a known search engine has been detected.
 *
 * The returned keyword is:
 * - in UTF8: converted with iconv from the charset(s) listed in the engine definition, when available
 * - lowercased: "QUErY test!" will return "query test!"
 * - trimmed: extra spaces before and after are removed
 *
 * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php
 * The function returns false when a keyword couldn't be found,
 * eg. if the url is "http://www.google.com/partners.html" this will return false,
 * as the google keyword parameter couldn't be found.
 *
 * The 'keywords' entry may be boolean false: that is the special case
 * "search engine matched, but no keyword was provided".
 *
 * @see unit tests in /tests/core/Common.test.php
 * @param string $referrerUrl URL referrer URL, eg. $_SERVER['HTTP_REFERER']
 * @return array|bool false if a keyword couldn't be extracted,
 *                    or array(
 *                        'name' => 'Google',
 *                        'keywords' => 'my searched keywords')
 */
public static function extractSearchEngineInformationFromUrl($referrerUrl)
{
    $referrerParsed = @parse_url($referrerUrl);
    $referrerHost = '';
    if (isset($referrerParsed['host'])) {
        $referrerHost = $referrerParsed['host'];
    }
    if (empty($referrerHost)) {
        return false;
    }

    // some search engines (eg. Bing Images) use the same domain
    // as an existing search engine (eg. Bing), we must also use the url path
    $referrerPath = '';
    if (isset($referrerParsed['path'])) {
        $referrerPath = $referrerParsed['path'];
    }

    // no search query
    if (!isset($referrerParsed['query'])) {
        $referrerParsed['query'] = '';
    }
    $query = $referrerParsed['query'];

    // Google Referrers URLs sometimes have the fragment which contains the keyword
    if (!empty($referrerParsed['fragment'])) {
        $query .= '&' . $referrerParsed['fragment'];
    }

    $searchEngines = Common::getSearchEngineUrls();
    $hostPattern = self::getLossyUrl($referrerHost);

    /*
     * Try to get the best matching 'host' in definitions
     * 1. check if host + path matches a definition
     * 2. check if host only matches
     * 3. check if host pattern + path matches
     * 4. check if host pattern matches
     * 5. special handling
     */
    if (array_key_exists($referrerHost . $referrerPath, $searchEngines)) {
        $referrerHost = $referrerHost . $referrerPath;
    } elseif (array_key_exists($referrerHost, $searchEngines)) {
        // no need to change host
    } elseif (array_key_exists($hostPattern . $referrerPath, $searchEngines)) {
        $referrerHost = $hostPattern . $referrerPath;
    } elseif (array_key_exists($hostPattern, $searchEngines)) {
        $referrerHost = $hostPattern;
    } elseif (!strncmp($query, 'cx=partner-pub-', 15)) {
        // Google custom search engine
        $referrerHost = 'google.com/cse';
    } elseif (!strncmp($referrerPath, '/pemonitorhosted/ws/results/', 28)) {
        // private-label search powered by InfoSpace Metasearch
        $referrerHost = 'wsdsold.infospace.com';
    } elseif (strpos($referrerHost, '.images.search.yahoo.com') !== false) {
        // Yahoo! Images
        $referrerHost = 'images.search.yahoo.com';
    } elseif (strpos($referrerHost, '.search.yahoo.com') !== false) {
        // Yahoo!
        $referrerHost = 'search.yahoo.com';
    } else {
        return false;
    }

    // The special-handling branches above use hard-coded keys; bail out instead of
    // reading an undefined index when the loaded definitions do not contain them.
    if (!isset($searchEngines[$referrerHost][0])) {
        return false;
    }

    $searchEngineName = $searchEngines[$referrerHost][0];
    $variableNames = null;
    if (isset($searchEngines[$referrerHost][1])) {
        $variableNames = $searchEngines[$referrerHost][1];
    }
    if (!$variableNames) {
        // This definition has no keyword variables of its own: fall back to the ones
        // of the definition registered under the engine's name.
        $searchEngineNames = Common::getSearchEngineNames();
        $variableNames = null;
        if (isset($searchEngineNames[$searchEngineName])) {
            $url = $searchEngineNames[$searchEngineName];
            if (isset($searchEngines[$url][1])) {
                $variableNames = $searchEngines[$url][1];
            }
        }
    }
    if (!is_array($variableNames)) {
        $variableNames = array($variableNames);
    }

    $key = null;
    if ($searchEngineName === 'Google Images'
        || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false)
    ) {
        if (strpos($query, '&prev') !== false) {
            // The real query is nested (URL-encoded) inside the "prev" parameter.
            $query = urldecode(trim(self::getParameterFromQueryString($query, 'prev')));
            // Keep only the part from '?' on and turn HTML-escaped ampersands
            // back into plain parameter separators.
            $query = str_replace('&amp;', '&', strstr($query, '?'));
        }
        $searchEngineName = 'Google Images';
    } elseif ($searchEngineName === 'Google'
        && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)
    ) {
        // "as_*" parameters: rebuild a single keyword string from the individual parts.
        $keys = array();

        $key = self::getParameterFromQueryString($query, 'as_q');
        if (!empty($key)) {
            array_push($keys, $key);
        }

        $key = self::getParameterFromQueryString($query, 'as_oq');
        if (!empty($key)) {
            array_push($keys, str_replace('+', ' OR ', $key));
        }

        $key = self::getParameterFromQueryString($query, 'as_epq');
        if (!empty($key)) {
            array_push($keys, "\"{$key}\"");
        }

        $key = self::getParameterFromQueryString($query, 'as_eq');
        if (!empty($key)) {
            array_push($keys, "-{$key}");
        }

        $key = trim(urldecode(implode(' ', $keys)));
    }

    if ($searchEngineName === 'Google') {
        // top bar menu
        $tbm = self::getParameterFromQueryString($query, 'tbm');
        switch ($tbm) {
            case 'isch':
                $searchEngineName = 'Google Images';
                break;
            case 'vid':
                $searchEngineName = 'Google Video';
                break;
            case 'shop':
                $searchEngineName = 'Google Shopping';
                break;
        }
    }

    if (empty($key)) {
        foreach ($variableNames as $variableName) {
            if (is_string($variableName) && $variableName !== '' && $variableName[0] === '/') {
                // regular expression match
                if (preg_match($variableName, $referrerUrl, $matches)) {
                    $key = trim(urldecode($matches[1]));
                    break;
                }
            } else {
                // search for keywords now &vname=keyword
                $key = self::getParameterFromQueryString($query, $variableName);
                $key = trim(urldecode($key));

                // Special cases: empty or no keywords
                if (empty($key)
                    && (
                        ($searchEngineName == 'Google'
                            && empty($query)
                            && (empty($referrerPath) || $referrerPath == '/')
                            && empty($referrerParsed['fragment']))
                        || ($searchEngineName == 'Yahoo!' && $referrerParsed['host'] == 'r.search.yahoo.com')
                        || strpos($query, sprintf('&%s=', $variableName)) !== false
                        || strpos($query, sprintf('?%s=', $variableName)) !== false
                        || $searchEngineName == 'Ixquick'
                        || $searchEngineName == 'Google Images'
                        || $searchEngineName == 'DuckDuckGo'
                    )
                ) {
                    $key = false;
                }

                if (!empty($key) || $key === false) {
                    break;
                }
            }
        }
    }

    // $key === false is the special case "No keyword provided" which is a Search engine match
    if ($key === null || $key === '') {
        return false;
    }

    if (!empty($key)) {
        if (function_exists('iconv') && isset($searchEngines[$referrerHost][3])) {
            // accepts string, array, or comma-separated list string in preferred order
            $charsets = $searchEngines[$referrerHost][3];
            if (!is_array($charsets)) {
                $charsets = explode(',', $charsets);
            }

            if (!empty($charsets)) {
                $charset = $charsets[0];
                if (count($charsets) > 1 && function_exists('mb_detect_encoding')) {
                    $charset = mb_detect_encoding($key, $charsets);
                    if ($charset === false) {
                        // detection failed: fall back to the first listed charset
                        $charset = $charsets[0];
                    }
                }

                // Keep the original keyword when the conversion yields nothing usable.
                $newkey = @iconv($charset, 'UTF-8//IGNORE', $key);
                if (!empty($newkey)) {
                    $key = $newkey;
                }
            }
        }

        $key = Common::mb_strtolower($key);
    }

    return array('name' => $searchEngineName, 'keywords' => $key);
}