Esempio n. 1
0
 /**
  * Normalizes the URL (some of the best parts of RFC 3986)
  *
  * @param unknown_type $url
  * @return string
  */
 public static function normalize_url($url)
 {
     // TODO return error for bad URLs
     // Process from RFC 3986 http://en.wikipedia.org/wiki/URL_normalization
     // Limiting protocols.
     if (!parse_url($url, PHP_URL_SCHEME)) {
         $url = 'http://' . $url;
     }
     $parsed_url = parse_url($url);
     if ($parsed_url === false) {
         return '';
     }
     // user and pass components are ignored
     // TODO Removing or adding “www” as the first domain label.
     $host = preg_replace('/^www\\./', '', $parsed_url['host']);
     // Converting the scheme and host to lower case
     $scheme = strtolower($parsed_url['scheme']);
     $host = strtolower($host);
     $path = $parsed_url['path'];
     // TODO Capitalizing letters in escape sequences
     // TODO Decoding percent-encoded octets of unreserved characters
     // Removing the default port
     $port = '';
     if (isset($parsed_url['port'])) {
         $port = $parsed_url['port'];
     }
     if ($port == 80) {
         $port = '';
     }
     // Removing the fragment # (do not get fragment component)
     // Removing directory index (i.e. index.html, index.php)
     $path = str_replace('index.html', '', $path);
     $path = str_replace('index.php', '', $path);
     // Adding trailing /
     $path_last_char = $path[strlen($path) - 1];
     if ($path_last_char != '/') {
         $path = $path . '/';
     }
     // TODO Removing dot-segments.
     // TODO Replacing IP with domain name.
     // TODO Removing duplicate slashes
     $path = preg_replace("~\\\\+([\"\\'\\x00\\\\])~", "\$1", $path);
     // construct URL
     $url = $scheme . '://' . $host . $path;
     // Add query params if they exist
     // Sorting the query parameters.
     // Removing unused query variables
     // Removing default query parameters.
     // Removing the "?" when the query is empty.
     $query = '';
     if (isset($parsed_url['query'])) {
         $query = $parsed_url['query'];
     }
     if ($query) {
         $query_parts = explode('&', $query);
         $params = array();
         foreach ($query_parts as $param) {
             $items = explode('=', $param, 2);
             $name = $items[0];
             $value = '';
             if (count($items) == 2) {
                 $value = $items[1];
             }
             $params[$name] = $value;
         }
         ksort($params);
         $count_params = count($params);
         if ($count_params > 0) {
             $url .= '?';
             $index = 0;
             foreach ($params as $name => $value) {
                 $url .= $name;
                 if (strlen($value) != 0) {
                     $url .= '=' . $value;
                 }
                 if ($index++ < $count_params - 1) {
                     $url .= '&';
                 }
             }
         }
     }
     // Remove some query params which we do not want
     $url = HA_Common::remove_query_string_params($url, HA_Common::$ignore_query_params);
     return $url;
 }