public function testURIPathComponentEscape() { $this->assertEqual('a%252Fb', phutil_escape_uri_path_component('a/b')); $str = ''; for ($ii = 0; $ii <= 255; $ii++) { $str .= chr($ii); } $this->assertEqual($str, phutil_unescape_uri_path_component(rawurldecode(phutil_escape_uri_path_component($str)))); }
/** * Internal. Public only for unit tests. * * Parse the request URI into components. * * @param string URI blob. * @param bool True if this VCS supports branches. * @return map Parsed URI. * * @task uri */ public static function parseRequestBlob($blob, $supports_branches) { $result = array('branch' => null, 'path' => null, 'commit' => null, 'line' => null); $matches = null; if ($supports_branches) { // Consume the front part of the URI, up to the first "/". This is the // path-component encoded branch name. if (preg_match('@^([^/]+)/@', $blob, $matches)) { $result['branch'] = phutil_unescape_uri_path_component($matches[1]); $blob = substr($blob, strlen($matches[1]) + 1); } } // Consume the back part of the URI, up to the first "$". Use a negative // lookbehind to prevent matching '$$'. We double the '$' symbol when // encoding so that files with names like "money/$100" will survive. $pattern = '@(?:(?:^|[^$])(?:[$][$])*)[$]([\\d-,]+)$@'; if (preg_match($pattern, $blob, $matches)) { $result['line'] = $matches[1]; $blob = substr($blob, 0, -(strlen($matches[1]) + 1)); } // We've consumed the line number if it exists, so unescape "$" in the // rest of the string. $blob = str_replace('$$', '$', $blob); // Consume the commit name, stopping on ';;'. We allow any character to // appear in commits names, as they can sometimes be symbolic names (like // tag names or refs). if (preg_match('@(?:(?:^|[^;])(?:;;)*);([^;].*)$@', $blob, $matches)) { $result['commit'] = $matches[1]; $blob = substr($blob, 0, -(strlen($matches[1]) + 1)); } // We've consumed the commit if it exists, so unescape ";" in the rest // of the string. $blob = str_replace(';;', ';', $blob); if (strlen($blob)) { $result['path'] = $blob; } $parts = explode('/', $result['path']); foreach ($parts as $part) { // Prevent any hyjinx since we're ultimately shipping this to the // filesystem under a lot of workflows. if ($part == '..') { throw new Exception(pht('Invalid path URI.')); } } return $result; }
/** * Internal. Public only for unit tests. * * Parse the request URI into components. * * @param string URI blob. * @param bool True if this VCS supports branches. * @return map Parsed URI. * * @task uri */ public static function parseRequestBlob($blob, $supports_branches) { $result = array('branch' => null, 'path' => null, 'commit' => null, 'line' => null); $matches = null; if ($supports_branches) { // Consume the front part of the URI, up to the first "/". This is the // path-component encoded branch name. if (preg_match('@^([^/]+)/@', $blob, $matches)) { $result['branch'] = phutil_unescape_uri_path_component($matches[1]); $blob = substr($blob, strlen($matches[1]) + 1); } } // Consume the back part of the URI, up to the first "$". Use a negative // lookbehind to prevent matching '$$'. We double the '$' symbol when // encoding so that files with names like "money/$100" will survive. if (preg_match('@(?<![$])[$]([\\d-]+)$@', $blob, $matches)) { $result['line'] = $matches[1]; $blob = substr($blob, 0, -(strlen($matches[1]) + 1)); } // Consume the commit name, stopping on ';;'. if (preg_match('@(?<!;);([a-z0-9]+)$@', $blob, $matches)) { $result['commit'] = $matches[1]; $blob = substr($blob, 0, -(strlen($matches[1]) + 1)); } // Un-double our delimiter characters. if (strlen($blob)) { $result['path'] = str_replace(array(';;', '$$'), array(';', '$'), $blob); } $parts = explode('/', $result['path']); foreach ($parts as $part) { // Prevent any hyjinx since we're ultimately shipping this to the // filesystem under a lot of workflows. if ($part == '..') { throw new Exception("Invalid path URI."); } } return $result; }