Example #1
0
 /**
  * The basic URL parser.
  *
  * Splits $input into code points and walks them through a state machine,
  * filling in the components of a URL object as it goes. When a state
  * override is supplied, the URL passed under the "url" key is used (and
  * modified) instead of a freshly created one, and parsing starts in the
  * overriding state.
  *
  * @link https://url.spec.whatwg.org/#concept-basic-url-parser URL Standard
  * @param string $input A UTF-8 string.
  * @param URL|null $base A base URL.
  * @param string|null $encodingOverride A valid name of an encoding.
  * @param (URL|string)[]|null $urlAndStateOverride An URL ("url" key) and a state override ("state override" key).
  * @throws \DomainException If $urlAndStateOverride['state override'] is invalid.
  * @return URL|false|null The resulting URL, false on failure, or null when
  *                        the algorithm terminates early because of a state override.
  */
 public static function parseBasicURL($input, self $base = null, $encodingOverride = null, array $urlAndStateOverride = null)
 {
     // ASCII tab and newlines are removed from anywhere in the input.
     $input = str_replace(["\t", "\n", "\r"], '', $input);
     if ($urlAndStateOverride) {
         $url = $urlAndStateOverride['url'];
         $stateOverride = (string) $urlAndStateOverride['state override'];
         $string = (string) $input;
         $state = $stateOverride;
     } else {
         $url = new self();
         $stateOverride = null;
         // Strip leading and trailing C0 control characters and spaces (U+0000 to U+0020).
         $string = trim($input, "\x00..\x20");
         $state = 'scheme start state';
     }
     $encoding = $encodingOverride ? URLencoding::getOutputEncoding((string) $encodingOverride) : 'UTF-8';
     $buffer = '';
     $atFlag = false;
     $bracketFlag = false;
     // preg_split() yields false for malformed UTF-8; such input is handled like an empty string.
     $codePoints = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) ?: [];
     for ($pointer = 0; true; $pointer++) {
         // The empty string stands for the EOF code point.
         $c = isset($codePoints[$pointer]) ? $codePoints[$pointer] : '';
         switch ($state) {
             case 'scheme start state':
                 // The EOF guard keeps an empty needle away from stripos(), which reports a match at 0 on PHP 8.
                 if ($c !== '' && stripos('abcdefghijklmnopqrstuvwxyz', $c) !== false) {
                     $buffer .= strtolower($c);
                     $state = 'scheme state';
                 } elseif (!$stateOverride) {
                     $state = 'no scheme state';
                     $pointer--;
                 } else {
                     return;
                 }
                 break;
             case 'scheme state':
                 if ($c !== '' && stripos('0123456789abcdefghijklmnopqrstuvwxyz+-.', $c) !== false) {
                     $buffer .= strtolower($c);
                 } elseif ($c === ':') {
                     // A state override must not switch between special and non-special schemes.
                     if ($stateOverride && array_key_exists($url->scheme, self::$specialSchemes) !== array_key_exists($buffer, self::$specialSchemes)) {
                         return;
                     }
                     $url->scheme = $buffer;
                     $buffer = '';
                     if ($stateOverride) {
                         return;
                     }
                     if ($url->scheme === 'file') {
                         $state = 'file state';
                     } elseif ($url->isSpecial() && $base && $base->scheme === $url->scheme) {
                         $state = 'special relative or authority state';
                     } elseif ($url->isSpecial()) {
                         $state = 'special authority slashes state';
                     } elseif (isset($codePoints[$pointer + 1]) && $codePoints[$pointer + 1] === '/') {
                         $state = 'path or authority state';
                         $pointer++;
                     } else {
                         $url->nonRelativeFlag = $url->cannotBeABaseURLFlag = true;
                         $url->path[] = '';
                         $state = 'non-relative path state';
                     }
                 } elseif (!$stateOverride) {
                     // Not a scheme after all: start over from the first code point.
                     $buffer = '';
                     $state = 'no scheme state';
                     $pointer = -1;
                 } else {
                     return;
                 }
                 break;
             case 'no scheme state':
                 if (!$base || $base->cannotBeABaseURLFlag && $c !== '#') {
                     return false;
                 } elseif ($base->cannotBeABaseURLFlag && $c === '#') {
                     $url->scheme = $base->scheme;
                     $url->path = $base->path;
                     $url->query = $base->query;
                     $url->fragment = '';
                     $url->nonRelativeFlag = $url->cannotBeABaseURLFlag = true;
                     $state = 'fragment state';
                 } elseif ($base->scheme !== 'file') {
                     $state = 'relative state';
                     $pointer--;
                 } else {
                     $state = 'file state';
                     $pointer--;
                 }
                 break;
             case 'special relative or authority state':
                 if ($c === '/' && isset($codePoints[$pointer + 1]) && $codePoints[$pointer + 1] === '/') {
                     $state = 'special authority ignore slashes state';
                     $pointer++;
                 } else {
                     $state = 'relative state';
                     $pointer--;
                 }
                 break;
             case 'path or authority state':
                 if ($c === '/') {
                     $state = 'authority state';
                 } else {
                     $state = 'path state';
                     $pointer--;
                 }
                 break;
             case 'relative state':
                 $url->scheme = $base->scheme;
                 switch ($c) {
                     case '':
                         $url->username = $base->username;
                         $url->password = $base->password;
                         $url->host = $base->host;
                         $url->port = $base->port;
                         $url->path = $base->path;
                         $url->query = $base->query;
                         break;
                     case '/':
                         $state = 'relative slash state';
                         break;
                     case '?':
                         $url->username = $base->username;
                         $url->password = $base->password;
                         $url->host = $base->host;
                         $url->port = $base->port;
                         $url->path = $base->path;
                         $url->query = '';
                         $state = 'query state';
                         break;
                     case '#':
                         $url->username = $base->username;
                         $url->password = $base->password;
                         $url->host = $base->host;
                         $url->port = $base->port;
                         $url->path = $base->path;
                         $url->query = $base->query;
                         $url->fragment = '';
                         $state = 'fragment state';
                         break;
                     default:
                         if ($c === '\\' && $url->isSpecial()) {
                             $state = 'relative slash state';
                         } else {
                             $url->username = $base->username;
                             $url->password = $base->password;
                             $url->host = $base->host;
                             $url->port = $base->port;
                             $url->path = $base->path;
                             // Drop the last segment of the base path; the input supplies its replacement.
                             array_pop($url->path);
                             $state = 'path state';
                             $pointer--;
                         }
                 }
                 break;
             case 'relative slash state':
                 if ($c === '/' || $c === '\\' && $url->isSpecial()) {
                     $state = 'special authority ignore slashes state';
                 } else {
                     $url->username = $base->username;
                     $url->password = $base->password;
                     $url->host = $base->host;
                     $url->port = $base->port;
                     $state = 'path state';
                     $pointer--;
                 }
                 break;
             case 'special authority slashes state':
                 if ($c === '/' && isset($codePoints[$pointer + 1]) && $codePoints[$pointer + 1] === '/') {
                     $state = 'special authority ignore slashes state';
                     $pointer++;
                 } else {
                     $state = 'special authority ignore slashes state';
                     $pointer--;
                 }
                 break;
             case 'special authority ignore slashes state':
                 // Any further slashes or backslashes are skipped.
                 if (!in_array($c, ['/', '\\'], true)) {
                     $state = 'authority state';
                     $pointer--;
                 }
                 break;
             case 'authority state':
                 if ($c === '@') {
                     // A second "@" means the earlier one belonged to the credentials.
                     if ($atFlag) {
                         $buffer = '%40' . $buffer;
                     }
                     $atFlag = true;
                     $usernameAndPassword = explode(':', $buffer, 2);
                     $url->username .= self::percentEncodeCodePoints(Infrastructure::USERINFO_ENCODE_SET, $usernameAndPassword[0]);
                     if (isset($usernameAndPassword[1])) {
                         $url->password .= self::percentEncodeCodePoints(Infrastructure::USERINFO_ENCODE_SET, $usernameAndPassword[1]);
                     }
                     $buffer = '';
                 } elseif (in_array($c, ['', '/', '?', '#'], true) || $c === '\\' && $url->isSpecial()) {
                     // Rewind to the start of the buffered code points so that the host state re-reads them.
                     $pointer -= mb_strlen($buffer, 'UTF-8') + 1;
                     $buffer = '';
                     $state = 'host state';
                 } else {
                     $buffer .= $c;
                 }
                 break;
             case 'host state':
             case 'hostname state':
                 if ($c === ':' && !$bracketFlag) {
                     if ($buffer === '' && $url->isSpecial()) {
                         return false;
                     }
                     $host = HostProcessing::parseHost($buffer);
                     if ($host === false) {
                         return false;
                     }
                     $url->host = $host;
                     $buffer = '';
                     $state = 'port state';
                     if ($stateOverride === 'hostname state') {
                         return;
                     }
                 } elseif (in_array($c, ['', '/', '?', '#'], true) || $c === '\\' && $url->isSpecial()) {
                     $pointer--;
                     if ($buffer === '' && $url->isSpecial()) {
                         return false;
                     }
                     $host = HostProcessing::parseHost($buffer);
                     if ($host === false) {
                         return false;
                     }
                     $url->host = $host;
                     $buffer = '';
                     $state = 'path start state';
                     if ($stateOverride) {
                         return;
                     }
                 } else {
                     // Colons between "[" and "]" do not start the port.
                     if ($c === '[') {
                         $bracketFlag = true;
                     }
                     if ($c === ']') {
                         $bracketFlag = false;
                     }
                     $buffer .= $c;
                 }
                 break;
             case 'port state':
                 if (ctype_digit($c)) {
                     $buffer .= $c;
                 } elseif (in_array($c, ['', '/', '?', '#'], true) || $c === '\\' && $url->isSpecial() || $stateOverride) {
                     if ($buffer !== '') {
                         $port = (int) $buffer;
                         // Ports are limited to 16 bits.
                         if ($port > 0xFFFF) {
                             return false;
                         }
                         // A port equal to the value registered for the scheme in self::$specialSchemes is stored as null.
                         $url->port = isset(self::$specialSchemes[$url->scheme]) && self::$specialSchemes[$url->scheme] === $port ? null : $port;
                         $buffer = '';
                     }
                     if ($stateOverride) {
                         return;
                     }
                     $state = 'path start state';
                     $pointer--;
                 } else {
                     return false;
                 }
                 break;
             case 'file state':
                 $url->scheme = 'file';
                 switch ($c) {
                     case '':
                         if ($base && $base->scheme === 'file') {
                             $url->host = $base->host;
                             $url->path = $base->path;
                             $url->query = $base->query;
                         }
                         break;
                     case '\\':
                     case '/':
                         $state = 'file slash state';
                         break;
                     case '?':
                         if ($base && $base->scheme === 'file') {
                             $url->host = $base->host;
                             $url->path = $base->path;
                             $url->query = '';
                             $state = 'query state';
                         }
                         break;
                     case '#':
                         if ($base && $base->scheme === 'file') {
                             $url->host = $base->host;
                             $url->path = $base->path;
                             $url->query = $base->query;
                             $url->fragment = '';
                             $state = 'fragment state';
                         }
                         break;
                     default:
                         $remaining = array_slice($codePoints, $pointer + 1);
                         if ($base && $base->scheme === 'file' && isset($remaining[0]) && preg_match(Infrastructure::WINDOWS_DRIVE_LETTER, $c . $remaining[0]) === 0 && (count($remaining) === 1 || isset($remaining[1]) && strpos('/\\?#', $remaining[1]) === false)) {
                             $url->host = $base->host;
                             $url->path = $base->path;
                             $url->popPath();
                         }
                         $state = 'path state';
                         $pointer--;
                 }
                 break;
             case 'file slash state':
                 if ($c === '/' || $c === '\\') {
                     $state = 'file host state';
                 } else {
                     // Keep the drive letter of a file base URL.
                     if ($base && $base->scheme === 'file' && isset($base->path[0]) && preg_match(Infrastructure::NORMALIZED_WINDOWS_DRIVE_LETTER, $base->path[0]) === 1) {
                         $url->path[] = $base->path[0];
                     }
                     $state = 'path state';
                     $pointer--;
                 }
                 break;
             case 'file host state':
                 if (in_array($c, ['', '/', '\\', '?', '#'], true)) {
                     $pointer--;
                     if (preg_match(Infrastructure::WINDOWS_DRIVE_LETTER, $buffer) === 1) {
                         // The buffer is left intact so that the path state picks it up as the first segment.
                         $state = 'path state';
                     } elseif ($buffer === '') {
                         $state = 'path start state';
                     } else {
                         $host = HostProcessing::parseHost($buffer);
                         if ($host === false) {
                             return false;
                         }
                         if ($host !== 'localhost') {
                             $url->host = $host;
                         }
                         $buffer = '';
                         $state = 'path start state';
                     }
                 } else {
                     $buffer .= $c;
                 }
                 break;
             case 'path start state':
                 $state = 'path state';
                 if (!($c === '/' || $c === '\\' && $url->isSpecial())) {
                     $pointer--;
                 }
                 break;
             case 'path state':
                 if (in_array($c, ['', '/'], true) || $c === '\\' && $url->isSpecial() || !$stateOverride && in_array($c, ['?', '#'], true)) {
                     if (preg_match(self::DOUBLE_DOT_PATH_SEGMENT, $buffer) === 1) {
                         $url->popPath();
                         if (!($c === '/' || $c === '\\' && $url->isSpecial())) {
                             $url->path[] = '';
                         }
                     } elseif (preg_match(self::SINGLE_DOT_PATH_SEGMENT, $buffer) === 1 && !($c === '/' || $c === '\\' && $url->isSpecial())) {
                         $url->path[] = '';
                     } elseif (preg_match(self::SINGLE_DOT_PATH_SEGMENT, $buffer) !== 1) {
                         if ($url->scheme === 'file' && !$url->path && preg_match(Infrastructure::WINDOWS_DRIVE_LETTER, $buffer) === 1) {
                             $url->host = null;
                             // Normalize the drive letter separator to ":".
                             $buffer[1] = ':';
                         }
                         $url->path[] = $buffer;
                     }
                     $buffer = '';
                     if ($c === '?') {
                         $url->query = '';
                         $state = 'query state';
                     } elseif ($c === '#') {
                         $url->fragment = '';
                         $state = 'fragment state';
                     }
                 } else {
                     // "%2e" and "%2E" are buffered as "."; only the next two code points need to be inspected.
                     if ($c === '%' && isset($codePoints[$pointer + 2]) && $codePoints[$pointer + 1] === '2' && ($codePoints[$pointer + 2] === 'e' || $codePoints[$pointer + 2] === 'E')) {
                         $buffer .= '.';
                         $pointer += 2;
                     } else {
                         $buffer .= Infrastructure::utf8PercentEncode(Infrastructure::DEFAULT_ENCODE_SET, $c);
                     }
                 }
                 break;
             case 'non-relative path state':
                 if ($c === '?') {
                     $url->query = '';
                     $state = 'query state';
                 } elseif ($c === '#') {
                     $url->fragment = '';
                     $state = 'fragment state';
                 } else {
                     if ($c !== '') {
                         $url->path[0] .= Infrastructure::utf8PercentEncode(Infrastructure::SIMPLE_ENCODE_SET, $c);
                     }
                 }
                 break;
             case 'query state':
                 if ($c === '' || !$stateOverride && $c === '#') {
                     // Non-special, ws and wss URLs always use UTF-8, whatever the encoding override says.
                     if (!$url->isSpecial() || $url->scheme === 'ws' || $url->scheme === 'wss') {
                         $encoding = 'UTF-8';
                     }
                     $buffer = URLencoding::encode($buffer, $encoding);
                     $url->query = self::percentEncodeCodePoints('/[^!$-;=?-~]/', $buffer);
                     $buffer = '';
                     if ($c === '#') {
                         $url->fragment = '';
                         $state = 'fragment state';
                     }
                 } else {
                     $buffer .= $c;
                 }
                 break;
             case 'fragment state':
                 if ($c !== '') {
                     // The rest of the input is the fragment; U+0000 code points are dropped.
                     $url->fragment .= str_replace("\x00", '', implode('', array_slice($codePoints, $pointer)));
                 }
                 // Nothing can follow the fragment, so leave the loop as well as the switch.
                 break 2;
             default:
                 throw new \DomainException(sprintf('"%s" is an unknown state', $state));
         }
         // Stop once EOF has been processed; a negative pointer means a state has rewound to the start.
         if ($pointer >= 0 && !isset($codePoints[$pointer])) {
             break;
         }
     }
     return $url;
 }