/**
 * Collect the http(s) urls referenced by src/href attributes in an html string.
 *
 * Html comments are stripped before scanning. Every match is resolved against
 * $baseUrl, and any fragment part ("#...") is cut off the resulting url.
 *
 * @param string $baseUrl Url that the found links are resolved against.
 * @param string $string  Html markup to scan.
 * @return array List of ['url' => string, 'external' => bool] entries; empty when nothing matched.
 */
public function getDomUrls($baseUrl, $string)
{
    $urls = [];

    // Drop html comments so that links inside them are not picked up.
    $markup = preg_replace('#<!--.*-->#isU', '', $string);

    // Capture group 2 holds the value of each double-quoted src/href attribute.
    preg_match_all('#(src|href)="(.*)"#iU', $markup, $found);

    // The flag is computed once from the base url's host, so every entry
    // returned by a single call carries the same 'external' value.
    // NOTE(review): the host of each individual link is never inspected — confirm this is intended.
    $external = $this->host !== \Sabre\Uri\parse($baseUrl)['host'];

    foreach ($found[2] as $candidate) {
        // Skip values that parse_url() rejects as seriously malformed.
        if (false === parse_url($candidate)) {
            continue;
        }

        $resolved = \Sabre\Uri\resolve($baseUrl, $candidate);
        $parts = \Sabre\Uri\parse($resolved);

        // Only http and https links are of interest (drops mailto:, javascript:, ...).
        if (!in_array($parts['scheme'], ['http', 'https'])) {
            continue;
        }

        // Cut the fragment off, keeping everything before the first '#'.
        if (null !== $parts['fragment']) {
            $resolved = strstr($resolved, '#', true);
        }

        $urls[] = ['url' => $resolved, 'external' => $external];
    }

    return $urls;
}
/**
 * Url normalization.
 *
 * Runs the url through \Sabre\Uri\normalize() and then fills in missing parts:
 * the host falls back to the current request's HTTP_HOST (or SERVER_NAME when
 * HTTP_HOST is not set), and the scheme falls back to "http".
 *
 * @param string $url Url to normalize.
 * @return string The rebuilt url.
 */
protected function normalizeUrl($url)
{
    $parts = \Sabre\Uri\parse(\Sabre\Uri\normalize($url));

    // The fallback host is looked up unconditionally, before we know whether
    // the url already carries a host of its own.
    if (isset($_SERVER['HTTP_HOST'])) {
        $fallbackHost = $_SERVER['HTTP_HOST'];
    } else {
        $fallbackHost = $_SERVER['SERVER_NAME'];
    }

    if (!isset($parts['host'])) {
        $parts['host'] = $fallbackHost;
    }

    if (!isset($parts['scheme'])) {
        $parts['scheme'] = 'http';
    }

    return \Sabre\Uri\build($parts);
}
/**
 * Called when the command is run.
 *
 * Maps the request uri onto a filesystem path, prepares the environment
 * (SERVER_SOFTWARE, error handler, start time) and then hands the request to
 * the matching handler: serve_script() for existing .php files, serve_file()
 * for any other existing file, and serve_wordpress() for everything else.
 *
 * @return mixed Whatever the selected serve_*() handler returns.
 */
function run()
{
    //
    // Fetch options and set up the environment
    //
    $this->request_uri = \Sabre\Uri\parse($_SERVER['REQUEST_URI']);
    $this->requested_path = \Sabre\Uri\normalize($this->request_uri['path']);

    // On a Multisite install, core paths may be prefixed with one extra
    // segment (e.g. /something/wp-admin/...); strip that leading segment.
    // TODO: Is there anything else we need to do?
    if ($this->options['multisite']) {
        $prefixedCorePath = '/^\\/[_0-9a-zA-Z-]+\\/(wp-(content|admin|includes).*)/';

        if (preg_match($prefixedCorePath, $this->requested_path, $captures)) {
            $this->requested_path = '/' . $captures[1];
        }
    }

    // Requests under /wp-content/ are served from the configured wp-content
    // directory; offset 11 keeps the slash that follows "/wp-content".
    // Everything else is resolved against the WordPress root.
    $this->request_path = $this->startswith($this->requested_path, '/wp-content/')
        ? $this->options['wp-content'] . substr($this->requested_path, 11)
        : $this->options['wp-root'] . $this->requested_path;

    // Directories are served through their default document.
    // NOTE(review): no separator is inserted here, so a directory path without
    // a trailing slash becomes "<dir>index.php" — confirm callers always pass one.
    if (is_dir($this->request_path)) {
        $this->request_path .= 'index.php';
    }

    // If you don't set this, WordPress adds index.php into all the links.
    $_SERVER['SERVER_SOFTWARE'] = 'Apache';

    // Route PHP errors and notices through our own handler so they can be formatted.
    set_error_handler([$this, 'handle_php_error'], E_ALL);

    //
    // What sort of request is this?
    //

    // Save the start time
    $this->start_time = microtime(true);

    $isExistingFile = $this->request_path != '/' && file_exists($this->request_path);

    // Nothing on disk matches, so treat the request as a WordPress permalink.
    if (!$isExistingFile) {
        $this->request_message();
        $this->message("Processing {$this->request_path}");

        return $this->serve_wordpress();
    }

    // An existing PHP file is executed, unless the no-scripts option is set.
    $isRunnableScript = preg_match('/\\.php$/', $this->request_path)
        && !isset($this->options['no-scripts']);

    if ($isRunnableScript) {
        $this->request_message();

        return $this->serve_script();
    }

    // Anything else is assumed to be a static asset; only logged on request.
    if ($this->options['show-assets']) {
        $this->request_message();
    }

    // This gets set in load_whippet for wordpress requests, but that might not get included
    $this->register_shutdown_function();

    return $this->serve_file();
}