/**
 * Request a URL belonging to this WordPress installation.
 *
 * URLs rejected by sist_is_local_url() are never requested; a WP_Error with
 * the code 'attempting_to_fetch_remote_url' is returned for them instead.
 *
 * @param string $url The URL to request.
 *
 * @return WP_Error|Simply_Static_Url_Response The WP_Error produced by the
 *         guard or by wp_remote_get(), otherwise the wrapped response.
 */
public static function fetch($url)
{
    // Guard: refuse anything that is not a URL of this installation.
    if (!sist_is_local_url($url)) {
        $message = sprintf(__("Attempting to fetch remote URL: %s", Simply_Static::SLUG), $url);

        return new WP_Error('attempting_to_fetch_remote_url', $message);
    }

    // 'redirection' => 0 asks the HTTP API not to follow redirects, so the
    // caller gets to see (and handle) 30x responses itself.
    $request_args = array(
        'timeout'     => self::TIMEOUT,
        'sslverify'   => false,
        'redirection' => 0,
    );
    $result = wp_remote_get($url, $request_args);

    return is_wp_error($result)
        ? $result
        : new Simply_Static_Url_Response($url, $result);
}
/**
 * Create a static version of the site
 *
 * Creates a fresh archive directory under $this->temp_files_dir, then crawls
 * the site starting from the home URL plus any configured additional URLs
 * (one per line). Every URL taken from the queue is fetched and then:
 *
 *  - fetch errors and non-200/non-redirect responses are skipped;
 *  - redirects (301, 302, 303, 307, 308) either re-queue the trailing-slashed
 *    URL or are saved as a rendered 'redirect' template;
 *  - 200 responses have their URLs extracted and queued, the origin URL
 *    replaced by the destination URL, and their body saved to the archive.
 *
 * URLs that were saved are appended to $this->export_log.
 *
 * @return void
 */
public function create_archive()
{
    global $blog_id;

    // TODO: Do ajax calls instead of just running forever and ever
    set_time_limit(0);

    // Create archive directory
    $current_user = wp_get_current_user();
    $archive_name = join('-', array($this->slug, $blog_id, time(), $current_user->user_login));
    $this->archive_dir = trailingslashit($this->temp_files_dir . $archive_name);
    if (!file_exists($this->archive_dir)) {
        wp_mkdir_p($this->archive_dir);
    }

    // Add URLs to queue
    $origin_url = home_url();
    $destination_url = $this->destination_scheme . '://' . $this->destination_host;

    // One additional URL per line. Surrounding whitespace is trimmed and
    // blank lines are dropped so they never reach the fetcher (an empty
    // setting would otherwise queue a single empty string).
    $additional_urls = array_filter(
        array_map('trim', preg_split("/\r\n|\n|\r/", (string) $this->additional_urls)),
        'strlen'
    );
    $urls_queue = array_unique(array_merge(array(trailingslashit($origin_url)), $additional_urls));

    while (count($urls_queue)) {
        $current_url = array_shift($urls_queue);

        $response = Simply_Static_Url_Fetcher::fetch($current_url);

        // If we get a WP_Error then somehow our request failed (e.g. space in URL)
        // TODO: Keep a queue of failed urls too
        if (is_wp_error($response)) {
            continue;
        }

        // parse_url() returns false for seriously malformed URLs; there is
        // no sensible file path to save such a response under, so skip it.
        $url_parts = parse_url($response->url);
        if ($url_parts === false) {
            continue;
        }

        // A URL such as "http://example.com" has no 'path' component at all;
        // treat it as the root path instead of reading an undefined index.
        $path = isset($url_parts['path']) ? $url_parts['path'] : '/';

        $is_html = $response->is_html();

        // If we get a 30x redirect...
        if (in_array($response->code, array(301, 302, 303, 307, 308))) {
            $redirect_url = $response->get_redirect_url();

            // WP likes to 301 redirect `/path` to `/path/` -- we want to
            // check for this and just add the trailing slashed version
            if ($redirect_url === trailingslashit($current_url)) {
                $urls_queue = $this->add_url_to_queue($urls_queue, $redirect_url);
            } else {
                // convert our potentially relative URL to an absolute URL
                $redirect_url = sist_relative_to_absolute_url($redirect_url, $current_url);

                if ($redirect_url) {
                    // check if this is a local URL
                    if (sist_is_local_url($redirect_url)) {
                        // add the redirected page to the queue
                        $urls_queue = $this->add_url_to_queue($urls_queue, $redirect_url);
                        // and update the URL
                        $redirect_url = str_replace($origin_url, $destination_url, $redirect_url);
                    }

                    // Save a rendered 'redirect' template in place of the page
                    $view = new Simply_Static_View();
                    $content = $view->set_template('redirect')
                        ->assign('redirect_url', $redirect_url)
                        ->render_to_string();

                    $this->save_url_to_file($path, $content, $is_html);
                    $this->export_log[] = $current_url;
                }
            }

            continue;
        }

        // Not a 200 for the response code? Move on.
        // TODO: Keep a queue of failed urls too
        if ($response->code != 200) {
            continue;
        }

        $this->export_log[] = $current_url;

        // Fetch all URLs from the page and add them to the queue...
        $urls = $response->extract_urls();
        foreach ($urls as $url) {
            $urls_queue = $this->add_url_to_queue($urls_queue, $url);
        }

        // Replace the origin URL with the destination URL
        $response->replace_url($origin_url, $destination_url);

        // Save the page to our archive
        $content = $response->body;
        $this->save_url_to_file($path, $content, $is_html);
    }
}
/**
 * Record a URL found in the current response.
 *
 * The URL (relative or absolute) is first resolved against the URL of the
 * response it came from. It is only recorded when that resolution succeeds
 * and sist_is_local_url() accepts the result; the stored value is whatever
 * sist_remove_params_and_fragment() returns for it (presumably the URL
 * without its query string and fragment -- confirm against the helper).
 *
 * @param string $extracted_url The URL as it appeared in the response.
 *
 * @return void
 */
private function add_to_extracted_urls($extracted_url)
{
    $base_url = $this->response->url;
    $resolved_url = sist_relative_to_absolute_url($extracted_url, $base_url);

    // Could not be turned into an absolute URL: nothing to record.
    if (!$resolved_url) {
        return;
    }

    // Only URLs belonging to this site are of interest.
    if (!sist_is_local_url($resolved_url)) {
        return;
    }

    $this->extracted_urls[] = sist_remove_params_and_fragment($resolved_url);
}