/**
 * Create a static version of the site
 *
 * Creates a fresh archive directory under the temp files directory, then
 * crawls the site starting from the origin URL plus any additional URLs.
 * Each fetched response is either saved as a redirect stub (30x) or saved
 * with origin URLs rewritten to the destination URL (200). Every URL that
 * was written to the archive is appended to the export log.
 *
 * @return void
 */
public function create_archive() {
    global $blog_id;

    // TODO: Do ajax calls instead of just running forever and ever
    set_time_limit(0);

    // Create archive directory
    $current_user = wp_get_current_user();
    $archive_name = join('-', array($this->slug, $blog_id, time(), $current_user->user_login));
    $this->archive_dir = trailingslashit($this->temp_files_dir . $archive_name);
    if (!file_exists($this->archive_dir)) {
        wp_mkdir_p($this->archive_dir);
    }

    // Add URLs to queue
    $origin_url = sist_origin_url();
    $destination_url = $this->destination_scheme . '://' . $this->destination_host;

    // parse_url() yields null when the origin has no path component; cast so
    // strlen() always receives a string.
    $origin_path_length = strlen((string) parse_url($origin_url, PHP_URL_PATH));

    // Split the additional URLs on any newline style, then drop blank lines
    // and surrounding whitespace so we never try to fetch an empty URL.
    $additional_urls = preg_split("/\r\n|\n|\r/", (string) $this->additional_urls);
    $additional_urls = array_filter(array_map('trim', $additional_urls), 'strlen');

    $urls_queue = array_unique(array_merge(array(trailingslashit($origin_url)), $additional_urls));

    while (count($urls_queue)) {
        $current_url = array_shift($urls_queue);

        $response = Simply_Static_Url_Fetcher::fetch($current_url);

        // If we get a WP_Error then somehow our request failed (e.g. space in URL)
        // TODO: Keep a queue of failed urls too
        if (is_wp_error($response)) {
            continue;
        }

        // URLs without a path (e.g. http://www.example.com, no trailing
        // slash) have no 'path' key; treat them as the root path.
        $url_parts = parse_url($response->url);
        $path = isset($url_parts['path']) ? $url_parts['path'] : '/';
        if ($origin_path_length > 1) { // prevents removal of '/'
            $path = substr($path, $origin_path_length);
        }

        $is_html = $response->is_html();

        // If we get a 30x redirect...
        if (in_array($response->code, array(301, 302, 303, 307))) {
            $redirect_url = $response->get_redirect_url();

            // WP likes to 301 redirect `/path` to `/path/` -- we want to
            // check for this and just add the trailing slashed version
            if ($redirect_url === trailingslashit($current_url)) {
                $urls_queue = $this->add_url_to_queue($urls_queue, $redirect_url);
            } else {
                // convert our potentially relative URL to an absolute URL
                $redirect_url = sist_relative_to_absolute_url($redirect_url, $current_url);

                if ($redirect_url) {
                    // check if this is a local URL
                    if (sist_is_local_url($redirect_url)) {
                        // add the redirected page to the queue
                        $urls_queue = $this->add_url_to_queue($urls_queue, $redirect_url);
                        // and update the URL
                        $redirect_url = str_replace($origin_url, $destination_url, $redirect_url);
                    }

                    // Save a redirect page in place of the original URL
                    $view = new Simply_Static_View();
                    $content = $view->set_template('redirect')
                        ->assign('redirect_url', $redirect_url)
                        ->render_to_string();
                    $this->save_url_to_file($path, $content, $is_html);
                    $this->export_log[] = $current_url;
                }
            }

            continue;
        }

        // Not a 200 for the response code? Move on.
        // TODO: Keep a queue of failed urls too
        if ($response->code != 200) {
            continue;
        }

        $this->export_log[] = $current_url;

        // Fetch all URLs from the page and add them to the queue...
        $urls = $response->extract_urls();
        foreach ($urls as $url) {
            $urls_queue = $this->add_url_to_queue($urls_queue, $url);
        }

        // Replace the origin URL with the destination URL
        $response->replace_urls($destination_url);

        // Save the page to our archive
        $content = $response->body;
        $this->save_url_to_file($path, $content, $is_html);
    }
}
/**
 * Check if URL starts with same URL as WordPress installation
 *
 * The comparison is case-insensitive. The origin URL must be followed by
 * the end of the string or by a '/', '?' or '#', so that a URL which merely
 * shares a textual prefix with the origin (a longer host name, a different
 * port, a sibling path such as `/blogger` vs `/blog`) is not treated as local.
 *
 * @param string $url URL to check
 * @return boolean true if URL is local, false otherwise
 */
function sist_is_local_url($url) {
    $url = (string) $url;

    // Compare against the origin without a trailing slash so that both
    // `origin` and `origin/` forms of the URL are recognised.
    $origin_url = rtrim(sist_origin_url(), '/');
    if ($origin_url === '' || stripos($url, $origin_url) !== 0) {
        return false;
    }

    // Whatever follows the origin prefix must begin a path, query or fragment.
    $remainder = substr($url, strlen($origin_url));
    if ($remainder === false || $remainder === '') {
        return true;
    }

    return in_array($remainder[0], array('/', '?', '#'), true);
}
/**
 * Replaces the origin URL with the destination URL in the response body
 *
 * Only HTML and CSS responses are rewritten; any other body is left
 * untouched. Both plain URLs and slash-escaped (JSON-encoded) URLs are
 * replaced.
 *
 * @param string $destination_url URL to substitute for the origin URL
 * @return void
 */
public function replace_urls($destination_url) {
    /*
    TODO: Might want to eventually rope this into extract_urls_from_html/
    extract_urls_from_css so that we're only doing preg_replace/
    str_replace once. Only reason I'm not doing that now is because of
    the fix for wp_json_encode.
    */
    if (!$this->is_html() && !$this->is_css()) {
        return;
    }

    // replace any instance of the origin url, whether it starts with
    // https://, http://, or //
    // preg_quote() escapes every regex metacharacter in the host (notably
    // '.'), not just the delimiter, so only the literal host matches.
    $pattern = '/(https?:)?\\/\\/' . preg_quote(sist_origin_host(), '/') . '/i';
    $response_body = preg_replace($pattern, $destination_url, $this->body);

    // preg_replace() returns null on a PCRE error; keep the original body
    // rather than wiping it out.
    if ($response_body === null) {
        $response_body = $this->body;
    }

    // also replace wp_json_encode'd urls, as used by WP's `concatemoji`
    $response_body = str_replace(
        addcslashes(sist_origin_url(), '/'),
        addcslashes($destination_url, '/'),
        $response_body
    );

    $this->body = $response_body;
}