/**
  * Create a static version of the site
  *
  * Creates a new archive directory under $this->temp_files_dir, then works
  * through a queue of URLs seeded with the origin URL and the entries of
  * $this->additional_urls. For each URL that is fetched successfully:
  *  - a 30x response is saved as a rendered 'redirect' template (except for
  *    the plain `/path` -> `/path/` redirect, where only the slashed URL is
  *    passed to add_url_to_queue());
  *  - a 200 response has its extracted URLs passed to add_url_to_queue(),
  *    its origin URLs replaced with the destination URL, and its body saved;
  *  - anything else is skipped.
  * URLs that were saved are appended to $this->export_log.
  *
  * @return void
  */
 public function create_archive()
 {
     global $blog_id;
     // TODO: Do ajax calls instead of just running forever and ever
     set_time_limit(0);
     // Create archive directory
     $current_user = wp_get_current_user();
     $archive_name = join('-', array($this->slug, $blog_id, time(), $current_user->user_login));
     $this->archive_dir = trailingslashit($this->temp_files_dir . $archive_name);
     if (!file_exists($this->archive_dir)) {
         wp_mkdir_p($this->archive_dir);
     }
     // Add URLs to queue
     $origin_url = sist_origin_url();
     $destination_url = $this->destination_scheme . '://' . $this->destination_host;
     // parse_url() yields null when the origin URL has no path component;
     // cast so that strlen() always receives a string
     $origin_path_length = strlen((string) parse_url($origin_url, PHP_URL_PATH));
     // Trim the additional URLs and drop blank entries, so that an empty
     // setting or a stray blank line does not put '' on the queue
     $additional_urls = array_filter(
         array_map('trim', preg_split("/\r\n|\n|\r/", (string) $this->additional_urls)),
         'strlen'
     );
     $urls_queue = array_unique(array_merge(array(trailingslashit($origin_url)), $additional_urls));
     while (count($urls_queue)) {
         $current_url = array_shift($urls_queue);
         $response = Simply_Static_Url_Fetcher::fetch($current_url);
         // If we get a WP_Error then somehow our request failed (e.g. space in URL)
         // TODO: Keep a queue of failed urls too
         if (is_wp_error($response)) {
             continue;
         }
         $url_parts = parse_url($response->url);
         // parse_url() returns false for seriously malformed URLs; there is
         // no path to save such a response under, so skip it
         if (!is_array($url_parts)) {
             continue;
         }
         // URLs without a path, e.g. http://www.example.com (no trailing
         // slash), have no 'path' key; they refer to the root path
         $path = isset($url_parts['path']) ? $url_parts['path'] : '/';
         if ($origin_path_length > 1) {
             // prevents removal of '/'
             $path = substr($path, $origin_path_length);
         }
         $is_html = $response->is_html();
         // If we get a 30x redirect...
         if (in_array($response->code, array(301, 302, 303, 307))) {
             $redirect_url = $response->get_redirect_url();
             // WP likes to 301 redirect `/path` to `/path/` -- we want to
             // check for this and just add the trailing slashed version
             if ($redirect_url === trailingslashit($current_url)) {
                 $urls_queue = $this->add_url_to_queue($urls_queue, $redirect_url);
             } else {
                 // convert our potentially relative URL to an absolute URL
                 $redirect_url = sist_relative_to_absolute_url($redirect_url, $current_url);
                 if ($redirect_url) {
                     // check if this is a local URL
                     if (sist_is_local_url($redirect_url)) {
                         // add the redirected page to the queue
                         $urls_queue = $this->add_url_to_queue($urls_queue, $redirect_url);
                         // and update the URL
                         $redirect_url = str_replace($origin_url, $destination_url, $redirect_url);
                     }
                     // save a rendered 'redirect' template in place of the page
                     $view = new Simply_Static_View();
                     $content = $view->set_template('redirect')->assign('redirect_url', $redirect_url)->render_to_string();
                     $this->save_url_to_file($path, $content, $is_html);
                     $this->export_log[] = $current_url;
                 }
             }
             continue;
         }
         // Not a 200 for the response code? Move on.
         // TODO: Keep a queue of failed urls too
         if ($response->code != 200) {
             continue;
         }
         $this->export_log[] = $current_url;
         // Fetch all URLs from the page and add them to the queue...
         $urls = $response->extract_urls();
         foreach ($urls as $url) {
             $urls_queue = $this->add_url_to_queue($urls_queue, $url);
         }
         // Replace the origin URL with the destination URL
         $response->replace_urls($destination_url);
         // Save the page to our archive
         $content = $response->body;
         $this->save_url_to_file($path, $content, $is_html);
     }
 }
Exemplo n.º 2
0
/**
 * Check if URL starts with same URL as WordPress installation
 *
 * The comparison is case-insensitive. A plain prefix match is not enough:
 * with an origin of http://example.com it would also accept
 * http://example.com.evil.test/ or http://example.community/. The origin URL
 * must therefore be followed by the end of the string or by a '/', '?' or
 * '#' (unless the origin URL itself already ends in '/').
 *
 * @param  string  $url URL to check
 * @return boolean      true if URL is local, false otherwise
 */
function sist_is_local_url($url)
{
    $url = (string) $url;
    $origin_url = (string) sist_origin_url();
    // An empty needle makes stripos() warn (PHP < 8) or match everything
    // (PHP >= 8); without an origin URL nothing can be called local
    if ($origin_url === '') {
        return false;
    }
    if (stripos($url, $origin_url) !== 0) {
        return false;
    }
    $origin_length = strlen($origin_url);
    // Exact match, or the origin URL already ends on a path boundary
    if (strlen($url) === $origin_length || substr($origin_url, -1) === '/') {
        return true;
    }
    // Otherwise the character right after the prefix must start a path,
    // query string or fragment
    return in_array($url[$origin_length], array('/', '?', '#'), true);
}
 /**
  * Replaces the origin URL with the destination URL in the response body
  *
  * Only HTML and CSS responses are rewritten; for any other response the
  * body is left untouched.
  *
  * @param string $destination_url URL that takes the place of the origin URL
  * @return void
  */
 public function replace_urls($destination_url)
 {
     /* TODO: Might want to eventually rope this into extract_urls_from_html/
     		 	extract_urls_from_css so that we're only doing preg_replace/
     			str_replace once. Only reason I'm not doing that now is because of
     			the fix for wp_json_encode.
     		*/
     if (!$this->is_html() && !$this->is_css()) {
         return;
     }
     // replace any instance of the origin url, whether it starts with https://, http://, or //
     // preg_quote() escapes the regex metacharacters in the host (notably
     // '.', which would otherwise match any character) and the '/' delimiter
     $pattern = '/(https?:)?\\/\\/' . preg_quote(sist_origin_host(), '/') . '/i';
     $response_body = preg_replace($pattern, $destination_url, $this->body);
     if ($response_body === null) {
         // preg_replace() returns null on failure; keep the original body
         // instead of replacing it with an empty one
         $response_body = $this->body;
     }
     // also replace wp_json_encode'd urls, as used by WP's `concatemoji`
     $response_body = str_replace(addcslashes(sist_origin_url(), '/'), addcslashes($destination_url, '/'), $response_body);
     $this->body = $response_body;
 }