Пример #1
0
function _pugpig_package_edition_package($final_package_url, $content_xml_url, $relative_path, $debug = FALSE, $edition_tag = '', $return_manifest_asset_urls = FALSE, $timestamp = '', $tmp_root, $save_root, $cdn = '', $package_url_base = '', $test_mode = FALSE, $image_test_mode = FALSE, $concurrent = 5)
{
    $output = '';
    $html_zip_paths = array();
    $asset_zip_paths = array();
    $save_root = str_replace(DIRECTORY_SEPARATOR, '/', $save_root);
    $tmp_root = str_replace(DIRECTORY_SEPARATOR, '/', $tmp_root);
    $domain = '/';
    $colon_pos = strpos($content_xml_url, '://');
    if ($colon_pos > 0) {
        $domain = substr($content_xml_url, 0, strpos($content_xml_url, '/', $colon_pos + 3));
    }
    // $relative_path = _pugpig_package_url_remove_domain(substr($content_xml_url, 0, strrpos($content_xml_url, '/')) . '/');
    // WORDPRESS TEST
    //if (endsWith($content_xml_url, "pugpig_atom_contents.manifest")) $relative_path = '/';
    if (!$test_mode && !file_exists($save_root)) {
        mkdir($save_root, 0777, TRUE);
    }
    $tmp_path = $tmp_root . 'package-' . $timestamp . '/';
    pugpig_interface_output_header("Pugpig - Edition Packager");
    if ($test_mode) {
        print_r("<h1>Performing Pugpig Package Test Run</h1>");
    } else {
        if ($image_test_mode) {
            print_r("<h1>Performing Pugpig Package Image Preview</h1>");
        } else {
            print_r("<h1>Creating Pugpig Package</h1>");
        }
    }
    print_r("<button style='cursor: pointer;' onclick=\"toggle_visibility('info');\">Info</button> ");
    print_r("<button style='cursor: pointer;' onclick=\"toggle_visibility('key');\">Key</button> ");
    print_r("<br />Packager version " . pugpig_get_standalone_version() . " <br />");
    print_r("<span id='key' style='display:none;'>");
    print_r("<span class='pass'>* - downloaded</span><br />");
    print_r("<span class='skip'>* - skipped as already downloaded</span><br />");
    print_r("<span class='warning'>* - downloaded, but large file warning</span><br />");
    print_r("<span class='bigwarning'>* - downloaded, but VERY large file warning</span><br />");
    print_r("<span class='slowwarning'>* - downloaded, but a little bit slowly</span><br />");
    print_r("<span class='veryslowwarning'>* - downloaded, but too slowly for comfort</span><br />");
    print_r("<span class='fail'>* - failed to fetch or save resource</span><br />");
    print_r("</span>");
    print_r("<span id='info' style='display:none;'>");
    print_r("<em>Final Package URL: <a href='{$final_package_url}'>" . $final_package_url . '</a></em><br />');
    print_r("<em>Packaging ATOM URL: <a href='{$content_xml_url}'>" . $content_xml_url . '</a></em><br />');
    print_r("<em>Domain is: " . $domain . '</em><br />');
    print_r("<em>Relative path is: " . $relative_path . '</em><br />');
    print_r("<em>Package URL base is: " . $package_url_base . '</em><br />');
    print_r("<em>Save root is: " . $save_root . '</em><br />');
    print_r("<em>Temp path is: " . $tmp_path . '</em><br />');
    print_r("<em>CDN is: " . $cdn . '</em><br />');
    print_r("<em>Debug Mode is: " . ($debug ? "ON" : "OFF") . '</em><br />');
    print_r("<em>Test Mode is: " . ($test_mode ? "ON" : "OFF") . '</em><br />');
    print_r("<em>Image Mode is: " . ($image_test_mode ? "ON" : "OFF") . '</em><br />');
    print_r("<em>cURL timeout is: " . PUGPIG_CURL_TIMEOUT . ' seconds with ' . $concurrent . ' concurrent requests</em><br />');
    print_r("</span>");
    print_r("<h1>Retrieving files</h1>");
    _print_immediately('Package ' . $timestamp . ' started at ' . date(PUGPIG_DATE_FORMAT, $timestamp) . '<br />');
    // Array used to store errors in the responses
    $format_failures = array();
    // Get the ATOM feeds - the real and and the one that might contain hidden extras
    $entries = array();
    $content_xml_hidden_save_path = $tmp_path . 'content-hidden.xml';
    $content_xml_hidden_path = $content_xml_url . (strpos($content_xml_url, '?') > 0 ? '&' : '?') . 'include_hidden=yes';
    $entries = _pugpig_relative_urls_to_download_array($relative_path, array($content_xml_url), $domain, $tmp_path);
    $entries[$content_xml_hidden_path] = $content_xml_hidden_save_path;
    $entries = _pugpig_package_download_batch("Public and Hidden ATOM Feeds", $entries, $debug, $concurrent);
    $content_xml_save_path = $entries[$content_xml_url];
    if (file_exists($content_xml_save_path)) {
        // Read the ATOM from the hidden file
        $fhandle = fopen($content_xml_save_path, 'r');
        $atom_excluding_hidden = fread($fhandle, filesize($content_xml_save_path));
        fclose($fhandle);
        $msg = check_xml_is_valid($atom_excluding_hidden);
        if ($msg != '') {
            $format_failures[$content_xml_url] = "XML Invalid: " . $msg;
            $atom_excluding_hidden = '';
        }
    }
    $atom_ret = null;
    if (file_exists($content_xml_hidden_save_path)) {
        // Read the ATOM from the hidden file
        $fhandle = fopen($content_xml_hidden_save_path, 'r');
        $atom_including_hidden = fread($fhandle, filesize($content_xml_hidden_save_path));
        fclose($fhandle);
        $msg = check_xml_is_valid($atom_including_hidden);
        if ($msg != '') {
            $format_failures[$content_xml_hidden_path] = "XML Invalid: " . $msg;
            $atom_including_hidden = '';
        } else {
            $atom_ret = _pugpig_package_parse_atom($atom_including_hidden);
        }
        unset($entries[$content_xml_hidden_path]);
        // We only want the real atom in the zip
        $html_zip_paths = array_merge($html_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));
    }
    // Check that the XML is valid, and show the errors if not.
    _pugpig_package_show_failures($format_failures);
    if (!$atom_ret) {
        return;
    }
    // Update the edition tag if we have something from the feed
    if ($debug) {
        _print_immediately('Edition tag was <b>' . $edition_tag . '<br />');
    }
    if (!strlen($edition_tag)) {
        $edition_tag = $atom_ret['edition_tag'];
    }
    _print_immediately('Edition tag is <b>' . $edition_tag . '<br />');
    // Process the manifests - these are relative to the ATOM content XML
    $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['manifest_urls'], $content_xml_url, $tmp_path);
    $entries = _pugpig_package_download_batch("Manifests", $entries, $debug, $concurrent);
    $asset_zip_paths = array_merge($asset_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));
    // Keep for the asset zip
    // Getting the list of static files from the manifests
    $manifest_entries = array();
    $format_failures = array();
    foreach ($entries as $url => $sfile) {
        $fhandle = fopen($sfile, 'r');
        $fcontents = trim(fread($fhandle, filesize($sfile)));
        fclose($fhandle);
        if (!startsWith($fcontents, "CACHE MANIFEST")) {
            // This is dodgy. We have a 200 that isn't a manifest.
            // Sometimes under really high concurrency, Drupal doesn't load includes properly
            // Delete the saved file in case it is better next time.
            $format_failures[$url] = "Manifest format not correct - CACHE MANIFEST not at start of response. Got: " . $fcontents;
            unlink($sfile);
        } else {
            //print_r("Read: " . $sfile . " - " . filesize($sfile) . " bytes<br />");
            $manifest_entries = _pugpig_package_get_asset_urls_from_manifest($fcontents, $manifest_entries, $url);
        }
    }
    _pugpig_package_show_failures($format_failures);
    $manifest_entries = array_unique($manifest_entries);
    // Stop now and return the list of manifest items if required
    if ($return_manifest_asset_urls) {
        _print_immediately('<em>Returning ' . count($manifest_entries) . ' assets</em><br />');
        return $manifest_entries;
    }
    // Process the static files
    $entries = _pugpig_relative_urls_to_download_array($relative_path, $manifest_entries, $domain, $tmp_path);
    if ($image_test_mode) {
        _pugpig_package_show_images_in_package($entries);
    } else {
        $entries = _pugpig_package_download_batch("Static Files", $entries, $debug, $concurrent);
        $asset_zip_paths = array_merge($asset_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));
        // Keep for the asset zip
        // Process the HTML files
        $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['html_urls'], $content_xml_url, $tmp_path);
        $entries = _pugpig_package_download_batch("HTML Pages", $entries, $debug, $concurrent);
        $html_zip_paths = array_merge($html_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));
        // Keep for the html zip
        if (!$test_mode) {
            print_r("<h2>Packaging files</h2>");
            // Figure put where the packages will live
            $zip_base_url = $relative_path;
            if (!empty($package_url_base)) {
                $zip_base_url = $package_url_base;
            }
            _pugpig_package_create_zip("public assets", $edition_tag . '-assets-' . $timestamp . '.zip', $tmp_path, $save_root, $asset_zip_paths, $zip_base_url);
            _pugpig_package_create_zip("secure html", $edition_tag . '-html-' . $timestamp . '.zip', $tmp_path, $save_root, $html_zip_paths, $zip_base_url);
            // Create package - TODO: Check on why we save this
            print_r("<h3>Creating Package XML</h3>");
            $package_name = $edition_tag . '-package-' . $timestamp . '.xml';
            _print_immediately('<em>Saving package xml to ' . $save_root . $package_name . '</em><br />');
            $package_xml = _package_edition_package_list_xml($save_root, $edition_tag, $package_url_base, $cdn, $save_root . $package_name, $timestamp);
            _print_immediately("<a target='_blank' href='" . $final_package_url . "'>View XML file</a><br />");
            if (is_null($package_xml)) {
                _print_immediately('Error in saving package file.<br /><br /><b>Aborting!</b><br /><a href="javascript:location.reload(true);">Refresh this page to reload and try again. (It will resume from where it last succeeded.)</a><br />');
                exit;
            }
            $deleted_files = _pugpig_clean_package_folder($save_root);
            if (count($deleted_files)) {
                print_r("<h3>Deleting old packagage files</h3>");
                _print_immediately("<b>Deleted " . count($deleted_files) . " old files</b><br />");
                foreach ($deleted_files as $f) {
                    _print_immediately("Deleted {$f}<br />");
                }
            }
        }
    }
    // Delete the temp area
    if (!$debug) {
        _package_rmdir($tmp_path);
    } else {
        _print_immediately("<p><b>Debug mode - not deleting temp files</b></p>");
    }
    _fill_buffer(16000);
    if (!$test_mode && !$image_test_mode) {
        print_r("<h2>Packaging Complete</h2>");
    } else {
        print_r("<h2>Test Run Complete</h2>");
    }
    return $edition_tag . '-package-' . $timestamp . '.xml';
}
function _pugpig_package_edition_package($final_package_url, $content_xml_url, $relative_path, $debug = false, $edition_tag = '', $return_manifest_asset_urls = false, $timestamp = '', $tmp_root, $save_root, $cdn = '', $package_url_base = '', $test_mode = false, $image_test_mode = false, $concurrent = 5, $bucket_allocator = null)
{
    $verbose = $debug;
    if (empty($bucket_allocator)) {
        // create default bucket allocator
        $bucket_allocator = new PackagedFileAllocatorOriginal();
    }
    $bucket_allocator->setVerbose($verbose);
    // sanitise inputs
    $save_root = str_replace(DIRECTORY_SEPARATOR, '/', $save_root);
    $tmp_root = str_replace(DIRECTORY_SEPARATOR, '/', $tmp_root);
    // process inputs
    $domain = '/';
    $colon_pos = strpos($content_xml_url, '://');
    if ($colon_pos > 0) {
        $domain = substr($content_xml_url, 0, strpos($content_xml_url, '/', $colon_pos + 3));
    }
    $last_slash = strrpos($content_xml_url, '/');
    $content_xml_leaf = $last_slash === false ? $content_xml : substr($content_xml_url, $last_slash + 1);
    $tmp_path = $tmp_root . 'package-' . $timestamp . '/';
    // ensure save root folder exists
    if (!$test_mode && !file_exists($save_root)) {
        mkdir($save_root, 0777, true);
    }
    pugpig_interface_output_header("Pugpig - Edition Packager");
    if ($test_mode) {
        echo "<h1>Performing Pugpig Package Test Run</h1>";
    } elseif ($image_test_mode) {
        echo "<h1>Performing Pugpig Package Image Preview</h1>";
    } else {
        echo "<h1>Creating Pugpig Package</h1>";
    }
    $host = $_SERVER['HTTP_HOST'];
    if (!pugpig_test_ping($host)) {
        echo "<p><b><font color='red'>{$host}: Ping Failed. Maybe you need a local host entry?<br />127.0.0.1 {$host}</b></p>";
    }
    print_r("<button style='cursor: pointer;' onclick=\"toggle_visibility('info');\">Info</button> ");
    print_r("<button style='cursor: pointer;' onclick=\"toggle_visibility('key');\">Key</button> ");
    print_r("<br />Packager version " . pugpig_get_standalone_version() . " <br />");
    print_r("<span id='key' style='display:none;'>");
    print_r("<span class='pass'>* - downloaded</span><br />");
    print_r("<span class='skip'>* - skipped as already downloaded</span><br />");
    print_r("<span class='fail'>* - failed to fetch or save resource</span><br />");
    print_r("</span>");
    print_r("<span id='info' style='display:none;'>");
    print_r("<em>Final Package URL: <a href='{$final_package_url}'>" . $final_package_url . '</a></em><br />');
    print_r("<em>Packaging ATOM URL: <a href='{$content_xml_url}'>" . $content_xml_url . '</a></em><br />');
    print_r("<em>Content leaf: {$content_xml_leaf}</em><br />");
    print_r("<em>Domain is: " . $domain . '</em><br />');
    print_r("<em>Relative path is: " . $relative_path . '</em><br />');
    print_r("<em>Package URL base is: " . $package_url_base . '</em><br />');
    print_r("<em>Save root is: " . $save_root . '</em><br />');
    print_r("<em>Temp path is: " . $tmp_path . '</em><br />');
    print_r("<em>CDN is: " . $cdn . '</em><br />');
    print_r("<em>Debug Mode is: " . ($debug ? "ON" : "OFF") . '</em><br />');
    print_r("<em>Test Mode is: " . ($test_mode ? "ON" : "OFF") . '</em><br />');
    print_r("<em>Image Mode is: " . ($image_test_mode ? "ON" : "OFF") . '</em><br />');
    print_r("<em>cURL timeout is: " . PUGPIG_CURL_TIMEOUT . ' seconds with ' . $concurrent . ' concurrent requests</em><br />');
    echo $bucket_allocator->describeAllocator();
    print_r("</span>");
    print_r("<h1>Retrieving: {$content_xml_url}</h1>");
    _print_immediately('Package ' . $timestamp . ' started at ' . date(PUGPIG_DATE_FORMAT, $timestamp) . '<br />');
    // Array used to store errors in the responses
    $format_failures = array();
    // Get the ATOM feeds - the real and and the one that might contain hidden extras
    $entries = array();
    $content_xml_hidden_save_path = $tmp_path . 'content-hidden.xml';
    $content_xml_hidden_path = $content_xml_url . (strpos($content_xml_url, '?') > 0 ? '&' : '?') . 'include_hidden=yes';
    // get the entry for the content xml
    $entries = _pugpig_relative_urls_to_download_array($relative_path, array($content_xml_url), $domain, $tmp_path);
    // and the hidden version
    $entries[$content_xml_hidden_path] = $content_xml_hidden_save_path;
    $entries = _pugpig_package_download_batch("Public and Hidden ATOM Feeds", $entries, $debug, $concurrent);
    // validate feed that doesn't have hidden entries
    _pugpig_validate_saved_feed($entries[$content_xml_url], $content_xml_url, $format_failures);
    // validate feed that has hidden entries
    $feed_contents_with_hidden = _pugpig_validate_saved_feed($content_xml_hidden_save_path, $content_xml_hidden_path, $format_failures);
    $atom_filenames = null;
    $atom_ret = null;
    if (!empty($feed_contents_with_hidden)) {
        $atom_ret = _pugpig_package_parse_atom($feed_contents_with_hidden);
        unset($entries[$content_xml_hidden_path]);
        // we only want the real atom in the zip
        $atom_filenames = _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug);
    }
    _pugpig_package_show_failures($format_failures);
    if (!$atom_ret) {
        return;
    }
    $contextualised_urls = $atom_ret['contextualised_urls'];
    foreach ($contextualised_urls as $page_id => &$context) {
        $context['manifest_urls'] = _pugpig_relative_urls_to_download_array($relative_path, $context['manifest_urls'], $content_xml_url, $tmp_path);
        $context['html_urls'] = _pugpig_relative_urls_to_download_array($relative_path, $context['html_urls'], $content_xml_url, $tmp_path);
    }
    unset($context);
    $manifest_pages = $atom_ret['manifest_pages'];
    $manifest_pages_absolute = array();
    foreach ($manifest_pages as $url => $page_ids) {
        $filenames = array_keys(_pugpig_relative_urls_to_download_array($relative_path, array($url), $content_xml_url, $tmp_path));
        if (count($filenames > 0)) {
            $absolute_filename = $filenames[0];
            $manifest_pages_absolute[$absolute_filename] = $page_ids;
        }
    }
    // Get the Edition Tag if we don't have it
    if (!strlen($edition_tag)) {
        $edition_tag = $atom_ret['edition_tag'];
    }
    // Update the edition tag if we have something from the feed
    _print_immediately('<h2>Edition: ' . $atom_ret['edition_title'] . ' (' . $edition_tag . ')</h2>');
    // Process the manifests - these are relative to the ATOM content XML
    $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['manifest_urls'], $content_xml_url, $tmp_path);
    $entries = _pugpig_package_download_batch("Manifests", $entries, $debug, $concurrent);
    // Getting the list of static files from the manifests
    $manifest_entries = array();
    $format_failures = array();
    foreach ($entries as $url => $sfile) {
        $fhandle = fopen($sfile, 'r');
        $fcontents = trim(fread($fhandle, filesize($sfile)));
        fclose($fhandle);
        if (!startsWith($fcontents, "CACHE MANIFEST")) {
            // This is dodgy. We have a 200 that isn't a manifest.
            // Sometimes under really high concurrency, Drupal doesn't load includes properly
            // Delete the saved file in case it is better next time.
            $format_failures[$url] = "Manifest format not correct - CACHE MANIFEST not at start of response. Got: " . $fcontents;
            unlink($sfile);
        } else {
            //print_r("Read: " . $sfile . " - " . filesize($sfile) . " bytes<br />");
            //
            $this_manifest_files = _pugpig_package_get_asset_urls_from_manifest($fcontents, array(), $url);
            $this_manifest_files_src_dest = _pugpig_relative_urls_to_download_array($relative_path, $this_manifest_files, $content_xml_url, $tmp_path);
            //$this_manifest_files_src_dest = _pugpig_package_zip_paths($this_manifest_files_src_dest, $tmp_path, $package_url_base, $relative_path, $debug);
            $manifest_pages = $manifest_pages_absolute[$url];
            foreach ($manifest_pages as $manifest_page) {
                if (empty($contextualised_urls[$manifest_page]['manifest_files'])) {
                    $contextualised_urls[$manifest_page]['manifest_files'] = $this_manifest_files_src_dest;
                } else {
                    $contextualised_urls[$manifest_page]['manifest_files'][] = $this_manifest_files_src_dest;
                }
            }
            $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['manifest_urls'], $content_xml_url, $tmp_path);
            $manifest_entries = _pugpig_package_get_asset_urls_from_manifest($fcontents, $manifest_entries, $url);
        }
    }
    _pugpig_package_show_failures($format_failures);
    $manifest_entries = array_unique($manifest_entries);
    // Stop now and return the list of manifest items if required
    if ($return_manifest_asset_urls) {
        _print_immediately('<em>Returning ' . count($manifest_entries) . ' assets</em><br />');
        return $manifest_entries;
    }
    // Process the static files
    $entries = _pugpig_relative_urls_to_download_array($relative_path, $manifest_entries, $domain, $tmp_path);
    if ($image_test_mode) {
        _pugpig_package_show_images_in_package($entries);
    } else {
        $entries = _pugpig_package_download_batch("Static Files", $entries, $debug, $concurrent);
        // Process the HTML files
        $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['html_urls'], $content_xml_url, $tmp_path);
        $entries = _pugpig_package_download_batch("HTML Pages", $entries, $debug, $concurrent);
        if (!$test_mode) {
            print_r("<h2>Packaging files</h2>");
            $bucket_allocator->allocateAtomFileToBuckets($atom_filenames);
            foreach ($contextualised_urls as $page_id => $info) {
                $html_files = _pugpig_package_zip_paths($info['html_urls'], $tmp_path, $package_url_base, $relative_path, $debug);
                $manifest_files = _pugpig_package_zip_paths($info['manifest_urls'], $tmp_path, $package_url_base, $relative_path, $debug);
                $manifest_contents = _pugpig_package_zip_paths($info['manifest_files'], $tmp_path, $package_url_base, $relative_path, $debug);
                $context_xml = $info['entry'];
                $bucket_allocator->allocatePageFilesToBuckets($html_files, $manifest_files, $manifest_contents, $context_xml);
            }
            $bucket_allocator->finaliseBuckets();
            // Figure put where the packages will live
            $zip_base_url = $relative_path;
            if (!empty($package_url_base)) {
                $zip_base_url = $package_url_base;
            }
            $prefered_max_size = 20 * 1024 * 1024;
            $bucket_zips = _pugpig_package_create_chunked_zips_for_buckets($bucket_allocator->getBuckets(), $edition_tag, $timestamp, $tmp_path, $save_root, $zip_base_url, $prefered_max_size, $verbose);
            // Create package - TODO: Check on why we save this
            print_r("<h3>Creating Package XML</h3>");
            $package_name = "{$edition_tag}-package-{$timestamp}.xml";
            _print_immediately("<em>Saving package xml to {$save_root}{$package_name}</em><br />");
            $package_xml = _package_edition_package_list_xml_using_buckets($save_root, $edition_tag, $bucket_allocator, $bucket_zips, $package_url_base, $cdn, $save_root . $package_name, $timestamp, $content_xml_leaf);
            _print_immediately("<a target='_blank' href='" . $final_package_url . "'>View XML file</a><br />");
            if (is_null($package_xml)) {
                _print_immediately('Error in saving package file.<br /><br /><b>Aborting!</b><br /><a href="javascript:location.reload(true);">Refresh this page to reload and try again. (It will resume from where it last succeeded.)</a><br />');
                exit;
            }
            $deleted_files = _pugpig_clean_package_folder($save_root);
            if (count($deleted_files)) {
                print_r("<h3>Deleting old package files</h3>");
                _print_immediately("<b>Deleted " . count($deleted_files) . " old files</b><br />");
                foreach ($deleted_files as $f) {
                    _print_immediately("Deleted {$f}<br />");
                }
            }
        }
    }
    // Delete the temp area
    if (!$debug) {
        _package_rmdir($tmp_path);
    } else {
        _print_immediately("<p><b>Debug mode - not deleting temp files</b></p>");
    }
    _fill_buffer(16000);
    if (!$test_mode && !$image_test_mode) {
        print_r("<h2>Packaging Complete</h2>");
    } else {
        print_r("<h2>Test Run Complete</h2>");
    }
    return $edition_tag . '-package-' . $timestamp . '.xml';
}