/**
 * Perform a lightweight format check on a saved file, chosen by MIME type
 * or file extension.
 *
 * Checks are tried in this order and the first match wins:
 *   1. XML      - MIME contains "xml" or the name ends in ".xml"; the result
 *                 of check_xml_is_valid() is returned as-is.
 *   2. JSON     - MIME starts with "application/json" or the name ends in
 *                 ".json"; the content must decode without error.
 *   3. Manifest - MIME starts with "text/cache-manifest" or the name ends in
 *                 ".manifest" / ".appcache"; the content must start with
 *                 "CACHE MANIFEST".
 * Files matching none of these are not read and are treated as valid.
 *
 * @param string $file Path of the file to validate.
 * @param string $mime MIME type reported for the file (may be empty).
 *
 * @return string Empty string when no problem was found, otherwise a
 *                human-readable error message.
 */
function pugpig_validate_file($file, $mime)
{
    if (!file_exists($file)) {
        return "No file";
    }

    // TODO: a UTF-8 encoding check was planned here but never implemented.

    // Callers may not know the MIME type; normalise so the string functions
    // below never receive null.
    $mime = (string) $mime;

    // Check XML
    if (strpos($mime, 'xml') !== FALSE || endsWith($file, '.xml')) {
        $f = file_get_contents($file);
        if ($f === FALSE) {
            return "Could not read file";
        }

        return check_xml_is_valid($f);
    }

    // Check JSON
    if (startsWith($mime, 'application/json') || endsWith($file, '.json')) {
        $f = file_get_contents($file);
        if ($f === FALSE) {
            return "Could not read file";
        }
        // Only the error state matters; the decoded value is discarded.
        json_decode($f);
        $err = json_last_error();
        if ($err === JSON_ERROR_NONE) {
            return "";
        }

        // The numeric JSON_ERROR_* code is reported, as callers have always seen.
        return "Error: {$err}";
    }

    // Check Manifests
    if (startsWith($mime, 'text/cache-manifest') || endsWith($file, '.manifest') || endsWith($file, '.appcache')) {
        $f = file_get_contents($file);
        if ($f === FALSE) {
            return "Could not read file";
        }
        if (!startsWith($f, "CACHE MANIFEST")) {
            return "Manifest did not start with CACHE Manifest. Instead got:\n{$f}";
        }
    }

    return "";
}
コード例 #2
0
/**
 * Download everything referenced by an edition's ATOM feed and build the
 * asset zip, HTML zip and package XML for it, printing HTML progress output
 * as it goes.
 *
 * Steps, in order:
 *   1. Fetch the public ATOM feed and the same feed with "include_hidden=yes".
 *   2. Validate both; parse the hidden-inclusive feed for manifest/HTML URLs.
 *   3. Fetch the manifests and collect the asset URLs they list.
 *   4. Fetch the static assets and HTML pages (or only preview images when
 *      $image_test_mode is set).
 *   5. Unless $test_mode is set, create the zips and the package XML and
 *      clean old files out of $save_root.
 *   6. Remove the temporary folder unless $debug is set.
 *
 * NOTE(review): $tmp_root and $save_root are required parameters declared
 * after optional ones, which PHP 8 reports as deprecated. The signature is
 * left untouched so existing positional callers keep working.
 *
 * @param string     $final_package_url          URL shown as the "View XML file" link.
 * @param string     $content_xml_url            URL of the edition's ATOM feed.
 * @param string     $relative_path              Passed through to the download/zip helpers.
 * @param bool       $debug                      Verbose output; temp files are kept.
 * @param string     $edition_tag                Tag used in output file names; taken from the feed when empty.
 * @param bool       $return_manifest_asset_urls Stop after the manifests and return the asset URLs.
 * @param int|string $timestamp                  Used in file names and passed to date() - presumably a Unix timestamp; confirm against callers.
 * @param string     $tmp_root                   Root folder for temporary downloads (concatenated directly, so it should end with a slash).
 * @param string     $save_root                  Folder receiving the zips and package XML (concatenated directly, so it should end with a slash).
 * @param string     $cdn                        Passed to the package XML builder.
 * @param string     $package_url_base           Base URL for the zips; $relative_path is used when empty.
 * @param bool       $test_mode                  Download only; do not create zips or package XML.
 * @param bool       $image_test_mode            Show the images in the package instead of downloading assets.
 * @param int        $concurrent                 Number of concurrent download requests.
 *
 * @return mixed Nothing when the feed could not be parsed, the de-duplicated
 *               manifest asset URL array when $return_manifest_asset_urls is
 *               set, otherwise the package XML file name. The script exits
 *               if the package XML could not be saved.
 */
function _pugpig_package_edition_package($final_package_url, $content_xml_url, $relative_path, $debug = FALSE, $edition_tag = '', $return_manifest_asset_urls = FALSE, $timestamp = '', $tmp_root, $save_root, $cdn = '', $package_url_base = '', $test_mode = FALSE, $image_test_mode = FALSE, $concurrent = 5)
{
    $html_zip_paths = array();
    $asset_zip_paths = array();
    $save_root = str_replace(DIRECTORY_SEPARATOR, '/', $save_root);
    $tmp_root = str_replace(DIRECTORY_SEPARATOR, '/', $tmp_root);

    // Work out "scheme://host" from the feed URL. A URL without any path
    // (e.g. "http://example.com") has no slash after the host, in which
    // case the whole URL is the domain.
    $domain = '/';
    $colon_pos = strpos($content_xml_url, '://');
    if ($colon_pos > 0) {
        $path_pos = strpos($content_xml_url, '/', $colon_pos + 3);
        $domain = ($path_pos === FALSE) ? $content_xml_url : substr($content_xml_url, 0, $path_pos);
    }
    // $relative_path = _pugpig_package_url_remove_domain(substr($content_xml_url, 0, strrpos($content_xml_url, '/')) . '/');
    // WORDPRESS TEST
    //if (endsWith($content_xml_url, "pugpig_atom_contents.manifest")) $relative_path = '/';
    if (!$test_mode && !file_exists($save_root)) {
        mkdir($save_root, 0777, TRUE);
    }
    $tmp_path = $tmp_root . 'package-' . $timestamp . '/';

    // Page header and the collapsible "Info" / "Key" panels.
    pugpig_interface_output_header("Pugpig - Edition Packager");
    if ($test_mode) {
        print_r("<h1>Performing Pugpig Package Test Run</h1>");
    } else {
        if ($image_test_mode) {
            print_r("<h1>Performing Pugpig Package Image Preview</h1>");
        } else {
            print_r("<h1>Creating Pugpig Package</h1>");
        }
    }
    print_r("<button style='cursor: pointer;' onclick=\"toggle_visibility('info');\">Info</button> ");
    print_r("<button style='cursor: pointer;' onclick=\"toggle_visibility('key');\">Key</button> ");
    print_r("<br />Packager version " . pugpig_get_standalone_version() . " <br />");
    print_r("<span id='key' style='display:none;'>");
    print_r("<span class='pass'>* - downloaded</span><br />");
    print_r("<span class='skip'>* - skipped as already downloaded</span><br />");
    print_r("<span class='warning'>* - downloaded, but large file warning</span><br />");
    print_r("<span class='bigwarning'>* - downloaded, but VERY large file warning</span><br />");
    print_r("<span class='slowwarning'>* - downloaded, but a little bit slowly</span><br />");
    print_r("<span class='veryslowwarning'>* - downloaded, but too slowly for comfort</span><br />");
    print_r("<span class='fail'>* - failed to fetch or save resource</span><br />");
    print_r("</span>");
    print_r("<span id='info' style='display:none;'>");
    print_r("<em>Final Package URL: <a href='{$final_package_url}'>" . $final_package_url . '</a></em><br />');
    print_r("<em>Packaging ATOM URL: <a href='{$content_xml_url}'>" . $content_xml_url . '</a></em><br />');
    print_r("<em>Domain is: " . $domain . '</em><br />');
    print_r("<em>Relative path is: " . $relative_path . '</em><br />');
    print_r("<em>Package URL base is: " . $package_url_base . '</em><br />');
    print_r("<em>Save root is: " . $save_root . '</em><br />');
    print_r("<em>Temp path is: " . $tmp_path . '</em><br />');
    print_r("<em>CDN is: " . $cdn . '</em><br />');
    print_r("<em>Debug Mode is: " . ($debug ? "ON" : "OFF") . '</em><br />');
    print_r("<em>Test Mode is: " . ($test_mode ? "ON" : "OFF") . '</em><br />');
    print_r("<em>Image Mode is: " . ($image_test_mode ? "ON" : "OFF") . '</em><br />');
    print_r("<em>cURL timeout is: " . PUGPIG_CURL_TIMEOUT . ' seconds with ' . $concurrent . ' concurrent requests</em><br />');
    print_r("</span>");
    print_r("<h1>Retrieving files</h1>");
    _print_immediately('Package ' . $timestamp . ' started at ' . date(PUGPIG_DATE_FORMAT, $timestamp) . '<br />');

    // Array used to store errors in the responses
    $format_failures = array();

    // Get the ATOM feeds - the real one and the one that might contain hidden extras
    $content_xml_hidden_save_path = $tmp_path . 'content-hidden.xml';
    $content_xml_hidden_path = $content_xml_url . (strpos($content_xml_url, '?') > 0 ? '&' : '?') . 'include_hidden=yes';
    $entries = _pugpig_relative_urls_to_download_array($relative_path, array($content_xml_url), $domain, $tmp_path);
    $entries[$content_xml_hidden_path] = $content_xml_hidden_save_path;
    $entries = _pugpig_package_download_batch("Public and Hidden ATOM Feeds", $entries, $debug, $concurrent);

    // The public feed is only validated here; its content is not used further.
    $content_xml_save_path = isset($entries[$content_xml_url]) ? $entries[$content_xml_url] : '';
    if (file_exists($content_xml_save_path)) {
        // file_get_contents() copes with zero-byte files, unlike
        // fread($handle, filesize(...)) which rejects a length of 0.
        $atom_excluding_hidden = (string) file_get_contents($content_xml_save_path);
        $msg = check_xml_is_valid($atom_excluding_hidden);
        if ($msg != '') {
            $format_failures[$content_xml_url] = "XML Invalid: " . $msg;
        }
    }

    $atom_ret = null;
    if (file_exists($content_xml_hidden_save_path)) {
        // Read the ATOM from the hidden file - this is the one that gets parsed
        $atom_including_hidden = (string) file_get_contents($content_xml_hidden_save_path);
        $msg = check_xml_is_valid($atom_including_hidden);
        if ($msg != '') {
            $format_failures[$content_xml_hidden_path] = "XML Invalid: " . $msg;
        } else {
            $atom_ret = _pugpig_package_parse_atom($atom_including_hidden);
        }
        unset($entries[$content_xml_hidden_path]);
        // We only want the real atom in the zip
        $html_zip_paths = array_merge($html_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));
    }

    // Check that the XML is valid, and show the errors if not.
    _pugpig_package_show_failures($format_failures);
    if (!$atom_ret) {
        return;
    }

    // Update the edition tag if we have something from the feed
    if ($debug) {
        _print_immediately('Edition tag was <b>' . $edition_tag . '</b><br />');
    }
    if (!strlen($edition_tag)) {
        $edition_tag = $atom_ret['edition_tag'];
    }
    _print_immediately('Edition tag is <b>' . $edition_tag . '</b><br />');

    // Process the manifests - these are relative to the ATOM content XML
    $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['manifest_urls'], $content_xml_url, $tmp_path);
    $entries = _pugpig_package_download_batch("Manifests", $entries, $debug, $concurrent);
    // Keep for the asset zip
    $asset_zip_paths = array_merge($asset_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));

    // Getting the list of static files from the manifests
    $manifest_entries = array();
    $format_failures = array();
    foreach ($entries as $url => $sfile) {
        // A missing, unreadable or empty manifest is treated as empty content
        // so it is reported below instead of raising a read error.
        $raw = is_file($sfile) ? file_get_contents($sfile) : FALSE;
        $fcontents = ($raw === FALSE) ? '' : trim($raw);
        if (!startsWith($fcontents, "CACHE MANIFEST")) {
            // This is dodgy. We have a 200 that isn't a manifest.
            // Sometimes under really high concurrency, Drupal doesn't load includes properly
            // Delete the saved file in case it is better next time.
            $format_failures[$url] = "Manifest format not correct - CACHE MANIFEST not at start of response. Got: " . $fcontents;
            if (file_exists($sfile)) {
                unlink($sfile);
            }
        } else {
            //print_r("Read: " . $sfile . " - " . filesize($sfile) . " bytes<br />");
            $manifest_entries = _pugpig_package_get_asset_urls_from_manifest($fcontents, $manifest_entries, $url);
        }
    }
    _pugpig_package_show_failures($format_failures);
    $manifest_entries = array_unique($manifest_entries);

    // Stop now and return the list of manifest items if required
    if ($return_manifest_asset_urls) {
        _print_immediately('<em>Returning ' . count($manifest_entries) . ' assets</em><br />');

        return $manifest_entries;
    }

    // Process the static files
    $entries = _pugpig_relative_urls_to_download_array($relative_path, $manifest_entries, $domain, $tmp_path);
    if ($image_test_mode) {
        _pugpig_package_show_images_in_package($entries);
    } else {
        $entries = _pugpig_package_download_batch("Static Files", $entries, $debug, $concurrent);
        // Keep for the asset zip
        $asset_zip_paths = array_merge($asset_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));

        // Process the HTML files
        $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['html_urls'], $content_xml_url, $tmp_path);
        $entries = _pugpig_package_download_batch("HTML Pages", $entries, $debug, $concurrent);
        // Keep for the html zip
        $html_zip_paths = array_merge($html_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));

        if (!$test_mode) {
            print_r("<h2>Packaging files</h2>");
            // Figure out where the packages will live
            $zip_base_url = $relative_path;
            if (!empty($package_url_base)) {
                $zip_base_url = $package_url_base;
            }
            _pugpig_package_create_zip("public assets", $edition_tag . '-assets-' . $timestamp . '.zip', $tmp_path, $save_root, $asset_zip_paths, $zip_base_url);
            _pugpig_package_create_zip("secure html", $edition_tag . '-html-' . $timestamp . '.zip', $tmp_path, $save_root, $html_zip_paths, $zip_base_url);

            // Create package - TODO: Check on why we save this
            print_r("<h3>Creating Package XML</h3>");
            $package_name = $edition_tag . '-package-' . $timestamp . '.xml';
            _print_immediately('<em>Saving package xml to ' . $save_root . $package_name . '</em><br />');
            $package_xml = _package_edition_package_list_xml($save_root, $edition_tag, $package_url_base, $cdn, $save_root . $package_name, $timestamp);
            _print_immediately("<a target='_blank' href='" . $final_package_url . "'>View XML file</a><br />");
            if (is_null($package_xml)) {
                _print_immediately('Error in saving package file.<br /><br /><b>Aborting!</b><br /><a href="javascript:location.reload(true);">Refresh this page to reload and try again. (It will resume from where it last succeeded.)</a><br />');
                exit;
            }

            $deleted_files = _pugpig_clean_package_folder($save_root);
            if (count($deleted_files)) {
                print_r("<h3>Deleting old package files</h3>");
                _print_immediately("<b>Deleted " . count($deleted_files) . " old files</b><br />");
                foreach ($deleted_files as $f) {
                    _print_immediately("Deleted {$f}<br />");
                }
            }
        }
    }

    // Delete the temp area
    if (!$debug) {
        _package_rmdir($tmp_path);
    } else {
        _print_immediately("<p><b>Debug mode - not deleting temp files</b></p>");
    }
    _fill_buffer(16000);
    if (!$test_mode && !$image_test_mode) {
        print_r("<h2>Packaging Complete</h2>");
    } else {
        print_r("<h2>Test Run Complete</h2>");
    }

    return $edition_tag . '-package-' . $timestamp . '.xml';
}
コード例 #3
0
/**
 * Load a feed that was saved to disk and return its contents if it is
 * valid XML.
 *
 * @param string $content_xml_file_path Path of the saved feed file.
 * @param string $url                   URL the feed came from; used as the key for any recorded failure.
 * @param array  $format_failures       Failure messages keyed by URL; an entry is added
 *                                      when the file cannot be read or is not valid XML.
 *
 * @return string|null The file contents when the XML is valid. Null when the
 *                     file does not exist (nothing is recorded), cannot be
 *                     read, or fails validation.
 */
function _pugpig_validate_saved_feed($content_xml_file_path, $url, &$format_failures)
{
    if (!file_exists($content_xml_file_path)) {
        return null;
    }

    // file_get_contents() handles zero-byte files; the previous
    // fread($handle, filesize(...)) call rejects a length of 0.
    $atom_contents = file_get_contents($content_xml_file_path);
    if ($atom_contents === false) {
        $format_failures[$url] = "XML Invalid: could not read saved feed";

        return null;
    }

    $msg = check_xml_is_valid($atom_contents);
    if (!empty($msg)) {
        $format_failures[$url] = "XML Invalid: " . $msg;

        return null;
    }

    return $atom_contents;
}
コード例 #4
0
/**
 * Parse an OPDS (ATOM) feed body into a title and a list of editions.
 *
 * Element text is read with XMLReader::readString(), which does not move
 * the reader. An empty element such as <summary/> therefore yields an empty
 * string and does not swallow the element that follows it.
 *
 * @param string $opds_body Raw XML of the OPDS feed.
 *
 * @return array On invalid XML: array('failure' => message).
 *               Otherwise: 'title' (feed title, with " - subtitle" appended
 *               when a subtitle exists) and 'editions', keyed by entry id,
 *               each holding 'cover', 'title', 'summary', 'url', 'type'
 *               ('package', 'atom' or 'Unknown'; empty when no acquisition
 *               link was seen), 'free', 'samples', 'draft', 'categories'
 *               (scheme => term) and 'updated'.
 */
function _pugpig_package_parse_opds($opds_body)
{
    $opds_ret = array();
    $message = check_xml_is_valid($opds_body);
    if (!empty($message)) {
        $opds_ret['failure'] = "Not Valid XML: {$message}";

        return $opds_ret;
    }

    $editions = array();
    $feed_title = '';
    $feed_subtitle = '';

    if ($opds_body != '') {
        $atom = new XMLReader();
        $atom->XML($opds_body);
        while ($atom->read()) {
            // Only element start tags carry information we use.
            if ($atom->nodeType != XMLReader::ELEMENT) {
                continue;
            }

            if ($atom->localName != 'entry') {
                // Feed-level metadata (any title/subtitle outside an entry).
                if ($atom->localName == 'title') {
                    $feed_title = $atom->readString();
                } elseif ($atom->localName == 'subtitle') {
                    $feed_subtitle = $atom->readString();
                }
                continue;
            }

            // Defaults for a new entry
            $edition_cover = "";
            $edition_id = "";
            $edition_title = "";
            $edition_summary = "";
            $edition_type = "";
            $edition_url = "";
            $edition_updated = "";
            $edition_free = true;
            $edition_sample = false;
            $edition_draft = false;
            $edition_categories = array();

            // A self-closing <entry/> has no children and no end tag; scanning
            // for one would run into the next entry.
            if (!$atom->isEmptyElement) {
                // Walk the entry's nodes until its closing tag.
                while ($atom->read() && $atom->localName != 'entry') {
                    if ($atom->nodeType != XMLReader::ELEMENT) {
                        continue;
                    }

                    switch ($atom->localName) {
                        case 'id':
                            // ID of an entry
                            $edition_id = $atom->readString();
                            break;

                        case 'updated':
                            // Last-updated stamp of an entry
                            $edition_updated = $atom->readString();
                            break;

                        case 'title':
                            // Title of an entry
                            $edition_title = $atom->readString();
                            break;

                        case 'summary':
                            // Summary of an entry
                            $edition_summary = $atom->readString();
                            break;

                        case 'draft':
                            // The mere presence of a draft element marks the
                            // entry as a draft; its content is not inspected.
                            $edition_draft = true;
                            break;

                        case 'category':
                            // Categories of an entry, keyed by scheme
                            $edition_categories[(string) $atom->getAttribute('scheme')] = $atom->getAttribute('term');
                            break;

                        case 'link':
                            // Links in an entry
                            $lrel = $atom->getAttribute('rel');
                            $ltype = $atom->getAttribute('type');
                            $lurl = $atom->getAttribute('href');

                            if ($lrel == 'http://opds-spec.org/image') {
                                $edition_cover = $lurl;
                            }
                            if ($lrel == 'http://opds-spec.org/acquisition' || $lrel == 'http://opds-spec.org/acquisition/buy') {
                                if ($ltype == 'application/pugpigpkg+xml') {
                                    $edition_type = 'package';
                                } elseif ($ltype == 'application/atom+xml') {
                                    $edition_type = 'atom';
                                } else {
                                    $edition_type = 'Unknown';
                                }
                                $edition_url = $lurl;
                            }
                            if ($lrel == 'http://opds-spec.org/acquisition/buy') {
                                $edition_free = false;
                            }
                            if ($lrel == 'http://opds-spec.org/acquisition/sample') {
                                $edition_sample = true;
                            }
                            break;
                    }
                }
            }

            //print_r("Processed edition ".$edition_id." - " . $edition_title. " " . $edition_summary . "  <br />");
            $editions[$edition_id]['cover'] = $edition_cover;
            $editions[$edition_id]['title'] = $edition_title;
            $editions[$edition_id]['summary'] = $edition_summary;
            $editions[$edition_id]['url'] = $edition_url;
            $editions[$edition_id]['type'] = $edition_type;
            $editions[$edition_id]['free'] = $edition_free;
            $editions[$edition_id]['samples'] = $edition_sample;
            $editions[$edition_id]['draft'] = $edition_draft;
            $editions[$edition_id]['categories'] = $edition_categories;
            $editions[$edition_id]['updated'] = $edition_updated;
        }
        $atom->close();
    }

    // print_r($editions);
    $opds_ret['title'] = $feed_title;
    if ($feed_subtitle != '') {
        $opds_ret['title'] .= ' - ' . $feed_subtitle;
    }
    $opds_ret['editions'] = $editions;

    return $opds_ret;
}