/**
 * Run a lightweight format check on a saved file, chosen by MIME type or
 * file extension.
 *
 * - XML (MIME type contains "xml", or the name ends in ".xml"): the result of
 *   check_xml_is_valid() on the file contents is returned as-is.
 * - JSON (MIME type starts with "application/json", or the name ends in
 *   ".json"): the contents must decode without a JSON error.
 * - Cache manifests (MIME type starts with "text/cache-manifest", or the name
 *   ends in ".manifest" / ".appcache"): the contents must start with
 *   "CACHE MANIFEST".
 * - Anything else is not inspected and is reported as valid.
 *
 * No character-encoding (UTF-8) check is performed.
 *
 * @param string $file Path of the file to check.
 * @param string $mime MIME type reported for the file.
 *
 * @return mixed An empty string when no problem was found, otherwise a
 *               message describing the problem. For XML the value comes
 *               straight from check_xml_is_valid().
 */
function pugpig_validate_file($file, $mime)
{
    if (!file_exists($file)) {
        return "No file";
    }

    // Check XML
    if (strpos($mime, 'xml') !== FALSE || endsWith($file, '.xml')) {
        $contents = file_get_contents($file);
        if ($contents === false) {
            // Do not hand a boolean to the XML validator when the read failed.
            return "Could not read file";
        }

        return check_xml_is_valid($contents);
    }

    // Check JSON
    if (startsWith($mime, 'application/json') || endsWith($file, '.json')) {
        $contents = file_get_contents($file);
        if ($contents === false) {
            return "Could not read file";
        }

        // Only the error state matters here; the decoded value is discarded.
        json_decode($contents);
        $err = json_last_error();
        if ($err == JSON_ERROR_NONE) {
            return "";
        }

        // $err is the numeric JSON_ERROR_* code.
        return "Error: {$err}";
    }

    // Check Manifests
    if (startsWith($mime, 'text/cache-manifest') || endsWith($file, '.manifest') || endsWith($file, '.appcache')) {
        $contents = file_get_contents($file);
        if ($contents === false) {
            return "Could not read file";
        }

        if (!startsWith($contents, "CACHE MANIFEST")) {
            return "Manifest did not start with CACHE Manifest. Instead got:\n{$contents}";
        }
    }

    return "";
}
/**
 * Download everything referenced by an edition's ATOM feed and, unless running
 * in a test mode, build the asset zip, the HTML zip and the package XML.
 *
 * Progress is written straight to the output as HTML while the work proceeds.
 *
 * Steps, in order:
 *  1. Download the public ATOM feed and the same feed with "include_hidden=yes".
 *  2. Validate both; parse the hidden variant to get the edition tag, the
 *     manifest URLs and the HTML page URLs.
 *  3. Download the manifests and collect the asset URLs they list.
 *  4. Download the static assets and the HTML pages (image test mode only
 *     shows the images instead).
 *  5. Outside test mode: create both zips, write the package XML and clean
 *     old files out of the save folder.
 *  6. Remove the temporary folder unless debugging.
 *
 * NOTE: $tmp_root and $save_root are required although they follow optional
 * parameters, so every argument up to $save_root must be supplied by callers.
 * The parameter list is kept as-is for compatibility with existing callers.
 *
 * @param string $final_package_url          URL shown in the "View XML file" link.
 * @param string $content_xml_url            URL of the edition's ATOM feed.
 * @param string $relative_path              Passed to the download/zip helpers and used as the
 *                                           zip base URL when $package_url_base is empty.
 * @param bool   $debug                      Verbose output; also keeps the temporary files.
 * @param string $edition_tag                Tag used in the generated file names; when empty the
 *                                           tag from the parsed feed is used.
 * @param bool   $return_manifest_asset_urls When true, stop after the manifests and return the
 *                                           de-duplicated asset URL list.
 * @param mixed  $timestamp                  Used in file names and passed to date(), so callers
 *                                           are expected to supply a Unix timestamp.
 * @param string $tmp_root                   Folder under which "package-<timestamp>/" is used
 *                                           (concatenated directly, so it should end in a separator).
 * @param string $save_root                  Folder that receives the zips and the package XML
 *                                           (concatenated directly with file names).
 * @param string $cdn                        Passed through to the package XML writer.
 * @param string $package_url_base           Base URL for packaged files; overrides $relative_path
 *                                           as the zip base URL when not empty.
 * @param bool   $test_mode                  Download and validate only; nothing is packaged.
 * @param bool   $image_test_mode            Show the images in the package instead of downloading
 *                                           static files and HTML pages.
 * @param int    $concurrent                 Number of concurrent requests for each download batch.
 *
 * @return mixed Nothing (null) when the hidden feed could not be downloaded or parsed;
 *               the array of manifest asset URLs when $return_manifest_asset_urls is true;
 *               otherwise the package XML file name "<tag>-package-<timestamp>.xml".
 *               The script exits if the package XML writer returns null.
 */
function _pugpig_package_edition_package($final_package_url, $content_xml_url, $relative_path, $debug = FALSE, $edition_tag = '', $return_manifest_asset_urls = FALSE, $timestamp = '', $tmp_root, $save_root, $cdn = '', $package_url_base = '', $test_mode = FALSE, $image_test_mode = FALSE, $concurrent = 5)
{
    $html_zip_paths = array();
    $asset_zip_paths = array();

    $save_root = str_replace(DIRECTORY_SEPARATOR, '/', $save_root);
    $tmp_root = str_replace(DIRECTORY_SEPARATOR, '/', $tmp_root);

    // Scheme + host of the feed URL; '/' when the URL has no scheme.
    $domain = '/';
    $colon_pos = strpos($content_xml_url, '://');
    if ($colon_pos > 0) {
        $path_pos = strpos($content_xml_url, '/', $colon_pos + 3);
        // A URL with nothing after the host is itself the domain; using the
        // failed strpos() result as a length would give an empty string.
        $domain = ($path_pos === false) ? $content_xml_url : substr($content_xml_url, 0, $path_pos);
    }

    if (!$test_mode && !file_exists($save_root)) {
        mkdir($save_root, 0777, TRUE);
    }

    $tmp_path = $tmp_root . 'package-' . $timestamp . '/';

    // ---- Page header, key and run information -----------------------------
    pugpig_interface_output_header("Pugpig - Edition Packager");

    if ($test_mode) {
        print_r("<h1>Performing Pugpig Package Test Run</h1>");
    } elseif ($image_test_mode) {
        print_r("<h1>Performing Pugpig Package Image Preview</h1>");
    } else {
        print_r("<h1>Creating Pugpig Package</h1>");
    }

    print_r("<button style='cursor: pointer;' onclick=\"toggle_visibility('info');\">Info</button> ");
    print_r("<button style='cursor: pointer;' onclick=\"toggle_visibility('key');\">Key</button> ");
    print_r("<br />Packager version " . pugpig_get_standalone_version() . " <br />");

    print_r("<span id='key' style='display:none;'>");
    print_r("<span class='pass'>* - downloaded</span><br />");
    print_r("<span class='skip'>* - skipped as already downloaded</span><br />");
    print_r("<span class='warning'>* - downloaded, but large file warning</span><br />");
    print_r("<span class='bigwarning'>* - downloaded, but VERY large file warning</span><br />");
    print_r("<span class='slowwarning'>* - downloaded, but a little bit slowly</span><br />");
    print_r("<span class='veryslowwarning'>* - downloaded, but too slowly for comfort</span><br />");
    print_r("<span class='fail'>* - failed to fetch or save resource</span><br />");
    print_r("</span>");

    print_r("<span id='info' style='display:none;'>");
    print_r("<em>Final Package URL: <a href='{$final_package_url}'>" . $final_package_url . '</a></em><br />');
    print_r("<em>Packaging ATOM URL: <a href='{$content_xml_url}'>" . $content_xml_url . '</a></em><br />');
    print_r("<em>Domain is: " . $domain . '</em><br />');
    print_r("<em>Relative path is: " . $relative_path . '</em><br />');
    print_r("<em>Package URL base is: " . $package_url_base . '</em><br />');
    print_r("<em>Save root is: " . $save_root . '</em><br />');
    print_r("<em>Temp path is: " . $tmp_path . '</em><br />');
    print_r("<em>CDN is: " . $cdn . '</em><br />');
    print_r("<em>Debug Mode is: " . ($debug ? "ON" : "OFF") . '</em><br />');
    print_r("<em>Test Mode is: " . ($test_mode ? "ON" : "OFF") . '</em><br />');
    print_r("<em>Image Mode is: " . ($image_test_mode ? "ON" : "OFF") . '</em><br />');
    print_r("<em>cURL timeout is: " . PUGPIG_CURL_TIMEOUT . ' seconds with ' . $concurrent . ' concurrent requests</em><br />');
    print_r("</span>");

    print_r("<h1>Retrieving files</h1>");
    _print_immediately('Package ' . $timestamp . ' started at ' . date(PUGPIG_DATE_FORMAT, $timestamp) . '<br />');

    // ---- ATOM feeds --------------------------------------------------------
    // Array used to store errors in the responses
    $format_failures = array();

    // Get the ATOM feeds - the real one and the one that might contain hidden extras
    $content_xml_hidden_save_path = $tmp_path . 'content-hidden.xml';
    $content_xml_hidden_path = $content_xml_url . (strpos($content_xml_url, '?') > 0 ? '&' : '?') . 'include_hidden=yes';

    $entries = _pugpig_relative_urls_to_download_array($relative_path, array($content_xml_url), $domain, $tmp_path);
    $entries[$content_xml_hidden_path] = $content_xml_hidden_save_path;
    $entries = _pugpig_package_download_batch("Public and Hidden ATOM Feeds", $entries, $debug, $concurrent);

    $content_xml_save_path = $entries[$content_xml_url];

    if (file_exists($content_xml_save_path)) {
        // The public feed is only validated here; parsing uses the hidden variant below.
        // file_get_contents() copes with empty files, unlike fread() with a zero length.
        $public_atom = file_get_contents($content_xml_save_path);
        $msg = ($public_atom === false) ? 'Unable to read saved feed' : check_xml_is_valid($public_atom);
        if ($msg != '') {
            $format_failures[$content_xml_url] = "XML Invalid: " . $msg;
        }
    }

    $atom_ret = null;
    if (file_exists($content_xml_hidden_save_path)) {
        // Read the ATOM from the hidden file
        $hidden_atom = file_get_contents($content_xml_hidden_save_path);
        $msg = ($hidden_atom === false) ? 'Unable to read saved feed' : check_xml_is_valid($hidden_atom);
        if ($msg != '') {
            $format_failures[$content_xml_hidden_path] = "XML Invalid: " . $msg;
        } else {
            $atom_ret = _pugpig_package_parse_atom($hidden_atom);
        }

        unset($entries[$content_xml_hidden_path]); // We only want the real atom in the zip
        $html_zip_paths = array_merge($html_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug));
    }

    // Check that the XML is valid, and show the errors if not.
    _pugpig_package_show_failures($format_failures);

    if (!$atom_ret) {
        return;
    }

    // Update the edition tag if we have something from the feed
    if ($debug) {
        _print_immediately('Edition tag was <b>' . $edition_tag . '</b><br />');
    }
    if (!strlen($edition_tag)) {
        $edition_tag = $atom_ret['edition_tag'];
    }
    _print_immediately('Edition tag is <b>' . $edition_tag . '</b><br />');

    // ---- Manifests ---------------------------------------------------------
    // Process the manifests - these are relative to the ATOM content XML
    $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['manifest_urls'], $content_xml_url, $tmp_path);
    $entries = _pugpig_package_download_batch("Manifests", $entries, $debug, $concurrent);
    $asset_zip_paths = array_merge($asset_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug)); // Keep for the asset zip

    // Getting the list of static files from the manifests
    $manifest_entries = array();
    $format_failures = array();
    foreach ($entries as $url => $sfile) {
        $raw_manifest = is_file($sfile) ? file_get_contents($sfile) : false;
        if ($raw_manifest === false) {
            // Nothing usable was saved for this manifest; report it with the other failures.
            $format_failures[$url] = "Manifest could not be read from " . $sfile;
            continue;
        }

        $fcontents = trim($raw_manifest);
        if (!startsWith($fcontents, "CACHE MANIFEST")) {
            // This is dodgy. We have a 200 that isn't a manifest.
            // Sometimes under really high concurrency, Drupal doesn't load includes properly
            // Delete the saved file in case it is better next time.
            $format_failures[$url] = "Manifest format not correct - CACHE MANIFEST not at start of response. Got: " . $fcontents;
            unlink($sfile);
        } else {
            $manifest_entries = _pugpig_package_get_asset_urls_from_manifest($fcontents, $manifest_entries, $url);
        }
    }
    _pugpig_package_show_failures($format_failures);

    $manifest_entries = array_unique($manifest_entries);

    // Stop now and return the list of manifest items if required
    if ($return_manifest_asset_urls) {
        _print_immediately('<em>Returning ' . count($manifest_entries) . ' assets</em><br />');

        return $manifest_entries;
    }

    // ---- Static files, HTML pages and packaging ----------------------------
    // Process the static files
    $entries = _pugpig_relative_urls_to_download_array($relative_path, $manifest_entries, $domain, $tmp_path);

    if ($image_test_mode) {
        _pugpig_package_show_images_in_package($entries);
    } else {
        $entries = _pugpig_package_download_batch("Static Files", $entries, $debug, $concurrent);
        $asset_zip_paths = array_merge($asset_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug)); // Keep for the asset zip

        // Process the HTML files
        $entries = _pugpig_relative_urls_to_download_array($relative_path, $atom_ret['html_urls'], $content_xml_url, $tmp_path);
        $entries = _pugpig_package_download_batch("HTML Pages", $entries, $debug, $concurrent);
        $html_zip_paths = array_merge($html_zip_paths, _pugpig_package_zip_paths($entries, $tmp_path, $package_url_base, $relative_path, $debug)); // Keep for the html zip

        if (!$test_mode) {
            print_r("<h2>Packaging files</h2>");

            // Figure out where the packages will live
            $zip_base_url = $relative_path;
            if (!empty($package_url_base)) {
                $zip_base_url = $package_url_base;
            }

            _pugpig_package_create_zip("public assets", $edition_tag . '-assets-' . $timestamp . '.zip', $tmp_path, $save_root, $asset_zip_paths, $zip_base_url);
            _pugpig_package_create_zip("secure html", $edition_tag . '-html-' . $timestamp . '.zip', $tmp_path, $save_root, $html_zip_paths, $zip_base_url);

            // Create package - TODO: Check on why we save this
            print_r("<h3>Creating Package XML</h3>");
            $package_name = $edition_tag . '-package-' . $timestamp . '.xml';
            _print_immediately('<em>Saving package xml to ' . $save_root . $package_name . '</em><br />');

            $package_xml = _package_edition_package_list_xml($save_root, $edition_tag, $package_url_base, $cdn, $save_root . $package_name, $timestamp);

            _print_immediately("<a target='_blank' href='" . $final_package_url . "'>View XML file</a><br />");

            if (is_null($package_xml)) {
                _print_immediately('Error in saving package file.<br /><br /><b>Aborting!</b><br /><a href="javascript:location.reload(true);">Refresh this page to reload and try again. (It will resume from where it last succeeded.)</a><br />');
                exit;
            }

            $deleted_files = _pugpig_clean_package_folder($save_root);
            if (count($deleted_files)) {
                print_r("<h3>Deleting old packagage files</h3>");
                _print_immediately("<b>Deleted " . count($deleted_files) . " old files</b><br />");
                foreach ($deleted_files as $f) {
                    _print_immediately("Deleted {$f}<br />");
                }
            }
        }
    }

    // Delete the temp area
    if (!$debug) {
        _package_rmdir($tmp_path);
    } else {
        _print_immediately("<p><b>Debug mode - not deleting temp files</b></p>");
    }

    _fill_buffer(16000);

    if (!$test_mode && !$image_test_mode) {
        print_r("<h2>Packaging Complete</h2>");
    } else {
        print_r("<h2>Test Run Complete</h2>");
    }

    return $edition_tag . '-package-' . $timestamp . '.xml';
}
/**
 * Load a saved feed file and return its contents if they pass XML validation.
 *
 * A file that does not exist is not treated as a failure: null is returned
 * and $format_failures is left untouched.
 *
 * @param string $content_xml_file_path Path of the saved feed file.
 * @param string $url                   URL the feed came from; used as the key
 *                                      under which a failure is recorded.
 * @param array  $format_failures       Failure messages keyed by URL (modified in place).
 *
 * @return string|null The file contents when check_xml_is_valid() reports no
 *                     problem, otherwise null.
 */
function _pugpig_validate_saved_feed($content_xml_file_path, $url, &$format_failures)
{
    if (!file_exists($content_xml_file_path)) {
        return null;
    }

    // file_get_contents() handles zero-byte files; fread() with filesize() as
    // the length rejects a length of 0.
    $atom_contents = file_get_contents($content_xml_file_path);
    if ($atom_contents === false) {
        $format_failures[$url] = "XML Invalid: Unable to read saved feed";

        return null;
    }

    $msg = check_xml_is_valid($atom_contents);
    if (!empty($msg)) {
        $format_failures[$url] = "XML Invalid: " . $msg;

        return null;
    }

    return $atom_contents;
}
/**
 * Parse an OPDS feed into a feed title and a list of editions.
 *
 * Elements are matched on their local name, so namespace prefixes are ignored.
 *
 * @param string $opds_body Raw XML of the OPDS feed.
 *
 * @return array On invalid XML: array('failure' => "Not Valid XML: <message>").
 *               Otherwise:
 *               - 'title':    feed title, followed by " - <subtitle>" when a
 *                             subtitle is present.
 *               - 'editions': array keyed by entry id, each value holding
 *                             'cover', 'title', 'summary', 'url', 'type'
 *                             ('package', 'atom', 'Unknown' or ''), 'free',
 *                             'samples', 'draft', 'categories' (scheme => term)
 *                             and 'updated'. Entries sharing an id overwrite
 *                             each other.
 */
function _pugpig_package_parse_opds($opds_body)
{
    $opds_ret = array();

    $message = check_xml_is_valid($opds_body);
    if (!empty($message)) {
        $opds_ret['failure'] = "Not Valid XML: {$message}";

        return $opds_ret;
    }

    $editions = array();
    $feed_title = '';
    $feed_subtitle = '';

    if ($opds_body != '') {
        $atom = new XMLReader();
        $atom->XML($opds_body);

        while ($atom->read()) {
            if ($atom->localName == 'entry' && $atom->nodeType == XMLReader::ELEMENT) {
                // Simple text children of the entry, collected by element name.
                $edition_text = array('id' => "", 'updated' => "", 'title' => "", 'summary' => "");
                $edition_cover = "";
                $edition_type = "";
                $edition_url = "";
                $edition_free = true;
                $edition_sample = false;
                $edition_draft = false;
                $edition_categories = array();

                // A self-closing <entry/> has no children and no end tag, so
                // scanning for the closing "entry" node would run into the next entry.
                $entry_has_body = !$atom->isEmptyElement;

                while ($entry_has_body && $atom->read() && $atom->localName != 'entry') {
                    // Id, updated date, title and summary of an entry
                    foreach (array('id', 'updated', 'title', 'summary') as $field) {
                        if ($atom->localName == $field && $atom->nodeType == XMLReader::ELEMENT) {
                            $text = "";
                            // Only step onto the child node when there is one; advancing
                            // past a self-closing element would skip whatever follows it.
                            if (!$atom->isEmptyElement) {
                                $atom->read();
                                $text = $atom->value;
                            }
                            $edition_text[$field] = $text;
                        }
                    }

                    // Draft marker of an entry (presence of the element is enough)
                    if ($atom->localName == 'draft' && $atom->nodeType == XMLReader::ELEMENT) {
                        if (!$atom->isEmptyElement) {
                            $atom->read();
                        }
                        $edition_draft = true;
                    }

                    // Categories of an entry
                    if ($atom->localName == 'category' && $atom->nodeType == XMLReader::ELEMENT) {
                        $edition_categories[$atom->getAttribute('scheme')] = $atom->getAttribute('term');
                    }

                    // Links in an entry
                    if ($atom->localName == 'link' && $atom->nodeType == XMLReader::ELEMENT) {
                        $lrel = $atom->getAttribute('rel');
                        $ltype = $atom->getAttribute('type');
                        $lurl = $atom->getAttribute('href');

                        if ($lrel == 'http://opds-spec.org/image') {
                            $edition_cover = $lurl;
                        }

                        if ($lrel == 'http://opds-spec.org/acquisition' || $lrel == 'http://opds-spec.org/acquisition/buy') {
                            if ($ltype == 'application/pugpigpkg+xml') {
                                $edition_type = 'package';
                            } elseif ($ltype == 'application/atom+xml') {
                                $edition_type = 'atom';
                            } else {
                                $edition_type = 'Unknown';
                            }
                            $edition_url = $lurl;
                        }

                        // A "buy" acquisition link marks the edition as paid.
                        if ($lrel == 'http://opds-spec.org/acquisition/buy') {
                            $edition_free = false;
                        }

                        if ($lrel == 'http://opds-spec.org/acquisition/sample') {
                            $edition_sample = true;
                        }
                    }
                }

                $editions[$edition_text['id']] = array(
                    'cover' => $edition_cover,
                    'title' => $edition_text['title'],
                    'summary' => $edition_text['summary'],
                    'url' => $edition_url,
                    'type' => $edition_type,
                    'free' => $edition_free,
                    'samples' => $edition_sample,
                    'draft' => $edition_draft,
                    'categories' => $edition_categories,
                    'updated' => $edition_text['updated'],
                );
            } else {
                // Feed-level title and subtitle (outside any entry)
                if ($atom->localName == 'title' && $atom->nodeType == XMLReader::ELEMENT) {
                    $feed_title = "";
                    if (!$atom->isEmptyElement) {
                        $atom->read();
                        $feed_title = $atom->value;
                    }
                }
                if ($atom->localName == 'subtitle' && $atom->nodeType == XMLReader::ELEMENT) {
                    $feed_subtitle = "";
                    if (!$atom->isEmptyElement) {
                        $atom->read();
                        $feed_subtitle = $atom->value;
                    }
                }
            }
        }

        $atom->close();
    }

    $opds_ret['title'] = $feed_title;
    if ($feed_subtitle != '') {
        $opds_ret['title'] .= ' - ' . $feed_subtitle;
    }
    $opds_ret['editions'] = $editions;

    return $opds_ret;
}