예제 #1
0
 /**
  * Fetch a URL together with the assets it references and pass the result on
  * to the designated Storage service.
  *
  * For HTML documents the asset references are extracted and downloaded, and
  * the body is passed through the asset helper's rewrite_links() and
  * insert_banner() before it is saved.
  *
  * @param string $url URL of the document to cache
  * @return array Metadata describing the saved cache entry, with the keys 'id',
  *               'url', 'type', 'date', 'location', 'size', 'provider' and
  *               'provider_id'
  * @throws RuntimeException if the URL is empty, is blocked by robots.txt, is
  *                          rejected by cacheable_item(), is empty or too large,
  *                          if no storage is configured, or if the cache or its
  *                          metadata cannot be saved or read back
  */
 public function fetch($url)
 {
     if (!$url) {
         throw new RuntimeException("Empty URL");
     }
     // Check the robots.txt
     if (!AmberRobots::robots_allowed($url)) {
         throw new RuntimeException("Blocked by robots.txt");
     }
     // Send a GET request
     $root_item = AmberNetworkUtils::open_url($url);
     // Decide whether the item should be cached; $reason carries the message
     // for the exception when the item is rejected
     if (!$this->cacheable_item($root_item, $reason)) {
         throw new RuntimeException($reason);
     }
     // A missing size is treated the same way as an empty download
     $size = isset($root_item['info']['size_download']) ? $root_item['info']['size_download'] : 0;
     if ($size == 0) {
         throw new RuntimeException("Empty document");
     }
     // Get other assets
     if (isset($root_item['headers']['Content-Type']) && ($content_type = $root_item['headers']['Content-Type']) && AmberNetworkUtils::is_html_mime_type($content_type)) {
         $body = $root_item['body'];
         $asset_paths = $this->assetHelper->extract_assets($body);
         /* Use the url of the document we end up downloading as a reference point for
            relative asset references, since we may have been redirected from the one
            we originally requested. */
         $document_url = $root_item['info']['url'];
         $assets = $this->assetHelper->expand_asset_references($document_url, $asset_paths, $this->assetHelper->extract_base_tag($body));
         $assets = $this->download_assets($assets, $document_url);
         // NOTE(review): $size is presumably taken by reference here and grown by
         // the size of the downloaded assets - confirm against
         // download_css_assets_recursive()
         $assets = $this->download_css_assets_recursive($assets, $document_url, $size);
         $body = $this->assetHelper->rewrite_links($body, $assets);
         $body = $this->assetHelper->insert_banner($body, $this->headerText, array("url" => $url, "date" => date('Y/m/d')));
         $root_item['body'] = $body;
         /* Check total size of the file combined with its assets
            (maxFileSize is scaled by 1024, so presumably configured in kB - confirm) */
         if ($size > $this->maxFileSize * 1024) {
             throw new RuntimeException("File size of document + assets too large");
         }
     }
     if (!$this->storage || !$root_item) {
         throw new RuntimeException("Content empty or could not save to disk");
     }
     // $assets is only set when the document was HTML
     $result = $this->storage->save($url, $root_item['body'], $root_item['headers'], isset($assets) ? $assets : array());
     if (!$result) {
         throw new RuntimeException("Could not save cache");
     }
     $storage_metadata = $this->storage->get_metadata($url);
     // The metadata may be missing or only partly populated (perhaps due to
     // permissions errors in saving the cache). Fail with a clear exception
     // instead of undefined-index errors while building the result below.
     if (empty($storage_metadata)
         || !isset(
             $storage_metadata['id'],
             $storage_metadata['url'],
             $storage_metadata['cache']['amber']['date'],
             $storage_metadata['cache']['amber']['location']
         )
     ) {
         throw new RuntimeException("Could not retrieve metadata");
     }
     return array(
         'id' => $storage_metadata['id'],
         'url' => $storage_metadata['url'],
         // Fall back to a generic binary type when the storage did not record one
         'type' => isset($storage_metadata['type']) ? $storage_metadata['type'] : 'application/octet-stream',
         'date' => strtotime($storage_metadata['cache']['amber']['date']),
         'location' => $storage_metadata['cache']['amber']['location'],
         'size' => $size,
         'provider' => $this->storage->provider_id(),
         'provider_id' => $storage_metadata['id'],
     );
 }
예제 #2
0
 /**
  * Check that AmberNetworkUtils::is_html_mime_type() reports the HTML and
  * XHTML MIME types as HTML and an image MIME type as not HTML.
  */
 public function testMimeTypeChecking()
 {
     // MIME type => whether it is expected to be reported as HTML
     $expectations = array(
         "text/html" => true,
         "image/jpg" => false,
         "application/xhtml+xml" => true,
     );
     foreach ($expectations as $mime_type => $expected_html) {
         $detected = AmberNetworkUtils::is_html_mime_type($mime_type);
         if ($expected_html) {
             $this->assertTrue($detected);
         } else {
             $this->assertFalse($detected);
         }
     }
 }