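/**
  * Submit the URL to the Perma API for archiving and describe the resulting archive
  * @param string $url URL to archive
  * @return array metadata for the archive: id, url, type, date, location, size, provider, provider_id
  * @throws RuntimeException if the URL is empty or Perma does not return a usable response
  * @throws InvalidArgumentException if no Perma API key is set
  */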
 public function fetch($url)
 {
     /* Submits $url to the Perma API and returns an array describing the new archive
        (id, url, type, date, location, size, provider, provider_id).
        Throws RuntimeException for an empty URL or an unusable Perma response, and
        InvalidArgumentException when no API key is set. */
     if (!$url) {
         throw new RuntimeException("Empty URL");
     }
     if (!$this->apiKey) {
         throw new InvalidArgumentException("Missing required API key for accessing Perma");
     }
     /* The API key travels in the query string; the URL to archive is sent as a JSON POST body */
     $api_endpoint = $this->apiUrl . '/v1/archives/?api_key=' . $this->apiKey;
     $curl_options = array(
         CURLOPT_POST => TRUE,
         CURLOPT_POSTFIELDS => json_encode(array('url' => $url)),
         CURLOPT_HTTPHEADER => array("Content-type: application/json"),
         CURLOPT_FOLLOWLOCATION => TRUE,
     );
     $perma_result = AmberNetworkUtils::open_single_url($api_endpoint, $curl_options);
     /* Make sure that we got a valid response from Perma */
     if ($perma_result === FALSE || $perma_result['body'] === FALSE) {
         $message = "";
         if (isset($perma_result['info']['http_code'])) {
             $message = "HTTP response code=" . $perma_result['info']['http_code'];
         }
         throw new RuntimeException(join(":", array("Error submitting URL to Perma", $message)));
     }
     /* A body that is not valid JSON decodes to NULL and is rejected by the GUID check below */
     $json_result = json_decode($perma_result['body'], true);
     if (!isset($json_result['guid'])) {
         throw new RuntimeException("Perma response did not include GUID");
     }
     /* Only the GUID is required. If Perma omits the URL or the creation timestamp,
        fall back to the submitted URL and the current time rather than reading
        undefined keys and storing a FALSE date. */
     $archived_url = isset($json_result['url']) ? $json_result['url'] : $url;
     $created = isset($json_result['creation_timestamp']) ? strtotime($json_result['creation_timestamp']) : FALSE;
     if ($created === FALSE) {
         $created = time();
     }
     $result = array(
         'id' => md5($archived_url),
         'url' => $archived_url,
         'type' => '',
         'date' => $created,
         'location' => join("/", array($this->archiveUrl, $json_result['guid'])),
         'size' => 0,
         'provider' => 1,
         'provider_id' => $json_result['guid'],
     );
     return $result;
 }
 /**
  * Query the Timegate server for a memento for this URL and date
  * @param  string $url    URL to query
  * @param  string $date   preferred date for the memento
  * @return array          memento location ('url') and date ('date'), or an empty array if none was found
  */
 public function getMemento($url, $date)
 {
     /* Asks the Timegate server (header-only request) for a memento of $url near $date.
        Returns array('url' => ..., 'date' => ...) when the response carries a Location
        header, otherwise an empty array. */
     $options = array(CURLOPT_NOBODY => true);
     /* RFC 7089 requires Accept-Datetime to be an RFC 1123 date ending in the literal "GMT".
        DATE_RFC1123 renders the zone as "+0000", so spell the format out explicitly.
        If $date cannot be parsed, omit the header rather than asking for a 1970 memento. */
     $timestamp = strtotime($date);
     if ($timestamp !== FALSE) {
         $options[CURLOPT_HTTPHEADER] = array('Accept-Datetime: ' . gmdate('D, d M Y H:i:s \G\M\T', $timestamp));
     }
     /* Be forgiving of trailing slashes (or lack thereof) in server URL */
     $query_url = implode("/", array(trim($this->serverUrl, "/"), $url));
     $result = AmberNetworkUtils::open_single_url($query_url, $options, FALSE);
     if ($result !== FALSE && isset($result['headers']['Location'])) {
         $memento_url = $result['headers']['Location'];
         return array('url' => $memento_url, 'date' => $this->getArchiveDate($memento_url));
     }
     return array();
 }
 /**
  * Query the NetClerk server for the status of the URLs in a particular country
  * @param  array  $urls    array of URLs to query
  * @param  string $country two-character ISO code for the user's country
  * @return string|false   body of the response from the NetClerk server, or FALSE if no response body was obtained
  */
 public function query_status_from_netclerk(array $urls, $country)
 {
     /* POSTs the country code and the list of URLs to "<serverUrl>/statuses" and returns
        the response body, or FALSE when no body was obtained. */
     $fields = array('country' => $country, 'url' => $urls);
     $fields_string = http_build_query($fields);
     /* http_build_query represents arrays as "url[0]=foo&url[1]=bar",
        but we need "url[]=foo&url[]=bar".
        Only rewrite indexes that are part of a key, i.e. followed by a literal "="
        before the next "&". "=" and "&" inside values are percent-encoded, so an
        encoded "[0]" within a submitted URL (e.g. "?a[0]=1") is left untouched. */
     $fields_string = preg_replace('/%5B[0-9]+%5D(?=[^&=]*=)/', '%5B%5D', $fields_string);
     $options = array(CURLOPT_POST => true, CURLOPT_POSTFIELDS => $fields_string);
     $result = AmberNetworkUtils::open_single_url($this->serverUrl . "/statuses", $options);
     if ($result !== FALSE && isset($result['body'])) {
         return $result['body'];
     }
     return FALSE;
 }
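 /**
  * Submit the URL to the Internet Archive "save" endpoint and describe the resulting archive
  * @param string $url URL to archive
  * @return array metadata for the archive: id, url, type, date, location, size, provider, provider_id
  * @throws RuntimeException if the URL is empty, the request fails or is denied (HTTP 403),
  *                          or the response carries no Content-Location header
  */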
 public function fetch($url)
 {
     /* Requests "<archiveUrl>/save/<url>" and builds the archive metadata from the
        response headers. Every failure is reported as a RuntimeException. */
     if (!$url) {
         throw new RuntimeException("Empty URL");
     }
     $save_endpoint = $this->archiveUrl . "/save/" . $url;
     $response = AmberNetworkUtils::open_single_url($save_endpoint, array(), FALSE);

     /* Make sure that we got a valid response from the Archive */
     if ($response === FALSE) {
         throw new RuntimeException("Error submitting to Internet Archive");
     }
     $denied = isset($response['info']['http_code']) && $response['info']['http_code'] == 403;
     if ($denied) {
         throw new RuntimeException("Permission denied when submitting to Internet Archive (may be blocked by robots.txt)");
     }
     if (!isset($response['headers']['Content-Location'])) {
         throw new RuntimeException("Internet Archive response did not include archive location");
     }

     /* The archive path and the original content type and length come from the response headers */
     $headers = $response['headers'];
     $archive_path = $headers['Content-Location'];
     $original_type = "";
     if (isset($headers['X-Archive-Orig-Content-Type'])) {
         $original_type = $headers['X-Archive-Orig-Content-Type'];
     }
     $original_size = 0;
     if (isset($headers['X-Archive-Orig-Content-Length'])) {
         $original_size = $headers['X-Archive-Orig-Content-Length'];
     }

     return array(
         'id' => md5($url),
         'url' => $url,
         'type' => $original_type,
         'date' => time(),
         'location' => $this->archiveUrl . $archive_path,
         'size' => $original_size,
         'provider' => 2,
         'provider_id' => $archive_path,
     );
 }
 /**
  * Open one or more URLs, and return an array keyed by URL with the headers, body and CURL info of each response
  * @param array $urls array of strings of resources to download
  * @param array $additional_options CURL options; these take precedence over the defaults
  * @return array|false array, keyed by URL, of dictionaries with "headers", "body" and "info" entries; FALSE on error
  */
 public static function open_multi_url($urls, $additional_options = array())
 {
     /* Fetches all of $urls in parallel through a CURL multi handle and returns an array,
        keyed by URL, of array("headers" => ..., "body" => ..., "info" => ...).
        Returns FALSE (after logging) if CURL is unavailable, a handle cannot be created,
        or options cannot be set. Entries in $additional_options override the defaults. */
     if (!AmberNetworkUtils::curl_installed()) {
         // TODO: If curl is not installed, see if remote file opening is enabled, and fall back to that method
         error_log(join(":", array(__FILE__, __METHOD__, "CURL not installed")));
         return FALSE;
     }
     $result = array();
     /* Declared before the try block so the catch block can always release them */
     $multi = NULL;
     $channels = array();
     try {
         $options = array(
             CURLOPT_FAILONERROR => TRUE,
             CURLOPT_FOLLOWLOCATION => AmberNetworkUtils::curl_redirects_allowed(),
             CURLOPT_MAXREDIRS => 10,
             CURLOPT_CONNECTTIMEOUT => 5,
             CURLOPT_TIMEOUT => 10,
             CURLOPT_RETURNTRANSFER => 1,
             CURLOPT_HEADER => TRUE,
             CURLOPT_USERAGENT => AmberNetworkUtils::get_user_agent_string(),
             CURLOPT_ENCODING => '',
             CURLINFO_HEADER_OUT => 1,
         );
         $multi = curl_multi_init();
         foreach ($urls as $url) {
             /* Results are keyed by URL, so a repeated URL needs only one handle. A second
                handle would replace the first in $channels and leave it orphaned. */
             if (isset($channels[$url])) {
                 continue;
             }
             $ch = curl_init($url);
             if ($ch === FALSE) {
                 /* Raised as an exception so the catch block logs the same message,
                    releases handles already created, and returns FALSE */
                 throw new RuntimeException(join(":", array(__FILE__, __METHOD__, $url, "CURL init error")));
             }
             if (curl_setopt_array($ch, $additional_options + $options) === FALSE) {
                 /* Read the error text before closing the handle */
                 $message = join(":", array(__FILE__, __METHOD__, "Error setting CURL options", $url, curl_error($ch)));
                 if (is_resource($ch)) {
                     curl_close($ch);
                 }
                 throw new RuntimeException($message);
             }
             curl_multi_add_handle($multi, $ch);
             $channels[$url] = $ch;
         }
         /* While we're still active, execute curl over all the channels */
         $active = null;
         do {
             $mrc = curl_multi_exec($multi, $active);
         } while ($mrc == CURLM_CALL_MULTI_PERFORM);
         while ($active && $mrc == CURLM_OK) {
             /* curl_multi_select can return -1 immediately; pause briefly so this loop
                does not spin at full CPU until the transfers finish */
             if (curl_multi_select($multi) === -1) {
                 usleep(100);
             }
             do {
                 $mrc = curl_multi_exec($multi, $active);
             } while ($mrc == CURLM_CALL_MULTI_PERFORM);
         }
         /* Now we should have all of the data */
         foreach ($channels as $url => $channel) {
             /* Get the CURL result */
             $data = (string) curl_multi_getcontent($channel);
             $response_info = curl_getinfo($channel);
             /* Split into header and body */
             $header_size = $response_info['header_size'];
             $header = substr($data, 0, $header_size - 1);
             $body = substr($data, $header_size);
             $headers = AmberNetworkUtils::extract_headers($header);
             $result[$url] = array("headers" => $headers, "body" => $body, "info" => $response_info);
             curl_multi_remove_handle($multi, $channel);
             /* Handles are resources before PHP 8 and must be closed explicitly;
                from PHP 8 they are objects and are freed automatically */
             if (is_resource($channel)) {
                 curl_close($channel);
             }
         }
         $channels = array();
         curl_multi_close($multi);
         /* Mark as released so the catch block does not close it a second time */
         $multi = NULL;
         /* It's possible that one or more of these responses may require a redirect
            that hasn't yet been followed. Some cases where this could happen:
            - The webserver has safe_mode or open_basedir set, so we couldn't set CURLOPT_FOLLOWLOCATION
            - The redirect is triggered by a META tag in the HTML
            - The redirect is triggered by Javascript (We do NOT handle this case)
            For the first two cases, which we can handle, we find URLs that still need redirection,
            and fetch them. */
         $redirects_required = AmberNetworkUtils::find_urls_requiring_redirects($result);
         foreach ($redirects_required as $url => $data) {
             $a = AmberNetworkUtils::open_single_url($url, $additional_options);
             if ($a) {
                 $result[$url] = $a;
             }
         }
         return $result;
     } catch (RuntimeException $e) {
         error_log($e->getMessage());
         /* Release whatever is still open at the point of failure */
         if ($multi) {
             foreach ($channels as $channel) {
                 curl_multi_remove_handle($multi, $channel);
                 if (is_resource($channel)) {
                     curl_close($channel);
                 }
             }
             curl_multi_close($multi);
         }
         return FALSE;
     }
 }