PHP AmberNetworkUtils примеры использования

Язык программирования: PHP

Класс/Тип: AmberNetworkUtils

Примеров на hotexamples.com: 10

PHP AmberNetworkUtils — найдено 10 примеров. Это лучшие примеры PHP-кода для AmberNetworkUtils, полученные из проектов с открытым исходным кодом. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

open_single_url(5)

extract_headers(3)

open_url(3)

clean_up_path(2)

find_meta_no_archive(2)

find_meta_redirect(2)

full_relative_path(2)

is_html_mime_type(2)

open_multi_url(2)

curl_installed(1)

curl_redirects_allowed(1)

find_urls_requiring_redirects(1)

get_head(1)

get_user_agent_string(1)

Пример #1

Показать файл

Файл: AmberChecker.php Проект: genevec/amber_wordpress

 /**
  * Check a URL's availability and build the status record describing the outcome.
  *
  * Nothing is checked (and false is returned) when the URL is not yet due and
  * $force is not set. A URL disallowed by robots.txt is not fetched: its previous
  * status is carried over and the next check is scheduled six months out.
  *
  * @param array $last_check data from the previous check of the URL; 'url' is required,
  *                          'id', 'status', 'last_checked' and 'next_check' are optional
  * @param bool $force true if the check should happen even if it's not yet scheduled
  * @return array|bool false when the check was skipped, otherwise an array with the keys
  *                    'id', 'url', 'last_checked', 'next_check', 'status', 'message' and 'details'
  */
 public function check($last_check, $force = false)
 {
     $url = $last_check['url'];
     //TODO: Unify ID generation
     $id = isset($last_check['id']) ? $last_check['id'] : md5($url);

     /* Skip the URL unless it is due for a check or the caller insists */
     $due_at = isset($last_check['next_check']) ? $last_check['next_check'] : 0;
     if ($due_at > time() && !$force) {
         return false;
     }

     $previous_status = isset($last_check['status']) ? $last_check['status'] : NULL;
     $message = NULL;
     $fetch_result = NULL;

     /* Taken before the robots.txt lookup so the six-month offset starts from this moment */
     $started = new DateTime();
     if (AmberRobots::robots_allowed($url)) {
         $fetch_result = AmberNetworkUtils::open_url($url, array(CURLOPT_FAILONERROR => FALSE));
         $status = $this->is_up($fetch_result);
         $next = $this->next_check_date(
             $previous_status,
             isset($last_check['last_checked']) ? $last_check['last_checked'] : NULL,
             isset($last_check['next_check']) ? $last_check['next_check'] : NULL,
             $status
         );
     } else {
         /* Blocked by robots.txt: keep the old status and look again in 6 months */
         $message = "Blocked by robots.txt";
         error_log(implode(":", array(__FILE__, __METHOD__, "Blocked by robots.txt", $url)));
         $status = $previous_status;
         $next = $started->add(new DateInterval("P6M"))->getTimestamp();
     }

     /* An unknown status stays NULL; anything else is reduced to 1 or 0 */
     if ($status === NULL) {
         $status_flag = NULL;
     } else {
         $status_flag = $status ? 1 : 0;
     }

     $now = new DateTime();
     return array(
         'id' => $id,
         'url' => $url,
         'last_checked' => $now->getTimestamp(),
         'next_check' => $next,
         'status' => $status_flag,
         'message' => $message,
         'details' => $fetch_result,
     );
 }

Пример #2

Показать файл

Файл: PermaFetcher.php Проект: genevec/amber_wordpress

 /**
  * Submit a URL to the Perma API for archiving and describe the resulting archive.
  *
  * The URL is POSTed as JSON to the "/v1/archives/" endpoint of $this->apiUrl,
  * authenticated with $this->apiKey.
  *
  * @param string $url URL to archive
  * @return array archive description with the keys 'id', 'url', 'type', 'date',
  *               'location', 'size', 'provider' and 'provider_id'
  * @throws RuntimeException if the URL is empty, the request fails, or the response has no GUID
  * @throws InvalidArgumentException if no API key is configured
  */
 public function fetch($url)
 {
     if (!$url) {
         throw new RuntimeException("Empty URL");
     }
     if (!$this->apiKey) {
         throw new InvalidArgumentException("Missing required API key for accessing Perma");
     }

     $request_body = json_encode(array('url' => $url));
     $curl_options = array(
         CURLOPT_POST => TRUE,
         CURLOPT_POSTFIELDS => $request_body,
         CURLOPT_HTTPHEADER => array("Content-type: application/json"),
         CURLOPT_FOLLOWLOCATION => TRUE,
     );
     $response = AmberNetworkUtils::open_single_url(
         $this->apiUrl . '/v1/archives/?api_key=' . $this->apiKey,
         $curl_options
     );

     /* Make sure that we got a valid response from Perma */
     $request_failed = ($response === FALSE) || ($response['body'] === FALSE);
     if ($request_failed) {
         /* Include the HTTP status in the error when one is available */
         $detail = isset($response['info']['http_code'])
             ? "HTTP response code=" . $response['info']['http_code']
             : "";
         throw new RuntimeException(implode(":", array("Error submitting URL to Perma", $detail)));
     }

     $archive = json_decode($response['body'], true);
     if (!isset($archive['guid'])) {
         throw new RuntimeException("Perma response did not include GUID");
     }

     $guid = $archive['guid'];
     return array(
         'id' => md5($archive['url']),
         'url' => $archive['url'],
         'type' => '',
         'date' => strtotime($archive['creation_timestamp']),
         'location' => implode("/", array($this->archiveUrl, $guid)),
         'size' => 0,
         'provider' => 1,
         'provider_id' => $guid,
     );
 }

Пример #3

Показать файл

Файл: AmberChecker.php Проект: su/amber_wordpress

 /**
  * Report whether fetching a URL yields an HTTP 200 status code.
  *
  * @param string $url URL to fetch
  * @return bool true if the fetch result carries an HTTP status code equal to 200,
  *              false if the code differs or no code is available
  */
 public function up($url)
 {
     $response = AmberNetworkUtils::open_url($url, array(CURLOPT_FAILONERROR => FALSE));
     /* A missing status code counts as "down" */
     return isset($response['info']['http_code']) && $response['info']['http_code'] == 200;
 }

Пример #4

Показать файл

Файл: AmberMementoService.php Проект: genevec/amber_wordpress

 /**
  * Ask the Timegate server for a memento of a URL close to a preferred date.
  *
  * The request carries an Accept-Datetime header and is sent with CURLOPT_NOBODY;
  * the memento is taken from the "Location" header of the response.
  *
  * @param  string $url    URL to query
  * @param  string $date   preferred date for the memento (any format strtotime() accepts)
  * @return array          array with the keys 'url' (memento location) and 'date'
  *                        (as reported by getArchiveDate()), or an empty array when
  *                        no memento location was returned
  */
 public function getMemento($url, $date)
 {
     $accept_datetime = 'Accept-Datetime: ' . gmdate(DATE_RFC1123, strtotime($date));
     $options = array(
         CURLOPT_NOBODY => true,
         CURLOPT_HTTPHEADER => array($accept_datetime),
     );

     /* Be forgiving of trailing slashes (or lack thereof) in server URL */
     $timegate = trim($this->serverUrl, "/");
     $response = AmberNetworkUtils::open_single_url($timegate . "/" . $url, $options, FALSE);

     /* No usable answer: report "no memento" as an empty array */
     if ($response === FALSE || !isset($response['headers']['Location'])) {
         return array();
     }

     $memento_url = $response['headers']['Location'];
     return array('url' => $memento_url, 'date' => $this->getArchiveDate($memento_url));
 }

Пример #5

Показать файл

Файл: AmberAvailability.php Проект: genevec/amber_wordpress

 /**
  * Query the NetClerk server for the status of the URLs in a particular country
  *
  * The request is a form-encoded POST to "<serverUrl>/statuses" with one 'country'
  * field and one 'url[]' field per URL.
  *
  * @param  array  $urls    array of URLs to query
  * @param  string $country two-character ISO code for the user's country
  * @return string|bool     body of the response from the NetClerk server,
  *                         or FALSE if the request failed or returned no body
  */
 public function query_status_from_netclerk(array $urls, $country)
 {
     $fields = array('country' => $country, 'url' => $urls);
     /* Pin the separator to "&" so the output does not depend on the
        arg_separator.output ini setting */
     $fields_string = http_build_query($fields, '', '&');
     /* http_build_query represents arrays as "url[0]=foo&url[1]=bar",
        but we need "url[]=foo&url[]=bar". Only the parameter names are rewritten:
        matching a bare "%5B<digits>%5D" anywhere in the string would also corrupt
        encoded URL values that themselves contain something like "[0]" */
     $fields_string = preg_replace('/(^|&)url%5B[0-9]+%5D=/', '$1url%5B%5D=', $fields_string);
     $options = array(CURLOPT_POST => true, CURLOPT_POSTFIELDS => $fields_string);
     $result = AmberNetworkUtils::open_single_url($this->serverUrl . "/statuses", $options);
     if ($result !== FALSE && isset($result['body'])) {
         return $result['body'];
     }
     return FALSE;
 }

Пример #6

Показать файл

Файл: InternetArchiveFetcher.php Проект: genevec/amber_wordpress

 /**
  * Ask the Internet Archive to save a URL and describe the resulting archive.
  *
  * The request goes to "<archiveUrl>/save/<url>"; the archive location is read
  * from the "Content-Location" response header.
  *
  * @param string $url URL to archive
  * @return array archive description with the keys 'id', 'url', 'type', 'date',
  *               'location', 'size', 'provider' and 'provider_id'
  * @throws RuntimeException if the URL is empty, the request fails, the Archive answers
  *                          with HTTP 403, or the response has no Content-Location header
  */
 public function fetch($url)
 {
     if (!$url) {
         throw new RuntimeException("Empty URL");
     }

     $save_url = $this->archiveUrl . "/save/" . $url;
     $response = AmberNetworkUtils::open_single_url($save_url, array(), FALSE);

     /* Make sure that we got a valid response from the Archive */
     if ($response === FALSE) {
         throw new RuntimeException("Error submitting to Internet Archive");
     }
     $http_code = isset($response['info']['http_code']) ? $response['info']['http_code'] : NULL;
     if ($http_code !== NULL && $http_code == 403) {
         throw new RuntimeException("Permission denied when submitting to Internet Archive (may be blocked by robots.txt)");
     }
     if (!isset($response['headers']['Content-Location'])) {
         throw new RuntimeException("Internet Archive response did not include archive location");
     }

     $headers = $response['headers'];
     $location = $headers['Content-Location'];

     /* Type and size of the original document are optional headers */
     $content_type = "";
     if (isset($headers['X-Archive-Orig-Content-Type'])) {
         $content_type = $headers['X-Archive-Orig-Content-Type'];
     }
     $size = 0;
     if (isset($headers['X-Archive-Orig-Content-Length'])) {
         $size = $headers['X-Archive-Orig-Content-Length'];
     }

     return array(
         'id' => md5($url),
         'url' => $url,
         'type' => $content_type,
         'date' => time(),
         'location' => $this->archiveUrl . $location,
         'size' => $size,
         'provider' => 2,
         'provider_id' => $location,
     );
 }

Пример #7

Показать файл

Файл: AmberFetcherTest.php Проект: genevec/amber_wordpress

 /**
  * clean_up_path() should resolve ".." segments, dropping any that would climb
  * above the start of the path.
  */
 public function testCleanUpPathString()
 {
     /* raw path => path expected after clean-up */
     $cases = array(
         "../common.css" => "common.css",
         "_v_1.0.32/personal/common.css" => "_v_1.0.32/personal/common.css",
         "_v_1.0.32/personal/photo/../common.css" => "_v_1.0.32/personal/common.css",
         "_v_1.0.32/personal/photo/../../common.css" => "_v_1.0.32/common.css",
         "_v_1.0.32/personal/photo/../../../../common.css" => "common.css",
     );
     foreach ($cases as $raw_path => $expected_path) {
         $this->assertEquals($expected_path, AmberNetworkUtils::clean_up_path($raw_path));
     }
 }

Пример #8

Показать файл

Файл: AmberFetcher.php Проект: genevec/amber_wordpress

 /**
  * Find out if the access to the given URL is permitted by the robots.txt
  *
  * The robots.txt is fetched from the root of the URL's scheme/host/port. Access is
  * treated as permitted unless a robots.txt is retrieved with HTTP status 200, has a
  * non-empty body, and AmberRobots::url_permitted() rejects the URL.
  *
  * @param string $url URL to check
  * @return bool true if access is permitted (or no robots.txt could be consulted),
  *              false if the robots.txt disallows the URL
  */
 public static function robots_allowed($url)
 {
     $parts = parse_url($url);
     /* A malformed or relative URL has no host to fetch a robots.txt from; treat it
        like any other case where no robots.txt is available instead of building a
        bogus "://" URL from missing array keys */
     if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
         return true;
     }
     $port = isset($parts['port']) ? ":" . $parts['port'] : '';
     $robots_url = $parts['scheme'] . "://" . $parts['host'] . $port . '/robots.txt';
     $data = AmberNetworkUtils::open_url($robots_url, array(CURLOPT_FAILONERROR => FALSE));
     if (isset($data['info']['http_code']) && $data['info']['http_code'] == 200) {
         $body = isset($data['body']) ? $data['body'] : '';
         /* An empty robots.txt places no restrictions */
         return !$body || AmberRobots::url_permitted($body, $url);
     }
     return true;
 }

Пример #9

Показать файл

Файл: AmberNetworkUtils.php Проект: genevec/amber_wordpress

 /**
  * Respect the "noarchive" meta tag as described here: http://noarchive.net/meta/
  * Sample tags that will prevent archiving:
  *   <meta name="robots" content="noarchive">
  *   <meta name="amber" content="noarchive">
  *   <meta name="robots" content="noarchive, noindex">
  *   <meta name="amber" content="noindex">
  * Only the part of the document returned by get_head() is inspected, and the
  * "name" attribute must come before the "content" attribute within the tag.
  * @param  string $body HTML document to examine
  * @return boolean       true if there is an applicable no-archive tag, false otherwise
  */
 public static function find_meta_no_archive($body)
 {
     $head = AmberNetworkUtils::get_head($body);
     /* [^>]* keeps the whole match inside a single <meta ...> tag: a greedy ".*"
        could pair the name of one tag with the content of a later tag on the same
        line, and could not follow a tag that is split across several lines */
     $pattern = '/<meta\s+name\s*=\s*[\'"](robots|amber)[\'"][^>]*content\s*=\s*[\'"][^>]*(noarchive|noindex)[^>]*[\'"]/i';
     return preg_match($pattern, $head) === 1;
 }

Пример #10

Показать файл

Файл: AmberNetworkUtilsTest.php Проект: genevec/amber_wordpress

    /**
     * find_meta_no_archive() should report a no-archive tag for "noindex" meta tags
     * placed in the document head.
     */
    public function testMetaNoArchiveTagDetectionNoIndex()
    {
        /* NOTE(review): the first two tags are identical - possibly one of them was
           meant to exercise a different name/content combination; confirm */
        $meta_tags = array(
            '<meta name="robots" content="noindex">',
            '<meta name="robots" content="noindex">',
            '<meta name="amber" content="noarchive, noindex">',
            '<meta name="robots" content="noindex,noarchive">',
        );
        foreach ($meta_tags as $meta_tag) {
            /* Same page every time; only the meta tag in the head varies */
            $document = <<<EOD
<html>
<head><title>bad man</title>
{$meta_tag}
</head>
<body>
The meta tag only works in the head
</body>
</html>
EOD;
            $this->assertTrue(AmberNetworkUtils::find_meta_no_archive($document));
        }
    }