예제 #1
0
 /**
  * Request a page from IMDb, following HTTP redirects.
  *
  * A response with status 200 yields the response body. Any other response that
  * carries a redirect target is followed, up to a fixed number of hops so that a
  * redirect loop cannot run forever. Every failure is logged and reported to the
  * caller as an empty string.
  *
  * @param string $url URL of the page to request
  * @return string Page html. Empty string on failure
  */
 protected function requestPage($url)
 {
     // Upper bound on consecutive redirects; guards against redirect loops.
     $maxRedirects = 10;
     $redirectCount = 0;
     $currentUrl = $url;

     while (true) {
         $this->logger->info("[Page] Requesting [{$currentUrl}]");
         $req = new MDB_Request($currentUrl, $this->config);
         if (!$req->sendRequest()) {
             $this->logger->error("[Page] Failed to connect to server when requesting url [{$currentUrl}]");
             return '';
         }

         // Loose comparison on purpose: the status may be reported as int or numeric string.
         if (200 == $req->getStatus()) {
             return $req->getResponseBody();
         }

         $redirectUrl = $req->getRedirect();
         if (!$redirectUrl) {
             // Neither a success nor a redirect: give up and log the headers for diagnosis.
             $this->logger->error("[Page] Failed to retrieve url [{url}]. Response headers:{headers}", array('url' => $currentUrl, 'headers' => $req->getLastResponseHeaders()));
             return '';
         }

         if ($redirectCount >= $maxRedirects) {
             $this->logger->error("[Page] Too many redirects when requesting url [{url}]", array('url' => $url));
             return '';
         }
         $redirectCount++;

         $this->logger->debug("[Page] Following redirect from [{$currentUrl}] to [{$redirectUrl}]");
         $currentUrl = $redirectUrl;
     }
 }
 /** Convert IMDB redirect-URLs of external sites to real URLs
  *
  * A URL that already is absolute (http:// or https://) is returned unchanged.
  * Anything else is treated as a path on $this->imdbsite: it is requested and
  * the value of the "Location" response header is returned.
  *
  * @param string $url redirect-url
  * @return string|false real-url (with all whitespace removed), or false when the
  *                      request failed or the response carried no Location header
  */
 protected function convertIMDBtoRealURL($url)
 {
     // Already an absolute URL (either scheme, any letter case): nothing to resolve.
     if (preg_match('/^https?:\\/\\//i', $url)) {
         return $url;
     }

     $req = new MDB_Request("");
     $req->setURL("http://" . $this->imdbsite . $url);
     if ($req->sendRequest() === FALSE) {
         return false;
     }

     foreach ($req->getLastResponseHeaders() as $header) {
         // The status line and malformed lines carry no "name: value" pair.
         if (strpos($header, ':') === false) {
             continue;
         }
         // Limit of 2 keeps the colons inside the URL value intact.
         list($type, $value) = explode(':', $header, 2);
         // Header field names are case-insensitive, so "location" must match too.
         if (strcasecmp(trim($type), 'Location') === 0) {
             return preg_replace('/\\s/', '', $value);
         }
     }

     return false;
 }
 /** Obtain page from web server
  *
  * Requests $url and stores the response body in $this->page[$wt]. The status
  * code of the last response is kept in $this->lastServerResponse. Redirects
  * (301, 302, 303, 307) are followed through their "Location" header.
  *
  * NOTE(review): redirects are followed recursively without a hop limit, so a
  * redirect loop on the server side is not detected here.
  *
  * @param string $wt internal name of the page
  * @param string $url URL to open
  * @return false|null false when the host could not be reached or answered 404
  *                    (also when that happens on a redirect target); null otherwise
  */
 protected function getWebPage($wt, $url)
 {
     $req = new MDB_Request("");
     $req->setURL($url);
     if ($req->sendRequest() !== FALSE) {
         $head = $req->getLastResponseHeaders();
     } else {
         // Synthetic status line meaning "no connection"; handled by the switch below.
         $head = array("HTTP/1.1 000");
     }

     $response = explode(" ", $head[0]);
     // A status line without a space has no code part; avoid reading a missing index.
     $this->lastServerResponse = isset($response[1]) ? $response[1] : null;

     switch (substr($head[0], 0, 12)) {
         case "HTTP/1.1 000":
             $this->page[$wt] = "cannot open page";
             $this->debug_scalar("cannot open page (could not connect to host): {$url}");
             return false;
         case "HTTP/1.1 404":
             $this->page[$wt] = "cannot open page";
             $this->debug_scalar("cannot open page (error 404): {$url}");
             $this->debug_object($response);
             return false;
         case "HTTP/1.1 301": // permanent redirect
         case "HTTP/1.1 302": // found
         case "HTTP/1.1 303": // see other
         case "HTTP/1.1 307": // temporary redirect
             // In all these cases, the correct URL is to be found in the 'Location:' header.
             foreach ($head as $headline) {
                 if (strpos(trim(strtolower($headline)), 'location') !== 0) {
                     continue;
                 }
                 if (strpos($headline, ':') === false) {
                     continue;
                 }
                 // Split on the first colon only: the URL itself contains colons, and
                 // the space after the header name is optional.
                 list(, $target) = explode(':', $headline, 2);
                 // Propagate the outcome so a failing redirect target reports false.
                 return $this->getWebPage($wt, trim($target));
             }
             // No Location header found: fall through and keep the body of this response.
         case "HTTP/1.1 200":
             break;
         default:
             $this->debug_scalar("HTTP response code not handled explicitly: '" . $head[0] . "'");
             break;
     }

     $this->page[$wt] = $req->getResponseBody();

     $recode = strpos(get_class($this), 'imdb') !== FALSE
         && $this->imdb_utf8recode
         && function_exists('mb_detect_encoding');
     if ($recode) {
         $cur_encoding = mb_detect_encoding($this->page[$wt]);
         if (!($cur_encoding == "UTF-8" && mb_check_encoding($this->page[$wt], "UTF-8"))) {
             // Same ISO-8859-1 -> UTF-8 conversion utf8_encode() performed; that
             // function is deprecated as of PHP 8.2. mbstring is known to be loaded here.
             $this->page[$wt] = mb_convert_encoding($this->page[$wt], 'UTF-8', 'ISO-8859-1');
         }
     }
 }
예제 #4
0
 /** Obtain page from web server
  *
  * Requests $url and stores the response body in $this->page[$wt]. The status
  * code of the response is kept in $this->lastServerResponse.
  *
  * Redirects are not followed: for a 301 or 302 response the body of the
  * redirect response itself is stored.
  *
  * @param string $wt internal name of the page
  * @param string $url URL to open
  * @return false|null false when the host could not be reached or answered 404;
  *                    null otherwise
  */
 public function getWebPage($wt, $url)
 {
     $req = new MDB_Request("");
     $req->setURL($url);
     if ($req->sendRequest() !== FALSE) {
         $head = $req->getLastResponseHeaders();
     } else {
         // Synthetic status line meaning "no connection"; handled by the switch below.
         $head = array("HTTP/1.1 000");
     }

     $response = explode(" ", $head[0]);
     // A status line without a space has no code part; avoid reading a missing index.
     $this->lastServerResponse = isset($response[1]) ? $response[1] : null;

     switch (substr($head[0], 0, 12)) {
         case "HTTP/1.1 000":
             $this->page[$wt] = "cannot open page";
             $this->debug_scalar("cannot open page (could not connect to host): {$url}");
             return false;
         case "HTTP/1.1 404":
             $this->page[$wt] = "cannot open page";
             $this->debug_scalar("cannot open page (error 404): {$url}");
             return false;
         case "HTTP/1.1 301":
         case "HTTP/1.1 302":
         case "HTTP/1.1 200":
             // Accepted as-is; the response body is stored below.
             break;
         default:
             $this->debug_scalar("HTTP response code not handled explicitly: '" . $head[0] . "'");
             break;
     }

     $this->page[$wt] = $req->getResponseBody();

     $recode = strpos(get_class($this), 'imdb') !== FALSE
         && $this->imdb_utf8recode
         && function_exists('mb_detect_encoding');
     if ($recode) {
         $cur_encoding = mb_detect_encoding($this->page[$wt]);
         if (!($cur_encoding == "UTF-8" && mb_check_encoding($this->page[$wt], "UTF-8"))) {
             // Same ISO-8859-1 -> UTF-8 conversion utf8_encode() performed; that
             // function is deprecated as of PHP 8.2. mbstring is known to be loaded here.
             $this->page[$wt] = mb_convert_encoding($this->page[$wt], 'UTF-8', 'ISO-8859-1');
         }
     }
 }