/**
 * Request the page from IMDb, following HTTP redirects.
 *
 * A new MDB_Request is issued for the given URL. A 200 response returns the
 * body. Any other response is checked for a redirect target via
 * getRedirect(); if one is present it is requested next. Redirects are
 * followed iteratively and capped, so a redirect loop ends with an error
 * instead of recursing without bound.
 *
 * @param string $url URL to request
 * @return string Page html. Empty string on failure
 */
protected function requestPage($url)
{
    // Upper bound on redirects followed for a single call.
    $maxRedirects = 10;
    $redirectsFollowed = 0;

    while (true) {
        $this->logger->info("[Page] Requesting [{$url}]");
        $req = new MDB_Request($url, $this->config);

        if (!$req->sendRequest()) {
            $this->logger->error("[Page] Failed to connect to server when requesting url [{$url}]");
            return '';
        }

        if (200 == $req->getStatus()) {
            return $req->getResponseBody();
        }

        // Not a 200: either a redirect we can follow, or a failure.
        $redirectUrl = $req->getRedirect();
        if (!$redirectUrl) {
            $this->logger->error(
                "[Page] Failed to retrieve url [{url}]. Response headers:{headers}",
                array('url' => $url, 'headers' => $req->getLastResponseHeaders())
            );
            return '';
        }

        if ($redirectsFollowed >= $maxRedirects) {
            $this->logger->error("[Page] Too many redirects ({$maxRedirects}) when requesting url [{$url}]");
            return '';
        }

        $this->logger->debug("[Page] Following redirect from [{$url}] to [{$redirectUrl}]");
        $redirectsFollowed++;
        $url = $redirectUrl;
    }
}
/**
 * Convert IMDB redirect-URLs of external sites to real URLs.
 *
 * An URL that is already absolute (http:// or https://, any letter case) is
 * returned unchanged. Otherwise the URL is treated as a path on
 * $this->imdbsite, requested over http, and the value of the 'Location'
 * response header (with all whitespace removed) is returned.
 *
 * @method convertIMDBtoRealURL
 * @param string $url redirect-url
 * @return string|false real-url, or false if the request failed or the
 *                      response carried no Location header
 */
protected function convertIMDBtoRealURL($url)
{
    // Already an absolute URL: nothing to resolve.
    if (preg_match('#^https?://#i', $url)) {
        return $url;
    }

    $req = new MDB_Request("");
    $req->setURL("http://" . $this->imdbsite . $url);
    if ($req->sendRequest() === false) {
        return false;
    }

    $head = $req->getLastResponseHeaders();
    if (!is_array($head)) {
        return false;
    }

    foreach ($head as $header) {
        // Skip lines without a name/value separator (e.g. the status line).
        if (strpos($header, ':') === false) {
            continue;
        }
        list($type, $value) = explode(':', $header, 2);
        // HTTP header field names are case-insensitive.
        if (strcasecmp(trim($type), 'Location') === 0) {
            return preg_replace('/\s/', '', $value);
        }
    }

    return false;
}
/**
 * Obtain page from web server and store its body in $this->page[$wt].
 *
 * The status code of the last response is stored in
 * $this->lastServerResponse ('000' when the request could not be sent).
 * Redirect responses (301, 302, 303, 307) that carry a 'Location' header are
 * followed, up to a fixed number of hops. The Location value is used as-is;
 * relative targets are not resolved. A redirect response without a Location
 * header is handled like a 200 and its body is stored.
 *
 * If the class name contains 'imdb', $this->imdb_utf8recode is truthy and
 * mbstring is available, a body that is not detected and validated as UTF-8
 * is converted from ISO-8859-1 to UTF-8.
 *
 * @method protected getWebPage
 * @param string $wt internal name of the page
 * @param string $url URL to open
 * @return false|null false when the page could not be opened (connection
 *                    failure, 404, too many redirects); null otherwise
 */
protected function getWebPage($wt, $url)
{
    // Upper bound on redirects followed for a single call.
    $maxRedirects = 10;
    $redirects = 0;

    while (true) {
        $req = new MDB_Request("");
        $req->setURL($url);
        if ($req->sendRequest() !== false) {
            $head = $req->getLastResponseHeaders();
            if (!is_array($head)) {
                $head = array();
            }
        } else {
            // Synthetic status line marking a connection failure.
            $head = array("HTTP/1.1 000");
        }

        // Take the status code from the status line regardless of the
        // protocol version ("HTTP/1.0 200 OK", "HTTP/1.1 200 OK", "HTTP/2 200").
        $statusLine = isset($head[0]) ? (string) $head[0] : '';
        $response = explode(" ", $statusLine);
        $status = isset($response[1]) ? $response[1] : null;
        $this->lastServerResponse = $status;

        if ($status === '000') {
            $this->page[$wt] = "cannot open page";
            $this->debug_scalar("cannot open page (could not connect to host): {$url}");
            return false;
        }

        if ($status === '404') {
            $this->page[$wt] = "cannot open page";
            $this->debug_scalar("cannot open page (error 404): {$url}");
            $this->debug_object($response);
            return false;
        }

        if (in_array($status, array('301', '302', '303', '307'), true)) {
            // In all these cases, the correct URL is to be found in the 'Location:' header.
            $target = null;
            foreach ($head as $headline) {
                if (preg_match('/^\s*location\s*:\s*(\S.*?)\s*$/i', $headline, $match)) {
                    $target = $match[1];
                    break;
                }
            }
            if ($target !== null) {
                if ($redirects >= $maxRedirects) {
                    $this->page[$wt] = "cannot open page";
                    $this->debug_scalar("cannot open page (too many redirects): {$url}");
                    return false;
                }
                $redirects++;
                $url = $target;
                continue;
            }
            // No Location header: keep whatever body this response carried.
        } elseif ($status !== '200') {
            $this->debug_scalar("HTTP response code not handled explicitly: '" . $statusLine . "'");
        }

        break;
    }

    $this->page[$wt] = $req->getResponseBody();

    if (strpos(get_class($this), 'imdb') !== false && $this->imdb_utf8recode && function_exists('mb_detect_encoding')) {
        $cur_encoding = mb_detect_encoding($this->page[$wt]);
        if (!($cur_encoding == "UTF-8" && mb_check_encoding($this->page[$wt], "UTF-8"))) {
            // Same conversion utf8_encode() performed (ISO-8859-1 to UTF-8);
            // utf8_encode() itself is deprecated as of PHP 8.2.
            $this->page[$wt] = mb_convert_encoding($this->page[$wt], 'UTF-8', 'ISO-8859-1');
        }
    }
}
/**
 * Obtain page from web server and store its body in $this->page[$wt].
 *
 * The status code of the last response is stored in
 * $this->lastServerResponse ('000' when the request could not be sent).
 * Redirect responses (301, 302, 303, 307) that carry a 'Location' header are
 * followed, up to a fixed number of hops, instead of storing the body of the
 * redirect response. The Location value is used as-is; relative targets are
 * not resolved. A redirect response without a Location header is handled
 * like a 200 and its body is stored.
 *
 * If the class name contains 'imdb', $this->imdb_utf8recode is truthy and
 * mbstring is available, a body that is not detected and validated as UTF-8
 * is converted from ISO-8859-1 to UTF-8.
 *
 * @method public getWebPage
 * @param string $wt internal name of the page
 * @param string $url URL to open
 * @return false|null false when the page could not be opened (connection
 *                    failure, 404, too many redirects); null otherwise
 */
public function getWebPage($wt, $url)
{
    // Upper bound on redirects followed for a single call.
    $maxRedirects = 10;
    $redirects = 0;

    while (true) {
        $req = new MDB_Request("");
        $req->setURL($url);
        if ($req->sendRequest() !== false) {
            $head = $req->getLastResponseHeaders();
            if (!is_array($head)) {
                $head = array();
            }
        } else {
            // Synthetic status line marking a connection failure.
            $head = array("HTTP/1.1 000");
        }

        // Take the status code from the status line regardless of the
        // protocol version ("HTTP/1.0 200 OK", "HTTP/1.1 200 OK", "HTTP/2 200").
        $statusLine = isset($head[0]) ? (string) $head[0] : '';
        $response = explode(" ", $statusLine);
        $status = isset($response[1]) ? $response[1] : null;
        $this->lastServerResponse = $status;

        if ($status === '000') {
            $this->page[$wt] = "cannot open page";
            $this->debug_scalar("cannot open page (could not connect to host): {$url}");
            return false;
        }

        if ($status === '404') {
            $this->page[$wt] = "cannot open page";
            $this->debug_scalar("cannot open page (error 404): {$url}");
            return false;
        }

        if (in_array($status, array('301', '302', '303', '307'), true)) {
            // The real URL of a redirect is given in the 'Location:' header.
            $target = null;
            foreach ($head as $headline) {
                if (preg_match('/^\s*location\s*:\s*(\S.*?)\s*$/i', $headline, $match)) {
                    $target = $match[1];
                    break;
                }
            }
            if ($target !== null) {
                if ($redirects >= $maxRedirects) {
                    $this->page[$wt] = "cannot open page";
                    $this->debug_scalar("cannot open page (too many redirects): {$url}");
                    return false;
                }
                $redirects++;
                $url = $target;
                continue;
            }
            // No Location header: keep whatever body this response carried.
        } elseif ($status !== '200') {
            $this->debug_scalar("HTTP response code not handled explicitly: '" . $statusLine . "'");
        }

        break;
    }

    $this->page[$wt] = $req->getResponseBody();

    if (strpos(get_class($this), 'imdb') !== false && $this->imdb_utf8recode && function_exists('mb_detect_encoding')) {
        $cur_encoding = mb_detect_encoding($this->page[$wt]);
        if (!($cur_encoding == "UTF-8" && mb_check_encoding($this->page[$wt], "UTF-8"))) {
            // Same conversion utf8_encode() performed (ISO-8859-1 to UTF-8);
            // utf8_encode() itself is deprecated as of PHP 8.2.
            $this->page[$wt] = mb_convert_encoding($this->page[$wt], 'UTF-8', 'ISO-8859-1');
        }
    }
}