/**
 * Initiates a new PHPCrawlerResponseHeader.
 *
 * Stores the raw header and its source URL, then extracts the HTTP status code,
 * the content-type, the content-length and the cookies from the header by means
 * of the PHPCrawlerUtils helpers.
 *
 * @param string $header_string A complete response-header as it was sent by the server
 * @param string $source_url    The URL of the website the header was received from.
 * @internal
 */
public function __construct($header_string, $source_url)
{
    $this->header_raw = $header_string;
    $this->source_url = $source_url;

    $this->http_status_code = PHPCrawlerUtils::getHTTPStatusCode($header_string);

    // NOTE(review): getHeaderValue() presumably yields null when the header is absent.
    // Passing null to strtolower() is deprecated as of PHP 8.1, so the value is cast
    // to string first; the stored result is the same as before (an empty string).
    $this->content_type = strtolower(
        (string) PHPCrawlerUtils::getHeaderValue($header_string, "content-type")
    );
    $this->content_length = strtolower(
        (string) PHPCrawlerUtils::getHeaderValue($header_string, "content-length")
    );

    $this->cookies = PHPCrawlerUtils::getCookiesFromHeader($header_string, $source_url);
}
/**
 * Prints an HTML table with the requested details about a processed document.
 *
 * One table row is echoed for every key of $this->output_array that names a known
 * detail, in the order in which the keys appear. If $DocInfo->error_occured is set,
 * an additional error row is appended. Finally the output is flushed through
 * $this->flushOutput().
 *
 * Values that originate from the crawled document or the server response (URLs,
 * headers, raw links, error strings, ...) are HTML-escaped before being echoed.
 *
 * @param object $DocInfo Information about the processed document. Properties read here:
 *                        url, http_status_code, content_type, header, header_send, received,
 *                        received_completely, bytes_received, referer_url, refering_linkcode,
 *                        refering_link_raw, refering_linktext, links_found,
 *                        links_found_url_descriptors, received_to_file, content_tmp_file,
 *                        received_to_memory, source, error_occured, error_string.
 */
public function handleDocumentInfo($DocInfo)
{
    echo "<table class=intbl>";

    // each() was removed in PHP 8.0, so the keys are iterated with foreach instead.
    // A non-array output_array simply results in no rows (as the silenced each() did).
    $wanted_keys = is_array($this->output_array) ? array_keys($this->output_array) : array();

    foreach ($wanted_keys as $key) {
        // Cast to string so that an integer key can never loosely match an option name.
        switch ((string) $key) {
            case "requested_url":
                $url = $this->escapeOutputValue($DocInfo->url);
                $str = '<a href="' . $url . '" target=blank>' . $url . '</a>';
                echo "<tr><td width=130><nobr>Page requested:</nobr></td><td width=470>" . $str . "</td></tr>";
                break;

            case "http_status_code":
                $str = $DocInfo->http_status_code ? $this->escapeOutputValue($DocInfo->http_status_code) : "-";
                echo "<tr><td>HTTP-Status:</td><td>" . $str . "</td></tr>";
                break;

            case "content_type":
                $str = $DocInfo->content_type ? $this->escapeOutputValue($DocInfo->content_type) : "-";
                echo "<tr><td>Content-Type:</td><td>" . $str . "</td></tr>";
                break;

            case "content_size":
                // Cast guards trim() against a null header value (deprecated since PHP 8.1).
                $str = (string) PHPCrawlerUtils::getHeaderValue($DocInfo->header, "content-length");
                if (trim($str) === "") {
                    $str = "??";
                }
                echo "<tr><td>Content-Size:</td><td >" . $this->escapeOutputValue($str) . " bytes</td></tr>";
                break;

            case "content_received":
                $str = $DocInfo->received ? "Yes" : "No";
                echo "<tr><td>Content received:</td><td >" . $str . "</td></tr>";
                break;

            case "content_received_completely":
                $str = $DocInfo->received_completely ? "Yes" : "No";
                echo "<tr><td><nobr>Received completely:</nobr></td><td >" . $str . "</td></tr>";
                break;

            case "bytes_received":
                echo "<tr><td>Bytes received:</td><td>" . $DocInfo->bytes_received . " bytes</td></tr>";
                break;

            case "referer_url":
                // The original read an undefined local ($page_data) here, so the
                // referring URL was never actually printed.
                if ($DocInfo->referer_url == "") {
                    $str = "-";
                } else {
                    $str = $this->escapeOutputValue($DocInfo->referer_url);
                }
                echo "<tr><td><nobr>Refering URL</nobr>:</td><td >" . $str . "</td></tr>";
                break;

            case "refering_linkcode":
                if ($DocInfo->refering_linkcode == "") {
                    $str = "-";
                } else {
                    $str = htmlentities($DocInfo->refering_linkcode);
                    $str = str_replace("\n", "<br>", $str);
                }
                echo "<tr><td valign=top><nobr>Refering linkcode:</nobr></td><td >" . $str . "</td></tr>";
                break;

            case "refering_link_raw":
                if ($DocInfo->refering_link_raw == "") {
                    $str = "-";
                } else {
                    $str = $this->escapeOutputValue($DocInfo->refering_link_raw);
                }
                echo "<tr><td><nobr>Refering Link RAW: </nobr></td><td >" . $str . "</td></tr>";
                break;

            case "refering_linktext":
                if ($DocInfo->refering_linktext == "") {
                    $str = "-";
                } else {
                    $str = htmlentities($DocInfo->refering_linktext);
                    $str = str_replace("\n", "<br>", $str);
                }
                echo "<tr><td valign=top><nobr>Refering linktext</nobr>:</td><td >" . $str . "</td></tr>";
                break;

            case "header_send":
                if ($DocInfo->header_send) {
                    // Escape first, then turn line breaks into <br> so the markup survives.
                    $str = str_replace("\n", "<br>", $this->escapeOutputValue(trim($DocInfo->header_send)));
                } else {
                    $str = "-";
                }
                echo "<tr><td valign=top>Send header:</td><td >" . $str . "</td></tr>";
                break;

            case "header":
                if ($DocInfo->header) {
                    $str = str_replace("\n", "<br>", $this->escapeOutputValue(trim($DocInfo->header)));
                } else {
                    $str = "-";
                }
                echo "<tr><td valign=top>Received header:</td><td >" . $str . "</td></tr>";
                break;

            case "nr_found_links":
                $str = count($DocInfo->links_found);
                echo "<tr><td valign=top>Links found:</td><td >" . $str . "</td></tr>";
                break;

            case "all_found_links":
                echo "<tr><td valign=top>List of found links:</td>";
                echo "<td>";
                foreach ($DocInfo->links_found_url_descriptors as $url_descriptor) {
                    echo $this->escapeOutputValue($url_descriptor->url_rebuild) . "<br>";
                }
                if (count($DocInfo->links_found_url_descriptors) == 0) {
                    echo "-";
                }
                echo "</td>";
                echo "</tr>";
                break;

            case "received_to_file":
                $str = $DocInfo->received_to_file ? "Yes" : "No";
                echo "<tr><td valign=top>Received to TMP-file:</td><td >" . $str . "</td></tr>";
                break;

            case "tmpfile_name_size":
                if ($DocInfo->content_tmp_file) {
                    $str = $this->escapeOutputValue($DocInfo->content_tmp_file)
                        . " (" . filesize($DocInfo->content_tmp_file) . " bytes)";
                } else {
                    $str = "-";
                }
                echo "<tr><td valign=top>Content TMP-file:</td><td >" . $str . "</td></tr>";
                break;

            case "received_to_memory":
                $str = $DocInfo->received_to_memory ? "Yes" : "No";
                echo "<tr><td valign=top>Received to memory:</td><td >" . $str . "</td></tr>";
                break;

            case "memory_content_size":
                // Cast guards strlen() against a null source (deprecated since PHP 8.1).
                echo "<tr><td valign=top>Memory-content-size:</td><td >"
                    . strlen((string) $DocInfo->source) . " bytes</td></tr>";
                break;
        }
    }

    // Output error if there is one
    if ($DocInfo->error_occured) {
        echo "<tr>\n <td class=red>Error:</td>\n <td class=red>"
            . $this->escapeOutputValue($DocInfo->error_string) . "</td>\n </tr>";
    }

    echo "</table> <br>";
    $this->flushOutput();
}

/**
 * Escapes a value for inclusion in HTML text or inside a quoted HTML attribute.
 *
 * @param mixed $value The value to print; it is cast to string first.
 * @return string The escaped string.
 */
private function escapeOutputValue($value)
{
    // ENT_SUBSTITUTE (PHP >= 5.4) keeps strings with invalid byte sequences from
    // being turned into an empty string; fall back to ENT_QUOTES alone without it.
    $flags = defined('ENT_SUBSTITUTE') ? (ENT_QUOTES | ENT_SUBSTITUTE) : ENT_QUOTES;

    return htmlspecialchars((string) $value, $flags);
}
/**
 * Decides whether the content of the current page/file gets streamed directly
 * into a temporary file.
 *
 * The decision is made by matching the Content-Type found in the given response
 * header against the rules stored in $this->receive_to_file_content_types.
 *
 * @param string $response_header The response-header
 * @return bool TRUE if the content should be streamed to TMP-file
 */
protected function decideStreamToFile($response_header)
{
    // Without any rules nothing is ever streamed to file.
    if (0 == count($this->receive_to_file_content_types)) {
        return false;
    }

    $type_from_header = PHPCrawlerUtils::getHeaderValue($response_header, "content-type");

    // A missing Content-Type never matches; otherwise the rules decide.
    return ($type_from_header == null)
        ? false
        : PHPCrawlerUtils::checkStringAgainstRegexArray(
            $type_from_header,
            $this->receive_to_file_content_types
        );
}