/**
 * Reads the response-content.
 *
 * The content is fetched chunk by chunk through readResponseContentChunk() until that method
 * reports the document as completed. Each chunk is either written to the temporary file or
 * collected in memory, and the received source is handed to the LinkFinder if the content-type
 * of the last response header matches one of the configured link-search content-types.
 *
 * Notes:
 *  - $stream_to_file has no default value anymore. The former default (false) could never be
 *    used because the following by-reference parameters are required; PHP 8 reports such a
 *    signature as deprecated.
 *  - Gzip-encoded content is decoded once the document is complete. When streaming to file,
 *    the decoded data is only used for the link search; the temporary file keeps the bytes
 *    exactly as they were received.
 *
 * @param bool   $stream_to_file                If TRUE, the content will be streamed directly to the temporary
 *                                              file and this method will not return the content as a string.
 * @param int    &$error_code                   Error-code by reference if an error occurred.
 * @param string &$error_string                 Error-string by reference.
 * @param bool   &$document_received_completely Flag indicating whether the content was received completely,
 *                                              passed by reference.
 *
 * @return string The response-content/source. May be empty if an error occurred or data was streamed
 *                to the tmp-file.
 */
protected function readResponseContent($stream_to_file, &$error_code, &$error_string, &$document_received_completely)
{
    $this->content_bytes_received = 0;

    // Same truthiness the former "== true" / "== false" comparisons had
    $stream_to_file = (bool) $stream_to_file;

    // If content should be streamed to file
    $fp = null;
    if ($stream_to_file) {
        $fp = @fopen($this->tmpFile, "w");

        if ($fp === false) {
            $error_code = PHPCrawlerRequestErrors::ERROR_TMP_FILE_NOT_WRITEABLE;
            $error_string = "Couldn't open the temporary file " . $this->tmpFile . " for writing.";
            return "";
        }
    }

    // Init
    $source_portion = "";   // Source that was not passed to the LinkFinder yet
    $source_complete = "";  // Complete source, only filled when NOT streaming to file
    $document_received_completely = true;
    $document_completed = false;
    $gzip_encoded_content = null; // Unknown until the first chunk was inspected

    // Resume data-transfer-time benchmark
    PHPCrawlerBenchmark::start("data_transfer_time");

    while (!$document_completed) {
        // Get chunk from content
        $content_chunk = $this->readResponseContentChunk(
            $document_completed,
            $error_code,
            $error_string,
            $document_received_completely
        );
        $source_portion .= $content_chunk;

        // Check if content is gzip-encoded (check only first chunk)
        if ($gzip_encoded_content === null) {
            $gzip_encoded_content = (bool) PHPCrawlerEncodingUtils::isGzipEncoded($content_chunk);
        }

        // Stream to file or store source in memory
        if ($stream_to_file) {
            @fwrite($fp, $content_chunk);
        } else {
            $source_complete .= $content_chunk;
        }

        // Decode gzip-encoded content when done with document.
        // Gzip content is never searched chunk-wise (see condition below), so $source_portion
        // still holds every received byte here - also when streaming to file, where
        // $source_complete stays empty and therefore must not be used as decoder input.
        if ($document_completed && $gzip_encoded_content) {
            $source_portion = PHPCrawlerEncodingUtils::decodeGZipContent($source_portion);

            if (!$stream_to_file) {
                $source_complete = $source_portion;
            }
        }

        // Find links in portion of the source.
        // Chunk-wise search is only done for plain (non-gzip) content that is kept in memory;
        // in every case one final search runs when the document is completed.
        $buffer_filled = !$gzip_encoded_content
            && !$stream_to_file
            && strlen($source_portion) >= $this->content_buffer_size;

        if ($buffer_filled || $document_completed) {
            $search_for_links = PHPCrawlerUtils::checkStringAgainstRegexArray(
                $this->lastResponseHeader->content_type,
                $this->linksearch_content_types
            );

            if ($search_for_links) {
                // Link search is not part of the data-transfer-time, so pause the benchmark
                PHPCrawlerBenchmark::stop("data_transfer_time");

                $this->LinkFinder->findLinksInHTMLChunk($source_portion);

                // Keep the tail of the processed portion so it overlaps with the next one
                if ($this->source_overlap_size > 0) {
                    $source_portion = substr($source_portion, -$this->source_overlap_size);
                } else {
                    $source_portion = "";
                }

                PHPCrawlerBenchmark::start("data_transfer_time");
            }
        }
    }

    if ($stream_to_file) {
        @fclose($fp);
    }

    // Stop data-transfer-time benchmark
    PHPCrawlerBenchmark::stop("data_transfer_time");
    $this->data_transfer_time = PHPCrawlerBenchmark::getElapsedTime("data_transfer_time");

    return $source_complete;
}