// NOTE(review): this fragment starts inside blocks opened earlier in the
// script and ends inside blocks that are closed later; the brace structure
// below is kept exactly as found. Statements are laid out one per line so
// that the "//" comments no longer swallow the code that follows them.

// Tail of the single-page handling: pass the fetched markup and the response
// headers through convert_to_utf8() and log where the page came from.
$html = convert_to_utf8($html, $single_page_response['headers']);
debug("Retrieved single-page view from {$effective_url}");
} // closes a block opened before this fragment
// The single-page response is not referenced again in this fragment.
unset($single_page_response);
} // closes a block opened before this fragment
} // closes a block opened before this fragment

if ($do_content_extraction) {
    debug('--------');
    debug('Attempting to extract content');

    // process() yields a truthy value on success; content and title are only
    // read from the extractor in that case (null / '' otherwise).
    $extraction_successful = $extractor->process($html, $effective_url);
    $readability = $extractor->readability;
    $content_block = $extraction_successful ? $extractor->getContent() : null;
    $extracted_title = $extraction_successful ? $extractor->getTitle() : '';

    // Deal with multi-page articles: only when we are not already on a
    // single-page view, extraction succeeded and the extractor reports a
    // next-page URL.
    $is_multi_page = !$is_single_page && $extraction_successful && $extractor->getNextPageUrl();
    if ($options->multipage && $is_multi_page && $options->content) {
        debug('--------');
        debug('Attempting to process multi-page article');

        // URLs already queued for fetching (guards against revisiting a page).
        $multi_page_urls = array();
        // Starts empty; presumably filled with per-page content further down
        // (not visible in this fragment) -- TODO confirm.
        $multi_page_content = array();

        while ($next_page_url = $extractor->getNextPageUrl()) {
            debug('--------');
            debug('Processing next page: ' . $next_page_url);

            // If we've got a URL, resolve it against $effective_url; a falsy
            // result from makeAbsoluteStr() skips this branch.
            if ($next_page_url = makeAbsoluteStr($effective_url, $next_page_url)) {
                // Check it's not a page we have already queued. Strict
                // comparison: both sides are URL strings.
                if (!in_array($next_page_url, $multi_page_urls, true)) {
                    // It's new, so record it and attempt to fetch it.
                    $multi_page_urls[] = $next_page_url;
                    // Remember the current referer; presumably restored after
                    // the fetch below this fragment -- TODO confirm.
                    $_prev_ref = $http->referer;
// NOTE(review): this fragment begins inside blocks opened earlier in the
// script and stops inside blocks that are closed later; the brace structure
// is preserved as found. Each statement is placed on its own line so the
// "//" comments cannot comment out the code that follows them.

// End of the single-page handling: run the fetched markup and the response
// headers through convert_to_utf8() and log the URL it was retrieved from.
$html = convert_to_utf8($html, $single_page_response['headers']);
debug("Retrieved single-page view from {$effective_url}");
} // closes a block opened before this fragment
// The single-page response is not used again within this fragment.
unset($single_page_response);
} // closes a block opened before this fragment
} // closes a block opened before this fragment

if ($do_content_extraction) {
    debug('--------');
    debug('Attempting to extract content');

    // $extract_result is used purely as a truthy/falsy success flag: content
    // and title are fetched from the extractor only when it is truthy.
    $extract_result = $extractor->process($html, $effective_url);
    $readability = $extractor->readability;
    $content_block = $extract_result ? $extractor->getContent() : null;
    $extracted_title = $extract_result ? $extractor->getTitle() : '';

    // Deal with multi-page articles: requires that this is not a single-page
    // view, that extraction succeeded, and that a next-page URL is reported.
    $is_multi_page = !$is_single_page && $extract_result && $extractor->getNextPageUrl();
    if ($options->multipage && $is_multi_page && $options->content) {
        debug('--------');
        debug('Attempting to process multi-page article');

        // Next-page URLs seen so far, used to avoid queuing a page twice.
        $multi_page_urls = array();
        // Initialised empty here; presumably populated later in the loop
        // (outside this fragment) -- TODO confirm.
        $multi_page_content = array();

        while ($next_page_url = $extractor->getNextPageUrl()) {
            debug('--------');
            debug('Processing next page: ' . $next_page_url);

            // Resolve the reported URL against $effective_url; nothing is
            // done for this iteration here if makeAbsoluteStr() is falsy.
            if ($next_page_url = makeAbsoluteStr($effective_url, $next_page_url)) {
                // Skip URLs we already have. Strict comparison is safe
                // because both operands are strings.
                if (!in_array($next_page_url, $multi_page_urls, true)) {
                    // Not seen before: record it, then attempt to fetch it.
                    $multi_page_urls[] = $next_page_url;
                    // Save the current referer; presumably put back once the
                    // fetch (below this fragment) is done -- TODO confirm.
                    $_prev_ref = $http->referer;