$html = convert_to_utf8($html, $single_page_response['headers']);
             debug("Retrieved single-page view from {$effective_url}");
         }
         unset($single_page_response);
     }
 }
 if ($do_content_extraction) {
     debug('--------');
     debug('Attempting to extract content');
     $extraction_successful = $extractor->process($html, $effective_url);
     $readability = $extractor->readability;
     $content_block = $extraction_successful ? $extractor->getContent() : null;
     $extracted_title = $extraction_successful ? $extractor->getTitle() : '';
     // Deal with multi-page articles
     //die('Next: '.$extractor->getNextPageUrl());
     $is_multi_page = !$is_single_page && $extraction_successful && $extractor->getNextPageUrl();
     if ($options->multipage && $is_multi_page && $options->content) {
         debug('--------');
         debug('Attempting to process multi-page article');
         $multi_page_urls = array();
         $multi_page_content = array();
         while ($next_page_url = $extractor->getNextPageUrl()) {
             debug('--------');
             debug('Processing next page: ' . $next_page_url);
             // If we've got URL, resolve against $url
             if ($next_page_url = makeAbsoluteStr($effective_url, $next_page_url)) {
                 // check it's not what we have already!
                 if (!in_array($next_page_url, $multi_page_urls)) {
                     // it's not, so let's attempt to fetch it
                     $multi_page_urls[] = $next_page_url;
                     $_prev_ref = $http->referer;
Example #2
0
             $html = convert_to_utf8($html, $single_page_response['headers']);
             debug("Retrieved single-page view from {$effective_url}");
         }
         unset($single_page_response);
     }
 }
 if ($do_content_extraction) {
     debug('--------');
     debug('Attempting to extract content');
     $extract_result = $extractor->process($html, $effective_url);
     $readability = $extractor->readability;
     $content_block = $extract_result ? $extractor->getContent() : null;
     $extracted_title = $extract_result ? $extractor->getTitle() : '';
     // Deal with multi-page articles
     //die('Next: '.$extractor->getNextPageUrl());
     $is_multi_page = !$is_single_page && $extract_result && $extractor->getNextPageUrl();
     if ($options->multipage && $is_multi_page && $options->content) {
         debug('--------');
         debug('Attempting to process multi-page article');
         $multi_page_urls = array();
         $multi_page_content = array();
         while ($next_page_url = $extractor->getNextPageUrl()) {
             debug('--------');
             debug('Processing next page: ' . $next_page_url);
             // If we've got URL, resolve against $url
             if ($next_page_url = makeAbsoluteStr($effective_url, $next_page_url)) {
                 // check it's not what we have already!
                 if (!in_array($next_page_url, $multi_page_urls)) {
                     // it's not, so let's attempt to fetch it
                     $multi_page_urls[] = $next_page_url;
                     $_prev_ref = $http->referer;