示例#1
0
文件: FilterTest.php 项目: nrbrt/news
 /**
  * Verify that Filter::stripHeadTags() removes the <head> element, first on
  * an inline snippet and then on the HTML fixture pages.
  */
 public function testStripHeadTag()
 {
     $input = '<html><head><title>test</title></head><body><h1>boo</h1></body>';
     $expected = '<html><body><h1>boo</h1></body>';
     // Strict comparison: assertEquals() compares loosely, so a non-string
     // result (false/null) could be considered equal to an empty string.
     $this->assertSame($expected, Filter::stripHeadTags($input));

     // Input fixture => fixture holding the expected head-stripped output.
     $fixtures = array(
         'tests/fixtures/html4_page.html' => 'tests/fixtures/html4_head_stripped_page.html',
         'tests/fixtures/html_page.html' => 'tests/fixtures/html_head_stripped_page.html',
     );

     foreach ($fixtures as $inputFile => $expectedFile) {
         $input = file_get_contents($inputFile);
         $expected = file_get_contents($expectedFile);

         // file_get_contents() returns false on failure; fail loudly instead
         // of comparing two failed reads against each other.
         $this->assertNotSame(false, $input, 'Unable to read fixture ' . $inputFile);
         $this->assertNotSame(false, $expected, 'Unable to read fixture ' . $expectedFile);

         $this->assertSame($expected, Filter::stripHeadTags($input), 'Unexpected output for ' . $inputFile);
     }
 }
示例#2
0
 /**
  * Convert the stored HTML and remove its head tags.
  *
  * The source encoding passed to Encoding::convert() is the one found in the
  * document's meta tag; when none is found, $this->encoding (logged below as
  * the HTTP encoding) is used instead. Both encodings are logged afterwards.
  */
 public function prepareHtml()
 {
     $caller = get_called_class();
     $metaEncoding = XmlParser::getEncodingFromMetaTag($this->html);

     // Prefer the encoding declared by the document itself.
     $sourceEncoding = $metaEncoding ?: $this->encoding;

     $this->html = Encoding::convert($this->html, $sourceEncoding);
     $this->html = Filter::stripHeadTags($this->html);

     Logger::setMessage(
         $caller . ': HTTP Encoding "' . $this->encoding . '" ; HTML Encoding "' . $metaEncoding . '"'
     );
 }
示例#3
0
 /**
  * Run the grabber over the fetched HTML.
  *
  * Returns immediately with success when processing is flagged to be skipped.
  * Otherwise, when HTML is available, it is converted using the encoding
  * found in its meta tag (falling back to $this->encoding), its head tags
  * are stripped, and the content is parsed either with the rules returned by
  * getRules() (when that is an array) or with candidates.
  *
  * @access public
  * @return bool True when processing is skipped or the resulting content is not an empty string
  */
 public function parse()
 {
     if ($this->skip_processing) {
         return true;
     }

     $caller = get_called_class();

     if (!$this->html) {
         Logger::setMessage($caller . ': No content fetched');
     } else {
         $metaEncoding = XmlParser::getEncodingFromMetaTag($this->html);
         $sourceEncoding = $metaEncoding ?: $this->encoding;

         Logger::setMessage(
             $caller . ': HTTP Encoding "' . $this->encoding . '" ; HTML Encoding "' . $metaEncoding . '"'
         );

         // Encode everything in UTF-8 (per the original note), then drop the head tags
         $this->html = Encoding::convert($this->html, $sourceEncoding);
         $this->html = Filter::stripHeadTags($this->html);

         Logger::setMessage($caller . ': Content length: ' . strlen($this->html) . ' bytes');

         $rules = $this->getRules();

         if (!is_array($rules)) {
             Logger::setMessage($caller . ': Parse content with candidates');
             $this->parseContentWithCandidates();
         } else {
             Logger::setMessage($caller . ': Parse content with rules');
             $this->parseContentWithRules($rules);
         }
     }

     Logger::setMessage($caller . ': Content length: ' . strlen($this->content) . ' bytes');
     Logger::setMessage($caller . ': Grabber done');

     $hasContent = $this->content !== '';

     return $hasContent;
 }