Esempio n. 1
0
 /**
  * Check that Filter::stripHeadTags() drops the <head> element while leaving
  * the rest of the markup untouched, first on a small inline document and
  * then on a full-page fixture.
  */
 public function testStripHeadTag()
 {
     // Minimal inline document: only the <head>...</head> part must disappear.
     $input = '<html><head><title>test</title></head><body><h1>boo</h1></body>';
     $expected = '<html><body><h1>boo</h1></body>';
     $this->assertSame($expected, Filter::stripHeadTags($input));

     // Fixture paths are relative to the current working directory.
     $pagePath = 'tests/fixtures/html_page.html';
     $strippedPath = 'tests/fixtures/html_head_stripped_page.html';

     // Fail loudly when a fixture is missing: file_get_contents() would
     // return false, and a loose comparison could then pass vacuously.
     $this->assertFileExists($pagePath);
     $this->assertFileExists($strippedPath);

     $input = file_get_contents($pagePath);
     $expected = file_get_contents($strippedPath);
     $this->assertSame($expected, Filter::stripHeadTags($input));
 }
Esempio n. 2
0
 /**
  * Run the grabber over the fetched HTML.
  *
  * When HTML is available, its head tags are stripped and it is converted
  * with the stored encoding; the content is then parsed with the rules
  * returned by getRules() when that is an array, or with candidates
  * otherwise. Every step is reported through Logging::setMessage().
  *
  * @access public
  * @return bool True when $this->content is not identical to an empty string
  */
 public function parse()
 {
     // Late-static-bound class name, used as the prefix of every log line.
     $caller = get_called_class();

     if (! $this->html) {
         Logging::setMessage($caller . ' No content fetched');
     } else {
         Logging::setMessage($caller . ' Fix encoding');
         Logging::setMessage($caller . ': HTTP Encoding "' . $this->encoding . '"');

         // Strip the head first, then convert what remains.
         $this->html = Filter::stripHeadTags($this->html);
         $this->html = Encoding::convert($this->html, $this->encoding);

         Logging::setMessage($caller . ' Content length: ' . strlen($this->html) . ' bytes');

         $rules = $this->getRules();

         if (! is_array($rules)) {
             Logging::setMessage($caller . ' Parse content with candidates');
             $this->parseContentWithCandidates();
         } else {
             Logging::setMessage($caller . ' Parse content with rules');
             $this->parseContentWithRules($rules);
         }
     }

     Logging::setMessage($caller . ' Content length: ' . strlen($this->content) . ' bytes');
     Logging::setMessage($caller . ' Grabber done');

     $hasContent = $this->content !== '';

     return $hasContent;
 }