/**
 * Verify that Filter::stripHeadTags() removes the <head> element from HTML.
 *
 * Two cases are checked: a short inline document and a complete page
 * loaded from the fixture files.
 */
public function testStripHeadTag()
{
    // Inline case: the expected output is the input without <head>...</head>.
    $input = '<html><head><title>test</title></head><body><h1>boo</h1></body>';
    $expected = '<html><body><h1>boo</h1></body>';
    $this->assertSame($expected, Filter::stripHeadTags($input));

    // Fixture case. The paths are relative, so they are resolved against the
    // working directory the test suite is started from.
    $input = file_get_contents('tests/fixtures/html_page.html');
    $expected = file_get_contents('tests/fixtures/html_head_stripped_page.html');

    // file_get_contents() returns false on failure. Without these guards a
    // missing fixture could make the comparison below pass vacuously.
    $this->assertTrue($input !== false, 'Unable to read fixture tests/fixtures/html_page.html');
    $this->assertTrue($expected !== false, 'Unable to read fixture tests/fixtures/html_head_stripped_page.html');

    // Strict comparison: the filtered page must be the exact expected string.
    $this->assertSame($expected, Filter::stripHeadTags($input));
}
/**
 * Extract the relevant content from the fetched HTML.
 *
 * When HTML is available, the head section is removed with
 * Filter::stripHeadTags() and the result is passed through
 * Encoding::convert() together with the stored encoding. The content is
 * then parsed with the rules returned by getRules() when that value is an
 * array, and with the candidates strategy otherwise. Every step is
 * reported through Logging::setMessage().
 *
 * @access public
 * @return bool True when the content property is anything other than an empty string
 */
public function parse()
{
    // Every log line is prefixed with the name of the called class.
    $caller = get_called_class();

    if (! $this->html) {
        Logging::setMessage($caller . ' No content fetched');
    } else {
        Logging::setMessage($caller . ' Fix encoding');
        Logging::setMessage($caller . ': HTTP Encoding "' . $this->encoding . '"');

        // Drop the head section first, then convert what is left.
        $this->html = Filter::stripHeadTags($this->html);
        $this->html = Encoding::convert($this->html, $this->encoding);

        Logging::setMessage($caller . ' Content length: ' . strlen($this->html) . ' bytes');

        $rules = $this->getRules();
        $hasRules = is_array($rules);

        if ($hasRules) {
            Logging::setMessage($caller . ' Parse content with rules');
            $this->parseContentWithRules($rules);
        } else {
            Logging::setMessage($caller . ' Parse content with candidates');
            $this->parseContentWithCandidates();
        }
    }

    // Reported on both paths, whether or not any HTML was available.
    Logging::setMessage($caller . ' Content length: ' . strlen($this->content) . ' bytes');
    Logging::setMessage($caller . ' Grabber done');

    return $this->content !== '';
}