/**
 * Parse an HTML document and collect its named meta tags and followable links.
 *
 * Side effects: the de-duplicated link list is stored in $this->links and the
 * meta-tag map in $this->metaTags.
 *
 * @param mixed $html Markup to parse; it is cast to string before loading.
 * @return array ['links' => hrefs of followable anchors (duplicates kept),
 *                'meta'  => map of lower-cased meta name => content]
 */
public function load($html)
{
    // Defaults guarantee these two keys exist even when the page omits them.
    $metaTags = ['canonical' => '', 'robots' => ''];

    $this->crawler->loadHtml((string) $html);

    foreach ($this->crawler->find('meta') as $meta) {
        /** @var Element $meta */
        $name = strtolower((string) $meta->attr('name'));

        // Tags such as <meta charset="..."> or <meta property="..."> carry no
        // name; recording them would pollute the map with an empty-string key.
        if ('' === $name) {
            continue;
        }

        $metaTags[$name] = $meta->attr('content');
    }

    $links = [];

    foreach ($this->crawler->find('a') as $link) {
        /** @var Element $link */
        $href = $link->attr('href');

        // An anchor without an href is not a navigable link.
        if (null === $href) {
            continue;
        }

        // rel is a whitespace-separated, case-insensitive token list, so
        // "nofollow" must be matched as a token (rel="nofollow noopener").
        $relTokens = preg_split(
            '/\s+/',
            strtolower((string) $link->attr('rel')),
            -1,
            PREG_SPLIT_NO_EMPTY
        );

        if (in_array('nofollow', $relTokens, true)) {
            continue;
        }

        $links[] = $href;
    }

    $this->links = array_unique($links);
    $this->metaTags = $metaTags;

    return ['links' => $links, 'meta' => $metaTags];
}
/**
 * Checks the xml() output of an empty <span> element: with no options it is
 * serialized as a self-closing tag after the XML prolog, and with
 * LIBXML_NOEMPTYTAG it is serialized with an explicit closing tag.
 */
public function testXmlWithOptions()
{
    $document = new Document();
    $document->loadHtml('<html><body><span></span></body></html>');

    $spans = $document->find('span');
    $span = $spans[0];

    $xmlProlog = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
    $selfClosing = $xmlProlog . '<span/>';
    $expanded = $xmlProlog . '<span></span>';

    $this->assertEquals($selfClosing, $span->xml());
    $this->assertEquals($expanded, $span->xml(LIBXML_NOEMPTYTAG));
}
/**
 * Passing an array to loadHtml() is expected to raise InvalidArgumentException.
 */
public function testLoadHtmlException()
{
    $this->setExpectedException('InvalidArgumentException');

    $invalidInput = ['element'];

    $subject = new Document();
    $subject->loadHtml($invalidInput);
}
/**
 * Passing null to loadHtml() is expected to be rejected.
 *
 * @expectedException InvalidArgumentException
 */
public function testLoadHtmlWithInvalidArgument()
{
    $invalidInput = null;

    $subject = new Document();
    $subject->loadHtml($invalidInput);
}
/**
 * Request callback
 *
 * Validates the callback, then invokes it immediately with three arguments:
 * the HTTP response returned by getHttpResponse(), the result of loading that
 * response into a new Document via loadHtml(), and this instance.
 *
 * @param callable $callback
 * @throws \Exception When $callback is not callable.
 * @return Curl
 */
public function setCallback($callback)
{
    if (!is_callable($callback)) {
        // Build a printable description first: formatting an array with %s
        // emits an "Array to string conversion" diagnostic, and an object
        // without __toString raises an \Error instead of the documented
        // \Exception.
        if (is_scalar($callback)) {
            $description = (string) $callback;
        } elseif (is_object($callback)) {
            $description = get_class($callback);
        } else {
            $description = gettype($callback);
        }

        throw new \Exception(sprintf('Error: %s is not a valid callable', $description));
    }

    $http_response = $this->getHttpResponse();

    $didom = new Document();
    $dom = $didom->loadHtml($http_response);

    call_user_func($callback, $http_response, $dom, $this);

    return $this;
}
/**
 * Checks Document::html() serialization of an empty <span>: called without
 * arguments it keeps the explicit closing tag, while html(0) yields the
 * self-closing form.
 */
public function testHtmlWithOptions()
{
    $markup = '<html><body><span></span></body></html>';

    $doc = new Document();
    $doc->loadHtml($markup);

    $defaultOutput = $doc->html();
    $this->assertEquals('<html><body><span></span></body></html>', $defaultOutput);

    $zeroOptionsOutput = $doc->html(0);
    $this->assertEquals('<html><body><span/></body></html>', $zeroOptionsOutput);
}