/**
 * Extracts the article body from the parsed page and stores a cleaned copy
 * in $this->content.
 *
 * The inner HTML of the `div#js_content` element is trimmed, then
 * `<noscript>` blocks and the `data-original` / `data-actualsrc` attributes
 * are removed. When that element cannot be located, $this->content is set
 * to an empty string instead of failing on a non-object.
 *
 * @return void
 */
private function parseHTML_content()
{
    $content = $this->htmlDom->find('div#js_content', 0);

    // Guard against a selector that matched nothing (presumably false/null
    // from find() -- confirm against the DOM library in use).
    if (!is_object($content)) {
        $this->content = '';
        return;
    }

    $html = trim($content->innerHtml());

    // Patterns are applied in order, each on the result of the previous one.
    $patterns = array(
        // "s" lets "." cross line breaks so multi-line <noscript> blocks are stripped too.
        "/<noscript>.*?<\\/noscript>/s",
        "/data-original=\".*?\"/",
        "/data-actualsrc=\".*?\"/",
    );
    $cleaned = preg_replace($patterns, "", $html);

    // preg_replace() returns null on a PCRE failure; keep the uncleaned
    // markup in that case rather than silently dropping the whole article.
    $this->content = ($cleaned === null) ? $html : $cleaned;
}
/**
 * Extract all script URLs from the given content.
 *
 * The content is loaded into $this->domParser, every `script` element is
 * looked up, and the `src` attribute of each one is collected. Script
 * elements without a non-empty `src` (e.g. inline scripts) are skipped, so
 * the returned list contains only actual URLs and is indexed from 0.
 *
 * @param mixed $content Markup handed to the DOM parser's load() method.
 * @return array List of `src` attribute values; empty when nothing is found.
 */
function extractScriptUrls($content)
{
    $this->domParser->load($content);
    $scriptNodes = $this->domParser->find('script');

    // Guard: only iterate when the lookup actually produced a list of nodes.
    if (!is_array($scriptNodes)) {
        return array();
    }

    $urls = array();
    foreach ($scriptNodes as $scriptNode) {
        $src = $scriptNode->getAttr('src');

        // A script element without a src attribute contributes no URL.
        if (is_string($src) && $src !== '') {
            $urls[] = $src;
        }
    }

    return $urls;
}
/**
 * Builds the catalog structure from the response body and returns it.
 *
 * Each `div.bookcont` element is treated as one catalog. Its title is the
 * plain text of the first `div.bookMl strong` element inside it, and its
 * chapters are the `div span a` links inside it, mapped as
 * link text => href. Catalogs whose title element cannot be found are
 * skipped.
 *
 * NOTE(review): $closure is accepted but never invoked by this method;
 * confirm how callers expect it to be used before wiring it in.
 *
 * @param mixed $closure Currently unused.
 * @return array Map of catalog title => (chapter title => chapter href).
 */
public function reduce($closure)
{
    $query = new ParserDom($this->response->getBody());
    $catalogs = array();

    $catalogNodes = $query->find('div.bookcont');
    // Guard: only iterate when the lookup actually produced a list of nodes.
    if (!is_array($catalogNodes)) {
        return $catalogs;
    }

    foreach ($catalogNodes as $catalog) {
        $titleNode = $catalog->find('div.bookMl strong', 0);
        // Without a title there is no key to file the chapters under.
        if (!is_object($titleNode)) {
            continue;
        }

        $chapter = array();
        $linkNodes = $catalog->find('div span a');
        if (is_array($linkNodes)) {
            foreach ($linkNodes as $node) {
                $chapter[$node->getPlainText()] = $node->getAttr('href');
            }
        }

        $catalogs[$titleNode->getPlainText()] = $chapter;
    }

    // Previously the assembled structure was discarded; hand it back to the caller.
    return $catalogs;
}
// Demo script: runs a series of findBreadthFirst() selector queries against
// a small fixed document and prints the plain text of each result.
$html = '<html> <head> <meta charset="utf-8"> <title>test</title> </head> <body> <p class="test_class test_class1">p1</p> <p class="test_class test_class2">p2</p> <p class="test_class test_class3">p3</p> <p id="p_id" class="test_class test_id">p_id</p> <p id="p_id_2" class="test_class test_id">p_id_2</p> <p>p4</p> <div id="test1">测试1</div> </body> </html>';
$dom = new ParserDom($html);

// Show the raw markup first, followed by blank lines.
echo $html, "\n\n\n";

// Single-node lookups, printed in order: label => array(selector, index).
$lookups = array(
    "p last one: " => array('p', -1),
    "have id first: " => array('p[id]', 0),
    "hava id p_id_2 first: " => array('p[id=p_id_2]', 0),
    "do not have id second: " => array('p[!id]', 1),
    "get by id test1: " => array('#test1', 0),
);
foreach ($lookups as $label => $lookup) {
    $node = $dom->findBreadthFirst($lookup[0], $lookup[1]);
    echo $label . $node->getPlainText() . "\n";
}

// Lookup by class: print the node's text, then its full class attribute.
$p1 = $dom->findBreadthFirst('p.test_class1', 0);
$p1Text = $p1->getPlainText();
echo "get by class test_class1: " . $p1Text . "\n";
$p1Class = $p1->getAttr('class');
echo "class: " . $p1Class . "\n";

// Without an index the query yields a list; print every entry on its own line.
$p_array = $dom->findBreadthFirst('p.test_class');
echo "\np list: \n";
foreach ($p_array as $p) {
    $text = $p->getPlainText();
    echo $text . "\n";
}
echo "\n";
/**
 * Creates the instance.
 *
 * Takes no arguments and performs no work of its own; construction is
 * delegated entirely to the parent class constructor.
 */
public function __construct()
{
    parent::__construct();
}