/**
 * Extracts the inner HTML of the `div#js_content` element into
 * $this->content and strips markup that should not be kept:
 * `<noscript>` blocks and the `data-original` / `data-actualsrc`
 * attributes.
 *
 * When the container element is not present, $this->content is set to an
 * empty string instead of failing on a call to a non-object.
 *
 * @return void
 */
private function parseHTML_content()
{
    $node = $this->htmlDom->find('div#js_content', 0);
    if (!is_object($node)) {
        // Container not found in the document: nothing to extract.
        $this->content = '';
        return;
    }

    $html = trim($node->innerHtml());

    $patterns = array(
        // The "s" modifier lets "." match newlines, so <noscript> blocks
        // that span several lines are removed as well.
        '/<noscript>.*?<\/noscript>/s',
        '/data-original=".*?"/',
        '/data-actualsrc=".*?"/',
    );
    $cleaned = preg_replace($patterns, '', $html);

    // preg_replace() returns null on a PCRE error (e.g. backtrack limit);
    // keep the unstripped HTML in that case rather than losing the content.
    $this->content = ($cleaned === null) ? $html : $cleaned;
}
/**
 * Extracts every script URL from the given content.
 *
 * The content is loaded into $this->domParser and the `src` attribute of
 * each `<script>` element is collected. Script elements without a usable
 * `src` (null or empty string, e.g. inline scripts) are skipped so the
 * result contains only actual links.
 *
 * @param string $content Markup to scan for script elements.
 * @return array Sequentially indexed list of `src` attribute values.
 */
public function extractScriptUrls($content)
{
    $this->domParser->load($content);
    $scriptNodes = $this->domParser->find('script');

    $urls = array();
    if (!is_array($scriptNodes)) {
        // Nothing iterable came back from the parser: no scripts found.
        return $urls;
    }

    foreach ($scriptNodes as $scriptNode) {
        $src = $scriptNode->getAttr('src');
        // Skip script elements that carry no external source.
        if ($src === null || $src === '') {
            continue;
        }
        $urls[] = $src;
    }

    return $urls;
}
/**
 * Parses the response body into a catalog map and hands it to $closure.
 *
 * Each `div.bookcont` element becomes one catalog: its key is the plain
 * text of the first `div.bookMl strong` element inside it, and its value
 * maps the plain text of every `div span a` link to that link's `href`.
 * Links sharing the same text within one catalog overwrite each other,
 * as do catalogs sharing the same title.
 *
 * NOTE(review): the previous implementation built this map and then
 * discarded it without ever calling $closure. Passing the map to the
 * callback is the presumed intent; confirm against the callers.
 *
 * @param callable $closure Receives the catalog map as its only argument.
 * @return mixed The callback's return value, or the catalog map itself
 *               when $closure is not callable.
 */
public function reduce($closure)
{
    $query = new ParserDom($this->response->getBody());
    $catalogs = array();

    foreach ($query->find('div.bookcont') as $catalog) {
        $titleNode = $catalog->find('div.bookMl strong', 0);
        if (!is_object($titleNode)) {
            // A catalog without a title cannot be keyed; skip it rather
            // than failing on a call to a non-object.
            continue;
        }

        $chapter = array();
        foreach ($catalog->find('div span a') as $node) {
            $chapter[$node->getPlainText()] = $node->getAttr('href');
        }

        $catalogs[$titleNode->getPlainText()] = $chapter;
    }

    if (is_callable($closure)) {
        return call_user_func($closure, $catalogs);
    }

    return $catalogs;
}