コード例 #1
0
 /**
  * Reads the inner HTML of the `div#js_content` element from $this->htmlDom
  * and stores a cleaned copy in $this->content.
  *
  * Cleaning removes <noscript>...</noscript> blocks and the `data-original`
  * and `data-actualsrc` attributes. When the container element is not found,
  * $this->content is set to an empty string instead of failing on a
  * non-object method call.
  */
 private function parseHTML_content()
 {
     $container = $this->htmlDom->find('div#js_content', 0);
     if (!is_object($container)) {
         // No matching element in this document: nothing to extract.
         $this->content = '';
         return;
     }

     $html = trim($container->innerHtml());

     // Patterns are applied in order, each on the result of the previous one.
     $patterns = array(
         // "s" (dot-all) lets "." match newlines, so <noscript> blocks that
         // span several lines are removed as well.
         '/<noscript>.*?<\/noscript>/s',
         '/data-original=".*?"/',
         '/data-actualsrc=".*?"/',
     );
     $cleaned = preg_replace($patterns, '', $html);

     // preg_replace() returns null on a PCRE error; fall back to the
     // uncleaned HTML rather than storing null.
     $this->content = ($cleaned === null) ? $html : $cleaned;
 }
コード例 #2
0
 /**
  * Extracts all script links from the given content.
  *
  * Loads $content into $this->domParser, looks up every `script` node and
  * maps each node to the value of its `src` attribute as reported by
  * getAttr().
  *
  * @param $content content to load into the DOM parser
  * @return array one entry per script node found
  */
 function extractScriptUrls($content)
 {
     $this->domParser->load($content);

     // Reads the `src` attribute of a single script node.
     $readSrc = function ($node) {
         return $node->getAttr('src');
     };

     return array_map($readSrc, $this->domParser->find('script'));
 }
コード例 #3
0
ファイル: Html.php プロジェクト: bigxu/yii2-container-spider
 /**
  * Walks every `div.bookcont` element of the response body and builds, in a
  * local array, a map of catalog title => (link text => href).
  *
  * NOTE(review): the visible code neither returns this map nor uses
  * $closure. Confirm whether the result was meant to be returned or handed
  * to $closure.
  *
  * @param $closure not used by the visible body
  */
 public function reduce($closure)
 {
     $dom = new ParserDom($this->response->getBody());
     $catalogs = array();

     foreach ($dom->find('div.bookcont') as $section) {
         // Link text => href for every anchor matched inside this section.
         $links = array();
         foreach ($section->find('div span a') as $anchor) {
             $text = $anchor->getPlainText();
             $links[$text] = $anchor->getAttr('href');
         }

         // The first `div.bookMl strong` node supplies the catalog title.
         $heading = $section->find('div.bookMl strong', 0);
         $catalogs[$heading->getPlainText()] = $links;
     }
 }