Beispiel #1
0
 /**
  * Fetch the page at the current URL, run it through the filter rules and
  * hand the extracted links to the URL guide.
  *
  * When the current URL yields no content, the "voidurl" hook is fired and
  * the crawler moves on (stop(), nextUrl(), sleep()) before trying again.
  * The retry is a loop rather than a recursive call so that a long run of
  * dead links cannot grow the call stack without bound.
  *
  * Hooks fired, in order, for a successfully opened page:
  *  - "opened"       with ["data" => page content, "url" => current URL]
  *  - "beforefilter" with ["url" => current URL], after the rule index is set
  *  - "filtered"     with ["filteredData" => result of the filter rules]
  */
 public function getContent()
 {
     // Keep trying until a link actually returns content.
     while (!($data = Component::Explorer()->openLink($this->presentUrl))) {
         // "voidurl" hook: runs after an invalid link was fetched; receives the url.
         Component::Hook()->exeHook("voidurl", ["url" => $this->presentUrl]);
         // Per the original author's note, stop() is called here mainly to
         // check whether any links are still left to crawl.
         $this->stop();
         // NOTE(review): presumably advances $this->presentUrl to the next
         // queued link; if it does not, this loop never ends — confirm.
         $this->nextUrl();
         $this->sleep();
     }

     $url = $this->presentUrl;

     // "opened" hook: runs after valid data was fetched; receives data and url.
     Component::Hook()->exeHook("opened", ["data" => $data, "url" => $url]);
     Component::Filter()->setRuleIndex($this->presentUrl);

     // "beforefilter" hook: runs once the rule index is set; receives the
     // url currently being visited. The url is re-read from the property on
     // each call, exactly as before, in case a hook changes it.
     Component::Hook()->exeHook("beforefilter", ["url" => $this->presentUrl]);
     $filteredData = Component::Filter()->exeRule($data);

     // "filtered" hook: runs after the crawler has filtered the data; receives
     // the $filteredData array, documented by the original author as holding:
     //   filteredHref - array of links extracted from the page
     //   filteredHtml - array of page content extracted from the page
     Component::Hook()->exeHook("filtered", ["filteredData" => $filteredData]);
     Component::GuideUrl()->setUrl($filteredData["filteredHref"]);
 }