/**
 * Fetch the content of the current URL, filter it, and queue the links found.
 *
 * If the current URL yields no content, the "voidurl" hook is fired and the
 * crawler advances to the next URL, repeating until a URL returns content.
 * The retry is done with a loop rather than by calling this method
 * recursively, so a long run of dead links cannot grow the call stack.
 *
 * Hooks fired, in order:
 *  - "voidurl"      after an invalid link was fetched; receives `url`.
 *  - "opened"       after valid data was fetched; receives `data` (the
 *                   content) and `url` (the current URL).
 *  - "beforefilter" after the rule index has been set; receives `url`.
 *  - "filtered"     after the data has been filtered; receives
 *                   `filteredData`, an array holding `filteredHref`
 *                   (extracted links) and `filteredHtml` (extracted page
 *                   content), per the original author's notes.
 */
public function getContent()
{
    $data = Component::Explorer()->openLink($this->presentUrl);

    // Keep moving to the next URL until one of them returns content.
    // NOTE(review): the check is a truthiness test, so a body of "0" or ""
    // is treated the same as a failed fetch — confirm this is intended.
    while (!$data) {
        Component::Hook()->exeHook("voidurl", ["url" => $this->presentUrl]);

        // Per the original author's note, stop() is mainly here to check
        // whether any links remain; presumably it ends the crawl when none
        // do — verify against its implementation.
        $this->stop();
        $this->nextUrl();
        $this->sleep();

        $data = Component::Explorer()->openLink($this->presentUrl);
    }

    Component::Hook()->exeHook("opened", ["data" => $data, "url" => $this->presentUrl]);

    // Select the filter rule set for this URL before running the filter.
    Component::Filter()->setRuleIndex($this->presentUrl);
    Component::Hook()->exeHook("beforefilter", ["url" => $this->presentUrl]);

    $filteredData = Component::Filter()->exeRule($data);
    Component::Hook()->exeHook("filtered", ["filteredData" => $filteredData]);

    // Hand the extracted links to the URL guide.
    Component::GuideUrl()->setUrl($filteredData["filteredHref"]);
}