/**
 * Launcher constructor.
 *
 * Stores the configuration object and the component list, calls bind()
 * (per the original documentation this registers the supplied components),
 * then reads the crawl settings from the configuration and starts the
 * crawler component through begin().
 *
 * @param Interfaces\ConfigInterface $Config     Configuration object
 * @param array                      $components All components to register
 */
public function __construct(Interfaces\ConfigInterface $Config, $components)
{
    $this->config = $Config;
    $this->components = $components;
    $this->bind();

    // Read the crawl settings in a fixed order: presentUrl, maxLevel, sleepTime.
    $settings = [];
    foreach (["presentUrl", "maxLevel", "sleepTime"] as $key) {
        $settings[$key] = $this->config->get($key);
    }

    // Build the crawler component and hand it to begin().
    $crawler = Component::Crawler(
        $settings["maxLevel"],
        $settings["presentUrl"],
        $settings["sleepTime"]
    );
    $this->begin($crawler);
}
/**
 * Apply the regex filter rules of the current rule set to a page.
 *
 * Every pattern in $this->hrefRule[$this->ruleIndex] is matched against the
 * content and the first capture group of each match is collected as a link.
 * Every pattern in $this->htmlRule[$this->ruleIndex] is matched likewise and
 * its first-capture-group results are stored under the rule's own key.
 *
 * @param string $content Page content to match against
 *
 * @return array ["filteredHref" => list of captured links,
 *                "filteredHtml" => captured values keyed by rule key]
 */
public function exeRule($content)
{
    $filteredHtml = [];
    $filteredHref = [];

    // Match links.
    $hrefRules = isset($this->hrefRule[$this->ruleIndex]) ? $this->hrefRule[$this->ruleIndex] : [];
    foreach ($hrefRules as $pattern) {
        preg_match_all($pattern, $content, $filteredData);
        // A pattern without a capture group produces no index 1; skip it.
        if (!empty($filteredData[1])) {
            // Append with array_merge: the "+" union operator keeps the left-hand
            // value for every integer key that already exists, which silently
            // dropped matches from every rule after the first.
            $filteredHref = array_merge($filteredHref, $filteredData[1]);
        }
    }

    // Post-process the collected links. The return value is not used here, so
    // checkHref() presumably adjusts the array by reference - TODO confirm.
    Component::CorrectHref()->checkHref($filteredHref);

    // Match content.
    $htmlRules = isset($this->htmlRule[$this->ruleIndex]) ? $this->htmlRule[$this->ruleIndex] : [];
    foreach ($htmlRules as $k => $pattern) {
        preg_match_all($pattern, $content, $filteredData);
        // Fall back to an empty list when the pattern has no capture group.
        $filteredHtml[$k] = isset($filteredData[1]) ? $filteredData[1] : [];
    }

    return ["filteredHref" => $filteredHref, "filteredHtml" => $filteredHtml];
}
/**
 * Decide whether the crawl should stop, and terminate the script if so.
 *
 * The crawl stops when the URL queue is empty, or when a non-zero maxLevel
 * is configured and the present level equals it. On stop, the "stop" hook
 * is executed and the script exits, so the method does not return.
 *
 * @return bool true when the crawl should continue
 */
public function stop()
{
    $queueIsEmpty = Component::UrlQueue()->lengthQueue() == 0;
    $levelLimitReached = $this->maxLevel != 0 && $this->presentLevel == $this->maxLevel;

    if (!($queueIsEmpty || $levelLimitReached)) {
        return true;
    }

    // "stop" hook: executed before the crawler stops; takes no arguments.
    Component::Hook()->exeHook("stop");
    exit;
}
/**
 * Constructor: loads the "db" configuration entry into $this->settings and
 * then delegates to the parent constructor.
 *
 * NOTE(review): the settings are assigned before parent::__construct() runs,
 * presumably because the parent reads them - confirm before reordering.
 */
public function __construct()
{
    $dbSettings = Component::Config()->get("db");
    $this->settings = $dbSettings;

    parent::__construct();
}
/**
 * Store a component in $this->_components, keyed by the name it reports.
 *
 * An existing entry under the same name is overwritten.
 *
 * @param \Core\Component $component Component to store
 *
 * @return \Core\Component The component that was just stored
 */
public function addComponent(\Core\Component $component)
{
    // Visibility is now explicit; "public" matches the previous implicit default.
    $this->_components[$component->getName()] = $component;

    return $component;
}
/**
 * Constructor: instantiates the application class.
 *
 * Reads "hookPrefix" and "appName" from the configuration component, then
 * creates an instance of the class named by "appName" (no constructor
 * arguments) and keeps it in $this->app.
 */
public function __construct()
{
    $this->hookPrefix = Component::Config()->get("hookPrefix");

    // The configured value is used directly as the class name to instantiate.
    $appClass = Component::Config()->get("appName");
    $this->app = new $appClass();
}
/**
 * Constructor: does no work of its own and only delegates to the parent
 * constructor.
 */
public function __construct()
{
    parent::__construct();
}
/**
 * Apply the callback filter rules of the current rule set to a page.
 *
 * The content is loaded into an HtmlDom component, which is then passed to
 * the link callback ($this->hrefRule[$this->ruleIndex]) and the content
 * callback ($this->htmlRule[$this->ruleIndex]) when they are configured.
 * A missing or empty rule yields an empty string for its result.
 *
 * @param string $content Page content to load into the DOM helper
 *
 * @return array ["filteredHref" => link callback result or "",
 *                "filteredHtml" => content callback result or ""]
 */
public function exeRule($content)
{
    $dom = Component::HtmlDom();
    $dom->load($content);

    // Match links.
    $links = "";
    if (!empty($this->hrefRule[$this->ruleIndex])) {
        $links = call_user_func_array($this->hrefRule[$this->ruleIndex], [$dom]);
        if (!empty($links)) {
            // Post-process the extracted links.
            Component::CorrectHref()->checkHref($links);
        }
    }

    // Match content.
    $contents = empty($this->htmlRule[$this->ruleIndex])
        ? ""
        : call_user_func_array($this->htmlRule[$this->ruleIndex], [$dom]);

    unset($dom);

    return ["filteredHref" => $links, "filteredHtml" => $contents];
}