Beispiel #1
0
 /**
  * Launcher constructor: stores the configuration and the component list,
  * calls bind(), then builds a crawler from the configured settings and
  * passes it to begin().
  *
  * NOTE(review): bind() is not visible here; presumably it registers the
  * entries of $components - confirm.
  *
  * @param Interfaces\ConfigInterface $Config     configuration object
  * @param array                      $components all components to register
  */
 public function __construct(Interfaces\ConfigInterface $Config, $components)
 {
     $this->config = $Config;
     $this->components = $components;
     $this->bind();

     // Read the crawler settings from the configuration.
     $startUrl = $this->config->get("presentUrl");
     $depthLimit = $this->config->get("maxLevel");
     $delay = $this->config->get("sleepTime");

     // Create the crawler component and start it.
     $crawler = Component::Crawler($depthLimit, $startUrl, $delay);
     $this->begin($crawler);
 }
Beispiel #2
0
 /**
  * Run the regex filter rules of the current rule set against a page.
  *
  * Each pattern in $this->hrefRule[$this->ruleIndex] is matched against
  * $content and the first capture group of every match is appended to one
  * list of links, which is then passed to the CorrectHref component.
  * Each pattern in $this->htmlRule[$this->ruleIndex] is matched the same way
  * and its first-capture-group matches are stored under the rule's own key.
  *
  * @param string $content text the patterns are matched against
  * @return array ["filteredHref" => collected links,
  *                "filteredHtml" => captures keyed like the html rules]
  */
 public function exeRule($content)
 {
     $filteredHtml = [];
     $filteredHref = [];
     // Match links.
     foreach ($this->hrefRule[$this->ruleIndex] as $pattern) {
         // Skip patterns that failed, matched nothing, or have no capture
         // group, so stale or missing data is never merged.
         if (preg_match_all($pattern, $content, $matches) && isset($matches[1])) {
             // array_merge appends and renumbers; the `+` union operator
             // would silently drop every match whose numeric index is
             // already present from an earlier pattern.
             $filteredHref = array_merge($filteredHref, $matches[1]);
         }
     }
     // Hand the collected links to the CorrectHref component for processing.
     // NOTE(review): the return value is unused, so checkHref() presumably
     // adjusts the list by reference - confirm.
     Component::CorrectHref()->checkHref($filteredHref);
     // Match content.
     foreach ($this->htmlRule[$this->ruleIndex] as $key => $pattern) {
         preg_match_all($pattern, $content, $matches);
         $filteredHtml[$key] = $matches[1];
     }
     return ["filteredHref" => $filteredHref, "filteredHtml" => $filteredHtml];
 }
Beispiel #3
0
 /**
  * Decide whether the crawler has to stop, and stop it if so.
  *
  * The crawler stops when the URL queue is empty, or when the present level
  * equals the maximum level (a maximum level of 0 disables that check).
  * On stop, the "stop" hook is executed and the script terminates.
  *
  * @return bool true when crawling may continue; the method does not return
  *              otherwise because the script exits
  */
 public function stop()
 {
     $queueIsEmpty = Component::UrlQueue()->lengthQueue() == 0;
     $levelLimitHit = $this->maxLevel != 0 && $this->presentLevel == $this->maxLevel;

     if (!$queueIsEmpty && !$levelLimitHit) {
         return true;
     }

     // "stop" hook: executed right before the crawler stops; it receives
     // no arguments.
     Component::Hook()->exeHook("stop");
     exit;
 }
Beispiel #4
0
 /**
  * Load the "db" entry of the configuration into $this->settings, then run
  * the parent constructor.
  */
 public function __construct()
 {
     // The settings are assigned before the parent constructor is called.
     // NOTE(review): presumably the parent constructor reads
     // $this->settings - confirm before reordering.
     $dbSettings = Component::Config()->get("db");
     $this->settings = $dbSettings;

     parent::__construct();
 }
Beispiel #5
0
 /**
  * Store a component in $this->_components under the name returned by its
  * getName() method.
  *
  * @param \Core\Component $component component to store
  * @return \Core\Component the component that was passed in
  */
 function addComponent(\Core\Component $component)
 {
     $name = $component->getName();
     $this->_components[$name] = $component;

     return $component;
 }
Beispiel #6
0
 /**
  * Constructor.
  * Reads the "hookPrefix" setting and instantiates the application class
  * whose name is stored in the "appName" setting.
  */
 public function __construct()
 {
     $this->hookPrefix = Component::Config()->get("hookPrefix");

     // "appName" holds a class name; it is instantiated without
     // constructor arguments.
     $appClass = Component::Config()->get("appName");
     $this->app = new $appClass();
 }
Beispiel #7
0
 /**
  * Constructor: does no work of its own and only delegates to the parent
  * constructor.
  */
 public function __construct()
 {
     parent::__construct();
 }
Beispiel #8
0
 /**
  * Run the callback filter rules of the current rule set against a page.
  *
  * The content is loaded into an HtmlDom component. If a link rule exists
  * for $this->ruleIndex it is called with that DOM object, and a non-empty
  * result is passed to the CorrectHref component. If a content rule exists
  * it is called with the same DOM object. A missing rule yields "".
  *
  * @param mixed $content content loaded into the HtmlDom component
  * @return array ["filteredHref" => link rule result or "",
  *                "filteredHtml" => content rule result or ""]
  */
 public function exeRule($content)
 {
     $dom = Component::HtmlDom();
     $dom->load($content);

     // Match links.
     $filteredHref = "";
     if (!empty($this->hrefRule[$this->ruleIndex])) {
         $filteredHref = call_user_func($this->hrefRule[$this->ruleIndex], $dom);
         if (!empty($filteredHref)) {
             // Hand the extracted links to the CorrectHref component.
             Component::CorrectHref()->checkHref($filteredHref);
         }
     }

     // Match content.
     $filteredHtml = !empty($this->htmlRule[$this->ruleIndex])
         ? call_user_func($this->htmlRule[$this->ruleIndex], $dom)
         : "";

     // Drop the local reference to the DOM object before returning.
     unset($dom);

     return ["filteredHref" => $filteredHref, "filteredHtml" => $filteredHtml];
 }