Пример #1
0
 /**
  * Initialize the collector.
  *
  * The entrance URL, the save path and the only-capture-URL-patterns value
  * are always forwarded to the collector. Each of the four list arguments is
  * forwarded only when it is non-empty; for an empty list the matching
  * collector setter is not called at all.
  *
  * @param mixed $entranceUrl            Forwarded to setRawEntranceUrl().
  * @param mixed $savePath               Forwarded to setSavePath().
  * @param array $allowCaptureHosts      Forwarded to setAllowedCaptureHosts() when non-empty.
  * @param array $whitelistUrls          Forwarded to setWhitelistUrls() when non-empty.
  * @param array $blacklistUrls          Forwarded to setBlacklistUrls() when non-empty.
  * @param array $urlPatterns            Forwarded to setUrlPatterns() when non-empty.
  * @param mixed $onlyCaptureUrlPatterns Forwarded to setOnlyCaptureUrlPatterns() unconditionally.
  */
 protected function initializeCollector($entranceUrl, $savePath, array $allowCaptureHosts, array $whitelistUrls, array $blacklistUrls, array $urlPatterns, $onlyCaptureUrlPatterns)
 {
     $collector = $this->collector;

     $collector->setRawEntranceUrl($entranceUrl);
     $collector->setSavePath($savePath);

     // Setter name => list to forward. The array order is the order in which
     // the setters are invoked.
     $optionalLists = [
         'setAllowedCaptureHosts' => $allowCaptureHosts,
         'setWhitelistUrls'       => $whitelistUrls,
         'setBlacklistUrls'       => $blacklistUrls,
         'setUrlPatterns'         => $urlPatterns,
     ];
     foreach ($optionalLists as $setter => $list) {
         if (empty($list)) {
             // Nothing supplied for this option: leave the collector untouched.
             continue;
         }
         $collector->{$setter}($list);
     }

     $collector->setOnlyCaptureUrlPatterns($onlyCaptureUrlPatterns);
 }
Пример #2
0
 /**
  * Smoke test: point the collector at a fixed entrance URL and run it.
  *
  * The test passes as long as run() returns without throwing.
  */
 function testRun()
 {
     $entranceUrl = 'http://minimal.ondrejsvestka.cz/1-3-3/index.html';

     $collector = $this->collector;
     $collector->setRawEntranceUrl($entranceUrl);
     $collector->run();

     // NOTE(review): the literal '1' is never empty, so this assertion cannot
     // fail; it does not inspect anything run() produced.
     $this->assertNotEmpty('1');
 }
Пример #3
0
<?php

include __DIR__ . '/../vendor/autoload.php';
use Slince\Collector\Collector;
use Slince\Event\Event;
// Collector constructor arguments, in order: target directory, entrance URL.
$saveDirectory = __DIR__ . '/html';
$entranceUrl = 'http://demo.sc.chinaz.com/Files/DownLoad/moban/201604/moban1178/index.html';

$collector = new Collector($saveDirectory, $entranceUrl);

// Listener for EVENT_CAPTURE_URL_REPOSITORY: prints the URL of the event's
// 'repository' argument, prefixed with "Begin Capture ".
$announceStart = function (Event $event) {
    $target = $event->getArgument('repository');
    echo 'Begin Capture ';
    echo $target->getUrl()->getUrlString();
    echo "\r\n";
};

// Listener for EVENT_CAPTURED_URL_REPOSITORY: prints the URL of the event's
// 'repository' argument followed by " Captured OK!".
$announceDone = function (Event $event) {
    $target = $event->getArgument('repository');
    $line = $target->getUrl()->getUrlString() . " Captured OK!\r\n";
    echo $line;
};

$collector->getDispatcher()->bind(Collector::EVENT_CAPTURE_URL_REPOSITORY, $announceStart);
$collector->getDispatcher()->bind(Collector::EVENT_CAPTURED_URL_REPOSITORY, $announceDone);

$collector->run();