/**
 * Extracts candidate proxies from the page at self::URL and returns only
 * those for which the parent's checkProxy() reports a truthy result.
 *
 * Rows are extracted with MultipleRowExtractor, configured from the file
 * located at __DIR__ . self::RULE_FILE. Each row is expected to expose
 * 'ip' and 'port' keys (assumption based on how the rows are read here;
 * the rule file itself is not visible from this method).
 *
 * @return array List of values produced by $this->getProxy() for every
 *               row that passed the check; empty when none passed.
 */
public function generateProxy()
{
    // NOTE(review): the meaning of the 2nd/3rd GeneralCrawler arguments
    // (null, true) is not visible here -- confirm against the crawler class.
    $rowExtractor = new MultipleRowExtractor(
        new GeneralCrawler(self::URL, null, true),
        __DIR__ . self::RULE_FILE
    );

    $workingProxies = array();

    foreach ($rowExtractor->extract() as $row) {
        // Record the check outcome on the row itself so getProxy() receives it.
        $row['working'] = parent::checkProxy($row['ip'], $row['port']);

        if ($row['working']) {
            $workingProxies[] = $this->getProxy($row);
        }
    }

    return $workingProxies;
}
<?php

/**
 * Example script: crawls https://github.com/trending, runs a
 * MultipleRowExtractor over it using the JSON definition stored in
 * Data/git-repo.json (next to this script), and dumps the extracted
 * rows with print_r().
 */

use Scraper\Scrape\Crawler\Types\GeneralCrawler;
use Scraper\Scrape\Extractor\Types\MultipleRowExtractor;

require_once __DIR__ . '/../vendor/autoload.php';

// Pin the default timezone explicitly before any crawling/extraction runs.
date_default_timezone_set('UTC');

// JSON file handed to the extractor as its second constructor argument.
$definitionPath = __DIR__ . "/Data/git-repo.json";

$trendingExtractor = new MultipleRowExtractor(
    new GeneralCrawler('https://github.com/trending'),
    $definitionPath
);

print_r($trendingExtractor->extract());