<?php
/*
 * Crawl endpoint script: reads the start URL from $_POST['url'], runs
 * Crawler($startURL, 3) and stores the result of run() in $urls.
 *
 * NOTE(review): $_POST['url'] is used without any isset()/validation here.
 *
 * NOTE(review): as written the script stops at `var_dump($urls); die;`, so
 * nothing after that statement executes. Past it there is a second
 * terminating call, `exit($encoded)`, which emits the JSON-encoded URL list,
 * and a third, `var_dump($group); exit;`, inside the foreach. These look
 * like leftover debugging statements - confirm which exit is intended.
 *
 * The (currently unreachable) remainder throws an Exception when $urls is
 * not an array, splits $urls into chunks of 10, and passes each chunk with
 * the column list $cols to CKMoz::batchedQuery(). ACCESS_ID and SECRET_KEY
 * are not defined in this script; presumably they come from auth/auth.php -
 * verify.
 *
 * NOTE(review): the statements below share one physical line, so the `//`
 * comment near the end turns the rest of the line - including the
 * `$rs->code == 401` handling - into comment text. The original line breaks
 * appear to have been lost; restore them before relying on that branch.
 * The foreach body is not closed within this line.
 */
require_once '../vendor/autoload.php'; require_once 'crawler.php'; require_once 'CKMoz.php'; require_once 'auth/auth.php'; $startURL = $_POST['url']; $urls = []; $crawler = new Crawler($startURL, 3); $urls = $crawler->run(); var_dump($urls); die; $encoded = json_encode($urls); header('Content-type: application/json'); exit($encoded); if (!is_array($urls)) { throw new Exception("There's no URL's to make the consult."); } $groups = array_chunk($urls, 10); $metricQ = new CKMoz(ACCESS_ID, SECRET_KEY); $cols = array('title', 'canonURL', 'ExEquityLinks', 'links', 'mozRankURL', 'mozRankSubDomain', 'httpCode', 'pageAuth', 'domainAuth'); $result = array(); foreach ($groups as $group) { var_dump($group); exit; $rs = $metricQ->batchedQuery($group, $cols); // if the result is an error of authentication if ($rs->code == 401) { header('Content-type: application/json'); exit($rs->data); }
<?php

// Entry point: load class/Crawler.php and class/Utils.php, then build a
// Crawler with no constructor arguments and start it.
// The return value of run() is not used here.
require 'class/Crawler.php';
require 'class/Utils.php';

$crawler = new Crawler();
$crawler->run();
print_r($districts); jsReload(); exit; } else { //Crawl wards foreach ($province['districts'] as $districtId => $district) { $excerptDistricts = ['318' => 1, 471 => 1, 498 => 1, 755 => 1]; if (!empty($excerptDistricts[$districtId])) { continue; } if (!isset($district['wards']) || count($district['wards']) < 1) { echo '<pre>'; echo '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />'; $url = 'http://www.gso.gov.vn/danhmuc/dmhc.aspx?macap2=' . $districtId; $crawler = new Crawler($url); $listHtml = $crawler->run(); $pattern = '/<td align=\'left\' ><font color=\'blue\'>([0-9]{5})<\\/a><\\/td><td align=\'left\' id=\'link\' ><font color=\'blue\'>([^<>]*)<\\/td>/'; preg_match_all($pattern, $listHtml, $matches); // print_r($matches); $wards = []; foreach ($matches[1] as $k => $wardId) { $wards[$wardId] = ['name' => $matches[2][$k]]; } $_SESSION['provinceList'][$provinceId]['districts'][$districtId]['wards'] = $wards; echo 'Crawled ', htmlentities($province['name']), ' -> ', htmlentities($district['name']), '<br>'; if (count($wards) < 1) { echo "<br>Da bi chan boi site gov. Reload lai sau 5'</br>"; jsReload(120000); } print_r($wards); jsReload();