if ('' !== trim($text)) { $rows[] = trim(strip_tags($text)); } } } $person->{$name} = $rows; } // 加上留言版和信箱 $links = new StdClass(); foreach ($this->findDomByCondition($persondoc, 'td', 'class', 'leg03_titbg06') as $td_dom) { $a_doms = $td_dom->getElementsByTagName('a'); if ($a_doms->length != 1) { continue; } $a_dom = $a_doms->item(0); $links->{$a_dom->nodeValue} = $this->getAbsoluteURL($link, $a_dom->getAttribute('href')); } $person->links = $links; $persons[] = $person; } echo json_encode($persons, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); } } if ($_SERVER['argv'][1]) { $url = $_SERVER['argv'][1]; } else { $url = 'http://www.ly.gov.tw/03_leg/0301_main/legList.action'; } $c = new Crawler(); $c->main($url);
try { $article = $this->getFromETTodayByGoogle(date('md', $time)); } catch (Exception $e) { continue; } if (!($headlinelog = HeadLineLog::find($article->time))) { HeadLineLog::insert(array('time' => $article->time, 'data' => json_encode($article, JSON_UNESCAPED_UNICODE))); } } // 再從 中央社粉絲團搜尋 ettoday 七天的資料 for ($i = 0; $i < 30; $i++) { $time = strtotime('00:00:00 -' . $i . 'day'); if (HeadLineLog::find($time)) { // 資料庫中已經有了就不用再找了 continue; } try { $article = $this->getFromCNAFacebookPage($time); } catch (Exception $e) { continue; } if ($article and !($headlinelog = HeadLineLog::find($article->time))) { HeadLineLog::insert(array('time' => $article->time, 'data' => json_encode($article, JSON_UNESCAPED_UNICODE))); } } exit; } } $c = new Crawler(); $c->main();
$params['__ASYNCPOST'] = "true"; $params['__VIEWSTATEENCRYPTED'] = ''; $params['ctl00$ctl00$cphMain$cphMain$btnAdvanceSearch'] = "查詢"; curl_setopt($curl, CURLOPT_URL, $url); curl_setopt($curl, CURLOPT_HTTPHEADER, array('X-MicrosoftAjax' => 'Delta=true')); curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($params)); curl_setopt($curl, CURLOPT_REFERER, $url); curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36'); $content = curl_exec($curl); if (preg_match('#很抱歉.*月之資料,因此無法與去年同期用電量作比較!#', $content, $matches)) { throw new NoTownDataException($matches[0]); } $doc = new DOMDocument(); @$doc->loadHTML('<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>' . $content . '</body></html>'); $ret = array(); foreach ($doc->getElementsByTagName('tr') as $tr_dom) { $td_doms = $tr_dom->getElementsByTagName('td'); if ($td_doms->length == 6) { $ret[trim($td_doms->item(2)->nodeValue)] = trim(str_replace(',', '', $td_doms->item(3)->nodeValue)); } elseif ($td_doms->length == 5) { $ret[trim($td_doms->item(1)->nodeValue)] = trim(str_replace(',', '', $td_doms->item(2)->nodeValue)); } elseif ($td_doms->length == 4) { $ret[trim($td_doms->item(0)->nodeValue)] = trim(str_replace(',', '', $td_doms->item(1)->nodeValue)); } } return $ret; } } $c = new Crawler(); $c->main($_SERVER['argv']);
// Data source: http://data.taipei/opendata/datalist/datasetMeta?oid=9b7d78d2-0d73-4b42-9b29-c1640efed0eb
// Taipei City automated 3D approximate building models.
// Mirrors the complete set of kmz/kml files from that dataset into the kmzs/ directory.
class Crawler
{
    /** Remote directory that every relative path handed to main() is resolved against. */
    const BASE_URL = 'http://adm3d.taipei.gov.tw/tcg/kml/Taipei3DBuilding/';

    /** Local directory that mirrors the remote tree. */
    const LOCAL_ROOT = 'kmzs/';

    /**
     * Local paths already processed during this run, used as a set
     * (path => true) so that files referencing each other cannot cause
     * unbounded recursion.
     *
     * @var array
     */
    protected $visited = array();

    /**
     * Download one file (unless a local copy already exists) and recursively
     * crawl every relative <href> found inside it.
     *
     * @param string $url path of the file relative to BASE_URL; the same
     *                    relative path is used below LOCAL_ROOT for the copy
     * @return void
     */
    public function main($url)
    {
        $f = self::LOCAL_ROOT . $url;

        // Each file only needs to be handled once per run; this also breaks
        // reference cycles between documents.
        if (isset($this->visited[$f])) {
            return;
        }
        $this->visited[$f] = true;

        error_log($f);

        // An href may introduce several new path segments at once, so the
        // directory has to be created recursively.
        $dir = dirname($f);
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            error_log("cannot create directory {$dir}");
            return;
        }

        if (!file_exists($f)) {
            $data = file_get_contents(self::BASE_URL . $url);
            if (false === $data) {
                // Do not store anything: an empty placeholder file would be
                // mistaken for a finished download on every later run.
                error_log("download failed: {$url}");
                return;
            }
            if (false === file_put_contents($f, $data)) {
                error_log("cannot write {$f}");
                return;
            }
        }

        $c = file_get_contents($f);
        if (false === $c) {
            error_log("cannot read {$f}");
            return;
        }

        // NOTE(review): hrefs come from remote content and are used to build
        // local paths as-is; a '..' segment could point outside LOCAL_ROOT.
        preg_match_all('#<href>([^<]*)</href>#', $c, $matches);
        foreach ($matches[1] as $new_url) {
            // Absolute links point outside the mirrored tree; skip them.
            if (strpos($new_url, 'http') === 0) {
                continue;
            }

            // hrefs use Windows-style separators; normalise them and resolve
            // the link relative to the directory of the current document.
            $this->main(dirname($url) . '/' . str_replace('\\', '/', $new_url));
        }
    }
}

$c = new Crawler();
$c->main('Taipei3DBuilding_nl.kml');