if ('' !== trim($text)) {
                            $rows[] = trim(strip_tags($text));
                        }
                    }
                }
                $person->{$name} = $rows;
            }
            // 加上留言版和信箱
            $links = new StdClass();
            foreach ($this->findDomByCondition($persondoc, 'td', 'class', 'leg03_titbg06') as $td_dom) {
                $a_doms = $td_dom->getElementsByTagName('a');
                if ($a_doms->length != 1) {
                    continue;
                }
                $a_dom = $a_doms->item(0);
                $links->{$a_dom->nodeValue} = $this->getAbsoluteURL($link, $a_dom->getAttribute('href'));
            }
            $person->links = $links;
            $persons[] = $person;
        }
        echo json_encode($persons, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    }
}
// Script entry point.
// Use the first command-line argument as the start URL when one is given;
// otherwise fall back to the built-in default list URL.
// !empty() keeps the original truthiness test but does not raise an
// "undefined index/array key" notice when the script runs with no arguments.
$url = !empty($_SERVER['argv'][1])
    ? $_SERVER['argv'][1]
    : 'http://www.ly.gov.tw/03_leg/0301_main/legList.action';
$c = new Crawler();
$c->main($url);
Exemple #2
0
            try {
                $article = $this->getFromETTodayByGoogle(date('md', $time));
            } catch (Exception $e) {
                continue;
            }
            if (!($headlinelog = HeadLineLog::find($article->time))) {
                HeadLineLog::insert(array('time' => $article->time, 'data' => json_encode($article, JSON_UNESCAPED_UNICODE)));
            }
        }
        // 再從 中央社粉絲團搜尋 ettoday 七天的資料
        for ($i = 0; $i < 30; $i++) {
            $time = strtotime('00:00:00 -' . $i . 'day');
            if (HeadLineLog::find($time)) {
                // 資料庫中已經有了就不用再找了
                continue;
            }
            try {
                $article = $this->getFromCNAFacebookPage($time);
            } catch (Exception $e) {
                continue;
            }
            if ($article and !($headlinelog = HeadLineLog::find($article->time))) {
                HeadLineLog::insert(array('time' => $article->time, 'data' => json_encode($article, JSON_UNESCAPED_UNICODE)));
            }
        }
        exit;
    }
}
// Script entry point: build the crawler and start it with no arguments.
$crawler = new Crawler();
$crawler->main();
Exemple #3
0
        $params['__ASYNCPOST'] = "true";
        $params['__VIEWSTATEENCRYPTED'] = '';
        $params['ctl00$ctl00$cphMain$cphMain$btnAdvanceSearch'] = "查詢";
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_HTTPHEADER, array('X-MicrosoftAjax' => 'Delta=true'));
        curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($params));
        curl_setopt($curl, CURLOPT_REFERER, $url);
        curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36');
        $content = curl_exec($curl);
        if (preg_match('#很抱歉.*月之資料,因此無法與去年同期用電量作比較!#', $content, $matches)) {
            throw new NoTownDataException($matches[0]);
        }
        $doc = new DOMDocument();
        @$doc->loadHTML('<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>' . $content . '</body></html>');
        $ret = array();
        foreach ($doc->getElementsByTagName('tr') as $tr_dom) {
            $td_doms = $tr_dom->getElementsByTagName('td');
            if ($td_doms->length == 6) {
                $ret[trim($td_doms->item(2)->nodeValue)] = trim(str_replace(',', '', $td_doms->item(3)->nodeValue));
            } elseif ($td_doms->length == 5) {
                $ret[trim($td_doms->item(1)->nodeValue)] = trim(str_replace(',', '', $td_doms->item(2)->nodeValue));
            } elseif ($td_doms->length == 4) {
                $ret[trim($td_doms->item(0)->nodeValue)] = trim(str_replace(',', '', $td_doms->item(1)->nodeValue));
            }
        }
        return $ret;
    }
}
// Script entry point: hand the raw command-line argument vector to the crawler.
$cliArgs = $_SERVER['argv'];
$crawler = new Crawler();
$crawler->main($cliArgs);
Exemple #4
0
// Data source: http://data.taipei/opendata/datalist/datasetMeta?oid=9b7d78d2-0d73-4b42-9b29-c1640efed0eb
// Taipei City automated 3D approximate building models
// Crawl the complete set of kmz/kml files from there into the kmzs/ directory
/**
 * Recursively mirrors a tree of KML/KMZ files into the local kmzs/ directory.
 *
 * Each file is fetched from the remote base URL unless a local copy already
 * exists, stored under kmzs/ at the same relative path, and then scanned for
 * <href> elements that point at further files to fetch.
 */
class Crawler
{
    /**
     * Normalised relative paths already handled during the current crawl,
     * stored as array keys.
     *
     * @var array
     */
    private $visited = array();

    /**
     * Crawl the file at $url and everything reachable from it through
     * relative <href> links.
     *
     * @param string $url Path relative to the remote base URL; the same
     *                    relative path is used below kmzs/ for the local copy.
     * @return void
     */
    public function main($url)
    {
        // Every top-level call is an independent crawl.
        $this->visited = array();
        $this->crawl($url);
    }

    /**
     * Fetch one file (unless already on disk) and recurse into its relative links.
     *
     * @param string $url Path relative to the remote base URL.
     * @return void
     */
    private function crawl($url)
    {
        // A file linking back to itself or to an ancestor would otherwise
        // recurse until PHP runs out of memory or stack.
        $key = $this->normalizePath($url);
        if (isset($this->visited[$key])) {
            return;
        }
        $this->visited[$key] = true;

        $f = "kmzs/{$url}";
        error_log($f);

        // Links may point several directory levels deep, so the directory
        // must be created recursively. The trailing is_dir() covers the case
        // where another process created it in the meantime.
        $dir = dirname($f);
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            error_log("failed to create directory {$dir}");
            return;
        }

        if (!file_exists($f)) {
            $body = file_get_contents('http://adm3d.taipei.gov.tw/tcg/kml/Taipei3DBuilding/' . $url);
            if (false === $body) {
                // Do not write an empty placeholder: it would be treated as
                // a valid cached copy on every later run.
                error_log("failed to download {$url}");
                return;
            }
            file_put_contents($f, $body);
        }

        $c = file_get_contents($f);
        if (false === $c) {
            error_log("failed to read {$f}");
            return;
        }

        preg_match_all('#<href>([^<]*)</href>#', $c, $matches);
        foreach ($matches[1] as $new_url) {
            // Absolute links are not followed; only relative ones are
            // resolved against the current file's directory.
            if (strpos($new_url, 'http') === 0) {
                continue;
            }
            $this->crawl(dirname($url) . '/' . str_replace('\\', '/', $new_url));
        }
    }

    /**
     * Collapse empty, "." and resolvable ".." segments so that different
     * spellings of the same relative path share one visited-set key.
     * The result is used only as a lookup key, never as a file or URL path.
     *
     * @param string $path Relative path using "/" or "\" separators.
     * @return string Normalised path with "/" separators.
     */
    private function normalizePath($path)
    {
        $stack = array();
        foreach (explode('/', str_replace('\\', '/', $path)) as $segment) {
            if ('' === $segment || '.' === $segment) {
                continue;
            }
            if ('..' === $segment && $stack && '..' !== end($stack)) {
                array_pop($stack);
                continue;
            }
            $stack[] = $segment;
        }
        return implode('/', $stack);
    }
}
// Script entry point: start crawling from the root index file.
$entryFile = 'Taipei3DBuilding_nl.kml';
$crawler = new Crawler();
$crawler->main($entryFile);