Ejemplo n.º 1
0
 /**
  * Create or refresh the 'data.gov.tw' sheet named $type from its portal page.
  *
  * Steps, in order:
  *  1. Resolve the existing sheet from $config['sheet_info'] or via
  *     SheetHubTool::getSheetHubInfo(); an exception here is logged and the
  *     sheet is treated as not existing yet.
  *  2. Unless $config['force'] is set, return early when the sheet was
  *     fetched less than $config['period'] seconds ago (default 86400).
  *  3. Read portal metadata from $config['source'], falling back to the
  *     source stored in the sheet meta.
  *  4. Unless $config['meta_only'] is set: choose a download, fetch it,
  *     return early when its md5 equals the stored file_hash (unless forced),
  *     otherwise upload it and update the existing sheet or create a new one.
  *  5. Write the refreshed meta and, if it changed, the description.
  *
  * @param string $type   Sheet name passed to the SheetHubTool calls.
  * @param array  $config Options read here: sheet_info, period, force, source,
  *                       meta_only, choose_file, filetype. 'columns' and
  *                       'row_begin' are filled from the sheet meta keys
  *                       'updater:columns' / 'updater:row_begin' when present.
  *                       The array is forwarded to updateFile()/newFile().
  * @return string Human-readable status message (success, skip or failure).
  */
 public function updateOrInsert($type, $config)
 {
     // Flags are read into locals instead of being defaulted inside $config,
     // because $config is forwarded as-is to SheetHubTool further down and
     // must not gain keys the caller did not provide.
     $force = !empty($config['force']);
     $meta_only = !empty($config['meta_only']);

     $sheet_info = null;
     try {
         if (!empty($config['sheet_info'])) {
             $sheet_info = $config['sheet_info'];
         } else {
             $sheet_info = SheetHubTool::getSheetHubInfo('data.gov.tw', $type)->sheet;
         }
         $config['period'] = array_key_exists('period', $config) ? $config['period'] : 86400;
         $fetched_at = isset($sheet_info->meta->fetched_time) ? strtotime($sheet_info->meta->fetched_time) : false;
         if (!$force and false !== $fetched_at and time() - $fetched_at < $config['period']) {
             return "距離上次更新時間過短";
         }
         error_log("updating {$type}");
     } catch (Exception $e) {
         error_log("adding {$type} (because {$e->getMessage()})");
         $sheet_info = null;
     }

     try {
         if (empty($config['source'])) {
             // isset() tolerates $sheet_info being null (sheet not created yet).
             $config['source'] = isset($sheet_info->meta->source) ? $sheet_info->meta->source : null;
         }
         if (empty($config['source'])) {
             throw new Exception("找不到更新網址");
         }
         $portal_meta = $this->getMetaFromPortal($config['source']);
     } catch (Exception $e) {
         $this->error($type, $e);
         return "更新失敗,原因: " . $e->getMessage();
     }

     // Both stay at these defaults when meta_only skips the download branch.
     $md5 = null;
     $result = '';

     if (!$meta_only) {
         $downloads = (isset($portal_meta['下載']) and is_array($portal_meta['下載'])) ? $portal_meta['下載'] : array();
         $download_url = null;
         $filetype = null;

         // 1) A single download is used unconditionally.
         if (count($downloads) == 1) {
             $only = reset($downloads);
             $download_url = $only['url'];
             $filetype = $only['type'];
         }
         // 2) Otherwise honour the caller's preferred type, if any.
         if (is_null($download_url) and !empty($config['choose_file'])) {
             foreach ($downloads as $info) {
                 if ($info['type'] == $config['choose_file']) {
                     $download_url = $info['url'];
                     $filetype = $info['type'];
                     break;
                 }
             }
         }
         // 3) Otherwise take the first available type in a fixed preference order.
         if (is_null($download_url)) {
             $files = array();
             foreach ($downloads as $info) {
                 $files[$info['type']] = $info['url'];
             }
             foreach (array('json', 'csv', 'excel', 'xml') as $t) {
                 if (!empty($files[$t])) {
                     $download_url = $files[$t];
                     $filetype = $t;
                     break;
                 }
             }
         }
         if (is_null($download_url)) {
             $e = new Exception("超過一個檔可以下載,不知道要用哪個: " . implode(',', array_map(function ($i) {
                 return $i['type'];
             }, $downloads)));
             $this->error($type, $e);
             return "更新失敗,原因: " . $e->getMessage();
         }

         try {
             error_log("downloading {$download_url}");
             list($fp, $filetype) = SheetHubTool::downloadFile($download_url, $filetype);
             error_log("downloaded");
         } catch (Exception $e) {
             $this->error($type, $e);
             return "更新失敗,原因: " . $e->getMessage();
         }

         $file = stream_get_meta_data($fp)['uri'];
         $md5 = md5_file($file);
         $known_hash = isset($sheet_info->meta->file_hash) ? $sheet_info->meta->file_hash : null;
         // Strict comparison: with ==, two different hashes of the form
         // "0e<digits>" are compared as numbers and would be considered equal.
         if (!$force and $sheet_info and $known_hash and $md5 === $known_hash) {
             error_log("md5 same, skip {$type}");
             SheetHubTool::setMeta('data.gov.tw', $type, array('fetched_time' => date('c', time())));
             return "下載原始檔案 md5 未變,不需更新";
         }

         // Per-sheet overrides only exist when the sheet already exists.
         if ($sheet_info and isset($sheet_info->meta) and is_object($sheet_info->meta)) {
             if (property_exists($sheet_info->meta, 'updater:columns')) {
                 $config['columns'] = explode(',', $sheet_info->meta->{'updater:columns'});
             }
             if (property_exists($sheet_info->meta, 'updater:row_begin')) {
                 $config['row_begin'] = $sheet_info->meta->{'updater:row_begin'};
             }
         }
         if (!empty($config['filetype'])) {
             $filetype = $config['filetype'];
         }
         $fp = DataGovTw::specialCase($fp, $config['source']);

         try {
             $upload_id = SheetHubTool::uploadToSheetHub($fp, $filetype);
             if ($sheet_info) {
                 $ret = SheetHubTool::updateFile('data.gov.tw', $type, $upload_id, $config);
                 $inserted = count($ret->insert);
                 $updated = count($ret->update);
                 $deleted = count($ret->delete);
                 $result = " insert: {$inserted}, update: {$updated}, delete: {$deleted}";
                 error_log("Type={$type} done, insert: {$inserted}, update: {$updated}, delete: {$deleted}");
                 $this->updateStatus($type, date('c', time()), sprintf("更新完成,共新增 %d 筆,更新 %d 筆,刪除 %d 筆", $inserted, $updated, $deleted));
             } else {
                 $sheet_info = SheetHubTool::newFile('data.gov.tw', $type, $upload_id, $config);
                 error_log("add {$type} done");
                 $this->updateStatus($type, date('c', time()), "建立完成");
             }
         } catch (Exception $e) {
             $this->error($type, $e);
             return "更新失敗,原因: " . $e->getMessage();
         }
     }

     // Start from the sheet's current meta when there is one; otherwise from an
     // empty object, so the property writes below never hit null.
     $new_meta = ($sheet_info and isset($sheet_info->meta) and is_object($sheet_info->meta)) ? $sheet_info->meta : (object) array();
     if (!empty($portal_meta['資料集修訂時間'])) {
         $new_meta->updated_time = $portal_meta['資料集修訂時間'];
     }
     $new_meta->fetched_time = date('c', time());
     $new_meta->period = isset($portal_meta['更新頻率']) ? $portal_meta['更新頻率'] : null;
     $new_meta->source = $config['source'];
     $new_meta->update_code = 'https://github.com/sheethub/data-import-script/blob/master/data.gov.tw/update.php';
     $license_name = isset($portal_meta['授權方式']) ? $portal_meta['授權方式'] : '';
     $license_url = isset($portal_meta['授權說明網址']) ? $portal_meta['授權說明網址'] : '';
     $new_meta->license = $license_name . ' ' . $license_url;
     // Only record a hash when a file was actually downloaded; a meta_only run
     // must not wipe the hash stored by the previous full update.
     if (!is_null($md5)) {
         $new_meta->file_hash = $md5;
     }
     foreach (array('資料集提供機關', '資料集提供機關聯絡人', '資料集提供機關聯絡人電話', '備註') as $k) {
         if (!empty($portal_meta[$k])) {
             $new_meta->{$k} = $portal_meta[$k];
         }
     }
     SheetHubTool::setMeta('data.gov.tw', $type, $new_meta);

     $current_description = isset($sheet_info->description) ? $sheet_info->description : null;
     if (!empty($portal_meta['資料集描述']) and $portal_meta['資料集描述'] != $current_description) {
         SheetHubTool::setDescription('data.gov.tw', $type, $portal_meta['資料集描述']);
     }
     return "更新成功,已匯入 https://sheethub.com/data.gov.tw/{$type} , {$result}";
 }
Ejemplo n.º 2
0
<?php

/**
 * Show $message in a JavaScript alert, reload the current page, and stop.
 *
 * The message is embedded in an inline <script>, so it is JSON-encoded with
 * the JSON_HEX_* flags: <, >, &, ' and " are emitted as \uXXXX escapes and
 * the text cannot open or close HTML/script constructs (e.g. "<!--<script").
 * The string the browser displays is unchanged.
 *
 * @param string $message Text to display; may contain untrusted content
 *                        such as exception messages.
 * @return void This function never returns; it terminates the script.
 */
function error($message)
{
    $flags = JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT;
    echo "<html><script>alert(" . json_encode($message, $flags) . "); document.location=document.location;</script></html>";
    exit;
}
// Handle the import form submission: validate the posted URL, import the
// dataset it points to, and report the outcome through error(), which prints
// an alert page and terminates the request (so nothing after a call to it runs).
if (!empty($_POST['url'])) {
    $url = $_POST['url'];
    // The URL is fetched server-side, so besides being syntactically valid it
    // must be a plain http(s) URL; other schemes (file://, ftp://, ...) are refused.
    $is_http_url = is_string($url)
        && filter_var($url, FILTER_VALIDATE_URL)
        && in_array(strtolower((string) parse_url($url, PHP_URL_SCHEME)), array('http', 'https'), true);
    if (!$is_http_url) {
        error("不是合法的網址");
    }
    include __DIR__ . '/DataGovTw.php';
    $d = new DataGovTw();
    try {
        $portal_meta = $d->getMetaFromPortal($url);
        // period = 0 disables the "updated too recently" check for manual imports.
        $config = array('source' => $url, 'period' => 0);
        // '/' is replaced so the dataset title can be used as a sheet name.
        $m = $d->updateOrInsert(str_replace('/', '_', $portal_meta['title']), $config);
    } catch (Exception $e) {
        error("匯入失敗,原因: " . $e->getMessage());
    }
    error("匯入完成," . $m);
}
?>
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>data.gov.tw importer</title>
Ejemplo n.º 3
0
<?php

// Command-line updater.
//   php update.php <url> [meta_only]  import/update the dataset at <url> (forced);
//                                     any non-empty second argument updates metadata only
//   php update.php <sheet name>       update one existing sheet
//   php update.php                    update every listed sheet whose update_code
//                                     points at this script
include 'DataGovTw.php';
$d = new DataGovTw();

// Read arguments defensively: argv[1]/argv[2] are absent when the script is
// run without parameters. A separate variable is used so the global $argv
// array is not overwritten.
$cli_args = isset($_SERVER['argv']) ? $_SERVER['argv'] : array();
$target = isset($cli_args[1]) ? $cli_args[1] : null;

if ($target) {
    if (filter_var($target, FILTER_VALIDATE_URL)) {
        // A URL: fetch its portal metadata and import it directly.
        $portal_meta = $d->getMetaFromPortal($target);
        $config = array('source' => $target);
        if (!empty($cli_args[2])) {
            $config['meta_only'] = true;
        }
        $config['force'] = true;
        $d->updateOrInsert(str_replace('/', '_', $portal_meta['title']), $config);
    } else {
        $d->updateOrInsert($target, array());
    }
} else {
    $update_code = 'https://github.com/sheethub/data-import-script/blob/master/data.gov.tw/update.php';
    $url = 'https://sheethub.com/data.gov.tw/?format=json';
    while ($url) {
        $body = file_get_contents($url);
        $sheets = (false === $body) ? null : json_decode($body);
        if (!is_object($sheets) or !isset($sheets->data)) {
            // Stop explicitly instead of dereferencing null and ending silently.
            error_log("failed to read sheet list from {$url}");
            break;
        }
        foreach ($sheets->data as $sheet_info) {
            // Only touch sheets that declare this script as their updater.
            if (!isset($sheet_info->meta->update_code) or $sheet_info->meta->update_code != $update_code) {
                continue;
            }
            $d->updateOrInsert($sheet_info->name, array('sheet_info' => $sheet_info));
        }
        // The loop ends when the response carries no next page.
        $url = isset($sheets->next_url) ? $sheets->next_url : null;
    }
}