/**
 * Refresh (or create) the SheetHub sheet "data.gov.tw/{$type}" from its portal page.
 *
 * Steps, in order:
 *  1. Look up the existing sheet (from $config['sheet_info'] or SheetHubTool). A lookup
 *     that throws is treated as "sheet does not exist yet".
 *  2. Read the portal metadata for the source URL.
 *  3. Unless $config['meta_only'] is set: choose one download, fetch it, stop when its
 *     md5 matches the stored hash, otherwise upload it and update/create the sheet.
 *  4. Write refreshed metadata (and the description, when it changed) back to SheetHub.
 *
 * Failures in steps 2-3 are reported through $this->error() and returned as a message
 * string instead of being thrown.
 *
 * @param string $type   Sheet name under the data.gov.tw account.
 * @param array  $config Optional keys read here:
 *                       - sheet_info:  already-fetched sheet object (skips the lookup)
 *                       - period:      minimum seconds between updates (default 86400)
 *                       - force:       ignore the period and md5 checks
 *                       - source:      portal URL (defaults to the sheet's meta->source)
 *                       - meta_only:   only refresh metadata, do not download data
 *                       - choose_file: preferred download type when several are offered
 *                       - filetype:    overrides the file type handed to the uploader
 *                       The array (with period/source/columns/row_begin filled in where
 *                       applicable) is also forwarded to SheetHubTool::updateFile()/newFile().
 * @return string Human-readable status message.
 */
public function updateOrInsert($type, $config)
{
    // Step 1: locate the existing sheet and bail out if it was refreshed recently.
    try {
        if (!empty($config['sheet_info'])) {
            $sheet_info = $config['sheet_info'];
        } else {
            $sheet_info = SheetHubTool::getSheetHubInfo('data.gov.tw', $type)->sheet;
        }
        $config['period'] = array_key_exists('period', $config) ? $config['period'] : 86400;
        // A missing or unparsable fetched_time counts as "never fetched".
        $fetched_at = isset($sheet_info->meta->fetched_time)
            ? (int) strtotime($sheet_info->meta->fetched_time)
            : 0;
        if (empty($config['force']) and time() - $fetched_at < $config['period']) {
            return "距離上次更新時間過短";
        }
        error_log("updating {$type}");
    } catch (Exception $e) {
        // No usable sheet: continue in "create" mode.
        error_log("adding {$type} (because {$e->getMessage()}");
        $sheet_info = null;
    }

    // Step 2: resolve the portal URL and read its metadata.
    try {
        if (empty($config['source'])) {
            $config['source'] = isset($sheet_info->meta->source) ? $sheet_info->meta->source : null;
        }
        if (empty($config['source'])) {
            throw new Exception("找不到更新網址");
        }
        $portal_meta = $this->getMetaFromPortal($config['source']);
    } catch (Exception $e) {
        $this->error($type, $e);
        return "更新失敗,原因: " . $e->getMessage();
    }

    // Reads an optional portal field without raising undefined-index notices.
    $portal = function ($key) use ($portal_meta) {
        return isset($portal_meta[$key]) ? $portal_meta[$key] : null;
    };

    // Both stay at these defaults on the meta-only path; $result also on the create path.
    $md5 = null;
    $result = '';

    // Step 3: download and import the data file.
    if (empty($config['meta_only'])) {
        $downloads = is_array($portal('下載')) ? $portal('下載') : array();
        $download_url = null;
        $filetype = null;

        // 3a. A single offered file is taken as-is.
        if (count($downloads) == 1) {
            $download_url = $downloads[0]['url'];
            $filetype = $downloads[0]['type'];
        }

        // 3b. Otherwise honour the caller's preferred type, first match wins.
        if (is_null($download_url) and !empty($config['choose_file'])) {
            foreach ($downloads as $info) {
                if ($info['type'] == $config['choose_file']) {
                    $download_url = $info['url'];
                    $filetype = $info['type'];
                    break;
                }
            }
        }

        // 3c. Otherwise fall back to a fixed preference order of known types.
        if (is_null($download_url)) {
            $files = array();
            foreach ($downloads as $info) {
                $files[$info['type']] = $info['url'];
            }
            foreach (array('json', 'csv', 'excel', 'xml') as $t) {
                if (!empty($files[$t])) {
                    $download_url = $files[$t];
                    $filetype = $t;
                    break;
                }
            }
        }

        if (is_null($download_url)) {
            $types = array_map(function ($i) {
                return $i['type'];
            }, $downloads);
            $e = new Exception("超過一個檔可以下載,不知道要用哪個: " . implode(',', $types));
            $this->error($type, $e);
            return "更新失敗,原因: " . $e->getMessage();
        }

        try {
            error_log("downloading {$download_url}");
            list($fp, $filetype) = SheetHubTool::downloadFile($download_url, $filetype);
            error_log("downloaded");
        } catch (Exception $e) {
            $this->error($type, $e);
            return "更新失敗,原因: " . $e->getMessage();
        }

        $file = stream_get_meta_data($fp)['uri'];
        $md5 = md5_file($file);

        // Metadata of the existing sheet, or null when the sheet is about to be created.
        $current_meta = (is_object($sheet_info) && isset($sheet_info->meta) && is_object($sheet_info->meta))
            ? $sheet_info->meta
            : null;

        // Strict comparison: with ==, two different "0e<digits>" hashes compare equal as numbers.
        if (empty($config['force'])
            and $current_meta
            and !empty($current_meta->file_hash)
            and $md5 === $current_meta->file_hash
        ) {
            error_log("md5 same, skip {$type}");
            SheetHubTool::setMeta('data.gov.tw', $type, array('fetched_time' => date('c', time())));
            return "下載原始檔案 md5 未變,不需更新";
        }

        // Per-sheet updater hints stored in the sheet's metadata are copied into $config.
        if ($current_meta and property_exists($current_meta, 'updater:columns')) {
            $config['columns'] = explode(',', $current_meta->{'updater:columns'});
        }
        if ($current_meta and property_exists($current_meta, 'updater:row_begin')) {
            $config['row_begin'] = $current_meta->{'updater:row_begin'};
        }
        if (!empty($config['filetype'])) {
            $filetype = $config['filetype'];
        }

        $fp = DataGovTw::specialCase($fp, $config['source']);

        try {
            $upload_id = SheetHubTool::uploadToSheetHub($fp, $filetype);
            if ($sheet_info) {
                $ret = SheetHubTool::updateFile('data.gov.tw', $type, $upload_id, $config);
                $inserted = count($ret->insert);
                $updated = count($ret->update);
                $deleted = count($ret->delete);
                $result = " insert: " . $inserted . ', update: ' . $updated . ', delete: ' . $deleted;
                error_log("Type={$type} done, insert: " . $inserted . ', update: ' . $updated . ', delete: ' . $deleted);
                $this->updateStatus(
                    $type,
                    date('c', time()),
                    sprintf("更新完成,共新增 %d 筆,更新 %d 筆,刪除 %d 筆", $inserted, $updated, $deleted)
                );
            } else {
                $sheet_info = SheetHubTool::newFile('data.gov.tw', $type, $upload_id, $config);
                error_log("add {$type} done");
                $this->updateStatus($type, date('c', time()), "建立完成");
            }
        } catch (Exception $e) {
            $this->error($type, $e);
            return "更新失敗,原因: " . $e->getMessage();
        }
    }

    // Step 4: push refreshed metadata.
    // NOTE(review): assumes newFile() returns a sheet object carrying ->meta; when no meta
    // object is available (also: meta_only on a missing sheet) an empty object is used
    // instead of relying on implicit object creation — confirm against SheetHubTool.
    $new_meta = (is_object($sheet_info) && isset($sheet_info->meta) && is_object($sheet_info->meta))
        ? $sheet_info->meta
        : (object) array();

    if ($portal('資料集修訂時間')) {
        $new_meta->updated_time = $portal('資料集修訂時間');
    }
    $new_meta->fetched_time = date('c', time());
    $new_meta->period = $portal('更新頻率');
    $new_meta->source = $config['source'];
    $new_meta->update_code = 'https://github.com/sheethub/data-import-script/blob/master/data.gov.tw/update.php';
    $new_meta->license = $portal('授權方式') . ' ' . $portal('授權說明網址');
    // Only record a hash when a file was actually downloaded, so a meta-only run
    // leaves the previously stored hash untouched.
    if (!is_null($md5)) {
        $new_meta->file_hash = $md5;
    }
    foreach (array('資料集提供機關', '資料集提供機關聯絡人', '資料集提供機關聯絡人電話', '備註') as $k) {
        if ($portal($k)) {
            $new_meta->{$k} = $portal($k);
        }
    }
    SheetHubTool::setMeta('data.gov.tw', $type, $new_meta);

    $portal_description = $portal('資料集描述');
    $current_description = isset($sheet_info->description) ? $sheet_info->description : null;
    if ($portal_description and $portal_description != $current_description) {
        SheetHubTool::setDescription('data.gov.tw', $type, $portal_description);
    }

    return "更新成功,已匯入 https://sheethub.com/data.gov.tw/{$type} , {$result}";
}
<?php
/**
 * Show $message in a JavaScript alert, reload the current page, and end the script.
 * The form handler below uses it for both failure and success notices.
 *
 * @param string $message Text to display.
 */
function error($message)
{
    // The JSON_HEX_* flags encode <, >, &, ' and " as \uXXXX escapes so the message,
    // which can contain remote portal text, cannot break out of the <script> element.
    $payload = json_encode($message, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
    echo "<html><script>alert(" . $payload . "); document.location=document.location;</script></html>";
    exit;
}

// Handle a submitted import form. Requests without a posted url (e.g. a plain GET)
// fall through to the HTML below; !empty() avoids an undefined-index notice for them.
if (!empty($_POST['url'])) {
    if (!filter_var($_POST['url'], FILTER_VALIDATE_URL)) {
        error("不是合法的網址"); // error() never returns
    }

    // Nothing below can work without the class, so fail hard if the file is missing.
    require_once __DIR__ . '/DataGovTw.php';
    $d = new DataGovTw();
    try {
        // The sheet name is the portal title with "/" replaced by "_".
        $portal_meta = $d->getMetaFromPortal($_POST['url']);
        $config = array('source' => $_POST['url'], 'period' => 0);
        $m = $d->updateOrInsert(str_replace('/', '_', $portal_meta['title']), $config);
    } catch (Exception $e) {
        error("匯入失敗,原因: " . $e->getMessage());
    }
    error("匯入完成," . $m);
}
?>
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>data.gov.tw importer</title>
<?php
// Command-line updater. Behaviour depends on the first argument:
//   <portal-url> [anything]  import/refresh from that URL, forced; a second argument
//                            switches to metadata-only mode
//   <sheet-name>             refresh that one sheet
//   (no argument)            walk the SheetHub listing and refresh every sheet whose
//                            meta->update_code points at this script

include 'DataGovTw.php';
$d = new DataGovTw();

// Read arguments defensively and keep them in locals instead of overwriting
// the predefined $argv array.
$cli_args = (isset($_SERVER['argv']) && is_array($_SERVER['argv'])) ? $_SERVER['argv'] : array();
$target = isset($cli_args[1]) ? $cli_args[1] : null;

if ($target) {
    if (filter_var($target, FILTER_VALIDATE_URL)) {
        // For a URL, just fetch it and try the upload.
        $portal_meta = $d->getMetaFromPortal($target);
        $config = array('source' => $target);
        if (!empty($cli_args[2])) {
            $config['meta_only'] = true;
        }
        $config['force'] = true;
        $d->updateOrInsert(str_replace('/', '_', $portal_meta['title']), $config);
    } else {
        $d->updateOrInsert($target, array());
    }
} else {
    $managed_by = 'https://github.com/sheethub/data-import-script/blob/master/data.gov.tw/update.php';
    $url = 'https://sheethub.com/data.gov.tw/?format=json';
    while ($url) {
        $body = file_get_contents($url);
        $sheets = ($body === false) ? null : json_decode($body);
        if (!is_object($sheets)) {
            // Stop explicitly instead of reading properties off a failed response.
            error_log("failed to load sheet list from {$url}");
            break;
        }

        $rows = (isset($sheets->data) && (is_array($sheets->data) || is_object($sheets->data)))
            ? $sheets->data
            : array();
        foreach ($rows as $sheet_info) {
            // Skip sheets that are not maintained by this script.
            if (!isset($sheet_info->meta->update_code) || $sheet_info->meta->update_code != $managed_by) {
                continue;
            }
            $d->updateOrInsert($sheet_info->name, array('sheet_info' => $sheet_info));
        }

        // Follow pagination until the listing has no next page.
        $url = isset($sheets->next_url) ? $sheets->next_url : null;
    }
}