/**
 * Create or refresh the SheetHub sheet "data.gov.tw/{$type}" from its portal page.
 *
 * Flow:
 *  1. Load the existing sheet (from $config['sheet_info'] or SheetHubTool::getSheetHubInfo).
 *     If that fails the sheet is treated as new. If the sheet was fetched less than
 *     $config['period'] seconds ago (and 'force' is not set) nothing is done.
 *  2. Read the dataset metadata from the portal page at $config['source'].
 *  3. Unless 'meta_only' is set: choose one download, fetch it, skip if its md5 is unchanged,
 *     otherwise upload it as an update of the existing sheet or as a new sheet.
 *  4. Write the refreshed metadata (and description, if changed) back to SheetHub.
 *
 * Recognised $config keys (all optional):
 *  - sheet_info:  already-fetched sheet object, used instead of a lookup
 *  - period:      minimum seconds between two fetches (default 86400)
 *  - force:       ignore both the "fetched recently" and the "md5 unchanged" shortcuts
 *  - source:      portal page URL; defaults to the sheet's stored meta "source"
 *  - meta_only:   only refresh metadata, do not download or upload any file
 *  - choose_file: download type to pick when the portal offers several files
 *  - filetype:    overrides the file type handed to the uploader
 * 'columns' and 'row_begin' are overwritten from the sheet's "updater:columns" /
 * "updater:row_begin" meta entries when those exist. The (possibly modified) $config is
 * passed on to SheetHubTool::updateFile() / SheetHubTool::newFile().
 *
 * @param string $type   sheet name under the data.gov.tw account
 * @param array  $config options, see above
 * @return string human-readable status message (success, skipped, or failure reason)
 */
public function updateOrInsert($type, $config)
{
    // Row-change summary appended to the success message. It stays empty when no diff was
    // produced (meta_only run or freshly created sheet) instead of being an undefined variable.
    $result = '';
    // MD5 of the downloaded file. It stays null when nothing was downloaded, so the hash
    // already stored on the sheet is not wiped in meta_only mode.
    $md5 = null;
    $force = !empty($config['force']);

    // Step 1: find the existing sheet and apply the "fetched recently" shortcut.
    try {
        if (!empty($config['sheet_info'])) {
            $sheet_info = $config['sheet_info'];
        } else {
            $sheet_info = SheetHubTool::getSheetHubInfo('data.gov.tw', $type)->sheet;
        }
        $config['period'] = array_key_exists('period', $config) ? $config['period'] : 86400;
        if (!$force and time() - strtotime($sheet_info->meta->fetched_time) < $config['period']) {
            return "距離上次更新時間過短";
        }
        error_log("updating {$type}");
    } catch (Exception $e) {
        // Lookup failed: from here on a null $sheet_info means "sheet has to be created".
        error_log("adding {$type} (because {$e->getMessage()}");
        $sheet_info = null;
    }

    // Step 2: resolve the portal URL and read the dataset metadata from it.
    try {
        if (empty($config['source']) and $sheet_info and isset($sheet_info->meta->source)) {
            $config['source'] = $sheet_info->meta->source;
        }
        if (empty($config['source'])) {
            throw new Exception("找不到更新網址");
        }
        $portal_meta = $this->getMetaFromPortal($config['source']);
    } catch (Exception $e) {
        $this->error($type, $e);
        return "更新失敗,原因: " . $e->getMessage();
    }

    // Step 3: download and upload the data file.
    if (empty($config['meta_only'])) {
        // Treat a missing download list as empty so count()/foreach below cannot fail.
        $downloads = (isset($portal_meta['下載']) && is_array($portal_meta['下載']))
            ? $portal_meta['下載']
            : array();

        $download_url = null;
        $filetype = null;

        // Only one file offered: take it.
        if (count($downloads) == 1) {
            $download_url = $downloads[0]['url'];
            $filetype = $downloads[0]['type'];
        }

        // Several files: honour the caller's explicit choice first.
        if (is_null($download_url) and !empty($config['choose_file'])) {
            foreach ($downloads as $info) {
                if ($info['type'] == $config['choose_file']) {
                    $download_url = $info['url'];
                    $filetype = $info['type'];
                    break;
                }
            }
        }

        // Otherwise fall back to a fixed preference order of file types.
        if (is_null($download_url)) {
            $files = array();
            foreach ($downloads as $info) {
                $files[$info['type']] = $info['url'];
            }
            foreach (array('json', 'csv', 'excel', 'xml') as $t) {
                if (!empty($files[$t])) {
                    $download_url = $files[$t];
                    $filetype = $t;
                    break;
                }
            }
        }

        if (is_null($download_url)) {
            $e = new Exception("超過一個檔可以下載,不知道要用哪個: " . implode(',', array_map(function ($i) {
                return $i['type'];
            }, $downloads)));
            $this->error($type, $e);
            return "更新失敗,原因: " . $e->getMessage();
        }

        try {
            error_log("downloading {$download_url}");
            list($fp, $filetype) = SheetHubTool::downloadFile($download_url, $filetype);
            error_log("downloaded");
        } catch (Exception $e) {
            $this->error($type, $e);
            return "更新失敗,原因: " . $e->getMessage();
        }

        // The hash is taken from the file as downloaded, before DataGovTw::specialCase() runs.
        $file = stream_get_meta_data($fp)['uri'];
        $md5 = md5_file($file);

        // Strict comparison: with ==, two hex strings shaped like "0e<digits>" are compared
        // as numbers and would be reported as equal even though the files differ.
        if (!$force and $sheet_info and !empty($sheet_info->meta->file_hash) and $md5 === $sheet_info->meta->file_hash) {
            error_log("md5 same, skip {$type}");
            SheetHubTool::setMeta('data.gov.tw', $type, array('fetched_time' => date('c', time())));
            return "下載原始檔案 md5 未變,不需更新";
        }

        // Per-sheet import overrides exist only on an existing sheet; a sheet that is about
        // to be created has no meta object to inspect.
        if ($sheet_info) {
            if (property_exists($sheet_info->meta, 'updater:columns')) {
                $config['columns'] = explode(',', $sheet_info->meta->{'updater:columns'});
            }
            if (property_exists($sheet_info->meta, 'updater:row_begin')) {
                $config['row_begin'] = $sheet_info->meta->{'updater:row_begin'};
            }
        }

        if (!empty($config['filetype'])) {
            $filetype = $config['filetype'];
        }

        $fp = DataGovTw::specialCase($fp, $config['source']);

        try {
            $upload_id = SheetHubTool::uploadToSheetHub($fp, $filetype);
            if ($sheet_info) {
                $ret = SheetHubTool::updateFile('data.gov.tw', $type, $upload_id, $config);
                $inserted = count($ret->insert);
                $updated = count($ret->update);
                $deleted = count($ret->delete);
                $result = " insert: " . $inserted . ', update: ' . $updated . ', delete: ' . $deleted;
                error_log("Type={$type} done," . $result);
                $this->updateStatus($type, date('c', time()), sprintf("更新完成,共新增 %d 筆,更新 %d 筆,刪除 %d 筆", $inserted, $updated, $deleted));
            } else {
                $created = SheetHubTool::newFile('data.gov.tw', $type, $upload_id, $config);
                // NOTE(review): getSheetHubInfo() is read through ->sheet above, and other
                // import code reads newFile() through ->sheet as well, so the response is
                // unwrapped when that property is present. Confirm against SheetHubTool.
                $sheet_info = isset($created->sheet) ? $created->sheet : $created;
                error_log("add {$type} done");
                $this->updateStatus($type, date('c', time()), "建立完成");
            }
        } catch (Exception $e) {
            $this->error($type, $e);
            return "更新失敗,原因: " . $e->getMessage();
        }
    }

    // Step 4: write metadata back. Without a sheet (e.g. meta_only on a sheet that does not
    // exist yet) there is nothing to attach metadata to.
    if (!$sheet_info) {
        $e = new Exception("資料表不存在,無法更新 meta");
        $this->error($type, $e);
        return "更新失敗,原因: " . $e->getMessage();
    }

    $new_meta = isset($sheet_info->meta) ? $sheet_info->meta : (object) array();
    if (!empty($portal_meta['資料集修訂時間'])) {
        $new_meta->updated_time = $portal_meta['資料集修訂時間'];
    }
    $new_meta->fetched_time = date('c', time());
    $new_meta->period = isset($portal_meta['更新頻率']) ? $portal_meta['更新頻率'] : null;
    $new_meta->source = $config['source'];
    $new_meta->update_code = 'https://github.com/sheethub/data-import-script/blob/master/data.gov.tw/update.php';
    $new_meta->license = (isset($portal_meta['授權方式']) ? $portal_meta['授權方式'] : '')
        . ' '
        . (isset($portal_meta['授權說明網址']) ? $portal_meta['授權說明網址'] : '');
    if (!is_null($md5)) {
        // Only record a hash when a file was actually downloaded in this run.
        $new_meta->file_hash = $md5;
    }
    foreach (array('資料集提供機關', '資料集提供機關聯絡人', '資料集提供機關聯絡人電話', '備註') as $k) {
        if (!empty($portal_meta[$k])) {
            $new_meta->{$k} = $portal_meta[$k];
        }
    }
    SheetHubTool::setMeta('data.gov.tw', $type, $new_meta);

    // Push the description only when the portal has one and it differs from the stored one.
    if (!empty($portal_meta['資料集描述'])
        and (!isset($sheet_info->description) or $portal_meta['資料集描述'] !== $sheet_info->description)
    ) {
        SheetHubTool::setDescription('data.gov.tw', $type, $portal_meta['資料集描述']);
    }

    return "更新成功,已匯入 https://sheethub.com/data.gov.tw/{$type} , {$result}";
}
if (!$description) { continue; } $curl = curl_init("https://gist-map.motc.gov.tw" . $td_doms->item(5)->getElementsByTagName('a')->item(0)->getAttribute('href')); curl_setopt($curl, CURLOPT_CAINFO, __DIR__ . '/GRCA2.crt'); $fp = tmpfile(); curl_setopt($curl, CURLOPT_FILE, $fp); curl_exec($curl); curl_close($curl); $upload_id = SheetHubTool::uploadToSheetHub($fp, 'zip'); $new_file = false; try { $sheet_info = SheetHubTool::getSheetHubInfo('gist-map.motc.gov.tw', $title)->sheet; } catch (Exception $e) { $sheet_info = SheetHubTool::newFile('gist-map.motc.gov.tw', $title, $upload_id, array())->sheet; $new_file = true; } if (!$new_file) { $ret = SheetHubTool::updateFile('gist-map.motc.gov.tw', $title, $upload_id, array()); error_log("Type={$title} done, insert: " . count($ret->insert) . ', update: ' . count($ret->update) . ', delete: ' . count($ret->delete)); } SheetHubTool::setDescription('gist-map.motc.gov.tw', $title, $description); $file = stream_get_meta_data($fp)['uri']; $md5 = md5_file($file); $new_meta = $sheet_info->meta; $new_meta->file_hash = $md5; $new_meta->update_code = 'https://github.com/sheethub/data-import-script/blob/master/gist-map.motc.gov.tw/update.php'; $new_meta->source = 'https://gist-map.motc.gov.tw/Complex/MapTopic'; $new_meta->fetched_time = date('c', time()); SheetHubTool::setMeta('gist-map.motc.gov.tw', $title, $new_meta); }