Пример #1
0
 /**
  * Create or refresh the SheetHub sheet `data.gov.tw/{$type}` from its portal dataset.
  *
  * Optional $config keys read by this method:
  *  - sheet_info:  already-fetched sheet object; skips the getSheetHubInfo() lookup
  *  - period:      minimum number of seconds between two updates (default 86400)
  *  - force:       truthy to bypass both the period check and the md5 check
  *  - source:      portal URL; falls back to the sheet's stored meta->source
  *  - meta_only:   truthy to refresh metadata without downloading/uploading a file
  *  - choose_file: download type to prefer when the portal lists several files
  *  - filetype:    overrides the file type handed to the uploader
  * `columns` / `row_begin` are filled from the sheet's `updater:*` meta entries, and
  * the whole array is forwarded to SheetHubTool::updateFile() / newFile().
  *
  * @param string $type   sheet name under the data.gov.tw account
  * @param array  $config options described above
  * @return string human-readable status message; portal, download and upload
  *                failures are reported through $this->error() and returned
  *                as a message instead of being thrown
  */
 public function updateOrInsert($type, $config)
 {
     $force = !empty($config['force']);
     $meta_only = !empty($config['meta_only']);

     // Step 1: load the existing sheet. Any failure here is treated as
     // "sheet does not exist yet" and leads to the creation path below.
     try {
         if (!empty($config['sheet_info'])) {
             $sheet_info = $config['sheet_info'];
         } else {
             $sheet_info = SheetHubTool::getSheetHubInfo('data.gov.tw', $type)->sheet;
         }
         $config['period'] = array_key_exists('period', $config) ? $config['period'] : 86400;
         if (!$force and time() - strtotime($sheet_info->meta->fetched_time) < $config['period']) {
             return "距離上次更新時間過短";
         }
         error_log("updating {$type}");
     } catch (Exception $e) {
         error_log("adding {$type} (because {$e->getMessage()}");
         $sheet_info = null;
     }

     // Step 2: resolve the portal URL and fetch the portal metadata.
     try {
         // $sheet_info is null for a new sheet, so only consult it when present.
         if (empty($config['source']) and $sheet_info and isset($sheet_info->meta->source)) {
             $config['source'] = $sheet_info->meta->source;
         }
         if (empty($config['source'])) {
             throw new Exception("找不到更新網址");
         }
         $portal_meta = $this->getMetaFromPortal($config['source']);
     } catch (Exception $e) {
         $this->error($type, $e);
         return "更新失敗,原因: " . $e->getMessage();
     }

     // Stay null/empty unless a file is really processed in this run, so that
     // a meta-only refresh neither wipes the stored hash nor reads undefined variables.
     $md5 = null;
     $result = '';

     if (!$meta_only) {
         $downloads = (isset($portal_meta['下載']) && is_array($portal_meta['下載'])) ? $portal_meta['下載'] : array();

         // Step 3: choose which file to download.
         $download_url = null;
         $filetype = null;
         if (count($downloads) == 1) {
             // Only one candidate: take it.
             $download_url = $downloads[0]['url'];
             $filetype = $downloads[0]['type'];
         }
         if (is_null($download_url) and !empty($config['choose_file'])) {
             // Caller asked for a specific type.
             foreach ($downloads as $info) {
                 if ($info['type'] == $config['choose_file']) {
                     $download_url = $info['url'];
                     $filetype = $info['type'];
                     break;
                 }
             }
         }
         if (is_null($download_url)) {
             // Fall back to a fixed preference order of known types.
             $files = array();
             foreach ($downloads as $info) {
                 $files[$info['type']] = $info['url'];
             }
             foreach (array('json', 'csv', 'excel', 'xml') as $t) {
                 if (!empty($files[$t])) {
                     $download_url = $files[$t];
                     $filetype = $t;
                     break;
                 }
             }
         }
         if (is_null($download_url)) {
             $e = new Exception("超過一個檔可以下載,不知道要用哪個: " . implode(',', array_map(function ($i) {
                 return $i['type'];
             }, $downloads)));
             $this->error($type, $e);
             return "更新失敗,原因: " . $e->getMessage();
         }

         // Step 4: download, then skip the upload when the content is unchanged.
         try {
             error_log("downloading {$download_url}");
             list($fp, $filetype) = SheetHubTool::downloadFile($download_url, $filetype);
             error_log("downloaded");
         } catch (Exception $e) {
             $this->error($type, $e);
             return "更新失敗,原因: " . $e->getMessage();
         }
         $file = stream_get_meta_data($fp)['uri'];
         $md5 = md5_file($file);
         // Strict comparison: with ==, two different "0e<digits>" hex digests are
         // compared as numbers and would wrongly be considered equal.
         if (!$force and $sheet_info and !empty($sheet_info->meta->file_hash) and $md5 === $sheet_info->meta->file_hash) {
             error_log("md5 same, skip {$type}");
             SheetHubTool::setMeta('data.gov.tw', $type, array('fetched_time' => date('c', time())));
             return "下載原始檔案 md5 未變,不需更新";
         }

         // Per-sheet updater overrides only exist for an already-created sheet.
         if ($sheet_info and isset($sheet_info->meta) and is_object($sheet_info->meta)) {
             if (property_exists($sheet_info->meta, 'updater:columns')) {
                 $config['columns'] = explode(',', $sheet_info->meta->{'updater:columns'});
             }
             if (property_exists($sheet_info->meta, 'updater:row_begin')) {
                 $config['row_begin'] = $sheet_info->meta->{'updater:row_begin'};
             }
         }
         if (!empty($config['filetype'])) {
             $filetype = $config['filetype'];
         }
         $fp = DataGovTw::specialCase($fp, $config['source']);

         // Step 5: upload, then update the existing sheet or create a new one.
         try {
             $upload_id = SheetHubTool::uploadToSheetHub($fp, $filetype);
             if ($sheet_info) {
                 $ret = SheetHubTool::updateFile('data.gov.tw', $type, $upload_id, $config);
                 $inserted = count($ret->insert);
                 $updated = count($ret->update);
                 $deleted = count($ret->delete);
                 $result = " insert: " . $inserted . ', update: ' . $updated . ', delete: ' . $deleted;
                 error_log("Type={$type} done," . $result);
                 $this->updateStatus($type, date('c', time()), sprintf("更新完成,共新增 %d 筆,更新 %d 筆,刪除 %d 筆", $inserted, $updated, $deleted));
             } else {
                 $created = SheetHubTool::newFile('data.gov.tw', $type, $upload_id, $config);
                 // NOTE(review): other code in this file reads the new sheet from
                 // newFile(...)->sheet; accept both shapes — confirm the real one.
                 $sheet_info = isset($created->sheet) ? $created->sheet : $created;
                 error_log("add {$type} done");
                 $this->updateStatus($type, date('c', time()), "建立完成");
             }
         } catch (Exception $e) {
             $this->error($type, $e);
             return "更新失敗,原因: " . $e->getMessage();
         }
     }

     // Only reachable with meta_only when the sheet could not be loaded:
     // there is nothing to attach the metadata to.
     if (!$sheet_info) {
         $e = new Exception("找不到 {$type},無法只更新 meta");
         $this->error($type, $e);
         return "更新失敗,原因: " . $e->getMessage();
     }

     // Step 6: merge the portal metadata into the sheet's meta and store it.
     $new_meta = (isset($sheet_info->meta) && is_object($sheet_info->meta)) ? $sheet_info->meta : new stdClass();
     if (!empty($portal_meta['資料集修訂時間'])) {
         $new_meta->updated_time = $portal_meta['資料集修訂時間'];
     }
     $new_meta->fetched_time = date('c', time());
     $new_meta->period = $portal_meta['更新頻率'];
     $new_meta->source = $config['source'];
     $new_meta->update_code = 'https://github.com/sheethub/data-import-script/blob/master/data.gov.tw/update.php';
     $new_meta->license = $portal_meta['授權方式'] . ' ' . $portal_meta['授權說明網址'];
     if (!is_null($md5)) {
         // Keep the previously stored hash when no file was downloaded.
         $new_meta->file_hash = $md5;
     }
     foreach (array('資料集提供機關', '資料集提供機關聯絡人', '資料集提供機關聯絡人電話', '備註') as $k) {
         if (!empty($portal_meta[$k])) {
             $new_meta->{$k} = $portal_meta[$k];
         }
     }
     SheetHubTool::setMeta('data.gov.tw', $type, $new_meta);

     // Push the description only when the portal has one and it differs.
     $current_description = isset($sheet_info->description) ? $sheet_info->description : null;
     if (!empty($portal_meta['資料集描述']) and $portal_meta['資料集描述'] !== $current_description) {
         SheetHubTool::setDescription('data.gov.tw', $type, $portal_meta['資料集描述']);
     }
     return "更新成功,已匯入 https://sheethub.com/data.gov.tw/{$type} , {$result}";
 }
Пример #2
0
    // Entries without a description are skipped.
    if (!$description) {
        continue;
    }

    // Download the file linked from the 6th table cell into a temporary file,
    // using the CA bundle shipped next to this script.
    $curl = curl_init("https://gist-map.motc.gov.tw" . $td_doms->item(5)->getElementsByTagName('a')->item(0)->getAttribute('href'));
    curl_setopt($curl, CURLOPT_CAINFO, __DIR__ . '/GRCA2.crt');
    $fp = tmpfile();
    curl_setopt($curl, CURLOPT_FILE, $fp);
    $downloaded = curl_exec($curl);
    $curl_error = curl_error($curl); // must be read before the handle is closed
    curl_close($curl);
    if ($downloaded === false) {
        // Do not upload an empty/partial file over the existing sheet.
        error_log("download {$title} failed: {$curl_error}");
        fclose($fp);
        continue;
    }

    $upload_id = SheetHubTool::uploadToSheetHub($fp, 'zip');

    // A failing lookup is taken to mean the sheet does not exist yet, in which
    // case it is created from the upload; otherwise the upload updates it.
    $new_file = false;
    try {
        $sheet_info = SheetHubTool::getSheetHubInfo('gist-map.motc.gov.tw', $title)->sheet;
    } catch (Exception $e) {
        $sheet_info = SheetHubTool::newFile('gist-map.motc.gov.tw', $title, $upload_id, array())->sheet;
        $new_file = true;
    }
    if (!$new_file) {
        $ret = SheetHubTool::updateFile('gist-map.motc.gov.tw', $title, $upload_id, array());
        error_log("Type={$title} done, insert: " . count($ret->insert) . ', update: ' . count($ret->update) . ', delete: ' . count($ret->delete));
    }
    SheetHubTool::setDescription('gist-map.motc.gov.tw', $title, $description);

    // Record the hash of the downloaded file and provenance in the sheet's meta.
    $file = stream_get_meta_data($fp)['uri'];
    $md5 = md5_file($file);
    $new_meta = $sheet_info->meta;
    $new_meta->file_hash = $md5;
    $new_meta->update_code = 'https://github.com/sheethub/data-import-script/blob/master/gist-map.motc.gov.tw/update.php';
    $new_meta->source = 'https://gist-map.motc.gov.tw/Complex/MapTopic';
    $new_meta->fetched_time = date('c', time());
    SheetHubTool::setMeta('gist-map.motc.gov.tw', $title, $new_meta);

    // Release the temporary file now instead of keeping one open per iteration.
    fclose($fp);
}