/**
 * Round-trips user groups through the CSV import and export endpoints.
 *
 * Uploads the fixture CSV, polls the import job until it reports `done`,
 * then writes the exported CSV to the output directory and checks every row
 * whose first column equals one of the two fixture keys.
 */
public function testCsv()
{
    $filename = __DIR__ . '/../../_data/user-groups.csv';
    $id = $this->api->userGroups()->postByCsv($filename);

    // Poll with a pause and a deadline rather than spinning: the original
    // loop re-requested the result as fast as possible and never terminated
    // if the job did not reach the `done` state.
    $timeoutSeconds = 300;
    $deadline = time() + $timeoutSeconds;
    while (true) {
        $result = $this->api->csv()->result($id);
        if ($result['done']) {
            break;
        }
        if (time() >= $deadline) {
            self::fail('CSV import did not finish within ' . $timeoutSeconds . ' seconds.');
        }
        sleep(1);
    }

    if ($result['success']) {
        // Counts as an assertion so the test is not reported as risky.
        self::assertTrue(true);
    } else {
        self::fail($result['errorCode']);
    }

    $content = $this->api->userGroups()->getByCsv();
    $path = __DIR__ . '/../../_output/export-user-groups.csv';
    file_put_contents($path, $content);

    $getCsv = new Reader($path, 'r+', false);
    while ($row = $getCsv->getRow()) {
        // reset() yields the first column of the row (or false for an empty row).
        $firstColumn = reset($row);

        // Expected value goes first so failure messages label the sides correctly.
        if ('example-title1' === $firstColumn) {
            self::assertEquals(["*****@*****.**", "example-group1"], $row);
        }
        if ('example-title2' === $firstColumn) {
            self::assertEquals(["*****@*****.**", "example-group2"], $row);
        }
    }
}
/**
 * Round-trips organizations through the CSV import and export endpoints.
 *
 * Steps, in order:
 *  1. Export the current organizations to export-organizations.csv.
 *  2. Import the orgs.csv fixture and wait for the job to finish.
 *  3. Export again to export-organizations1.csv and check that rows starting
 *     with "example-org1" and "example-org2" are both present.
 *  4. Re-import the export taken in step 1 and wait for that job to finish.
 */
public function testCsv()
{
    $content = $this->api->organizations()->getByCsv();
    $path = __DIR__ . '/../../_output/export-organizations.csv';
    file_put_contents($path, $content);

    $filename = __DIR__ . '/../../_data/orgs.csv';
    $id = $this->api->organizations()->postByCsv($filename);
    $this->awaitOrganizationCsvImport($id);

    $content = $this->api->organizations()->getByCsv();
    $path = __DIR__ . '/../../_output/export-organizations1.csv';
    file_put_contents($path, $content);

    $getCsv = new Reader($path, 'r+', false);
    $flg1 = $flg2 = false;
    while ($row = $getCsv->getRow()) {
        // reset() yields the first column of the row (or false for an empty row).
        $firstColumn = reset($row);
        if ('example-org1' === $firstColumn) {
            $flg1 = true;
        }
        if ('example-org2' === $firstColumn) {
            $flg2 = true;
        }
    }
    // Separate assertions so a failure says which organization is missing.
    self::assertTrue($flg1, 'No exported row starts with "example-org1".');
    self::assertTrue($flg2, 'No exported row starts with "example-org2".');

    // Feed the export captured before the fixture import back in.
    $filename = __DIR__ . '/../../_output/export-organizations.csv';
    $id = $this->api->organizations()->postByCsv($filename);
    $this->awaitOrganizationCsvImport($id);
}

/**
 * Polls the CSV job result until it reports `done`, then asserts success.
 *
 * Pauses one second between polls and fails the test once the deadline has
 * passed, instead of spinning without a delay or an upper bound.
 *
 * @param mixed $id             Job identifier returned by postByCsv().
 * @param int   $timeoutSeconds Maximum time to wait for the job to finish.
 */
private function awaitOrganizationCsvImport($id, $timeoutSeconds = 300)
{
    $deadline = time() + $timeoutSeconds;
    while (true) {
        $result = $this->api->csv()->result($id);
        if ($result['done']) {
            break;
        }
        if (time() >= $deadline) {
            self::fail('CSV import did not finish within ' . $timeoutSeconds . ' seconds.');
        }
        sleep(1);
    }

    if ($result['success']) {
        // Counts as an assertion so the test is not reported as risky.
        self::assertTrue(true);
    } else {
        self::fail($result['errorCode']);
    }
}
/**
 * Checks the generic CSV export endpoint.
 *
 * Fetches the 'user' export, stores it in the output directory, and verifies
 * that some row's first column matches the login from the test configuration.
 * Afterwards requests the export for 'aaa' and requires that call to raise
 * an \InvalidArgumentException.
 */
public function testGet()
{
    $exported = $this->api->csv()->get('user');
    $exportPath = __DIR__ . '/../../_output/export-csv.csv';
    file_put_contents($exportPath, $exported);

    $reader = new Reader($exportPath, 'r+', false);
    $loginSeen = false;
    for ($record = $reader->getRow(); $record; $record = $reader->getRow()) {
        $expectedLogin = UserTestHelper::getConfig()['login'];
        if ($expectedLogin == reset($record)) {
            $loginSeen = true;
        }
    }
    self::assertTrue($loginSeen);

    // Capture the exception first, then decide pass/fail outside the try block.
    $caught = null;
    try {
        $this->api->csv()->get('aaa');
    } catch (\InvalidArgumentException $e) {
        $caught = $e;
    }
    if (null === $caught) {
        self::fail('Not throw InvalidArgumentException.');
    }
    self::assertTrue(true);
}
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY); //$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY); //$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY); /** * CSV * datasetName, newDatasetName */ $CkanManager->resultsDir = $results_dir; foreach (glob(CKANMNGR_DATA_DIR . '/rename*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); $basename = str_replace('.csv', '', basename($csv_file)); file_put_contents($results_dir . '/' . $basename . '_rename.log', $status, FILE_APPEND | LOCK_EX); $csv = new EasyCSV\Reader($csv_file, 'r+', false); $i = 1; while (true) { $row = $csv->getRow(); if (!$row) { break; } // skip headers if (in_array(trim(strtolower($row['0'])), ['dataset', 'url', 'old dataset url', 'from'])) { continue; } $datasetName = trim(basename($row['0'])); $newDatasetName = basename($row['1']); printf('[%04d] ', $i++); $CkanManager->renameDataset($datasetName, $newDatasetName, $basename); }
$prod_commerce = $ProdCkanManager->exportBrief('organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov ' . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset AND -harvest_source_id:[\'\' TO *]'); $prod->writeFromArray($prod_commerce); } else { $prod = new Reader($results_dir . '/prod.csv'); $prod_commerce = $prod->getAll(); } echo 'new.csv' . PHP_EOL; if (!is_file($results_dir . '/new.csv')) { $new = new Writer($results_dir . '/new.csv'); $new->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']); $UatCkanManager = new CkanManager(CKAN_API_URL); $UatCkanManager->resultsDir = $results_dir; $new_commerce = $UatCkanManager->exportBrief('extras_harvest_source_title:Commerce Non Spatial Data.json Harvest Source'); $new->writeFromArray($new_commerce); } else { $new = new Reader($results_dir . '/new.csv'); $new_commerce = $new->getAll(); } $new_commerce_by_title = []; foreach ($new_commerce as $name => $dataset) { $title = $dataset['title_simple']; $new_commerce_by_title[$title] = isset($new_commerce_by_title[$title]) ? $new_commerce_by_title[$title] : []; $new_commerce_by_title[$title][] = $dataset; } echo 'prod_vs_new.csv' . PHP_EOL; is_file($results_dir . '/prod_vs_prod_commerce.csv') && unlink($results_dir . '/prod_vs_prod_commerce.csv'); $csv = new Writer($results_dir . '/prod_vs_prod_commerce.csv'); $csv->writeRow(['Prod Title', 'Prod URL', 'Prod Topics', 'Prod Categories', 'Matched', 'NEW Title', 'NEW URL', 'URL Match']); foreach ($prod_commerce as $name => $prod_dataset) { if (isset($new_commerce_by_title[$prod_dataset['title_simple']])) { foreach ($new_commerce_by_title[$prod_dataset['title_simple']] as $new_dataset) {
// If it takes more than 5 minutes => fail curl_setopt($curl_ch, CURLOPT_TIMEOUT, 60 * 5); // We don't want the header (use curl_getinfo()) curl_setopt($curl_ch, CURLOPT_HEADER, false); // Track the handle's request string curl_setopt($curl_ch, CURLINFO_HEADER_OUT, true); // Attempt to retrieve the modification date of the remote document. curl_setopt($curl_ch, CURLOPT_FILETIME, true); // Initialize cURL headers foreach (glob(CKANMNGR_DATA_DIR . '/check_*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; $basename = str_replace('.csv', '', basename($csv_file)); // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); $csv_source = new EasyCSV\Reader($csv_file, 'r+', false); $csv_destination = new EasyCSV\Writer($results_dir . '/' . $basename . '_log.csv'); $csv_destination->writeRow(['dataset', 'status', 'aapi found']); $i = 0; while (true) { if (!($i++ % 100)) { echo $i . PHP_EOL; } $row = $csv_source->getRow(); if (!$row) { break; } // skip headers if (in_array(trim(strtolower($row[0])), ['data.gov url'])) { continue; }
$cmp1_csv->writeFromArray($cmp1); } else { $cmp1_csv = new Reader($results_dir . '/cmp1.csv'); $cmp1_csv->getHeaders(); $cmp1 = $cmp1_csv->getAll(); } echo 'cmp2.csv' . PHP_EOL; if (!is_file($results_dir . '/cmp2.csv')) { $cmp2_csv = new Writer($results_dir . '/cmp2.csv'); $cmp2_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $CkanManager = new CkanManager(CKAN_API_URL); $CkanManager->resultsDir = $results_dir; $cmp2 = $CkanManager->exportBrief('organization:((eop-gov) OR (omb-eop-gov) OR (ondcp-eop-gov) OR (ceq-eop-gov) ' . 'OR (ostp-eop-gov) OR (ustr-eop-gov) OR (wh-eop-gov)) -DMS AND dataset_type:dataset'); $cmp2_csv->writeFromArray($cmp2); } else { $cmp2_csv = new Reader($results_dir . '/cmp2.csv'); $cmp2 = $cmp2_csv->getAll(); } $cmp2_by_title = $cmp2_by_guid = []; foreach ($cmp2 as $name => $dataset) { $title = $dataset['title_simple']; $cmp2_by_title[$title] = isset($cmp2_by_title[$title]) ? $cmp2_by_title[$title] : []; $cmp2_by_title[$title][] = $dataset; $guid = trim($dataset['guid']); if ($guid) { $cmp2_by_guid[$guid] = isset($cmp2_by_guid[$guid]) ? $cmp2_by_guid[$guid] : []; $cmp2_by_guid[$guid][] = $dataset; } } echo 'comparison.csv' . PHP_EOL; is_file($results_dir . '/comparison.csv') && unlink($results_dir . '/comparison.csv');
$prod_commerce = $ProdCkanManager->exportBrief('organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov ' . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset'); $prod->writeFromArray($prod_commerce); } else { $prod = new Reader($results_dir . '/prod.csv'); $prod_commerce = $prod->getAll(); } echo 'qa.csv' . PHP_EOL; if (!is_file($results_dir . '/qa.csv')) { $qa = new Writer($results_dir . '/qa.csv'); $qa->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']); $UatCkanManager = new CkanManager(CKAN_QA_API_URL); $UatCkanManager->resultsDir = $results_dir; $qa_commerce = $UatCkanManager->exportBrief('organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov ' . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset', '', 'http://qa-catalog-fe-data.reisys.com/dataset/'); $qa->writeFromArray($qa_commerce); } else { $qa = new Reader($results_dir . '/qa.csv'); $qa_commerce = $qa->getAll(); } $qa_commerce_by_title = []; foreach ($qa_commerce as $name => $dataset) { $title = $dataset['title_simple']; $qa_commerce_by_title[$title] = isset($qa_commerce_by_title[$title]) ? $qa_commerce_by_title[$title] : []; $qa_commerce_by_title[$title][] = $dataset; } echo 'prod_vs_qa.csv' . PHP_EOL; is_file($results_dir . '/prod_vs_qa_commerce.csv') && unlink($results_dir . '/prod_vs_qa_commerce.csv'); $csv = new Writer($results_dir . '/prod_vs_qa_commerce.csv'); $csv->writeRow(['Prod Title', 'Prod URL', 'Prod Topics', 'Prod Categories', 'Matched', 'QA Title', 'QA URL', 'URL Match']); foreach ($prod_commerce as $name => $prod_dataset) { if (isset($qa_commerce_by_title[$prod_dataset['title_simple']])) { foreach ($qa_commerce_by_title[$prod_dataset['title_simple']] as $qa_dataset) {
$prod_epa = $ProdCkanManager->exportBrief('organization:epa-gov'); $prod->writeFromArray($prod_epa); } else { $prod = new Reader($results_dir . '/prod.csv'); $prod_epa = $prod->getAll(); } echo 'qa.csv' . PHP_EOL; if (!is_file($results_dir . '/qa.csv')) { $qa = new Writer($results_dir . '/qa.csv'); $qa->writeRow(['title', 'title_simple', 'name', 'url', 'guid', 'topics', 'categories']); $QaCkanManager = new CkanManager(CKAN_QA_API_URL); $QaCkanManager->resultsDir = $results_dir; $qa_epa = $QaCkanManager->exportBrief('organization:epa-gov', '', 'http://qa-catalog-fe-data.reisys.com/dataset/'); $qa->writeFromArray($qa_epa); } else { $qa = new Reader($results_dir . '/qa.csv'); $qa_epa = $qa->getAll(); } $qa_epa_by_title = $qa_epa_by_guid = []; foreach ($qa_epa as $name => $dataset) { $title = $dataset['title_simple']; $qa_epa_by_title[$title] = isset($qa_epa_by_title[$title]) ? $qa_epa_by_title[$title] : []; $qa_epa_by_title[$title][] = $dataset; $guid = trim($dataset['guid']); if ($guid) { $qa_epa_by_guid[$guid] = isset($qa_epa_by_guid[$guid]) ? $qa_epa_by_guid[$guid] : []; $qa_epa_by_guid[$guid][] = $dataset; } } echo 'prod_vs_qa.csv' . PHP_EOL; is_file($results_dir . '/prod_vs_qa_epa.csv') && unlink($results_dir . '/prod_vs_qa_epa.csv');
/**
 * Builds two single-argument data sets, each wrapping one Reader.
 *
 * Both readers are opened in 'r+' mode with `false` as the third constructor
 * argument. The first set reads read_header_line.csv with the delimiter left
 * untouched; the second reads read_header_line_sc.csv with the delimiter set
 * to ';'. Presumably consumed as a PHPUnit data provider (not visible here).
 *
 * @return array List of argument lists, one Reader per list.
 */
public function getReadersNoHeadersFirstRow()
{
    $semicolonReader = new Reader(__DIR__ . '/read_header_line_sc.csv', 'r+', false);
    $semicolonReader->setDelimiter(';');

    $defaultReader = new Reader(__DIR__ . '/read_header_line.csv', 'r+', false);

    $dataSets = array();
    $dataSets[] = array($defaultReader);
    $dataSets[] = array($semicolonReader);

    return $dataSets;
}
$prod_commerce = $ProdCkanManager->exportBrief('organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov ' . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset AND -harvest_source_id:[\'\' TO *]'); $prod->writeFromArray($prod_commerce); } else { $prod = new Reader($results_dir . '/prod.csv'); $prod_commerce = $prod->getAll(); } echo 'uat.csv' . PHP_EOL; if (!is_file($results_dir . '/uat.csv')) { $uat = new Writer($results_dir . '/uat.csv'); $uat->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']); $UatCkanManager = new CkanManager(CKAN_UAT_API_URL); $UatCkanManager->resultsDir = $results_dir; $uat_commerce = $UatCkanManager->exportBrief('extras_harvest_source_title:Commerce JSON', '', 'http://uat-catalog-fe-data.reisys.com/dataset/'); $uat->writeFromArray($uat_commerce); } else { $uat = new Reader($results_dir . '/uat.csv'); $uat_commerce = $uat->getAll(); } $uat_commerce_by_title = []; foreach ($uat_commerce as $name => $dataset) { $title = $dataset['title_simple']; $uat_commerce_by_title[$title] = isset($uat_commerce_by_title[$title]) ? $uat_commerce_by_title[$title] : []; $uat_commerce_by_title[$title][] = $dataset; } echo 'prod_vs_uat.csv' . PHP_EOL; is_file($results_dir . '/prod_vs_uat_commerce.csv') && unlink($results_dir . '/prod_vs_uat_commerce.csv'); $csv = new Writer($results_dir . '/prod_vs_uat_commerce.csv'); $csv->writeRow(['Prod Title', 'Prod URL', 'Prod Topics', 'Prod Categories', 'Matched', 'UAT Title', 'UAT URL', 'URL Match']); foreach ($prod_commerce as $name => $prod_dataset) { if (isset($uat_commerce_by_title[$prod_dataset['title_simple']])) { foreach ($uat_commerce_by_title[$prod_dataset['title_simple']] as $uat_dataset) {
$prod_nuclear = $ProdCkanManager->exportBrief('organization:(nrc-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset'); $prod->writeFromArray($prod_nuclear); } else { $prod = new Reader($results_dir . '/prod.csv'); $prod_nuclear = $prod->getAll(); } echo 'uat.csv' . PHP_EOL; if (!is_file($results_dir . '/uat.csv')) { $uat = new Writer($results_dir . '/uat.csv'); $uat->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']); $UatCkanManager = new CkanManager(CKAN_UAT_API_URL); $UatCkanManager->resultsDir = $results_dir; $uat_nuclear = $UatCkanManager->exportBrief('extras_harvest_source_title:NRC data.json', '', 'http://uat-catalog-fe-data.reisys.com/dataset/'); $uat->writeFromArray($uat_nuclear); } else { $uat = new Reader($results_dir . '/uat.csv'); $uat_nuclear = $uat->getAll(); } $uat_nuclear_by_title = []; foreach ($uat_nuclear as $name => $dataset) { $title = $dataset['title_simple']; $uat_nuclear_by_title[$title] = isset($uat_nuclear_by_title[$title]) ? $uat_nuclear_by_title[$title] : []; $uat_nuclear_by_title[$title][] = $dataset; } echo 'prod_vs_uat.csv' . PHP_EOL; is_file($results_dir . '/prod_vs_uat_nuclear_geospatial.csv') && unlink($results_dir . '/prod_vs_uat_nuclear_geospatial.csv'); $csv = new Writer($results_dir . '/prod_vs_uat_nuclear_geospatial.csv'); $csv->writeRow(['Prod Title', 'Prod URL', 'Prod Topics', 'Prod Categories', 'Matched', 'UAT Title', 'UAT URL']); foreach ($prod_nuclear as $name => $prod_dataset) { if (isset($uat_nuclear_by_title[$prod_dataset['title_simple']])) { foreach ($uat_nuclear_by_title[$prod_dataset['title_simple']] as $uat_dataset) {
/**
 * {@inheritdoc}
 *
 * Opens the file at $this->path with a Reader ('r+' mode, `false` as the
 * third constructor argument) and pulls rows until getRow() returns a falsy
 * value. Each row is discarded; nothing is returned.
 */
public function readerTest()
{
    $reader = new Reader($this->path, 'r+', false);

    do {
        $record = $reader->getRow();
    } while ($record);
}
$json_backup_epa = $ProdCkanManager->exportBrief('organization:epa-gov AND metadata_type:geospatial'); $json->writeFromArray($json_backup_epa); } else { $json = new Reader($results_dir . '/json.csv'); $json_backup_epa = $json->getAll(); } echo 'prod.csv' . PHP_EOL; if (!is_file($results_dir . '/prod.csv')) { $prod = new Writer($results_dir . '/prod.csv'); $prod->writeRow(['title', 'title_simple', 'name', 'url', 'guid', 'topics', 'categories']); $QaCkanManager = new CkanManager(CKAN_UAT_API_URL); $QaCkanManager->resultsDir = $results_dir; $prod_epa = $QaCkanManager->exportBrief('organization:epa-gov AND metadata_type:geospatial'); $prod->writeFromArray($prod_epa); } else { $prod = new Reader($results_dir . '/prod.csv'); $prod_epa = $prod->getAll(); } $prod_epa_by_title = $prod_epa_by_guid = []; foreach ($prod_epa as $name => $dataset) { $title = $dataset['title_simple']; $prod_epa_by_title[$title] = isset($prod_epa_by_title[$title]) ? $prod_epa_by_title[$title] : []; $prod_epa_by_title[$title][] = $dataset; $guid = trim($dataset['guid']); if ($guid) { $prod_epa_by_guid[$guid] = isset($prod_epa_by_guid[$guid]) ? $prod_epa_by_guid[$guid] : []; $prod_epa_by_guid[$guid][] = $dataset; } } echo 'json_vs_prod.csv' . PHP_EOL; is_file($results_dir . '/json_vs_prod_epa.csv') && unlink($results_dir . '/json_vs_prod_epa.csv');
$prod_ntsb = $ProdCkanManager->exportBrief('organization:ntsb-gov AND dataset_type:dataset'); $prod->writeFromArray($prod_ntsb); } else { $prod = new Reader($results_dir . '/prod.csv'); $prod_ntsb = $prod->getAll(); } echo 'uat.csv' . PHP_EOL; if (!is_file($results_dir . '/uat.csv')) { $uat = new Writer($results_dir . '/uat.csv'); $uat->writeRow(['title', 'title_simple', 'name', 'url', 'guid', 'topics', 'categories']); $QaCkanManager = new CkanManager(CKAN_UAT_API_URL); $QaCkanManager->resultsDir = $results_dir; $uat_ntsb = $QaCkanManager->exportBrief('organization:ntsb-gov AND (harvest_source_title:NTSB*) AND dataset_type:dataset', '', 'http://uat-catalog-fe-data.reisys.com/dataset/'); $uat->writeFromArray($uat_ntsb); } else { $uat = new Reader($results_dir . '/uat.csv'); $uat_ntsb = $uat->getAll(); } $uat_ntsb_by_title = $uat_ntsb_by_guid = []; foreach ($uat_ntsb as $name => $dataset) { $title = $dataset['title_simple']; $uat_ntsb_by_title[$title] = isset($uat_ntsb_by_title[$title]) ? $uat_ntsb_by_title[$title] : []; $uat_ntsb_by_title[$title][] = $dataset; $guid = trim($dataset['guid']); if ($guid) { $uat_ntsb_by_guid[$guid] = isset($uat_ntsb_by_guid[$guid]) ? $uat_ntsb_by_guid[$guid] : []; $uat_ntsb_by_guid[$guid][] = $dataset; } } echo 'prod_vs_uat.csv' . PHP_EOL; is_file($results_dir . '/prod_vs_uat_ntsb.csv') && unlink($results_dir . '/prod_vs_uat_ntsb.csv');