// Tail of the branch that stores a fresh prod export; the matching `if` opens above this fragment.
    file_put_contents($results_dir . '/prod.json', json_encode($prod_epa, JSON_PRETTY_PRINT));
    $prod->writeFromArray($prod_epa);

    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_epa) . PHP_EOL . PHP_EOL;
} else {
    // Decode as associative arrays: without the flag json_decode() returns stdClass,
    // which sizeof() cannot count and which differs from the value written via
    // writeFromArray() in the branch above.
    $prod_epa = json_decode(file_get_contents($results_dir . '/prod.json'), true);

    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_epa) . PHP_EOL . PHP_EOL;
}

// Brief export built from the local epa-gov.json dump, cached as json_backup.json / json_backup.csv.
echo 'json_backup.json' . PHP_EOL;
if (!is_file($results_dir . '/json_backup.json')) {
    $json_backup_csv = new Writer($results_dir . '/json_backup.csv');
    $json_backup_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);

    $json_backupCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $json_backupCkanManager->resultsDir = $results_dir;

    $json_backup_epa = $json_backupCkanManager->exportBriefFromJson(CKANMNGR_DATA_DIR . '/epa-gov.json');
    file_put_contents($results_dir . '/json_backup.json', json_encode($json_backup_epa, JSON_PRETTY_PRINT));
    $json_backup_csv->writeFromArray($json_backup_epa);

    echo PHP_EOL . 'datasets from json_backup: ' . sizeof($json_backup_epa) . PHP_EOL . PHP_EOL;
} else {
    // Same reasoning as for prod.json: keep the cached shape identical to the fresh one.
    $json_backup_epa = json_decode(file_get_contents($results_dir . '/json_backup.json'), true);

    echo PHP_EOL . 'datasets from json_backup: ' . sizeof($json_backup_epa) . PHP_EOL . PHP_EOL;
}

$json_backup_tags = [];

$json_datasets = json_decode(file_get_contents(CKANMNGR_DATA_DIR . '/epa-gov.json'), true); //assoc
foreach ($json_datasets as $dataset_array) {
    $dataset = new Dataset($dataset_array);
    $groups_tags = $dataset->get_groups_and_tags();
    // Datasets without any groups/tags are skipped.
    if (!$groups_tags) {
        unset($dataset);
        continue;
    }
// Tail of the branch that stores a fresh prod export; the matching `if` opens above this fragment.
    file_put_contents($results_dir . '/prod.json', json_encode($prod_pbgc, JSON_PRETTY_PRINT));
    $prod->writeFromArray($prod_pbgc);

    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_pbgc) . PHP_EOL . PHP_EOL;
} else {
    // Decode as associative arrays: without the flag json_decode() returns stdClass,
    // which sizeof() cannot count.
    $prod_pbgc = json_decode(file_get_contents($results_dir . '/prod.json'), true);

    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_pbgc) . PHP_EOL . PHP_EOL;
}

// UAT export, cached as uat.json / uat.csv.
echo 'uat.json' . PHP_EOL;
if (!is_file($results_dir . '/uat.json')) {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);

    $uatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $uatCkanManager->resultsDir = $results_dir;

    $uat_pbgc = $uatCkanManager->exportBrief(
        'organization:pbgc-gov AND extras_harvest_source_title:PDGC Data.json Source AND dataset_type:dataset',
        '',
        'http://uat-catalog-fe-data.reisys.com/dataset/'
    );
    file_put_contents($results_dir . '/uat.json', json_encode($uat_pbgc, JSON_PRETTY_PRINT));
    $uat->writeFromArray($uat_pbgc);

    echo PHP_EOL . 'datasets from uat: ' . sizeof($uat_pbgc) . PHP_EOL . PHP_EOL;
} else {
    // Must be associative: the grouping loop below reads $dataset['title_simple'] and
    // $dataset['guid'], which is a fatal error on stdClass objects.
    $uat_pbgc = json_decode(file_get_contents($results_dir . '/uat.json'), true);

    echo PHP_EOL . 'datasets from uat: ' . sizeof($uat_pbgc) . PHP_EOL . PHP_EOL;
}

// Group UAT datasets by simplified title and by non-empty (trimmed) guid;
// each bucket is a list because several datasets may share a key.
$uat_pbgc_by_title = $uat_pbgc_by_guid = [];

foreach ($uat_pbgc as $name => $dataset) {
    $title = $dataset['title_simple'];

    $uat_pbgc_by_title[$title] = isset($uat_pbgc_by_title[$title]) ? $uat_pbgc_by_title[$title] : [];
    $uat_pbgc_by_title[$title][] = $dataset;

    $guid = trim($dataset['guid']);
    if ($guid) {
        $uat_pbgc_by_guid[$guid] = isset($uat_pbgc_by_guid[$guid]) ? $uat_pbgc_by_guid[$guid] : [];
        $uat_pbgc_by_guid[$guid][] = $dataset;
    }
// Tail of the branch that exports prod datasets; the matching `if` opens above this fragment.
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_commerce = $ProdCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset AND -harvest_source_id:[\'\' TO *]'
    );
    $prod->writeFromArray($prod_commerce);
} else {
    // prod.csv is read back through Reader instead of re-exporting.
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
}

echo 'new.csv' . PHP_EOL;

// Reuse new.csv when it is already on disk; otherwise export and write it.
if (is_file($results_dir . '/new.csv')) {
    $new = new Reader($results_dir . '/new.csv');
    $new_commerce = $new->getAll();
} else {
    $new = new Writer($results_dir . '/new.csv');
    $new->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'topics',
        'categories',
    ]);

    // NOTE(review): the variable is called $UatCkanManager but is built with CKAN_API_URL — confirm intended.
    $UatCkanManager = new CkanManager(CKAN_API_URL);
    $UatCkanManager->resultsDir = $results_dir;

    $new_commerce = $UatCkanManager->exportBrief(
        'extras_harvest_source_title:Commerce Non Spatial Data.json Harvest Source'
    );
    $new->writeFromArray($new_commerce);
}

// Bucket the "new" datasets by simplified title (several datasets may share one).
$new_commerce_by_title = [];
foreach ($new_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];

    if (!isset($new_commerce_by_title[$title])) {
        $new_commerce_by_title[$title] = [];
    }
    $new_commerce_by_title[$title][] = $dataset;
}

// NOTE(review): the label printed here differs from the file actually written below.
echo 'prod_vs_new.csv' . PHP_EOL;

// Remove any previous report before creating the new one.
if (is_file($results_dir . '/prod_vs_prod_commerce.csv')) {
    unlink($results_dir . '/prod_vs_prod_commerce.csv');
}

$csv = new Writer($results_dir . '/prod_vs_prod_commerce.csv');
$csv->writeRow([
    'Prod Title',
    'Prod URL',
    'Prod Topics',
    'Prod Categories',
    'Matched',
    'NEW Title',
    'NEW URL',
    'URL Match',
]);

foreach ($prod_commerce as $name => $prod_dataset) {
// Tail of the branch that stores a fresh prod export; the matching `if` opens above this fragment.
    file_put_contents($results_dir . '/prod.json', json_encode($prod_noaa, JSON_PRETTY_PRINT));
    $prod->writeFromArray($prod_noaa);

    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL;
} else {
    // Decode as associative arrays: without the flag json_decode() returns stdClass,
    // which sizeof() cannot count.
    $prod_noaa = json_decode(file_get_contents($results_dir . '/prod.json'), true);

    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL;
}

// UAT export, cached as uat.json / uat.csv.
echo 'uat.json' . PHP_EOL;
if (!is_file($results_dir . '/uat.json')) {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);

    $uatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $uatCkanManager->resultsDir = $results_dir;

    $uat_noaa = $uatCkanManager->exportBrief(
        'organization:noaa-gov AND extras_harvest_source_title:NOAA New CSW AND dataset_type:dataset',
        '',
        'http://uat-catalog-fe-data.reisys.com/dataset/'
    );
    file_put_contents($results_dir . '/uat.json', json_encode($uat_noaa, JSON_PRETTY_PRINT));
    $uat->writeFromArray($uat_noaa);

    echo PHP_EOL . 'datasets from uat: ' . sizeof($uat_noaa) . PHP_EOL . PHP_EOL;
} else {
    // Must be associative: the grouping loop below reads $dataset['title_simple'] and
    // $dataset['guid'], which is a fatal error on stdClass objects.
    $uat_noaa = json_decode(file_get_contents($results_dir . '/uat.json'), true);

    echo PHP_EOL . 'datasets from uat: ' . sizeof($uat_noaa) . PHP_EOL . PHP_EOL;
}

// Group UAT datasets by simplified title and by non-empty (trimmed) guid;
// each bucket is a list because several datasets may share a key.
$uat_noaa_by_title = $uat_noaa_by_guid = [];

foreach ($uat_noaa as $name => $dataset) {
    $title = $dataset['title_simple'];

    $uat_noaa_by_title[$title] = isset($uat_noaa_by_title[$title]) ? $uat_noaa_by_title[$title] : [];
    $uat_noaa_by_title[$title][] = $dataset;

    $guid = trim($dataset['guid']);
    if ($guid) {
        $uat_noaa_by_guid[$guid] = isset($uat_noaa_by_guid[$guid]) ? $uat_noaa_by_guid[$guid] : [];
        $uat_noaa_by_guid[$guid][] = $dataset;
    }
// Tail of the branch that exports prod datasets; the matching `if` opens above this fragment.
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_commerce = $ProdCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset'
    );
    $prod->writeFromArray($prod_commerce);
} else {
    // prod.csv is read back through Reader instead of re-exporting.
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
}

echo 'qa.csv' . PHP_EOL;

// Reuse qa.csv when it is already on disk; otherwise export from QA and write it.
if (is_file($results_dir . '/qa.csv')) {
    $qa = new Reader($results_dir . '/qa.csv');
    $qa_commerce = $qa->getAll();
} else {
    $qa = new Writer($results_dir . '/qa.csv');
    $qa->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'topics',
        'categories',
    ]);

    // Named $UatCkanManager in this script, but it is built with CKAN_QA_API_URL.
    $UatCkanManager = new CkanManager(CKAN_QA_API_URL);
    $UatCkanManager->resultsDir = $results_dir;

    $qa_commerce = $UatCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset',
        '',
        'http://qa-catalog-fe-data.reisys.com/dataset/'
    );
    $qa->writeFromArray($qa_commerce);
}

// Bucket the QA datasets by simplified title (several datasets may share one).
$qa_commerce_by_title = [];
foreach ($qa_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];

    if (!isset($qa_commerce_by_title[$title])) {
        $qa_commerce_by_title[$title] = [];
    }
    $qa_commerce_by_title[$title][] = $dataset;
}

echo 'prod_vs_qa.csv' . PHP_EOL;

// Remove any previous report before creating the new one.
if (is_file($results_dir . '/prod_vs_qa_commerce.csv')) {
    unlink($results_dir . '/prod_vs_qa_commerce.csv');
}

$csv = new Writer($results_dir . '/prod_vs_qa_commerce.csv');
$csv->writeRow([
    'Prod Title',
    'Prod URL',
    'Prod Topics',
    'Prod Categories',
    'Matched',
    'QA Title',
    'QA URL',
    'URL Match',
]);

foreach ($prod_commerce as $name => $prod_dataset) {
// Tail of the branch that exports the first comparison set (cmp1); the matching `if` opens above this fragment.
    $CkanManager->resultsDir = $results_dir;

    $cmp1 = $CkanManager->exportBrief(
        'organization:((eop-gov) OR (omb-eop-gov) OR (ondcp-eop-gov) OR (ceq-eop-gov) '
        . 'OR (ostp-eop-gov) OR (ustr-eop-gov) OR (wh-eop-gov)) DMS AND dataset_type:dataset'
    );
    $cmp1_csv->writeFromArray($cmp1);
} else {
    $cmp1_csv = new Reader($results_dir . '/cmp1.csv');
    // Return value is discarded; the call is kept in case it advances the reader — TODO confirm.
    $cmp1_csv->getHeaders();
    $cmp1 = $cmp1_csv->getAll();
}

echo 'cmp2.csv' . PHP_EOL;

// Reuse cmp2.csv when it is already on disk; otherwise export the "-DMS" set and write it.
if (is_file($results_dir . '/cmp2.csv')) {
    $cmp2_csv = new Reader($results_dir . '/cmp2.csv');
    $cmp2 = $cmp2_csv->getAll();
} else {
    $cmp2_csv = new Writer($results_dir . '/cmp2.csv');
    $cmp2_csv->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'identifier',
        'guid',
        'topics',
        'categories',
    ]);

    $CkanManager = new CkanManager(CKAN_API_URL);
    $CkanManager->resultsDir = $results_dir;

    $cmp2 = $CkanManager->exportBrief(
        'organization:((eop-gov) OR (omb-eop-gov) OR (ondcp-eop-gov) OR (ceq-eop-gov) '
        . 'OR (ostp-eop-gov) OR (ustr-eop-gov) OR (wh-eop-gov)) -DMS AND dataset_type:dataset'
    );
    $cmp2_csv->writeFromArray($cmp2);
}

// Index cmp2 by simplified title and by non-empty (trimmed) guid; buckets are lists.
$cmp2_by_guid = [];
$cmp2_by_title = [];

foreach ($cmp2 as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($cmp2_by_title[$title])) {
        $cmp2_by_title[$title] = [];
    }
    $cmp2_by_title[$title][] = $dataset;

    $guid = trim($dataset['guid']);
    if (!$guid) {
        continue;
    }

    if (!isset($cmp2_by_guid[$guid])) {
        $cmp2_by_guid[$guid] = [];
    }
    $cmp2_by_guid[$guid][] = $dataset;
}
// Tail of the branch that exports prod datasets; the matching `if` opens above this fragment.
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_epa = $ProdCkanManager->exportBrief('organization:epa-gov');
    $prod->writeFromArray($prod_epa);
} else {
    // prod.csv is read back through Reader instead of re-exporting.
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_epa = $prod->getAll();
}

echo 'qa.csv' . PHP_EOL;

// Reuse qa.csv when it is already on disk; otherwise export from QA and write it.
if (is_file($results_dir . '/qa.csv')) {
    $qa = new Reader($results_dir . '/qa.csv');
    $qa_epa = $qa->getAll();
} else {
    $qa = new Writer($results_dir . '/qa.csv');
    $qa->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'guid',
        'topics',
        'categories',
    ]);

    $QaCkanManager = new CkanManager(CKAN_QA_API_URL);
    $QaCkanManager->resultsDir = $results_dir;

    $qa_epa = $QaCkanManager->exportBrief(
        'organization:epa-gov',
        '',
        'http://qa-catalog-fe-data.reisys.com/dataset/'
    );
    $qa->writeFromArray($qa_epa);
}

// Index QA datasets by simplified title and by non-empty (trimmed) guid; buckets are lists.
$qa_epa_by_guid = [];
$qa_epa_by_title = [];

foreach ($qa_epa as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($qa_epa_by_title[$title])) {
        $qa_epa_by_title[$title] = [];
    }
    $qa_epa_by_title[$title][] = $dataset;

    $guid = trim($dataset['guid']);
    if (!$guid) {
        continue;
    }

    if (!isset($qa_epa_by_guid[$guid])) {
        $qa_epa_by_guid[$guid] = [];
    }
    $qa_epa_by_guid[$guid][] = $dataset;
}
// Tail of the branch that stores a fresh prod export; the matching `if` opens above this fragment.
    file_put_contents($results_dir . '/prod.json', json_encode($prod_noaa, JSON_PRETTY_PRINT));
    $prod->writeFromArray($prod_noaa);

    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL;
} else {
    // Decode as associative arrays: without the flag json_decode() returns stdClass,
    // which sizeof() cannot count and which differs from the value written via
    // writeFromArray() in the branch above.
    $prod_noaa = json_decode(file_get_contents($results_dir . '/prod.json'), true);

    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL;
}

// Brief export built from the local noaa-gov_geospatial_with_tags.json dump,
// cached as json_backup.json / json_backup.csv.
echo 'json_backup.json' . PHP_EOL;
if (!is_file($results_dir . '/json_backup.json')) {
    $json_backup_csv = new Writer($results_dir . '/json_backup.csv');
    $json_backup_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);

    $json_backupCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $json_backupCkanManager->resultsDir = $results_dir;

    $json_backup_noaa = $json_backupCkanManager->exportBriefFromJson(
        CKANMNGR_DATA_DIR . '/noaa-gov_geospatial_with_tags.json'
    );
    file_put_contents($results_dir . '/json_backup.json', json_encode($json_backup_noaa, JSON_PRETTY_PRINT));
    $json_backup_csv->writeFromArray($json_backup_noaa);

    echo PHP_EOL . 'datasets from json_backup: ' . sizeof($json_backup_noaa) . PHP_EOL . PHP_EOL;
} else {
    // Same reasoning as for prod.json: keep the cached shape identical to the fresh one.
    $json_backup_noaa = json_decode(file_get_contents($results_dir . '/json_backup.json'), true);

    echo PHP_EOL . 'datasets from json_backup: ' . sizeof($json_backup_noaa) . PHP_EOL . PHP_EOL;
}

$json_backup_tags = [];

$json_datasets = json_decode(file_get_contents(CKANMNGR_DATA_DIR . '/noaa-gov_geospatial_with_tags.json'), true); //assoc
foreach ($json_datasets as $dataset_array) {
    $dataset = new Dataset($dataset_array);
    $groups_tags = $dataset->get_groups_and_tags();
    // Datasets without any groups/tags are skipped.
    if (!$groups_tags) {
        unset($dataset);
        continue;
    }
//    'url',
//    'identifier',
//    'org title',
//    'org name',
//    'topics',
//    'categories',
//]);

$CkanManager->resultsDir = $results_dir;

// Query presets from earlier runs. Exactly one exportShort() call is active; the
// others are kept commented out so they can be re-enabled.
//$brief = $CkanManager->exportShort('extras_license:"https\://creativecommons.org/publicdomain/zero/1.0/" AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('','((collection_package_id:* OR *:*) AND license_id:"cc-by-sa" AND license:"https\://creativecommons.org/publicdomain/zero/1.0/") AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('%28%28collection_package_id:*%20OR%20*:*%29+AND+license_id:"cc-by-sa"+AND+license:"https://creativecommons.org/publicdomain/zero/1.0/"%29');
//$brief = $CkanManager->exportShort('organization:wake-county AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('organization:gsa-gov AND harvest_source_title:Open* AND (dataset_type:dataset)',
//$brief = $CkanManager->exportShort('organization:doe-gov AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('organization:dhs-gov AND (harvest_source_title:DHS*) AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('organization:epa-gov AND (harvest_source_title:*Gateway) AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('organization:epa-gov AND (metadata_type:geospatial) AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('organization:nasa-gov AND (harvest_source_title:NASA*) AND (dataset_type:dataset)');
$brief = $CkanManager->exportShort('organization:ntsb-gov AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('organization:noaa-gov AND metadata_type:geospatial AND (dataset_type:dataset) AND groups:*');
//$brief = $CkanManager->exportShort('metadata-source:dms AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('organization:doj-gov AND (dataset_type:dataset)');
//    'http://uat-catalog-fe-data.reisys.com/dataset/');
//$brief = $CkanManager->exportShort('(extra_harvest_source_title:Open+*) AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('organization:gsa-gov AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('extras_harvest_source_title:Test ISO WAF AND (dataset_type:dataset)');

// The CSV header row is taken from the keys of the first exported row. A query that
// matches nothing leaves no first row to inspect, so the export is skipped instead of
// failing inside array_keys().
if (!empty($brief)) {
    $headers = array_keys(reset($brief));
    $csv->writeRow($headers);
    $csv->writeFromArray($brief);
} else {
    echo 'no datasets returned by the query; nothing written' . PHP_EOL;
}

// show running time on finish
timer();
// Tail of the branch that exports prod datasets; the matching `if` opens above this fragment.
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_commerce = $ProdCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset AND -harvest_source_id:[\'\' TO *]'
    );
    $prod->writeFromArray($prod_commerce);
} else {
    // prod.csv is read back through Reader instead of re-exporting.
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
}

echo 'uat.csv' . PHP_EOL;

// Reuse uat.csv when it is already on disk; otherwise export from UAT and write it.
if (is_file($results_dir . '/uat.csv')) {
    $uat = new Reader($results_dir . '/uat.csv');
    $uat_commerce = $uat->getAll();
} else {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'topics',
        'categories',
    ]);

    $UatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $UatCkanManager->resultsDir = $results_dir;

    $uat_commerce = $UatCkanManager->exportBrief(
        'extras_harvest_source_title:Commerce JSON',
        '',
        'http://uat-catalog-fe-data.reisys.com/dataset/'
    );
    $uat->writeFromArray($uat_commerce);
}

// Bucket the UAT datasets by simplified title (several datasets may share one).
$uat_commerce_by_title = [];
foreach ($uat_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];

    if (!isset($uat_commerce_by_title[$title])) {
        $uat_commerce_by_title[$title] = [];
    }
    $uat_commerce_by_title[$title][] = $dataset;
}

echo 'prod_vs_uat.csv' . PHP_EOL;

// Remove any previous report before creating the new one.
if (is_file($results_dir . '/prod_vs_uat_commerce.csv')) {
    unlink($results_dir . '/prod_vs_uat_commerce.csv');
}

$csv = new Writer($results_dir . '/prod_vs_uat_commerce.csv');
$csv->writeRow([
    'Prod Title',
    'Prod URL',
    'Prod Topics',
    'Prod Categories',
    'Matched',
    'UAT Title',
    'UAT URL',
    'URL Match',
]);

foreach ($prod_commerce as $name => $prod_dataset) {
// Tail of the branch that exports prod datasets; the matching `if` opens above this fragment.
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_nuclear = $ProdCkanManager->exportBrief(
        'organization:(nrc-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset'
    );
    $prod->writeFromArray($prod_nuclear);
} else {
    // prod.csv is read back through Reader instead of re-exporting.
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_nuclear = $prod->getAll();
}

echo 'uat.csv' . PHP_EOL;

// Reuse uat.csv when it is already on disk; otherwise export from UAT and write it.
if (is_file($results_dir . '/uat.csv')) {
    $uat = new Reader($results_dir . '/uat.csv');
    $uat_nuclear = $uat->getAll();
} else {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'topics',
        'categories',
    ]);

    $UatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $UatCkanManager->resultsDir = $results_dir;

    $uat_nuclear = $UatCkanManager->exportBrief(
        'extras_harvest_source_title:NRC data.json',
        '',
        'http://uat-catalog-fe-data.reisys.com/dataset/'
    );
    $uat->writeFromArray($uat_nuclear);
}

// Bucket the UAT datasets by simplified title (several datasets may share one).
$uat_nuclear_by_title = [];
foreach ($uat_nuclear as $name => $dataset) {
    $title = $dataset['title_simple'];

    if (!isset($uat_nuclear_by_title[$title])) {
        $uat_nuclear_by_title[$title] = [];
    }
    $uat_nuclear_by_title[$title][] = $dataset;
}

echo 'prod_vs_uat.csv' . PHP_EOL;

// Remove any previous report before creating the new one.
// NOTE(review): the report name says "geospatial" while the prod query above excludes
// metadata_type:geospatial — confirm the file name is intended.
if (is_file($results_dir . '/prod_vs_uat_nuclear_geospatial.csv')) {
    unlink($results_dir . '/prod_vs_uat_nuclear_geospatial.csv');
}

$csv = new Writer($results_dir . '/prod_vs_uat_nuclear_geospatial.csv');
$csv->writeRow([
    'Prod Title',
    'Prod URL',
    'Prod Topics',
    'Prod Categories',
    'Matched',
    'UAT Title',
    'UAT URL',
]);

foreach ($prod_nuclear as $name => $prod_dataset) {
// Tail of the branch that exports the "json" dataset list; the matching `if` opens above this fragment.
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $json_backup_epa = $ProdCkanManager->exportBrief('organization:epa-gov AND metadata_type:geospatial');
    $json->writeFromArray($json_backup_epa);
} else {
    // json.csv is read back through Reader instead of re-exporting.
    $json = new Reader($results_dir . '/json.csv');
    $json_backup_epa = $json->getAll();
}

echo 'prod.csv' . PHP_EOL;

// Reuse prod.csv when it is already on disk; otherwise export and write it.
if (is_file($results_dir . '/prod.csv')) {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_epa = $prod->getAll();
} else {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'guid',
        'topics',
        'categories',
    ]);

    // NOTE(review): the "prod" list is fetched through $QaCkanManager built with
    // CKAN_UAT_API_URL — confirm which environment is intended.
    $QaCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $QaCkanManager->resultsDir = $results_dir;

    $prod_epa = $QaCkanManager->exportBrief('organization:epa-gov AND metadata_type:geospatial');
    $prod->writeFromArray($prod_epa);
}

// Index $prod_epa by simplified title and by non-empty (trimmed) guid; buckets are lists.
$prod_epa_by_guid = [];
$prod_epa_by_title = [];

foreach ($prod_epa as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($prod_epa_by_title[$title])) {
        $prod_epa_by_title[$title] = [];
    }
    $prod_epa_by_title[$title][] = $dataset;

    $guid = trim($dataset['guid']);
    if (!$guid) {
        continue;
    }

    if (!isset($prod_epa_by_guid[$guid])) {
        $prod_epa_by_guid[$guid] = [];
    }
    $prod_epa_by_guid[$guid][] = $dataset;
}
// Tail of the branch that exports prod datasets; the matching `if` opens above this fragment.
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_ntsb = $ProdCkanManager->exportBrief('organization:ntsb-gov AND dataset_type:dataset');
    $prod->writeFromArray($prod_ntsb);
} else {
    // prod.csv is read back through Reader instead of re-exporting.
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_ntsb = $prod->getAll();
}

echo 'uat.csv' . PHP_EOL;

// Reuse uat.csv when it is already on disk; otherwise export from UAT and write it.
if (is_file($results_dir . '/uat.csv')) {
    $uat = new Reader($results_dir . '/uat.csv');
    $uat_ntsb = $uat->getAll();
} else {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'guid',
        'topics',
        'categories',
    ]);

    // Named $QaCkanManager in this script, but it is built with CKAN_UAT_API_URL.
    $QaCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $QaCkanManager->resultsDir = $results_dir;

    $uat_ntsb = $QaCkanManager->exportBrief(
        'organization:ntsb-gov AND (harvest_source_title:NTSB*) AND dataset_type:dataset',
        '',
        'http://uat-catalog-fe-data.reisys.com/dataset/'
    );
    $uat->writeFromArray($uat_ntsb);
}

// Index UAT datasets by simplified title and by non-empty (trimmed) guid; buckets are lists.
$uat_ntsb_by_guid = [];
$uat_ntsb_by_title = [];

foreach ($uat_ntsb as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($uat_ntsb_by_title[$title])) {
        $uat_ntsb_by_title[$title] = [];
    }
    $uat_ntsb_by_title[$title][] = $dataset;

    $guid = trim($dataset['guid']);
    if (!$guid) {
        continue;
    }

    if (!isset($uat_ntsb_by_guid[$guid])) {
        $uat_ntsb_by_guid[$guid] = [];
    }
    $uat_ntsb_by_guid[$guid][] = $dataset;
}
// Tail of the nested blocks that fill $statistics; they open above this fragment.
                $d['status'] = 'deleted';
            }
            array_push($statistics, $d);
        }
    }

    // Per-organization reports. Header rows are derived from the first record of each
    // list, so they are only written when the list actually has a first record;
    // otherwise array_keys() would be handed a missing element.
    $delete_csv = new Writer($results_dir . '/delete_' . $organization . '.csv');
    $delete_csv->writeRow(['url']);
    $delete_csv->writeFromArray($delete);

    $delete_full_csv = new Writer($results_dir . '/' . $organization . '_delete_full.csv');
    if (!empty($delete_full)) {
        $headers = array_keys(reset($delete_full));
        $delete_full_csv->writeRow($headers);
    }
    $delete_full_csv->writeFromArray($delete_full);

    $stats_csv = new Writer($results_dir . '/' . $organization . '_statistics.csv');
    if (!empty($statistics)) {
        $headers = array_keys(reset($statistics));
        $stats_csv->writeRow($headers);
    }
    $stats_csv->writeFromArray($statistics);

    $survivors_csv = new Writer($results_dir . '/' . $organization . '_survivors.csv');
    if (!empty($survivors)) {
        // reset() returns the first element whatever its key is.
        $headers = array_keys(reset($survivors));
        $survivors_csv->writeRow($headers);
    }
    $survivors_csv->writeFromArray($survivors);

    // Debug listing of deleted datasets grouped by title; the echo statements are
    // currently disabled, so the loop only tracks the current title.
    $stitle = '';
    foreach ($delete_full as $dataset) {
        if ($dataset['title_simple'] !== $stitle) {
            $stitle = $dataset['title_simple'];
//            echo PHP_EOL;
        }
//        echo printf('%20s %20s',$dataset['title_simple'],$dataset['name']).PHP_EOL;
    }
}

// show running time on finish
timer();
// Tail of the branch that exports prod datasets; the matching `if` opens above this fragment.
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_noaa = $ProdCkanManager->exportBrief(
        'organization:noaa-gov AND metadata_type:geospatial AND dataset_type:dataset'
    );
    $prod->writeFromArray($prod_noaa);
    file_put_contents($results_dir . '/prod.json', json_encode($prod_noaa, JSON_PRETTY_PRINT));
} else {
    // Decode as associative arrays so the cached value is an array like the one
    // passed to writeFromArray() above, not stdClass.
    $prod_noaa = json_decode(file_get_contents($results_dir . '/prod.json'), true);
}

// QA export, cached as qa.json / qa.csv.
echo 'qa.json' . PHP_EOL;
if (!is_file($results_dir . '/qa.json')) {
    $qa = new Writer($results_dir . '/qa.csv');
    $qa->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);

    $QaCkanManager = new CkanManager(CKAN_QA_API_URL);
    $QaCkanManager->resultsDir = $results_dir;

    $qa_noaa = $QaCkanManager->exportBrief(
        'organization:noaa-gov',
        '',
        'http://qa-catalog-fe-data.reisys.com/dataset/'
    );
    $qa->writeFromArray($qa_noaa);
    file_put_contents($results_dir . '/qa.json', json_encode($qa_noaa, JSON_PRETTY_PRINT));
} else {
    // Must be associative: the grouping loop below reads $dataset['title_simple'] and
    // $dataset['guid'], which is a fatal error on stdClass objects.
    $qa_noaa = json_decode(file_get_contents($results_dir . '/qa.json'), true);
}

// Group QA datasets by simplified title and by non-empty (trimmed) guid;
// each bucket is a list because several datasets may share a key.
$qa_noaa_by_title = $qa_noaa_by_guid = [];

foreach ($qa_noaa as $name => $dataset) {
    $title = $dataset['title_simple'];

    $qa_noaa_by_title[$title] = isset($qa_noaa_by_title[$title]) ? $qa_noaa_by_title[$title] : [];
    $qa_noaa_by_title[$title][] = $dataset;

    $guid = trim($dataset['guid']);
    if ($guid) {
        $qa_noaa_by_guid[$guid] = isset($qa_noaa_by_guid[$guid]) ? $qa_noaa_by_guid[$guid] : [];
        $qa_noaa_by_guid[$guid][] = $dataset;
    }
}