/**
 * Asserts that checkDatasetConsistency() returns true for the mock dataset
 * when the stubbed CKAN client hands that same dataset back from package_show.
 *
 * The client is a Prophecy double of CKAN\CkanClient: package_update() is
 * stubbed to return true, and package_show() returns a JSON envelope
 * (help / success / result) wrapping the mock dataset.
 */
public function testCheckDatasetConsistency()
{
    $dataset = $this->mockDataset;

    // JSON envelope the stubbed package_show() call answers with.
    $showResponse = json_encode([
        'help'    => 'some text',
        'success' => true,
        'result'  => $dataset,
    ]);

    $clientProphecy = $this->prophesize('CKAN\\CkanClient');
    $clientProphecy->package_update($dataset)->willReturn(true);
    $clientProphecy->package_show($dataset['name'])->willReturn($showResponse);

    // Swap the manager's client for the double before exercising it.
    $this->CkanManager->setCkan($clientProphecy->reveal());

    $this->assertTrue($this->CkanManager->checkDatasetConsistency($dataset));
}
<?php

namespace CKAN\Manager;

use EasyCSV\Writer;

require_once dirname(__DIR__) . '/inc/common.php';

/**
 * Create results dir for logs and json results
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_SHORT';

// The CSV writer below and the manager's resultsDir both point into this
// directory, so stop right away when it cannot be created.
if (!is_dir($results_dir) && !mkdir($results_dir)) {
    echo 'Unable to create results dir: ' . $results_dir . PHP_EOL;
    exit(1);
}

// Exactly one manager is active; the commented lines are alternative targets.
$CkanManager = new CkanManager(CKAN_API_URL);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);
//$CkanManager = new CkanManager(CKAN_UAT_API_URL);

$csv = new Writer($results_dir . '/export.' . date('Y-m-d') . '.csv');

//$csv->writeRow([
//    'ckan id',
//    'title',
//    'name',
//    'url',
//    'identifier',
//    'org title',
//    'org name',
//    'topics',
//    'categories',
//]);

$CkanManager->resultsDir = $results_dir;

//$brief = $CkanManager->exportShort('extras_license:"https\://creativecommons.org/publicdomain/zero/1.0/" AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('','((collection_package_id:* OR *:*) AND license_id:"cc-by-sa" AND license:"https\://creativecommons.org/publicdomain/zero/1.0/") AND (dataset_type:dataset)');
$prod->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $ProdCkanManager = new CkanManager(CKAN_API_URL); $ProdCkanManager->resultsDir = $results_dir; $prod_pbgc = $ProdCkanManager->exportBrief('organization:pbgc-gov AND dataset_type:dataset'); file_put_contents($results_dir . '/prod.json', json_encode($prod_pbgc, JSON_PRETTY_PRINT)); $prod->writeFromArray($prod_pbgc); echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_pbgc) . PHP_EOL . PHP_EOL; } else { $prod_pbgc = json_decode(file_get_contents($results_dir . '/prod.json')); echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_pbgc) . PHP_EOL . PHP_EOL; } echo 'uat.json' . PHP_EOL; if (!is_file($results_dir . '/uat.json')) { $uat = new Writer($results_dir . '/uat.csv'); $uat->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $uatCkanManager = new CkanManager(CKAN_UAT_API_URL); $uatCkanManager->resultsDir = $results_dir; $uat_pbgc = $uatCkanManager->exportBrief('organization:pbgc-gov AND extras_harvest_source_title:PDGC Data.json Source AND dataset_type:dataset', '', 'http://uat-catalog-fe-data.reisys.com/dataset/'); file_put_contents($results_dir . '/uat.json', json_encode($uat_pbgc, JSON_PRETTY_PRINT)); $uat->writeFromArray($uat_pbgc); echo PHP_EOL . 'datasets from uat: ' . sizeof($uat_pbgc) . PHP_EOL . PHP_EOL; } else { $uat_pbgc = json_decode(file_get_contents($results_dir . '/uat.json')); echo PHP_EOL . 'datasets from uat: ' . sizeof($uat_pbgc) . PHP_EOL . PHP_EOL; } $uat_pbgc_by_title = $uat_pbgc_by_guid = []; foreach ($uat_pbgc as $name => $dataset) { $title = $dataset['title_simple']; $uat_pbgc_by_title[$title] = isset($uat_pbgc_by_title[$title]) ? $uat_pbgc_by_title[$title] : []; $uat_pbgc_by_title[$title][] = $dataset; $guid = trim($dataset['guid']);
<?php /** * First run validation script, to find matches against CKAN, to get _legacy.csv file */ namespace CKAN\Manager; use EasyCSV; require_once dirname(__DIR__) . '/inc/common.php'; /** * Create results dir for logs */ $results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_RENAME_DATASETS'; mkdir($results_dir); $CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY); //$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY); //$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY); //$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY); /** * CSV * datasetName, newDatasetName */ $CkanManager->resultsDir = $results_dir; foreach (glob(CKANMNGR_DATA_DIR . '/rename*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); $basename = str_replace('.csv', '', basename($csv_file)); file_put_contents($results_dir . '/' . $basename . '_rename.log', $status, FILE_APPEND | LOCK_EX); $csv = new EasyCSV\Reader($csv_file, 'r+', false);
$prod->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $ProdCkanManager = new CkanManager(CKAN_API_URL); $ProdCkanManager->resultsDir = $results_dir; $prod_epa = $ProdCkanManager->exportBrief('organization:epa-gov AND metadata_type:geospatial AND dataset_type:dataset'); file_put_contents($results_dir . '/prod.json', json_encode($prod_epa, JSON_PRETTY_PRINT)); $prod->writeFromArray($prod_epa); echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_epa) . PHP_EOL . PHP_EOL; } else { $prod_epa = json_decode(file_get_contents($results_dir . '/prod.json')); echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_epa) . PHP_EOL . PHP_EOL; } echo 'json_backup.json' . PHP_EOL; if (!is_file($results_dir . '/json_backup.json')) { $json_backup_csv = new Writer($results_dir . '/json_backup.csv'); $json_backup_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $json_backupCkanManager = new CkanManager(CKAN_UAT_API_URL); $json_backupCkanManager->resultsDir = $results_dir; $json_backup_epa = $json_backupCkanManager->exportBriefFromJson(CKANMNGR_DATA_DIR . '/epa-gov.json'); file_put_contents($results_dir . '/json_backup.json', json_encode($json_backup_epa, JSON_PRETTY_PRINT)); $json_backup_csv->writeFromArray($json_backup_epa); echo PHP_EOL . 'datasets from json_backup: ' . sizeof($json_backup_epa) . PHP_EOL . PHP_EOL; } else { $json_backup_epa = json_decode(file_get_contents($results_dir . '/json_backup.json')); echo PHP_EOL . 'datasets from json_backup: ' . sizeof($json_backup_epa) . PHP_EOL . PHP_EOL; } $json_backup_tags = []; $json_datasets = json_decode(file_get_contents(CKANMNGR_DATA_DIR . '/epa-gov.json'), true); //assoc foreach ($json_datasets as $dataset_array) { $dataset = new Dataset($dataset_array); $groups_tags = $dataset->get_groups_and_tags();
/*
 * Production snapshot. When prod.csv is already present in the results dir it
 * is read back; otherwise the datasets are exported from CKAN_API_URL and
 * written to prod.csv.
 */
if (is_file($results_dir . '/prod.csv')) {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
} else {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);

    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_commerce = $ProdCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset AND -harvest_source_id:[\'\' TO *]'
    );
    $prod->writeFromArray($prod_commerce);
}

/*
 * "New" snapshot: same read-or-export scheme, keyed on new.csv. The export
 * selects by harvest source title and also talks to CKAN_API_URL.
 */
echo 'new.csv' . PHP_EOL;
if (is_file($results_dir . '/new.csv')) {
    $new = new Reader($results_dir . '/new.csv');
    $new_commerce = $new->getAll();
} else {
    $new = new Writer($results_dir . '/new.csv');
    $new->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);

    $UatCkanManager = new CkanManager(CKAN_API_URL);
    $UatCkanManager->resultsDir = $results_dir;

    $new_commerce = $UatCkanManager->exportBrief('extras_harvest_source_title:Commerce Non Spatial Data.json Harvest Source');
    $new->writeFromArray($new_commerce);
}

// Group the "new" datasets by simplified title; one title may map to several datasets.
$new_commerce_by_title = [];
foreach ($new_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($new_commerce_by_title[$title])) {
        $new_commerce_by_title[$title] = [];
    }
    $new_commerce_by_title[$title][] = $dataset;
}

echo 'prod_vs_new.csv' . PHP_EOL;
// NOTE(review): the message above names prod_vs_new.csv while the file removed
// here is prod_vs_prod_commerce.csv - confirm which name the comparison step uses.
if (is_file($results_dir . '/prod_vs_prod_commerce.csv')) {
    unlink($results_dir . '/prod_vs_prod_commerce.csv');
}
$cmp1_csv = new Writer($results_dir . '/cmp1.csv'); $cmp1_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $CkanManager = new CkanManager(CKAN_API_URL); $CkanManager->resultsDir = $results_dir; $cmp1 = $CkanManager->exportBrief('organization:((eop-gov) OR (omb-eop-gov) OR (ondcp-eop-gov) OR (ceq-eop-gov) ' . 'OR (ostp-eop-gov) OR (ustr-eop-gov) OR (wh-eop-gov)) DMS AND dataset_type:dataset'); $cmp1_csv->writeFromArray($cmp1); } else { $cmp1_csv = new Reader($results_dir . '/cmp1.csv'); $cmp1_csv->getHeaders(); $cmp1 = $cmp1_csv->getAll(); } echo 'cmp2.csv' . PHP_EOL; if (!is_file($results_dir . '/cmp2.csv')) { $cmp2_csv = new Writer($results_dir . '/cmp2.csv'); $cmp2_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $CkanManager = new CkanManager(CKAN_API_URL); $CkanManager->resultsDir = $results_dir; $cmp2 = $CkanManager->exportBrief('organization:((eop-gov) OR (omb-eop-gov) OR (ondcp-eop-gov) OR (ceq-eop-gov) ' . 'OR (ostp-eop-gov) OR (ustr-eop-gov) OR (wh-eop-gov)) -DMS AND dataset_type:dataset'); $cmp2_csv->writeFromArray($cmp2); } else { $cmp2_csv = new Reader($results_dir . '/cmp2.csv'); $cmp2 = $cmp2_csv->getAll(); } $cmp2_by_title = $cmp2_by_guid = []; foreach ($cmp2 as $name => $dataset) { $title = $dataset['title_simple']; $cmp2_by_title[$title] = isset($cmp2_by_title[$title]) ? $cmp2_by_title[$title] : []; $cmp2_by_title[$title][] = $dataset; $guid = trim($dataset['guid']); if ($guid) { $cmp2_by_guid[$guid] = isset($cmp2_by_guid[$guid]) ? $cmp2_by_guid[$guid] : [];
namespace CKAN\Manager;

use EasyCSV;

/**
 * http://www.data.gov/app/themes/roots-nextdatagov/assets/Json/fed_agency.json
 */
define('GROUP_TO_EXPORT', 'aapi0916');
// http://catalog.data.gov/api/3/action/package_search?fq=aapi0916

require_once dirname(__DIR__) . '/inc/common.php';

/**
 * Create results dir for logs and json results
 */
// One timestamp for the directory and both CSV names, so all three always
// carry the same stamp even if the clock ticks over between statements.
$run_stamp = date('Ymd-His');
$results_dir = CKANMNGR_RESULTS_DIR . '/' . $run_stamp . '_BREAKDOWN_' . GROUP_TO_EXPORT;

// Both CSV writers below live in this directory; stop when it cannot be created.
if (!is_dir($results_dir) && !mkdir($results_dir)) {
    echo 'Unable to create results dir: ' . $results_dir . PHP_EOL;
    exit(1);
}

/**
 * Search for packages by terms found
 */

/**
 * Production
 */
$CkanManager = new CkanManager(CKAN_API_URL);

/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);

$csv_agencies = new EasyCSV\Writer(
    $results_dir . '/breakdown_' . GROUP_TO_EXPORT . '_by_agency_' . $run_stamp . '.csv'
);
$csv_categories = new EasyCSV\Writer(
    $results_dir . '/breakdown_' . GROUP_TO_EXPORT . '_by_category_' . $run_stamp . '.csv'
);

$CkanManager->breakdownByGroup($csv_agencies, $csv_categories);

// show running time on finish
timer();
<?php namespace CKAN\Manager; use EasyCSV; require_once dirname(__DIR__) . '/inc/common.php'; /** * Create results dir for logs */ $results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_DELETE_DATASETS'; mkdir($results_dir); /** * Production */ $CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY); //$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY); //$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY); /** * Staging */ //$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY); /** * Dev */ //$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY); $CkanManager->resultsDir = $results_dir; /** * CSV * datasetName, orgId */ foreach (glob(CKANMNGR_DATA_DIR . '/undelete*.csv') as $csv_file) {
if (!is_file($results_dir . '/prod.csv')) { $prod = new Writer($results_dir . '/prod.csv'); $prod->writeRow(['title', 'title_simple', 'name', 'url', 'guid', 'topics', 'categories']); $ProdCkanManager = new CkanManager(CKAN_API_URL); $ProdCkanManager->resultsDir = $results_dir; $prod_epa = $ProdCkanManager->exportBrief('organization:epa-gov'); $prod->writeFromArray($prod_epa); } else { $prod = new Reader($results_dir . '/prod.csv'); $prod_epa = $prod->getAll(); } echo 'qa.csv' . PHP_EOL; if (!is_file($results_dir . '/qa.csv')) { $qa = new Writer($results_dir . '/qa.csv'); $qa->writeRow(['title', 'title_simple', 'name', 'url', 'guid', 'topics', 'categories']); $QaCkanManager = new CkanManager(CKAN_QA_API_URL); $QaCkanManager->resultsDir = $results_dir; $qa_epa = $QaCkanManager->exportBrief('organization:epa-gov', '', 'http://qa-catalog-fe-data.reisys.com/dataset/'); $qa->writeFromArray($qa_epa); } else { $qa = new Reader($results_dir . '/qa.csv'); $qa_epa = $qa->getAll(); } $qa_epa_by_title = $qa_epa_by_guid = []; foreach ($qa_epa as $name => $dataset) { $title = $dataset['title_simple']; $qa_epa_by_title[$title] = isset($qa_epa_by_title[$title]) ? $qa_epa_by_title[$title] : []; $qa_epa_by_title[$title][] = $dataset; $guid = trim($dataset['guid']); if ($guid) { $qa_epa_by_guid[$guid] = isset($qa_epa_by_guid[$guid]) ? $qa_epa_by_guid[$guid] : [];
<?php namespace CKAN\Manager; use EasyCSV; require_once dirname(__DIR__) . '/inc/common.php'; /** * Create results dir for logs */ $results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_UPDATE_EXTRA'; mkdir($results_dir); //$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY); //$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY); $CkanManager = new CkanManager(CKAN_API_URL, CKAN_PROD_API_KEY); //$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY); /** * Sample csv * dataset,group,categories * https://catalog.data.gov/dataset/food-access-research-atlas,Agriculture,"Natural Resources and Environment" * download-crossing-inventory-data-highway-rail-crossing,Agriculture, "Natural Resources and Environment;Plants and Plant Systems Agriculture" */ $CkanManager->resultsDir = $results_dir; foreach (glob(CKANMNGR_DATA_DIR . '/license_update*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; $basename = str_replace('.csv', '', basename($csv_file)); // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); // file_put_contents($resultsDir . '/' . $basename . '_tags.log', $status, FILE_APPEND | LOCK_EX); $csv = new EasyCSV\Reader($csv_file, 'r+', false); while (true) {
/*
 * Production snapshot. When prod.csv is already present in the results dir it
 * is read back; otherwise the datasets are exported from CKAN_API_URL and
 * written to prod.csv.
 */
if (is_file($results_dir . '/prod.csv')) {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
} else {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);

    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_commerce = $ProdCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset AND -harvest_source_id:[\'\' TO *]'
    );
    $prod->writeFromArray($prod_commerce);
}

/*
 * UAT snapshot: same read-or-export scheme, keyed on uat.csv. The export runs
 * against CKAN_UAT_API_URL and passes the UAT dataset base URL as third argument.
 */
echo 'uat.csv' . PHP_EOL;
if (is_file($results_dir . '/uat.csv')) {
    $uat = new Reader($results_dir . '/uat.csv');
    $uat_commerce = $uat->getAll();
} else {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);

    $UatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $UatCkanManager->resultsDir = $results_dir;

    $uat_commerce = $UatCkanManager->exportBrief(
        'extras_harvest_source_title:Commerce JSON',
        '',
        'http://uat-catalog-fe-data.reisys.com/dataset/'
    );
    $uat->writeFromArray($uat_commerce);
}

// Group the UAT datasets by simplified title; one title may map to several datasets.
$uat_commerce_by_title = [];
foreach ($uat_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($uat_commerce_by_title[$title])) {
        $uat_commerce_by_title[$title] = [];
    }
    $uat_commerce_by_title[$title][] = $dataset;
}

echo 'prod_vs_uat.csv' . PHP_EOL;
// Drop a comparison file left behind in this results dir, if there is one.
if (is_file($results_dir . '/prod_vs_uat_commerce.csv')) {
    unlink($results_dir . '/prod_vs_uat_commerce.csv');
}
<?php namespace CKAN\Manager; use EasyCSV\Reader; require_once dirname(__DIR__) . '/inc/common.php'; /** * Create results dir for logs and json results */ $results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_SHORT'; mkdir($results_dir); $start = isset($argv[1]) ? trim($argv[1]) : 0; $CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY); //$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY); //$CkanManager = new CkanManager(CKAN_STAGING_API_URL); $CkanManager->resultsDir = $results_dir; foreach (glob(CKANMNGR_DATA_DIR . '/export_*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; $basename = str_replace('.csv', '', basename($csv_file)); // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); $csv = new Reader($csv_file, 'r+', false); $return = []; while (true) { $row = $csv->getRow(); if (!$row) { break; } // skip headers if (in_array(trim(strtolower($row['0'])), ['link', 'dataset', 'url', 'data.gov url'])) {
/**
 * http://www.data.gov/app/themes/roots-nextdatagov/assets/Json/fed_agency.json
 */
define('ORGANIZATION_TO_TAG', 'General Services Administration');

/**
 * Make it TRUE, if you want datasets to be marked as PRIVATE
 */
define('MARK_PRIVATE', true);

require_once dirname(__DIR__) . '/inc/common.php';

/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_TAG);

/**
 * sometimes there is no parent term (ex. Department of Labor)
 */
if (!defined('PARENT_TERM')) {
    define('PARENT_TERM', '_');
}

/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_LEGACY_' . PARENT_TERM;

// The manager is handed this directory as resultsDir for a run that uses an
// API key, so abort before that run starts when the directory cannot be created.
if (!is_dir($results_dir) && !mkdir($results_dir)) {
    echo 'Unable to create results dir: ' . $results_dir . PHP_EOL;
    exit(1);
}

$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);

$CkanManager->resultsDir = $results_dir;
$CkanManager->reorganizeDatasets(ORGANIZATION_TO_TAG, $termsArray, CKANMNGR_BACKUP_DIR);

// show running time on finish
timer();
<?php namespace CKAN\Manager; use EasyCSV; require_once dirname(dirname(__DIR__)) . '/inc/common.php'; /** * Create results dir for logs */ $results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_REMOVE_GROUPS'; mkdir($results_dir); $CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY); //$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY); //$CkanManager = new CkanManager(CKAN_QA_API_URL, CKAN_QA_API_KEY); //$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY); //$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY); $CkanManager->resultsDir = $results_dir; foreach (glob(CKANMNGR_DATA_DIR . '/remove*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; $basename = str_replace('.csv', '', basename($csv_file)); // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); file_put_contents($results_dir . '/' . $basename . '_remove.log', $status, FILE_APPEND | LOCK_EX); $csv = new EasyCSV\Reader($csv_file, 'r+', false); while (true) { $row = $csv->getRow(); if (!$row) { break; } // skip headers
<?php /** * First run validation script, to find matches against CKAN, to get _legacy.csv file */ namespace CKAN\Manager; use EasyCSV; require_once dirname(__DIR__) . '/inc/common.php'; /** * Create results dir for logs */ $results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_RENAME_DATASETS'; mkdir($results_dir); $CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY); //$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY); //$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY); //$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY); /** * CSV * datasetName, newDatasetName */ $CkanManager->resultsDir = $results_dir; foreach (glob(CKANMNGR_DATA_DIR . '/prename*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); $basename = str_replace('.csv', '', basename($csv_file)); file_put_contents($results_dir . '/' . $basename . '_rename.log', $status, FILE_APPEND | LOCK_EX); $csv = new EasyCSV\Reader($csv_file, 'r+', false);
/**
 * Writes a CSV report that compares every dataset of one group on this CKAN
 * instance with the same-named dataset on another (production) instance.
 *
 * Packages are fetched page by page (20 per request) with the query
 * "((groups:<category>) + dataset_type:dataset)". For each package one row is
 * written: package name, its source type, whether production has a package of
 * that name, whether the production package's groups mention the category, and
 * the production package's source type.
 *
 * Source type is derived from markers found in the JSON-encoded extras:
 * '"dms"' gives DMS, '"value":"geospatial"' gives GEO,
 * 'source_datajson_identifier' gives JSON; anything else is OTHER and is
 * echoed to stdout for inspection.
 *
 * The report is written to "<resultsDir>/<category>_<Ymd-His>.csv".
 *
 * @param string      $category              group identifier used in the query and matched
 *                                           as a substring of the production package's groups
 * @param CkanManager $CkanManagerProduction manager used to look packages up on production
 *
 * @return void
 */
public function checkGroupAgainstProd($category, self $CkanManagerProduction)
{
    $csv = new Writer($this->resultsDir . '/' . $category . date('_Ymd-His') . '.csv');
    $csv->writeRow(['Staging dataset name', 'Staging Source', 'Prod exists', 'Prod has ' . $category, 'Prod Source']);

    // One classifier for both sides. Extras are JSON-encoded once per package,
    // and a package without an "extras" key is treated as OTHER without
    // raising an undefined-index notice.
    $detectSourceType = function ($pkg, $unknownLabel) {
        $extras = isset($pkg['extras']) ? $pkg['extras'] : null;

        if (is_array($extras) && sizeof($extras)) {
            $encoded = json_encode($extras);
            if (false !== strpos($encoded, '"dms"')) {
                return 'DMS';
            }
            if (false !== strpos($encoded, '"value":"geospatial"')) {
                return 'GEO';
            }
            if (false !== strpos($encoded, 'source_datajson_identifier')) {
                return 'JSON';
            }
        }

        // Unrecognised source: dump the extras so the package can be inspected.
        echo json_encode($extras) . PHP_EOL;
        echo $unknownLabel . $pkg['name'] . PHP_EOL;

        return 'OTHER';
    };

    $ckan_query = '((groups:' . $category . ') + dataset_type:dataset)';
    $start = 0;
    $per_page = 20;

    while (true) {
        $packages = $this->tryPackageSearch($ckan_query, '', $per_page, $start);
        if (!$packages) {
            // An empty page ends the pagination.
            echo "{$start} / {$per_page} :: finish" . PHP_EOL;
            break;
        }

        foreach ($packages as $package) {
            $resource_type = $detectSourceType($package, 'UNKNOWN: ');

            $prod_package = $CkanManagerProduction->tryPackageShow($package['name']);
            $exists = $prod_package ? 'EXISTS' : 'NOT FOUND';

            // These two columns stay empty when production has no such package.
            $prod_category_found = '';
            $prod_resource_type = '';

            if ($prod_package) {
                $prod_category_found = 'FALSE';
                if (isset($prod_package['groups'])
                    && sizeof($prod_package['groups'])
                    && false !== strpos(json_encode($prod_package['groups']), $category)
                ) {
                    $prod_category_found = 'HAS';
                }

                $prod_resource_type = $detectSourceType($prod_package, 'UNKNOWN on PROD: ');
            }

            $csv->writeRow([$package['name'], $resource_type, $exists, $prod_category_found, $prod_resource_type]);
        }

        $start += $per_page;
    }
}
$prod->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $ProdCkanManager = new CkanManager(CKAN_API_URL); $ProdCkanManager->resultsDir = $results_dir; $prod_noaa = $ProdCkanManager->exportBrief('organization:noaa-gov AND metadata_type:geospatial AND dataset_type:dataset'); file_put_contents($results_dir . '/prod.json', json_encode($prod_noaa, JSON_PRETTY_PRINT)); $prod->writeFromArray($prod_noaa); echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL; } else { $prod_noaa = json_decode(file_get_contents($results_dir . '/prod.json')); echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL; } echo 'json_backup.json' . PHP_EOL; if (!is_file($results_dir . '/json_backup.json')) { $json_backup_csv = new Writer($results_dir . '/json_backup.csv'); $json_backup_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $json_backupCkanManager = new CkanManager(CKAN_UAT_API_URL); $json_backupCkanManager->resultsDir = $results_dir; $json_backup_noaa = $json_backupCkanManager->exportBriefFromJson(CKANMNGR_DATA_DIR . '/noaa-gov_geospatial_with_tags.json'); file_put_contents($results_dir . '/json_backup.json', json_encode($json_backup_noaa, JSON_PRETTY_PRINT)); $json_backup_csv->writeFromArray($json_backup_noaa); echo PHP_EOL . 'datasets from json_backup: ' . sizeof($json_backup_noaa) . PHP_EOL . PHP_EOL; } else { $json_backup_noaa = json_decode(file_get_contents($results_dir . '/json_backup.json')); echo PHP_EOL . 'datasets from json_backup: ' . sizeof($json_backup_noaa) . PHP_EOL . PHP_EOL; } $json_backup_tags = []; $json_datasets = json_decode(file_get_contents(CKANMNGR_DATA_DIR . '/noaa-gov_geospatial_with_tags.json'), true); //assoc foreach ($json_datasets as $dataset_array) { $dataset = new Dataset($dataset_array); $groups_tags = $dataset->get_groups_and_tags();
require_once dirname(dirname(__DIR__)) . '/inc/common.php';

/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_TAG);

/**
 * PARENT_TERM is part of the results dir name; abort when it is not defined.
 * NOTE(review): it is not defined in the visible part of this script -
 * presumably it is set while the organization tree is resolved; confirm.
 */
if (!defined('PARENT_TERM')) {
    die('PARENT_TERM not found');
}

/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_LEGACY_' . PARENT_TERM;

// The manager is handed this directory as resultsDir for the tagging run, so
// abort before that run starts when the directory cannot be created.
if (!is_dir($results_dir) && !mkdir($results_dir)) {
    echo 'Unable to create results dir: ' . $results_dir . PHP_EOL;
    exit(1);
}

/**
 * Adding Legacy dms tag
 */
// With LIST_ONLY set, the manager is built without an API key.
$CkanManager = new CkanManager(CKAN_API_URL, LIST_ONLY ? null : CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);

$CkanManager->resultsDir = $results_dir;

/**
 * We are skipping noaa-gov and nist-gov within current process
 */
unset($termsArray['noaa-gov']);
unset($termsArray['nist-gov']);

$CkanManager->tagLegacyDms($termsArray, 'metadata_from_legacy_dms');

// show running time on finish
timer();
/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_EXPORT);

/**
 * sometimes there is no parent term (ex. Department of Labor)
 */
if (!defined('PARENT_TERM')) {
    define('PARENT_TERM', '_');
}

/**
 * Create results dir for logs and json results
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_TRACKING_' . PARENT_TERM;

// The manager is handed this directory as resultsDir for the export; stop
// before the export starts when the directory cannot be created.
if (!is_dir($results_dir) && !mkdir($results_dir)) {
    echo 'Unable to create results dir: ' . $results_dir . PHP_EOL;
    exit(1);
}

/**
 * Search for packages by terms found
 */

/**
 * Production
 */
$CkanManager = new CkanManager(CKAN_API_URL);

/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);

$CkanManager->resultsDir = $results_dir;
$CkanManager->exportTrackingByOrgTerms($termsArray);

// show running time on finish
timer();
<?php namespace CKAN\Manager; use EasyCSV; require_once dirname(__DIR__) . '/inc/common.php'; /** * Create results dir for logs */ $results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_UPDATE_EXTRA'; mkdir($results_dir); $CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY); //$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY); //$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY); //$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY); /** * Sample csv * dataset,group,categories * https://catalog.data.gov/dataset/food-access-research-atlas,Agriculture,"Natural Resources and Environment" * download-crossing-inventory-data-highway-rail-crossing,Agriculture, "Natural Resources and Environment;Plants and Plant Systems Agriculture" */ $CkanManager->resultsDir = $results_dir; foreach (glob(CKANMNGR_DATA_DIR . '/extra*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; $basename = str_replace('.csv', '', basename($csv_file)); // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); // file_put_contents($resultsDir . '/' . $basename . '_tags.log', $status, FILE_APPEND | LOCK_EX); $csv = new EasyCSV\Reader($csv_file, 'r+', false); while (true) {
<?php namespace CKAN\Manager; use CKAN; use EasyCSV; require_once dirname(__DIR__) . '/inc/common.php'; /** * Create results dir for logs */ $results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_RESTORE_DATASETS'; mkdir($results_dir); $ProductionClient = new CkanManager(CKAN_API_URL, CKAN_API_KEY); $StagingClient = new CkanManager(CKAN_UAT_API_URL); //$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY); /** * Sample csv * dataset,group,categories * https://catalog.data.gov/dataset/food-access-research-atlas,Agriculture,"Natural Resources and Environment" * download-crossing-inventory-data-highway-rail-crossing,Agriculture, "Natural Resources and Environment;Plants and Plant Systems Agriculture" */ foreach (glob(CKANMNGR_DATA_DIR . '/*.csv') as $csv_file) { $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL; echo $status; // fix wrong END-OF-LINE file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file))); file_put_contents($results_dir . '/groups.log', $status, FILE_APPEND | LOCK_EX); $csv = new EasyCSV\Reader($csv_file, 'r+', false); while (true) { $row = $csv->getRow(); if (!$row) {
$prod->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $ProdCkanManager = new CkanManager(CKAN_API_URL); $ProdCkanManager->resultsDir = $results_dir; $prod_noaa = $ProdCkanManager->exportBrief('organization:noaa-gov AND metadata_type:geospatial AND dataset_type:dataset'); file_put_contents($results_dir . '/prod.json', json_encode($prod_noaa, JSON_PRETTY_PRINT)); $prod->writeFromArray($prod_noaa); echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL; } else { $prod_noaa = json_decode(file_get_contents($results_dir . '/prod.json')); echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL; } echo 'uat.json' . PHP_EOL; if (!is_file($results_dir . '/uat.json')) { $uat = new Writer($results_dir . '/uat.csv'); $uat->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']); $uatCkanManager = new CkanManager(CKAN_UAT_API_URL); $uatCkanManager->resultsDir = $results_dir; $uat_noaa = $uatCkanManager->exportBrief('organization:noaa-gov AND extras_harvest_source_title:NOAA New CSW AND dataset_type:dataset', '', 'http://uat-catalog-fe-data.reisys.com/dataset/'); file_put_contents($results_dir . '/uat.json', json_encode($uat_noaa, JSON_PRETTY_PRINT)); $uat->writeFromArray($uat_noaa); echo PHP_EOL . 'datasets from uat: ' . sizeof($uat_noaa) . PHP_EOL . PHP_EOL; } else { $uat_noaa = json_decode(file_get_contents($results_dir . '/uat.json')); echo PHP_EOL . 'datasets from uat: ' . sizeof($uat_noaa) . PHP_EOL . PHP_EOL; } $uat_noaa_by_title = $uat_noaa_by_guid = []; foreach ($uat_noaa as $name => $dataset) { $title = $dataset['title_simple']; $uat_noaa_by_title[$title] = isset($uat_noaa_by_title[$title]) ? $uat_noaa_by_title[$title] : []; $uat_noaa_by_title[$title][] = $dataset; $guid = trim($dataset['guid']);
/*
 * Production snapshot. When prod.csv is already present in the results dir it
 * is read back; otherwise the datasets are exported from CKAN_API_URL and
 * written to prod.csv.
 */
if (is_file($results_dir . '/prod.csv')) {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
} else {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);

    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;

    $prod_commerce = $ProdCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset'
    );
    $prod->writeFromArray($prod_commerce);
}

/*
 * QA snapshot: same read-or-export scheme, keyed on qa.csv. The export runs the
 * same organization query against CKAN_QA_API_URL and passes the QA dataset
 * base URL as third argument.
 */
echo 'qa.csv' . PHP_EOL;
if (is_file($results_dir . '/qa.csv')) {
    $qa = new Reader($results_dir . '/qa.csv');
    $qa_commerce = $qa->getAll();
} else {
    $qa = new Writer($results_dir . '/qa.csv');
    $qa->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);

    $UatCkanManager = new CkanManager(CKAN_QA_API_URL);
    $UatCkanManager->resultsDir = $results_dir;

    $qa_commerce = $UatCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset',
        '',
        'http://qa-catalog-fe-data.reisys.com/dataset/'
    );
    $qa->writeFromArray($qa_commerce);
}

// Group the QA datasets by simplified title; one title may map to several datasets.
$qa_commerce_by_title = [];
foreach ($qa_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($qa_commerce_by_title[$title])) {
        $qa_commerce_by_title[$title] = [];
    }
    $qa_commerce_by_title[$title][] = $dataset;
}

echo 'prod_vs_qa.csv' . PHP_EOL;
// Drop a comparison file left behind in this results dir, if there is one.
if (is_file($results_dir . '/prod_vs_qa_commerce.csv')) {
    unlink($results_dir . '/prod_vs_qa_commerce.csv');
}
<?php

namespace CKAN\Manager;

use EasyCSV;

require_once dirname(__DIR__) . '/inc/common.php';

/**
 * Timestamped results directory that receives the per-file logs of this run.
 */
$results_dir = sprintf('%s%s_MAKE_PRIVATE', CKANMNGR_RESULTS_DIR, date('/Ymd-His'));
mkdir($results_dir);

// Target environment; swap in one of the commented alternatives to run elsewhere.
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);

$CkanManager->resultsDir = $results_dir;

// First-column values (lower-cased) that identify a header row.
$header_first_cells = ['dataset', 'uid', 'uuid', 'name', 'url', 'data.gov url'];

// Process every private*.csv file found in the data directory.
foreach (glob(CKANMNGR_DATA_DIR . '/private*.csv') as $csv_file) {
    // Banner with the file name, echoed to the console and appended to the log.
    $status = str_repeat(PHP_EOL, 2) . basename($csv_file) . str_repeat(PHP_EOL, 2);
    echo $status;

    $basename = str_replace('.csv', '', basename($csv_file));

    // Normalize line endings in place: every run of CR/LF characters becomes one "\n".
    $normalized_csv = preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file));
    file_put_contents($csv_file, $normalized_csv);

    file_put_contents($results_dir . '/' . $basename . '.log', $status, FILE_APPEND | LOCK_EX);

    $csv = new EasyCSV\Reader($csv_file, 'r+', false);

    // Read rows until getRow() returns a falsy value.
    while ($row = $csv->getRow()) {
        // skip headers
        if (in_array(strtolower($row['0']), $header_first_cells)) {
            continue;
<?php

namespace CKAN\Manager;

use EasyCSV;

require_once dirname(dirname(__DIR__)) . '/inc/common.php';

// Optional first CLI argument (trimmed); defaults to 0 when it is not given.
$start = 0;
if (isset($argv[1])) {
    $start = trim($argv[1]);
}

/**
 * Timestamped results directory that receives the logs of this run.
 */
$results_dir = sprintf('%s%s_ASSIGN_GROUPS', CKANMNGR_RESULTS_DIR, date('/Ymd-His'));
mkdir($results_dir);

// Target environment; swap in one of the commented alternatives to run elsewhere.
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
//$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY);
//$CkanManager = new CkanManager(CKAN_QA_API_URL, CKAN_QA_API_KEY);

/**
 * Sample csv
 * dataset,group,categories
 * https://catalog.data.gov/dataset/food-access-research-atlas,Agriculture,"Natural Resources and Environment"
 * download-crossing-inventory-data-highway-rail-crossing,Agriculture, "Natural Resources and Environment;Plants and Plant Systems Agriculture"
 */

$CkanManager->resultsDir = $results_dir;

// Process every assign*.csv file found in the data directory.
foreach (glob(CKANMNGR_DATA_DIR . '/assign*.csv') as $csv_file) {
    // Banner with the file name, echoed to the console.
    $status = str_repeat(PHP_EOL, 2) . basename($csv_file) . str_repeat(PHP_EOL, 2);
    echo $status;

    $basename = str_replace('.csv', '', basename($csv_file));

    // Normalize line endings in place: every run of CR/LF characters becomes one "\n".
    $normalized_csv = preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file));
    file_put_contents($csv_file, $normalized_csv);

    // file_put_contents($resultsDir . '/' . $basename . '_tags.log', $status, FILE_APPEND | LOCK_EX);
* Build the array of organization terms (the organization to export plus all
 * of its children).
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_EXPORT);

/**
 * Some organizations have no parent term (ex. Department of Labor);
 * fall back to '_' so the directory name below is still well-formed.
 */
defined('PARENT_TERM') || define('PARENT_TERM', '_');

/**
 * Timestamped results directory for logs and json results.
 */
$results_dir = sprintf('%s%s_EXPORT_%s', CKANMNGR_RESULTS_DIR, date('/Ymd-His'), PARENT_TERM);
mkdir($results_dir);

/**
 * Export the organizations matching the collected terms.
 *
 * Production is the active target; the commented lines are alternatives.
 */
//$CkanManager = new CkanManager(CKAN_API_URL);
$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);

/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);

$CkanManager->resultsDir = $results_dir;
$CkanManager->exportOrganizations($termsArray);

// report the running time once the export has finished
timer();
<?php

namespace CKAN\Manager;

use EasyCSV\Reader;
use EasyCSV\Writer;

require_once dirname(__DIR__) . '/inc/common.php';

/**
 * Timestamped results directory for logs and json results.
 */
$results_dir = sprintf('%s%s_EXPORT_SHORT', CKANMNGR_RESULTS_DIR, date('/Ymd-His'));
mkdir($results_dir);

// Optional first CLI argument (trimmed); defaults to 0 when it is not given.
$start = 0;
if (isset($argv[1])) {
    $start = trim($argv[1]);
}

// Target environment; swap in one of the commented alternatives to run elsewhere.
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);

// CSV writer for assign_tags.csv inside the results directory.
$tags_csv = new Writer($results_dir . '/assign_tags.csv');

$CkanManager->resultsDir = $results_dir;

// Process every export_*.csv file found in the data directory.
foreach (glob(CKANMNGR_DATA_DIR . '/export_*.csv') as $csv_file) {
    // Banner with the file name, echoed to the console.
    $status = str_repeat(PHP_EOL, 2) . basename($csv_file) . str_repeat(PHP_EOL, 2);
    echo $status;

    $basename = str_replace('.csv', '', basename($csv_file));

    // Normalize line endings in place: every run of CR/LF characters becomes one "\n".
    $normalized_csv = preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file));
    file_put_contents($csv_file, $normalized_csv);

    $csv = new Reader($csv_file, 'r+', false);

    // Read rows until getRow() returns a falsy value.
    while ($row = $csv->getRow()) {
        // skip headers
*/
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_EXPORT);

/**
 * Some organizations have no parent term (ex. Department of Labor);
 * fall back to '_' so the directory name below is still well-formed.
 */
defined('PARENT_TERM') || define('PARENT_TERM', '_');

/**
 * Timestamped results directory for logs and json results.
 */
$results_dir = sprintf('%s%s_EXPORT_%s', CKANMNGR_RESULTS_DIR, date('/Ymd-His'), PARENT_TERM);
mkdir($results_dir);

/**
 * Export the packages matching the collected organization terms.
 *
 * Production (public API, no key) is the active target; the commented
 * lines are alternatives.
 */
$CkanManager = new CkanManager(CKAN_API_URL);
//$CkanManager = new CkanManager(CKAN_QA_API_URL);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);

/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);

$CkanManager->resultsDir = $results_dir;
$CkanManager->exportPackagesByOrgTerms($termsArray);

// report the running time once the export has finished
timer();
// Header row shared by the production and the UAT CSV exports below.
$nuclear_csv_header = ['title', 'title_simple', 'name', 'url', 'topics', 'categories'];

// Production: reuse prod.csv when it already exists, otherwise export it now.
$prod_csv_path = $results_dir . '/prod.csv';
if (is_file($prod_csv_path)) {
    $prod = new Reader($prod_csv_path);
    $prod_nuclear = $prod->getAll();
} else {
    $prod = new Writer($prod_csv_path);
    $prod->writeRow($nuclear_csv_header);

    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_nuclear = $ProdCkanManager->exportBrief(
        'organization:(nrc-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset'
    );
    $prod->writeFromArray($prod_nuclear);
}

echo 'uat.csv' . PHP_EOL;

// UAT: same approach, filtered by harvest source title instead of organization.
$uat_csv_path = $results_dir . '/uat.csv';
if (is_file($uat_csv_path)) {
    $uat = new Reader($uat_csv_path);
    $uat_nuclear = $uat->getAll();
} else {
    $uat = new Writer($uat_csv_path);
    $uat->writeRow($nuclear_csv_header);

    $UatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $UatCkanManager->resultsDir = $results_dir;
    // NOTE(review): the third argument looks like the dataset URL prefix for UAT - confirm against exportBrief().
    $uat_nuclear = $UatCkanManager->exportBrief(
        'extras_harvest_source_title:NRC data.json',
        '',
        'http://uat-catalog-fe-data.reisys.com/dataset/'
    );
    $uat->writeFromArray($uat_nuclear);
}

// Group the UAT datasets by simplified title; each title maps to a list of datasets.
$uat_nuclear_by_title = [];
foreach ($uat_nuclear as $name => $dataset) {
    $title = $dataset['title_simple'];
    // Appending with [] creates the per-title list on first use.
    $uat_nuclear_by_title[$title][] = $dataset;
}

echo 'prod_vs_uat.csv' . PHP_EOL;

// Remove a stale comparison report left over from a previous run, if any.
$comparison_csv_path = $results_dir . '/prod_vs_uat_nuclear_geospatial.csv';
if (is_file($comparison_csv_path)) {
    unlink($comparison_csv_path);
}