/**
  * With a CKAN client stubbed so that package_show() returns the mock dataset
  * itself, checkDatasetConsistency() is expected to return true.
  */
 public function testCheckDatasetConsistency()
 {
     $dataset = $this->mockDataset;

     // Canned CKAN API envelope wrapping the very dataset we are about to check.
     $showResponse = json_encode([
         'help'    => 'some text',
         'success' => true,
         'result'  => $dataset,
     ]);

     $clientProphecy = $this->prophesize('CKAN\\CkanClient');
     $clientProphecy->package_update($dataset)->willReturn(true);
     $clientProphecy->package_show($dataset['name'])->willReturn($showResponse);

     $this->CkanManager->setCkan($clientProphecy->reveal());

     $this->assertTrue($this->CkanManager->checkDatasetConsistency($dataset));
 }
Example #2
0
<?php

namespace CKAN\Manager;

use EasyCSV\Writer;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs and json results
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_SHORT';
mkdir($results_dir);
$CkanManager = new CkanManager(CKAN_API_URL);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);
//$CkanManager = new CkanManager(CKAN_UAT_API_URL);
$csv = new Writer($results_dir . '/export.' . date('Y-m-d') . '.csv');
//$csv->writeRow([
//    'ckan id',
//    'title',
//    'name',
//    'url',
//    'identifier',
//    'org title',
//    'org name',
//    'topics',
//    'categories',
//]);
$CkanManager->resultsDir = $results_dir;
//$brief = $CkanManager->exportShort('extras_license:"https\://creativecommons.org/publicdomain/zero/1.0/" AND (dataset_type:dataset)');
//$brief = $CkanManager->exportShort('','((collection_package_id:* OR *:*) AND license_id:"cc-by-sa" AND license:"https\://creativecommons.org/publicdomain/zero/1.0/") AND (dataset_type:dataset)');
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_pbgc = $ProdCkanManager->exportBrief('organization:pbgc-gov AND dataset_type:dataset');
    file_put_contents($results_dir . '/prod.json', json_encode($prod_pbgc, JSON_PRETTY_PRINT));
    $prod->writeFromArray($prod_pbgc);
    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_pbgc) . PHP_EOL . PHP_EOL;
} else {
    $prod_pbgc = json_decode(file_get_contents($results_dir . '/prod.json'));
    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_pbgc) . PHP_EOL . PHP_EOL;
}
// Build the UAT export of PBGC datasets, or reload it from uat.json when present.
echo 'uat.json' . PHP_EOL;
if (!is_file($results_dir . '/uat.json')) {
    // No cached export yet: query the UAT instance and store the result as CSV and JSON.
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $uatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $uatCkanManager->resultsDir = $results_dir;
    $uat_pbgc = $uatCkanManager->exportBrief('organization:pbgc-gov AND extras_harvest_source_title:PDGC Data.json Source AND dataset_type:dataset', '', 'http://uat-catalog-fe-data.reisys.com/dataset/');
    file_put_contents($results_dir . '/uat.json', json_encode($uat_pbgc, JSON_PRETTY_PRINT));
    $uat->writeFromArray($uat_pbgc);
} else {
    // Decode as associative arrays: the loop that follows reads each dataset with
    // array syntax ($dataset['title_simple']), which is a fatal error on stdClass.
    $uat_pbgc = json_decode(file_get_contents($results_dir . '/uat.json'), true);
}
echo PHP_EOL . 'datasets from uat: ' . count($uat_pbgc) . PHP_EOL . PHP_EOL;
$uat_pbgc_by_title = $uat_pbgc_by_guid = [];
foreach ($uat_pbgc as $name => $dataset) {
    $title = $dataset['title_simple'];
    $uat_pbgc_by_title[$title] = isset($uat_pbgc_by_title[$title]) ? $uat_pbgc_by_title[$title] : [];
    $uat_pbgc_by_title[$title][] = $dataset;
    $guid = trim($dataset['guid']);
<?php

/**
 * First run validation script, to find matches against CKAN, to get _legacy.csv file
 */
namespace CKAN\Manager;

use EasyCSV;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_RENAME_DATASETS';
mkdir($results_dir);
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
/**
 * CSV
 * datasetName, newDatasetName
 */
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/rename*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    $basename = str_replace('.csv', '', basename($csv_file));
    file_put_contents($results_dir . '/' . $basename . '_rename.log', $status, FILE_APPEND | LOCK_EX);
    $csv = new EasyCSV\Reader($csv_file, 'r+', false);
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_epa = $ProdCkanManager->exportBrief('organization:epa-gov AND metadata_type:geospatial AND dataset_type:dataset');
    file_put_contents($results_dir . '/prod.json', json_encode($prod_epa, JSON_PRETTY_PRINT));
    $prod->writeFromArray($prod_epa);
    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_epa) . PHP_EOL . PHP_EOL;
} else {
    $prod_epa = json_decode(file_get_contents($results_dir . '/prod.json'));
    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_epa) . PHP_EOL . PHP_EOL;
}
// Build the brief export from the EPA JSON backup, or reload it from json_backup.json when present.
echo 'json_backup.json' . PHP_EOL;
if (!is_file($results_dir . '/json_backup.json')) {
    // No cached export yet: derive it from the backup file and store it as CSV and JSON.
    $json_backup_csv = new Writer($results_dir . '/json_backup.csv');
    $json_backup_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $json_backupCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $json_backupCkanManager->resultsDir = $results_dir;
    $json_backup_epa = $json_backupCkanManager->exportBriefFromJson(CKANMNGR_DATA_DIR . '/epa-gov.json');
    file_put_contents($results_dir . '/json_backup.json', json_encode($json_backup_epa, JSON_PRETTY_PRINT));
    $json_backup_csv->writeFromArray($json_backup_epa);
} else {
    // Decode as associative arrays so the reloaded data can be counted and has the
    // same shape as the freshly exported data (which is handed to writeFromArray()).
    // Without the flag a JSON object becomes a stdClass, which count()/sizeof() rejects.
    $json_backup_epa = json_decode(file_get_contents($results_dir . '/json_backup.json'), true);
}
echo PHP_EOL . 'datasets from json_backup: ' . count($json_backup_epa) . PHP_EOL . PHP_EOL;
$json_backup_tags = [];
$json_datasets = json_decode(file_get_contents(CKANMNGR_DATA_DIR . '/epa-gov.json'), true);
//assoc
foreach ($json_datasets as $dataset_array) {
    $dataset = new Dataset($dataset_array);
    $groups_tags = $dataset->get_groups_and_tags();
if (!is_file($results_dir . '/prod.csv')) {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_commerce = $ProdCkanManager->exportBrief('organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov ' . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset AND -harvest_source_id:[\'\' TO *]');
    $prod->writeFromArray($prod_commerce);
} else {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
}
echo 'new.csv' . PHP_EOL;
if (!is_file($results_dir . '/new.csv')) {
    $new = new Writer($results_dir . '/new.csv');
    $new->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);
    $UatCkanManager = new CkanManager(CKAN_API_URL);
    $UatCkanManager->resultsDir = $results_dir;
    $new_commerce = $UatCkanManager->exportBrief('extras_harvest_source_title:Commerce Non Spatial Data.json Harvest Source');
    $new->writeFromArray($new_commerce);
} else {
    $new = new Reader($results_dir . '/new.csv');
    $new_commerce = $new->getAll();
}
$new_commerce_by_title = [];
foreach ($new_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];
    $new_commerce_by_title[$title] = isset($new_commerce_by_title[$title]) ? $new_commerce_by_title[$title] : [];
    $new_commerce_by_title[$title][] = $dataset;
}
echo 'prod_vs_new.csv' . PHP_EOL;
is_file($results_dir . '/prod_vs_prod_commerce.csv') && unlink($results_dir . '/prod_vs_prod_commerce.csv');
    $cmp1_csv = new Writer($results_dir . '/cmp1.csv');
    $cmp1_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $CkanManager = new CkanManager(CKAN_API_URL);
    $CkanManager->resultsDir = $results_dir;
    $cmp1 = $CkanManager->exportBrief('organization:((eop-gov) OR (omb-eop-gov) OR (ondcp-eop-gov) OR (ceq-eop-gov) ' . 'OR (ostp-eop-gov) OR (ustr-eop-gov) OR (wh-eop-gov)) DMS  AND dataset_type:dataset');
    $cmp1_csv->writeFromArray($cmp1);
} else {
    $cmp1_csv = new Reader($results_dir . '/cmp1.csv');
    $cmp1_csv->getHeaders();
    $cmp1 = $cmp1_csv->getAll();
}
echo 'cmp2.csv' . PHP_EOL;
if (!is_file($results_dir . '/cmp2.csv')) {
    $cmp2_csv = new Writer($results_dir . '/cmp2.csv');
    $cmp2_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $CkanManager = new CkanManager(CKAN_API_URL);
    $CkanManager->resultsDir = $results_dir;
    $cmp2 = $CkanManager->exportBrief('organization:((eop-gov) OR (omb-eop-gov) OR (ondcp-eop-gov) OR (ceq-eop-gov) ' . 'OR (ostp-eop-gov) OR (ustr-eop-gov) OR (wh-eop-gov)) -DMS AND dataset_type:dataset');
    $cmp2_csv->writeFromArray($cmp2);
} else {
    $cmp2_csv = new Reader($results_dir . '/cmp2.csv');
    $cmp2 = $cmp2_csv->getAll();
}
$cmp2_by_title = $cmp2_by_guid = [];
foreach ($cmp2 as $name => $dataset) {
    $title = $dataset['title_simple'];
    $cmp2_by_title[$title] = isset($cmp2_by_title[$title]) ? $cmp2_by_title[$title] : [];
    $cmp2_by_title[$title][] = $dataset;
    $guid = trim($dataset['guid']);
    if ($guid) {
        $cmp2_by_guid[$guid] = isset($cmp2_by_guid[$guid]) ? $cmp2_by_guid[$guid] : [];
namespace CKAN\Manager;

use EasyCSV;
/**
 * http://www.data.gov/app/themes/roots-nextdatagov/assets/Json/fed_agency.json
 */
define('GROUP_TO_EXPORT', 'aapi0916');
// http://catalog.data.gov/api/3/action/package_search?fq=aapi0916
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs and json results
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_BREAKDOWN_' . GROUP_TO_EXPORT;
mkdir($results_dir);
/**
 * Search for packages by terms found
 */
/**
 * Production
 */
$CkanManager = new CkanManager(CKAN_API_URL);
/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);
$csv_agencies = new EasyCSV\Writer($results_dir . '/breakdown_' . GROUP_TO_EXPORT . '_by_agency_' . date('Ymd-His') . '.csv');
$csv_categories = new EasyCSV\Writer($results_dir . '/breakdown_' . GROUP_TO_EXPORT . '_by_category_' . date('Ymd-His') . '.csv');
$CkanManager->breakdownByGroup($csv_agencies, $csv_categories);
// show running time on finish
timer();
<?php

namespace CKAN\Manager;

use EasyCSV;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_DELETE_DATASETS';
mkdir($results_dir);
/**
 * Production
 */
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
/**
 * Dev
 */
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
$CkanManager->resultsDir = $results_dir;
/**
 * CSV
 * datasetName, orgId
 */
foreach (glob(CKANMNGR_DATA_DIR . '/undelete*.csv') as $csv_file) {
if (!is_file($results_dir . '/prod.csv')) {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'guid', 'topics', 'categories']);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_epa = $ProdCkanManager->exportBrief('organization:epa-gov');
    $prod->writeFromArray($prod_epa);
} else {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_epa = $prod->getAll();
}
echo 'qa.csv' . PHP_EOL;
if (!is_file($results_dir . '/qa.csv')) {
    $qa = new Writer($results_dir . '/qa.csv');
    $qa->writeRow(['title', 'title_simple', 'name', 'url', 'guid', 'topics', 'categories']);
    $QaCkanManager = new CkanManager(CKAN_QA_API_URL);
    $QaCkanManager->resultsDir = $results_dir;
    $qa_epa = $QaCkanManager->exportBrief('organization:epa-gov', '', 'http://qa-catalog-fe-data.reisys.com/dataset/');
    $qa->writeFromArray($qa_epa);
} else {
    $qa = new Reader($results_dir . '/qa.csv');
    $qa_epa = $qa->getAll();
}
$qa_epa_by_title = $qa_epa_by_guid = [];
foreach ($qa_epa as $name => $dataset) {
    $title = $dataset['title_simple'];
    $qa_epa_by_title[$title] = isset($qa_epa_by_title[$title]) ? $qa_epa_by_title[$title] : [];
    $qa_epa_by_title[$title][] = $dataset;
    $guid = trim($dataset['guid']);
    if ($guid) {
        $qa_epa_by_guid[$guid] = isset($qa_epa_by_guid[$guid]) ? $qa_epa_by_guid[$guid] : [];
Example #11
0
<?php

namespace CKAN\Manager;

use EasyCSV;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_UPDATE_EXTRA';
mkdir($results_dir);
//$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_PROD_API_KEY);
//$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY);
/**
 * Sample csv
 * dataset,group,categories
 * https://catalog.data.gov/dataset/food-access-research-atlas,Agriculture,"Natural Resources and Environment"
 * download-crossing-inventory-data-highway-rail-crossing,Agriculture, "Natural Resources and Environment;Plants and Plant Systems Agriculture"
 */
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/license_update*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    $basename = str_replace('.csv', '', basename($csv_file));
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    //    file_put_contents($resultsDir . '/' . $basename . '_tags.log', $status, FILE_APPEND | LOCK_EX);
    $csv = new EasyCSV\Reader($csv_file, 'r+', false);
    while (true) {
if (!is_file($results_dir . '/prod.csv')) {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_commerce = $ProdCkanManager->exportBrief('organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov ' . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)' . ' AND -metadata_type:geospatial AND dataset_type:dataset AND -harvest_source_id:[\'\' TO *]');
    $prod->writeFromArray($prod_commerce);
} else {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
}
echo 'uat.csv' . PHP_EOL;
if (!is_file($results_dir . '/uat.csv')) {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow(['title', 'title_simple', 'name', 'url', 'topics', 'categories']);
    $UatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $UatCkanManager->resultsDir = $results_dir;
    $uat_commerce = $UatCkanManager->exportBrief('extras_harvest_source_title:Commerce JSON', '', 'http://uat-catalog-fe-data.reisys.com/dataset/');
    $uat->writeFromArray($uat_commerce);
} else {
    $uat = new Reader($results_dir . '/uat.csv');
    $uat_commerce = $uat->getAll();
}
$uat_commerce_by_title = [];
foreach ($uat_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];
    $uat_commerce_by_title[$title] = isset($uat_commerce_by_title[$title]) ? $uat_commerce_by_title[$title] : [];
    $uat_commerce_by_title[$title][] = $dataset;
}
echo 'prod_vs_uat.csv' . PHP_EOL;
is_file($results_dir . '/prod_vs_uat_commerce.csv') && unlink($results_dir . '/prod_vs_uat_commerce.csv');
<?php

namespace CKAN\Manager;

use EasyCSV\Reader;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs and json results
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_SHORT';
mkdir($results_dir);
$start = isset($argv[1]) ? trim($argv[1]) : 0;
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/export_*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    $basename = str_replace('.csv', '', basename($csv_file));
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    $csv = new Reader($csv_file, 'r+', false);
    $return = [];
    while (true) {
        $row = $csv->getRow();
        if (!$row) {
            break;
        }
        //        skip headers
        if (in_array(trim(strtolower($row['0'])), ['link', 'dataset', 'url', 'data.gov url'])) {
/**
 * http://www.data.gov/app/themes/roots-nextdatagov/assets/Json/fed_agency.json
 */
define('ORGANIZATION_TO_TAG', 'General Services Administration');
/**
 * Make it TRUE, if you want datasets to be marked as PRIVATE
 */
define('MARK_PRIVATE', true);
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_TAG);
/**
 * sometimes there is no parent term (ex. Department of Labor)
 */
if (!defined('PARENT_TERM')) {
    define('PARENT_TERM', '_');
}
/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_LEGACY_' . PARENT_TERM;
mkdir($results_dir);
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
$CkanManager->resultsDir = $results_dir;
$CkanManager->reorganizeDatasets(ORGANIZATION_TO_TAG, $termsArray, CKANMNGR_BACKUP_DIR);
// show running time on finish
timer();
<?php

namespace CKAN\Manager;

use EasyCSV;
require_once dirname(dirname(__DIR__)) . '/inc/common.php';
/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_REMOVE_GROUPS';
mkdir($results_dir);
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY);
//$CkanManager = new CkanManager(CKAN_QA_API_URL, CKAN_QA_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/remove*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    $basename = str_replace('.csv', '', basename($csv_file));
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    file_put_contents($results_dir . '/' . $basename . '_remove.log', $status, FILE_APPEND | LOCK_EX);
    $csv = new EasyCSV\Reader($csv_file, 'r+', false);
    while (true) {
        $row = $csv->getRow();
        if (!$row) {
            break;
        }
        //        skip headers
<?php

/**
 * First run validation script, to find matches against CKAN, to get _legacy.csv file
 */
namespace CKAN\Manager;

use EasyCSV;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_RENAME_DATASETS';
mkdir($results_dir);
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
/**
 * CSV
 * datasetName, newDatasetName
 */
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/prename*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    $basename = str_replace('.csv', '', basename($csv_file));
    file_put_contents($results_dir . '/' . $basename . '_rename.log', $status, FILE_APPEND | LOCK_EX);
    $csv = new EasyCSV\Reader($csv_file, 'r+', false);
Example #17
0
 /**
  * Walk every dataset of a group on this instance, look the same dataset up on
  * the production instance, and write the comparison to a CSV report.
  *
  * The report is created in $this->resultsDir as "<category>_<Ymd-His>.csv" with
  * one row per dataset: its name, its detected source type here, whether
  * production has it, whether production lists it under $category, and its
  * source type on production. Datasets are fetched page by page until
  * tryPackageSearch() returns an empty result.
  *
  * @param string      $category              group name used in the CKAN query, the
  *                                           report file name and the CSV header
  * @param CkanManager $CkanManagerProduction manager queried via tryPackageShow()
  *                                           for the production copy of each dataset
  */
 public function checkGroupAgainstProd($category, self $CkanManagerProduction)
 {
     $csv = new Writer($this->resultsDir . '/' . $category . date('_Ymd-His') . '.csv');
     $csv->writeRow(['Staging dataset name', 'Staging Source', 'Prod exists', 'Prod has ' . $category, 'Prod Source']);
     $ckan_query = '((groups:' . $category . ') + dataset_type:dataset)';
     $start = 0;
     $per_page = 20;
     while ($packages = $this->tryPackageSearch($ckan_query, '', $per_page, $start)) {
         foreach ($packages as $package) {
             $resource_type = $this->detectPackageSourceType($package, 'UNKNOWN');
             $prod_package = $CkanManagerProduction->tryPackageShow($package['name']);
             $exists = $prod_package ? 'EXISTS' : 'NOT FOUND';
             // Both columns stay empty when the dataset is missing on production.
             $prod_category_found = '';
             $prod_resource_type = '';
             if ($prod_package) {
                 $prod_category_found = 'FALSE';
                 // Substring match of the category against the JSON-encoded groups.
                 // An empty category never counts as a match (strpos() treats an
                 // empty needle differently across PHP versions).
                 if (isset($prod_package['groups']) && sizeof($prod_package['groups']) && '' !== (string)$category && false !== strpos(json_encode($prod_package['groups']), $category)) {
                     $prod_category_found = 'HAS';
                 }
                 $prod_resource_type = $this->detectPackageSourceType($prod_package, 'UNKNOWN on PROD');
             }
             $csv->writeRow([$package['name'], $resource_type, $exists, $prod_category_found, $prod_resource_type]);
         }
         $start += $per_page;
     }
     echo "{$start} / {$per_page} :: finish" . PHP_EOL;
 }

 /**
  * Classify a package by marker strings found in its JSON-encoded "extras".
  *
  * Checks are made in this order, first match wins: '"dms"' gives DMS,
  * '"value":"geospatial"' gives GEO, 'source_datajson_identifier' gives JSON.
  * Anything else, including a package with missing or empty extras, is OTHER;
  * in that case the encoded extras and the package name are echoed for review.
  *
  * @param array  $package       package data; 'extras' and 'name' are read
  * @param string $unknown_label prefix of the echoed line for unclassified packages
  *
  * @return string one of 'DMS', 'GEO', 'JSON', 'OTHER'
  */
 private function detectPackageSourceType($package, $unknown_label)
 {
     // Guard the lookup: a package without an 'extras' key is simply OTHER.
     $extras = isset($package['extras']) ? $package['extras'] : null;
     $extras_json = json_encode($extras);
     if (is_array($extras) && sizeof($extras)) {
         $markers = [
             'DMS'  => '"dms"',
             'GEO'  => '"value":"geospatial"',
             'JSON' => 'source_datajson_identifier',
         ];
         foreach ($markers as $type => $marker) {
             // Compare against false explicitly; strpos() may legitimately return 0.
             if (false !== strpos($extras_json, $marker)) {
                 return $type;
             }
         }
     }
     echo $extras_json . PHP_EOL;
     echo $unknown_label . ': ' . $package['name'] . PHP_EOL;

     return 'OTHER';
 }
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_noaa = $ProdCkanManager->exportBrief('organization:noaa-gov AND metadata_type:geospatial AND dataset_type:dataset');
    file_put_contents($results_dir . '/prod.json', json_encode($prod_noaa, JSON_PRETTY_PRINT));
    $prod->writeFromArray($prod_noaa);
    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL;
} else {
    $prod_noaa = json_decode(file_get_contents($results_dir . '/prod.json'));
    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL;
}
// Build the brief export from the NOAA JSON backup, or reload it from json_backup.json when present.
echo 'json_backup.json' . PHP_EOL;
if (!is_file($results_dir . '/json_backup.json')) {
    // No cached export yet: derive it from the backup file and store it as CSV and JSON.
    $json_backup_csv = new Writer($results_dir . '/json_backup.csv');
    $json_backup_csv->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $json_backupCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $json_backupCkanManager->resultsDir = $results_dir;
    $json_backup_noaa = $json_backupCkanManager->exportBriefFromJson(CKANMNGR_DATA_DIR . '/noaa-gov_geospatial_with_tags.json');
    file_put_contents($results_dir . '/json_backup.json', json_encode($json_backup_noaa, JSON_PRETTY_PRINT));
    $json_backup_csv->writeFromArray($json_backup_noaa);
} else {
    // Decode as associative arrays so the reloaded data can be counted and has the
    // same shape as the freshly exported data (which is handed to writeFromArray()).
    // Without the flag a JSON object becomes a stdClass, which count()/sizeof() rejects.
    $json_backup_noaa = json_decode(file_get_contents($results_dir . '/json_backup.json'), true);
}
echo PHP_EOL . 'datasets from json_backup: ' . count($json_backup_noaa) . PHP_EOL . PHP_EOL;
$json_backup_tags = [];
$json_datasets = json_decode(file_get_contents(CKANMNGR_DATA_DIR . '/noaa-gov_geospatial_with_tags.json'), true);
//assoc
foreach ($json_datasets as $dataset_array) {
    $dataset = new Dataset($dataset_array);
    $groups_tags = $dataset->get_groups_and_tags();
require_once dirname(dirname(__DIR__)) . '/inc/common.php';
/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_TAG);
/**
 * PARENT_TERM is required to name the results dir below; abort if it has not been defined
 */
if (!defined('PARENT_TERM')) {
    die('PARENT_TERM not found');
}
/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_LEGACY_' . PARENT_TERM;
mkdir($results_dir);
/**
 * Adding Legacy dms tag
 */
$CkanManager = new CkanManager(CKAN_API_URL, LIST_ONLY ? null : CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
$CkanManager->resultsDir = $results_dir;
/**
 * We are skipping noaa-gov and nist-gov within current process
 */
unset($termsArray['noaa-gov']);
unset($termsArray['nist-gov']);
$CkanManager->tagLegacyDms($termsArray, 'metadata_from_legacy_dms');
// show running time on finish
timer();
/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_EXPORT);
/**
 * sometimes there is no parent term (ex. Department of Labor)
 */
if (!defined('PARENT_TERM')) {
    define('PARENT_TERM', '_');
}
/**
 * Create results dir for logs and json results
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_TRACKING_' . PARENT_TERM;
mkdir($results_dir);
/**
 * Search for packages by terms found
 */
/**
 * Production
 */
$CkanManager = new CkanManager(CKAN_API_URL);
/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);
$CkanManager->resultsDir = $results_dir;
$CkanManager->exportTrackingByOrgTerms($termsArray);
// show running time on finish
timer();
<?php

namespace CKAN\Manager;

use EasyCSV;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_UPDATE_EXTRA';
mkdir($results_dir);
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
//$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY);
/**
 * Sample csv
 * dataset,group,categories
 * https://catalog.data.gov/dataset/food-access-research-atlas,Agriculture,"Natural Resources and Environment"
 * download-crossing-inventory-data-highway-rail-crossing,Agriculture, "Natural Resources and Environment;Plants and Plant Systems Agriculture"
 */
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/extra*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    $basename = str_replace('.csv', '', basename($csv_file));
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    //    file_put_contents($resultsDir . '/' . $basename . '_tags.log', $status, FILE_APPEND | LOCK_EX);
    $csv = new EasyCSV\Reader($csv_file, 'r+', false);
    while (true) {
<?php

namespace CKAN\Manager;

use CKAN;
use EasyCSV;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs.
 *
 * The directory name is CKANMNGR_RESULTS_DIR plus a timestamp and the
 * "_RESTORE_DATASETS" suffix. The script stops here if the directory cannot
 * be created, so that no CKAN call is made without a place to write logs.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_RESTORE_DATASETS';
// The second is_dir() covers the case where another process created the directory in between.
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    throw new \RuntimeException('Unable to create results directory: ' . $results_dir);
}
// Client for CKAN_API_URL, authenticated with CKAN_API_KEY.
$ProductionClient = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
// NOTE(review): this variable is named "Staging" but is built from CKAN_UAT_API_URL
// (and without an API key) — confirm that is the intended environment.
$StagingClient = new CkanManager(CKAN_UAT_API_URL);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
/**
 * Sample csv
 * dataset,group,categories
 * https://catalog.data.gov/dataset/food-access-research-atlas,Agriculture,"Natural Resources and Environment"
 * download-crossing-inventory-data-highway-rail-crossing,Agriculture, "Natural Resources and Environment;Plants and Plant Systems Agriculture"
 */
foreach (glob(CKANMNGR_DATA_DIR . '/*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    file_put_contents($results_dir . '/groups.log', $status, FILE_APPEND | LOCK_EX);
    $csv = new EasyCSV\Reader($csv_file, 'r+', false);
    while (true) {
        $row = $csv->getRow();
        if (!$row) {
    $prod->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_noaa = $ProdCkanManager->exportBrief('organization:noaa-gov AND metadata_type:geospatial AND dataset_type:dataset');
    file_put_contents($results_dir . '/prod.json', json_encode($prod_noaa, JSON_PRETTY_PRINT));
    $prod->writeFromArray($prod_noaa);
    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL;
} else {
    $prod_noaa = json_decode(file_get_contents($results_dir . '/prod.json'));
    echo PHP_EOL . 'datasets from prod: ' . sizeof($prod_noaa) . PHP_EOL . PHP_EOL;
}
/**
 * Load the UAT NOAA listing into $uat_noaa.
 *
 * When uat.json is not yet present in the results directory, the listing is
 * fetched through exportBrief() and stored twice: as uat.csv (with a header
 * row) and as uat.json. Otherwise the stored uat.json is reused.
 */
echo 'uat.json' . PHP_EOL;
if (!is_file($results_dir . '/uat.json')) {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow(['title', 'title_simple', 'name', 'url', 'identifier', 'guid', 'topics', 'categories']);
    $uatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $uatCkanManager->resultsDir = $results_dir;
    // NOTE(review): the harvest source title in this query is not quoted — confirm
    // the search backend matches it the way this script expects.
    $uat_noaa = $uatCkanManager->exportBrief('organization:noaa-gov AND extras_harvest_source_title:NOAA New CSW AND dataset_type:dataset', '', 'http://uat-catalog-fe-data.reisys.com/dataset/');
    file_put_contents($results_dir . '/uat.json', json_encode($uat_noaa, JSON_PRETTY_PRINT));
    $uat->writeFromArray($uat_noaa);
    echo PHP_EOL . 'datasets from uat: ' . count($uat_noaa) . PHP_EOL . PHP_EOL;
} else {
    // Decode to associative arrays: the code that follows reads each entry with
    // array syntax ($dataset['title_simple']), which fails on stdClass objects.
    $uat_noaa = json_decode(file_get_contents($results_dir . '/uat.json'), true);
    // A missing, unreadable or corrupt cache must not be mistaken for an empty listing.
    if (!is_array($uat_noaa)) {
        throw new \RuntimeException('Unable to decode cached export: ' . $results_dir . '/uat.json');
    }
    echo PHP_EOL . 'datasets from uat: ' . count($uat_noaa) . PHP_EOL . PHP_EOL;
}
$uat_noaa_by_title = $uat_noaa_by_guid = [];
foreach ($uat_noaa as $name => $dataset) {
    $title = $dataset['title_simple'];
    $uat_noaa_by_title[$title] = isset($uat_noaa_by_title[$title]) ? $uat_noaa_by_title[$title] : [];
    $uat_noaa_by_title[$title][] = $dataset;
    $guid = trim($dataset['guid']);
/**
 * Production listing: reuse prod.csv when it already exists in the results
 * directory, otherwise export it through exportBrief() and write it out.
 */
if (is_file($results_dir . '/prod.csv')) {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_commerce = $prod->getAll();
} else {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'topics',
        'categories',
    ]);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_commerce = $ProdCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset'
    );
    $prod->writeFromArray($prod_commerce);
}

/**
 * QA listing: same approach, cached in qa.csv and exported from CKAN_QA_API_URL.
 */
echo 'qa.csv' . PHP_EOL;
if (is_file($results_dir . '/qa.csv')) {
    $qa = new Reader($results_dir . '/qa.csv');
    $qa_commerce = $qa->getAll();
} else {
    $qa = new Writer($results_dir . '/qa.csv');
    $qa->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'topics',
        'categories',
    ]);
    $UatCkanManager = new CkanManager(CKAN_QA_API_URL);
    $UatCkanManager->resultsDir = $results_dir;
    $qa_commerce = $UatCkanManager->exportBrief(
        'organization:(doc-gov OR bis-doc-gov OR mbda-doc-gov OR trade-gov OR census-gov '
        . ' OR eda-doc-gov OR ntia-doc-gov OR ntis-gov OR nws-doc-gov OR bea-gov OR uspto-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset',
        '',
        'http://qa-catalog-fe-data.reisys.com/dataset/'
    );
    $qa->writeFromArray($qa_commerce);
}

// Group the QA rows by their 'title_simple' value; each key holds a list of rows.
$qa_commerce_by_title = [];
foreach ($qa_commerce as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($qa_commerce_by_title[$title])) {
        $qa_commerce_by_title[$title] = [];
    }
    $qa_commerce_by_title[$title][] = $dataset;
}

echo 'prod_vs_qa.csv' . PHP_EOL;
// Drop any comparison file left in the results directory from an earlier run.
if (is_file($results_dir . '/prod_vs_qa_commerce.csv')) {
    unlink($results_dir . '/prod_vs_qa_commerce.csv');
}
<?php

namespace CKAN\Manager;

use EasyCSV;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs.
 *
 * The directory name is CKANMNGR_RESULTS_DIR plus a timestamp and the
 * "_MAKE_PRIVATE" suffix. The script stops here if the directory cannot be
 * created, so that no CKAN call is made without a place to write logs.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_MAKE_PRIVATE';
// The second is_dir() covers the case where another process created the directory in between.
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    throw new \RuntimeException('Unable to create results directory: ' . $results_dir);
}
// Active target: CKAN_API_URL with CKAN_API_KEY. The commented lines below are
// the alternative environments; enable exactly one.
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/private*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    $basename = str_replace('.csv', '', basename($csv_file));
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    file_put_contents($results_dir . '/' . $basename . '.log', $status, FILE_APPEND | LOCK_EX);
    $csv = new EasyCSV\Reader($csv_file, 'r+', false);
    while (true) {
        $row = $csv->getRow();
        if (!$row) {
            break;
        }
        //        skip headers
        if (in_array(strtolower($row['0']), ['dataset', 'uid', 'uuid', 'name', 'url', 'data.gov url'])) {
            continue;
<?php

namespace CKAN\Manager;

use EasyCSV;
require_once dirname(dirname(__DIR__)) . '/inc/common.php';
// Optional first CLI argument: the trimmed string when given, integer 0 otherwise.
// Where $start is consumed lies outside this excerpt.
$start = isset($argv[1]) ? trim($argv[1]) : 0;
/**
 * Create results dir for logs.
 *
 * The directory name is CKANMNGR_RESULTS_DIR plus a timestamp and the
 * "_ASSIGN_GROUPS" suffix. The script stops here if the directory cannot be
 * created, so that no CKAN call is made without a place to write logs.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_ASSIGN_GROUPS';
// The second is_dir() covers the case where another process created the directory in between.
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    throw new \RuntimeException('Unable to create results directory: ' . $results_dir);
}
// Active target: CKAN_API_URL with CKAN_API_KEY. The commented lines below are
// the alternative environments; enable exactly one.
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
//$CkanManager = new CkanManager(CKAN_UAT_API_URL, CKAN_UAT_API_KEY);
//$CkanManager = new CkanManager(CKAN_QA_API_URL, CKAN_QA_API_KEY);
/**
 * Sample csv
 * dataset,group,categories
 * https://catalog.data.gov/dataset/food-access-research-atlas,Agriculture,"Natural Resources and Environment"
 * download-crossing-inventory-data-highway-rail-crossing,Agriculture, "Natural Resources and Environment;Plants and Plant Systems Agriculture"
 */
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/assign*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    $basename = str_replace('.csv', '', basename($csv_file));
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    //    file_put_contents($resultsDir . '/' . $basename . '_tags.log', $status, FILE_APPEND | LOCK_EX);
Example #27
0
 * Get organization terms, including all children, as Array
 */
// Load the organization list from AGENCIES_LIST_URL and take the term tree
// for ORGANIZATION_TO_EXPORT.
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_EXPORT);
/**
 * sometimes there is no parent term (ex. Department of Labor)
 */
if (!defined('PARENT_TERM')) {
    define('PARENT_TERM', '_');
}
/**
 * Create results dir for logs and json results.
 *
 * The directory name is CKANMNGR_RESULTS_DIR plus a timestamp, "_EXPORT_"
 * and PARENT_TERM. The script stops here if the directory cannot be created,
 * so the export is not started without a place to write its results.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_' . PARENT_TERM;
// The second is_dir() covers the case where another process created the directory in between.
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    throw new \RuntimeException('Unable to create results directory: ' . $results_dir);
}
/**
 * Search for packages by terms found
 */
/**
 * Production
 */
//$CkanManager = new CkanManager(CKAN_API_URL);
$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);
$CkanManager->resultsDir = $results_dir;
$CkanManager->exportOrganizations($termsArray);
// show running time on finish
timer();
<?php

namespace CKAN\Manager;

use EasyCSV\Reader;
use EasyCSV\Writer;
require_once dirname(__DIR__) . '/inc/common.php';
/**
 * Create results dir for logs and json results.
 *
 * The directory name is CKANMNGR_RESULTS_DIR plus a timestamp and the
 * "_EXPORT_SHORT" suffix. The script stops here if the directory cannot be
 * created, because assign_tags.csv is opened inside it right below.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_SHORT';
// The second is_dir() covers the case where another process created the directory in between.
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    throw new \RuntimeException('Unable to create results directory: ' . $results_dir);
}
// Optional first CLI argument: the trimmed string when given, integer 0 otherwise.
// Where $start is consumed lies outside this excerpt.
$start = isset($argv[1]) ? trim($argv[1]) : 0;
// Active target: CKAN_API_URL with CKAN_API_KEY. The commented lines below are
// the alternative environments; enable exactly one.
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);
// CSV writer for assign_tags.csv inside the results directory.
$tags_csv = new Writer($results_dir . '/assign_tags.csv');
$CkanManager->resultsDir = $results_dir;
foreach (glob(CKANMNGR_DATA_DIR . '/export_*.csv') as $csv_file) {
    $status = PHP_EOL . PHP_EOL . basename($csv_file) . PHP_EOL . PHP_EOL;
    echo $status;
    $basename = str_replace('.csv', '', basename($csv_file));
    //    fix wrong END-OF-LINE
    file_put_contents($csv_file, preg_replace('/[\\r\\n]+/', "\n", file_get_contents($csv_file)));
    $csv = new Reader($csv_file, 'r+', false);
    while (true) {
        $row = $csv->getRow();
        if (!$row) {
            break;
        }
        //        skip headers
 */
// Load the organization list from AGENCIES_LIST_URL and take the term tree
// for ORGANIZATION_TO_EXPORT.
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_EXPORT);
/**
 * sometimes there is no parent term (ex. Department of Labor)
 */
if (!defined('PARENT_TERM')) {
    define('PARENT_TERM', '_');
}
/**
 * Create results dir for logs and json results.
 *
 * The directory name is CKANMNGR_RESULTS_DIR plus a timestamp, "_EXPORT_"
 * and PARENT_TERM. The script stops here if the directory cannot be created,
 * so the export is not started without a place to write its results.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_' . PARENT_TERM;
// The second is_dir() covers the case where another process created the directory in between.
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    throw new \RuntimeException('Unable to create results directory: ' . $results_dir);
}
/**
 * Search for packages by terms found
 */
/**
 * Production
 */
$CkanManager = new CkanManager(CKAN_API_URL);
//$CkanManager = new CkanManager(CKAN_QA_API_URL);
//$CkanManager = new CkanManager(INVENTORY_CKAN_PROD_API_URL, INVENTORY_CKAN_PROD_API_KEY);
/**
 * Staging
 */
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL);
$CkanManager->resultsDir = $results_dir;
$CkanManager->exportPackagesByOrgTerms($termsArray);
// show running time on finish
timer();
/**
 * Production listing: reuse prod.csv when it already exists in the results
 * directory, otherwise export it through exportBrief() and write it out.
 */
if (is_file($results_dir . '/prod.csv')) {
    $prod = new Reader($results_dir . '/prod.csv');
    $prod_nuclear = $prod->getAll();
} else {
    $prod = new Writer($results_dir . '/prod.csv');
    $prod->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'topics',
        'categories',
    ]);
    $ProdCkanManager = new CkanManager(CKAN_API_URL);
    $ProdCkanManager->resultsDir = $results_dir;
    $prod_nuclear = $ProdCkanManager->exportBrief(
        'organization:(nrc-gov)'
        . ' AND -metadata_type:geospatial AND dataset_type:dataset'
    );
    $prod->writeFromArray($prod_nuclear);
}

/**
 * UAT listing: same approach, cached in uat.csv and exported from CKAN_UAT_API_URL.
 */
echo 'uat.csv' . PHP_EOL;
if (is_file($results_dir . '/uat.csv')) {
    $uat = new Reader($results_dir . '/uat.csv');
    $uat_nuclear = $uat->getAll();
} else {
    $uat = new Writer($results_dir . '/uat.csv');
    $uat->writeRow([
        'title',
        'title_simple',
        'name',
        'url',
        'topics',
        'categories',
    ]);
    $UatCkanManager = new CkanManager(CKAN_UAT_API_URL);
    $UatCkanManager->resultsDir = $results_dir;
    $uat_nuclear = $UatCkanManager->exportBrief(
        'extras_harvest_source_title:NRC data.json',
        '',
        'http://uat-catalog-fe-data.reisys.com/dataset/'
    );
    $uat->writeFromArray($uat_nuclear);
}

// Group the UAT rows by their 'title_simple' value; each key holds a list of rows.
$uat_nuclear_by_title = [];
foreach ($uat_nuclear as $name => $dataset) {
    $title = $dataset['title_simple'];
    if (!isset($uat_nuclear_by_title[$title])) {
        $uat_nuclear_by_title[$title] = [];
    }
    $uat_nuclear_by_title[$title][] = $dataset;
}

echo 'prod_vs_uat.csv' . PHP_EOL;
// Drop any comparison file left in the results directory from an earlier run.
if (is_file($results_dir . '/prod_vs_uat_nuclear_geospatial.csv')) {
    unlink($results_dir . '/prod_vs_uat_nuclear_geospatial.csv');
}