<?php

namespace CKAN\Manager;

use CKAN\OrganizationList;
/**
 * http://www.data.gov/app/themes/roots-nextdatagov/assets/Json/fed_agency.json
 */
// Name of the organization whose term tree is looked up below.
define('ORGANIZATION_TO_EXPORT', 'Environmental Protection Agency');
require_once dirname(dirname(__DIR__)) . '/inc/common.php';
/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_EXPORT);
/**
 * Sometimes there is no parent term (ex. Department of Labor); fall back to
 * '_' so the results directory name built below is still well formed.
 * NOTE(review): PARENT_TERM is presumably defined while the tree is resolved
 * above -- confirm against OrganizationList.
 */
if (!defined('PARENT_TERM')) {
    define('PARENT_TERM', '_');
}
/**
 * Create results dir for logs and json results.
 * The directory name is CKANMNGR_RESULTS_DIR + '/<Ymd-His>_EXPORT_' + PARENT_TERM.
 * Abort when it cannot be created: without it there is nowhere to write results.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_' . PARENT_TERM;
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true)) {
    die('Unable to create results dir: ' . $results_dir . PHP_EOL);
}
/**
 * Search for packages by terms found
 */
/**
 * Production
 */
/**
 * Make it TRUE, if you want datasets to be marked as PRIVATE
 * LIST_ONLY must be true
 */
define('MARK_PRIVATE', true);
/**
 * Rename adding __legacy to the end of dataset name (url will be changed too)
 * LIST_ONLY must be true
 */
define('RENAME_TO_LEGACY', true);
// ORGANIZATION_TO_TAG is not defined in this part of the script; it must
// already exist by the time this line runs.
echo "Tagging " . ORGANIZATION_TO_TAG . PHP_EOL;
require_once dirname(dirname(__DIR__)) . '/inc/common.php';
/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_TAG);
/**
 * PARENT_TERM is required to name the results directory, so the script
 * stops here when it is missing.
 * NOTE(review): PARENT_TERM is presumably defined while the tree is resolved
 * above -- confirm against OrganizationList.
 */
if (!defined('PARENT_TERM')) {
    die('PARENT_TERM not found');
}
/**
 * Create results dir for logs.
 * The directory name is CKANMNGR_RESULTS_DIR + '/<Ymd-His>_LEGACY_' + PARENT_TERM.
 * Abort when it cannot be created: without it there is nowhere to write logs.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_LEGACY_' . PARENT_TERM;
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true)) {
    die('Unable to create results dir: ' . $results_dir . PHP_EOL);
}
/**
 * Adding Legacy dms tag
 */
 }
 //        no anchors please
 list($dataset, ) = explode('#', basename(trim($row['0'])));
 if (!$dataset) {
     continue;
 }
 //        double trouble check
 if (strpos($row['0'], '://')) {
     if (!strpos($row['0'], '/dataset/')) {
         if (strpos($row['0'], 'dataset?q=')) {
             parse_str(parse_url($row['0'], PHP_URL_QUERY), $query_array);
             if (isset($query_array['q'])) {
                 $query = $query_array['q'];
                 if (isset($query_array['organization'])) {
                     $org = $query_array['organization'];
                     $organizationList = new OrganizationList();
                     $org = $organizationList->getTreeArrayFor($organizationList->getNameFor($org));
                     if (!is_array($org) || !sizeof($org)) {
                         continue;
                     }
                     $org = join(' OR ', array_keys($org));
                     //                            var_dump($organizationList->getTreeArrayFor($organizationList->getNameFor($org)));
                     //                            continue;
                     $query = "{$query} AND organization:({$org})";
                     //                            echo $query.PHP_EOL;
                 }
                 $packages = $CkanManager->tryPackageSearch($query, '', 200);
                 $CkanManager->say(sizeof($packages) . " found searching: {$query},API SEARCH");
                 file_put_contents($results_dir . '/' . $basename . '_tags.log.csv', sizeof($packages) . " found searching: {$query},API SEARCH" . PHP_EOL, FILE_APPEND | LOCK_EX);
                 //                        print $query_array['q'];
                 if (!sizeof($packages)) {
Esempio n. 4
0
 /**
  * @param string $organizationName
  *
  * @throws \Exception
  * @return mixed
  */
 private function findOrganization($organizationName)
 {
     // The agencies list is built on the first call only; being a static
     // local, the same instance is reused by every later call.
     static $agencies = null;
     if (null === $agencies) {
         $agencies = new OrganizationList(AGENCIES_LIST_URL);
     }

     // Delegate the lookup to the list and hand its result back unchanged.
     $term = $agencies->getTermFor($organizationName);

     return $term;
 }
<?php

namespace CKAN\Manager;

use CKAN\OrganizationList;
/**
 * @author Alex Perfilov
 * @date   5/23/14
 *
 */
require_once dirname(__DIR__) . '/inc/common.php';
// CLI arguments, both optional and both forwarded to getPrivateList() below:
//   argv[1] -> $start (trimmed string, or false when absent)
//   argv[2] -> $limit (integer, defaults to 1)
$start = isset($argv[1]) ? trim($argv[1]) : false;
$limit = isset($argv[2]) ? intval($argv[2]) : 1;
/**
 * Create results dir for logs.
 * The suffix is computed on its own line because `.` binds tighter than `?:`;
 * written inline, the fallback would apply to the whole concatenated path
 * instead of to $start.
 */
$start_suffix = (false === $start) ? '' : $start;
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_PRIVATE_DATASETS_' . $start_suffix;
// Abort when the directory cannot be created: the manager writes its results there.
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true)) {
    die('Unable to create results dir: ' . $results_dir . PHP_EOL);
}
$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);
// NOTE(review): this constant is defined after common.php has been loaded, so
// code that runs during that include cannot see it -- confirm who reads it.
define('ERROR_REPORTING', E_ALL);
/**
 * Get organization terms, including all children, as Array
 */
$OrgList = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArray();
$CkanManager->resultsDir = $results_dir;
$CkanManager->getPrivateList($termsArray, $start, $limit);
// show running time on finish
timer();