<?php

namespace CKAN\Manager;

use CKAN\OrganizationList;

/**
 * http://www.data.gov/app/themes/roots-nextdatagov/assets/Json/fed_agency.json
 */
define('ORGANIZATION_TO_EXPORT', 'Environmental Protection Agency');

require_once dirname(dirname(__DIR__)) . '/inc/common.php';

/**
 * Get organization terms, including all children, as Array
 */
$OrgList    = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_EXPORT);

/**
 * Sometimes there is no parent term (ex. Department of Labor); fall back to
 * '_' so the results directory name below is still well-formed.
 * NOTE(review): PARENT_TERM is presumably defined as a side effect of
 * getTreeArrayFor() - confirm against OrganizationList.
 */
if (!defined('PARENT_TERM')) {
    define('PARENT_TERM', '_');
}

/**
 * Create results dir for logs and json results.
 *
 * An already existing directory is reused. A directory that cannot be created
 * aborts the script instead of letting it continue with nowhere to write.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_EXPORT_' . PARENT_TERM;
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    throw new \RuntimeException("Unable to create results directory: {$results_dir}");
}

/**
 * Search for packages by terms found
 */

/**
 * Production
/**
 * Make it TRUE, if you want datasets to be marked as PRIVATE
 * LIST_ONLY must be true
 */
define('MARK_PRIVATE', true);

/**
 * Rename adding __legacy to the end of dataset name (url will be changed too)
 * LIST_ONLY must be true
 */
define('RENAME_TO_LEGACY', true);

echo "Tagging " . ORGANIZATION_TO_TAG . PHP_EOL;

require_once dirname(dirname(__DIR__)) . '/inc/common.php';

/**
 * Get organization terms, including all children, as Array
 */
$OrgList    = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArrayFor(ORGANIZATION_TO_TAG);

/**
 * Sometimes there is no parent term (ex. Department of Labor).
 * This script cannot continue without one, so it stops here.
 */
if (!defined('PARENT_TERM')) {
    die('PARENT_TERM not found');
}

/**
 * Create results dir for logs.
 *
 * An already existing directory is reused. A directory that cannot be created
 * stops the script, in the same way as the missing PARENT_TERM check above.
 */
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_LEGACY_' . PARENT_TERM;
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    die('Unable to create results directory: ' . $results_dir);
}

/**
 * Adding Legacy dms tag
 */
} // no anchors please list($dataset, ) = explode('#', basename(trim($row['0']))); if (!$dataset) { continue; } // double trouble check if (strpos($row['0'], '://')) { if (!strpos($row['0'], '/dataset/')) { if (strpos($row['0'], 'dataset?q=')) { parse_str(parse_url($row['0'], PHP_URL_QUERY), $query_array); if (isset($query_array['q'])) { $query = $query_array['q']; if (isset($query_array['organization'])) { $org = $query_array['organization']; $organizationList = new OrganizationList(); $org = $organizationList->getTreeArrayFor($organizationList->getNameFor($org)); if (!is_array($org) || !sizeof($org)) { continue; } $org = join(' OR ', array_keys($org)); // var_dump($organizationList->getTreeArrayFor($organizationList->getNameFor($org))); // continue; $query = "{$query} AND organization:({$org})"; // echo $query.PHP_EOL; } $packages = $CkanManager->tryPackageSearch($query, '', 200); $CkanManager->say(sizeof($packages) . " found searching: {$query},API SEARCH"); file_put_contents($results_dir . '/' . $basename . '_tags.log.csv', sizeof($packages) . " found searching: {$query},API SEARCH" . PHP_EOL, FILE_APPEND | LOCK_EX); // print $query_array['q']; if (!sizeof($packages)) {
/**
 * Look up the term for an organization by its name.
 *
 * The OrganizationList is built from AGENCIES_LIST_URL on the first call and
 * kept in a function-static variable, so subsequent calls reuse that instance.
 *
 * @param string $organizationName
 *
 * @throws \Exception
 * @return mixed the value returned by OrganizationList::getTermFor()
 */
private function findOrganization($organizationName)
{
    static $organizations = null;

    // Build the list lazily, only once.
    if (null === $organizations) {
        $organizations = new OrganizationList(AGENCIES_LIST_URL);
    }

    $term = $organizations->getTermFor($organizationName);

    return $term;
}
<?php

namespace CKAN\Manager;

use CKAN\OrganizationList;

/**
 * @author Alex Perfilov
 * @date   5/23/14
 *
 */

require_once dirname(__DIR__) . '/inc/common.php';

// CLI arguments: [1] start value handed to getPrivateList() (false when
// omitted), [2] limit (defaults to 1).
$start = isset($argv[1]) ? trim($argv[1]) : false;
$limit = isset($argv[2]) ? intval($argv[2]) : 1;

/**
 * Create results dir for logs.
 *
 * The suffix is computed separately: in the former expression
 * `... . $start ?: ''` the concatenation bound tighter than `?:`, so the
 * fallback applied to the whole (never empty) path and was dead code.
 * The resulting path is the same as before for every value of $start.
 */
$suffix      = ($start === false) ? '' : $start;
$results_dir = CKANMNGR_RESULTS_DIR . date('/Ymd-His') . '_PRIVATE_DATASETS_' . $suffix;

// Reuse an existing directory; abort when it cannot be created, since the
// manager is pointed at it below.
if (!is_dir($results_dir) && !mkdir($results_dir, 0777, true) && !is_dir($results_dir)) {
    throw new \RuntimeException("Unable to create results directory: {$results_dir}");
}

$CkanManager = new CkanManager(CKAN_API_URL, CKAN_API_KEY);
//$CkanManager = new CkanManager(CKAN_STAGING_API_URL, CKAN_STAGING_API_KEY);
//$CkanManager = new CkanManager(CKAN_DEV_API_URL, CKAN_DEV_API_KEY);

define('ERROR_REPORTING', E_ALL);

/**
 * Get organization terms, including all children, as Array
 */
$OrgList    = new OrganizationList(AGENCIES_LIST_URL);
$termsArray = $OrgList->getTreeArray();

$CkanManager->resultsDir = $results_dir;
$CkanManager->getPrivateList($termsArray, $start, $limit);

// show running time on finish
timer();