/**
 * Command line entry point for harvesting records
 *
 * Parses the command line, applies any configuration overrides and calls
 * RecordManager::harvest() once for every comma-separated id in --source.
 * Prints the usage text and exits with status 1 unless --source is a
 * non-empty string.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 * @throws Exception Anything thrown while harvesting is rethrown after the
 *                   lock has been released
 */
function main($argv)
{
    $params = parseArgs($argv);
    applyConfigOverrides($params);

    $sourceGiven = !empty($params['source']) && is_string($params['source']);
    if (!$sourceGiven) {
        echo <<<EOT
Usage: {$argv['0']} --source=... [...]

Parameters:

--source            Repository id ('*' for all, separate multiple sources
                    with commas)
--exclude           Repository id's to exclude when using '*' for source
                    (separate multiple sources with commas)
--from              Override harvesting start date
--until             Override harvesting end date
--all               Harvest from beginning (overrides --from)
--verbose           Enable verbose output
--override          Override initial resumption token
                    (e.g. to resume failed connection)
--reharvest[=date]  This is a full reharvest, delete all records that were not
                    received during the harvesting (or were modified before [date]).
                    Implies --all.
--config.section.name=value
                    Set configuration directive to given value overriding any
                    setting in recordmanager.ini
--lockfile=file     Use a lock file to avoid executing the command multiple times in
                    parallel (useful when running from crontab)


EOT;
        exit(1);
    }

    $lockfile = '';
    if (isset($params['lockfile'])) {
        $lockfile = $params['lockfile'];
    }

    $lockhandle = false;
    try {
        $lockhandle = acquireLock($lockfile);
        if ($lockhandle === false) {
            // No lock was obtained, so stop without doing any work
            exit;
        }

        $verbose = isset($params['verbose']) ? $params['verbose'] : false;
        $manager = new RecordManager(true, $verbose);

        // --all and --reharvest both force the start date to '-',
        // taking precedence over an explicit --from
        if (isset($params['all']) || isset($params['reharvest'])) {
            $startDate = '-';
        } elseif (isset($params['from'])) {
            $startDate = $params['from'];
        } else {
            $startDate = null;
        }

        $endDate = isset($params['until']) ? $params['until'] : null;
        $resumptionToken = isset($params['override'])
            ? urldecode($params['override'])
            : '';
        $excluded = isset($params['exclude']) ? $params['exclude'] : null;
        $reharvest = isset($params['reharvest']) ? $params['reharvest'] : '';

        $sourceIds = explode(',', $params['source']);
        foreach ($sourceIds as $sourceId) {
            $manager->harvest(
                $sourceId,
                $startDate,
                $endDate,
                $resumptionToken,
                $excluded,
                $reharvest
            );
        }
    } catch (Exception $e) {
        // Give the lock back before letting the failure propagate
        releaseLock($lockhandle);
        throw $e;
    }
    releaseLock($lockhandle);
}
/**
 * Command line entry point for searching data sources
 *
 * Parses the command line, applies any configuration overrides and passes
 * the --search value to RecordManager::searchDataSources(). Prints the
 * usage text and exits with status 1 when --search is missing or empty.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 */
function main($argv)
{
    $params = parseArgs($argv);
    applyConfigOverrides($params);

    if (!empty($params['search'])) {
        $verbose = false;
        if (isset($params['verbose'])) {
            $verbose = $params['verbose'];
        }
        $manager = new RecordManager(true, $verbose);
        $manager->searchDataSources($params['search']);
        return;
    }

    // Nothing to search for: show help and signal failure
    echo <<<EOT
Usage: {$argv['0']} --search=...

Parameters:

--search=[regexp]   Search for a string in data sources and list the data source id's


EOT;
    exit(1);
}
/**
 * Command line entry point for loading records from files
 *
 * Parses the command line, applies any configuration overrides and calls
 * RecordManager::loadFromFile() with the given source and file. Prints the
 * usage text and exits with status 1 unless both --file and --source are
 * non-empty.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 * @throws Exception Anything thrown while loading is rethrown after the
 *                   lock has been released
 */
function main($argv)
{
    $params = parseArgs($argv);
    applyConfigOverrides($params);

    $haveRequired = !empty($params['file']) && !empty($params['source']);
    if (!$haveRequired) {
        echo <<<EOT
Usage: {$argv['0']} --file=... --source=... [...]

Parameters:

--file              The file or wildcard pattern of files of records
--source            Source ID
--verbose           Enable verbose output
--config.section.name=value
                    Set configuration directive to given value overriding any
                    setting in recordmanager.ini
--lockfile=file     Use a lock file to avoid executing the command multiple times in
                    parallel (useful when running from crontab)


EOT;
        exit(1);
    }

    $lockfile = '';
    if (isset($params['lockfile'])) {
        $lockfile = $params['lockfile'];
    }

    $lockhandle = false;
    try {
        $lockhandle = acquireLock($lockfile);
        if ($lockhandle === false) {
            // No lock was obtained, so stop without doing any work
            exit;
        }

        $verbose = isset($params['verbose']) ? $params['verbose'] : false;
        $manager = new RecordManager(true, $verbose);
        $manager->loadFromFile($params['source'], $params['file']);
    } catch (Exception $e) {
        // Give the lock back before letting the failure propagate
        releaseLock($lockhandle);
        throw $e;
    }
    releaseLock($lockhandle);
}
/**
 * Command line entry point for exporting records
 *
 * Parses the command line, applies any configuration overrides and calls
 * RecordManager::exportRecords() with the given options. Options that were
 * not given on the command line fall back to the defaults listed in the
 * table below. Prints the usage text and exits with status 1 when --file
 * is missing or empty.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 */
function main($argv)
{
    $params = parseArgs($argv);
    applyConfigOverrides($params);

    if (empty($params['file'])) {
        echo <<<EOT
Usage: {$argv['0']} --file=... [...]

Parameters:

--file=...          The file for records
--deleted=...       The file for deleted record IDs
--from=...          From date where to start the export
--verbose           Enable verbose output
--quiet             Quiet, no output apart from the data
--skip=...          Skip x records to export only a "representative" subset
--source=...        Export only the given source(s)
                    (separate multiple sources with commas)
--single=...        Export single record with the given id
--xpath=...         Export only records matching the XPath expression
--config.section.name=...
                    Set configuration directive to given value overriding any
                    setting in recordmanager.ini
--sortdedup         Sort export file by dedup id
--dedupid=...       deduped = Add dedup id's to records that have duplicates
                    always  = Always add dedup id's to the records
                    Otherwise dedup id's are not added to the records


EOT;
        exit(1);
    }

    // Optional settings and the value each one takes when not given
    $defaults = array(
        'verbose' => false,
        'quiet' => false,
        'deleted' => '',
        'from' => '',
        'skip' => 0,
        'source' => '',
        'single' => '',
        'xpath' => '',
        'sortdedup' => false,
        'dedupid' => '',
    );
    $options = array();
    foreach ($defaults as $name => $fallback) {
        $options[$name] = isset($params[$name]) ? $params[$name] : $fallback;
    }

    $manager = new RecordManager(true, $options['verbose']);
    $manager->quiet = $options['quiet'];
    $manager->exportRecords(
        $params['file'],
        $options['deleted'],
        $options['from'],
        $options['skip'],
        $options['source'],
        $options['single'],
        $options['xpath'],
        $options['sortdedup'],
        $options['dedupid']
    );
}
/**
 * Command line entry point for record management functions
 *
 * Parses the command line, applies any configuration overrides and runs the
 * function named by --func. updatesolr, dumpsolr and comparesolr receive the
 * whole --source value in a single call; every other function is run once
 * per comma-separated source id. Prints the usage text and exits with
 * status 1 unless --func is a non-empty string.
 *
 * The lock obtained from acquireLock() is released on every path that
 * leaves this function after it was acquired, including the early
 * exit(1) paths for a missing --force and for an unknown function.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 * @throws Exception Anything thrown by the executed function is rethrown
 *                   after the lock has been released
 */
function main($argv)
{
    $params = parseArgs($argv);
    applyConfigOverrides($params);
    if (empty($params['func']) || !is_string($params['func'])) {
        echo <<<EOT
Usage: {$argv['0']} --func=... [...]

Parameters:

--func              renormalize|deduplicate|updatesolr|dump|dumpsolr|markdeleted
                    |deletesource|deletesolr|optimizesolr|count|checkdedup|comparesolr
                    |purgedeleted|markdedup
--source            Source ID to process (separate multiple sources with commas)
--all               Process all records regardless of their state (deduplicate, markdedup)
                    or date (updatesolr)
--from              Override the date from which to run the update (updatesolr)
--single            Process only the given record id (deduplicate, updatesolr, dump)
--nocommit          Don't ask Solr to commit the changes (updatesolr)
--field             Field to analyze (count)
--force             Force deletesource to proceed even if deduplication is enabled for
                    the source
--verbose           Enable verbose output for debugging
--config.section.name=value
                    Set configuration directive to given value overriding any
                    setting in recordmanager.ini
--lockfile=file     Use a lock file to avoid executing the command multiple times in
                    parallel (useful when running from crontab)
--comparelog        Record comparison output file. N.B. The file will be overwritten
                    (comparesolr)
--dumpprefix        File name prefix to use when dumping records (dumpsolr). Default is
                    "dumpsolr".
--mapped            If set, use values only after any mapping files are processed when
                    counting records (count)
--daystokeep=days   How many last days to keep when purging deleted records
                    (purgedeleted)


EOT;
        exit(1);
    }

    $lockfile = isset($params['lockfile']) ? $params['lockfile'] : '';
    $lockhandle = false;
    try {
        if (($lockhandle = acquireLock($lockfile)) === false) {
            // No lock was obtained, so there is nothing to release
            die;
        }

        $manager = new RecordManager(
            true,
            isset($params['verbose']) ? $params['verbose'] : false
        );

        // $func is known to be a string here, so strict comparison is safe
        $func = $params['func'];
        $sources = isset($params['source']) ? $params['source'] : '';
        $single = isset($params['single']) ? $params['single'] : '';
        $noCommit = isset($params['nocommit']) ? $params['nocommit'] : false;
        $processAll = isset($params['all']);

        // Solr update, compare and dump can handle multiple sources at once
        if ($func === 'updatesolr' || $func === 'dumpsolr'
            || $func === 'comparesolr'
        ) {
            // --all wins over --from; without either the date is left null
            if ($processAll) {
                $date = '';
            } elseif (isset($params['from'])) {
                $date = $params['from'];
            } else {
                $date = null;
            }

            if ($func === 'comparesolr') {
                $compareLog = isset($params['comparelog'])
                    ? $params['comparelog']
                    : '-';
                $manager->updateSolrIndex(
                    $date, $sources, $single, $noCommit, $compareLog
                );
            } else {
                // Only dumpsolr passes a non-empty dump prefix
                $dumpPrefix = '';
                if ($func === 'dumpsolr') {
                    $dumpPrefix = isset($params['dumpprefix'])
                        ? $params['dumpprefix']
                        : 'dumpsolr';
                }
                $manager->updateSolrIndex(
                    $date, $sources, $single, $noCommit, '', $dumpPrefix
                );
            }
        } else {
            foreach (explode(',', $sources) as $source) {
                switch ($func) {
                case 'renormalize':
                    $manager->renormalize($source, $single);
                    break;
                case 'deduplicate':
                case 'markdedup':
                    $manager->deduplicate(
                        $source,
                        $processAll,
                        $single,
                        $func === 'markdedup'
                    );
                    break;
                case 'dump':
                    $manager->dumpRecord($single);
                    break;
                case 'deletesource':
                    $manager->deleteRecords(
                        $source,
                        isset($params['force']) ? $params['force'] : false
                    );
                    break;
                case 'markdeleted':
                    $manager->markDeleted($source);
                    break;
                case 'deletesolr':
                    $manager->deleteSolrRecords($source);
                    break;
                case 'optimizesolr':
                    $manager->optimizeSolr();
                    break;
                case 'count':
                    $manager->countValues(
                        $source,
                        isset($params['field']) ? $params['field'] : null,
                        isset($params['mapped']) ? $params['mapped'] : false
                    );
                    break;
                case 'checkdedup':
                    $manager->checkDedupRecords();
                    break;
                case 'purgedeleted':
                    if (!isset($params['force']) || !$params['force']) {
                        echo <<<EOT
Purging of deleted records means that any further Solr updates don't include
deletions. Use the --force parameter to indicate that this is ok.

No records have been purged.

EOT;
                        // exit() ends the script right here, so the lock
                        // has to be released explicitly first
                        releaseLock($lockhandle);
                        exit(1);
                    }
                    $manager->purgeDeletedRecords(
                        isset($params['daystokeep'])
                        ? intval($params['daystokeep'])
                        : 0
                    );
                    break;
                default:
                    echo 'Unknown func: ' . $func . "\n";
                    // exit() ends the script right here, so the lock
                    // has to be released explicitly first
                    releaseLock($lockhandle);
                    exit(1);
                }
            }
        }
    } catch (Exception $e) {
        // Give the lock back before letting the failure propagate
        releaseLock($lockhandle);
        throw $e;
    }
    releaseLock($lockhandle);
}