Exemple #1
0
/**
 * Main function
 *
 * Parses the command line, prints the usage text and exits with status 1 when
 * --source is missing, and otherwise harvests every requested source with
 * RecordManager. The lock handle returned by acquireLock() is passed to
 * releaseLock() when harvesting finishes or fails.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 * @throws Exception Anything thrown while harvesting is propagated to the
 *                   caller after the lock has been released
 */
function main($argv)
{
    $params = parseArgs($argv);
    applyConfigOverrides($params);
    if (empty($params['source']) || !is_string($params['source'])) {
        echo <<<EOT
Usage: {$argv['0']} --source=... [...]

Parameters:

--source            Repository id ('*' for all, separate multiple sources
                    with commas)
--exclude           Repository id's to exclude when using '*' for source
                    (separate multiple sources with commas)
--from              Override harvesting start date
--until             Override harvesting end date
--all               Harvest from beginning (overrides --from)
--verbose           Enable verbose output
--override          Override initial resumption token
                    (e.g. to resume failed connection)
--reharvest[=date]  This is a full reharvest, delete all records that were not
                    received during the harvesting (or were modified before [date]).
                    Implies --all.
--config.section.name=value
                    Set configuration directive to given value overriding any
                    setting in recordmanager.ini
--lockfile=file     Use a lock file to avoid executing the command multiple times in
                    parallel (useful when running from crontab)


EOT;
        exit(1);
    }

    $lockfile = isset($params['lockfile']) ? $params['lockfile'] : '';
    $verbose = isset($params['verbose']) ? $params['verbose'] : false;

    // --all and --reharvest both replace any --from value with '-'
    // (the usage text describes this as harvesting from the beginning).
    $from = isset($params['from']) ? $params['from'] : null;
    if (isset($params['all']) || isset($params['reharvest'])) {
        $from = '-';
    }

    // These arguments are the same for every source, so resolve them once.
    $until = isset($params['until']) ? $params['until'] : null;
    $resumptionToken = isset($params['override']) ? urldecode($params['override']) : '';
    $exclude = isset($params['exclude']) ? $params['exclude'] : null;
    $reharvest = isset($params['reharvest']) ? $params['reharvest'] : '';

    $lockhandle = false;
    try {
        if (($lockhandle = acquireLock($lockfile)) === false) {
            // The lock could not be acquired; there is nothing to release.
            die;
        }
        $manager = new RecordManager(true, $verbose);
        foreach (explode(',', $params['source']) as $source) {
            $manager->harvest($source, $from, $until, $resumptionToken, $exclude, $reharvest);
        }
    } finally {
        // Runs for normal completion and for every Throwable (Exception as well
        // as Error), so a failure can no longer leave the lock held.
        releaseLock($lockhandle);
    }
}
Exemple #2
0
/**
 * Main function
 *
 * Parses the command line, prints the usage text and exits with status 1 when
 * --file or --source is missing, and otherwise asks RecordManager to load the
 * given file(s) for the given source. The lock handle returned by
 * acquireLock() is passed to releaseLock() when loading finishes or fails.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 * @throws Exception Anything thrown while loading is propagated to the caller
 *                   after the lock has been released
 */
function main($argv)
{
    $params = parseArgs($argv);
    applyConfigOverrides($params);
    if (empty($params['file']) || empty($params['source'])) {
        echo <<<EOT
Usage: {$argv['0']} --file=... --source=... [...]

Parameters:

--file              The file or wildcard pattern of files of records
--source            Source ID
--verbose           Enable verbose output
--config.section.name=value
                   Set configuration directive to given value overriding any
                   setting in recordmanager.ini
--lockfile=file    Use a lock file to avoid executing the command multiple times in
                   parallel (useful when running from crontab)


EOT;
        exit(1);
    }

    $lockfile = isset($params['lockfile']) ? $params['lockfile'] : '';
    $verbose = isset($params['verbose']) ? $params['verbose'] : false;

    $lockhandle = false;
    try {
        if (($lockhandle = acquireLock($lockfile)) === false) {
            // The lock could not be acquired; there is nothing to release.
            die;
        }
        $manager = new RecordManager(true, $verbose);
        $manager->loadFromFile($params['source'], $params['file']);
    } finally {
        // Runs for normal completion and for every Throwable (Exception as well
        // as Error), so a failure can no longer leave the lock held.
        releaseLock($lockhandle);
    }
}
Exemple #3
0
/**
 * Main function
 *
 * Parses the command line, prints the usage text and exits with status 1 when
 * --func is missing, and otherwise dispatches the requested function to
 * RecordManager. The lock handle returned by acquireLock() is passed to
 * releaseLock() on every path that follows a successful acquisition, including
 * the early exits for an unknown function and for purgedeleted without --force.
 *
 * Functions that take a source are run once per comma-separated source.
 * Functions that do not take a source (dump, optimizesolr, checkdedup,
 * purgedeleted) are run exactly once regardless of how many sources are given.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 * @throws Exception Anything thrown by the requested function is propagated to
 *                   the caller after the lock has been released
 */
function main($argv)
{
    $params = parseArgs($argv);
    applyConfigOverrides($params);
    if (empty($params['func']) || !is_string($params['func'])) {
        echo <<<EOT
Usage: {$argv['0']} --func=... [...]

Parameters:

--func             renormalize|deduplicate|updatesolr|dump|dumpsolr|markdeleted
                   |deletesource|deletesolr|optimizesolr|count|checkdedup|comparesolr
                   |purgedeleted|markdedup
--source           Source ID to process (separate multiple sources with commas)
--all              Process all records regardless of their state (deduplicate,
                   markdedup)
                   or date (updatesolr)
--from             Override the date from which to run the update (updatesolr)
--single           Process only the given record id (deduplicate, updatesolr, dump)
--nocommit         Don't ask Solr to commit the changes (updatesolr)
--field            Field to analyze (count)
--force            Force deletesource to proceed even if deduplication is enabled for
                   the source
--verbose          Enable verbose output for debugging
--config.section.name=value
                   Set configuration directive to given value overriding any setting
                   in recordmanager.ini
--lockfile=file    Use a lock file to avoid executing the command multiple times in
                   parallel (useful when running from crontab)
--comparelog       Record comparison output file. N.B. The file will be overwritten
                   (comparesolr)
--dumpprefix       File name prefix to use when dumping records (dumpsolr). Default
                   is "dumpsolr".
--mapped           If set, use values only after any mapping files are processed when
                   counting records (count)
--daystokeep=days  How many last days to keep when purging deleted records
                   (purgedeleted)


EOT;
        exit(1);
    }

    $func = $params['func'];
    $lockfile = isset($params['lockfile']) ? $params['lockfile'] : '';
    $lockhandle = false;
    try {
        if (($lockhandle = acquireLock($lockfile)) === false) {
            // The lock could not be acquired; there is nothing to release.
            die;
        }
        $manager = new RecordManager(true, isset($params['verbose']) ? $params['verbose'] : false);
        $sources = isset($params['source']) ? $params['source'] : '';
        $single = isset($params['single']) ? $params['single'] : '';
        $noCommit = isset($params['nocommit']) ? $params['nocommit'] : false;

        if ($func === 'updatesolr' || $func === 'dumpsolr' || $func === 'comparesolr') {
            // Solr update, compare and dump can handle multiple sources at once
            $date = isset($params['all']) ? '' : (isset($params['from']) ? $params['from'] : null);
            if ($func === 'comparesolr') {
                $compareLog = isset($params['comparelog']) ? $params['comparelog'] : '-';
                $manager->updateSolrIndex($date, $sources, $single, $noCommit, $compareLog);
            } else {
                // Only dumpsolr passes a dump prefix; updatesolr passes ''.
                $dumpPrefix = '';
                if ($func === 'dumpsolr') {
                    $dumpPrefix = isset($params['dumpprefix']) ? $params['dumpprefix'] : 'dumpsolr';
                }
                $manager->updateSolrIndex($date, $sources, $single, $noCommit, '', $dumpPrefix);
            }
        } else {
            // These functions never look at the source, so run them once instead
            // of repeating the same work for every entry in --source.
            $sourceIndependent = ['dump', 'optimizesolr', 'checkdedup', 'purgedeleted'];
            $sourceList = in_array($func, $sourceIndependent, true)
                ? ['']
                : explode(',', $sources);

            foreach ($sourceList as $source) {
                switch ($func) {
                    case 'renormalize':
                        $manager->renormalize($source, $single);
                        break;
                    case 'deduplicate':
                    case 'markdedup':
                        $manager->deduplicate($source, isset($params['all']), $single, $func === 'markdedup');
                        break;
                    case 'dump':
                        $manager->dumpRecord($single);
                        break;
                    case 'deletesource':
                        $manager->deleteRecords($source, isset($params['force']) ? $params['force'] : false);
                        break;
                    case 'markdeleted':
                        $manager->markDeleted($source);
                        break;
                    case 'deletesolr':
                        $manager->deleteSolrRecords($source);
                        break;
                    case 'optimizesolr':
                        $manager->optimizeSolr();
                        break;
                    case 'count':
                        $manager->countValues(
                            $source,
                            isset($params['field']) ? $params['field'] : null,
                            isset($params['mapped']) ? $params['mapped'] : false
                        );
                        break;
                    case 'checkdedup':
                        $manager->checkDedupRecords();
                        break;
                    case 'purgedeleted':
                        if (!isset($params['force']) || !$params['force']) {
                            echo <<<EOT
Purging of deleted records means that any further Solr updates don't include
deletions. Use the --force parameter to indicate that this is ok. No records
have been purged.

EOT;
                            // exit() does not run finally blocks, so release
                            // the lock explicitly before terminating.
                            releaseLock($lockhandle);
                            exit(1);
                        }
                        $manager->purgeDeletedRecords(
                            isset($params['daystokeep']) ? intval($params['daystokeep']) : 0
                        );
                        break;
                    default:
                        echo 'Unknown func: ' . $func . "\n";
                        // exit() does not run finally blocks, so release the
                        // lock explicitly before terminating.
                        releaseLock($lockhandle);
                        exit(1);
                }
            }
        }
    } finally {
        // Runs for normal completion and for every Throwable (Exception as well
        // as Error), so a failure can no longer leave the lock held.
        releaseLock($lockhandle);
    }
}