Esempio n. 1
0
 /**
  * Check that the links in dedup records are consistent with the actual records
  *
  * Walks through every document of the dedup collection, hands each one to the
  * dedup handler for checking and logs every result the handler returns. Each
  * returned result is counted as one fixed link. A progress line with the
  * current processing speed is logged after every 1000 records.
  *
  * @return void
  */
 public function checkDedupRecords()
 {
     $logSource = 'checkDedupRecords';
     $this->log->log($logSource, "Checking dedup record consistency");

     // Keep the cursor alive on the server side for the whole (long) run
     $cursor = $this->db->dedup->find();
     $cursor->immortal(true)->timeout($this->cursorTimeout);

     $fixed = 0;
     $count = 0;
     $speedMeter = new PerformanceCounter();
     foreach ($cursor as $dedupDocument) {
         $messages = $this->dedupHandler->checkDedupRecord($dedupDocument);
         if ($messages) {
             $fixed += count($messages);
             foreach ($messages as $message) {
                 $this->log->log($logSource, $message);
             }
         }

         $count++;
         if (0 != $count % 1000) {
             continue;
         }

         // Periodic progress report
         $speedMeter->add($count);
         $avg = $speedMeter->getSpeed();
         $this->log->log(
             $logSource,
             "{$count} records checked with {$fixed} links fixed, {$avg} records/sec"
         );
     }

     $this->log->log(
         $logSource,
         "Completed with {$count} records checked with {$fixed} links fixed"
     );
 }
Esempio n. 2
0
 /**
  * Update Solr index (merged records and individual records)
  *
  * Merged records are handled by processMerged(). When
  * threadedMergedRecordUpdate is enabled (and neither $delete nor $compare is
  * in use) that call runs in a forked child process, which reports its result
  * with its exit status: 0 = nothing to commit, 1 = commit needed, anything
  * else = failure. Individual records (those without a dedup_id) are processed
  * in the calling process.
  *
  * Exceptions are caught and logged inside the method. Note that the method
  * may terminate the whole process: exit(1) on a termination request and
  * exit(2) after an exception in threaded mode when no child is being tracked
  * (which includes the forked child itself).
  *
  * @param string|null $fromDate   Starting date for updates (if null, the last
  *                                update date stored in the database is used
  *                                and if empty string, all records are
  *                                processed)
  * @param string      $sourceId   Comma-separated list of source IDs to update,
  *                                or empty or * for all sources
  * @param string      $singleId   Process only the record with the given ID (the
  *                                last update date is not stored in this case)
  * @param bool        $noCommit   If true, changes are not explicitly committed
  * @param bool        $delete     If true, records in the given $sourceId are all
  *                                deleted (only the merged record processing is
  *                                run)
  * @param string|bool $compare    If set, just compare the records with the ones
  *                                already in the Solr index and write any
  *                                differences in a file given in this parameter.
  *                                The file is truncated first unless the value
  *                                is '-'.
  * @param string      $dumpPrefix If specified, the Solr records are dumped into
  *                                files and not sent to Solr.
  *
  * @return void
  */
 public function updateRecords($fromDate = null, $sourceId = '', $singleId = '', $noCommit = false, $delete = false, $compare = false, $dumpPrefix = '')
 {
     if ($compare && $compare != '-') {
         file_put_contents($compare, '');
     }
     $this->dumpPrefix = $dumpPrefix;
     $verb = $compare ? 'compared' : ($this->dumpPrefix ? 'dumped' : 'indexed');
     $initVerb = $compare ? 'Comparing' : ($this->dumpPrefix ? 'Dumping' : 'Indexing');
     $childPid = null;

     // Poll until the given child process has exited. Returns the exit status
     // of the child, or null if the child can no longer be waited for
     // (pcntl_waitpid reported an error), so that callers never spin forever.
     $waitForChild = static function ($pid) {
         while (1) {
             $result = pcntl_waitpid($pid, $status, WNOHANG);
             if ($result > 0) {
                 return pcntl_wexitstatus($status);
             }
             if ($result < 0) {
                 return null;
             }
             sleep(10);
         }
     };

     try {
         if ($this->backgroundUpdates && !$compare) {
             $this->log->log('updateRecords', "Using {$this->backgroundUpdates} thread(s) for updates");
         }
         $needCommit = false;
         if (isset($fromDate) && $fromDate) {
             // Refuse an unparseable date instead of silently turning it into
             // the epoch, which would reprocess everything
             $fromTimestamp = strtotime($fromDate);
             if ($fromTimestamp === false) {
                 throw new Exception("Invalid from date '{$fromDate}'");
             }
             $mongoFromDate = new MongoDate($fromTimestamp);
         }
         if (!isset($fromDate)) {
             // No date given: continue from the previous update if one is stored
             $state = $this->db->state->find(['_id' => 'Last Index Update'])->limit(-1)->timeout($this->cursorTimeout)->getNext();
             if (isset($state)) {
                 $mongoFromDate = $state['value'];
             }
         }
         $from = isset($mongoFromDate) ? date('Y-m-d H:i:s', $mongoFromDate->sec) : 'the beginning';
         // Take the last indexing date now and store it when done
         $lastIndexingDate = new MongoDate();
         if (!$delete && $this->threadedMergedRecordUpdate && !$compare) {
             $childPid = pcntl_fork();
             if ($childPid == -1) {
                 // Reset the pid so that the exception handler below does not
                 // treat -1 as a child (posix_kill(-1, ...) would signal every
                 // process we are allowed to signal)
                 $childPid = null;
                 throw new Exception("Could not fork merged record background update child");
             }
         }
         if (!$childPid) {
             // Either no fork was done (null) or this is the child (0)
             $needCommit = $this->processMerged(isset($mongoFromDate) ? $mongoFromDate : null, $sourceId, $singleId, $noCommit, $delete, $compare);
             if ($childPid !== null) {
                 // Child process: report the commit need via the exit status
                 exit($needCommit ? 1 : 0);
             }
         }
         if ($delete) {
             return;
         }
         $this->log->log('updateRecords', "Creating individual record list (from {$from})");
         $params = [];
         if ($singleId) {
             $params['_id'] = $singleId;
             $params['dedup_id'] = ['$exists' => false];
             // A single record update must not move the last update date
             $lastIndexingDate = null;
         } else {
             if (isset($mongoFromDate)) {
                 $params['updated'] = ['$gte' => $mongoFromDate];
             }
             if ($sourceId) {
                 $sources = explode(',', $sourceId);
                 if (count($sources) == 1) {
                     $params['source_id'] = $sourceId;
                 } else {
                     $sourceParams = [];
                     foreach ($sources as $source) {
                         $sourceParams[] = ['source_id' => $source];
                     }
                     $params['$or'] = $sourceParams;
                 }
             }
             $params['dedup_id'] = ['$exists' => false];
             $params['update_needed'] = false;
         }
         $records = $this->db->record->find($params)->timeout($this->cursorTimeout);
         $records->immortal(true);
         $total = $this->counts ? $records->count() : 'the';
         $count = 0;
         $mergedComponents = 0;
         $deleted = 0;
         if ($noCommit) {
             $this->log->log('updateRecords', "{$initVerb} {$total} individual records (with no forced commits)");
         } else {
             $this->log->log('updateRecords', "{$initVerb} {$total} individual records (max commit interval " . "{$this->commitInterval} records)");
         }
         $pc = new PerformanceCounter();
         $this->initBufferedUpdate();
         foreach ($records as $record) {
             if (isset($this->terminate)) {
                 if ($childPid) {
                     $this->log->log('updateRecords', 'Waiting for child process to terminate...');
                     $waitForChild($childPid);
                 }
                 $this->log->log('updateRecords', 'Termination upon request (individual record handler)');
                 exit(1);
             }
             if (isset($record['update_needed']) && $record['update_needed']) {
                 $this->log->log('updateRecords', "Record {$record['_id']} needs deduplication and would not" . " be processed in a normal update", Logger::WARNING);
             }
             if ($record['deleted']) {
                 if (!$compare) {
                     $this->bufferedDelete((string) $record['_id']);
                 }
                 ++$count;
                 ++$deleted;
             } else {
                 $data = $this->createSolrArray($record, $mergedComponents);
                 if ($data === false) {
                     continue;
                 }
                 if ($this->verbose) {
                     echo "Metadata for record {$record['_id']}: \n";
                     $this->prettyPrint($data);
                 }
                 ++$count;
                 if (!$compare) {
                     $res = $this->bufferedUpdate($data, $count, $childPid || $noCommit);
                 } else {
                     $res = $count % 1000 == 0;
                     $this->compareWithSolrRecord($data, $compare);
                 }
                 if ($res) {
                     $pc->add($count);
                     $avg = $pc->getSpeed();
                     $this->log->log('updateRecords', "{$count} individual records (of which {$deleted} deleted) " . "with {$mergedComponents} merged parts {$verb}, " . "{$avg} records/sec");
                     // Check child status (non-blocking)
                     if ($childPid) {
                         $pid = pcntl_waitpid($childPid, $status, WNOHANG);
                         if ($pid > 0) {
                             $childPid = null;
                             $exitCode = pcntl_wexitstatus($status);
                             if ($exitCode == 1) {
                                 $needCommit = true;
                             } elseif ($exitCode) {
                                 $this->log->log('updateRecords', "Merged record update thread failed, " . "aborting", Logger::ERROR);
                                 throw new Exception('Merged record update thread failed');
                             }
                         }
                     }
                 }
             }
         }
         $this->flushUpdateBuffer();
         if ($count > 0) {
             $needCommit = true;
         }
         $this->log->log('updateRecords', "Total {$count} individual records (of which {$deleted} deleted) with " . "{$mergedComponents} merged parts {$verb}");
         if ($childPid) {
             // Wait for child to finish and handle its result the same way as
             // the periodic check in the loop above does
             $exitCode = $waitForChild($childPid);
             $childPid = null;
             if ($exitCode == 1) {
                 $needCommit = true;
             } elseif ($exitCode) {
                 $this->log->log('updateRecords', "Merged record update thread failed, " . "aborting", Logger::ERROR);
                 throw new Exception('Merged record update thread failed');
             }
         }
         // Store the update date only after the merged record update is known
         // not to have failed, so that a failed run is not skipped by the next
         // incremental update
         if (isset($lastIndexingDate) && !$compare) {
             $state = ['_id' => "Last Index Update", 'value' => $lastIndexingDate];
             $this->db->state->save($state);
         }
         if (!$noCommit && $needCommit && !$compare && !$this->dumpPrefix) {
             $this->waitForHttpChildren();
             $this->log->log('updateRecords', "Final commit...");
             $this->solrRequest('{ "commit": {} }', 3600);
             $this->waitForHttpChildren();
             $this->log->log('updateRecords', "Commit complete");
         }
     } catch (Exception $e) {
         $this->log->log('updateRecords', 'Exception: ' . $e->getMessage() . ' at ' . $e->getFile() . ':' . $e->getLine(), Logger::FATAL);
         if ($childPid) {
             // Kill the child process too
             posix_kill($childPid, SIGINT);
             // Wait for child to finish
             $waitForChild($childPid);
         }
         // Reached in the forked child (pid 0) and in a parent that has no
         // child to track any more
         if ($this->threadedMergedRecordUpdate && !$childPid) {
             exit(2);
         }
     }
 }