/**
 * Run jobs of the specified number/type for the specified time
 *
 * The response map has a 'jobs' field that lists the status of each job run, including:
 *   - type   : the job type
 *   - status : ok/failed
 *   - error  : any error message string
 *   - time   : the job run time in ms
 * The response map also has:
 *   - backoffs : the (job type => seconds) map of backoff times
 *   - elapsed  : the total time spent running tasks in ms
 *   - reached  : the reason the script finished, one of (none-ready, none-possible,
 *                read-only, slave-lag-limit, job-limit, time-limit, memory-limit)
 *
 * This method outputs status information only if a debug handler was set.
 * Any exceptions are caught and logged, but are not reported as output.
 *
 * @param array $options Map of parameters:
 *    - type     : the job type (or false for the default types)
 *    - maxJobs  : maximum number of jobs to run
 *    - maxTime  : maximum time in seconds before stopping
 *    - throttle : whether to respect job backoff configuration
 * @return array Summary response that can easily be JSON serialized
 */
public function run( array $options ) {
	global $wgJobClasses, $wgTrxProfilerLimits;

	$response = [ 'jobs' => [], 'reached' => 'none-ready' ];

	$type = isset( $options['type'] ) ? $options['type'] : false;
	$maxJobs = isset( $options['maxJobs'] ) ? $options['maxJobs'] : false;
	$maxTime = isset( $options['maxTime'] ) ? $options['maxTime'] : false;
	$noThrottle = isset( $options['throttle'] ) && !$options['throttle'];

	// Bail if job type is invalid
	if ( $type !== false && !isset( $wgJobClasses[$type] ) ) {
		$response['reached'] = 'none-possible';
		return $response;
	}
	// Bail out if DB is in read-only mode
	if ( wfReadOnly() ) {
		$response['reached'] = 'read-only';
		return $response;
	}
	// Bail out if there is too much DB lag.
	// This check should not block as we want to try other wiki queues.
	list( , $maxLag ) = wfGetLB( wfWikiID() )->getMaxLag();
	if ( $maxLag >= self::MAX_ALLOWED_LAG ) {
		$response['reached'] = 'slave-lag-limit';
		return $response;
	}

	// Flush any pending DB writes for sanity
	wfGetLBFactory()->commitAll( __METHOD__ );

	// Catch huge single updates that lead to slave lag
	$trxProfiler = Profiler::instance()->getTransactionProfiler();
	$trxProfiler->setLogger( LoggerFactory::getInstance( 'DBPerformance' ) );
	$trxProfiler->setExpectations( $wgTrxProfilerLimits['JobRunner'], __METHOD__ );

	// Some jobs types should not run until a certain timestamp
	$backoffs = []; // map of (type => UNIX expiry)
	$backoffDeltas = []; // map of (type => seconds)
	$wait = 'wait'; // block to read backoffs the first time

	$group = JobQueueGroup::singleton();
	$stats = RequestContext::getMain()->getStats();
	$jobsPopped = 0;
	$timeMsTotal = 0;
	$startTime = microtime( true ); // time since jobs started running
	$lastCheckTime = 1; // timestamp of last slave check; 1 forces an initial check
	do {
		// Sync the persistent backoffs with concurrent runners
		$backoffs = $this->syncBackoffDeltas( $backoffs, $backoffDeltas, $wait );
		$blacklist = $noThrottle ? [] : array_keys( $backoffs );
		$wait = 'nowait'; // less important now

		if ( $type === false ) {
			$job = $group->pop(
				JobQueueGroup::TYPE_DEFAULT,
				JobQueueGroup::USE_CACHE,
				$blacklist
			);
		} elseif ( in_array( $type, $blacklist ) ) {
			$job = false; // requested queue in backoff state
		} else {
			$job = $group->pop( $type ); // job from a single queue
		}

		if ( $job ) { // found a job
			++$jobsPopped;
			$popTime = time();
			$jType = $job->getType();

			WebRequest::overrideRequestId( $job->getRequestId() );

			// Back off of certain jobs for a while (for throttling and for errors)
			$ttw = $this->getBackoffTimeToWait( $job );
			if ( $ttw > 0 ) {
				// Always add the delta for other runners in case the time running the
				// job negated the backoff for each individually but not collectively.
				$backoffDeltas[$jType] = isset( $backoffDeltas[$jType] )
					? $backoffDeltas[$jType] + $ttw
					: $ttw;
				$backoffs = $this->syncBackoffDeltas( $backoffs, $backoffDeltas, $wait );
			}

			$info = $this->executeJob( $job, $stats, $popTime );
			if ( $info['status'] !== false || !$job->allowRetries() ) {
				$group->ack( $job ); // succeeded or job cannot be retried
			}

			// Back off of certain jobs for a while (for throttling and for errors);
			// sampled (~2%) so one flaky job does not immediately back off a whole type
			if ( $info['status'] === false && mt_rand( 0, 49 ) == 0 ) {
				$ttw = max( $ttw, self::ERROR_BACKOFF_TTL ); // too many errors
				$backoffDeltas[$jType] = isset( $backoffDeltas[$jType] )
					? $backoffDeltas[$jType] + $ttw
					: $ttw;
			}

			$response['jobs'][] = [
				'type' => $jType,
				'status' => ( $info['status'] === false ) ? 'failed' : 'ok',
				'error' => $info['error'],
				'time' => $info['timeMs']
			];
			$timeMsTotal += $info['timeMs'];

			// Break out if we hit the job count or wall time limits...
			if ( $maxJobs && $jobsPopped >= $maxJobs ) {
				$response['reached'] = 'job-limit';
				break;
			} elseif ( $maxTime && ( microtime( true ) - $startTime ) > $maxTime ) {
				$response['reached'] = 'time-limit';
				break;
			}

			// Don't let any of the main DB slaves get backed up.
			// This only waits for so long before exiting and letting
			// other wikis in the farm (on different masters) get a chance.
			$timePassed = microtime( true ) - $lastCheckTime;
			if ( $timePassed >= self::LAG_CHECK_PERIOD || $timePassed < 0 ) {
				try {
					wfGetLBFactory()->waitForReplication( [
						'ifWritesSince' => $lastCheckTime,
						'timeout' => self::MAX_ALLOWED_LAG
					] );
				} catch ( DBReplicationWaitError $e ) {
					$response['reached'] = 'slave-lag-limit';
					break;
				}
				$lastCheckTime = microtime( true );
			}
			// Don't let any queue slaves/backups fall behind
			if ( $jobsPopped > 0 && ( $jobsPopped % 100 ) == 0 ) {
				$group->waitForBackups();
			}

			// Bail if near-OOM instead of in a job
			if ( !$this->checkMemoryOK() ) {
				$response['reached'] = 'memory-limit';
				break;
			}
		}
	} while ( $job ); // stop when there are no jobs

	// Sync the persistent backoffs for the next runJobs.php pass
	if ( $backoffDeltas ) {
		$this->syncBackoffDeltas( $backoffs, $backoffDeltas, 'wait' );
	}

	$response['backoffs'] = $backoffs;
	$response['elapsed'] = $timeMsTotal;

	return $response;
}