/**
 * @param IJobSpecification $job
 * @return array
 */
protected function insertFields( IJobSpecification $job ) {
	$dbw = $this->getMasterDB();

	return array(
		'job_cmd' => $job->getType(),
		'job_namespace' => $job->getTitle()->getNamespace(),
		'job_title' => $job->getTitle()->getDBkey(),
		'job_params' => self::makeBlob( $job->getParams() ),
		'job_id' => $dbw->nextSequenceValue( 'job_job_id_seq' ),
		'job_timestamp' => $dbw->timestamp(),
		'job_sha1' => Wikimedia\base_convert(
			sha1( serialize( $job->getDeduplicationInfo() ) ),
			16, 36, 31
		),
		'job_random' => mt_rand( 0, self::MAX_JOB_RANDOM )
	);
}
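// A hedged usage sketch (not from the original source): how a push method
// might feed insertFields() into a batched database insert. The $jobs loop
// and the omission of job_sha1 de-duplication checks are simplifications of
// what the real doBatchPush() does.
$dbw = $this->getMasterDB();
$rows = array();
foreach ( $jobs as $job ) {
	$rows[] = $this->insertFields( $job );
}
$dbw->insert( 'job', $rows, __METHOD__ );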
/**
 * @param IJobSpecification $spec
 *
 * @return Job
 */
public function jobFromSpecInternal( IJobSpecification $spec ) {
	return Job::factory( $spec->getType(), $spec->getTitle(), $spec->getParams() );
}
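// Hedged example (illustrative, not from the original source): turning a
// plain specification back into a runnable Job object. JobSpecification is
// MediaWiki's concrete IJobSpecification; the 'refreshLinks' type, params,
// and title below are made up for this example.
$spec = new JobSpecification(
	'refreshLinks',
	array( 'table' => 'templatelinks' ),
	array(),
	Title::newFromText( 'Template:Example' )
);
$job = $queue->jobFromSpecInternal( $spec );
$job->run();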
/** * Register the "root job" of a given job into the queue for de-duplication. * This should only be called right *after* all the new jobs have been inserted. * This is used to turn older, duplicate, job entries into no-ops. The root job * information will remain in the registry until it simply falls out of cache. * * This requires that $job has two special fields in the "params" array: * - rootJobSignature : hash (e.g. SHA1) that identifies the task * - rootJobTimestamp : TS_MW timestamp of this instance of the task * * A "root job" is a conceptual job that consist of potentially many smaller jobs * that are actually inserted into the queue. For example, "refreshLinks" jobs are * spawned when a template is edited. One can think of the task as "update links * of pages that use template X" and an instance of that task as a "root job". * However, what actually goes into the queue are range and leaf job subtypes. * Since these jobs include things like page ID ranges and DB master positions, * and can morph into smaller jobs recursively, simple duplicate detection * for individual jobs being identical (like that of job_sha1) is not useful. * * In the case of "refreshLinks", if these jobs are still in the queue when the template * is edited again, we want all of these old refreshLinks jobs for that template to become * no-ops. This can greatly reduce server load, since refreshLinks jobs involves parsing. * Essentially, the new batch of jobs belong to a new "root job" and the older ones to a * previous "root job" for the same task of "update links of pages that use template X". * * This does nothing for certain queue classes. * * @param IJobSpecification $job * @throws MWException * @return bool */ public final function deduplicateRootJob(IJobSpecification $job) { if ($job->getType() !== $this->type) { throw new MWException("Got '{$job->getType()}' job; expected '{$this->type}'."); } $ok = $this->doDeduplicateRootJob($job); return $ok; }
/**
 * @param IJobSpecification $job
 * @return array
 */
protected function getNewJobFields( IJobSpecification $job ) {
	return array(
		'type' => $job->getType(),
		'namespace' => $job->getTitle()->getNamespace(),
		'title' => $job->getTitle()->getDBkey(),
		'params' => $job->getParams(),
		'rtimestamp' => $job->getReleaseTimestamp() ?: 0,
		'uuid' => UIDGenerator::newRawUUIDv4( UIDGenerator::QUICK_RAND ),
		'sha1' => $job->ignoreDuplicates()
			? Wikimedia\base_convert( sha1( serialize( $job->getDeduplicationInfo() ) ), 16, 36, 31 )
			: '',
		'timestamp' => time()
	);
}
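// Hedged sketch (an assumption, not the real push path): how the field map
// might be serialized and queued in Redis. The 'l-unclaimed' key name matches
// JobQueueRedis, but the actual class pushes blobs atomically via a Lua
// script rather than a bare lPush(), and uses its own serialize() wrapper
// instead of raw json_encode().
$fields = $this->getNewJobFields( $job );
$blob = json_encode( $fields );
$conn->lPush( $this->getQueueKey( 'l-unclaimed' ), $blob );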
/** * Register the "root job" of a given job into the queue for de-duplication. * This should only be called right *after* all the new jobs have been inserted. * This is used to turn older, duplicate, job entries into no-ops. The root job * information will remain in the registry until it simply falls out of cache. * * This requires that $job has two special fields in the "params" array: * - rootJobSignature : hash (e.g. SHA1) that identifies the task * - rootJobTimestamp : TS_MW timestamp of this instance of the task * * A "root job" is a conceptual job that consist of potentially many smaller jobs * that are actually inserted into the queue. For example, "refreshLinks" jobs are * spawned when a template is edited. One can think of the task as "update links * of pages that use template X" and an instance of that task as a "root job". * However, what actually goes into the queue are range and leaf job subtypes. * Since these jobs include things like page ID ranges and DB master positions, * and can morph into smaller jobs recursively, simple duplicate detection * for individual jobs being identical (like that of job_sha1) is not useful. * * In the case of "refreshLinks", if these jobs are still in the queue when the template * is edited again, we want all of these old refreshLinks jobs for that template to become * no-ops. This can greatly reduce server load, since refreshLinks jobs involves parsing. * Essentially, the new batch of jobs belong to a new "root job" and the older ones to a * previous "root job" for the same task of "update links of pages that use template X". * * This does nothing for certain queue classes. * * @param IJobSpecification $job * @throws MWException * @return bool */ public final function deduplicateRootJob(IJobSpecification $job) { $this->assertNotReadOnly(); if ($job->getType() !== $this->type) { throw new MWException("Got '{$job->getType()}' job; expected '{$this->type}'."); } return $this->doDeduplicateRootJob($job); }