/**
 * Checks how BacklinkJobUtils::partitionBacklinkJob() splits a recursive
 * RefreshLinksJob over the 'pagelinks' backlinks of a title.
 *
 * Every page in $pages is edited to contain a link to the target title; the
 * test then expects 20 backlinks. Partitioning is applied three times, each
 * time to the trailing recursive sub-job of the previous round:
 *  - rounds 1 and 2 must yield 10 jobs: 9 leaf jobs followed by 1 recursive sub-job
 *  - round 3 must yield the 2 remaining leaf jobs
 * Each produced job must carry the root job signature of the base job.
 *
 * NOTE(review): the arguments 9 and 1 passed to partitionBacklinkJob() are
 * presumably the batch size and the per-leaf-job page count; confirm against
 * BacklinkJobUtils.
 *
 * @dataProvider provider_backlinks
 * @param int $ns Namespace of the link target
 * @param string $dbKey DB key of the link target
 * @param array $pages List of array( namespace, dbkey ) pairs of the linking pages
 */
public function testRefreshLinks($ns, $dbKey, $pages) {
    $title = Title::makeTitle($ns, $dbKey);

    // Make every provided page link to the target title.
    foreach ($pages as $page) {
        list($bns, $bdbkey) = $page;
        $bpage = WikiPage::factory(Title::makeTitle($bns, $bdbkey));
        $content = ContentHandler::makeContent("[[{$title->getPrefixedText()}]]", $bpage->getTitle());
        $bpage->doEditContent($content, "test");
    }

    // Drop any cached backlink data so the count reflects the edits above.
    $title->getBacklinkCache()->clear();
    $this->assertEquals(
        20,
        $title->getBacklinkCache()->getNumLinks('pagelinks'),
        'Correct number of backlinks'
    );

    $job = new RefreshLinksJob(
        $title,
        array('recursive' => true, 'table' => 'pagelinks')
            + Job::newRootJobParams("refreshlinks:pagelinks:{$title->getPrefixedText()}")
    );
    $extraParams = $job->getRootJobParams();

    // Round 1: pages 0..8 as leaf jobs, remainder as one recursive sub-job.
    $jobs = BacklinkJobUtils::partitionBacklinkJob($job, 9, 1, array('params' => $extraParams));

    $this->assertCount(10, $jobs, 'Correct number of sub-jobs');
    $this->assertEquals(
        $pages[0],
        current($jobs[0]->params['pages']),
        'First job is leaf job with proper title'
    );
    $this->assertEquals(
        $pages[8],
        current($jobs[8]->params['pages']),
        'Last leaf job is leaf job with proper title'
    );
    $this->assertTrue(isset($jobs[9]->params['recursive']), 'Last job is recursive sub-job');
    // Loose comparison kept on purpose: only the truthiness of the flag is asserted.
    $this->assertEquals(true, $jobs[9]->params['recursive'], 'Last job is recursive sub-job');
    $this->assertTrue(is_array($jobs[9]->params['range']), 'Last job is recursive sub-job');

    $this->assertEquals(
        $title->getPrefixedText(),
        $jobs[0]->getTitle()->getPrefixedText(),
        'Base job title retained in leaf job'
    );
    $this->assertEquals(
        $title->getPrefixedText(),
        $jobs[9]->getTitle()->getPrefixedText(),
        'Base job title retained in recursive sub-job'
    );
    $this->assertEquals(
        $extraParams['rootJobSignature'],
        $jobs[0]->params['rootJobSignature'],
        'Leaf job has root params'
    );
    $this->assertEquals(
        $extraParams['rootJobSignature'],
        $jobs[9]->params['rootJobSignature'],
        'Recursive sub-job has root params'
    );

    // Round 2: partition the recursive sub-job; pages 9..17 become leaf jobs.
    $jobs2 = BacklinkJobUtils::partitionBacklinkJob($jobs[9], 9, 1, array('params' => $extraParams));

    $this->assertCount(10, $jobs2, 'Correct number of sub-jobs');
    $this->assertEquals(
        $pages[9],
        current($jobs2[0]->params['pages']),
        'First job is leaf job with proper title'
    );
    $this->assertEquals(
        $pages[17],
        current($jobs2[8]->params['pages']),
        'Last leaf job is leaf job with proper title'
    );
    $this->assertTrue(isset($jobs2[9]->params['recursive']), 'Last job is recursive sub-job');
    // Loose comparison kept on purpose: only the truthiness of the flag is asserted.
    $this->assertEquals(true, $jobs2[9]->params['recursive'], 'Last job is recursive sub-job');
    $this->assertTrue(is_array($jobs2[9]->params['range']), 'Last job is recursive sub-job');
    $this->assertEquals(
        $extraParams['rootJobSignature'],
        $jobs2[0]->params['rootJobSignature'],
        'Leaf job has root params'
    );
    $this->assertEquals(
        $extraParams['rootJobSignature'],
        $jobs2[9]->params['rootJobSignature'],
        'Recursive sub-job has root params'
    );

    // Round 3: only pages 18 and 19 remain, so no recursive sub-job is expected.
    $jobs3 = BacklinkJobUtils::partitionBacklinkJob($jobs2[9], 9, 1, array('params' => $extraParams));

    $this->assertCount(2, $jobs3, 'Correct number of sub-jobs');
    $this->assertEquals(
        $pages[18],
        current($jobs3[0]->params['pages']),
        'First job is leaf job with proper title'
    );
    $this->assertEquals(
        $extraParams['rootJobSignature'],
        $jobs3[0]->params['rootJobSignature'],
        'Leaf job has root params'
    );
    $this->assertEquals(
        $pages[19],
        current($jobs3[1]->params['pages']),
        'Last job is leaf job with proper title'
    );
    $this->assertEquals(
        $extraParams['rootJobSignature'],
        $jobs3[1]->params['rootJobSignature'],
        'Last leaf job has root params'
    );
}
/**
 * Queue a links-refresh job for this page when freshly parsed output makes one worthwhile.
 *
 * Nothing is queued when the wiki is read-only or when the
 * 'OpportunisticLinksUpdate' hook run returns a falsy value.
 *
 * @param ParserOutput $parserOutput Current version page output
 * @since 1.25
 */
public function triggerOpportunisticLinksUpdate(ParserOutput $parserOutput) {
    if (wfReadOnly()) {
        return;
    }

    $hookArgs = [$this, $this->mTitle, $parserOutput];
    if (!Hooks::run('OpportunisticLinksUpdate', $hookArgs)) {
        return;
    }

    $config = RequestContext::getMain()->getConfig();
    $jobParams = [
        'isOpportunistic' => true,
        'rootJobTimestamp' => $parserOutput->getCacheTime(),
    ];

    if ($this->mTitle->areRestrictionsCascading()) {
        // A cascade-protecting page should have its links as current as possible,
        // so it always gets a prioritized job.
        JobQueueGroup::singleton()->lazyPush(RefreshLinksJob::newPrioritized($this->mTitle, $jobParams));
        return;
    }

    // Beyond this point only output with "dynamic" (time/random based) content is
    // of interest, and only when miser mode is off.
    if ($config->get('MiserMode') || !$parserOutput->hasDynamicContent()) {
        return;
    }

    // Act only when the cache expired because of the dynamic content itself, not
    // because a referenced template/file was edited. Expiry through dynamic content
    // leaves page_touched unchanged, whereas a purge via HTMLCacheUpdateJob means the
    // template/file edit already triggered recursive RefreshLinksJob jobs, so
    // another job here would be redundant.
    $linksTimestamp = $this->getLinksTimestamp();
    $touched = $this->getTouched();
    if (!($linksTimestamp > $touched)) {
        return;
    }

    // Throttle through a cache key: an uncacheable page must not enqueue a job on
    // every view. De-duplication would drop the extras, but still at an I/O cost.
    $cache = ObjectCache::getLocalClusterInstance();
    $key = $cache->makeKey('dynamic-linksupdate', 'last', $this->getId());
    $ttl = max($parserOutput->getCacheExpiry(), 3600);
    if ($cache->add($key, time(), $ttl)) {
        JobQueueGroup::singleton()->lazyPush(RefreshLinksJob::newDynamic($this->mTitle, $jobParams));
    }
}
/**
 * Schedule, through the job queue, link updates for the pages that depend on this page.
 *
 * Recursive jobs are queued for the 'templatelinks' table (and for 'imagelinks'
 * as well when the title is in NS_FILE). In addition, one prioritized
 * RefreshLinksJob is pushed for each cascade-protected backlink.
 */
protected function queueRecursiveJobs() {
    $target = $this->mTitle;

    self::queueRecursiveJobsForTable($target, 'templatelinks');
    if ($target->getNamespace() == NS_FILE) {
        // The title may be, or may have been, a redirect: cover imagelinks too
        self::queueRecursiveJobsForTable($target, 'imagelinks');
    }

    // Cascade-protected backlinks additionally get jobs on a high priority queue.
    // When a meta-template starts using a new template, that new template should
    // become implicitly protected as soon as possible, if applicable.
    // These jobs repeat a subset of the ones queued above but can run sooner;
    // whichever runs first generally turns the other into a no-op.
    $priorityJobs = array();
    $cascadeProtected = $target->getBacklinkCache()->getCascadeProtectedLinks();
    foreach ($cascadeProtected as $backlinkTitle) {
        $priorityJobs[] = RefreshLinksJob::newPrioritized($backlinkTitle, array());
    }

    JobQueueGroup::singleton()->push($priorityJobs);
}
/**
 * Execute this refreshLinks2 job.
 *
 * After validating the title and the 'start'/'end' params, the backlinks of the
 * job title in the requested table are fetched and handled in one of three ways:
 *  - more than self::MAX_TITLES_RUN titles: re-queued as smaller RefreshLinksJob2 jobs
 *  - not running under the CLI SAPI: re-queued as one RefreshLinksJob per title
 *  - otherwise: each title is re-parsed right here
 *
 * @return boolean success
 */
function run() {
    wfProfileIn(__METHOD__);

    LinkCache::singleton()->clear();

    // Validate the job before doing any work
    $problem = null;
    if (is_null($this->title)) {
        $problem = "refreshLinks2: Invalid title";
    } elseif (!isset($this->params['start'], $this->params['end'])) {
        $problem = "refreshLinks2: Invalid params";
    }
    if ($problem !== null) {
        $this->error = $problem;
        wfProfileOut(__METHOD__);
        return false;
    }

    // Back compat for pre-r94435 jobs, which carry no 'table' param
    $table = 'templatelinks';
    if (isset($this->params['table'])) {
        $table = $this->params['table'];
    }

    // Avoid slave lag when fetching templates
    if (isset($this->params['masterPos'])) {
        $masterPos = $this->params['masterPos'];
    } else {
        $loadBalancer = wfGetLB();
        $masterPos = ($loadBalancer->getServerCount() > 1) ? $loadBalancer->getMasterPos() : false;
    }

    $titles = $this->title->getBacklinkCache()->getLinks(
        $table,
        $this->params['start'],
        $this->params['end']
    );

    if ($titles->count() > self::MAX_TITLES_RUN) {
        # Parsing too many pages in one job can starve other jobs, so an oversized
        # set is broken up into smaller jobs. Handing the master position on to them
        # cuts down the time their runners spend waiting for slaves to catch up, as
        # time will have passed between now and when these jobs get popped off the queue.
        $ranges = array(); // list of array( first page ID, last page ID )
        $rangeStart = 0;
        $rangeEnd = 0;
        $pending = 0; // titles collected for the batch being built
        foreach ($titles as $backlink) {
            $pageId = $backlink->getArticleId();
            if ($pending === 0) {
                $rangeStart = $pageId;
            }
            $rangeEnd = $pageId;
            $pending++;
            if ($pending >= self::MAX_TITLES_RUN) {
                $ranges[] = array($rangeStart, $rangeEnd);
                $pending = 0;
            }
        }
        if ($pending > 0) {
            // the leftover pages form one final, smaller batch
            $ranges[] = array($rangeStart, $rangeEnd);
        }

        $jobs = array();
        foreach ($ranges as $range) {
            $jobs[] = new RefreshLinksJob2(
                $this->title,
                array(
                    'table' => $table,
                    'start' => $range[0],
                    'end' => $range[1],
                    'masterPos' => $masterPos
                )
            );
        }
        Job::batchInsert($jobs);
    } elseif (php_sapi_name() != 'cli') {
        # Not suitable for page load triggered job running!
        # Fall back gracefully to per-title refreshLinks jobs in that case.
        $jobs = array();
        foreach ($titles as $backlink) {
            $jobs[] = new RefreshLinksJob($backlink, array('masterPos' => $masterPos));
        }
        Job::batchInsert($jobs);
    } else {
        # Let the current/next slave DB handle catch up to the master first, so that
        # the correct page_latest is seen for templates or files that changed only
        # milliseconds ago and thereby triggered this job.
        if ($masterPos) {
            wfGetLB()->waitFor($masterPos);
        }
        # Re-parse every page that transcludes this page and update its tracking links
        foreach ($titles as $backlink) {
            $revision = Revision::newFromTitle($backlink, false, Revision::READ_NORMAL);
            if ($revision) {
                RefreshLinksJob::runForTitleInternal($backlink, $revision, __METHOD__);
                wfWaitForSlaves();
            } else {
                // Record the problem and move on to the next page
                $this->error = 'refreshLinks: Article not found "' . $backlink->getPrefixedDBkey() . '"';
            }
        }
    }

    wfProfileOut(__METHOD__);
    return true;
}