/**
 * Run the links update for a single title, reusing cached parser output
 * when it is provably fresh enough for this job.
 *
 * @param Title $title
 * @return bool Success
 */
protected function runForTitle( Title $title ) {
	// Wait for the DB of the current/next slave DB handle to catch up to the master.
	// This way, we get the correct page_latest for templates or files that just changed
	// milliseconds ago, having triggered this job to begin with.
	if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
		wfGetLB()->waitFor( $this->params['masterPos'] );
	}

	// Fetch the current page and revision...
	$page = WikiPage::factory( $title );
	$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
	if ( !$revision ) {
		$this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
		return false; // XXX: what if it was just deleted?
	}

	$content = $revision->getContent( Revision::RAW );
	if ( !$content ) {
		// If there is no content, pretend the content is empty
		$content = $revision->getContentHandler()->makeEmptyContent();
	}

	$parserOutput = false;
	$parserOptions = $page->makeParserOptions( 'canonical' );
	// If page_touched changed after this root job, then it is likely that
	// any views of the pages already resulted in re-parses which are now in
	// cache. The cache can be reused to avoid expensive parsing in some cases.
	if ( isset( $this->params['rootJobTimestamp'] ) ) {
		$opportunistic = !empty( $this->params['isOpportunistic'] );

		$skewedTimestamp = $this->params['rootJobTimestamp'];
		if ( $opportunistic ) {
			// Neither clock skew nor DB snapshot/slave lag matter much for such
			// updates; focus on reusing the (often recently updated) cache
		} else {
			// For transclusion updates, the template changes must be reflected
			$skewedTimestamp = wfTimestamp( TS_MW,
				wfTimestamp( TS_UNIX, $skewedTimestamp ) + self::CLOCK_FUDGE
			);
		}

		if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
			// Something already updated the backlinks since this job was made
			return true;
		}

		if ( $page->getTouched() >= $skewedTimestamp || $opportunistic ) {
			// Something bumped page_touched since this job was made
			// or the cache is otherwise suspected to be up-to-date
			$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
			if ( $parserOutput && $parserOutput->getCacheTime() < $skewedTimestamp ) {
				$parserOutput = false; // too stale
			}
		}
	}

	// Fetch the current revision and parse it if necessary...
	// FIX: use strict comparison against the boolean sentinel
	if ( $parserOutput === false ) {
		$start = microtime( true );
		// Revision ID must be passed to the parser output to get revision variables correct
		$parserOutput = $content->getParserOutput(
			$title, $revision->getId(), $parserOptions, false );
		$elapsed = microtime( true ) - $start;
		// If it took a long time to render, then save this back to the cache to avoid
		// wasted CPU by other apaches or job runners. We don't want to always save to
		// cache as this can cause high cache I/O and LRU churn when a template changes.
		if ( $elapsed >= self::PARSE_THRESHOLD_SEC
			&& $page->shouldCheckParserCache( $parserOptions, $revision->getId() )
			&& $parserOutput->isCacheable()
		) {
			$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
			ParserCache::singleton()->save(
				$parserOutput, $page, $parserOptions, $ctime, $revision->getId() );
		}
	}

	$updates = $content->getSecondaryDataUpdates(
		$title,
		null,
		!empty( $this->params['useRecursiveLinksUpdate'] ),
		$parserOutput
	);

	// FIX: resolve the triggering user and revision once, instead of re-fetching
	// them for every LinksUpdate in the list (each lookup can hit the database).
	$triggeringUser = null;
	if ( !empty( $this->params['triggeringUser'] ) ) {
		$userInfo = $this->params['triggeringUser'];
		if ( $userInfo['userId'] ) {
			$triggeringUser = User::newFromId( $userInfo['userId'] );
		} else {
			// Anonymous, use the username
			$triggeringUser = User::newFromName( $userInfo['userName'], false );
		}
	}

	$triggeringRevision = null;
	if ( !empty( $this->params['triggeringRevisionId'] ) ) {
		// The revision may be on a lagged slave; retry on the master if not found
		$triggeringRevision = Revision::newFromId( $this->params['triggeringRevisionId'] );
		if ( $triggeringRevision === null ) {
			$triggeringRevision = Revision::newFromId(
				$this->params['triggeringRevisionId'], Revision::READ_LATEST );
		}
	}

	foreach ( $updates as $update ) {
		if ( $update instanceof LinksUpdate ) {
			if ( !empty( $this->params['triggeredRecursive'] ) ) {
				$update->setTriggeredRecursive();
			}
			if ( !empty( $this->params['triggeringUser'] ) ) {
				$update->setTriggeringUser( $triggeringUser );
			}
			if ( !empty( $this->params['triggeringRevisionId'] ) ) {
				$update->setRevision( $triggeringRevision );
			}
		}
	}

	DataUpdate::runUpdates( $updates );

	InfoAction::invalidateCache( $title );

	return true;
}
/**
 * Purge caches on page update etc
 *
 * Invalidates transclusion/redirect backlink caches, the CDN (squid) entry,
 * the file cache, and the action=info cache for the given page.
 *
 * @param $title Title object
 * @todo Verify that $title is always a Title object (and never false or null), add Title hint to parameter $title
 */
public static function onArticleEdit( $title ) {
	// Invalidate caches of articles which include this page
	DeferredUpdates::addHTMLCacheUpdate( $title, 'templatelinks' );

	// Invalidate the caches of all pages which redirect here
	DeferredUpdates::addHTMLCacheUpdate( $title, 'redirect' );

	// Purge squid for this page only
	$title->purgeSquid();

	// Clear file cache for this page only
	HTMLFileCache::clearFileCache( $title );

	// Purge the cached action=info page data
	InfoAction::invalidateCache( $title );
}
/**
 * Purge caches on page update etc
 *
 * Queues HTML cache invalidations for transcluding/redirecting pages,
 * drops this title from the link cache, purges CDN and file cache, and
 * defers invalidation of the action=info cache.
 *
 * @param Title $title
 * @param Revision|null $revision Revision that was just saved, may be null
 */
public static function onArticleEdit( Title $title, Revision $revision = null ) {
	// Invalidate caches of articles which include this page
	DeferredUpdates::addUpdate( new HTMLCacheUpdate( $title, 'templatelinks' ) );

	// Invalidate the caches of all pages which redirect here
	DeferredUpdates::addUpdate( new HTMLCacheUpdate( $title, 'redirect' ) );

	MediaWikiServices::getInstance()->getLinkCache()->invalidateTitle( $title );

	// Purge CDN for this page only
	$title->purgeSquid();

	// Clear file cache for this page only
	HTMLFileCache::clearFileCache( $title );

	// Capture only the scalar ID so the closure does not hold the Revision object
	if ( $revision ) {
		$revisionId = $revision->getId();
	} else {
		$revisionId = null;
	}
	DeferredUpdates::addCallableUpdate( function () use ( $title, $revisionId ) {
		InfoAction::invalidateCache( $title, $revisionId );
	} );
}
/**
 * Parse the given revision and run its secondary data (links) updates.
 *
 * @param Title $title Page to update
 * @param Revision $revision Revision whose content is parsed
 * @param string $fname Profiling caller name
 * @return void
 */
public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
	wfProfileIn( $fname );

	$pageContent = $revision->getContent( Revision::RAW );
	if ( !$pageContent ) {
		// if there is no content, pretend the content is empty
		$pageContent = $revision->getContentHandler()->makeEmptyContent();
	}

	// Revision ID must be passed to the parser output to get revision variables correct
	$output = $pageContent->getParserOutput( $title, $revision->getId(), null, false );

	$dataUpdates = $pageContent->getSecondaryDataUpdates( $title, null, false, $output );
	DataUpdate::runUpdates( $dataUpdates );

	InfoAction::invalidateCache( $title );

	wfProfileOut( $fname );
}
/**
 * Run the links update for a single title, reusing cached parser output
 * when it is fresh enough for this job.
 *
 * @param Title|null $title
 * @return bool Success
 */
protected function runForTitle( Title $title = null ) {
	$linkCache = LinkCache::singleton();
	$linkCache->clear();

	if ( is_null( $title ) ) {
		$this->setLastError( "refreshLinks: Invalid title" );
		return false;
	}

	// Wait for the DB of the current/next slave DB handle to catch up to the master.
	// This way, we get the correct page_latest for templates or files that just changed
	// milliseconds ago, having triggered this job to begin with.
	if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
		wfGetLB()->waitFor( $this->params['masterPos'] );
	}

	$page = WikiPage::factory( $title );

	// Fetch the current revision...
	$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
	if ( !$revision ) {
		$this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
		return false; // XXX: what if it was just deleted?
	}

	$content = $revision->getContent( Revision::RAW );
	if ( !$content ) {
		// If there is no content, pretend the content is empty
		$content = $revision->getContentHandler()->makeEmptyContent();
	}

	$parserOutput = false;
	$parserOptions = $page->makeParserOptions( 'canonical' );
	// If page_touched changed after this root job (with a good slave lag skew factor),
	// then it is likely that any views of the pages already resulted in re-parses which
	// are now in cache. This can be reused to avoid expensive parsing in some cases.
	if ( isset( $this->params['rootJobTimestamp'] ) ) {
		// UNIX timestamp, padded by 5 seconds to allow for clock skew / slave lag
		$skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5;
		if ( $page->getLinksTimestamp() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
			// Something already updated the backlinks since this job was made
			return true;
		}

		if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
			$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
			// FIX: getCacheTime() returns a TS_MW string ("YYYYMMDDHHMMSS") while
			// $skewedTimestamp is a UNIX integer; comparing them directly made this
			// staleness check a no-op. Convert to the same unit before comparing.
			if ( $parserOutput
				&& wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() ) <= $skewedTimestamp
			) {
				$parserOutput = false; // too stale
			}
		}
	}

	// Fetch the current revision and parse it if necessary...
	if ( $parserOutput === false ) {
		$start = microtime( true );
		// Revision ID must be passed to the parser output to get revision variables correct
		$parserOutput = $content->getParserOutput(
			$title, $revision->getId(), $parserOptions, false );
		$elapsed = microtime( true ) - $start; // FIX: was misspelled "$ellapsed"
		// If it took a long time to render, then save this back to the cache to avoid
		// wasted CPU by other apaches or job runners. We don't want to always save to
		// cache as this can cause high cache I/O and LRU churn when a template changes.
		if ( $elapsed >= self::PARSE_THRESHOLD_SEC
			&& $page->isParserCacheUsed( $parserOptions, $revision->getId() )
			&& $parserOutput->isCacheable()
		) {
			$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
			ParserCache::singleton()->save(
				$parserOutput, $page, $parserOptions, $ctime, $revision->getId() );
		}
	}

	$updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
	DataUpdate::runUpdates( $updates );

	InfoAction::invalidateCache( $title );

	return true;
}
/**
 * Run the links update for a single title, aborting if the page was edited
 * concurrently so that older output never clobbers newer link table rows.
 *
 * @param Title $title
 * @return bool Success
 */
protected function runForTitle( Title $title ) {
	$page = WikiPage::factory( $title );

	if ( !empty( $this->params['triggeringRevisionId'] ) ) {
		// Fetch the specified revision; lockAndGetLatest() below detects if the page
		// was edited since and aborts in order to avoid corrupting the link tables
		$revision = Revision::newFromId(
			$this->params['triggeringRevisionId'],
			Revision::READ_LATEST
		);
	} else {
		// Fetch current revision; READ_LATEST reduces lockAndGetLatest() check failures
		$revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
	}

	if ( !$revision ) {
		$this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
		return false; // just deleted?
	}

	$content = $revision->getContent( Revision::RAW );
	if ( !$content ) {
		// If there is no content, pretend the content is empty
		$content = $revision->getContentHandler()->makeEmptyContent();
	}

	$parserOutput = false;
	$parserOptions = $page->makeParserOptions( 'canonical' );
	// If page_touched changed after this root job, then it is likely that
	// any views of the pages already resulted in re-parses which are now in
	// cache. The cache can be reused to avoid expensive parsing in some cases.
	if ( isset( $this->params['rootJobTimestamp'] ) ) {
		$opportunistic = !empty( $this->params['isOpportunistic'] );

		$skewedTimestamp = $this->params['rootJobTimestamp'];
		if ( $opportunistic ) {
			// Neither clock skew nor DB snapshot/slave lag matter much for such
			// updates; focus on reusing the (often recently updated) cache
		} else {
			// For transclusion updates, the template changes must be reflected
			$skewedTimestamp = wfTimestamp( TS_MW,
				wfTimestamp( TS_UNIX, $skewedTimestamp ) + self::CLOCK_FUDGE
			);
		}

		if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
			// Something already updated the backlinks since this job was made
			return true;
		}

		if ( $page->getTouched() >= $skewedTimestamp || $opportunistic ) {
			// Something bumped page_touched since this job was made
			// or the cache is otherwise suspected to be up-to-date
			$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
			if ( $parserOutput && $parserOutput->getCacheTime() < $skewedTimestamp ) {
				$parserOutput = false; // too stale
			}
		}
	}

	// Fetch the current revision and parse it if necessary...
	// FIX: use strict comparison against the boolean sentinel
	if ( $parserOutput === false ) {
		$start = microtime( true );
		// Revision ID must be passed to the parser output to get revision variables correct
		$parserOutput = $content->getParserOutput(
			$title, $revision->getId(), $parserOptions, false );
		$elapsed = microtime( true ) - $start;
		// If it took a long time to render, then save this back to the cache to avoid
		// wasted CPU by other apaches or job runners. We don't want to always save to
		// cache as this can cause high cache I/O and LRU churn when a template changes.
		if ( $elapsed >= self::PARSE_THRESHOLD_SEC
			&& $page->shouldCheckParserCache( $parserOptions, $revision->getId() )
			&& $parserOutput->isCacheable()
		) {
			$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
			ParserCache::singleton()->save(
				$parserOutput, $page, $parserOptions, $ctime, $revision->getId() );
		}
	}

	$updates = $content->getSecondaryDataUpdates(
		$title,
		null,
		!empty( $this->params['useRecursiveLinksUpdate'] ),
		$parserOutput
	);

	// FIX: resolve the triggering user once instead of constructing a new User
	// object (with a possible DB lookup) for every LinksUpdate in the list.
	// FIXME: This code probably shouldn't be here?
	// Needed by things like Echo notifications which need
	// to know which user caused the links update
	$triggeringUser = null;
	if ( !empty( $this->params['triggeringUser'] ) ) {
		$userInfo = $this->params['triggeringUser'];
		if ( $userInfo['userId'] ) {
			$triggeringUser = User::newFromId( $userInfo['userId'] );
		} else {
			// Anonymous, use the username
			$triggeringUser = User::newFromName( $userInfo['userName'], false );
		}
	}

	foreach ( $updates as $update ) {
		if ( $update instanceof LinksUpdate && $triggeringUser !== null ) {
			$update->setTriggeringUser( $triggeringUser );
		}
	}

	$latestNow = $page->lockAndGetLatest();
	if ( !$latestNow || $revision->getId() != $latestNow ) {
		// Do not clobber over newer updates with older ones. If all jobs were FIFO and
		// serialized, it would be OK to update links based on older revisions since it
		// would eventually get to the latest. Since that is not the case (by design),
		// only update the link tables to a state matching the current revision's output.
		$this->setLastError( "page_latest changed from {$revision->getId()} to {$latestNow}" );

		return false;
	}

	DataUpdate::runUpdates( $updates );

	InfoAction::invalidateCache( $title );

	return true;
}
/**
 * Run the links update for a single title, serialized against other link
 * updates for the same page ID and aborting if the revision is not current.
 *
 * @param Title $title
 * @return bool Success
 */
protected function runForTitle( Title $title ) {
	$services = MediaWikiServices::getInstance();
	$stats = $services->getStatsdDataFactory();
	$lbFactory = $services->getDBLoadBalancerFactory();
	$ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );

	$page = WikiPage::factory( $title );
	$page->loadPageData( WikiPage::READ_LATEST );

	// Serialize links updates by page ID so they see each others' changes
	$dbw = $lbFactory->getMainLB()->getConnection( DB_MASTER );
	/** @noinspection PhpUnusedLocalVariableInspection */
	$scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->getId(), 'job' );
	// Get the latest ID *after* acquirePageLock() flushed the transaction.
	// This is used to detect edits/moves after loadPageData() but before the scope lock.
	// This works around the chicken/egg problem of determining the scope lock key.
	$latest = $title->getLatestRevID( Title::GAID_FOR_UPDATE );

	if ( !empty( $this->params['triggeringRevisionId'] ) ) {
		// Fetch the specified revision; the $latest check below detects if the page
		// was edited since and aborts in order to avoid corrupting the link tables
		$revision = Revision::newFromId(
			$this->params['triggeringRevisionId'],
			Revision::READ_LATEST
		);
	} else {
		// Fetch current revision; READ_LATEST reduces staleness check failures
		$revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
	}

	if ( !$revision ) {
		$stats->increment( 'refreshlinks.rev_not_found' );
		$this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
		return false; // just deleted?
	} elseif ( $revision->getId() != $latest || $revision->getPage() !== $page->getId() ) {
		// Do not clobber over newer updates with older ones. If all jobs were FIFO and
		// serialized, it would be OK to update links based on older revisions since it
		// would eventually get to the latest. Since that is not the case (by design),
		// only update the link tables to a state matching the current revision's output.
		$stats->increment( 'refreshlinks.rev_not_current' );
		$this->setLastError( "Revision {$revision->getId()} is not current" );
		return false;
	}

	$content = $revision->getContent( Revision::RAW );
	if ( !$content ) {
		// If there is no content, pretend the content is empty
		$content = $revision->getContentHandler()->makeEmptyContent();
	}

	$parserOutput = false;
	$parserOptions = $page->makeParserOptions( 'canonical' );
	// If page_touched changed after this root job, then it is likely that
	// any views of the pages already resulted in re-parses which are now in
	// cache. The cache can be reused to avoid expensive parsing in some cases.
	if ( isset( $this->params['rootJobTimestamp'] ) ) {
		$opportunistic = !empty( $this->params['isOpportunistic'] );

		$skewedTimestamp = $this->params['rootJobTimestamp'];
		if ( $opportunistic ) {
			// Neither clock skew nor DB snapshot/replica DB lag matter much for such
			// updates; focus on reusing the (often recently updated) cache
		} else {
			// For transclusion updates, the template changes must be reflected
			$skewedTimestamp = wfTimestamp( TS_MW,
				wfTimestamp( TS_UNIX, $skewedTimestamp ) + self::CLOCK_FUDGE
			);
		}

		if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
			// Something already updated the backlinks since this job was made
			$stats->increment( 'refreshlinks.update_skipped' );
			return true;
		}

		if ( $page->getTouched() >= $this->params['rootJobTimestamp'] || $opportunistic ) {
			// Cache is suspected to be up-to-date. As long as the cache rev ID matches
			// and it reflects the job's triggering change, then it is usable.
			$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
			if ( !$parserOutput
				|| $parserOutput->getCacheRevisionId() != $revision->getId()
				|| $parserOutput->getCacheTime() < $skewedTimestamp
			) {
				$parserOutput = false; // too stale
			}
		}
	}

	// Fetch the current revision and parse it if necessary...
	if ( $parserOutput ) {
		$stats->increment( 'refreshlinks.parser_cached' );
	} else {
		$start = microtime( true );
		// Revision ID must be passed to the parser output to get revision variables correct
		$parserOutput = $content->getParserOutput(
			$title, $revision->getId(), $parserOptions, false );
		$elapsed = microtime( true ) - $start;
		// If it took a long time to render, then save this back to the cache to avoid
		// wasted CPU by other apaches or job runners. We don't want to always save to
		// cache as this can cause high cache I/O and LRU churn when a template changes.
		if ( $elapsed >= self::PARSE_THRESHOLD_SEC
			&& $page->shouldCheckParserCache( $parserOptions, $revision->getId() )
			&& $parserOutput->isCacheable()
		) {
			$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
			ParserCache::singleton()->save(
				$parserOutput, $page, $parserOptions, $ctime, $revision->getId() );
		}
		$stats->increment( 'refreshlinks.parser_uncached' );
	}

	$updates = $content->getSecondaryDataUpdates(
		$title,
		null,
		!empty( $this->params['useRecursiveLinksUpdate'] ),
		$parserOutput
	);

	// FIX: resolve the triggering user once instead of constructing a new User
	// object (with a possible DB lookup) for every LinksUpdate in the list.
	// FIXME: This code probably shouldn't be here?
	// Needed by things like Echo notifications which need
	// to know which user caused the links update
	$triggeringUser = null;
	if ( !empty( $this->params['triggeringUser'] ) ) {
		$userInfo = $this->params['triggeringUser'];
		if ( $userInfo['userId'] ) {
			$triggeringUser = User::newFromId( $userInfo['userId'] );
		} else {
			// Anonymous, use the username
			$triggeringUser = User::newFromName( $userInfo['userName'], false );
		}
	}

	foreach ( $updates as $update ) {
		if ( $update instanceof LinksUpdate ) {
			$update->setRevision( $revision );
			if ( $triggeringUser !== null ) {
				$update->setTriggeringUser( $triggeringUser );
			}
		}
	}

	foreach ( $updates as $update ) {
		$update->setTransactionTicket( $ticket );
		$update->doUpdate();
	}

	InfoAction::invalidateCache( $title );

	return true;
}