/**
 * Purge the link-table rows and related bookkeeping data of a deleted page.
 *
 * Works from the page ID stored in $this->pageId because the page itself
 * may already be deleted. Large deletions are split into batches of
 * 'UpdateRowsPerQuery' rows; when more than one batch is needed, each batch
 * is followed by a commit-and-wait-for-replication round using $this->ticket.
 *
 * The per-page lock is only taken when a transaction ticket is set.
 */
public function doUpdate() {
    $services = MediaWikiServices::getInstance();
    $config = $services->getMainConfig();
    $lbFactory = $services->getDBLoadBalancerFactory();
    $batchSize = $config->get('UpdateRowsPerQuery');

    // Page may already be deleted, so don't just getId()
    $id = $this->pageId;

    // Explicitly start without a lock: the variable is consumed unconditionally
    // at the end, but only assigned when a transaction ticket is available.
    $scopedLock = null;
    if ($this->ticket) {
        // Make sure all links update threads see the changes of each other.
        // This handles the case when updates have to be batched into several COMMITs.
        $scopedLock = LinksUpdate::acquirePageLock($this->getDB(), $id);
    }

    $title = $this->page->getTitle();
    $dbw = $this->getDB(); // convenience

    // Delete restrictions for it
    $dbw->delete('page_restrictions', ['pr_page' => $id], __METHOD__);

    // Fix category table counts
    $cats = $dbw->selectFieldValues('categorylinks', 'cl_to', ['cl_from' => $id], __METHOD__);
    $catBatches = array_chunk($cats, $batchSize);
    foreach ($catBatches as $catBatch) {
        $this->page->updateCategoryCounts([], $catBatch, $id);
        if (count($catBatches) > 1) {
            $lbFactory->commitAndWaitForReplication(
                __METHOD__,
                $this->ticket,
                ['wiki' => $dbw->getWikiID()]
            );
        }
    }

    // Refresh the category table entry if it seems to have no pages. Check
    // master for the most up-to-date cat_pages count.
    if ($title->getNamespace() === NS_CATEGORY) {
        $row = $dbw->selectRow(
            'category',
            ['cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files'],
            ['cat_title' => $title->getDBkey(), 'cat_pages <= 0'],
            __METHOD__
        );
        if ($row) {
            Category::newFromRow($row, $title)->refreshCounts();
        }
    }

    // Delete the outgoing links of the page, batched by primary key
    $this->batchDeleteByPK(
        'pagelinks',
        ['pl_from' => $id],
        ['pl_from', 'pl_namespace', 'pl_title'],
        $batchSize
    );
    $this->batchDeleteByPK(
        'imagelinks',
        ['il_from' => $id],
        ['il_from', 'il_to'],
        $batchSize
    );
    $this->batchDeleteByPK(
        'categorylinks',
        ['cl_from' => $id],
        ['cl_from', 'cl_to'],
        $batchSize
    );
    $this->batchDeleteByPK(
        'templatelinks',
        ['tl_from' => $id],
        ['tl_from', 'tl_namespace', 'tl_title'],
        $batchSize
    );
    $this->batchDeleteByPK(
        'externallinks',
        ['el_from' => $id],
        ['el_id'],
        $batchSize
    );
    $this->batchDeleteByPK(
        'langlinks',
        ['ll_from' => $id],
        ['ll_from', 'll_lang'],
        $batchSize
    );
    $this->batchDeleteByPK(
        'iwlinks',
        ['iwl_from' => $id],
        ['iwl_from', 'iwl_prefix', 'iwl_title'],
        $batchSize
    );

    // Delete any redirect entry or page props entries
    $dbw->delete('redirect', ['rd_from' => $id], __METHOD__);
    $dbw->delete('page_props', ['pp_page' => $id], __METHOD__);

    // Find recentchanges entries to clean up...
    $rcIdsForTitle = $dbw->selectFieldValues(
        'recentchanges',
        'rc_id',
        [
            'rc_type != ' . RC_LOG,
            'rc_namespace' => $title->getNamespace(),
            'rc_title' => $title->getDBkey(),
            'rc_timestamp < ' . $dbw->addQuotes($dbw->timestamp($this->timestamp)),
        ],
        __METHOD__
    );
    $rcIdsForPage = $dbw->selectFieldValues(
        'recentchanges',
        'rc_id',
        ['rc_type != ' . RC_LOG, 'rc_cur_id' => $id],
        __METHOD__
    );

    // The two result sets can contain the same rows (matched both by title and
    // by page ID); drop duplicates so they do not inflate the batches.
    $rcIds = array_unique(array_merge($rcIdsForTitle, $rcIdsForPage));

    // T98706: delete by PK to avoid lock contention with RC delete log insertions
    $rcIdBatches = array_chunk($rcIds, $batchSize);
    foreach ($rcIdBatches as $rcIdBatch) {
        $dbw->delete('recentchanges', ['rc_id' => $rcIdBatch], __METHOD__);
        if (count($rcIdBatches) > 1) {
            $lbFactory->commitAndWaitForReplication(
                __METHOD__,
                $this->ticket,
                ['wiki' => $dbw->getWikiID()]
            );
        }
    }

    // Commit and release the lock (if set)
    ScopedCallback::consume($scopedLock);
}
/**
 * Remove the restrictions, link-table rows and recent-changes entries that
 * belong to a deleted page, identified by $this->pageId.
 *
 * Explicit deletes are skipped where the database reports cascading deletes
 * or cleanup triggers. The page lock is released once the transaction is idle.
 */
public function doUpdate() {
    $db = $this->mDb;
    // The page may be gone already, so use the recorded ID instead of getId()
    $pageId = $this->pageId;

    // Serialize with other links updates for this page so that they see each
    // other's changes, even when the work spans several COMMITs.
    $scopedLock = LinksUpdate::acquirePageLock($db, $pageId);

    // Drop the restrictions of the page
    $db->delete('page_restrictions', ['pr_page' => $pageId], __METHOD__);

    // Fix the category table counts
    $categoryNames = $db->selectFieldValues(
        'categorylinks',
        'cl_to',
        ['cl_from' => $pageId],
        __METHOD__
    );
    $this->page->updateCategoryCounts([], $categoryNames);

    // With cascading deletes the explicit per-table deletes are unnecessary
    if (!$db->cascadingDeletes()) {
        // Table name => column holding the source page ID, in deletion order
        $pageColumnByTable = [
            'pagelinks' => 'pl_from',
            'imagelinks' => 'il_from',
            'categorylinks' => 'cl_from',
            'templatelinks' => 'tl_from',
            'externallinks' => 'el_from',
            'langlinks' => 'll_from',
            'iwlinks' => 'iwl_from',
            'redirect' => 'rd_from',
            'page_props' => 'pp_page',
        ];
        foreach ($pageColumnByTable as $table => $column) {
            $db->delete($table, [$column => $pageId], __METHOD__);
        }
    }

    // With cleanup triggers the manual recentchanges cleanup is unnecessary
    if (!$db->cleanupTriggers()) {
        $title = $this->page->getTitle();
        $notLogEntry = 'rc_type != ' . RC_LOG;

        // Collect the recentchanges rows to remove, by title and by page ID
        $idsByTitle = $db->selectFieldValues(
            'recentchanges',
            'rc_id',
            [
                $notLogEntry,
                'rc_namespace' => $title->getNamespace(),
                'rc_title' => $title->getDBkey(),
            ],
            __METHOD__
        );
        $idsByPage = $db->selectFieldValues(
            'recentchanges',
            'rc_id',
            [$notLogEntry, 'rc_cur_id' => $pageId],
            __METHOD__
        );

        // T98706: delete by PK to avoid lock contention with RC delete log insertions
        $staleRcIds = array_merge($idsByTitle, $idsByPage);
        if (count($staleRcIds) > 0) {
            $db->delete('recentchanges', ['rc_id' => $staleRcIds], __METHOD__);
        }
    }

    // For correctness the lock must only be released *after* the final COMMIT
    $db->onTransactionIdle(function () use (&$scopedLock) {
        ScopedCallback::consume($scopedLock);
    });
}
/**
 * Run a LinksDeletionUpdate for the page ID given in the job parameters.
 *
 * @return bool False (with the last error set) if the job has no title or a
 *  page with that ID still exists; true after the update has been run
 */
function run() {
    if ($this->title === null) {
        $this->setLastError("deleteLinks: Invalid title");

        return false;
    }

    $pageId = $this->params['pageId'];

    // Links updates are serialized per page ID so they see each others' changes
    $dbw = wfGetDB(DB_MASTER);
    $scopedLock = LinksUpdate::acquirePageLock($dbw, $pageId, 'job');

    $existingPage = WikiPage::newFromID($pageId, WikiPage::READ_LATEST);
    if ($existingPage) {
        // Either the page got restored somehow or something went wrong
        $this->setLastError("deleteLinks: Page #{$pageId} exists");

        return false;
    }

    $factory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

    $timestamp = null;
    if (isset($this->params['timestamp'])) {
        $timestamp = $this->params['timestamp'];
    }

    // The page object is built from the title the page had when it was deleted
    $page = WikiPage::factory($this->title);
    $update = new LinksDeletionUpdate($page, $pageId, $timestamp);
    $ticket = $factory->getEmptyTransactionTicket(__METHOD__);
    $update->setTransactionTicket($ticket);
    $update->doUpdate();

    return true;
}
/**
 * Refresh the secondary data (such as link tables) of one page so that it
 * matches the output of the page's current revision.
 *
 * @param Title $title
 * @return bool False (with the last error set) if the revision is missing or
 *  is not the current one; true if the update ran or was skipped as redundant
 */
protected function runForTitle(Title $title) {
    $services = MediaWikiServices::getInstance();
    $stats = $services->getStatsdDataFactory();
    $lbFactory = $services->getDBLoadBalancerFactory();
    $ticket = $lbFactory->getEmptyTransactionTicket(__METHOD__);

    $page = WikiPage::factory($title);
    $page->loadPageData(WikiPage::READ_LATEST);

    // Links updates are serialized per page ID so they see each others' changes
    $dbw = $lbFactory->getMainLB()->getConnection(DB_MASTER);
    /** @noinspection PhpUnusedLocalVariableInspection */
    $scopedLock = LinksUpdate::acquirePageLock($dbw, $page->getId(), 'job');

    // Read the latest ID *after* acquirePageLock() flushed the transaction, so
    // that edits/moves between loadPageData() and the scoped lock are detected.
    // This works around the chicken/egg problem of determining the lock key.
    $latest = $title->getLatestRevID(Title::GAID_FOR_UPDATE);

    if (empty($this->params['triggeringRevisionId'])) {
        // Use the current revision; READ_LATEST reduces lockAndGetLatest() check failures
        $revision = Revision::newFromTitle($title, false, Revision::READ_LATEST);
    } else {
        // Use the specified revision; if the page was edited since, the check
        // below aborts in order to avoid corrupting the link tables
        $revision = Revision::newFromId(
            $this->params['triggeringRevisionId'],
            Revision::READ_LATEST
        );
    }

    if (!$revision) {
        $stats->increment('refreshlinks.rev_not_found');
        $this->setLastError("Revision not found for {$title->getPrefixedDBkey()}");

        return false; // just deleted?
    }

    $revId = $revision->getId();
    if ($revId != $latest || $revision->getPage() !== $page->getId()) {
        // Never clobber newer updates with older ones. If all jobs were FIFO and
        // serialized, updating links from older revisions would be fine since the
        // latest would eventually be reached. That is not the case (by design), so
        // the link tables are only updated to match the current revision's output.
        $stats->increment('refreshlinks.rev_not_current');
        $this->setLastError("Revision {$revId} is not current");

        return false;
    }

    // A revision without content is treated as having empty content
    $content = $revision->getContent(Revision::RAW)
        ?: $revision->getContentHandler()->makeEmptyContent();

    $parserOutput = false;
    $parserOptions = $page->makeParserOptions('canonical');

    // If page_touched changed after this root job, page views have likely caused
    // re-parses that are now cached. Reusing that cache avoids expensive parsing
    // in some cases.
    if (isset($this->params['rootJobTimestamp'])) {
        $rootJobTimestamp = $this->params['rootJobTimestamp'];
        $opportunistic = !empty($this->params['isOpportunistic']);

        // For opportunistic updates neither clock skew nor DB snapshot/replica
        // lag matter much, so the plain timestamp is used to favour cache reuse.
        $skewedTimestamp = $rootJobTimestamp;
        if (!$opportunistic) {
            // Transclusion updates must reflect the template changes
            $skewedTimestamp = wfTimestamp(
                TS_MW,
                wfTimestamp(TS_UNIX, $skewedTimestamp) + self::CLOCK_FUDGE
            );
        }

        if ($page->getLinksTimestamp() > $skewedTimestamp) {
            // The backlinks were already updated after this job was created
            $stats->increment('refreshlinks.update_skipped');

            return true;
        }

        if ($page->getTouched() >= $rootJobTimestamp || $opportunistic) {
            // The cache is probably up-to-date. It is usable as long as its rev ID
            // matches and it reflects the change that triggered this job.
            $parserOutput = ParserCache::singleton()->getDirty($page, $parserOptions);
            if (
                !$parserOutput
                || $parserOutput->getCacheRevisionId() != $revId
                || $parserOutput->getCacheTime() < $skewedTimestamp
            ) {
                $parserOutput = false; // too stale
            }
        }
    }

    // Parse the current revision unless a usable cached output was found
    if (!$parserOutput) {
        $start = microtime(true);
        // The revision ID is passed along so that revision variables come out right
        $parserOutput = $content->getParserOutput($title, $revId, $parserOptions, false);
        $elapsed = microtime(true) - $start;

        // Slow renders are saved back to the cache to spare other apaches or job
        // runners the CPU time. Saving is not unconditional because that can cause
        // high cache I/O and LRU churn when a template changes.
        if (
            $elapsed >= self::PARSE_THRESHOLD_SEC
            && $page->shouldCheckParserCache($parserOptions, $revId)
            && $parserOutput->isCacheable()
        ) {
            $ctime = wfTimestamp(TS_MW, (int)$start); // cache time
            ParserCache::singleton()->save(
                $parserOutput,
                $page,
                $parserOptions,
                $ctime,
                $revId
            );
        }
        $stats->increment('refreshlinks.parser_uncached');
    } else {
        $stats->increment('refreshlinks.parser_cached');
    }

    $updates = $content->getSecondaryDataUpdates(
        $title,
        null,
        !empty($this->params['useRecursiveLinksUpdate']),
        $parserOutput
    );

    // FIXME: This code probably shouldn't be here?
    // Things like Echo notifications need to know which user caused the
    // links update, so that information is attached to each LinksUpdate.
    foreach ($updates as $update) {
        if (!($update instanceof LinksUpdate)) {
            continue;
        }

        $update->setRevision($revision);
        if (empty($this->params['triggeringUser'])) {
            continue;
        }

        $userInfo = $this->params['triggeringUser'];
        // Anonymous users have no ID and are looked up by name instead
        $user = $userInfo['userId']
            ? User::newFromId($userInfo['userId'])
            : User::newFromName($userInfo['userName'], false);
        $update->setTriggeringUser($user);
    }

    foreach ($updates as $update) {
        $update->setTransactionTicket($ticket);
        $update->doUpdate();
    }

    InfoAction::invalidateCache($title);

    return true;
}