/**
 * Run a bunch of URLs through CdnCacheUpdate::purge()
 * to benchmark Squid response times.
 * @param array $urls A bunch of URLs to purge
 * @param int $trials How many times to run the test
 * @return string
 */
private function benchSquid($urls, $trials = 1) {
    $start = microtime(true);
    for ($i = 0; $i < $trials; $i++) {
        CdnCacheUpdate::purge($urls);
    }
    $delta = microtime(true) - $start;
    $pertrial = $delta / $trials;
    $pertitle = $pertrial / count($urls);

    return sprintf("%4d titles in %6.2fms (%6.2fms each)",
        count($urls), $pertrial * 1000.0, $pertitle * 1000.0);
}
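// Hypothetical usage sketch (not part of the original script): assuming this
// method lives in a Maintenance subclass with the usual output() helper, a
// caller could average five purge trials over a small URL list like so.
$urls = [
    'https://example.org/wiki/Main_Page',
    'https://example.org/wiki/Special:RecentChanges',
];
$this->output($this->benchSquid($urls, 5) . "\n");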
/**
 * Helper to purge an array of $urls
 * @param array $urls List of URLs to purge from the squids
 */
private function sendPurgeRequest($urls) {
    if ($this->hasOption('delay')) {
        $delay = floatval($this->getOption('delay'));
        foreach ($urls as $url) {
            if ($this->hasOption('verbose')) {
                $this->output($url . "\n");
            }
            $u = new CdnCacheUpdate(array($url));
            $u->doUpdate();
            usleep($delay * 1000000.0);
        }
    } else {
        if ($this->hasOption('verbose')) {
            $this->output(implode("\n", $urls) . "\n");
        }
        $u = new CdnCacheUpdate($urls);
        $u->doUpdate();
    }
}
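// Illustrative call (hypothetical): when the maintenance script is run with
// --delay=0.5 and --verbose, each URL below is printed and purged one at a
// time with a half-second pause in between; without --delay they are all
// purged in a single CdnCacheUpdate batch.
$this->sendPurgeRequest([
    'https://example.org/wiki/Main_Page',
    'https://example.org/wiki/Sandbox',
]);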
/**
 * Queue a purge operation
 *
 * @param string $url
 */
public function queuePurge($url) {
    global $wgSquidPurgeUseHostHeader;

    $url = CdnCacheUpdate::expand(str_replace("\n", '', $url));
    $request = array();
    if ($wgSquidPurgeUseHostHeader) {
        $url = wfParseUrl($url);
        $host = $url['host'];
        if (isset($url['port']) && strlen($url['port']) > 0) {
            $host .= ":" . $url['port'];
        }
        $path = $url['path'];
        if (isset($url['query']) && is_string($url['query'])) {
            $path = wfAppendQuery($path, $url['query']);
        }
        $request[] = "PURGE {$path} HTTP/1.1";
        $request[] = "Host: {$host}";
    } else {
        $request[] = "PURGE {$url} HTTP/1.0";
    }
    $request[] = "Connection: Keep-Alive";
    $request[] = "Proxy-Connection: Keep-Alive";
    $request[] = "User-Agent: " . Http::userAgent() . ' ' . __CLASS__;
    // Two ''s to create \r\n\r\n
    $request[] = '';
    $request[] = '';

    $this->requests[] = implode("\r\n", $request);
    if ($this->currentRequestIndex === null) {
        $this->nextRequest();
    }
}
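// For reference, the request text queued above takes one of two shapes
// (example.org and the User-Agent value are placeholders, not literal output):
//
//   With $wgSquidPurgeUseHostHeader = true:
//     PURGE /wiki/Main_Page HTTP/1.1
//     Host: example.org
//     Connection: Keep-Alive
//     Proxy-Connection: Keep-Alive
//     User-Agent: <Http::userAgent()> <class name>
//
//   Otherwise:
//     PURGE http://example.org/wiki/Main_Page HTTP/1.0
//     Connection: Keep-Alive
//     Proxy-Connection: Keep-Alive
//     User-Agent: <Http::userAgent()> <class name>
//
// followed in both cases by the blank line that terminates the header block.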
public function run() {
    // Use purge() directly to avoid infinite recursion
    CdnCacheUpdate::purge($this->params['urls']);

    return true;
}
/**
 * @param array $pages Map of (page ID => (namespace, DB key)) entries
 */
protected function invalidateTitles(array $pages) {
    global $wgUpdateRowsPerQuery, $wgUseFileCache;

    // Get all page IDs in this query into an array
    $pageIds = array_keys($pages);
    if (!$pageIds) {
        return;
    }

    // The page_touched field will need to be bumped for these pages.
    // Only bump it to the present time if no "rootJobTimestamp" was known.
    // If it is known, it can be used instead, which avoids invalidating output
    // that was in fact generated *after* the relevant dependency change time
    // (e.g. template edit). This is particularly useful since refreshLinks jobs
    // save back parser output and usually run alongside htmlCacheUpdate jobs;
    // their saved output would be invalidated by using the current timestamp.
    if (isset($this->params['rootJobTimestamp'])) {
        $touchTimestamp = $this->params['rootJobTimestamp'];
    } else {
        $touchTimestamp = wfTimestampNow();
    }

    $dbw = wfGetDB(DB_MASTER);
    // Update page_touched (skipping pages already touched since the root job).
    // Check $wgUpdateRowsPerQuery for sanity; batch jobs are sized by that already.
    foreach (array_chunk($pageIds, $wgUpdateRowsPerQuery) as $batch) {
        $dbw->commit(__METHOD__, 'flush');
        wfGetLBFactory()->waitForReplication();

        $dbw->update('page',
            ['page_touched' => $dbw->timestamp($touchTimestamp)],
            ['page_id' => $batch,
                "page_touched < " . $dbw->addQuotes($dbw->timestamp($touchTimestamp))],
            __METHOD__
        );
    }

    // Get the list of affected pages (races only mean something else did the purge)
    $titleArray = TitleArray::newFromResult($dbw->select(
        'page',
        ['page_namespace', 'page_title'],
        ['page_id' => $pageIds, 'page_touched' => $dbw->timestamp($touchTimestamp)],
        __METHOD__
    ));

    // Update CDN
    $u = CdnCacheUpdate::newFromTitles($titleArray);
    $u->doUpdate();

    // Update file cache
    if ($wgUseFileCache) {
        foreach ($titleArray as $title) {
            HTMLFileCache::clearFileCache($title);
        }
    }
}
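// Roughly, each loop iteration above issues an UPDATE of this shape (values
// are illustrative; the real statement is built by $dbw->update()):
//
//   UPDATE page
//   SET page_touched = '20240101000000'
//   WHERE page_id IN (1, 2, 3, ...)
//     AND page_touched < '20240101000000'
//
// Pages already touched at or after $touchTimestamp are skipped, which is what
// makes racing or re-run jobs harmless.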
/**
 * @param array $pages Map of (page ID => (namespace, DB key)) entries
 */
protected function invalidateTitles(array $pages) {
    global $wgUpdateRowsPerQuery, $wgUseFileCache;

    // Get all page IDs in this query into an array
    $pageIds = array_keys($pages);
    if (!$pageIds) {
        return;
    }

    // Bump page_touched to the current timestamp. This used to use the root job timestamp
    // (e.g. template/file edit time), which was a bit more efficient when template edits are
    // rare and don't affect the same pages much. However, this way allows for better
    // de-duplication, which is much more useful for wikis with high edit rates. Note that
    // RefreshLinksJob, which is enqueued alongside HTMLCacheUpdateJob, saves the parser output
    // since it has to parse anyway. We assume that the vast majority of the cache jobs finish
    // before the link jobs, so using the current timestamp instead of the root timestamp is
    // not expected to invalidate these cache entries too often.
    $touchTimestamp = wfTimestampNow();

    $dbw = wfGetDB(DB_MASTER);
    $factory = wfGetLBFactory();
    $ticket = $factory->getEmptyTransactionTicket(__METHOD__);
    // Update page_touched (skipping pages already touched since the root job).
    // Check $wgUpdateRowsPerQuery for sanity; batch jobs are sized by that already.
    foreach (array_chunk($pageIds, $wgUpdateRowsPerQuery) as $batch) {
        $factory->commitAndWaitForReplication(__METHOD__, $ticket);

        $dbw->update('page',
            ['page_touched' => $dbw->timestamp($touchTimestamp)],
            ['page_id' => $batch,
                "page_touched < " . $dbw->addQuotes($dbw->timestamp($touchTimestamp))],
            __METHOD__
        );
    }

    // Get the list of affected pages (races only mean something else did the purge)
    $titleArray = TitleArray::newFromResult($dbw->select(
        'page',
        ['page_namespace', 'page_title'],
        ['page_id' => $pageIds, 'page_touched' => $dbw->timestamp($touchTimestamp)],
        __METHOD__
    ));

    // Update CDN
    $u = CdnCacheUpdate::newFromTitles($titleArray);
    $u->doUpdate();

    // Update file cache
    if ($wgUseFileCache) {
        foreach ($titleArray as $title) {
            HTMLFileCache::clearFileCache($title);
        }
    }
}
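// For example, with $wgUpdateRowsPerQuery = 100 and 250 affected page IDs,
// array_chunk() yields batches of 100, 100 and 50 rows, and the loop commits
// pending writes and waits for replicas to catch up before each batch, so a
// large invalidation does not build up replication lag.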
public function execute() {
    global $wgHTCPRouting;

    if ($this->hasOption('htcp-dest')) {
        $parts = explode(':', $this->getOption('htcp-dest'));
        if (count($parts) < 2) {
            // Add the default HTCP port
            $parts[] = '4827';
        }

        // Route all HTCP messages to the provided host:port
        $wgHTCPRouting = [
            '' => ['host' => $parts[0], 'port' => $parts[1]],
        ];
        if ($this->hasOption('verbose')) {
            $this->output("HTCP broadcasts to {$parts[0]}:{$parts[1]}\n");
        }
    }

    $dbr = $this->getDB(DB_REPLICA);
    $minTime = $dbr->timestamp($this->getOption('starttime'));
    $maxTime = $dbr->timestamp($this->getOption('endtime'));

    if ($maxTime < $minTime) {
        $this->error("\nERROR: starttime after endtime\n");
        $this->maybeHelp(true);
    }

    $stuckCount = 0; // loop breaker
    while (true) {
        // Adjust batch size if we are stuck in a second that had many changes
        $bSize = $this->mBatchSize + $stuckCount * $this->mBatchSize;

        $res = $dbr->select(
            ['page', 'revision'],
            ['rev_timestamp', 'page_namespace', 'page_title'],
            [
                "rev_timestamp > " . $dbr->addQuotes($minTime),
                "rev_timestamp <= " . $dbr->addQuotes($maxTime),
                "page_latest = rev_id"
            ],
            __METHOD__,
            ['ORDER BY' => 'rev_timestamp', 'LIMIT' => $bSize],
            ['page' => ['INNER JOIN', 'rev_page=page_id']]
        );

        if (!$res->numRows()) {
            // nothing more found, so we are done
            break;
        }

        // Kludge to not get stuck in loops for batches with the same timestamp
        list($rows, $lastTime) = $this->pageableSortedRows($res, 'rev_timestamp', $bSize);
        if (!count($rows)) {
            ++$stuckCount;
            continue;
        }
        // Reset the stuck counter
        $stuckCount = 0;

        $this->output("Processing changes from {$minTime} to {$lastTime}.\n");

        // Advance past the last row next time
        $minTime = $lastTime;

        // Create list of URLs from page_namespace + page_title
        $urls = [];
        foreach ($rows as $row) {
            $title = Title::makeTitle($row->page_namespace, $row->page_title);
            $urls[] = $title->getInternalURL();
        }

        if ($this->hasOption('dry-run') || $this->hasOption('verbose')) {
            $this->output(implode("\n", $urls) . "\n");
            if ($this->hasOption('dry-run')) {
                continue;
            }
        }

        // Send batch of purge requests out to the squids
        $squid = new CdnCacheUpdate($urls, count($urls));
        $squid->doUpdate();

        if ($this->hasOption('sleep-per-batch')) {
            // sleep-per-batch is in milliseconds, usleep() wants microseconds
            usleep(1000 * (int) $this->getOption('sleep-per-batch'));
        }
    }

    $this->output("Done!\n");
}
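// Usage sketch (the script name is an assumption; the options match those read
// above via getOption()/hasOption()):
//
//   php maintenance/purgeChangedPages.php --starttime 20240101000000 \
//       --endtime 20240102000000 --htcp-dest 127.0.0.1:4827 --verbose
//
// With --htcp-dest, the block at the top of execute() effectively sets
//   $wgHTCPRouting = ['' => ['host' => '127.0.0.1', 'port' => '4827']];
// so every purge's HTCP notification goes to that single host:port instead of
// whatever routing the wiki's configuration normally provides.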