Example #1
 /**
  * Run a bunch of URLs through CdnCacheUpdate::purge()
  * to benchmark Squid response times.
  * @param array $urls A bunch of URLs to purge
  * @param int $trials Number of times to run the test
  * @return string Benchmark summary line
  */
 private function benchSquid($urls, $trials = 1)
 {
     $start = microtime(true);
     for ($i = 0; $i < $trials; $i++) {
         CdnCacheUpdate::purge($urls);
     }
     $delta = microtime(true) - $start;
     $pertrial = $delta / $trials;
     $pertitle = $pertrial / count($urls);
     return sprintf("%4d titles in %6.2fms (%6.2fms each)", count($urls), $pertrial * 1000.0, $pertitle * 1000.0);
 }
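For reference, CdnCacheUpdate::purge() takes a plain array of fully qualified URLs, so the benchmark above can be fed any URL list. A minimal sketch of a direct call (the URLs below are placeholders, not part of the example):

 $urls = [
     'https://example.org/wiki/Main_Page',
     'https://example.org/wiki/Special:RecentChanges',
 ];
 // Single purge round, no timing; assumes CdnCacheUpdate is autoloaded as in the example
 CdnCacheUpdate::purge($urls);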
Example #2
 /**
  * Helper to purge an array of $urls
  * @param array $urls List of URLs to purge from the Squid caches
  */
 private function sendPurgeRequest($urls)
 {
     if ($this->hasOption('delay')) {
         $delay = floatval($this->getOption('delay'));
         foreach ($urls as $url) {
             if ($this->hasOption('verbose')) {
                 $this->output($url . "\n");
             }
             $u = new CdnCacheUpdate(array($url));
             $u->doUpdate();
             usleep($delay * 1000000.0);
         }
     } else {
         if ($this->hasOption('verbose')) {
             $this->output(implode("\n", $urls) . "\n");
         }
         $u = new CdnCacheUpdate($urls);
         $u->doUpdate();
     }
 }
 /**
  * Queue a purge operation
  *
  * @param string $url
  */
 public function queuePurge($url)
 {
     global $wgSquidPurgeUseHostHeader;
     $url = CdnCacheUpdate::expand(str_replace("\n", '', $url));
     $request = array();
     if ($wgSquidPurgeUseHostHeader) {
         $url = wfParseUrl($url);
         $host = $url['host'];
         if (isset($url['port']) && strlen($url['port']) > 0) {
             $host .= ":" . $url['port'];
         }
         $path = $url['path'];
         if (isset($url['query']) && is_string($url['query'])) {
             $path = wfAppendQuery($path, $url['query']);
         }
         $request[] = "PURGE {$path} HTTP/1.1";
         $request[] = "Host: {$host}";
     } else {
         $request[] = "PURGE {$url} HTTP/1.0";
     }
     $request[] = "Connection: Keep-Alive";
     $request[] = "Proxy-Connection: Keep-Alive";
     $request[] = "User-Agent: " . Http::userAgent() . ' ' . __CLASS__;
     // Two ''s to create \r\n\r\n
     $request[] = '';
     $request[] = '';
     $this->requests[] = implode("\r\n", $request);
     if ($this->currentRequestIndex === null) {
         $this->nextRequest();
     }
 }
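To make queuePurge() concrete, each entry it appends to $this->requests is a plain-text HTTP PURGE request. Roughly what one entry looks like for a hypothetical URL with $wgSquidPurgeUseHostHeader enabled (the URL is illustrative; the User-Agent value comes from Http::userAgent() and the class name):

 // PURGE /wiki/Main_Page HTTP/1.1
 // Host: example.org
 // Connection: Keep-Alive
 // Proxy-Connection: Keep-Alive
 // User-Agent: <Http::userAgent()> <__CLASS__>
 //
 // (the two trailing empty strings produce the final \r\n\r\n terminator)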
Example #4
 public function run()
 {
     // Use purge() directly to avoid infinite recursion
     CdnCacheUpdate::purge($this->params['urls']);
     return true;
 }
 /**
  * @param array $pages Map of (page ID => (namespace, DB key)) entries
  */
 protected function invalidateTitles(array $pages)
 {
     global $wgUpdateRowsPerQuery, $wgUseFileCache;
     // Get all page IDs in this query into an array
     $pageIds = array_keys($pages);
     if (!$pageIds) {
         return;
     }
     // The page_touched field will need to be bumped for these pages.
     // Only bump it to the present time if no "rootJobTimestamp" was known.
     // If it is known, it can be used instead, which avoids invalidating output
     // that was in fact generated *after* the relevant dependency change time
     // (e.g. template edit). This is particularly useful since refreshLinks jobs
     // save back parser output and usually run alongside htmlCacheUpdate jobs;
     // their saved output would be invalidated by using the current timestamp.
     if (isset($this->params['rootJobTimestamp'])) {
         $touchTimestamp = $this->params['rootJobTimestamp'];
     } else {
         $touchTimestamp = wfTimestampNow();
     }
     $dbw = wfGetDB(DB_MASTER);
     // Update page_touched (skipping pages already touched since the root job).
     // Check $wgUpdateRowsPerQuery for sanity; batch jobs are sized by that already.
     foreach (array_chunk($pageIds, $wgUpdateRowsPerQuery) as $batch) {
         $dbw->commit(__METHOD__, 'flush');
         wfGetLBFactory()->waitForReplication();
         $dbw->update(
             'page',
             ['page_touched' => $dbw->timestamp($touchTimestamp)],
             [
                 'page_id' => $batch,
                 "page_touched < " . $dbw->addQuotes($dbw->timestamp($touchTimestamp))
             ],
             __METHOD__
         );
     }
     // Get the list of affected pages (races only mean something else did the purge)
     $titleArray = TitleArray::newFromResult($dbw->select(
         'page',
         ['page_namespace', 'page_title'],
         [
             'page_id' => $pageIds,
             'page_touched' => $dbw->timestamp($touchTimestamp)
         ],
         __METHOD__
     ));
     // Update CDN
     $u = CdnCacheUpdate::newFromTitles($titleArray);
     $u->doUpdate();
     // Update file cache
     if ($wgUseFileCache) {
         foreach ($titleArray as $title) {
             HTMLFileCache::clearFileCache($title);
         }
     }
 }
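For clarity, each pass of the batch loop above boils down to an UPDATE roughly like the following (page IDs and timestamp are illustrative; the timestamp is shown in MediaWiki's 14-digit format):

 // UPDATE page
 //    SET page_touched = '20240101000000'
 //  WHERE page_id IN (1, 2, 3)
 //    AND page_touched < '20240101000000';
 // Rows already touched since the root job keep their newer timestamp.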
 /**
  * @param array $pages Map of (page ID => (namespace, DB key)) entries
  */
 protected function invalidateTitles(array $pages)
 {
     global $wgUpdateRowsPerQuery, $wgUseFileCache;
     // Get all page IDs in this query into an array
     $pageIds = array_keys($pages);
     if (!$pageIds) {
         return;
     }
     // Bump page_touched to the current timestamp. This used to use the root job timestamp
     // (e.g. template/file edit time), which was a bit more efficient when template edits are
     // rare and don't affect the same pages much. However, this way allows for better
     // de-duplication, which is much more useful for wikis with high edit rates. Note that
     // RefreshLinksJob, which is enqueued alongside HTMLCacheUpdateJob, saves the parser output
     // since it has to parse anyway. We assume that the vast majority of the cache jobs finish
     // before the link jobs, so using the current timestamp instead of the root timestamp is
     // not expected to invalidate these cache entries too often.
     $touchTimestamp = wfTimestampNow();
     $dbw = wfGetDB(DB_MASTER);
     $factory = wfGetLBFactory();
     $ticket = $factory->getEmptyTransactionTicket(__METHOD__);
     // Update page_touched (skipping pages already touched since the root job).
     // Check $wgUpdateRowsPerQuery for sanity; batch jobs are sized by that already.
     foreach (array_chunk($pageIds, $wgUpdateRowsPerQuery) as $batch) {
         $factory->commitAndWaitForReplication(__METHOD__, $ticket);
         $dbw->update(
             'page',
             ['page_touched' => $dbw->timestamp($touchTimestamp)],
             [
                 'page_id' => $batch,
                 "page_touched < " . $dbw->addQuotes($dbw->timestamp($touchTimestamp))
             ],
             __METHOD__
         );
     }
     // Get the list of affected pages (races only mean something else did the purge)
     $titleArray = TitleArray::newFromResult($dbw->select(
         'page',
         ['page_namespace', 'page_title'],
         [
             'page_id' => $pageIds,
             'page_touched' => $dbw->timestamp($touchTimestamp)
         ],
         __METHOD__
     ));
     // Update CDN
     $u = CdnCacheUpdate::newFromTitles($titleArray);
     $u->doUpdate();
     // Update file cache
     if ($wgUseFileCache) {
         foreach ($titleArray as $title) {
             HTMLFileCache::clearFileCache($title);
         }
     }
 }
 public function execute()
 {
     global $wgHTCPRouting;
     if ($this->hasOption('htcp-dest')) {
         $parts = explode(':', $this->getOption('htcp-dest'));
         if (count($parts) < 2) {
             // Add default htcp port
             $parts[] = '4827';
         }
         // Route all HTCP messages to provided host:port
         $wgHTCPRouting = ['' => ['host' => $parts[0], 'port' => $parts[1]]];
         if ($this->hasOption('verbose')) {
             $this->output("HTCP broadcasts to {$parts[0]}:{$parts[1]}\n");
         }
     }
     $dbr = $this->getDB(DB_REPLICA);
     $minTime = $dbr->timestamp($this->getOption('starttime'));
     $maxTime = $dbr->timestamp($this->getOption('endtime'));
     if ($maxTime < $minTime) {
         $this->error("\nERROR: starttime after endtime\n");
         $this->maybeHelp(true);
     }
     // Loop-breaker counter: bumped when a batch makes no progress
     $stuckCount = 0;
     while (true) {
         // Adjust batch size if we are stuck in a second that had many changes
         $bSize = $this->mBatchSize + $stuckCount * $this->mBatchSize;
         $res = $dbr->select(
             ['page', 'revision'],
             ['rev_timestamp', 'page_namespace', 'page_title'],
             [
                 "rev_timestamp > " . $dbr->addQuotes($minTime),
                 "rev_timestamp <= " . $dbr->addQuotes($maxTime),
                 "page_latest = rev_id"
             ],
             __METHOD__,
             ['ORDER BY' => 'rev_timestamp', 'LIMIT' => $bSize],
             ['page' => ['INNER JOIN', 'rev_page=page_id']]
         );
         if (!$res->numRows()) {
             // nothing more found so we are done
             break;
         }
         // Kludge to not get stuck in loops for batches with the same timestamp
         list($rows, $lastTime) = $this->pageableSortedRows($res, 'rev_timestamp', $bSize);
         if (!count($rows)) {
             ++$stuckCount;
             continue;
         }
         // Reset stuck counter
         $stuckCount = 0;
         $this->output("Processing changes from {$minTime} to {$lastTime}.\n");
         // Advance past the last row next time
         $minTime = $lastTime;
         // Create list of URLs from page_namespace + page_title
         $urls = [];
         foreach ($rows as $row) {
             $title = Title::makeTitle($row->page_namespace, $row->page_title);
             $urls[] = $title->getInternalURL();
         }
         if ($this->hasOption('dry-run') || $this->hasOption('verbose')) {
             $this->output(implode("\n", $urls) . "\n");
             if ($this->hasOption('dry-run')) {
                 continue;
             }
         }
         // Send batch of purge requests out to squids
         $squid = new CdnCacheUpdate($urls, count($urls));
         $squid->doUpdate();
         if ($this->hasOption('sleep-per-batch')) {
             // sleep-per-batch is milliseconds, usleep() wants microseconds.
             usleep(1000 * (int) $this->getOption('sleep-per-batch'));
         }
     }
     $this->output("Done!\n");
 }
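The execute() method above reads its parameters as maintenance-script options, all of which appear in the code: htcp-dest, starttime, endtime, verbose, dry-run and sleep-per-batch. Assuming it lives in a script such as purgeChangedPages.php (the filename and the option values below are assumptions for illustration), a run from the command line could look like:

 php maintenance/purgeChangedPages.php --starttime 20240101000000 --endtime 20240102000000 --htcp-dest 10.0.0.1:4827 --sleep-per-batch 500 --verbose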