/**
 * Fetch one blob from external storage and expose it as raw text.
 *
 * Reads the `cluster` and `blobid` request parameters, builds a
 * DB://<cluster>/<blobid> URL and adds the fetched text plus a
 * 'text/plain' mime marker to the API result.
 *
 * Reports a usage error (code 1, 2 or 3, HTTP 404) through dieUsage() when
 * blobid is empty, cluster is empty, or the fetch returns false.
 */
public function execute() {
    wfProfileIn(__METHOD__);

    // Read the two parameters explicitly instead of extract()-ing the whole
    // request array into the local scope.
    $params = $this->extractRequestParams();
    $cluster = isset($params['cluster']) ? $params['cluster'] : null;
    $blobid = isset($params['blobid']) ? $params['blobid'] : null;

    // Close the profiling section before every dieUsage() exit so the
    // wfProfileIn() above is always balanced.
    if (empty($blobid)) {
        wfProfileOut(__METHOD__);
        $this->dieUsage('Invalid blobid', 1, 404);
    }
    if (empty($cluster)) {
        wfProfileOut(__METHOD__);
        $this->dieUsage('Invalid cluster', 2, 404);
    }

    $url = sprintf("DB://%s/%d", $cluster, $blobid);
    $text = ExternalStore::fetchFromURL($url);
    if ($text === false) {
        wfProfileOut(__METHOD__);
        $this->dieUsage('Text not found', 3, 404);
    }

    $result = $this->getResult();
    $result->setRawMode();
    // The size check is switched off only while the blob is being added.
    $result->disableSizeCheck();
    $result->reset();
    $result->addValue(null, 'text', $text);
    $result->addValue(null, 'mime', 'text/plain');
    $result->enableSizeCheck();

    wfProfileOut(__METHOD__);
}
/**
 * ExternalStore::fetchFromURL() must return false for strings that are
 * not usable store URLs.
 */
function testExternalStoreDoesNotFetchIncorrectURL() {
    global $wgExternalStores;
    $wgExternalStores = true;

    # Assertions for r68900
    $rejectedUrls = array(
        'http://',
        'ftp.wikimedia.org',
        '/super.txt',
    );
    foreach ($rejectedUrls as $rejectedUrl) {
        $this->assertFalse(ExternalStore::fetchFromURL($rejectedUrl));
    }
}
/**
 * Store a data item to an external store, identified by a partial URL.
 * The protocol part is used to identify the store class, the rest is passed
 * to that class as a parameter.
 *
 * @param string $url Partial URL of the form "<protocol>://<params>"
 * @param mixed $data Data handed unchanged to the store's store() method
 * @return mixed The value returned by the store's store() call (documented
 *  as the URL of the stored item), or false on error
 */
static function insert($url, $data) {
    $parts = explode('://', $url, 2);
    if (count($parts) < 2) {
        // No "://" separator: there is no parameter part to hand to a
        // store, so report an error instead of raising an offset notice.
        return false;
    }
    list($proto, $params) = $parts;

    // Plain assignment: taking a reference to a function result is not
    // needed for an object handle and triggers a strict-standards notice.
    $store = ExternalStore::getStoreObject($proto);
    if ($store === false) {
        return false;
    }

    return $store->store($params, $data);
}
/**
 * Checks that fetchFromURL() refuses to fetch while wgExternalStores is
 * false, returns the stored text once the FOO protocol is enabled, and
 * rejects strings that are not store URLs.
 *
 * @covers ExternalStore::fetchFromURL
 */
public function testExternalFetchFromURL() {
    $this->setMwGlobals('wgExternalStores', false);
    $this->assertFalse(
        ExternalStore::fetchFromURL('FOO://cluster1/200'),
        'Deny if wgExternalStores is not set to a non-empty array'
    );

    $this->setMwGlobals('wgExternalStores', array('FOO'));
    // assertEquals() takes the expected value first; keeping that order
    // makes failure messages report expected/actual the right way round.
    $this->assertEquals(
        'Hello',
        ExternalStore::fetchFromURL('FOO://cluster1/200'),
        'Allow FOO://cluster1/200'
    );
    $this->assertEquals(
        'Hello',
        ExternalStore::fetchFromURL('FOO://cluster1/300/0'),
        'Allow FOO://cluster1/300/0'
    );

    # Assertions for r68900
    $this->assertFalse(
        ExternalStore::fetchFromURL('ftp.example.org'),
        'Deny domain ftp.example.org'
    );
    $this->assertFalse(
        ExternalStore::fetchFromURL('/example.txt'),
        'Deny path /example.txt'
    );
    $this->assertFalse(
        ExternalStore::fetchFromURL('http://'),
        'Deny protocol http://'
    );
}
/**
 * Copy externally stored revision text back into the local text table.
 *
 * Despite its name, this routine works in the "external -> local"
 * direction: for every text row flagged 'external' it fetches the blob
 * behind the stored URL, and rewrites the row with the fetched text and the
 * flag list minus 'external'.
 *
 * Rows whose blob cannot be fetched, history blob stubs and rows holding an
 * unrecognised object class are left untouched.
 */
function moveToExternal() {
    $fname = __METHOD__;
    $dbw = wfGetDB(DB_MASTER);
    $dbr = wfGetDB(DB_SLAVE);

    $res = $dbr->query("SELECT * FROM revision r1 FORCE INDEX (PRIMARY), text t2\n\t\tWHERE old_id = rev_text_id\n\t\tAND old_flags LIKE '%external%'\n\t\tORDER BY rev_timestamp, rev_id", $fname);

    while ($row = $dbr->fetchObject($res)) {
        $url = $row->old_text;
        $id = $row->old_id;

        // Rebuild the flag list without the 'external' marker.
        $keptFlags = array();
        foreach (explode(",", $row->old_flags) as $flag) {
            $flag = trim($flag);
            if ($flag === "external") {
                continue;
            }
            $keptFlags[] = $flag;
        }
        $flags = implode(",", $keptFlags);

        // Fetch BEFORE inspecting the payload: the object check below needs
        // this row's text, not an undefined value or the previous row's.
        $text = ExternalStore::fetchFromURL($url);
        if ($text === false) {
            // Never overwrite the stored URL with false; that would destroy
            // the only pointer to the blob.
            print "Warning: could not fetch {$url} for text row {$id}, skipping\n";
            continue;
        }

        if (strpos($flags, 'object') !== false) {
            $obj = unserialize($text);
            // unserialize() yields false on failure; get_class() must only
            // see real objects. An empty class name falls through to the
            // "unrecognised" branch below.
            $className = is_object($obj) ? strtolower(get_class($obj)) : '';
            if ($className == 'historyblobstub') {
                continue;
            } elseif ($className == 'historyblobcurstub') {
                $text = gzdeflate($obj->getText());
                // The text is written inline below, so the row must not be
                // flagged 'external' again.
                $flags = 'utf-8,gzip';
            } elseif ($className == 'concatenatedgziphistoryblob') {
                // Do nothing
            } else {
                print "Warning: unrecognised object class \"{$className}\"\n";
                continue;
            }
        }

        echo "moved url {$url} back to {$id} with flags {$flags}\n";
        $dbw->update(
            'text',
            array('old_flags' => $flags, 'old_text' => $text),
            array('old_id' => $id),
            $fname
        );
    }
    $dbr->freeResult($res);
}
/**
 * Main entry point: validate the secret token, then fetch one blob from an
 * external store and return it hex-encoded together with its md5 hash.
 *
 * The result contains "blob" and "hash"; both are null when the `store` and
 * `id` parameters are not both supplied. Reports 'Text not found' (code 3,
 * HTTP 404) when the store is not listed in
 * $wgLBFactoryConf["externalLoads"] or the fetch returns false.
 *
 * @access public
 */
public function execute() {
    global $wgTheSchwartzSecretToken, $wgLBFactoryConf;
    wfProfileIn(__METHOD__);
    ini_set("memory_limit", -1);
    ini_set("max_execution_time", 0);
    $params = $this->extractRequestParams();
    $result = array();

    #
    # check token first
    #
    // Compare as strings, strictly: a loose == lets an empty token match an
    // unset secret and lets numeric-looking strings compare equal. An
    // empty/unconfigured secret never authorises anything.
    $secret = is_scalar($wgTheSchwartzSecretToken) ? (string) $wgTheSchwartzSecretToken : '';
    $token = (isset($params["token"]) && is_scalar($params["token"])) ? (string) $params["token"] : '';
    $tokenOk = $secret !== '' && (
        function_exists('hash_equals')
            // timing-safe comparison where the runtime provides it
            ? hash_equals($secret, $token)
            : $secret === $token
    );
    if (!$tokenOk) {
        wfProfileOut(__METHOD__);
        $this->dieUsageMsg(array("cantrunjobs"));
    }

    $blob = null;
    $hash = null;

    #
    # check for store and id parameters
    #
    if (isset($params["store"]) && isset($params["id"])) {
        $store = $params["store"];
        $id = $params["id"];

        #
        # check if store defined in loadbalancer file
        #
        if (!isset($wgLBFactoryConf["externalLoads"][$store])) {
            wfDebug(__METHOD__ . ": store {$store} is not defined in wgLBFactoryConf\n");
            wfProfileOut(__METHOD__);
            $this->dieUsage('Text not found', 3, 404);
        }

        wfDebug(__METHOD__ . ": getting {$id} from {$store}\n");
        $url = sprintf("DB://%s/%d", $store, $id);
        $blob = ExternalStore::fetchFromURL($url);
        if ($blob === false) {
            wfProfileOut(__METHOD__);
            $this->dieUsage('Text not found', 3, 404);
        }

        // Hash the raw bytes, then hex-encode them for the response.
        $hash = md5($blob);
        $blob = unpack("H*", $blob)[1];
    }

    $result["blob"] = $blob;
    $result["hash"] = $hash;
    $this->getResult()->addValue(null, $this->getModuleName(), $result);
    wfProfileOut(__METHOD__);
}
/**
 * Look up the text row of the revision given as script argument, resolve
 * external storage, gzip and object flags, and print the text length and
 * its first 100 bytes.
 */
public function execute() {
    $dbr = wfGetDB(DB_SLAVE);
    $row = $dbr->selectRow(
        array('text', 'revision'),
        array('old_flags', 'old_text'),
        array('old_id=rev_text_id', 'rev_id' => $this->getArg())
    );
    if (!$row) {
        $this->error("Row not found", true);
    }

    $flags = explode(',', $row->old_flags);
    $text = $row->old_text;

    if (in_array('external', $flags)) {
        $this->output("External {$text}\n");
        if (preg_match('!^DB://(\\w+)/(\\w+)/(\\w+)$!', $text, $m)) {
            // Three-part DB URL: cluster / blob id / item inside the blob.
            $es = ExternalStore::getStoreObject('DB');
            $blob = $es->fetchBlob($m[1], $m[2], $m[3]);
            // fetchBlob() may hand back a non-object; only call get_class()
            // on real objects and let everything else reach the diagnostic
            // branch below.
            if (is_object($blob) && strtolower(get_class($blob)) == 'concatenatedgziphistoryblob') {
                $this->output("Found external CGZ\n");
                $blob->uncompress();
                $this->output("Items: (" . implode(', ', array_keys($blob->mItems)) . ")\n");
                $text = $blob->getItem($m[3]);
            } else {
                $this->output("CGZ expected at {$text}, got " . gettype($blob) . "\n");
                $text = $blob;
            }
        } else {
            $this->output("External plain {$text}\n");
            $text = ExternalStore::fetchFromURL($text);
        }
    }

    if (in_array('gzip', $flags)) {
        $text = gzinflate($text);
    }

    if (in_array('object', $flags)) {
        $obj = unserialize($text);
        if (!is_object($obj)) {
            // Report the failure instead of crashing on a method call on a
            // non-object; second argument as in the "Row not found" case.
            $this->error("Unable to unserialize object text", true);
        }
        $text = $obj->getText();
    }

    if (is_object($text)) {
        $this->error("Unexpectedly got object of type: " . get_class($text));
    } else {
        $this->output("Text length: " . strlen($text) . "\n");
        $this->output(substr($text, 0, 100) . "\n");
    }
}
/**
 * Resolve this stub: load the history blob stored in text row mOldId and
 * return the item identified by mHash.
 *
 * The most recently loaded blob is kept in self::$blobCache (one entry
 * only), so consecutive stubs pointing at the same row skip the database.
 *
 * @return mixed The result of getItem() on the blob, or false when the row
 *  is missing, is not flagged 'object', has an unusable external URL, or
 *  cannot be unserialized into an object
 */
function getText() {
    if (isset(self::$blobCache[$this->mOldId])) {
        $obj = self::$blobCache[$this->mOldId];
    } else {
        $dbr = wfGetDB(DB_SLAVE);
        $row = $dbr->selectRow(
            'text',
            array('old_flags', 'old_text'),
            array('old_id' => $this->mOldId)
        );
        if (!$row) {
            return false;
        }

        $flags = explode(',', $row->old_flags);
        if (in_array('external', $flags)) {
            $url = $row->old_text;
            $parts = explode('://', $url, 2);
            if (!isset($parts[1]) || $parts[1] == '') {
                // No profiling section is open in this method, so there is
                // nothing to close here.
                return false;
            }
            $row->old_text = ExternalStore::fetchFromUrl($url);
        }

        if (!in_array('object', $flags)) {
            return false;
        }

        if (in_array('gzip', $flags)) {
            // This shouldn't happen, but a bug in the compress script
            // may at times gzip-compress a HistoryBlob object row.
            $obj = unserialize(gzinflate($row->old_text));
        } else {
            $obj = unserialize($row->old_text);
        }

        if (!is_object($obj) && is_string($obj)) {
            // Correct for old double-serialization bug. Only a string can
            // be a second serialization layer; other values are rejected
            // by the check below.
            $obj = unserialize($obj);
        }

        if (!is_object($obj)) {
            return false;
        }

        // Save this item for reference; if pulling many
        // items in a row we'll likely use it again.
        $obj->uncompress();
        self::$blobCache = array($this->mOldId => $obj);
    }

    return $obj->getItem($this->mHash);
}
/**
 * Insert a new revision into the database, returning the new revision ID
 * number on success and dies horribly on failure.
 *
 * Writes the (optionally compressed / externally stored) text to the text
 * table unless mTextId is already set, inserts the revision row, then runs
 * the RevisionInsertComplete hook.
 *
 * @param $dbw DatabaseBase: (master connection)
 * @throws MWException When external storage fails or the page title cannot
 *  be determined
 * @return Integer
 */
public function insertOn($dbw) {
    global $wgDefaultExternalStore, $wgContentHandlerUseDB;

    wfProfileIn(__METHOD__);

    $this->checkContentModel();

    $data = $this->mText;
    $flags = self::compressRevisionText($data);

    # Write to external storage if required
    if ($wgDefaultExternalStore) {
        // Store and get the URL
        $data = ExternalStore::insertToDefault($data);
        if (!$data) {
            wfProfileOut(__METHOD__);
            throw new MWException("Unable to store text to external storage");
        }
        if ($flags) {
            $flags .= ',';
        }
        $flags .= 'external';
    }

    # Record the text (or external storage URL) to the text table
    if (!isset($this->mTextId)) {
        $old_id = $dbw->nextSequenceValue('text_old_id_seq');
        $dbw->insert(
            'text',
            array('old_id' => $old_id, 'old_text' => $data, 'old_flags' => $flags),
            __METHOD__
        );
        $this->mTextId = $dbw->insertId();
    }

    if ($this->mComment === null) {
        $this->mComment = "";
    }

    # Record the edit in revisions
    $rev_id = isset($this->mId) ? $this->mId : $dbw->nextSequenceValue('revision_rev_id_seq');
    $row = array(
        'rev_id' => $rev_id,
        'rev_page' => $this->mPage,
        'rev_text_id' => $this->mTextId,
        'rev_comment' => $this->mComment,
        'rev_minor_edit' => $this->mMinorEdit ? 1 : 0,
        'rev_user' => $this->mUser,
        'rev_user_text' => $this->mUserText,
        'rev_timestamp' => $dbw->timestamp($this->mTimestamp),
        'rev_deleted' => $this->mDeleted,
        'rev_len' => $this->mSize,
        'rev_parent_id' => is_null($this->mParentId) ? $this->getPreviousRevisionId($dbw) : $this->mParentId,
        'rev_sha1' => is_null($this->mSha1) ? Revision::base36Sha1($this->mText) : $this->mSha1,
    );

    if ($wgContentHandlerUseDB) {
        //NOTE: Store null for the default model and format, to save space.
        //XXX: Makes the DB sensitive to changed defaults. Make this behavior optional? Only in miser mode?
        $model = $this->getContentModel();
        $format = $this->getContentFormat();

        $title = $this->getTitle();
        if ($title === null) {
            wfProfileOut(__METHOD__);
            throw new MWException("Insufficient information to determine the title of the revision's page!");
        }

        $defaultModel = ContentHandler::getDefaultModelFor($title);
        $defaultFormat = ContentHandler::getForModelID($defaultModel)->getDefaultFormat();

        $row['rev_content_model'] = $model === $defaultModel ? null : $model;
        $row['rev_content_format'] = $format === $defaultFormat ? null : $format;
    }

    $dbw->insert('revision', $row, __METHOD__);

    // Sequence-backed databases already supplied the id; otherwise take the
    // auto-generated one.
    $this->mId = !is_null($rev_id) ? $rev_id : $dbw->insertId();

    // Avoid the PHP 7.1+ warning from passing $this by reference: hand the
    // hook a local alias of the same object instead.
    $revision = $this;
    wfRunHooks('RevisionInsertComplete', array(&$revision, $data, $flags));

    wfProfileOut(__METHOD__);
    return $this->mId;
}
/**
 * Insert a new revision into the database, returning the new revision ID
 * number on success and dies horribly on failure.
 *
 * Writes the (optionally compressed / externally stored) text to the text
 * table unless mTextId is already set, inserts the revision row, then runs
 * the RevisionInsertComplete hook.
 *
 * @param Database $dbw
 * @throws MWException When the text cannot be written to external storage
 * @return int
 */
public function insertOn($dbw) {
    global $wgDefaultExternalStore;

    wfProfileIn(__METHOD__);

    $data = $this->mText;
    $flags = Revision::compressRevisionText($data);

    # Write to external storage if required
    if ($wgDefaultExternalStore) {
        // Store and get the URL
        $data = ExternalStore::insertToDefault($data);
        if (!$data) {
            // Balance the wfProfileIn() above before leaving by exception.
            wfProfileOut(__METHOD__);
            throw new MWException("Unable to store text to external storage");
        }
        if ($flags) {
            $flags .= ',';
        }
        $flags .= 'external';
    }

    # Record the text (or external storage URL) to the text table
    if (!isset($this->mTextId)) {
        $old_id = $dbw->nextSequenceValue('text_old_id_val');
        $dbw->insert(
            'text',
            array('old_id' => $old_id, 'old_text' => $data, 'old_flags' => $flags),
            __METHOD__
        );
        $this->mTextId = $dbw->insertId();
    }

    # Record the edit in revisions
    $rev_id = isset($this->mId) ? $this->mId : $dbw->nextSequenceValue('rev_rev_id_val');
    $dbw->insert(
        'revision',
        array(
            'rev_id' => $rev_id,
            'rev_page' => $this->mPage,
            'rev_text_id' => $this->mTextId,
            'rev_comment' => $this->mComment,
            'rev_minor_edit' => $this->mMinorEdit ? 1 : 0,
            'rev_user' => $this->mUser,
            'rev_user_text' => $this->mUserText,
            'rev_timestamp' => $dbw->timestamp($this->mTimestamp),
            'rev_deleted' => $this->mDeleted,
            'rev_len' => $this->mSize,
            'rev_parent_id' => is_null($this->mParentId) ? $this->getPreviousRevisionId($dbw) : $this->mParentId,
        ),
        __METHOD__
    );

    // Sequence-backed databases already supplied the id; otherwise take the
    // auto-generated one.
    $this->mId = !is_null($rev_id) ? $rev_id : $dbw->insertId();

    // Avoid the PHP 7.1+ warning from passing $this by reference: hand the
    // hook a local alias of the same object instead.
    $revision = $this;
    wfRunHooks('RevisionInsertComplete', array(&$revision, $data, $flags));

    wfProfileOut(__METHOD__);
    return $this->mId;
}
/**
 * Insert a new revision into the database, returning the new revision ID
 * number on success and dies horribly on failure.
 *
 * Writes the (optionally compressed / externally stored) text to the text
 * table unless mTextId is already set, inserts the revision row, then runs
 * the RevisionInsertComplete hook.
 *
 * @param IDatabase $dbw (master connection)
 * @throws MWException When the page ID is falsy, external storage fails or
 *  the page title cannot be determined
 * @throws UnexpectedValueException When the revision ID is 0 after insert
 * @return int
 */
public function insertOn($dbw) {
    global $wgDefaultExternalStore, $wgContentHandlerUseDB;

    // Not allowed to have rev_page equal to 0, false, etc.
    if (!$this->mPage) {
        $title = $this->getTitle();
        if ($title instanceof Title) {
            $titleText = ' for page ' . $title->getPrefixedText();
        } else {
            $titleText = '';
        }
        throw new MWException("Cannot insert revision{$titleText}: page ID must be nonzero");
    }

    $this->checkContentModel();

    $data = $this->mText;
    $flags = self::compressRevisionText($data);

    # Write to external storage if required
    if ($wgDefaultExternalStore) {
        // Store and get the URL
        $data = ExternalStore::insertToDefault($data);
        if (!$data) {
            throw new MWException("Unable to store text to external storage");
        }
        if ($flags) {
            $flags .= ',';
        }
        $flags .= 'external';
    }

    # Record the text (or external storage URL) to the text table
    if ($this->mTextId === null) {
        $old_id = $dbw->nextSequenceValue('text_old_id_seq');
        $dbw->insert(
            'text',
            array('old_id' => $old_id, 'old_text' => $data, 'old_flags' => $flags),
            __METHOD__
        );
        $this->mTextId = $dbw->insertId();
    }

    if ($this->mComment === null) {
        $this->mComment = "";
    }

    # Record the edit in revisions
    $rev_id = $this->mId !== null ? $this->mId : $dbw->nextSequenceValue('revision_rev_id_seq');
    $row = array(
        'rev_id' => $rev_id,
        'rev_page' => $this->mPage,
        'rev_text_id' => $this->mTextId,
        'rev_comment' => $this->mComment,
        'rev_minor_edit' => $this->mMinorEdit ? 1 : 0,
        'rev_user' => $this->mUser,
        'rev_user_text' => $this->mUserText,
        'rev_timestamp' => $dbw->timestamp($this->mTimestamp),
        'rev_deleted' => $this->mDeleted,
        'rev_len' => $this->mSize,
        'rev_parent_id' => $this->mParentId === null ? $this->getPreviousRevisionId($dbw) : $this->mParentId,
        'rev_sha1' => $this->mSha1 === null ? Revision::base36Sha1($this->mText) : $this->mSha1,
    );

    if ($wgContentHandlerUseDB) {
        // NOTE: Store null for the default model and format, to save space.
        // XXX: Makes the DB sensitive to changed defaults.
        // Make this behavior optional? Only in miser mode?
        $model = $this->getContentModel();
        $format = $this->getContentFormat();

        $title = $this->getTitle();
        if ($title === null) {
            throw new MWException("Insufficient information to determine the title of the " . "revision's page!");
        }

        $defaultModel = ContentHandler::getDefaultModelFor($title);
        $defaultFormat = ContentHandler::getForModelID($defaultModel)->getDefaultFormat();

        $row['rev_content_model'] = $model === $defaultModel ? null : $model;
        $row['rev_content_format'] = $format === $defaultFormat ? null : $format;
    }

    $dbw->insert('revision', $row, __METHOD__);

    // Sequence-backed databases already supplied the id; otherwise take the
    // auto-generated one.
    $this->mId = $rev_id !== null ? $rev_id : $dbw->insertId();

    // Assertion to try to catch T92046
    if ((int) $this->mId === 0) {
        throw new UnexpectedValueException('After insert, Revision mId is ' . var_export($this->mId, 1) . ': ' . var_export($row, 1));
    }

    // Avoid the PHP 7.1+ warning from passing $this by reference: hand the
    // hook a local alias of the same object instead.
    $revision = $this;
    Hooks::run('RevisionInsertComplete', array(&$revision, $data, $flags));

    return $this->mId;
}
/**
 * Insert a new revision into the database, returning the new revision ID
 * number on success and dies horribly on failure.
 *
 * When $wgDefaultExternalStore is set the text goes to external storage
 * (a randomly picked entry when the setting is an array) and the text row
 * only keeps the returned URL.
 *
 * @param Database $dbw
 * @throws MWException When the text cannot be written to external storage
 * @return int
 */
function insertOn(&$dbw) {
    global $wgDefaultExternalStore;

    $fname = 'Revision::insertOn';
    wfProfileIn($fname);

    $data = $this->mText;
    $flags = Revision::compressRevisionText($data);

    # Write to external storage if required
    if ($wgDefaultExternalStore) {
        if (is_array($wgDefaultExternalStore)) {
            // Distribute storage across multiple clusters
            $store = $wgDefaultExternalStore[mt_rand(0, count($wgDefaultExternalStore) - 1)];
        } else {
            $store = $wgDefaultExternalStore;
        }
        require_once 'ExternalStore.php';
        // Store and get the URL
        $data = ExternalStore::insert($store, $data);
        if (!$data) {
            # This should only happen in the case of a configuration error, where the external store is not valid
            // Balance the wfProfileIn() above before leaving by exception.
            wfProfileOut($fname);
            throw new MWException("Unable to store text to external storage {$store}");
        }
        if ($flags) {
            $flags .= ',';
        }
        $flags .= 'external';
    }

    # Record the text (or external storage URL) to the text table
    if (!isset($this->mTextId)) {
        $old_id = $dbw->nextSequenceValue('text_old_id_val');
        $dbw->insert(
            'text',
            array('old_id' => $old_id, 'old_text' => $data, 'old_flags' => $flags),
            $fname
        );
        $this->mTextId = $dbw->insertId();
    }

    # Record the edit in revisions
    $rev_id = isset($this->mId) ? $this->mId : $dbw->nextSequenceValue('rev_rev_id_val');
    $dbw->insert(
        'revision',
        array(
            'rev_id' => $rev_id,
            'rev_page' => $this->mPage,
            'rev_text_id' => $this->mTextId,
            'rev_comment' => $this->mComment,
            'rev_minor_edit' => $this->mMinorEdit ? 1 : 0,
            'rev_user' => $this->mUser,
            'rev_user_text' => $this->mUserText,
            'rev_timestamp' => $dbw->timestamp($this->mTimestamp),
            'rev_deleted' => $this->mDeleted,
        ),
        $fname
    );

    // Sequence-backed databases already supplied the id; otherwise take the
    // auto-generated one.
    $this->mId = !is_null($rev_id) ? $rev_id : $dbw->insertId();

    wfProfileOut($fname);
    return $this->mId;
}
/**
 * Get revision text associated with an old or archive row.
 * $row is usually an object from wfFetchRow(); both the flags and the text
 * field must be included.
 *
 * Resolves, in this order: external storage URLs, gzip compression,
 * serialized storage objects, and legacy-encoding conversion.
 *
 * @static
 * @param object $row Row object carrying the "<prefix>text" field and,
 *  optionally, the "<prefix>flags" field
 * @param string $prefix table prefix (default 'old_')
 * @return string|bool The text requested, or false when the text field is
 *  missing, the external URL has no path part, decompression fails or the
 *  stored object cannot be unserialized
 */
function getRevisionText($row, $prefix = 'old_') {
    $fname = 'Revision::getRevisionText';
    wfProfileIn($fname);

    # Get data
    $textField = $prefix . 'text';
    $flagsField = $prefix . 'flags';

    if (isset($row->{$flagsField})) {
        $flags = explode(',', $row->{$flagsField});
    } else {
        $flags = array();
    }

    if (isset($row->{$textField})) {
        $text = $row->{$textField};
    } else {
        wfProfileOut($fname);
        return false;
    }

    # Use external methods for external objects, text in table is URL-only then
    if (in_array('external', $flags)) {
        $url = $text;
        // Check for the path part explicitly rather than silencing the
        // list() notice with @.
        $parts = explode('://', $url, 2);
        if (!isset($parts[1]) || $parts[1] == '') {
            wfProfileOut($fname);
            return false;
        }
        require_once 'ExternalStore.php';
        $text = ExternalStore::fetchFromURL($url);
    }

    if (in_array('gzip', $flags)) {
        # Deal with optional compression of archived pages.
        # This can be done periodically via maintenance/compressOld.php, and
        # as pages are saved if $wgCompressRevisions is set.
        $text = gzinflate($text);
        if ($text === false) {
            // Corrupt or missing compressed data: report failure instead
            // of passing false on to the later stages.
            wfProfileOut($fname);
            return false;
        }
    }

    if (in_array('object', $flags)) {
        # Generic compressed storage
        $obj = unserialize($text);

        # Bugger, corrupted my test database by double-serializing
        if (!is_object($obj) && is_string($obj)) {
            $obj = unserialize($obj);
        }

        if (!is_object($obj)) {
            // Not a storage object after all; avoid a fatal method call.
            wfProfileOut($fname);
            return false;
        }

        $text = $obj->getText();
    }

    global $wgLegacyEncoding;
    if ($wgLegacyEncoding && !in_array('utf-8', $flags)) {
        # Old revisions kept around in a legacy encoding?
        # Upconvert on demand.
        global $wgInputEncoding, $wgContLang;
        $text = $wgContLang->iconv($wgLegacyEncoding, $wgInputEncoding, $text);
    }

    wfProfileOut($fname);
    return $text;
}
/**
 * Collects one master connection per "DB://" entry found in
 * $wgDefaultExternalStore; entries using any other scheme are ignored.
 *
 * @return Database[] Array of Database master connections
 */
protected static function getExternalStoreDatabaseConnections() {
    global $wgDefaultExternalStore;

    /** @var ExternalStoreDB $externalStoreDB */
    $externalStoreDB = ExternalStore::getStoreObject('DB');

    $connections = [];
    foreach ((array) $wgDefaultExternalStore as $storeUrl) {
        if (strpos($storeUrl, 'DB://') !== 0) {
            continue;
        }

        // Everything after the scheme separator names the cluster.
        list(, $clusterName) = explode('://', $storeUrl, 2);

        // Avoid getMaster() because setupDatabaseWithTestPrefix()
        // requires Database instead of plain DBConnRef/IDatabase
        $loadBalancer = $externalStoreDB->getLoadBalancer($clusterName);
        $connections[] = $loadBalancer->getConnection(DB_MASTER);
    }

    return $connections;
}
<?php
/**
 * Script tests fetching revision texts from external clusters
 *
 * For each hard-coded DB:// URL it dumps the URL, the raw fetched data and
 * the gzinflate()d text.
 *
 * @package MediaWiki
 * @addtopackage maintenance
 *
 * @author Władysław Bodzek
 */
ini_set("include_path", dirname(__FILE__) . "/../../../maintenance/");
$optionsWithArgs = array();
require_once "commandLine.inc";

$urls = array(
    "DB://archive1/191",
    "DB://archive1/211",
    "DB://archive1/1910000000000",
);

foreach ($urls as $url) {
    $data = ExternalStore::fetchFromUrl($url);
    // Only inflate an actual payload; a failed fetch is dumped as false
    // without tripping a gzinflate() warning on non-string input.
    $text = is_string($data) ? gzinflate($data) : false;
    var_dump($url, $data, $text);
}
foreach ($esIdsForCluster[$cluster] as $id) { if ($id === $lastId || $id === $lastId + 1) { $lastId = $id; continue; } $range = range($lastId + 1, $id - 1); $lastId = $id; echo "Checking " . count($range) . " es urls\n"; if (count($range) > 100) { echo "More than 100 potential es urls, skipping\n"; $invalid = true; continue; } foreach ($range as $possible) { $url = "DB://{$cluster}/{$possible}"; $content = gzinflate(ExternalStore::fetchFromURL($url)); if (false !== @unserialize($content)) { // if it unserializes, its not our content continue; } $json = @json_decode($content, true); if ($json && count($json) === 1 && isset($json['flow-workflow'])) { // while technically possible to be a topic title, i'm almost // certain this is a core revisions inserted by flow in the form // of: {"flow-workflow":"sbk26yv6cpcxxm87"} continue; } if (!in_array($changeType, $plaintextChangeTypes)) { if (false === strpos($content, 'data-parsoid')) { continue; }
/**
 * @brief Gets old article's text
 *
 * @desc Returns the article's content for a text-table row: the row's
 * old_text is fetched through ExternalStore and gzip-inflated. Returns an
 * empty string when the row is missing or incomplete, is not flagged
 * 'gzip', or when fetching or inflating fails.
 *
 * @param integer $textId article's text id in text table
 *
 * @return string
 * @author Andrzej 'nAndy' Łukaszewski
 */
public function getDeletedArticleTitleTxt($textId) {
    $dbr = wfGetDB(DB_SLAVE);
    $row = $dbr->selectRow(
        'text',
        array('old_text', 'old_flags'),
        array('old_id' => $textId),
        __METHOD__
    );

    if (empty($row->old_text) || empty($row->old_flags)) {
        return '';
    }

    $flags = explode(',', $row->old_flags);
    if (!in_array('gzip', $flags)) {
        return '';
    }

    $blob = ExternalStore::fetchFromUrl($row->old_text);
    if (!is_string($blob)) {
        // Fetch failed: honour the "empty string on failure" contract
        // instead of feeding a non-string to gzinflate().
        return '';
    }

    $text = gzinflate($blob);
    // gzinflate() returns false on corrupt data; never leak that to callers.
    return $text === false ? '' : $text;
}
/**
 * Retrieve a var dump from External Storage or the text table
 * Some of this code is stolen from Revision::loadText et al
 *
 * A dump reference of the form "stored-text:<id>" is resolved through the
 * text table (honouring the 'external', 'gzip' and 'nativeDataArray'
 * flags); anything else is treated as a legacy inline serialized dump.
 *
 * @param $stored_dump
 *
 * @return object|AbuseFilterVariableHolder|bool
 */
public static function loadVarDump($stored_dump) {
    wfProfileIn(__METHOD__);

    // Back-compat
    // The marker only counts as a PREFIX: the id is cut off right after it
    // below, so a dump that merely contains the marker somewhere else must
    // still be treated as an inline dump.
    if (strpos($stored_dump, 'stored-text:') !== 0) {
        wfProfileOut(__METHOD__);
        return unserialize($stored_dump);
    }

    $text_id = substr($stored_dump, strlen('stored-text:'));

    $dbr = wfGetDB(DB_SLAVE);
    $text_row = $dbr->selectRow(
        'text',
        array('old_text', 'old_flags'),
        array('old_id' => $text_id),
        __METHOD__
    );

    if (!$text_row) {
        wfProfileOut(__METHOD__);
        return new AbuseFilterVariableHolder();
    }

    $flags = explode(',', $text_row->old_flags);
    $text = $text_row->old_text;

    if (in_array('external', $flags)) {
        $text = ExternalStore::fetchFromURL($text);
    }

    if (in_array('gzip', $flags)) {
        $text = gzinflate($text);
    }

    $obj = unserialize($text);

    if (in_array('nativeDataArray', $flags)) {
        // The stored value is a plain key => value map; wrap it in a holder.
        $vars = $obj;
        $obj = new AbuseFilterVariableHolder();
        // A failed unserialize() leaves $vars non-iterable; in that case
        // return the empty holder rather than warn inside foreach.
        if (is_array($vars) || is_object($vars)) {
            foreach ($vars as $key => $value) {
                $obj->setVar($key, $value);
            }
        }
    }

    wfProfileOut(__METHOD__);
    return $obj;
}
$uuid = Flow\Model\UUID::create($rev->rev_id); echo "\n********************\n\nProcessing revision " . $uuid->getAlphadecimal() . "\n"; ++$totalNullContentWithParent; $res = iterator_to_array($dbr->select('flow_revision', array('rev_content', 'rev_flags'), array('rev_id' => new \Flow\Model\UUIDBlob($rev->rev_parent_id)), __FILE__)); // not likely ... but lets be careful if (!$res) { echo "No parent found?\n"; $totalBadQueryResult++; continue; } elseif (count($res) > 1) { echo "Multiple parents found?\n"; $totalBadQueryResult++; continue; } $parent = reset($res); $parentItem = ExternalStore::fetchFromURL($parent->rev_content); if ($parentItem) { echo "MATCHED\n"; fputcsv($csvOutput, array($uuid->getAlphadecimal(), $parent->rev_content, $parent->rev_flags)); ++$totalMatched; } else { echo "Parent item is null\n"; ++$totalNullParentContent; } } } echo "Considered {$totalNullContentWithParent} revisions with parents and no content\n"; if ($totalNullContentWithParent > 0) { echo "Could not fix {$totalNullParentContent} (" . number_format(100 * $totalNullParentContent / $totalNullContentWithParent) . "%) due to parent not having content\n"; echo "Could not fix {$totalBadQueryResult} (" . number_format(100 * $totalBadQueryResult / $totalNullContentWithParent) . "%) due to not finding the parent revision\n"; echo "Found matches for {$totalMatched} (" . number_format(100 * $totalMatched / $totalNullContentWithParent) . "%)\n";
// Resolve $text according to $flags (both set earlier, outside this
// excerpt): external storage first, then gzip, then serialized objects, and
// finally print a short summary of what was found.
if (in_array('external', $flags)) {
    print "External {$text}\n";
    if (preg_match('!^DB://(\\w+)/(\\w+)/(\\w+)$!', $text, $m)) {
        // Three-part DB URL: cluster / blob id / item inside the blob.
        $es = ExternalStore::getStoreObject('DB');
        $blob = $es->fetchBlob($m[1], $m[2], $m[3]);
        // fetchBlob() may hand back a non-object; only call get_class() on
        // real objects and let everything else reach the diagnostic branch.
        if (is_object($blob) && strtolower(get_class($blob)) == 'concatenatedgziphistoryblob') {
            print "Found external CGZ\n";
            $blob->uncompress();
            print "Items: (" . implode(', ', array_keys($blob->mItems)) . ")\n";
            $text = $blob->getItem($m[3]);
        } else {
            print "CGZ expected at {$text}, got " . gettype($blob) . "\n";
            $text = $blob;
        }
    } else {
        print "External plain {$text}\n";
        $text = ExternalStore::fetchFromURL($text);
    }
}

if (in_array('gzip', $flags)) {
    $text = gzinflate($text);
}

if (in_array('object', $flags)) {
    $text = unserialize($text);
}

if (is_object($text)) {
    print "Unexpectedly got object of type: " . get_class($text) . "\n";
} else {
    print "Text length: " . strlen($text) . "\n";
    print substr($text, 0, 100) . "\n";
}
/**
 * This is based on part of HistoryBlobStub::getText().
 * Determine if the text can be retrieved from the row in the normal way.
 *
 * @param $stub Array-like value; its 'hash' entry selects the blob item
 * @param $secondaryRow Row object with old_flags and old_text
 * @return bool True when the row resolves to an object whose getItem() for
 *  the stub hash does not return false
 */
function isUnbrokenStub($stub, $secondaryRow) {
    $flags = explode(',', $secondaryRow->old_flags);
    $text = $secondaryRow->old_text;

    if (in_array('external', $flags)) {
        $url = $text;
        // Check for the path part explicitly rather than silencing the
        // list() notice with @.
        $parts = explode('://', $url, 2);
        if (!isset($parts[1]) || $parts[1] == '') {
            return false;
        }
        $text = ExternalStore::fetchFromUrl($url);
    }

    if (!in_array('object', $flags)) {
        return false;
    }

    if (in_array('gzip', $flags)) {
        $obj = unserialize(gzinflate($text));
    } else {
        $obj = unserialize($text);
    }

    if (!is_object($obj) && is_string($obj)) {
        // Correct for old double-serialization bug. Only a string can be a
        // second serialization layer; other values are rejected below.
        $obj = unserialize($obj);
    }

    if (!is_object($obj)) {
        return false;
    }

    $obj->uncompress();
    $text = $obj->getItem($stub['hash']);

    return $text !== false;
}
/**
 * Collects one master connection per "DB://" entry found in
 * $wgDefaultExternalStore; entries using any other scheme are ignored.
 *
 * @return array Array of DatabaseBase master connections
 */
protected static function getExternalStoreDatabaseConnections() {
    global $wgDefaultExternalStore;

    $externalStoreDB = ExternalStore::getStoreObject('DB');

    $masters = [];
    foreach ((array) $wgDefaultExternalStore as $storeUrl) {
        if (strpos($storeUrl, 'DB://') !== 0) {
            continue;
        }

        // Everything after the scheme separator names the cluster.
        list(, $clusterName) = explode('://', $storeUrl, 2);
        $masters[] = $externalStoreDB->getMaster($clusterName);
    }

    return $masters;
}