/**
 * Move rows of the text table to external storage, one old_id at a time.
 *
 * Every id from 1 to $maxID is looked up on the master connection. Rows that
 * do not exist, or whose old_flags already contain "external", are skipped.
 * Rows flagged "object" whose text begins with STUB_HEADER are handed to
 * resolveStub() and not moved. For every other row the text is written to
 * the external store, after which the row is updated in place: old_text is
 * replaced by the value returned from the store and "external" is appended
 * to old_flags.
 *
 * The script terminates with exit status 1 if the external store returns a
 * falsy value.
 *
 * @param string $cluster Passed as the first argument to ExternalStoreDB::store()
 *   (presumably the target cluster name -- confirm against ExternalStoreDB)
 * @param int $maxID Highest old_id to process (inclusive)
 */
function moveToExternal($cluster, $maxID) {
    $fname = 'moveToExternal';
    // Plain assignment: the connection is an object handle, so binding the
    // function result by reference is unnecessary.
    $dbw = wfGetDB(DB_MASTER);

    print "Moving {$maxID} text rows to external storage\n";
    $ext = new ExternalStoreDB();

    for ($id = 1; $id <= $maxID; $id++) {
        // Progress report every REPORTING_INTERVAL ids.
        if (!($id % REPORTING_INTERVAL)) {
            print "{$id}\n";
            // NOTE(review): presumably gives replicas time to catch up; the
            // meaning of the argument 5 is not visible here.
            wfWaitForSlaves(5);
        }

        $row = $dbw->selectRow(
            'text',
            array('old_flags', 'old_text'),
            array('old_id' => $id, "old_flags NOT LIKE '%external%'"),
            $fname
        );
        if (!$row) {
            # Non-existent or already done
            continue;
        }

        # Resolve stubs
        $flagList = explode(',', $row->old_flags);
        $isStub = in_array('object', $flagList, true)
            && substr($row->old_text, 0, strlen(STUB_HEADER)) === STUB_HEADER;
        if ($isStub) {
            resolveStub($id, $row->old_text, $row->old_flags);
            continue;
        }

        $url = $ext->store($cluster, $row->old_text);
        if (!$url) {
            print "Error writing to external storage\n";
            // Abort with a failure status so calling scripts can detect the error.
            exit(1);
        }

        // Append "external" to whatever flags the row already had.
        if ($row->old_flags === '') {
            $newFlags = 'external';
        } else {
            $newFlags = "{$row->old_flags},external";
        }

        $dbw->update(
            'text',
            array('old_flags' => $newFlags, 'old_text' => $url),
            array('old_id' => $id),
            $fname
        );
    }
}
/**
 * Move rows of the text table to external storage, one old_id at a time.
 *
 * Every id from 1 to $maxID is looked up on the master connection. Rows that
 * do not exist, or whose old_flags already contain "external", are skipped.
 * Rows whose flags contain "object" are unserialized and handled by class:
 *  - historyblobstub: handed to resolveStub() and not moved;
 *  - historyblobcurstub: its text is deflated and stored with the flags
 *    "utf-8,gzip,external";
 *  - concatenatedgziphistoryblob: the serialized text is stored unchanged;
 *  - anything else (including data that fails to unserialize): a warning is
 *    printed and the row is skipped.
 * Payloads shorter than 100 bytes are not moved. For all remaining rows the
 * payload is written to the external store and the row is updated in place
 * with the returned value as old_text and the new flags.
 *
 * The script terminates with exit status 1 if the external store returns a
 * falsy value.
 *
 * @param string $cluster Passed as the first argument to ExternalStoreDB::store()
 *   (presumably the target cluster name -- confirm against ExternalStoreDB)
 * @param int $maxID Highest old_id to process (inclusive)
 */
function moveToExternal($cluster, $maxID) {
    $fname = 'moveToExternal';
    // Plain assignment: the connection is an object handle, so binding the
    // function result by reference is unnecessary.
    $dbw = wfGetDB(DB_MASTER);

    print "Moving {$maxID} text rows to external storage\n";
    $ext = new ExternalStoreDB();

    for ($id = 1; $id <= $maxID; $id++) {
        // Progress report every REPORTING_INTERVAL ids.
        if (!($id % REPORTING_INTERVAL)) {
            print "{$id}\n";
            // NOTE(review): presumably gives replicas time to catch up; the
            // meaning of the argument 5 is not visible here.
            wfWaitForSlaves(5);
        }

        $row = $dbw->selectRow(
            'text',
            array('old_flags', 'old_text'),
            array('old_id' => $id, "old_flags NOT LIKE '%external%'"),
            $fname
        );
        if (!$row) {
            # Non-existent or already done
            continue;
        }

        $text = $row->old_text;
        if ($row->old_flags === '') {
            $flags = 'external';
        } else {
            $flags = "{$row->old_flags},external";
        }

        # Resolve stubs
        if (strpos($flags, 'object') !== false) {
            $obj = unserialize($text);
            if (!is_object($obj)) {
                // unserialize() yields a non-object for corrupt data; calling
                // get_class() on it would be an error, so skip the row.
                print "Warning: unable to unserialize object in old_id {$id}\n";
                continue;
            }

            $className = strtolower(get_class($obj));
            if ($className == 'historyblobstub') {
                resolveStub($id, $row->old_text, $row->old_flags);
                continue;
            } elseif ($className == 'historyblobcurstub') {
                // The stub's text is stored deflated; the flags are replaced
                // outright, not appended to.
                $text = gzdeflate($obj->getText());
                $flags = 'utf-8,gzip,external';
            } elseif ($className == 'concatenatedgziphistoryblob') {
                // Do nothing
            } else {
                print "Warning: unrecognised object class \"{$className}\"\n";
                continue;
            }
        }

        if (strlen($text) < 100) {
            // Don't move tiny revisions
            continue;
        }

        $url = $ext->store($cluster, $text);
        if (!$url) {
            print "Error writing to external storage\n";
            // Abort with a failure status so calling scripts can detect the error.
            exit(1);
        }

        $dbw->update(
            'text',
            array('old_flags' => $flags, 'old_text' => $url),
            array('old_id' => $id),
            $fname
        );
    }
}
/**
 * Compress the text in chunks after concatenating the revisions.
 *
 * Pages are visited by page_id from $startId to $maxPageId. For each page the
 * revisions older than page_latest whose old_flags contain neither "object"
 * nor "external" are loaded and split into chunks of at most $maxChunkSize
 * revisions. Each chunk is processed in its own transaction: the revision
 * texts are added to a ConcatenatedGzipHistoryBlob, which is then either
 * stored locally in the text row of the chunk's first revision (the other
 * rows being replaced by serialized HistoryBlobStub objects) or, when $extdb
 * is given, written to external storage (the text rows being replaced by
 * URLs into the stored blob).
 *
 * @param int $startId First page_id to process
 * @param int $maxChunkSize Maximum number of revisions per chunk
 * @param string $beginDate 14-digit timestamp; when non-empty, only revisions
 *   with rev_timestamp greater than this are considered
 * @param string $endDate 14-digit timestamp; when non-empty, only revisions
 *   with rev_timestamp less than this are considered
 * @param string $extdb Passed as the first argument to ExternalStoreDB::store();
 *   the empty string means blobs are stored locally in the text table
 * @param bool|int $maxPageId Last page_id to process (inclusive); when falsy,
 *   max(page_id) is read from the page table
 * @return bool False if a date is malformed or the external store fails
 *   (the open transaction is rolled back in that case), true otherwise
 */
private function compressWithConcat($startId, $maxChunkSize, $beginDate, $endDate, $extdb = "", $maxPageId = false) {
    $loadStyle = self::LS_CHUNKED;

    $dbr = wfGetDB(DB_SLAVE);
    $dbw = wfGetDB(DB_MASTER);

    # Set up external storage
    if ($extdb != '') {
        $storeObj = new ExternalStoreDB();
    }

    # Get all articles by page_id
    if (!$maxPageId) {
        $maxPageId = $dbr->selectField('page', 'max(page_id)', '', __METHOD__);
    }
    $this->output("Starting from {$startId} of {$maxPageId}\n");
    $pageConds = array();

    /*
    if ( $exclude_ns0 ) {
        print "Excluding main namespace\n";
        $pageConds[] = 'page_namespace<>0';
    }
    if ( $queryExtra ) {
        $pageConds[] = $queryExtra;
    }
    */

    # For each article, get a list of revisions which fit the criteria

    # No recompression, use a condition on old_flags
    # Don't compress object type entities, because that might produce data loss when
    # overwriting bulk storage concat rows. Don't compress external references, because
    # the script doesn't yet delete rows from external storage.
    $conds = array(
        'old_flags NOT ' . $dbr->buildLike($dbr->anyString(), 'object', $dbr->anyString()) . ' AND old_flags NOT ' . $dbr->buildLike($dbr->anyString(), 'external', $dbr->anyString())
    );

    // The dates are concatenated into SQL below, so they are first checked
    // against a 14-digit pattern.
    if ($beginDate) {
        if (!preg_match('/^\\d{14}$/', $beginDate)) {
            $this->error("Invalid begin date \"{$beginDate}\"\n");

            return false;
        }
        $conds[] = "rev_timestamp>'" . $beginDate . "'";
    }
    if ($endDate) {
        if (!preg_match('/^\\d{14}$/', $endDate)) {
            $this->error("Invalid end date \"{$endDate}\"\n");

            return false;
        }
        $conds[] = "rev_timestamp<'" . $endDate . "'";
    }

    if ($loadStyle == self::LS_CHUNKED) {
        // Chunked loading fetches the text rows together with the revisions.
        $tables = array('revision', 'text');
        $fields = array('rev_id', 'rev_text_id', 'old_flags', 'old_text');
        $conds[] = 'rev_text_id=old_id';
        $revLoadOptions = 'FOR UPDATE';
    } else {
        $tables = array('revision');
        $fields = array('rev_id', 'rev_text_id');
        $revLoadOptions = array();
    }

    # Don't work with current revisions
    # Don't lock the page table for update either -- TS 2006-04-04
    #$tables[] = 'page';
    #$conds[] = 'page_id=rev_page AND rev_id != page_latest';

    for ($pageId = $startId; $pageId <= $maxPageId; $pageId++) {
        wfWaitForSlaves();

        # Wake up
        $dbr->ping();

        # Get the page row
        $pageRes = $dbr->select(
            'page',
            array('page_id', 'page_namespace', 'page_title', 'page_latest'),
            $pageConds + array('page_id' => $pageId),
            __METHOD__
        );
        if ($pageRes->numRows() == 0) {
            continue;
        }
        $pageRow = $dbr->fetchObject($pageRes);

        # Display progress
        $titleObj = Title::makeTitle($pageRow->page_namespace, $pageRow->page_title);
        $this->output("{$pageId}\t" . $titleObj->getPrefixedDBkey() . " ");

        # Load revisions
        $revRes = $dbw->select(
            $tables,
            $fields,
            array_merge(
                array(
                    'rev_page' => $pageRow->page_id,
                    'rev_id < ' . $pageRow->page_latest
                ),
                $conds
            ),
            __METHOD__,
            $revLoadOptions
        );
        $revs = array();
        foreach ($revRes as $revRow) {
            $revs[] = $revRow;
        }

        if (count($revs) < 2) {
            # No revisions matching, no further processing
            $this->output("\n");
            continue;
        }

        # For each chunk
        $i = 0;
        while ($i < count($revs)) {
            if ($i < count($revs) - $maxChunkSize) {
                $thisChunkSize = $maxChunkSize;
            } else {
                $thisChunkSize = count($revs) - $i;
            }

            $chunk = new ConcatenatedGzipHistoryBlob();
            $stubs = array();
            $dbw->begin(__METHOD__);
            $usedChunk = false;
            // The text row of the chunk's first revision receives the blob
            // when storing locally.
            $primaryOldid = $revs[$i]->rev_text_id;

            // @codingStandardsIgnoreStart Ignore avoid function calls in a FOR loop test part warning
            # Get the text of each revision and add it to the object
            for ($j = 0; $j < $thisChunkSize && $chunk->isHappy(); $j++) {
                // @codingStandardsIgnoreEnd
                $oldid = $revs[$i + $j]->rev_text_id;

                # Get text
                if ($loadStyle == self::LS_INDIVIDUAL) {
                    $textRow = $dbw->selectRow(
                        'text',
                        array('old_flags', 'old_text'),
                        array('old_id' => $oldid),
                        __METHOD__,
                        'FOR UPDATE'
                    );
                    $text = Revision::getRevisionText($textRow);
                } else {
                    $text = Revision::getRevisionText($revs[$i + $j]);
                }

                if ($text === false) {
                    // NOTE(review): the error is only reported; processing
                    // continues with $text === false below. Confirm that this
                    // is intended.
                    $this->error("\nError, unable to get text in old_id {$oldid}");
                    #$dbw->delete( 'old', array( 'old_id' => $oldid ) );
                }

                if ($extdb == "" && $j == 0) {
                    $chunk->setText($text);
                    $this->output('.');
                } else {
                    # Don't make a stub if it's going to be longer than the article
                    # Stubs are typically about 100 bytes
                    if (strlen($text) < 120) {
                        $stub = false;
                        $this->output('x');
                    } else {
                        $stub = new HistoryBlobStub($chunk->addItem($text));
                        $stub->setLocation($primaryOldid);
                        $stub->setReferrer($oldid);
                        $this->output('.');
                        $usedChunk = true;
                    }
                    $stubs[$j] = $stub;
                }
            }
            // The loop may have stopped early when the chunk was no longer "happy".
            $thisChunkSize = $j;

            # If we couldn't actually use any stubs because the pages were too small, do nothing
            if ($usedChunk) {
                if ($extdb != "") {
                    # Move blob objects to External Storage
                    $stored = $storeObj->store($extdb, serialize($chunk));
                    if ($stored === false) {
                        $this->error("Unable to store object");
                        // Do not leave the transaction opened above (and the
                        // row locks taken inside it) dangling on failure.
                        $dbw->rollback(__METHOD__);

                        return false;
                    }

                    # Store External Storage URLs instead of Stub placeholders
                    foreach ($stubs as $stub) {
                        if ($stub === false) {
                            continue;
                        }
                        # $stored should provide base path to a BLOB
                        $url = $stored . "/" . $stub->getHash();
                        $dbw->update(
                            'text',
                            array('old_text' => $url, 'old_flags' => 'external,utf-8'),
                            array('old_id' => $stub->getReferrer()),
                            __METHOD__
                        );
                    }
                } else {
                    # Store the main object locally
                    $dbw->update(
                        'text',
                        array('old_text' => serialize($chunk), 'old_flags' => 'object,utf-8'),
                        array('old_id' => $primaryOldid)
                    );

                    # Store the stub objects
                    for ($j = 1; $j < $thisChunkSize; $j++) {
                        # Skip if not compressing and don't overwrite the first revision
                        if ($stubs[$j] !== false && $revs[$i + $j]->rev_text_id != $primaryOldid) {
                            $dbw->update(
                                'text',
                                array('old_text' => serialize($stubs[$j]), 'old_flags' => 'object,utf-8'),
                                array('old_id' => $revs[$i + $j]->rev_text_id)
                            );
                        }
                    }
                }
            }

            # Done, next
            $this->output("/");
            $dbw->commit(__METHOD__);
            $i += $thisChunkSize;
            wfWaitForSlaves();
        }
        $this->output("\n");
    }

    return true;
}
/**
 * Move rows of the text table to external storage, in blocks of 1000 ids.
 *
 * The id range $minID..$maxID is scanned block by block on the replica
 * connection, skipping rows whose old_flags already contain "external".
 * Rows whose flags contain "object" are unserialized and handled by class:
 *  - historyblobstub: left untouched (stub resolution is disabled here);
 *  - historyblobcurstub: its text is deflated and stored with the flags
 *    "utf-8,gzip,external";
 *  - concatenatedgziphistoryblob: the serialized text is stored unchanged;
 *  - anything else (including data that fails to unserialize): a warning is
 *    printed and the row is skipped.
 * Plain (non-object) texts shorter than 100 bytes are not moved. For all
 * remaining rows the payload is written to the external store and the row is
 * updated on the master with the returned value as old_text and the new flags.
 *
 * The script terminates with exit status 1 if the external store returns a
 * falsy value.
 *
 * @param string $cluster Passed as the first argument to ExternalStoreDB::store()
 *   (presumably the target cluster name -- confirm against ExternalStoreDB)
 * @param int $maxID Highest old_id to process (inclusive)
 * @param int $minID Lowest old_id to process (inclusive)
 */
function moveToExternal($cluster, $maxID, $minID = 1) {
    $fname = 'moveToExternal';
    $dbw = wfGetDB(DB_MASTER);
    $dbr = wfGetDB(DB_SLAVE);

    $count = $maxID - $minID + 1;
    $blockSize = 1000;
    $numBlocks = ceil($count / $blockSize);
    print "Moving text rows from {$minID} to {$maxID} to external storage\n";
    $ext = new ExternalStoreDB();
    $numMoved = 0;

    for ($block = 0; $block < $numBlocks; $block++) {
        $blockStart = $block * $blockSize + $minID;
        // Clamp the final block so that rows beyond $maxID are never touched.
        $blockEnd = min($blockStart + $blockSize - 1, $maxID);

        // Progress report every REPORTING_INTERVAL blocks.
        if (!($block % REPORTING_INTERVAL)) {
            print "oldid={$blockStart}, moved={$numMoved}\n";
            // NOTE(review): presumably gives replicas time to catch up; the
            // meaning of the argument 2 is not visible here.
            wfWaitForSlaves(2);
        }

        $res = $dbr->select(
            'text',
            array('old_id', 'old_flags', 'old_text'),
            array(
                "old_id BETWEEN {$blockStart} AND {$blockEnd}",
                "old_flags NOT LIKE '%external%'"
            ),
            $fname
        );

        while ($row = $dbr->fetchObject($res)) {
            $text = $row->old_text;
            $id = $row->old_id;
            if ($row->old_flags === '') {
                $flags = 'external';
            } else {
                $flags = "{$row->old_flags},external";
            }

            # Resolve stubs
            if (strpos($flags, 'object') !== false) {
                $obj = unserialize($text);
                if (!is_object($obj)) {
                    // unserialize() yields a non-object for corrupt data;
                    // calling get_class() on it would be an error, so skip.
                    print "Warning: unable to unserialize object in old_id {$id}\n";
                    continue;
                }

                $className = strtolower(get_class($obj));
                if ($className == 'historyblobstub') {
                    // Stubs are not resolved by this function; leave the row as is.
                    continue;
                } elseif ($className == 'historyblobcurstub') {
                    // The stub's text is stored deflated; the flags are
                    // replaced outright, not appended to.
                    $text = gzdeflate($obj->getText());
                    $flags = 'utf-8,gzip,external';
                } elseif ($className == 'concatenatedgziphistoryblob') {
                    // Do nothing
                } else {
                    print "Warning: unrecognised object class \"{$className}\"\n";
                    continue;
                }
            } else {
                $className = false;
            }

            if (strlen($text) < 100 && $className === false) {
                // Don't move tiny revisions
                continue;
            }

            $url = $ext->store($cluster, $text);
            if (!$url) {
                print "Error writing to external storage\n";
                // Abort with a failure status so calling scripts can detect the error.
                exit(1);
            }

            $dbw->update(
                'text',
                array('old_flags' => $flags, 'old_text' => $url),
                array('old_id' => $id),
                $fname
            );
            $numMoved++;
        }
        $dbr->freeResult($res);
    }
}