/**
 * Schedules an MD5-calculation process for every selected wiki and runs
 * them through the multi-threaded process runner.
 *
 * Options:
 *  --limit    maximum number of wikis to process (-1 = no limit)
 *  --wiki     comma-separated list of city IDs to restrict the run to
 *  --reverse  iterate wikis in descending order
 *  --threads  number of worker threads (defaults to self::THREADS_DEFAULT)
 */
public function execute() {
	global $wgExternalSharedDB;

	$this->output("md5 calculation started ... \n");
	$this->time = time();

	$limit = $this->getOption('limit', -1);
	$wikis = $this->getOption('wiki', '');

	// the shared "wikicities" database holds the list of all wikis (city_list)
	$this->db = $this->getDB(DB_SLAVE, array(), $wgExternalSharedDB);

	$where = array();
	if (!empty($wikis)) {
		// restrict the run to an explicit set of city IDs
		$where['city_id'] = explode(',', $wikis);
	}

	$order = $this->hasOption('reverse') ? " DESC" : "";
	$res = $this->db->select(
		array('city_list'),
		array('city_id', 'city_dbname'),
		$where,
		__CLASS__,
		array_merge(
			array('ORDER BY' => "city_last_timestamp{$order}, city_id{$order}"),
			$limit > 0 ? array('LIMIT' => $limit) : array()
		)
	);

	$this->output("Building list of wiki IDs...\n");
	$queue = array();
	while ($row = $res->fetchObject()) {
		$queue[intval($row->city_id)] = $row->city_dbname;
	}
	$this->output(sprintf("Scheduling %d wikis for migration...\n", count($queue)));

	$this->output("\nRun migrateImagesToSwift script \n");
	$this->output("Building list of processes to run...\n");
	$processes = array();
	foreach ($queue as $id => $dbname) {
		$processes[] = $this->getProcess($id);
	}

	$threads = $this->getOption('threads', self::THREADS_DEFAULT);
	$this->output("Using {$threads} threads...\n");
	$runner = new \Wikia\Swift\Process\Runner($processes, $threads);
	$runner->run();

	// report the number of wikis actually scheduled (the previous version
	// printed a $migrated counter that was never incremented, i.e. always 0)
	$this->output(sprintf("\nMigrated %d Wikis in %s\n", count($queue), Wikia::timeDuration(time() - $this->time)));
	$this->output("\nDone!\n");
}
/**
 * Run XML dump backups for a range of wikis.
 *
 * @param int|false $from    first city_id (inclusive), or false for no range limit
 * @param int|false $to      last city_id (exclusive), or false for no range limit
 * @param bool      $full    dump full page history (otherwise current revisions only)
 * @param array     $options CLI options: db, id, even, odd, both, hide
 */
function runBackups($from, $to, $full, $options) {
	global $IP, $wgWikiaLocalSettingsPath, $wgWikiaAdminSettingsPath, $wgMaxShellTime, $wgMaxShellFileSize, $wgDumpsDisabledWikis;

	$range = array();

	// shortcut for full & current together
	$both = isset($options["both"]) ? true : false;
	// store backup in another folder, not available for users
	$hide = isset($options["hide"]) ? true : false;

	/**
	 * silly trick: if we have id defined we derive $from & $to from it;
	 * if we have the db param defined we first resolve which id is connected
	 * to this database
	 */
	if (isset($options["db"]) && is_string($options["db"])) {
		$city_id = WikiFactory::DBtoID($options["db"]);
		if ($city_id) {
			// single-wiki run: half-open range [city_id, city_id + 1)
			$from = $to = $city_id;
			$to++;
		}
	} elseif (isset($options["id"]) && is_numeric($options["id"])) {
		$from = $to = $options["id"];
		$to++;
	} elseif (isset($options["even"])) {
		// even-numbered active wikis only
		$range[] = "city_id % 2 = 0";
		$range[] = "city_public = 1";
	} elseif (isset($options["odd"])) {
		// odd-numbered active wikis only
		$range[] = "city_id % 2 <> 0";
		$range[] = "city_public = 1";
	} else {
		// if all, only for active wikis
		$range[] = "city_public = 1";
	}

	// exclude wikis with dumps disabled
	if (!empty($wgDumpsDisabledWikis) && is_array($wgDumpsDisabledWikis)) {
		$range[] = 'city_id NOT IN (' . implode(',', $wgDumpsDisabledWikis) . ')';
	}

	// switch off shell time/size limits for dumps
	$wgMaxShellTime = 0;
	$wgMaxShellFileSize = 0;

	if ($from !== false && $to !== false) {
		$range[] = sprintf("city_id >= %d AND city_id < %d", $from, $to);
		Wikia::log(__METHOD__, "info", "Running from {$from} to {$to}", true, true);
	} else {
		Wikia::log(__METHOD__, "info", "Running for all wikis", true, true);
	}

	$dbw = Wikifactory::db(DB_MASTER);
	$sth = $dbw->select(array("city_list"), array("city_id", "city_dbname"), $range, __METHOD__, array("ORDER BY" => "city_id"));
	while ($row = $dbw->fetchObject($sth)) {
		// get cluster for this wiki
		// NOTE(review): $cluster is fetched but never used below
		$cluster = WikiFactory::getVarValueByName("wgDBcluster", $row->city_id);
		// NOTE(review): getProperty("mServer") reads an internal member of the
		// DB connection - presumably so dumpBackup.php hits the same slave; verify
		$server = wfGetDB(DB_SLAVE, 'dumps', $row->city_dbname)->getProperty("mServer");

		// build command
		$status = false;
		$basedir = getDirectory($row->city_dbname, $hide);
		if ($full || $both) {
			// full-history dump -> pages_full.xml.gz
			$path = sprintf("%s/pages_full.xml.gz", $basedir);
			$time = wfTime();
			Wikia::log(__METHOD__, "info", "{$row->city_id} {$row->city_dbname} {$path}", true, true);
			$cmd = array("SERVER_ID={$row->city_id}", "php", "{$IP}/maintenance/dumpBackup.php", "--conf {$wgWikiaLocalSettingsPath}", "--aconf {$wgWikiaAdminSettingsPath}", "--full", "--xml", "--quiet", "--server={$server}", "--output=gzip:{$path}");
			wfShellExec(implode(" ", $cmd), $status);
			$time = Wikia::timeDuration(wfTime() - $time);
			Wikia::log(__METHOD__, "info", "{$row->city_id} {$row->city_dbname} status: {$status}, time: {$time}", true, true);
		}
		if (!$full || $both) {
			// current-revisions dump -> pages_current.xml.gz
			$path = sprintf("%s/pages_current.xml.gz", $basedir);
			$time = wfTime();
			Wikia::log(__METHOD__, "info", "{$row->city_id} {$row->city_dbname} {$path}", true, true);
			$cmd = array("SERVER_ID={$row->city_id}", "php", "{$IP}/maintenance/dumpBackup.php", "--conf {$wgWikiaLocalSettingsPath}", "--aconf {$wgWikiaAdminSettingsPath}", "--current", "--xml", "--quiet", "--server={$server}", "--output=gzip:{$path}");
			wfShellExec(implode(" ", $cmd), $status);
			$time = Wikia::timeDuration(wfTime() - $time);
			Wikia::log(__METHOD__, "info", "{$row->city_id} {$row->city_dbname} status: {$status}, time: {$time}", true, true);
		}

		// generate index.json describing the files in the backup directory
		$jsonfile = sprintf("%s/index.json", $basedir);
		$json = array();
		// open dir and read info about files
		if (is_dir($basedir)) {
			$dh = opendir($basedir);
			while (($file = readdir($dh)) !== false) {
				$fullpath = $basedir . "/" . $file;
				if (is_file($fullpath)) {
					$json[$file] = array("name" => $file, "timestamp" => filectime($fullpath), "mwtimestamp" => wfTimestamp(TS_MW, filectime($fullpath)));
				}
			}
			closedir($dh);
		}
		if (count($json)) {
			file_put_contents($jsonfile, json_encode($json));
		}
	}
}
/**
 * Schedules image-migration (or md5-calculation / secondary-DC sync)
 * processes for all eligible wikis and runs them through the
 * multi-threaded process runner.
 *
 * Options: limit, debug, force, wiki, all, dry-run, no-deletes, md5,
 * sync, dc, reverse, threads, delay, stats-only.
 */
public function execute() {
	// NOTE(review): $wgWikiaDatacenter is imported but never used below
	global $wgExternalSharedDB, $wgWikiaDatacenter;

	$this->output("Wikis migration started ... \n");
	$this->time = time();
	// NOTE(review): $migrated is never incremented, so the final summary
	// always reports 0 migrated wikis
	$migrated = 0;

	// read CLI options
	$limit = $this->getOption('limit', -1);
	$debug = $this->hasOption('debug');
	$force = $this->hasOption('force');
	$wikis = $this->getOption('wiki', null);
	$forceAll = $this->hasOption('all');
	$this->dryRun = $this->hasOption('dry-run');
	$this->noDeletes = $this->hasOption('no-deletes');
	$this->calculateMd5 = $this->hasOption('md5');
	$this->syncSecondary = $this->hasOption('sync');
	if ($this->syncSecondary) {
		// secondary-DC sync implies processing every wiki, regardless of flags
		$force = true;
		$forceAll = true;
	}
	$this->dc = $this->getOption('dc', 'sjc,res');

	# don't migrate top 200 Wikis
	$top200Wikis = array();
	if (!$forceAll) {
		$top200Wikis = DataMartService::getWAM200Wikis();
		// sanity check: refuse to run on an incomplete WAM list
		if (count($top200Wikis) != 200) {
			$this->output("Number of Top 200 Wikis is different than 200 !\n");
			exit;
		}
		if ($debug) {
			$this->output("Top 200 Wikis: " . implode(", ", $top200Wikis) . "\n");
		}
	}

	# don't migrate video.wikia.com & corp.wikia.com
	$this->disabled_wikis = array_merge($top200Wikis, $this->disabled_wikis);
	// normalize to ints so the in_array() check below matches reliably
	foreach ($this->disabled_wikis as $k => $v) {
		$this->disabled_wikis[$k] = intval($v);
	}
	if ($this->syncSecondary) {
		// sync mode processes everything - drop the blacklist
		$this->disabled_wikis = array();
	}

	// parse --wiki into a list of non-negative numeric city IDs
	$wikiIds = null;
	if ($wikis !== null) {
		$wikiIds = array();
		foreach (explode(',', $wikis) as $id) {
			if (is_numeric($id) && $id >= 0) {
				$wikiIds[] = $id;
			}
		}
		if (count($wikiIds) == 0) {
			$wikiIds = null;
		}
	}

	$this->db = $this->getDB(DB_SLAVE, array(), $wgExternalSharedDB);
	$order = $this->hasOption('reverse') ? " DESC" : "";
	// cv_variable_id 1334 presumably marks "already migrated" ("b:1;" is a
	// serialized boolean true); unless --force is given such wikis are skipped
	// NOTE(review): the raw OR condition relies on the DB layer wrapping each
	// conds-array entry in parentheses - verify against this MW version's makeList()
	$res = $this->db->select(
		array('city_list', 'city_variables'),
		array('city_id', 'city_dbname'),
		array_merge(
			array('city_public' => 1),
			!$force ? array('cv_value is null or cv_value != "b:1;"') : array(),
			is_array($wikiIds) ? array('city_id' => $wikiIds) : array()
		),
		__CLASS__,
		array_merge(
			array('ORDER BY' => "city_last_timestamp{$order}, city_id{$order}"),
			$limit > 0 ? array('LIMIT' => $limit) : array()
		),
		array('city_variables' => array('LEFT JOIN', 'city_list.city_id = city_variables.cv_city_id and cv_variable_id = 1334'))
	);
	$this->output(sprintf("Found %d wikis in database...\n", $res->numRows()));

	// build the work queue, skipping blacklisted wikis
	$this->output("Building list of wiki IDs...\n");
	$removedCount = 0;
	$queue = array();
	while ($row = $res->fetchObject()) {
		$id = intval($row->city_id);
		$dbname = $row->city_dbname;
		if (!in_array($id, $this->disabled_wikis)) {
			$queue[$id] = $dbname;
		} else {
			$removedCount++;
		}
	}
	$this->output(sprintf("Skipped %d wikis that are on blacklist...\n", $removedCount));
	$this->output(sprintf("Scheduling %d wikis for migration...\n", count($queue)));

	// --stats-only: report and leave without starting any processes
	if ($this->hasOption('stats-only')) {
		return;
	}

	$this->output("\nRun migrateImagesToSwift script \n");
	$this->output("Building list of processes to run...\n");
	$processes = array();
	foreach ($queue as $id => $dbname) {
		// pick the process type matching the requested mode
		if ($this->calculateMd5) {
			$process = $this->getMd5Process($id, $dbname);
		} elseif ($this->syncSecondary) {
			$process = $this->getSyncProcess($id, $dbname);
		} else {
			$process = $this->getMigrationProcess($id, $dbname);
		}
		$processes[] = $process;
	}

	$threads = $this->getOption('threads', self::THREADS_DEFAULT);
	$threads = intval($threads);
	$threadDelay = $this->getOption('delay', self::DELAY_DEFAULT);
	$threadDelay = intval($threadDelay);
	$this->output("Using {$threads} threads...\n");
	$runner = new \Wikia\Swift\Process\Runner($processes, $threads, $threadDelay);
	$runner->run();

	$this->output(sprintf("\nMigrated %d Wikis in %s\n", $migrated, Wikia::timeDuration(time() - $this->time)));
	$this->output("\nDone!\n");
}
/**
 * Puts the specified file to Amazon S3 storage.
 *
 * When $bPublic is true the uploaded object is made readable by all users.
 * When $sMimeType is set, that mime type is passed to s3cmd explicitly;
 * otherwise s3cmd decides on the mime type itself.
 *
 * @param string      $sPath     local path of the file to upload
 * @param bool        $bPublic   make the object world-readable
 * @param string|null $sMimeType explicit mime type, or null for auto-detect
 * @return int exit status of the s3cmd invocation
 */
public static function putToAmazonS3($sPath, $bPublic = true, $sMimeType = null) {
	$startedAt = wfTime();

	// target object inside the wikia_xml_dumps bucket
	$sDestination = wfEscapeShellArg('s3://wikia_xml_dumps/' . DumpsOnDemand::getPath(basename($sPath)));
	$sPath = wfEscapeShellArg($sPath);

	// assemble the s3cmd invocation piece by piece
	$parts = array('sudo /usr/bin/s3cmd -c /root/.s3cfg --add-header=Content-Disposition:attachment');
	if ($sMimeType !== null) {
		$parts[] = '--mime-type=' . wfEscapeShellArg($sMimeType);
	}
	if ($bPublic) {
		$parts[] = '--acl-public';
	}
	$parts[] = "put {$sPath} {$sDestination}";

	// $iStatus is populated by reference with the shell exit code
	wfShellExec(implode(' ', $parts), $iStatus);

	$duration = Wikia::timeDuration(wfTime() - $startedAt);
	Wikia::log(__METHOD__, "info", sprintf('Put %s to Amazon S3 storage: status: %s, time: %s', $sPath, $iStatus, $duration), true, true);
	return $iStatus;
}
/**
 * Migrates this wiki's images (current, old and deleted revisions) from
 * local/NFS storage to Swift, locking uploads for the duration and then
 * switching the wiki's configuration to the new storage backend.
 *
 * Options: force, dry-run, stats-only.
 */
public function execute() {
	global $wgCityId, $wgExternalSharedDB;

	// force migration of wikis with read-only mode
	if (wfReadOnly()) {
		global $wgReadOnly;
		$wgReadOnly = false;
	}

	$this->init();
	$dbr = $this->getDB(DB_SLAVE);
	$isForced = $this->hasOption('force');
	$isDryRun = $this->hasOption('dry-run');

	// one migration is enough - bail out unless --force is given
	global $wgEnableSwiftFileBackend, $wgEnableUploads, $wgDBname;
	if (!empty($wgEnableSwiftFileBackend) && !$isForced) {
		$this->error("\$wgEnableSwiftFileBackend = true - new files storage already enabled on {$wgDBname} wiki!", 1);
	}
	if (empty($wgEnableUploads) && !$isForced) {
		$this->error("\$wgEnableUploads = false - migration is already running on {$wgDBname} wiki!", 1);
	}

	// get images count and total size per table for the ETA estimate
	$tables = ['filearchive' => 'fa_size', 'image' => 'img_size', 'oldimage' => 'oi_size'];
	foreach ($tables as $table => $sizeField) {
		$row = $dbr->selectRow($table, ['count(*) AS cnt', "SUM({$sizeField}) AS size"], [], __METHOD__);
		$this->output(sprintf("* %s:\t%d images (%d MB)\n", $table, $row->cnt, round($row->size / 1024 / 1024)));
		$this->imagesCnt += $row->cnt;
		$this->imagesSize += $row->size;
	}
	$this->output(sprintf("\n%d image(s) (%d MB) will be migrated (should take ~ %s with %d kB/s / ~ %s with %d files/sec)...\n", $this->imagesCnt, round($this->imagesSize / 1024 / 1024), Wikia::timeDuration($this->imagesSize / 1024 / self::KB_PER_SEC), self::KB_PER_SEC, Wikia::timeDuration($this->imagesCnt / self::FILES_PER_SEC), self::FILES_PER_SEC));

	// --stats-only: report counts and leave without migrating anything
	if ($this->hasOption('stats-only')) {
		return;
	}

	// ok, so let's start...
	$this->time = time();
	self::log(__CLASS__, 'migration started', self::LOG_MIGRATION_PROGRESS);

	// wait a bit to prevent deadlocks (from 0 to 2 sec)
	usleep(mt_rand(0, 2000) * 1000);

	// lock the wiki
	$dbw = $this->getDB(DB_MASTER, array(), $wgExternalSharedDB);
	if (!$isDryRun) {
		$dbw->replace('city_image_migrate', ['city_id'], ['city_id' => $wgCityId, 'locked' => 1], __CLASS__);
	}

	// block uploads via WikiFactory
	if (!$isDryRun) {
		WikiFactory::setVarByName('wgEnableUploads', $wgCityId, false, self::REASON);
		WikiFactory::setVarByName('wgUploadMaintenance', $wgCityId, true, self::REASON);
		$this->output("Uploads and image operations disabled\n\n");
	} else {
		$this->output("Performing dry run...\n\n");
	}

	// prepare the list of files to migrate to new storage
	// (a) current revisions of images
	// @see http://www.mediawiki.org/wiki/Image_table
	$this->output("\nA) Current revisions of images - /images\n");
	$res = $dbr->select('image', ['img_name AS name', 'img_size AS size', 'img_sha1 AS hash', 'img_major_mime AS major_mime', 'img_minor_mime AS minor_mime']);
	while ($row = $res->fetchRow()) {
		$path = $this->getImagePath($row);
		$this->copyFile($path, $row);
	}

	// (b) old revisions of images
	// @see http://www.mediawiki.org/wiki/Oldimage_table
	$this->output("\nB) Old revisions of images - /archive\n");
	$res = $dbr->select('oldimage', ['oi_name AS name', 'oi_archive_name AS archived_name', 'oi_size AS size', 'oi_sha1 AS hash', 'oi_major_mime AS major_mime', 'oi_minor_mime AS minor_mime']);
	while ($row = $res->fetchRow()) {
		$path = $this->getOldImagePath($row);
		$this->copyFile($path, $row);
	}

	// (c) deleted images
	// @see http://www.mediawiki.org/wiki/Filearchive_table
	$this->output("\nC) Deleted images - /deleted\n");
	$res = $dbr->select('filearchive', ['fa_name AS name', 'fa_storage_key AS storage_key', 'fa_size AS size', 'fa_major_mime AS major_mime', 'fa_minor_mime AS minor_mime']);
	while ($row = $res->fetchRow()) {
		$path = $this->getRemovedImagePath($row);
		$this->copyFile($path, $row);
	}

	// stats per DC
	$statsPerDC = [];
	foreach ($this->timePerDC as $dc => $time) {
		$statsPerDC[] = sprintf("%s took %s", $dc, Wikia::timeDuration(round($time)));
	}

	// summary
	// NOTE(review): the files/sec term uses floor($this->imagesCnt) - floor()
	// of a count is a no-op, and this is the *scanned* count, not the migrated
	// one; both rate terms also divide by elapsed wall time, which can be zero
	// on very fast runs - probably should be migratedImagesCnt / max(1, $totalTime)
	$totalTime = time() - $this->time;
	$report = sprintf('Migrated %d files (%d MB) with %d fails in %s (%.2f files/sec, %.2f kB/s) - DCs: %s', $this->migratedImagesCnt, round($this->migratedImagesSize / 1024 / 1024), $this->migratedImagesFailedCnt, Wikia::timeDuration($totalTime), floor($this->imagesCnt) / (time() - $this->time), $this->migratedImagesSize / 1024 / (time() - $this->time), join(', ', $statsPerDC));
	$this->output("\n{$report}\n");
	self::log(__CLASS__, 'migration completed - ' . $report, self::LOG_MIGRATION_PROGRESS);

	// if running in --dry-run, leave now
	if ($isDryRun) {
		$this->output("\nDry run completed!\n");
		return;
	}

	// unlock the wiki; ping() presumably revives a connection that idled
	// out during a long migration - TODO confirm
	$dbw->ping();
	$dbw->replace('city_image_migrate', ['city_id'], ['city_id' => $wgCityId, 'locked' => 0], __CLASS__);

	// update wiki configuration
	// enable Swift storage via WikiFactory
	WikiFactory::setVarByName('wgEnableSwiftFileBackend', $wgCityId, true, sprintf('%s - migration took %s', self::REASON, Wikia::timeDuration($totalTime)));
	$this->output("\nNew storage enabled\n");

	// too short bucket name fix
	if ($this->shortBucketNameFixed) {
		global $wgUploadPath, $wgUploadDirectory, $wgUploadDirectoryNFS;
		WikiFactory::setVarByName('wgUploadPath', $wgCityId, $wgUploadPath, self::REASON);
		WikiFactory::setVarByName('wgUploadDirectory', $wgCityId, $wgUploadDirectory, self::REASON);
		WikiFactory::setVarByName('wgUploadDirectoryNFS', $wgCityId, $wgUploadDirectoryNFS, self::REASON);
		$this->output("\nNew upload directory set up\n");
	}

	// enable uploads via WikiFactory
	// wgEnableUploads = true / wgUploadMaintenance = false (remove values from WF to give them the default value)
	WikiFactory::removeVarByName('wgEnableUploads', $wgCityId, self::REASON);
	WikiFactory::removeVarByName('wgUploadMaintenance', $wgCityId, self::REASON);
	$this->output("\nUploads and image operations enabled\n");
	$this->output("\nDone!\n");
}
/**
 * Threaded variant of the per-wiki image migration to Swift: queues every
 * image revision (current, old, deleted), processes the queue with a
 * worker pool, then flips the wiki's configuration to the new storage.
 *
 * Options: debug, force, dry-run, diff, local, no-deletes, threads,
 * hammer, wait, stats-only.
 */
public function execute() {
	global $wgDBname, $wgCityId, $wgExternalSharedDB, $wgUploadDirectory, $wgUploadDirectoryNFS;

	$this->debug = $this->hasOption('debug');
	// NOTE(review): both branches of the ternary are 10, so --debug has no
	// effect on the logger level - one of the branches is probably wrong
	$this->logger = new \Wikia\Swift\Logger\Logger($this->debug ? 10 : 10, -1, 10);
	$this->logger->setFile('/var/log/migration/' . $wgDBname . '.log');
	$this->logger = $this->logger->prefix($wgDBname);

	// force migration of wikis with read-only mode
	if (wfReadOnly()) {
		global $wgReadOnly;
		$wgReadOnly = false;
	}

	$this->init();
	$dbr = $this->getDB(DB_SLAVE);
	$isForced = $this->hasOption('force');
	$isDryRun = $this->hasOption('dry-run');
	$this->useDiff = $this->getOption('diff', false);
	$this->useLocalFiles = $this->getOption('local', false);
	$this->useDeletes = !$this->hasOption('no-deletes');
	// clamp the worker count to [1, THREADS_MAX]
	$this->threads = intval($this->getOption('threads', self::THREADS_DEFAULT));
	$this->threads = min(self::THREADS_MAX, max(1, $this->threads));
	$this->hammer = $this->getOption('hammer', null);

	// prefer the NFS upload directory when configured
	$uploadDir = !empty($wgUploadDirectoryNFS) ? $wgUploadDirectoryNFS : $wgUploadDirectory;
	$uploadDir = $this->rewriteLocalPath($uploadDir);
	if (!is_dir($uploadDir)) {
		$this->fatal(__CLASS__, "Could not read the source directory: {$uploadDir}", self::LOG_MIGRATION_ERRORS);
	}

	// just don't f**k everything!
	if ($this->useLocalFiles && !$isDryRun) {
		// --local is only allowed on the master file server
		if (gethostname() !== 'file-s4') {
			$this->fatal(__CLASS__, "Incremental upload requires access to master file system (don't use --local)", self::LOG_MIGRATION_ERRORS);
		}
	}
	if (!empty($this->hammer) && !$isDryRun) {
		$this->fatal(__CLASS__, "Hammer option not supported when not using --dry-run", self::LOG_MIGRATION_ERRORS);
	}

	// one migration is enough - bail out unless --force is given
	// NOTE(review): $wgDBname is redundantly re-declared here (harmless)
	global $wgEnableSwiftFileBackend, $wgEnableUploads, $wgDBname;
	if (!empty($wgEnableSwiftFileBackend) && !$isForced) {
		$this->error("\$wgEnableSwiftFileBackend = true - new files storage already enabled on {$wgDBname} wiki!", 1);
	}
	if (empty($wgEnableUploads) && !$isForced) {
		$this->error("\$wgEnableUploads = false - migration is already running on {$wgDBname} wiki!", 1);
	}

	// get images count and total size per table for the ETA estimate
	$tables = ['filearchive' => 'fa_size', 'image' => 'img_size', 'oldimage' => 'oi_size'];
	foreach ($tables as $table => $sizeField) {
		$row = $dbr->selectRow($table, ['count(*) AS cnt', "SUM({$sizeField}) AS size"], [], __METHOD__);
		$this->output(sprintf("* %s:\t%d images (%d MB)\n", $table, $row->cnt, round($row->size / 1024 / 1024)));
		$this->imagesCnt += $row->cnt;
		$this->imagesSize += $row->size;
	}
	$this->output(sprintf("\n%d image(s) (%d MB) will be migrated (should take ~ %s with %d kB/s / ~ %s with %d files/sec)...\n", $this->imagesCnt, round($this->imagesSize / 1024 / 1024), Wikia::timeDuration($this->imagesSize / 1024 / self::KB_PER_SEC), self::KB_PER_SEC, Wikia::timeDuration($this->imagesCnt / self::FILES_PER_SEC), self::FILES_PER_SEC));

	// --stats-only: report counts and leave without migrating anything
	if ($this->hasOption('stats-only')) {
		return;
	}

	// ok, so let's start...
	$this->time = time();
	self::logWikia(__CLASS__, 'migration started', self::LOG_MIGRATION_PROGRESS);

	// wait a bit to prevent deadlocks (from 0 to 2 sec)
	usleep(mt_rand(0, 2000) * 1000);

	// lock the wiki
	$dbw = $this->getDB(DB_MASTER, array(), $wgExternalSharedDB);
	if (!$isDryRun) {
		$dbw->replace('city_image_migrate', ['city_id'], ['city_id' => $wgCityId, 'locked' => 1], __CLASS__);
	}

	// block uploads via WikiFactory
	if (!$isDryRun) {
		// unlockWiki runs on shutdown - presumably restores uploads if the
		// script dies mid-way; verify against that method
		register_shutdown_function(array($this, 'unlockWiki'));
		$this->areUploadsDisabled = true;
		WikiFactory::setVarByName('wgEnableUploads', $wgCityId, false, self::REASON);
		WikiFactory::setVarByName('wgUploadMaintenance', $wgCityId, true, self::REASON);
		$this->output("Uploads and image operations disabled\n\n");
		if ($this->hasOption('wait')) {
			$this->output("Sleeping for 180 seconds to let Apache finish uploads...\n");
			sleep(180);
		}
	} else {
		$this->output("Performing dry run...\n\n");
	}

	// prepare the list of files to migrate to new storage
	// (a) current revisions of images
	// @see http://www.mediawiki.org/wiki/Image_table
	$this->output("\nA) Current revisions of images - /images\n");
	$res = $dbr->select('image', ['img_name AS name', 'img_size AS size', 'img_sha1 AS hash', 'img_major_mime AS major_mime', 'img_minor_mime AS minor_mime']);
	while ($row = $res->fetchRow()) {
		$path = $this->getImagePath($row);
		$this->queueFile($path, $row);
	}

	// (b) old revisions of images
	// @see http://www.mediawiki.org/wiki/Oldimage_table
	$this->output("\nB) Old revisions of images - /archive\n");
	$res = $dbr->select('oldimage', ['oi_name AS name', 'oi_archive_name AS archived_name', 'oi_size AS size', 'oi_sha1 AS hash', 'oi_major_mime AS major_mime', 'oi_minor_mime AS minor_mime']);
	while ($row = $res->fetchRow()) {
		$path = $this->getOldImagePath($row);
		$this->queueFile($path, $row);
	}

	// (c) deleted images
	// @see http://www.mediawiki.org/wiki/Filearchive_table
	// (the line break inside the first column alias is harmless SQL whitespace)
	$this->output("\nC) Deleted images - /deleted\n");
	$res = $dbr->select('filearchive', ['fa_name 
AS name', 'fa_storage_key AS storage_key', 'fa_size AS size', 'fa_major_mime AS major_mime', 'fa_minor_mime AS minor_mime']);
	while ($row = $res->fetchRow()) {
		$path = $this->getRemovedImagePath($row);
		$this->queueFile($path, $row);
	}

	// upload everything queued above using the worker pool
	$this->processQueue();
	// NOTE(review): bare echo looks like leftover debugging output
	echo count($this->allFiles) . PHP_EOL;

	// stats per DC
	$statsPerDC = [];
	foreach ($this->timePerDC as $dc => $time) {
		$statsPerDC[] = sprintf("%s took %s", $dc, Wikia::timeDuration(round($time)));
	}

	// summary
	$totalTime = time() - $this->time;
	$report = sprintf('Migrated %d files with %d fails in %s', $this->migratedImagesCnt, $this->migratedImagesFailedCnt, Wikia::timeDuration($totalTime));
	$this->output("\n{$report}\n");
	self::logWikia(__CLASS__, 'migration completed - ' . $report, self::LOG_MIGRATION_PROGRESS);

	// if running in --dry-run, leave now
	if ($isDryRun) {
		$this->output("\nDry run completed!\n");
		return;
	}

	// unlock the wiki; ping() presumably revives a connection that idled
	// out during the long migration - TODO confirm
	$dbw->ping();
	$dbw->replace('city_image_migrate', ['city_id'], ['city_id' => $wgCityId, 'locked' => 0], __CLASS__);
	// NOTE(review): $dbr is reassigned to the shared master and only pinged,
	// never used afterwards - looks like a leftover keep-alive
	$dbr = $this->getDB(DB_MASTER, array(), $wgExternalSharedDB);
	$dbr->ping();

	// update wiki configuration
	// enable Swift storage via WikiFactory
	WikiFactory::setVarByName('wgEnableSwiftFileBackend', $wgCityId, true, sprintf('%s - migration took %s', self::REASON, Wikia::timeDuration($totalTime)));
	$this->output("\nNew storage enabled\n");

	// too short bucket name fix
	if ($this->shortBucketNameFixed) {
		global $wgUploadPath, $wgUploadDirectory, $wgUploadDirectoryNFS;
		WikiFactory::setVarByName('wgUploadPath', $wgCityId, $wgUploadPath, self::REASON);
		WikiFactory::setVarByName('wgUploadDirectory', $wgCityId, $wgUploadDirectory, self::REASON);
		WikiFactory::setVarByName('wgUploadDirectoryNFS', $wgCityId, $wgUploadDirectoryNFS, self::REASON);
		$this->output("\nNew upload directory set up\n");
	}

	// enable uploads via WikiFactory
	// wgEnableUploads = true / wgUploadMaintenance = false (remove values from WF to give them the default value)
	WikiFactory::removeVarByName('wgEnableUploads', $wgCityId, self::REASON);
	WikiFactory::removeVarByName('wgUploadMaintenance', $wgCityId, self::REASON);
	$this->areUploadsDisabled = false;
	$this->output("\nUploads and image operations enabled\n");
	$this->output("\nDone!\n");
}
/**
 * Pack all images into a tar archive, using PEAR Archive_Tar.
 *
 * For wikis already migrated to Swift, the images are first synced from
 * the Swift/S3 container into a temporary local directory via s3cmd.
 *
 * @access public
 *
 * @param string $directory path to images
 * @param string $dbname    database name
 * @param int    $cityid    city ID
 *
 * @return string|false path to created archive or false if not created
 */
public function tarFiles($directory, $dbname, $cityid) {
	$swiftEnabled = WikiFactory::getVarValueByName('wgEnableSwiftFileBackend', $cityid);
	$wgUploadPath = WikiFactory::getVarValueByName('wgUploadPath', $cityid);

	if ($swiftEnabled) {
		// sync Swift container to the local directory
		// (plain interpolation - the previous sprintf() call had no placeholders
		// and would mangle database names containing a '%' character)
		$directory = "/tmp/images/{$dbname}/";
		$path = trim(parse_url($wgUploadPath, PHP_URL_PATH), '/');
		$container = substr($path, 0, -7); // strip trailing "/images", eg. poznan/pl
		$this->log(sprintf('Rsyncing images from "%s" Swift storage to "%s"...', $container, $directory));
		wfMkdirParents($directory);
		$time = wfTime();
		// s3cmd sync --dry-run s3://dilbert ~/images/dilbert/ --exclude "/thumb/*" --exclude "/temp/*"
		$cmd = sprintf('sudo /usr/bin/s3cmd -c %s sync s3://%s/images "%s" --exclude "/thumb/*" --exclude "/temp/*"', '/etc/s3cmd/sjc_prod.cfg', $container, $directory);
		wfShellExec($cmd, $iStatus);
		$time = Wikia::timeDuration(wfTime() - $time);
		Wikia::log(__METHOD__, "info", "Rsync to {$directory} from {$container} Swift storage: status: {$iStatus}, time: {$time}", true, true);
	}

	// destination archive; remove a stale one if present
	$tarfile = "/tmp/{$dbname}_images.tar";
	if (file_exists($tarfile)) {
		@unlink($tarfile);
	}

	$tar = new Archive_Tar($tarfile);
	if (!$tar) {
		// defensive: PEAR constructors historically could return a falsy error object
		$this->log("Cannot open {$tarfile}");
		echo "Cannot open {$tarfile}";
		die(1);
	}

	$files = $this->getDirTree($directory);
	if (is_array($files) && count($files)) {
		$this->log(sprintf("Packing %d files from {$directory} to {$tarfile}", count($files)));
		$tar->create($files);
		$result = $tarfile;
	} else {
		$this->log("List of files in {$directory} is empty");
		$result = false;
	}

	return $result;
}
/**
 * Connects to the stats DB and continuously processes the events table,
 * aggregating blog/article/user edit counters per WikiFactory tag and
 * flushing them into the auto-hubs consumer DB. Loops forever (while(1))
 * until an MWException is thrown.
 *
 * @return bool false when stats DB processing is disabled
 */
public function receiveFromEvents() {
	// NOTE(review): $wgStatsIgnoreWikis is read below but is NOT listed in
	// this global statement, so the "ignore wikis" filter is always empty -
	// likely a bug
	global $wgStatsDB, $wgCityId, $wgMemc, $wgStatsDBEnabled, $wgSharedDB, $wgIP;
	wfProfileIn(__METHOD__);

	if (empty($wgStatsDBEnabled)) {
		wfProfileOut(__METHOD__);
		return false;
	}

	try {
		while (1) {
			$dbr = wfGetDB(DB_SLAVE, array(), $wgStatsDB);
			// fetch events of type 1 or 2 newer than the last seen timestamp
			// NOTE(review): event_type semantics are not visible here - confirm
			$where = array(" rev_timestamp >= '" . $this->mDate . "' ", " (event_type = 2 or event_type = 1 ) ");
			if (!empty($wgStatsIgnoreWikis)) {
				$where[] = 'wiki_id not in (' . $dbr->makeList($wgStatsIgnoreWikis) . ')';
			}
			$oRes = $dbr->select(array('events'), array('wiki_id, page_id, page_ns, user_id, rev_timestamp, user_is_bot'), $where, __METHOD__);

			// group rows per wiki, tracking the newest timestamp seen so the
			// next iteration resumes from there
			$result = array();
			$loop = 0;
			while ($oRow = $dbr->fetchObject($oRes)) {
				if ($oRow->rev_timestamp > $this->mDate) {
					$this->mDate = $oRow->rev_timestamp;
				}
				$result[$oRow->wiki_id][$oRow->page_id] = $oRow;
				$loop++;
			}
			$dbr->freeResult($oRes);
			Wikia::log(__METHOD__, 'events', 'Read ' . $loop . ' events (for ' . count($result) . ' Wikis) successfully. Next timestamp: ' . $this->mDate);

			$records = count($result);
			if (!empty($result)) {
				$producerDB = new WikiaStatsAutoHubsConsumerDB(DB_MASTER);
				$data = array('blogs' => array(), 'articles' => array(), 'user' => array(), 'tags' => array());
				$loop = 0;
				foreach ($result as $city_id => $rows) {
					$start = time();
					$loop++;
					Wikia::log(__METHOD__, 'events', 'Wikia ' . $city_id . ' (' . $loop . '/' . $records . ') processing: ' . count($rows) . ' rows');

					// resolve wiki info (lang, dbname, sitename, server), cached for 2h
					$memkey = sprintf("%s:wikia:%d", __METHOD__, $city_id);
					$info = $wgMemc->get($memkey);
					if (empty($info)) {
						# wikia
						$oWikia = WikiFactory::getWikiByID($city_id);
						if (!is_object($oWikia)) {
							Wikia::log(__METHOD__, "Wikia not found: " . $city_id);
							continue;
						}
						# server
						$server = WikiFactory::getVarValueByName("wgServer", $city_id);
						$info = array('lang' => $oWikia->city_lang, 'db' => $oWikia->city_dbname, 'sitename' => $oWikia->city_title, 'server' => $server);
						$wgMemc->set($memkey, $info, 60 * 60 * 2);
					}
					// NOTE(review): the && chain only skips when *all four* keys are
					// missing - an || (any key missing) was probably intended
					if (!isset($info['db']) && !isset($info['sitename']) && !isset($info['lang']) && !isset($info['server'])) {
						Wikia::log(__METHOD__, "Wikia not found: " . $city_id);
						continue;
					}

					# initialize per-language buckets
					$lang = $info['lang'];
					if (!isset($data['blogs'][$lang])) {
						$data['blogs'][$lang] = array();
					}
					if (!isset($data['articles'][$lang])) {
						$data['articles'][$lang] = array();
					}
					if (!isset($data['user'][$lang])) {
						$data['user'][$lang] = array();
					}

					# tags assigned to this wiki in WikiFactory
					$oWFTags = new WikiFactoryTags($city_id);
					$tags = $oWFTags->getTags();

					foreach ($rows as $oRow) {
						if (is_object($oRow)) {
							$oUser = User::newFromId($oRow->user_id);
							if (!is_object($oUser)) {
								continue;
							}
							if (NS_BLOG_ARTICLE == $oRow->page_ns) {
								// blog post: count one edit per tag of the wiki
								if (!empty($tags)) {
									foreach ($tags as $id => $val) {
										if (!isset($data['blogs'][$lang][$id])) {
											$data['blogs'][$lang][$id] = array();
										}
										# prepare insert data
										$data['blogs'][$lang][$id][] = array('tb_city_id' => $city_id, 'tb_page_id' => $oRow->page_id, 'tb_tag_id' => $id, 'tb_date' => date("Y-m-d"), 'tb_city_lang' => $lang, 'tb_count' => 1);
									}
								}
							} else {
								// regular article edit: resolve the editing user (cached 2h)...
								$memkey = sprintf("%s:%s:user:%d", __METHOD__, $wgSharedDB, $oRow->user_id);
								$user = $wgMemc->get($memkey);
								if (empty($user)) {
									$groups = $oUser->getGroups();
									$user_groups = implode(";", $groups);
									$user = array('name' => $oUser->getName(), 'groups' => $user_groups);
									$wgMemc->set($memkey, $user, 60 * 60 * 2);
								}
								if (!isset($user['name'])) {
									continue;
								}
								// ...skipping anonymous / IP editors
								if ($user['name'] == $wgIP || User::isIP($user['name'])) {
									continue;
								}
								if (!empty($tags)) {
									foreach ($tags as $id => $val) {
										// dedupe: count each (wiki, page, user, tag) once per day
										$date = date("Y-m-d");
										$mcKey = wfSharedMemcKey("auto_hubs", "unique_control", $city_id, $oRow->page_id, $oRow->user_id, $id, $date);
										$out = $wgMemc->get($mcKey, null);
										if ($out == 1) {
											continue;
										}
										$wgMemc->set($mcKey, 1, 24 * 60 * 60);
										// bots and banned users don't count towards user stats
										$allowed = $oRow->user_is_bot != 'Y' && !in_array($oUser->getName(), $producerDB->getBanedUsers());
										if (!isset($data['user'][$lang][$id]) && $allowed) {
											$data['user'][$lang][$id] = array();
										}
										if (!isset($data['articles'][$lang][$id])) {
											$data['articles'][$lang][$id] = array();
										}
										#
										# prepare insert data
										$data['articles'][$lang][$id][] = array('ta_city_id' => $city_id, 'ta_page_id' => $oRow->page_id, 'ta_tag_id' => $id, 'ta_date' => $date, 'ta_city_lang' => $lang, 'ta_count' => 1);
										if ($allowed) {
											$data['user'][$lang][$id][] = array('tu_user_id' => $oRow->user_id, 'tu_tag_id' => $id, 'tu_date' => $date, 'tu_groups' => $user['groups'], 'tu_username' => addslashes($user['name']), 'tu_city_lang' => $lang, 'tu_count' => 1);
										}
									}
								}
							}
						}
					}
					$end = time();
					$time = Wikia::timeDuration($end - $start);
					Wikia::log(__METHOD__, 'events', 'Wikia ' . $city_id . ' processed in: ' . $time);
				}

				// insert data to database
				# blogs
				$start = time();
				Wikia::log(__METHOD__, 'events', 'Insert ' . count($data['blogs']) . ' blogs');
				$producerDB->insertBlogComment($data['blogs']);
				$end = time();
				$time = Wikia::timeDuration($end - $start);
				Wikia::log(__METHOD__, 'events', 'Inserts done in: ' . $time);
				# articles
				$start = time();
				Wikia::log(__METHOD__, 'events', 'Insert ' . count($data['articles']) . ' articles');
				$producerDB->insertArticleEdit($data['articles']);
				$end = time();
				$time = Wikia::timeDuration($end - $start);
				Wikia::log(__METHOD__, 'events', 'Inserts done in: ' . $time);
				# users
				$start = time();
				Wikia::log(__METHOD__, 'events', 'Insert ' . count($data['user']) . ' users');
				$producerDB->insertUserEdit($data['user']);
				$end = time();
				$time = Wikia::timeDuration($end - $start);
				Wikia::log(__METHOD__, 'events', 'Inserts done in: ' . $time);
				// unset data
				unset($data);
			} else {
				Wikia::log(__METHOD__, "No data found in events table. Last timestamp: " . $this->mDate);
			}
			// the string literal below intentionally contains a line break
			// (preserved from the original source)
			Wikia::log(__METHOD__, "Wait " . self::sleepTime . " sec. 
");
			sleep(self::sleepTime);
		}
	} catch (MWException $e) {
		$mesg = $e->getMessage();
		$class = get_class($e);
		Wikia::log(__METHOD__, 'events', $mesg);
		die('Cannot proceed events data. Message was: ' . $mesg . '. Class was:' . $class);
	}
	// unreachable in practice: the while(1) loop only exits via exception/die
	wfProfileOut(__METHOD__);
}
/**
 * Schedule the image migration to Swift storage for a batch of wikis.
 *
 * Selects up to --limit public wikis that have no city_image_migrate row yet
 * (oldest-edited first), skips the Top-200 WAM wikis plus any IDs preset in
 * $this->disabled_wikis, then shells out to the migrateImagesToSwift script
 * (self::CMD) for each selected wiki, throttling via isLocked() so that at
 * most --procs migrations run concurrently.
 *
 * Options: --limit, --debug, --procs, --force, --wiki (comma-separated IDs), --conf
 */
public function execute() {
	global $wgExternalSharedDB;

	$this->output( "Wikis migration started ... \n" );
	$this->time = time();
	$migrated = 0;

	$limit = $this->getOption( 'limit', self::DEFAULT_LIMIT );
	$debug = $this->hasOption( 'debug' );
	$procs = $this->getOption( 'procs', self::MIGRATE_PROCS );
	$force = $this->hasOption( 'force' );
	$wikis = $this->getOption( 'wiki', '' );

	# don't migrate top 200 Wikis
	$top200Wikis = DataMartService::getWAM200Wikis();
	if ( count( $top200Wikis ) != 200 ) {
		// sanity check: the WAM service should always return exactly 200 entries
		$this->output( "Number of Top 200 Wikis is different than 200 !\n" );
		exit;
	}

	# don't migrate video.wikia.com & corp.wikia.com (IDs preset in $this->disabled_wikis)
	$this->disabled_wikis = array_merge( $top200Wikis, $this->disabled_wikis );

	if ( $debug ) {
		$this->output( "Top 200 Wikis: " . implode( ", ", $top200Wikis ) . "\n" );
	}

	$this->db = $this->getDB( DB_SLAVE, array(), $wgExternalSharedDB );

	// select public wikis with no city_image_migrate row yet (LEFT JOIN + IS NULL),
	// ordered by last edit timestamp so the least active wikis migrate first
	$where = [ 'city_public' => 1, 'city_image_migrate.city_id is null' ];
	if ( !empty( $wikis ) ) {
		// optional explicit comma-separated list of wiki IDs to restrict the batch to
		$where['city_list.city_id'] = explode( ",", $wikis );
	}
	$join = [ 'city_image_migrate.city_id = city_list.city_id', 'city_image_migrate.locked is not null' ];
	$res = $this->db->select(
		[ 'city_list', 'city_image_migrate' ],
		[ 'city_list.city_id', 'city_list.city_dbname' ],
		$where,
		'MigrateImagesToSwift',
		[ 'ORDER BY' => 'city_last_timestamp, city_id', 'LIMIT' => $limit ],
		[ 'city_image_migrate' => [ 'LEFT JOIN', $join ] ]
	);

	// build the migration package, dropping disabled wikis
	// (removed unused counters $i / $x that were never read)
	$to_migrate = [];
	while ( $row = $res->fetchObject() ) {
		$this->output( "\tAdd {$row->city_dbname} to migration package ... " );
		if ( in_array( $row->city_id, $this->disabled_wikis ) ) {
			$this->output( "don't migrate it now \n" );
			continue;
		}
		$to_migrate[$row->city_id] = $row->city_dbname;
		$this->output( "done \n " );
	}

	$this->output( "\n\nRun migrateImagesToSwift script \n" );
	foreach ( $to_migrate as $id => $dbname ) {
		# check how many Wikis is locked and sleep if needed
		$this->isLocked( $procs );

		# run main migration script written by Macbre
		$this->output( "\tMigrate Wiki {$id}: {$dbname} ... \n" );
		$cmd = sprintf( self::CMD, $this->getOption( 'conf' ), $id, $force ? ' --force' : '', self::SCRIPT_PROCS, $this->makePath( $dbname ) );
		if ( $debug ) {
			$this->output( "\n\tRun cmd: {$cmd} \n" );
		}

		// migrations can run for a long time - lift the shell execution time limit
		global $wgMaxShellTime;
		$wgMaxShellTime = 0;

		$result = wfShellExec( $cmd, $retval );
		if ( $retval ) {
			$this->output( "Error code {$retval}: {$result} \n" );
		} else {
			$this->output( "Done in " . Wikia::timeDuration( time() - $this->time ) . "\n" );
		}
		$migrated++;
	}

	$this->output( sprintf( "\nMigrated %d Wikis in %s\n", $migrated, Wikia::timeDuration( time() - $this->time ) ) );
	$this->output( "\nDone!\n" );
}
/**
 * Run XML dump backups for a range of wikis.
 *
 * Builds a city_list WHERE clause from the CLI options, then runs
 * maintenance/dumpBackup.php once per selected wiki in "full" and/or
 * "current" mode, optionally pushing the resulting 7z archive to Amazon S3.
 *
 * @param int|false $from    first city_id (inclusive); false = no lower bound
 * @param int|false $to      city_id upper bound (exclusive); false = no upper bound
 * @param bool      $full    dump full history instead of only current revisions
 * @param array     $options CLI options: db, id, even, odd, both, hide, tmp, s3
 */
function runBackups( $from, $to, $full, $options ) {
	global $IP, $wgWikiaLocalSettingsPath, $wgWikiaAdminSettingsPath, $wgMaxShellTime, $wgMaxShellFileSize, $wgDumpsDisabledWikis;

	$range = array();

	// shortcut for full & current together
	$both = isset( $options["both"] );
	// store backup in another folder, not available for users
	$hide = isset( $options["hide"] );
	// store backup in the system tmp dir
	$use_temp = isset( $options['tmp'] );
	// send backup to Amazon S3 and delete the local copy
	$s3 = isset( $options['s3'] );

	/**
	 * silly trick: if we have id defined we derive $from & $to from it;
	 * if we have db defined we first resolve which id maps to that database
	 */
	if ( isset( $options["db"] ) && is_string( $options["db"] ) ) {
		$city_id = WikiFactory::DBtoID( $options["db"] );
		if ( $city_id ) {
			$from = $to = $city_id;
			$to++;
		}
	} elseif ( isset( $options["id"] ) && is_numeric( $options["id"] ) ) {
		$from = $to = $options["id"];
		$to++;
	} elseif ( isset( $options["even"] ) ) {
		$range[] = "city_id % 2 = 0";
		$range[] = "city_public = 1";
	} elseif ( isset( $options["odd"] ) ) {
		$range[] = "city_id % 2 <> 0";
		$range[] = "city_public = 1";
	} else {
		// when running for all wikis, only take active (public) ones
		$range[] = "city_public = 1";
	}

	// exclude wikis with dumps explicitly disabled
	if ( !empty( $wgDumpsDisabledWikis ) && is_array( $wgDumpsDisabledWikis ) ) {
		$range[] = 'city_id NOT IN (' . implode( ',', $wgDumpsDisabledWikis ) . ')';
	}

	// switch off shell limits - dumps can run long and produce huge files
	$wgMaxShellTime = 0;
	$wgMaxShellFileSize = 0;

	if ( $from !== false && $to !== false ) {
		$range[] = sprintf( "city_id >= %d AND city_id < %d", $from, $to );
		Wikia::log( __METHOD__, "info", "Running from {$from} to {$to}", true, true );
	} else {
		Wikia::log( __METHOD__, "info", "Running for all wikis", true, true );
	}

	// capture the method name so log lines inside the closure below keep the
	// same prefix (__METHOD__ would evaluate to "{closure}" there)
	$method = __METHOD__;

	$dbw = WikiFactory::db( DB_MASTER );
	$sth = $dbw->select( array( "city_list" ), array( "city_id", "city_dbname" ), $range, __METHOD__, array( "ORDER BY" => "city_id" ) );
	while ( $row = $dbw->fetchObject( $sth ) ) {
		// resolve the slave DB server for this wiki so dumpBackup.php reads from it
		// (removed an unused $cluster = WikiFactory::getVarValueByName(...) lookup)
		$server = wfGetDB( DB_SLAVE, 'dumps', $row->city_dbname )->getProperty( "mServer" );
		$basedir = getDirectory( $row->city_dbname, $hide, $use_temp );

		// runs dumpBackup.php for this wiki in the given mode ('full' or 'current'),
		// logs timing, and optionally ships the archive to S3
		$runDump = function ( $mode ) use ( $IP, $wgWikiaLocalSettingsPath, $wgWikiaAdminSettingsPath, $row, $server, $basedir, $s3, $hide, $method ) {
			$status = false;
			$path = sprintf( "%s/%s_pages_%s.xml.7z", $basedir, $row->city_dbname, $mode );
			$time = wfTime();
			Wikia::log( $method, "info", "{$row->city_id} {$row->city_dbname} {$path}", true, true );
			$cmd = array(
				"SERVER_ID={$row->city_id}",
				"php",
				"{$IP}/maintenance/dumpBackup.php",
				"--conf {$wgWikiaLocalSettingsPath}",
				"--aconf {$wgWikiaAdminSettingsPath}",
				"--{$mode}",
				"--xml",
				"--quiet",
				"--server={$server}",
				"--output=" . DumpsOnDemand::DEFAULT_COMPRESSION_FORMAT . ":{$path}"
			);
			wfShellExec( implode( " ", $cmd ), $status );
			$time = Wikia::timeDuration( wfTime() - $time );
			Wikia::log( $method, "info", "{$row->city_id} {$row->city_dbname} status: {$status}, time: {$time}", true, true );
			// push to S3 (public unless hidden) and remove the local copy on success
			if ( $s3 && 0 == DumpsOnDemand::putToAmazonS3( $path, !$hide, MimeMagic::singleton()->guessMimeType( $path ) ) ) {
				unlink( $path );
			}
		};

		if ( $full || $both ) {
			$runDump( 'full' );
		}
		if ( !$full || $both ) {
			$runDump( 'current' );
		}
	}
}
/**
 * Migrate user avatar files from the local/NFS avatars directory to Swift storage.
 *
 * Walks the 16 hex-sharded subdirectories under /images/c/common/avatars{section},
 * queues every file found, then uploads the queue with up to --threads workers.
 * Honours --dry-run (no writes), --stats-only (list + exit), --local, --diff,
 * --no-deletes and --hammer.
 */
public function execute() {
	global $wgDBname, $wgCityId, $wgExternalSharedDB, $wgAvatarsMaintenance;

	$this->debug = $this->hasOption('debug');
	// NOTE(review): both branches of this ternary are 10, so --debug has no effect
	// on the log level here - looks like a bug; confirm the intended verbose level
	// against \Wikia\Swift\Logger\Logger's level constants before changing.
	$this->logger = new \Wikia\Swift\Logger\Logger($this->debug ? 10 : 10, -1, 10);
	$this->logger->setFile('/var/log/migration/' . $wgDBname . '.log');
	$this->logger = $this->logger->prefix($wgDBname);

	$isForced = $this->hasOption('force'); // NOTE(review): unused in this method - presumably consumed elsewhere; verify
	$isDryRun = $this->hasOption('dry-run');
	$this->useDiff = $this->getOption('diff', false);
	$this->useLocalFiles = $this->getOption('local', false);
	$this->useDeletes = !$this->hasOption('no-deletes');

	// clamp worker count to [1, THREADS_MAX]
	$this->threads = intval($this->getOption('threads', self::THREADS_DEFAULT));
	$this->threads = min(self::THREADS_MAX, max(1, $this->threads));

	$this->hammer = $this->getOption('hammer', null);

	$this->parseSection();

	$uploadDir = "/images/c/common/avatars" . $this->section;
	$uploadDir = $this->rewriteLocalPath($uploadDir);
	$this->uploadDir = $uploadDir;

	$this->init();

	if (!is_dir($uploadDir)) {
		$this->fatal(__CLASS__, "Could not read the source directory: {$uploadDir}", self::LOG_MIGRATION_ERRORS);
	}

	// just don't f**k everything!
	// incremental (--local) runs must happen on the master file server
	if ($this->useLocalFiles && !$isDryRun) {
		if (gethostname() !== 'file-s4') {
			$this->fatal(__CLASS__, "Incremental upload requires access to master file system (don't use --local)", self::LOG_MIGRATION_ERRORS);
		}
	}

	if (!empty($this->hammer) && !$isDryRun) {
		$this->fatal(__CLASS__, "Hammer option not supported when not using --dry-run", self::LOG_MIGRATION_ERRORS);
	}

	// one migration is enough
	// refuse a real run unless avatars maintenance mode is switched on
	if (empty($wgAvatarsMaintenance) && !$isDryRun) {
		$this->error("\$wgAvatarsMaintenance = false - avatars maintenance is not switched on, cannot proceed!", 1);
	}

	// ok, so let's start...
	$this->time = time();

	$this->output("Collecting files to upload...\n\n");
	// avatars are sharded into 16 hex-named subdirectories
	foreach (str_split('0123456789abcdef') as $p) {
		$this->processPath($uploadDir, 'avatars' . $this->section, $p);
	}

	$this->output(sprintf("Found %d files...\n\n", count($this->allFiles)));

	if ($this->hasOption('stats-only')) {
		return;
	}

	self::logWikia(__CLASS__, 'migration started', self::LOG_MIGRATION_PROGRESS);

	// block uploads via WikiFactory
	if (!$isDryRun) {
		$this->output("Uploads and avatars operations disabled\n\n");
	} else {
		$this->output("Performing dry run...\n\n");
	}

	$this->processQueue();

	// stats per DC
	// NOTE(review): $statsPerDC is built but never output in this method - dead code? verify
	$statsPerDC = [];
	foreach ($this->timePerDC as $dc => $time) {
		$statsPerDC[] = sprintf("%s took %s", $dc, Wikia::timeDuration(round($time)));
	}

	// summary
	$totalTime = time() - $this->time;
	$report = sprintf('Migrated %d files with %d fails in %s', $this->migratedImagesCnt, $this->migratedImagesFailedCnt, Wikia::timeDuration($totalTime));
	$this->output("\n{$report}\n");

	self::logWikia(__CLASS__, 'migration completed - ' . $report, self::LOG_MIGRATION_PROGRESS);

	// if running in --dry-run, leave now
	if ($isDryRun) {
		$this->output("\nDry run completed!\n");
		return;
	}

	$this->output("\nDone!\n");
}
/**
 * Check and copy this wiki's images between data centres.
 *
 * Prints per-table image counts/sizes, then walks the current (image),
 * archived (oldimage) and deleted (filearchive) revisions, copying each file
 * via copyFile(). With --stats-only it prints the counts and exits.
 */
public function execute() {
	global $wgExternalSharedDB;

	// force migration of wikis with read-only mode
	if ( wfReadOnly() ) {
		global $wgReadOnly;
		$wgReadOnly = false;
	}

	$this->debug = $this->hasOption( 'debug' );
	$this->dryRun = $this->hasOption( 'dry-run' );
	$this->sourceDC = $this->getOption( 'source-dc', self::SOURCE_DC_DEFAULT );
	$this->destDCs = explode( ',', $this->getOption( 'dc', self::DESTINATION_DC_DEFAULT ) );

	$this->init();

	$dbr = $this->getDB( DB_SLAVE );

	// gather per-table image counts and cumulative byte sizes
	$sizeColumns = [ 'filearchive' => 'fa_size', 'image' => 'img_size', 'oldimage' => 'oi_size' ];
	foreach ( $sizeColumns as $tableName => $column ) {
		$stats = $dbr->selectRow( $tableName, [ 'count(*) AS cnt', "SUM({$column}) AS size" ], [], __METHOD__ );
		$this->output( sprintf( "* %s:\t%d images (%d MB)\n", $tableName, $stats->cnt, round( $stats->size / 1024 / 1024 ) ) );
		$this->imagesCnt += $stats->cnt;
		$this->imagesSize += $stats->size;
	}

	$this->output( sprintf( "\n%d image(s) (%d MB) will be checked...\n", $this->imagesCnt, round( $this->imagesSize / 1024 / 1024 ) ) );

	if ( $this->hasOption( 'stats-only' ) ) {
		return;
	}

	// ok, so let's start...
	$this->time = time();

	// block uploads via WikiFactory
	$this->output( "Starting sync...\n\n" );

	// each image class to sync: header line, source table, aliased fields, path resolver
	$imageSources = [
		// (a) current revisions of images - @see http://www.mediawiki.org/wiki/Image_table
		[ "\nA) Current revisions of images - /images\n", 'image',
			[ 'img_name AS name', 'img_size AS size', 'img_sha1 AS hash', 'img_major_mime AS major_mime', 'img_minor_mime AS minor_mime' ],
			'getImagePath' ],
		// (b) old revisions of images - @see http://www.mediawiki.org/wiki/Oldimage_table
		[ "\nB) Old revisions of images - /archive\n", 'oldimage',
			[ 'oi_name AS name', 'oi_archive_name AS archived_name', 'oi_size AS size', 'oi_sha1 AS hash', 'oi_major_mime AS major_mime', 'oi_minor_mime AS minor_mime' ],
			'getOldImagePath' ],
		// (c) deleted images - @see http://www.mediawiki.org/wiki/Filearchive_table
		[ "\nC) Deleted images - /deleted\n", 'filearchive',
			[ 'fa_name AS name', 'fa_storage_key AS storage_key', 'fa_size AS size', 'fa_major_mime AS major_mime', 'fa_minor_mime AS minor_mime' ],
			'getRemovedImagePath' ],
	];

	foreach ( $imageSources as $source ) {
		list( $header, $table, $fields, $pathResolver ) = $source;
		$this->output( $header );
		$rows = $dbr->select( $table, $fields );
		while ( $entry = $rows->fetchRow() ) {
			$this->copyFile( $this->$pathResolver( $entry ), $entry );
		}
	}

	// stats per DC
	$statsPerDC = [];
	foreach ( $this->timePerDC as $dc => $time ) {
		$statsPerDC[] = sprintf( "%s took %s", $dc, Wikia::timeDuration( round( $time ) ) );
	}

	// summary
	$totalTime = time() - $this->time;
	$report = sprintf( 'Checked %d files and copied %d files (%d MB) with %d fails in %s', $this->checkedImagesCnt, $this->migratedImagesCnt, round( $this->migratedImagesSize / 1024 / 1024 ), $this->migratedImagesFailedCnt, Wikia::timeDuration( $totalTime ) );
	$this->output( "\n{$report}\n" );
	$this->output( "\nDone!\n" );
}