				break;
			}
			$text .= $buffer;
		}

		$gotbytes = strlen($text);
		if ($gotbytes != $nbytes) {
			$this->progress("Expected {$nbytes} bytes from database subprocess, got {$gotbytes}");

			return false;
		}

		// Do normalization in the dump thread...
		$stripped = str_replace("\r", "", $text);
		$normalized = $wgContLang->normalize($stripped);

		return $normalized;
	}
}

$dumper = new TextPassDumper($argv);

if (!isset($options['help'])) {
	$dumper->dump(WikiExporter::FULL);
} else {
	$dumper->progress(<<<ENDS
This script postprocesses XML dumps from dumpBackup.php to add
page text which was stubbed out (using --stub).

XML input is accepted on stdin.
XML output is sent to stdout; progress reports are sent to stderr.

Usage: php dumpTextPass.php [<options>]
Options:
  --stub=<type>:<file> To load a compressed stub dump instead of stdin
  --prefetch=<type>:<file> Use a prior dump file as a text source, to save
                           pressure on the database.
	/**
	 * Ensures that checkpoint dumps are used and written, by successively increasing the
	 * stub size and dumping until the duration crosses a threshold.
	 *
	 * @param string $checkpointFormat Either "file" for plain text or "gzip" for gzipped
	 *   checkpoint files.
	 */
	private function checkpointHelper($checkpointFormat = "file") {
		// Getting temporary names
		$nameStub = $this->getNewTempFile();
		$nameOutputDir = $this->getNewTempDirectory();

		$stderr = fopen('php://output', 'a');
		if ($stderr === false) {
			$this->fail("Could not open stream for stderr");
		}

		$iterations = 32; // We'll start with that many iterations of revisions
		// in stub. Make sure that the generated volume is above the buffer size
		// set below. Otherwise, the checkpointing does not trigger.

		$lastDuration = 0;
		$minDuration = 2; // We want the dump to take at least this many seconds
		$checkpointAfter = 0.5; // Generate checkpoint after this many seconds

		// Until a dump takes at least $minDuration seconds, perform a dump and check
		// duration. If the dump did not take long enough, increase the iteration
		// count to generate a bigger stub file next time.
		while ($lastDuration < $minDuration) {
			// Setting up the dump
			wfRecursiveRemoveDir($nameOutputDir);
			$this->assertTrue(wfMkdirParents($nameOutputDir),
				"Creating temporary output directory");
			$this->setUpStub($nameStub, $iterations);
			$dumper = new TextPassDumper(array(
				"--stub=file:" . $nameStub,
				"--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
				"--maxtime=1",
				"--buffersize=32768",
				"--checkpointfile=checkpoint-%s-%s.xml.gz"));
			$dumper->setDb($this->db);
			$dumper->maxTimeAllowed = $checkpointAfter; // Patching maxTime from 1 minute
			$dumper->stderr = $stderr;

			// The actual dump and taking time
			$ts_before = microtime(true);
			$dumper->dump(WikiExporter::FULL, WikiExporter::TEXT);
			$ts_after = microtime(true);
			$lastDuration = $ts_after - $ts_before;

			// Handling increasing the iteration count for the stubs
			if ($lastDuration < $minDuration) {
				$old_iterations = $iterations;
				if ($lastDuration > 0.2) {
					// lastDuration is big enough to allow an educated guess
					$factor = ($minDuration + 0.5) / $lastDuration;
					if ($factor > 1.1 && $factor < 100) {
						// educated guess is reasonable
						$iterations = (int)($iterations * $factor);
					}
				}

				if ($old_iterations == $iterations) {
					// Heuristics were not applied, so we just *2.
					$iterations *= 2;
				}

				$this->assertLessThan(50000, $iterations,
					"Emergency stop against infinitely increasing iteration "
					. "count (last duration: {$lastDuration})");
			}
		}

		// The dump (hopefully) did take long enough to produce more than one
		// checkpoint file.
		//
		// We now check all the checkpoint files for validity.

		$files = scandir($nameOutputDir);
		$this->assertTrue(asort($files), "Sorting files in temporary directory");
		$fileOpened = false;
		$lookingForPage = 1;
		$checkpointFiles = 0;

		// Each run of the following loop body tries to handle exactly 1 /page/ (not
		// iteration of stub content). $i is only increased after having treated page 4.
		for ($i = 0; $i < $iterations;) {
			// 1. Assuring a file is opened and ready. Skipping across header if
			//    necessary.
			if (!$fileOpened) {
				$this->assertNotEmpty($files, "No more existing dump files, "
					. "but not yet all pages found");
				$fname = array_shift($files);
				while ($fname == "." || $fname == "..") {
					$this->assertNotEmpty($files, "No more existing dump"
						. " files, but not yet all pages found");
					$fname = array_shift($files);
				}
				if ($checkpointFormat == "gzip") {
					$this->gunzip($nameOutputDir . "/" . $fname);
				}
				$this->assertDumpStart($nameOutputDir . "/" . $fname);
				$fileOpened = true;
				$checkpointFiles++;
			}

			// 2. Performing a single page check
			switch ($lookingForPage) {
				case 1:
					// Page 1
					$this->assertPageStart($this->pageId1 + $i * self::$numOfPages, NS_MAIN,
						"BackupDumperTestP1");
					$this->assertRevision($this->revId1_1 + $i * self::$numOfRevs,
						"BackupDumperTestP1Summary1",
						$this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87",
						"BackupDumperTestP1Text1");
					$this->assertPageEnd();

					$lookingForPage = 2;
					break;

				case 2:
					// Page 2
					$this->assertPageStart($this->pageId2 + $i * self::$numOfPages, NS_MAIN,
						"BackupDumperTestP2");
					$this->assertRevision($this->revId2_1 + $i * self::$numOfRevs,
						"BackupDumperTestP2Summary1",
						$this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2",
						"BackupDumperTestP2Text1");
					$this->assertRevision($this->revId2_2 + $i * self::$numOfRevs,
						"BackupDumperTestP2Summary2",
						$this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95",
						"BackupDumperTestP2Text2", $this->revId2_1 + $i * self::$numOfRevs);
					$this->assertRevision($this->revId2_3 + $i * self::$numOfRevs,
						"BackupDumperTestP2Summary3",
						$this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r",
						"BackupDumperTestP2Text3", $this->revId2_2 + $i * self::$numOfRevs);
					$this->assertRevision($this->revId2_4 + $i * self::$numOfRevs,
						"BackupDumperTestP2Summary4 extra",
						$this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv",
						"BackupDumperTestP2Text4 some additional Text",
						$this->revId2_3 + $i * self::$numOfRevs);
					$this->assertPageEnd();

					$lookingForPage = 4;
					break;

				case 4:
					// Page 4
					$this->assertPageStart($this->pageId4 + $i * self::$numOfPages, NS_TALK,
						"Talk:BackupDumperTestP1");
					$this->assertRevision($this->revId4_1 + $i * self::$numOfRevs,
						"Talk BackupDumperTestP1 Summary1",
						$this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe",
						"TALK ABOUT BACKUPDUMPERTESTP1 TEXT1",
						false, "BackupTextPassTestModel", "text/plain");
					$this->assertPageEnd();

					$lookingForPage = 1;

					// We dealt with the whole iteration.
					$i++;
					break;

				default:
					$this->fail("Bad setting for lookingForPage ({$lookingForPage})");
			}

			// 3. Checking for the end of the current checkpoint file
			if ($this->xml->nodeType == XMLReader::END_ELEMENT
				&& $this->xml->name == "mediawiki"
			) {
				$this->assertDumpEnd();
				$fileOpened = false;
			}
		}

		// Assuring we completely read all files ...
		$this->assertFalse($fileOpened, "Currently read file still open?");
		$this->assertEmpty($files, "Remaining unchecked files");

		// ... and have dealt with more than one checkpoint file
		$this->assertGreaterThan(1, $checkpointFiles,
			"expected more than 1 checkpoint to have been created. "
			. "Checkpoint interval is {$checkpointAfter} seconds, maybe your computer is too fast?");

		$this->expectETAOutput();
	}
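	// A minimal sketch of how this helper might be driven from the surrounding test
	// class: one test per supported $checkpointFormat value. The method names below
	// are illustrative assumptions, not taken from the fragment above.
	function testCheckpointPlain() {
		$this->checkpointHelper();
	}

	function testCheckpointGzip() {
		$this->checkpointHelper("gzip");
	}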
				$this->thisRev .= $data;
			} elseif ($this->state == "page") {
				$this->thisPage .= $data;
			}
		}
		$this->buffer .= htmlspecialchars($data);
	}

	function clearOpenElement($style) {
		if ($this->openElement) {
			$this->buffer .= wfElement($this->openElement[0], $this->openElement[1], $style);
			$this->openElement = false;
		}
	}
}

$dumper = new TextPassDumper($argv);

if (!isset($options['help'])) {
	$dumper->dump();
} else {
	$dumper->progress(<<<END
This script postprocesses XML dumps from dumpBackup.php to add
page text which was stubbed out (using --stub).

XML input is accepted on stdin.
XML output is sent to stdout; progress reports are sent to stderr.

Usage: php dumpTextPass.php [<options>]
Options:
  --stub=<type>:<file> To load a compressed stub dump instead of stdin
  --prefetch=<type>:<file> Use a prior dump file as a text source, to save
                           pressure on the database.
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

$originalDir = getcwd();

require_once __DIR__ . '/commandLine.inc';
require_once __DIR__ . '/backupTextPass.inc';

$dumper = new TextPassDumper($argv);

if (!isset($options['help'])) {
	$dumper->dump(true);
} else {
	$dumper->progress(<<<ENDS
This script postprocesses XML dumps from dumpBackup.php to add
page text which was stubbed out (using --stub).

XML input is accepted on stdin.
XML output is sent to stdout; progress reports are sent to stderr.

Usage: php dumpTextPass.php [<options>]
Options:
  --stub=<type>:<file> To load a compressed stub dump instead of stdin
  --prefetch=<type>:<file> Use a prior dump file as a text source, to save
                           pressure on the database.