Exemple #1
0
                break;
            }
            $text .= $buffer;
        }
        $gotbytes = strlen($text);
        if ($gotbytes != $nbytes) {
            $this->progress("Expected {$nbytes} bytes from database subprocess, got {$gotbytes} ");
            return false;
        }
        // Do normalization in the dump thread...
        $stripped = str_replace("\r", "", $text);
        $normalized = $wgContLang->normalize($stripped);
        return $normalized;
    }
}
$dumper = new TextPassDumper($argv);
if (!isset($options['help'])) {
    $dumper->dump(WikiExporter::FULL);
} else {
    $dumper->progress(<<<ENDS
This script postprocesses XML dumps from dumpBackup.php to add
page text which was stubbed out (using --stub).

XML input is accepted on stdin.
XML output is sent to stdout; progress reports are sent to stderr.

Usage: php dumpTextPass.php [<options>]
Options:
  --stub=<type>:<file> To load a compressed stub dump instead of stdin
  --prefetch=<type>:<file> Use a prior dump file as a text source, to save
\t\t\t  pressure on the database.
 /**
  * Ensures that checkpoint dumps are used and written, by successively increasing the
  * stub size and dumping until the duration crosses a threshold.
  *
  * The method works in two phases:
  *  1. Repeatedly generate a stub with $iterations iterations of revisions and run a
  *     full text-pass dump on it, growing $iterations until a single dump takes at
  *     least $minDuration seconds.
  *  2. Walk through every file found in the output directory (in sorted order) and
  *     assert that pages 1, 2 and 4 of each iteration appear in sequence, that all
  *     files are consumed, and that more than one checkpoint file was produced.
  *
  * @param string $checkpointFormat Either "file" for plain text or "gzip" for gzipped
  *   checkpoint files.
  */
 private function checkpointHelper($checkpointFormat = "file")
 {
     // Getting temporary names
     $nameStub = $this->getNewTempFile();
     $nameOutputDir = $this->getNewTempDirectory();
     // The dumper's "stderr" is pointed at php://output.
     $stderr = fopen('php://output', 'a');
     if ($stderr === false) {
         $this->fail("Could not open stream for stderr");
     }
     // We'll start with that many iterations of revisions
     // in stub. Make sure that the generated volume is above the buffer size
     // set below. Otherwise, the checkpointing does not trigger.
     $iterations = 32;
     $lastDuration = 0;
     // We want the dump to take at least this many seconds
     $minDuration = 2;
     // Generate checkpoint after this many seconds
     $checkpointAfter = 0.5;
     // Until a dump takes at least $minDuration seconds, perform a dump and check
     // duration. If the dump did not take long enough increase the iteration
     // count, to generate a bigger stub file next time.
     while ($lastDuration < $minDuration) {
         // Setting up the dump: start from an empty output directory each round
         wfRecursiveRemoveDir($nameOutputDir);
         $this->assertTrue(wfMkdirParents($nameOutputDir), "Creating temporary output directory ");
         $this->setUpStub($nameStub, $iterations);
         $dumper = new TextPassDumper(array("--stub=file:" . $nameStub, "--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full", "--maxtime=1", "--buffersize=32768", "--checkpointfile=checkpoint-%s-%s.xml.gz"));
         $dumper->setDb($this->db);
         // Patching maxTime from 1 minute
         $dumper->maxTimeAllowed = $checkpointAfter;
         $dumper->stderr = $stderr;
         // The actual dump and taking time
         $ts_before = microtime(true);
         $dumper->dump(WikiExporter::FULL, WikiExporter::TEXT);
         $ts_after = microtime(true);
         $lastDuration = $ts_after - $ts_before;
         // Handling increasing the iteration count for the stubs
         if ($lastDuration < $minDuration) {
             $old_iterations = $iterations;
             if ($lastDuration > 0.2) {
                 // lastDuration is big enough, to allow an educated guess
                 $factor = ($minDuration + 0.5) / $lastDuration;
                 if ($factor > 1.1 && $factor < 100) {
                     // educated guess is reasonable
                     $iterations = (int) ($iterations * $factor);
                 }
             }
             if ($old_iterations === $iterations) {
                 // Heuristics were not applied, so we just *2.
                 $iterations *= 2;
             }
             $this->assertLessThan(50000, $iterations, "Emergency stop against infinitely increasing iteration " . "count ( last duration: {$lastDuration} )");
         }
     }
     // The dump (hopefully) did take long enough to produce more than one
     // checkpoint file.
     //
     // We now check all the checkpoint files for validity.
     $files = scandir($nameOutputDir);
     // scandir() returns false on failure; report that explicitly instead of
     // handing a non-array to asort() below.
     $this->assertNotSame(false, $files, "Reading files in temporary directory");
     $this->assertTrue(asort($files), "Sorting files in temporary directory");
     $fileOpened = false;
     $lookingForPage = 1;
     $checkpointFiles = 0;
     // Each run of the following loop body tries to handle exactly 1 /page/ (not
     // iteration of stub content). $i is only increased after having treated page 4.
     for ($i = 0; $i < $iterations;) {
         // 1. Assuring a file is opened and ready. Skipping across header if
         //    necessary.
         if (!$fileOpened) {
             $this->assertNotEmpty($files, "No more existing dump files, " . "but not yet all pages found");
             $fname = array_shift($files);
             // Skip the directory self/parent entries reported by scandir()
             while ($fname === "." || $fname === "..") {
                 $this->assertNotEmpty($files, "No more existing dump" . " files, but not yet all pages found");
                 $fname = array_shift($files);
             }
             if ($checkpointFormat === "gzip") {
                 $this->gunzip($nameOutputDir . "/" . $fname);
             }
             $this->assertDumpStart($nameOutputDir . "/" . $fname);
             $fileOpened = true;
             $checkpointFiles++;
         }
         // 2. Performing a single page check
         switch ($lookingForPage) {
             case 1:
                 // Page 1
                 $this->assertPageStart($this->pageId1 + $i * self::$numOfPages, NS_MAIN, "BackupDumperTestP1");
                 $this->assertRevision($this->revId1_1 + $i * self::$numOfRevs, "BackupDumperTestP1Summary1", $this->textId1_1, false, "0bolhl6ol7i6x0e7yq91gxgaan39j87", "BackupDumperTestP1Text1");
                 $this->assertPageEnd();
                 $lookingForPage = 2;
                 break;
             case 2:
                 // Page 2
                 $this->assertPageStart($this->pageId2 + $i * self::$numOfPages, NS_MAIN, "BackupDumperTestP2");
                 $this->assertRevision($this->revId2_1 + $i * self::$numOfRevs, "BackupDumperTestP2Summary1", $this->textId2_1, false, "jprywrymfhysqllua29tj3sc7z39dl2", "BackupDumperTestP2Text1");
                 $this->assertRevision($this->revId2_2 + $i * self::$numOfRevs, "BackupDumperTestP2Summary2", $this->textId2_2, false, "b7vj5ks32po5m1z1t1br4o7scdwwy95", "BackupDumperTestP2Text2", $this->revId2_1 + $i * self::$numOfRevs);
                 $this->assertRevision($this->revId2_3 + $i * self::$numOfRevs, "BackupDumperTestP2Summary3", $this->textId2_3, false, "jfunqmh1ssfb8rs43r19w98k28gg56r", "BackupDumperTestP2Text3", $this->revId2_2 + $i * self::$numOfRevs);
                 $this->assertRevision($this->revId2_4 + $i * self::$numOfRevs, "BackupDumperTestP2Summary4 extra", $this->textId2_4, false, "6o1ciaxa6pybnqprmungwofc4lv00wv", "BackupDumperTestP2Text4 some additional Text", $this->revId2_3 + $i * self::$numOfRevs);
                 $this->assertPageEnd();
                 // This check sequence goes from page 2 straight to page 4.
                 $lookingForPage = 4;
                 break;
             case 4:
                 // Page 4
                 $this->assertPageStart($this->pageId4 + $i * self::$numOfPages, NS_TALK, "Talk:BackupDumperTestP1");
                 $this->assertRevision($this->revId4_1 + $i * self::$numOfRevs, "Talk BackupDumperTestP1 Summary1", $this->textId4_1, false, "nktofwzd0tl192k3zfepmlzxoax1lpe", "TALK ABOUT BACKUPDUMPERTESTP1 TEXT1", false, "BackupTextPassTestModel", "text/plain");
                 $this->assertPageEnd();
                 $lookingForPage = 1;
                 // We dealt with the whole iteration.
                 $i++;
                 break;
             default:
                 $this->fail("Bad setting for lookingForPage ({$lookingForPage})");
         }
         // 3. Checking for the end of the current checkpoint file
         if ($this->xml->nodeType === XMLReader::END_ELEMENT && $this->xml->name === "mediawiki") {
             $this->assertDumpEnd();
             // Force step 1 to open the next file on the following pass
             $fileOpened = false;
         }
     }
     // Assuring we completely read all files ...
     $this->assertFalse($fileOpened, "Currently read file still open?");
     $this->assertEmpty($files, "Remaining unchecked files");
     // ... and have dealt with more than one checkpoint file
     $this->assertGreaterThan(1, $checkpointFiles, "expected more than 1 checkpoint to have been created. " . "Checkpoint interval is {$checkpointAfter} seconds, maybe your computer is too fast?");
     $this->expectETAOutput();
 }
                $this->thisRev .= $data;
            } elseif ($this->state == "page") {
                $this->thisPage .= $data;
            }
        }
        $this->buffer .= htmlspecialchars($data);
    }
    /**
     * Flushes the pending open element, if there is one, into the buffer.
     *
     * The pending element's first two entries are handed to wfElement()
     * together with $style, the result is appended to $this->buffer, and
     * the pending element is then reset to false. Does nothing when no
     * element is pending.
     *
     * @param mixed $style Passed through unchanged as the third argument
     *   of wfElement().
     */
    function clearOpenElement($style)
    {
        if (!$this->openElement) {
            // Nothing pending, nothing to flush.
            return;
        }
        $pending = $this->openElement;
        $rendered = wfElement($pending[0], $pending[1], $style);
        $this->buffer .= $rendered;
        $this->openElement = false;
    }
}
$dumper = new TextPassDumper($argv);
if (true) {
    $dumper->dump();
} else {
    $dumper->progress(<<<END
This script postprocesses XML dumps from dumpBackup.php to add
page text which was stubbed out (using --stub).

XML input is accepted on stdin.
XML output is sent to stdout; progress reports are sent to stderr.

Usage: php dumpTextPass.php [<options>]
Options:
  --stub=<type>:<file> To load a compressed stub dump instead of stdin
  --prefetch=<type>:<file> Use a prior dump file as a text source, to save
              pressure on the database.
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */
$originalDir = getcwd();
require_once __DIR__ . '/commandLine.inc';
require_once __DIR__ . '/backupTextPass.inc';
$dumper = new TextPassDumper($argv);
if (!isset($options['help'])) {
    $dumper->dump(true);
} else {
    $dumper->progress(<<<ENDS
This script postprocesses XML dumps from dumpBackup.php to add
page text which was stubbed out (using --stub).

XML input is accepted on stdin.
XML output is sent to stdout; progress reports are sent to stderr.

Usage: php dumpTextPass.php [<options>]
Options:
  --stub=<type>:<file> To load a compressed stub dump instead of stdin
  --prefetch=<type>:<file> Use a prior dump file as a text source, to save
\t\t\t  pressure on the database.