/**
 * Recompute sort keys for rows of the categorylinks table, in batches.
 *
 * Options read: 'force', 'dry-run', 'verbose-stats', 'target-collation'
 * and 'previous-collation'.
 *
 * - With 'target-collation' the collation is built through
 *   Collation::factory(); otherwise $wgCategoryCollation and
 *   Collation::singleton() are used.
 * - With 'force' or 'dry-run' no collation filter is applied and no row
 *   count is printed up front. Otherwise rows are restricted either to
 *   'previous-collation' or to rows whose cl_collation differs from the
 *   target, and the method returns early when there is nothing to do.
 * - With 'dry-run' no transaction is opened and no row is written.
 */
public function execute() {
	global $wgCategoryCollation;

	$dbw = $this->getDB( DB_MASTER );
	$force = $this->getOption( 'force' );
	$dryRun = $this->getOption( 'dry-run' );
	$verboseStats = $this->getOption( 'verbose-stats' );
	if ( $this->hasOption( 'target-collation' ) ) {
		$collationName = $this->getOption( 'target-collation' );
		$collation = Collation::factory( $collationName );
	} else {
		$collationName = $wgCategoryCollation;
		$collation = Collation::singleton();
	}

	// Collation sanity check: in some cases the constructor will work,
	// but this will raise an exception, breaking all category pages
	$collation->getFirstLetter( 'MediaWiki' );

	$options = array(
		'LIMIT' => self::BATCH_SIZE,
		'ORDER BY' => 'cl_to, cl_type, cl_from',
		'STRAIGHT_JOIN',
	);

	if ( $force || $dryRun ) {
		$collationConds = array();
	} else {
		if ( $this->hasOption( 'previous-collation' ) ) {
			$collationConds = array(
				'cl_collation' => $this->getOption( 'previous-collation' ),
			);
		} else {
			$collationConds = array(
				0 => 'cl_collation != ' . $dbw->addQuotes( $collationName ),
			);
		}

		$count = $dbw->estimateRowCount(
			'categorylinks',
			'*',
			$collationConds,
			__METHOD__
		);
		// Improve estimate if feasible
		if ( $count < 1000000 ) {
			$count = $dbw->selectField(
				'categorylinks',
				'COUNT(*)',
				$collationConds,
				__METHOD__
			);
		}
		if ( $count == 0 ) {
			$this->output( "Collations up-to-date.\n" );

			return;
		}
		$this->output( "Fixing collation for $count rows.\n" );
	}

	// From here on $count is the running total of processed rows.
	$count = 0;
	$batchCount = 0;
	$batchConds = array();
	do {
		$this->output( "Selecting next " . self::BATCH_SIZE . " rows..." );
		$res = $dbw->select(
			array( 'categorylinks', 'page' ),
			array(
				'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
				'cl_sortkey', 'cl_type', 'page_namespace', 'page_title'
			),
			array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ),
			__METHOD__,
			$options
		);
		$this->output( " processing..." );

		if ( !$dryRun ) {
			$dbw->begin( __METHOD__ );
		}

		// Remember the last row of this batch explicitly: the loop variable
		// is never assigned when the result set is empty, so it must not be
		// read after the loop.
		$lastRow = null;
		foreach ( $res as $row ) {
			$lastRow = $row;
			$title = Title::newFromRow( $row );
			if ( !$row->cl_collation ) {
				# This is an old-style row, so the sortkey needs to be
				# converted.
				if ( $row->cl_sortkey == $title->getText()
					|| $row->cl_sortkey == $title->getPrefixedText()
				) {
					$prefix = '';
				} else {
					# Custom sortkey, use it as a prefix
					$prefix = $row->cl_sortkey;
				}
			} else {
				$prefix = $row->cl_sortkey_prefix;
			}

			# cl_type will be wrong for lots of pages if cl_collation is 0,
			# so let's update it while we're here.
			if ( $title->getNamespace() == NS_CATEGORY ) {
				$type = 'subcat';
			} elseif ( $title->getNamespace() == NS_FILE ) {
				$type = 'file';
			} else {
				$type = 'page';
			}

			$newSortKey = $collation->getSortKey(
				$title->getCategorySortkey( $prefix )
			);
			if ( $verboseStats ) {
				$this->updateSortKeySizeHistogram( $newSortKey );
			}

			if ( !$dryRun ) {
				$dbw->update(
					'categorylinks',
					array(
						'cl_sortkey' => $newSortKey,
						'cl_sortkey_prefix' => $prefix,
						'cl_collation' => $collationName,
						'cl_type' => $type,
						// Self-assignment of the timestamp column, kept as in
						// the original update.
						'cl_timestamp = cl_timestamp',
					),
					array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
					__METHOD__
				);
			}
		}

		if ( !$dryRun ) {
			$dbw->commit( __METHOD__ );
		}

		// Continue the next batch after the last row seen in this one.
		if ( $lastRow ) {
			$batchConds = array( $this->getBatchCondition( $lastRow ) );
		}

		$count += $res->numRows();
		$this->output( "$count done.\n" );

		if ( !$dryRun && ++$batchCount % self::SYNC_INTERVAL == 0 ) {
			$this->output( "Waiting for slaves ... " );
			wfWaitForSlaves();
			$this->output( "done\n" );
		}
	} while ( $res->numRows() == self::BATCH_SIZE );

	$this->output( "$count rows processed\n" );

	if ( $verboseStats ) {
		$this->output( "\n" );
		$this->showSortKeySizeHistogram();
	}
}
/**
 * Test that the expected first letter is returned for a string under a
 * given collation.
 *
 * @param string $collation Collation name (aka uca-en)
 * @param string $string String to get first letter of
 * @param string $firstLetter Expected first letter.
 *
 * @dataProvider firstLetterProvider
 */
public function testGetFirstLetter($collation, $string, $firstLetter) {
    $col = Collation::factory($collation);
    $this->assertEquals($firstLetter, $col->getFirstLetter($string));
}
/**
 * Recompute sort keys for rows of the categorylinks table, in batches.
 *
 * Options read: 'force', 'dry-run', 'verbose-stats', 'target-collation'
 * and 'previous-collation'.
 *
 * - With 'target-collation' the collation is built through
 *   Collation::factory(); otherwise $wgCategoryCollation and
 *   Collation::singleton() are used.
 * - With 'force' or 'dry-run' no collation filter is applied and no row
 *   count is printed up front. Otherwise rows are restricted either to
 *   'previous-collation' or to rows whose cl_collation differs from the
 *   target, and the method returns early when there is nothing to do.
 * - The up-front count is read through the DB_SLAVE handle; batch selects
 *   and updates go through the DB_MASTER handle.
 * - With 'dry-run' no transaction is opened and no row is written.
 */
public function execute() {
    global $wgCategoryCollation;

    $dbw = $this->getDB(DB_MASTER);
    $dbr = $this->getDB(DB_SLAVE);
    $force = $this->getOption('force');
    $dryRun = $this->getOption('dry-run');
    $verboseStats = $this->getOption('verbose-stats');
    if ($this->hasOption('target-collation')) {
        $collationName = $this->getOption('target-collation');
        $collation = Collation::factory($collationName);
    } else {
        $collationName = $wgCategoryCollation;
        $collation = Collation::singleton();
    }

    // Collation sanity check: in some cases the constructor will work,
    // but this will raise an exception, breaking all category pages
    $collation->getFirstLetter('MediaWiki');

    // Locally at least, (my local is a rather old version of mysql)
    // mysql seems to filesort if there is both an equality
    // (but not for an inequality) condition on cl_collation in the
    // WHERE and it is also the first item in the ORDER BY.
    if ($this->hasOption('previous-collation')) {
        $orderBy = 'cl_to, cl_type, cl_from';
    } else {
        $orderBy = 'cl_collation, cl_to, cl_type, cl_from';
    }
    $options = [
        'LIMIT' => self::BATCH_SIZE,
        'ORDER BY' => $orderBy,
        'STRAIGHT_JOIN',
    ];

    if ($force || $dryRun) {
        $collationConds = [];
    } else {
        if ($this->hasOption('previous-collation')) {
            $collationConds = [
                'cl_collation' => $this->getOption('previous-collation'),
            ];
        } else {
            $collationConds = [
                0 => 'cl_collation != ' . $dbw->addQuotes($collationName),
            ];
        }

        $count = $dbr->estimateRowCount('categorylinks', '*', $collationConds, __METHOD__);
        // Improve estimate if feasible
        if ($count < 1000000) {
            $count = $dbr->selectField('categorylinks', 'COUNT(*)', $collationConds, __METHOD__);
        }
        if ($count == 0) {
            $this->output("Collations up-to-date.\n");

            return;
        }
        $this->output("Fixing collation for {$count} rows.\n");
        wfWaitForSlaves();
    }

    // cl_type must be selected as a number for proper paging because
    // of how enums behave; only done on mysql. The database type does not
    // change between batches, so this is decided once.
    if ($dbw->getType() === 'mysql') {
        $clType = 'cl_type+0 AS "cl_type_numeric"';
    } else {
        $clType = 'cl_type';
    }

    // From here on $count is the running total of processed rows.
    $count = 0;
    $batchCount = 0;
    $batchConds = [];
    do {
        $this->output("Selecting next " . self::BATCH_SIZE . " rows...");

        $res = $dbw->select(
            ['categorylinks', 'page'],
            [
                'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
                'cl_sortkey', $clType, 'page_namespace', 'page_title',
            ],
            array_merge($collationConds, $batchConds, ['cl_from = page_id']),
            __METHOD__,
            $options
        );
        $this->output(" processing...");

        if (!$dryRun) {
            $this->beginTransaction($dbw, __METHOD__);
        }

        // Only the last row of a batch determines where the next batch
        // starts, so remember it and build the condition once after the loop.
        $lastRow = null;
        foreach ($res as $row) {
            $lastRow = $row;
            $title = Title::newFromRow($row);
            if (!$row->cl_collation) {
                # This is an old-style row, so the sortkey needs to be
                # converted.
                if ($row->cl_sortkey == $title->getText()
                    || $row->cl_sortkey == $title->getPrefixedText()
                ) {
                    $prefix = '';
                } else {
                    # Custom sortkey, use it as a prefix
                    $prefix = $row->cl_sortkey;
                }
            } else {
                $prefix = $row->cl_sortkey_prefix;
            }

            # cl_type will be wrong for lots of pages if cl_collation is 0,
            # so let's update it while we're here.
            if ($title->getNamespace() == NS_CATEGORY) {
                $type = 'subcat';
            } elseif ($title->getNamespace() == NS_FILE) {
                $type = 'file';
            } else {
                $type = 'page';
            }

            $newSortKey = $collation->getSortKey($title->getCategorySortkey($prefix));
            if ($verboseStats) {
                $this->updateSortKeySizeHistogram($newSortKey);
            }

            if (!$dryRun) {
                $dbw->update(
                    'categorylinks',
                    [
                        'cl_sortkey' => $newSortKey,
                        'cl_sortkey_prefix' => $prefix,
                        'cl_collation' => $collationName,
                        'cl_type' => $type,
                        // Self-assignment of the timestamp column, kept as in
                        // the original update.
                        'cl_timestamp = cl_timestamp',
                    ],
                    ['cl_from' => $row->cl_from, 'cl_to' => $row->cl_to],
                    __METHOD__
                );
            }
        }

        // An empty batch leaves the previous paging condition untouched.
        if ($lastRow) {
            $batchConds = [$this->getBatchCondition($lastRow, $dbw)];
        }

        if (!$dryRun) {
            $this->commitTransaction($dbw, __METHOD__);
        }

        $count += $res->numRows();
        $this->output("{$count} done.\n");

        if (!$dryRun && ++$batchCount % self::SYNC_INTERVAL == 0) {
            $this->output("Waiting for slaves ... ");
            wfWaitForSlaves();
            $this->output("done\n");
        }
    } while ($res->numRows() == self::BATCH_SIZE);

    $this->output("{$count} rows processed\n");

    if ($verboseStats) {
        $this->output("\n");
        $this->showSortKeySizeHistogram();
    }
}