Exemplo n.º 1
0
	/**
	 * Recompute the sort key, sort key prefix, collation name and type of
	 * rows in the categorylinks table, working in batches of self::BATCH_SIZE.
	 *
	 * Options read: 'force', 'dry-run', 'verbose-stats', 'target-collation'
	 * and 'previous-collation'. With 'force' or 'dry-run' no collation filter
	 * is applied, so every row joined to the page table is visited; with
	 * 'dry-run' nothing is written to the database.
	 */
	public function execute() {
		global $wgCategoryCollation;

		$dbw = $this->getDB( DB_MASTER );
		$force = $this->getOption( 'force' );
		$dryRun = $this->getOption( 'dry-run' );
		$verboseStats = $this->getOption( 'verbose-stats' );
		if ( $this->hasOption( 'target-collation' ) ) {
			$collationName = $this->getOption( 'target-collation' );
			$collation = Collation::factory( $collationName );
		} else {
			$collationName = $wgCategoryCollation;
			$collation = Collation::singleton();
		}

		// Collation sanity check: in some cases the constructor will work,
		// but this will raise an exception, breaking all category pages
		$collation->getFirstLetter( 'MediaWiki' );

		$options = array(
			'LIMIT' => self::BATCH_SIZE,
			'ORDER BY' => 'cl_to, cl_type, cl_from',
			'STRAIGHT_JOIN',
		);

		// Start from an explicit empty condition list so that no branch
		// below assigns into an undefined variable.
		$collationConds = array();
		if ( !$force && !$dryRun ) {
			if ( $this->hasOption( 'previous-collation' ) ) {
				$collationConds['cl_collation'] = $this->getOption( 'previous-collation' );
			} else {
				$collationConds[] = 'cl_collation != ' . $dbw->addQuotes( $collationName );
			}

			$count = $dbw->estimateRowCount(
				'categorylinks',
				'*',
				$collationConds,
				__METHOD__
			);
			// Improve estimate if feasible
			if ( $count < 1000000 ) {
				$count = $dbw->selectField(
					'categorylinks',
					'COUNT(*)',
					$collationConds,
					__METHOD__
				);
			}
			if ( $count == 0 ) {
				$this->output( "Collations up-to-date.\n" );
				return;
			}
			$this->output( "Fixing collation for $count rows.\n" );
		}

		$count = 0;
		$batchCount = 0;
		$batchConds = array();
		do {
			$this->output( "Selecting next " . self::BATCH_SIZE . " rows..." );
			$res = $dbw->select(
				array( 'categorylinks', 'page' ),
				array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
					'cl_sortkey', 'cl_type', 'page_namespace', 'page_title'
				),
				array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ),
				__METHOD__,
				$options
			);
			$this->output( " processing..." );

			if ( !$dryRun ) {
				$dbw->begin( __METHOD__ );
			}
			// Remember the last row of this batch explicitly. Reading the
			// foreach variable after the loop is undefined when the very
			// first result set is empty.
			$lastRow = null;
			foreach ( $res as $row ) {
				$title = Title::newFromRow( $row );
				if ( !$row->cl_collation ) {
					# This is an old-style row, so the sortkey needs to be
					# converted.
					if ( $row->cl_sortkey == $title->getText()
						|| $row->cl_sortkey == $title->getPrefixedText() ) {
						$prefix = '';
					} else {
						# Custom sortkey, use it as a prefix
						$prefix = $row->cl_sortkey;
					}
				} else {
					$prefix = $row->cl_sortkey_prefix;
				}
				# cl_type will be wrong for lots of pages if cl_collation is 0,
				# so let's update it while we're here.
				if ( $title->getNamespace() == NS_CATEGORY ) {
					$type = 'subcat';
				} elseif ( $title->getNamespace() == NS_FILE ) {
					$type = 'file';
				} else {
					$type = 'page';
				}
				$newSortKey = $collation->getSortKey(
					$title->getCategorySortkey( $prefix ) );
				if ( $verboseStats ) {
					$this->updateSortKeySizeHistogram( $newSortKey );
				}

				if ( !$dryRun ) {
					$dbw->update(
						'categorylinks',
						array(
							'cl_sortkey' => $newSortKey,
							'cl_sortkey_prefix' => $prefix,
							'cl_collation' => $collationName,
							'cl_type' => $type,
							// Assign the column to itself so the stored
							// timestamp value is kept as-is by this update.
							'cl_timestamp = cl_timestamp',
						),
						array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
						__METHOD__
					);
				}
				$lastRow = $row;
			}
			if ( !$dryRun ) {
				$dbw->commit( __METHOD__ );
			}

			// Page forward from the last row seen; an empty batch leaves the
			// previous paging condition untouched.
			if ( $lastRow !== null ) {
				$batchConds = array( $this->getBatchCondition( $lastRow ) );
			}

			$count += $res->numRows();
			$this->output( "$count done.\n" );

			if ( !$dryRun && ++$batchCount % self::SYNC_INTERVAL == 0 ) {
				$this->output( "Waiting for slaves ... " );
				wfWaitForSlaves();
				$this->output( "done\n" );
			}
			// A short batch means there is nothing left to select.
		} while ( $res->numRows() == self::BATCH_SIZE );

		$this->output( "$count rows processed\n" );

		if ( $verboseStats ) {
			$this->output( "\n" );
			$this->showSortKeySizeHistogram();
		}
	}
Exemplo n.º 2
0
 /**
  * Check that a collation reports the expected first letter for a string.
  *
  * @param string $collation Collation name (e.g. uca-en)
  * @param string $string String whose first letter is looked up
  * @param string $firstLetter First letter the collation is expected to return
  *
  * @dataProvider firstLetterProvider
  */
 function testGetFirstLetter($collation, $string, $firstLetter)
 {
     // Build the collation under test, then compare its answer with the
     // expectation supplied by the data provider.
     $actualLetter = Collation::factory($collation)->getFirstLetter($string);
     $this->assertEquals($firstLetter, $actualLetter);
 }
Exemplo n.º 3
0
 /**
  * Recompute the sort key, sort key prefix, collation name and type of
  * rows in the categorylinks table, working in batches of self::BATCH_SIZE.
  *
  * Options read: 'force', 'dry-run', 'verbose-stats', 'target-collation'
  * and 'previous-collation'. With 'force' or 'dry-run' no collation filter
  * is applied, so every row joined to the page table is visited; with
  * 'dry-run' nothing is written to the database. Row counts are read
  * through the DB_SLAVE handle; batch selects and updates use DB_MASTER.
  */
 public function execute()
 {
     global $wgCategoryCollation;
     $dbw = $this->getDB(DB_MASTER);
     $dbr = $this->getDB(DB_SLAVE);
     $force = $this->getOption('force');
     $dryRun = $this->getOption('dry-run');
     $verboseStats = $this->getOption('verbose-stats');
     if ($this->hasOption('target-collation')) {
         $collationName = $this->getOption('target-collation');
         $collation = Collation::factory($collationName);
     } else {
         $collationName = $wgCategoryCollation;
         $collation = Collation::singleton();
     }
     // Collation sanity check: in some cases the constructor will work,
     // but this will raise an exception, breaking all category pages
     $collation->getFirstLetter('MediaWiki');
     // Locally at least (on a rather old version of MySQL), MySQL seems to
     // filesort if there is both an equality (but not an inequality)
     // condition on cl_collation in the WHERE and it is also the first
     // item in the ORDER BY.
     if ($this->hasOption('previous-collation')) {
         $orderBy = 'cl_to, cl_type, cl_from';
     } else {
         $orderBy = 'cl_collation, cl_to, cl_type, cl_from';
     }
     $options = ['LIMIT' => self::BATCH_SIZE, 'ORDER BY' => $orderBy, 'STRAIGHT_JOIN'];
     // Start from an explicit empty condition list so that no branch below
     // assigns into an undefined variable.
     $collationConds = [];
     if (!$force && !$dryRun) {
         if ($this->hasOption('previous-collation')) {
             $collationConds['cl_collation'] = $this->getOption('previous-collation');
         } else {
             $collationConds[] = 'cl_collation != ' . $dbw->addQuotes($collationName);
         }
         $count = $dbr->estimateRowCount('categorylinks', '*', $collationConds, __METHOD__);
         // Improve estimate if feasible
         if ($count < 1000000) {
             $count = $dbr->selectField('categorylinks', 'COUNT(*)', $collationConds, __METHOD__);
         }
         if ($count == 0) {
             $this->output("Collations up-to-date.\n");
             return;
         }
         $this->output("Fixing collation for {$count} rows.\n");
         wfWaitForSlaves();
     }
     // cl_type must be selected as a number on MySQL for proper paging,
     // because the column is an enum there. The database type cannot change
     // between batches, so the expression is built once.
     if ($dbw->getType() === 'mysql') {
         $clType = 'cl_type+0 AS "cl_type_numeric"';
     } else {
         $clType = 'cl_type';
     }
     $count = 0;
     $batchCount = 0;
     $batchConds = [];
     do {
         $this->output("Selecting next " . self::BATCH_SIZE . " rows...");
         $res = $dbw->select(
             ['categorylinks', 'page'],
             ['cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', 'cl_sortkey', $clType, 'page_namespace', 'page_title'],
             array_merge($collationConds, $batchConds, ['cl_from = page_id']),
             __METHOD__,
             $options
         );
         $this->output(" processing...");
         if (!$dryRun) {
             $this->beginTransaction($dbw, __METHOD__);
         }
         // Only the last row of the batch matters for paging, so remember
         // it here and build the condition once after the loop.
         $lastRow = null;
         foreach ($res as $row) {
             $title = Title::newFromRow($row);
             if (!$row->cl_collation) {
                 # This is an old-style row, so the sortkey needs to be
                 # converted.
                 if ($row->cl_sortkey == $title->getText() || $row->cl_sortkey == $title->getPrefixedText()) {
                     $prefix = '';
                 } else {
                     # Custom sortkey, use it as a prefix
                     $prefix = $row->cl_sortkey;
                 }
             } else {
                 $prefix = $row->cl_sortkey_prefix;
             }
             # cl_type will be wrong for lots of pages if cl_collation is 0,
             # so let's update it while we're here.
             if ($title->getNamespace() == NS_CATEGORY) {
                 $type = 'subcat';
             } elseif ($title->getNamespace() == NS_FILE) {
                 $type = 'file';
             } else {
                 $type = 'page';
             }
             $newSortKey = $collation->getSortKey($title->getCategorySortkey($prefix));
             if ($verboseStats) {
                 $this->updateSortKeySizeHistogram($newSortKey);
             }
             if (!$dryRun) {
                 $dbw->update(
                     'categorylinks',
                     [
                         'cl_sortkey' => $newSortKey,
                         'cl_sortkey_prefix' => $prefix,
                         'cl_collation' => $collationName,
                         'cl_type' => $type,
                         // Assign the column to itself so the stored
                         // timestamp value is kept as-is by this update.
                         'cl_timestamp = cl_timestamp',
                     ],
                     ['cl_from' => $row->cl_from, 'cl_to' => $row->cl_to],
                     __METHOD__
                 );
             }
             $lastRow = $row;
         }
         // Page forward from the last row seen; an empty batch leaves the
         // previous paging condition untouched.
         if ($lastRow !== null) {
             $batchConds = [$this->getBatchCondition($lastRow, $dbw)];
         }
         if (!$dryRun) {
             $this->commitTransaction($dbw, __METHOD__);
         }
         $count += $res->numRows();
         $this->output("{$count} done.\n");
         if (!$dryRun && ++$batchCount % self::SYNC_INTERVAL == 0) {
             $this->output("Waiting for slaves ... ");
             wfWaitForSlaves();
             $this->output("done\n");
         }
         // A short batch means there is nothing left to select.
     } while ($res->numRows() == self::BATCH_SIZE);
     $this->output("{$count} rows processed\n");
     if ($verboseStats) {
         $this->output("\n");
         $this->showSortKeySizeHistogram();
     }
 }