/**
 * Pre-checkSchema hook: upgrade an old file table layout to one that has
 * a 'urlhash' column.
 *
 * No-op if the table already has the urlhash field and its unique key.
 * Otherwise it:
 *   1. truncates/deduplicates URLs longer than 191 characters (the new
 *      maximum for an indexable varchar on this schema),
 *   2. adds a nullable urlhash column that is NOT yet the primary key,
 *      so data can be transferred before the core schema upgrade runs,
 *   3. populates urlhash via SHA2(url, 256) — MySQL-specific SQL.
 */
public static function beforeSchemaUpdate()
{
    $table = strtolower(get_called_class());
    $schema = Schema::get();
    $schemadef = $schema->getTableDef($table);
    // 2015-02-19 We have to upgrade our table definitions to have the urlhash field populated
    if (isset($schemadef['fields']['urlhash']) && isset($schemadef['unique keys']['file_urlhash_key'])) {
        // We already have the urlhash field, so no need to migrate it.
        return;
    }
    echo "\nFound old {$table} table, upgrading it to contain 'urlhash' field...";

    // Find URLs that would collide with each other once truncated to 191 chars.
    $file = new File();
    $file->query(sprintf('SELECT id, LEFT(url, 191) AS shortenedurl, COUNT(*) AS c FROM %1$s WHERE LENGTH(url)>191 GROUP BY shortenedurl HAVING c > 1', $schema->quoteIdentifier($table)));
    print "\nFound {$file->N} URLs with too long entries in file table\n";
    while ($file->fetch()) {
        // We've got a URL that is too long for our future file table
        // so we'll cut it. We could save the original URL, but there is
        // no guarantee it is complete anyway since the previous max was 255 chars.
        $dupfile = new File();
        // First we find file entries that would be duplicates of this when shortened
        // ... and we'll just throw the dupes out the window for now! It's already so borken.
        // FIX: use $table instead of a hardcoded 'file', and escape the
        // interpolated value — a URL containing a double quote or backslash
        // would otherwise break (or inject into) the statement.
        $dupfile->query(sprintf('SELECT * FROM %1$s WHERE LEFT(url, 191) = "%2$s"',
                                $schema->quoteIdentifier($table),
                                $dupfile->escape($file->shortenedurl)));
        // Leave one of the URLs in the database by using ->find(true) (fetches first entry)
        if ($dupfile->find(true)) {
            print "\nShortening url entry for {$table} id: {$file->id} [";
            $orig = clone $dupfile;
            $dupfile->url = $file->shortenedurl;    // make sure it's only 191 chars from now on
            $dupfile->update($orig);
            print "\nDeleting duplicate entries of too long URL on {$table} id: {$file->id} [";
            // only start deleting with this fetch.
            while ($dupfile->fetch()) {
                print ".";
                $dupfile->delete();
            }
            print "]\n";
        } else {
            print "\nWarning! URL suddenly disappeared from database: {$file->url}\n";
        }
    }
    echo "...and now all the non-duplicates which are longer than 191 characters...\n";
    // FIX: operate on $table rather than a hardcoded 'file' so subclasses
    // sharing this migration work on their own table.
    $file->query(sprintf('UPDATE %1$s SET url=LEFT(url, 191) WHERE LENGTH(url)>191',
                         $schema->quoteIdentifier($table)));

    echo "\n...now running hacky pre-schemaupdate change for {$table}:";
    // We have to create a urlhash that is _not_ the primary key,
    // transfer data and THEN run checkSchema
    $schemadef['fields']['urlhash'] = array('type' => 'varchar',
                                            'length' => 64,
                                            'not null' => false,
                                            'description' => 'sha256 of destination URL (url field)');
    $schemadef['fields']['url'] = array('type' => 'text',
                                        'description' => 'destination URL after following possible redirections');
    unset($schemadef['unique keys']);
    $schema->ensureTable($table, $schemadef);
    echo "DONE.\n";

    $classname = ucfirst($table);
    $tablefix = new $classname();
    // urlhash is hash('sha256', $url) in the File table
    echo "Updating urlhash fields in {$table} table...";
    // Maybe very MySQL specific :(
    $tablefix->query(sprintf('UPDATE %1$s SET %2$s=%3$s;',
                             $schema->quoteIdentifier($table),
                             'urlhash',
                             'SHA2(url, 256)'));
    echo "DONE.\n";
    echo "Resuming core schema upgrade...";
}
Remove duplicate URL entries in the file and file_redirection tables because they for some reason were not unique. -y --yes do not wait for confirmation END_OF_HELP;
require_once INSTALLDIR . '/scripts/commandline.inc';

// Interactive safety check: skipped when -y/--yes was passed on the command line.
if (!have_option('y', 'yes')) {
    print "About to remove duplicate URL entries in file and file_redirection tables. Are you sure? [y/N] ";
    $response = fgets(STDIN);
    // Anything other than an explicit 'y' (case-insensitive) aborts cleanly.
    if (strtolower(trim($response)) != 'y') {
        print "Aborting.\n";
        exit(0);
    }
}

// Find every URL that occurs more than once in the file table.
$file = new File();
$file->query('SELECT id, url, COUNT(*) AS c FROM file GROUP BY url HAVING c > 1');
print "\nFound {$file->N} URLs with duplicate entries in file table";
while ($file->fetch()) {
    // We've got a URL that is duplicated in the file table
    $dupfile = new File();
    $dupfile->url = $file->url;
    if ($dupfile->find(true)) {
        print "\nDeleting duplicate entries in file table for URL: {$file->url} [";
        // Leave one of the URLs in the database by using ->find(true)
        // and only deleting starting with this fetch.
        while ($dupfile->fetch()) {
            print ".";
            $dupfile->delete();
        }
        print "]\n";
    } else {
        // NOTE(review): this else branch continues beyond the visible chunk;
        // its body is not shown here — confirm against the full script.
// Walk every file row stuffed with placeholder metadata
// (title='h', mimetype='h', size=0, protected=0), clear those fields,
// and re-run the redirection/embedding lookup for each one.
// With --dry-run, only report what would change.
$dry = have_option('dry-run');

// Select the "bad" rows via DB_DataObject property-equality matching.
$f = new File();
$f->title = 'h';
$f->mimetype = 'h';
$f->size = 0;
$f->protected = 0;
$f->find();
echo "Found {$f->N} bad items:\n";

while ($f->fetch()) {
    echo "{$f->id} {$f->url}";

    // Re-resolve the URL; treated as a success when an array comes back.
    $data = File_redirection::lookupWhere($f->url);

    if (!$dry) {
        // NULL out the mime/title/size/protected fields
        $f->query(sprintf('UPDATE file SET mimetype=null,title=null,size=null,protected=null WHERE id=%d', $f->id));
        $f->decache();
    }

    if (is_array($data)) {
        if (!$dry) {
            Event::handle('EndFileSaveNew', array($f, $data, $f->url));
        }
        echo $dry ? " (unchanged)\n" : " (ok)\n";
    } else {
        echo $dry ? " (unchanged, but embedding lookup failed)\n" : " (ok, but embedding lookup failed)\n";
    }
}
echo "done.\n";