Example #1
 public static function beforeSchemaUpdate()
 {
     $table = strtolower(get_called_class());
     $schema = Schema::get();
     $schemadef = $schema->getTableDef($table);
     // 2015-02-19 We have to upgrade our table definitions to have the urlhash field populated
     if (isset($schemadef['fields']['urlhash']) && isset($schemadef['unique keys']['file_urlhash_key'])) {
         // We already have the urlhash field, so no need to migrate it.
         return;
     }
     echo "\nFound old {$table} table, upgrading it to contain 'urlhash' field...";
     $file = new File();
     $file->query(sprintf('SELECT id, LEFT(url, 191) AS shortenedurl, COUNT(*) AS c FROM %1$s WHERE LENGTH(url)>191 GROUP BY shortenedurl HAVING c > 1', $schema->quoteIdentifier($table)));
     print "\nFound {$file->N} URLs with too long entries in file table\n";
     while ($file->fetch()) {
         // We've got a URL that is too long for our future file table
         // so we'll cut it. We could save the original URL, but there is
         // no guarantee it is complete anyway since the previous max was 255 chars.
         $dupfile = new File();
         // First we find file entries that would be duplicates of this when shortened
         // ... and simply discard the duplicates for now; the data is already broken anyway.
         $dupfile->query(sprintf('SELECT * FROM file WHERE LEFT(url, 191) = "%1$s"', $file->shortenedurl));
         // Leave one of the URLs in the database by using ->find(true) (fetches first entry)
         if ($dupfile->find(true)) {
             print "\nShortening url entry for {$table} id: {$file->id} [";
             $orig = clone $dupfile;
             $dupfile->url = $file->shortenedurl;
             // make sure it's only 191 chars from now on
             $dupfile->update($orig);
             print "\nDeleting duplicate entries of too long URL on {$table} id: {$file->id} [";
              // find(true) above kept (and shortened) the first row; each subsequent fetch() returns another duplicate, which we delete.
             while ($dupfile->fetch()) {
                 print ".";
                 $dupfile->delete();
             }
             print "]\n";
         } else {
             print "\nWarning! URL suddenly disappeared from database: {$file->url}\n";
         }
     }
     echo "...and now all the non-duplicates which are longer than 191 characters...\n";
     $file->query('UPDATE file SET url=LEFT(url, 191) WHERE LENGTH(url)>191');
     echo "\n...now running hacky pre-schemaupdate change for {$table}:";
     // We have to create a urlhash that is _not_ the primary key,
     // transfer data and THEN run checkSchema
     $schemadef['fields']['urlhash'] = array('type' => 'varchar', 'length' => 64, 'not null' => false, 'description' => 'sha256 of destination URL (url field)');
     $schemadef['fields']['url'] = array('type' => 'text', 'description' => 'destination URL after following possible redirections');
     unset($schemadef['unique keys']);
     $schema->ensureTable($table, $schemadef);
     echo "DONE.\n";
     $classname = ucfirst($table);
     $tablefix = new $classname();
     // urlhash is hash('sha256', $url) in the File table
     echo "Updating urlhash fields in {$table} table...";
     // Note: SHA2() is MySQL/MariaDB-specific; other backends may need a different approach (see the sketch after this function).
     $tablefix->query(sprintf('UPDATE %1$s SET %2$s=%3$s;', $schema->quoteIdentifier($table), 'urlhash', 'SHA2(url, 256)'));
     echo "DONE.\n";
     echo "Resuming core schema upgrade...";
 }
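
The SHA2() call above is the MySQL/MariaDB way to backfill urlhash. As a rough sketch only (not from the original code), the same 64-character hex digest could be computed in PHP with hash('sha256', ...) on backends that lack SHA2(); it assumes the File query()/fetch() behaviour shown above, with $schema and $table as defined inside beforeSchemaUpdate():

// Portable fallback sketch: compute the sha256 digest in PHP instead of in SQL.
// $rows and $writer are illustrative names; $schema and $table come from the
// surrounding beforeSchemaUpdate() scope in the example above.
$rows = new File();
$rows->query(sprintf('SELECT id, url FROM %s WHERE urlhash IS NULL', $schema->quoteIdentifier($table)));
while ($rows->fetch()) {
    $writer = new File();
    $writer->query(sprintf(
        "UPDATE %s SET urlhash = '%s' WHERE id = %d",
        $schema->quoteIdentifier($table),
        hash('sha256', $rows->url), // same value MySQL's SHA2(url, 256) produces
        $rows->id
    ));
}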
Example #2
$shortoptions = 'y';
$longoptions = array('yes');

$helptext = <<<END_OF_HELP
Remove duplicate URL entries from the file and file_redirection tables, since for some reason they were not unique.

  -y --yes      do not wait for confirmation

END_OF_HELP;
require_once INSTALLDIR . '/scripts/commandline.inc';
if (!have_option('y', 'yes')) {
    print "About to remove duplicate URL entries in file and file_redirection tables. Are you sure? [y/N] ";
    $response = fgets(STDIN);
    if (strtolower(trim($response)) != 'y') {
        print "Aborting.\n";
        exit(0);
    }
}
$file = new File();
$file->query('SELECT id, url, COUNT(*) AS c FROM file GROUP BY url HAVING c > 1');
print "\nFound {$file->N} URLs with duplicate entries in file table";
while ($file->fetch()) {
    // We've got a URL that is duplicated in the file table
    $dupfile = new File();
    $dupfile->url = $file->url;
    if ($dupfile->find(true)) {
        print "\nDeleting duplicate entries in file table for URL: {$file->url} [";
        // Leave one of the URLs in the database by using ->find(true)
        // and only deleting starting with this fetch.
        while ($dupfile->fetch()) {
            print ".";
            $dupfile->delete();
        }
        print "]\n";
    } else {
        print "\nWarning! URL suddenly disappeared from database: {$file->url}\n";
    }
}
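
The help text also covers the file_redirection table, but that part of the script is not in this excerpt. A rough sketch of what the analogous pass might look like, assuming the File_redirection class (used in Example #3) behaves like File here and that its url column is the one that should have been unique:

// Sketch only, not from the excerpt above: the same keep-one-delete-the-rest
// pattern applied to file_redirection. $redir and $dupredir are illustrative names.
$redir = new File_redirection();
$redir->query('SELECT id, url, COUNT(*) AS c FROM file_redirection GROUP BY url HAVING c > 1');
print "\nFound {$redir->N} URLs with duplicate entries in file_redirection table";
while ($redir->fetch()) {
    $dupredir = new File_redirection();
    $dupredir->url = $redir->url;
    if ($dupredir->find(true)) { // keep the first matching row
        print "\nDeleting duplicate entries in file_redirection table for URL: {$redir->url} [";
        while ($dupredir->fetch()) { // every further fetch is a duplicate
            print ".";
            $dupredir->delete();
        }
        print "]\n";
    }
}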
Example #3
$dry = have_option('dry-run');
// Query-by-example: match file rows whose metadata was saved with the bad
// placeholder values (title 'h', mimetype 'h', size 0, protected 0).
$f = new File();
$f->title = 'h';
$f->mimetype = 'h';
$f->size = 0;
$f->protected = 0;
$f->find();
echo "Found {$f->N} bad items:\n";
while ($f->fetch()) {
    echo "{$f->id} {$f->url}";
    $data = File_redirection::lookupWhere($f->url);
    if ($dry) {
        if (is_array($data)) {
            echo " (unchanged)\n";
        } else {
            echo " (unchanged, but embedding lookup failed)\n";
        }
    } else {
        // NULL out the mime/title/size/protected fields
        $sql = sprintf("UPDATE file " . "SET mimetype=null,title=null,size=null,protected=null " . "WHERE id=%d", $f->id);
        $f->query($sql);
        $f->decache();
        if (is_array($data)) {
            Event::handle('EndFileSaveNew', array($f, $data, $f->url));
            echo " (ok)\n";
        } else {
            echo " (ok, but embedding lookup failed)\n";
        }
    }
}
echo "done.\n";