// Scan every page ID from 1 to $maxID for text matching the spam $regex.
// In dry-run mode matches are only reported; otherwise each matching page is
// passed to cleanupArticle().

// Refuse to run unless the regex is between 10000 and 32000 bytes long.
$regexLength = strlen($regex);
if ($regexLength < 10000 || $regexLength > 32000) {
    print "wrong size, exiting\n";
    exit(1);
}

print "Searching for spam in {$maxID} pages...\n";
if ($dryRun) {
    print "Dry run only\n";
}

for ($id = 1; $id <= $maxID; $id++) {
    if ($id % $reportingInterval == 0) {
        // "\r" returns to the start of the line so the next progress report
        // overwrites this one instead of scrolling.
        printf("%-8d  %-5.2f%%\r", $id, $id / $maxID * 100);
    }

    // A falsy result (presumably: no page with this ID) is simply skipped.
    $revision = Revision::loadFromPageId($dbr, $id);
    if (!$revision) {
        continue;
    }

    $text = $revision->getText();
    if (!$text) {
        continue;
    }

    if (!preg_match($regex, $text, $matches)) {
        continue;
    }

    $title = $revision->getTitle();
    $titleText = $title->getPrefixedText();
    if ($dryRun) {
        print "\nFound spam in [[{$titleText}]]\n";
    } else {
        print "\nCleaning up links to {$matches[0]} in [[{$titleText}]]\n";
        cleanupArticle($revision, $regex);
    }
}

// Just for satisfaction: print the final progress line.
// When $maxID is 0 the loop never ran and $id is still 1; dividing by $maxID
// would be a division by zero, so report the (empty) scan as complete.
$lastID = $id - 1;
$percentDone = $maxID > 0 ? $lastID / $maxID * 100 : 100;
printf("%-8d  %-5.2f%%\n", $lastID, $percentDone);
Example #2
0
}
// Entry point of the spam cleanup: either fan out to every wiki listed in
// $wgLocalDatabases (when the 'all' option is set) or clean up this wiki only.
// $like is the LIKE pattern matched against externallinks.el_index and $spec
// is the spam specification forwarded to cleanupArticle() / the child script.
$dbr =& wfGetDB(DB_SLAVE);
if (!empty($options['all'])) {
    // Clean up spam on all wikis
    print "Finding spam on " . count($wgLocalDatabases) . " wikis\n";
    $found = false;
    $likeCondition = array('el_index LIKE ' . $dbr->addQuotes($like));
    foreach ($wgLocalDatabases as $db) {
        // Only spawn a child process for wikis that actually have matching links.
        $count = $dbr->selectField("`{$db}`.externallinks", 'COUNT(*)', $likeCondition, $fname);
        if ($count) {
            $found = true;
            // Escape every shell word: $spec comes from outside this block, and
            // the sed script contains spaces, so it must be passed as a single
            // quoted argument. sed prefixes each output line with the wiki name.
            // NOTE(review): a "/" or "&" inside $db would still change the sed
            // replacement; assumed not to occur in database names.
            $command = 'php cleanupSpam.php ' . escapeshellarg($db) . ' ' . escapeshellarg($spec)
                . ' | sed ' . escapeshellarg("s/^/{$db}:  /");
            passthru($command);
        }
    }
    if ($found) {
        print "All done\n";
    } else {
        print "None found\n";
    }
} else {
    // Clean up spam on this wiki
    $res = $dbr->select('externallinks', array('DISTINCT el_from'), array('el_index LIKE ' . $dbr->addQuotes($like)), $fname);
    $count = $dbr->numRows($res);
    print "Found {$count} articles containing {$spec}\n";
    while ($row = $dbr->fetchObject($res)) {
        // el_from is the value cleanupArticle() receives for each matching row.
        cleanupArticle($row->el_from, $spec);
    }
    if ($count) {
        print "Done\n";
    }
}
Example #3
0
// Scan every page ID from 1 to $maxID against each pattern in $regexes.
// In dry-run mode matches are only reported; otherwise each match triggers
// cleanupArticle() for that page.

// Report the byte length of each regex. strlen() is required here: count()
// on a string does not measure its length (and throws a TypeError on PHP 8).
print "Regexes are " . implode(', ', array_map('strlen', $regexes)) . " bytes\n";
print "Searching for spam in {$maxID} pages...\n";
if ($dryRun) {
    print "Dry run only\n";
}

for ($id = 1; $id <= $maxID; $id++) {
    if ($id % $reportingInterval == 0) {
        // "\r" returns to the start of the line so the next progress report
        // overwrites this one instead of scrolling.
        printf("%-8d  %-5.2f%%\r", $id, $id / $maxID * 100);
    }

    // A falsy result (presumably: no page with this ID) is simply skipped.
    $revision = Revision::loadFromPageId($dbr, $id);
    if (!$revision) {
        continue;
    }

    $text = $revision->getText();
    if (!$text) {
        continue;
    }

    // NOTE(review): every regex is tested against the text loaded above, even
    // after an earlier regex already triggered a cleanup of this page; confirm
    // that repeated cleanupArticle() calls for one page are intended.
    foreach ($regexes as $regex) {
        if (!preg_match($regex, $text, $matches)) {
            continue;
        }

        $title = $revision->getTitle();
        $titleText = $title->getPrefixedText();
        if ($dryRun) {
            print "\nFound spam in [[{$titleText}]]\n";
        } else {
            print "\nCleaning up links to {$matches[0]} in [[{$titleText}]]\n";
            // Strip the "http://" scheme from the matched text before passing it on.
            $match = str_replace('http://', '', $matches[0]);
            cleanupArticle($revision, $regexes, $match);
        }
    }
}

// Just for satisfaction: print the final progress line.
// When $maxID is 0 the loop never ran and $id is still 1; dividing by $maxID
// would be a division by zero, so report the (empty) scan as complete.
$lastID = $id - 1;
$percentDone = $maxID > 0 ? $lastID / $maxID * 100 : 100;
printf("%-8d  %-5.2f%%\n", $lastID, $percentDone);