// Refuse to run unless the regex is between 10000 and 32000 bytes long;
// anything outside that window is reported as "wrong size" and aborts the run.
if (strlen($regex) < 10000 || strlen($regex) > 32000) {
    print "wrong size, exiting\n";
    exit(1);
}

print "Searching for spam in {$maxID} pages...\n";
if ($dryRun) {
    print "Dry run only\n";
}

// Visit every page ID from 1 through $maxID. IDs for which no revision or no
// (truthy) text comes back are skipped.
for ($id = 1; $id <= $maxID; $id++) {
    // Periodic progress line; "\r" returns the cursor so the next report
    // overwrites this one. $maxID is at least 1 whenever the loop body runs.
    if ($id % $reportingInterval == 0) {
        printf("%-8d %-5.2f%%\r", $id, $id / $maxID * 100);
    }

    $revision = Revision::loadFromPageId($dbr, $id);
    if (!$revision) {
        continue;
    }

    $text = $revision->getText();
    if (!$text) {
        continue;
    }

    if (!preg_match($regex, $text, $matches)) {
        continue;
    }

    $title = $revision->getTitle();
    $titleText = $title->getPrefixedText();
    if ($dryRun) {
        // Dry run: report the hit only, leave the page untouched.
        print "\nFound spam in [[{$titleText}]]\n";
    } else {
        print "\nCleaning up links to {$matches[0]} in [[{$titleText}]]\n";
        cleanupArticle($revision, $regex);
    }
}

// Just for satisfaction
// After the loop $id is one past the last page visited. Guard the percentage
// so that an empty page range ($maxID of 0 or null) does not divide by zero.
printf("%-8d %-5.2f%%\n", $id - 1, $maxID > 0 ? ($id - 1) / $maxID * 100 : 0);
} // closes a construct that is opened above this section

// All read queries below go through the connection returned for DB_SLAVE.
$dbr =& wfGetDB(DB_SLAVE);

if ($options['all']) {
    // Clean up spam on all wikis
    print "Finding spam on " . count($wgLocalDatabases) . " wikis\n";
    $found = false;
    foreach ($wgLocalDatabases as $db) {
        // Count matching external links in this wiki's table first, so a child
        // process is only spawned for wikis that actually have hits.
        $count = $dbr->selectField(
            "`{$db}`.externallinks",
            'COUNT(*)',
            array('el_index LIKE ' . $dbr->addQuotes($like)),
            $fname
        );
        if ($count) {
            $found = true;
            // Re-run this script for the one wiki and prefix each output line
            // with the database name. Every argument is shell-escaped: an
            // unquoted spec such as "*.example.com" would be glob-expanded,
            // and the sed expression contains a space, so without quoting the
            // shell splits it and sed sees an unterminated "s" command.
            passthru(
                'php cleanupSpam.php ' . escapeshellarg($db) . ' ' . escapeshellarg($spec)
                . ' | sed ' . escapeshellarg("s/^/{$db}: /")
            );
        }
    }
    if ($found) {
        print "All done\n";
    } else {
        print "None found\n";
    }
} else {
    // Clean up spam on this wiki
    $res = $dbr->select(
        'externallinks',
        array('DISTINCT el_from'),
        array('el_index LIKE ' . $dbr->addQuotes($like)),
        $fname
    );
    $count = $dbr->numRows($res);
    print "Found {$count} articles containing {$spec}\n";
    // Each row carries one distinct el_from value, which is handed to
    // cleanupArticle() together with the spec.
    while ($row = $dbr->fetchObject($res)) {
        cleanupArticle($row->el_from, $spec);
    }
    if ($count) {
        print "Done\n";
    }
}
// Report the size of each regex. strlen() gives the byte length the message
// promises; count() is not defined for strings (it yields 1 on old PHP and
// throws a TypeError on PHP 8).
print "Regexes are " . implode(', ', array_map('strlen', $regexes)) . " bytes\n";

print "Searching for spam in {$maxID} pages...\n";
if ($dryRun) {
    print "Dry run only\n";
}

// Visit every page ID from 1 through $maxID. IDs for which no revision or no
// (truthy) text comes back are skipped.
for ($id = 1; $id <= $maxID; $id++) {
    // Periodic progress line; "\r" returns the cursor so the next report
    // overwrites this one. $maxID is at least 1 whenever the loop body runs.
    if ($id % $reportingInterval == 0) {
        printf("%-8d %-5.2f%%\r", $id, $id / $maxID * 100);
    }

    $revision = Revision::loadFromPageId($dbr, $id);
    if (!$revision) {
        continue;
    }

    $text = $revision->getText();
    if (!$text) {
        continue;
    }

    // Every regex is tried against the same text; each one that matches is
    // reported (and, outside dry-run mode, triggers a cleanup call).
    foreach ($regexes as $regex) {
        if (!preg_match($regex, $text, $matches)) {
            continue;
        }

        $title = $revision->getTitle();
        $titleText = $title->getPrefixedText();
        if ($dryRun) {
            // Dry run: report the hit only, leave the page untouched.
            print "\nFound spam in [[{$titleText}]]\n";
        } else {
            print "\nCleaning up links to {$matches[0]} in [[{$titleText}]]\n";
            // Pass the matched text with any "http://" removed.
            $match = str_replace('http://', '', $matches[0]);
            cleanupArticle($revision, $regexes, $match);
        }
    }
}

// Just for satisfaction
// After the loop $id is one past the last page visited. Guard the percentage
// so that an empty page range ($maxID of 0 or null) does not divide by zero.
printf("%-8d %-5.2f%%\n", $id - 1, $maxID > 0 ? ($id - 1) / $maxID * 100 : 0);