/**
 * Entry point of the copyright checker: searches the web for text that also
 * appears in an article and prints the first matches.
 *
 * The article is named by $par or, when $par is not set, by the 'target'
 * request value. If no 'query' request value is given, a search query is built
 * from up to two randomly chosen sentences of the article's revision text
 * (only sentences longer than 5 words are used, truncated to 15 words each).
 * Results whose URL starts with a wikihow.com address are dropped and at most
 * three of the remaining results are written to $wgOut, each with a "Copyvio"
 * link that calls the nap_copyVio() JavaScript function.
 *
 * @param string|null $par Article title in URL form, or null to use the
 *                         'target' request value.
 * @return void
 */
public function execute($par) {
    global $wgRequest, $wgOut;

    $target = isset($par) ? $par : $wgRequest->getVal('target');
    if (is_null($target)) {
        $wgOut->addHTML("<b>Error:</b> No parameter passed to Copyrightchecker.");
        return;
    }

    $query = $wgRequest->getVal('query');
    wfLoadExtensionMessages('Newarticleboost');

    // An unparsable target gives no Title object; do not hand that on to
    // Revision::newFromTitle(), treat it like a missing revision instead.
    $title = Title::newFromURL($target);
    $rev = $title ? Revision::newFromTitle($title) : null;
    $wgOut->setArticleBodyOnly(true);

    if (!$query) {
        if (!$rev) {
            echo "Revision for article not found by copyright check";
            return;
        }

        // Get the text and strip the steps header and any templates,
        // then flatten it to plain text.
        $wikitext = $rev->getText();
        $wikitext = preg_replace("/^==[ ]+" . wfMsg('steps') . "[ ]+==/mix", "", $wikitext);
        $wikitext = preg_replace("/{{[^}]*}}/im", "", $wikitext);
        $wikitext = WikihowArticleEditor::textify($wikitext);

        // Pick two random sentences to use as exact-phrase search terms.
        $parts = preg_split("@\\.@", $wikitext);
        shuffle($parts);
        $queries = array();
        foreach ($parts as $p) {
            $p = trim($p);
            // explode() instead of split(): split() no longer exists in PHP 7+.
            $words = explode(" ", $p);
            if (count($words) <= 5) {
                continue;
            }
            if (count($words) > 15) {
                $words = array_slice($words, 0, 15);
                $p = implode(" ", $words);
            }
            $queries[] = $p;
            if (count($queries) == 2) {
                break;
            }
        }
        $query = '"' . implode('" AND "', $queries) . '"';
    }

    require_once dirname(__FILE__) . '/GoogleAjaxSearch.class.php';
    $results = GoogleAjaxSearch::getGlobalWebResults($query, 8, null);

    // Filter out results from wikihow.com. is_array() is checked before the
    // value is iterated or counted so a failed search cannot raise an error.
    $external = array();
    if (is_array($results)) {
        foreach ($results as $result) {
            if (strpos($result['url'], "http://www.wikihow.com/") === 0
                || strpos($result['url'], "http://m.wikihow.com/") === 0
            ) {
                continue;
            }
            $external[] = $result;
        }
    }

    // The query may come straight from the request, so escape it for display.
    $safeQuery = htmlspecialchars($query, ENT_QUOTES);

    if (count($external) == 0) {
        $wgOut->addHTML(wfMsg('nap_nocopyrightfound', $safeQuery));
        return;
    }

    // Process results
    $wgOut->addHTML(wfMsg("nap_copyrightlist", $safeQuery) . "<table width='100%'>");
    foreach (array_slice($external, 0, 3) as $match) {
        $c = json_decode($match['content']);
        // The attributes below are single-quoted, so quotes must be escaped too.
        $url = htmlspecialchars($match['url'], ENT_QUOTES);
        // JSON-encode first so the URL is a valid JavaScript string literal,
        // then HTML-escape it for the onclick attribute.
        $jsUrl = htmlspecialchars(json_encode($match['url']), ENT_QUOTES);
        // NOTE(review): title and content are emitted exactly as the search
        // helper returned them; presumably they carry highlighting markup.
        // Confirm that the helper only returns trusted/sanitised HTML.
        $wgOut->addHTML("<tr><td><a href='{$url}' target='new'>{$match['title']}</a>\n\t\t\t\t\t<br/>{$c}\n\t\t\t\t\t<br/><font size='-2'>{$url}</font></td><td style='width: 100px; text-align: right; vertical-align: top;'><a href='' onclick='return nap_copyVio({$jsUrl});'>Copyvio</a></td></tr>");
    }
    $wgOut->addHTML("</table>");
}
/**
 * Scores an edit between 0 and 10 points.
 *
 * Scoring rules, as implemented below:
 *  - 1 point per 100 added/changed words (from the diff), at most 5;
 *  - 1 point per image link present in the new text but not the old, at most 2;
 *  - 1 point if the edit created the page ($d['newpage']);
 *  - 1 point if a template other than {{reflist}} was added;
 *  - 1 point if a category link was added.
 * An edit whose comment contains "Reverted", or whose following edit comment
 * says it reverted this user, scores 0.
 *
 * @param object $r  Object exposing the edit comment as ->mComment.
 * @param array  $d  Edit data with keys 'revlo' (older revision or empty),
 *                   'revhi' (newer revision), 'nextcomment', 'nextuser' and
 *                   'newpage'. Revisions expose ->mText and ->mUserText.
 * @param object $de Object providing generateDiffBody($old, $new) which
 *                   returns the diff as HTML (presumably a DifferenceEngine -
 *                   confirm against the caller).
 * @param bool   $showdetails When true, a breakdown is written to $wgOut.
 * @return int Points for the edit, capped at 10.
 */
function getPoints($r, $d, $de, $showdetails = false) {
    global $wgOut;

    $points = 0;
    $oldText = "";
    if ($d['revlo']) {
        $oldText = $d['revlo']->mText;
    }
    $newText = $d['revhi']->mText;
    // Letters-only version of the old text, used to spot additions that only
    // differ in formatting or punctuation. (A-Z, not A-z: the latter also
    // lets the ASCII punctuation between 'Z' and 'a' through.)
    $flatOldText = preg_replace("@[^a-zA-Z]@", "", WikihowArticleEditor::textify($oldText));

    // Get the points based on number of new / changed words. The guarded
    // $oldText is used so that a missing 'revlo' is not dereferenced.
    $diffhtml = $de->generateDiffBody($oldText, $newText);
    $addedwords = 0;

    // Words changed inside an existing line.
    preg_match_all('@<span class="diffchange diffchange-inline">[^>]*</span>@m', $diffhtml, $matches);
    foreach ($matches[0] as $m) {
        $m = WikihowArticleEditor::textify($m);
        preg_match_all("@\\b\\w+\\b@", $m, $words);
        $addedwords += count($words[0]);
    }

    // Words in wholly added lines.
    preg_match_all('@<td class="diff-addedline">(.|\\n)*</td>@Um', $diffhtml, $matches);
    foreach ($matches[0] as $m) {
        if (preg_match("@diffchange-inline@", $m)) {
            // already accounted for in change-inline
            continue;
        }
        $m = WikihowArticleEditor::textify($m);
        // Account for changes in formatting and punctuation by flattening
        // the changed piece of text and comparing it to the flattened old
        // version of the text.
        $flatM = preg_replace("@[^a-zA-Z]@", "", $m);
        if (!empty($flatM) && strpos($flatOldText, $flatM) !== false) {
            continue;
        }
        preg_match_all("@\\b\\w+\\b@", $m, $words);
        $addedwords += count($words[0]);
    }

    if ($showdetails) {
        $wgOut->addHTML("<h3>Points for edit (10 max):</h3><ul>");
    }

    // Reverts earn nothing.
    if (preg_match("@Reverted@", $r->mComment)) {
        if ($showdetails) {
            $wgOut->addHTML("<li>No points : reverted edit.</li></ul><hr/>");
        }
        return 0;
    }

    // Edits that were themselves reverted earn nothing. The user name is
    // quoted so characters such as '.', '(' or '@' cannot alter or break
    // the pattern.
    $revertedPattern = "@Reverted edits by.*" . preg_quote($d['revhi']->mUserText, '@') . "@";
    if (preg_match($revertedPattern, $d['nextcomment'])) {
        if ($showdetails) {
            $nextUser = htmlspecialchars($d['nextuser'], ENT_QUOTES);
            $wgOut->addHTML("<li>No points: This edit was reverted by {$nextUser}\n</li></ul><hr/>");
        }
        return 0;
    }

    $wordpoints = min(floor($addedwords / 100), 5);
    if ($showdetails) {
        $wgOut->addHTML("<li>Approx # of new words: " . $addedwords . ": {$wordpoints} points (1 point per 100 words, max 5)</li>");
    }
    $points += $wordpoints;

    // new images
    preg_match_all("@\\[\\[Image:[^\\]|\\|]*@", $newText, $images);
    $newimages = 0;
    foreach ($images[0] as $i) {
        if (strpos($oldText, $i) === false) {
            $newimages++;
        }
    }
    $newimagepoints = min($newimages, 2);
    $points += $newimagepoints;
    if ($showdetails) {
        $wgOut->addHTML("<li>Number of new images: " . $newimages . ": {$newimagepoints} points (1 point per image, max 2)</li>");
    }

    // new page points
    if ($d['newpage']) {
        if ($showdetails) {
            $wgOut->addHTML("<li>New page: 1 point</li>");
        }
        $points += 1;
    }

    // template points: one point for the first newly added template
    preg_match_all("@\\{\\{[^\\}]*\\}\\}@", $newText, $templates);
    foreach ($templates[0] as $t) {
        if (strpos($oldText, $t) === false && $t != "{{reflist}}") {
            if ($showdetails) {
                $wgOut->addHTML("<li>Template added: 1 point</li>");
            }
            $points++;
            break;
        }
    }

    // category added points: one point for the first newly added category
    preg_match_all("@\\[\\[Category:[^\\]]*\\]\\]@", $newText, $cats);
    foreach ($cats[0] as $c) {
        if (strpos($oldText, $c) === false) {
            if ($showdetails) {
                $wgOut->addHTML("<li>Category added: 1 point</li>");
            }
            $points++;
            break;
        }
    }

    $points = min($points, 10);
    if ($showdetails) {
        $wgOut->addHTML("</ul>");
        $wgOut->addHTML("<b>Total points: {$points}</b><hr/>");
    }
    return $points;
}
/**
 * Finds the article step that contains a given image and returns a short
 * caption for it.
 *
 * The steps section is looked up among sections 1-9 of the revision text.
 * The first step whose text contains the image file name (spaces compared as
 * dashes, case-insensitively) is used. In the new layout
 * ($this->bNewLayout_02) only a "From Step N" element is returned; otherwise
 * the flattened step text is appended to it. The result is cut to 250
 * characters followed by "...".
 *
 * No caption is produced for intro images: the original intro fallback had
 * its only assignment commented out, so that no-op branch is not kept here.
 *
 * @param object $r     Revision-like object providing getText().
 * @param string $image Image path or URL; only the part after the last '/'
 *                      is used.
 * @return string|null Caption HTML, or null when no step contains the image.
 */
function getStepText($r, $image) {
    global $wgParser;

    $stepsMsg = wfMsg('steps');
    // One pattern for both detecting and removing the heading, so a heading
    // written as "==Steps==" is removed as well as detected.
    $stepsHeading = '@==\\s*' . preg_quote($stepsMsg, '@') . '\\s*==@';

    // grab only the filename
    $pathParts = preg_split('@/@', $image);
    $image_name = $pathParts[count($pathParts) - 1];
    // removing -crop-... stuff
    $image_name = preg_replace('@-crop-600--600px-@', '', $image_name);
    // compare with dashes instead of spaces
    $image_name = preg_replace('@ @', '-', $image_name);

    $the_text = $r->getText();

    // Locate the steps section.
    $steps = '';
    for ($i = 1; $i < 10; $i++) {
        $section = $wgParser->getSection($the_text, $i);
        if (empty($section)) {
            break;
        }
        if (preg_match($stepsHeading, $section)) {
            $steps = preg_replace($stepsHeading, '', $section);
            break;
        }
    }

    $text = null;
    $stepnum = '';

    // An empty file name is never searched for, and without a steps section
    // there is nothing to search in.
    if ($image_name !== '' && $steps !== '') {
        $stepnums = preg_split('/^#[^*#]/m', $steps); // array of only the actual numbered steps
        $allSteps = Wikitext::splitSteps($steps);     // array includes steps w/in steps

        foreach ($allSteps as $s) {
            $s_comp = preg_replace('@ @', '-', $s);
            // Compare against false explicitly: stripos() returns 0 for a
            // match at the very start, which a truthiness test would miss.
            if (stripos($s_comp, $image_name) === false) {
                continue;
            }

            // get step number ($stepnums[0] is the text before the first step)
            for ($i = 1; $i < count($stepnums); $i++) {
                $the_step = preg_replace('@ @', '-', $stepnums[$i]);
                if (stripos($the_step, $image_name) !== false) {
                    $stepnum = $i;
                    break;
                }
            }

            $text = WikihowArticleEditor::textify($s);
            $text = preg_replace("@\\'\\'\\'@", "", $text); // remove bold
            // remove urls ("https?" so that https links are stripped too)
            $text = preg_replace("/https?:\\/\\/[^ ]+ /", " ", $text);

            if ($this->bNewLayout_02) {
                $text = '<div id="gs_text">From Step ' . $stepnum . '</div>';
            } else {
                $text = '<span>From Step ' . $stepnum . '</span><br /><br />' . $text;
            }
            break;
        }
    }

    // Truncate by characters, not bytes, so a multibyte character is never
    // cut in half.
    if ($text !== null && mb_strlen($text, 'UTF-8') > 250) {
        $text = mb_substr($text, 0, 250, 'UTF-8') . '...';
    }

    return $text;
}
/**
 * Checks a specific article for spelling mistakes.
 *
 * Existing rows for the article in spellchecker_page are deleted first. The
 * article text is then stripped of its sources and related sections, <ref>
 * tags, [[...]] links and __MAGIC__ words, flattened, and every word is passed
 * to wikiHowDictionary::spellCheckWord(). Each word id returned (> 0) is
 * recorded in spellchecker_page, and the article's row in spellchecker is
 * updated to say whether errors were found.
 *
 * Nothing further happens when the article has no title or no revision.
 *
 * @param object $dbw            Database connection used for writes.
 * @param int    $articleId      Page id of the article to check.
 * @param mixed  $pspell         Passed through to spellCheckWord().
 * @param mixed  $capsString     Passed through to spellCheckWord().
 * @param mixed  $whitelistArray Passed through to spellCheckWord().
 * @return void
 */
function spellCheckArticle(&$dbw, $articleId, &$pspell, &$capsString, &$whitelistArray) {
    // first remove all mistakes from the mapping table
    $dbw->delete('spellchecker_page', array('sp_page' => $articleId), __FUNCTION__);

    $title = Title::newFromID($articleId);
    if (!$title) {
        return;
    }

    $revision = Revision::newFromTitle($title);
    if (!$revision) {
        // The original used "continue" here, which is a fatal error outside
        // of a loop; leaving the function is what was intended.
        return;
    }

    $text = $revision->getText();

    // now need to remove the sections we're not going to check
    $wikiArticle = WikihowArticleEditor::newFromText($text);
    $sourceText = $wikiArticle->getSection(wfMsg('sources'));
    $newtext = str_replace($sourceText, "", $text);
    $relatedText = $wikiArticle->getSection(wfMsg('related'));
    $newtext = str_replace($relatedText, "", $newtext);

    // Remove reference tags and links. The quantifiers are lazy so that two
    // refs/links on one line do not also remove the ordinary text between them.
    $newtext = preg_replace('@<ref>[^<].*?</ref>@', "", $newtext);
    $newtext = preg_replace('@\\[\\[[^\\]].*?\\]\\]@', "", $newtext);
    // remove magic words
    $newtext = preg_replace('@__[^_]*__@', "", $newtext);
    // replace typographic apostrophes with plain ones
    $newtext = str_replace('’', "'", $newtext);
    $newtext = WikihowArticleEditor::textify($newtext);

    // u modifier allows for international characters
    preg_match_all('/\\b(\\w|\')+\\b/u', $newtext, $matches);

    $foundErrors = false;
    foreach ($matches[0] as $match) {
        $word_id = wikiHowDictionary::spellCheckWord($dbw, $match, $pspell, $capsString, $whitelistArray);
        if ($word_id > 0) {
            // insert into the mapping table
            $dbw->insert(
                'spellchecker_page',
                array('sp_page' => $articleId, 'sp_word' => $word_id),
                __FUNCTION__,
                array('IGNORE')
            );
            $foundErrors = true;
        }
    }

    if ($foundErrors) {
        // The id is forced to an integer because it is concatenated into the
        // statement; the timestamp is taken once so both uses agree.
        $pageId = (int)$articleId;
        $now = wfTimestampNow();
        $sql = "INSERT INTO spellchecker (sc_page, sc_timestamp, sc_dirty, sc_errors, sc_exempt) VALUES (" . $pageId . ", " . $now . ", 0, 1, 0) ON DUPLICATE KEY UPDATE sc_dirty = '0', sc_errors = '1', sc_timestamp = " . $now;
        $dbw->query($sql, __FUNCTION__);
    } else {
        $dbw->update(
            'spellchecker',
            array('sc_errors' => 0, 'sc_dirty' => 0),
            array('sc_page' => $articleId),
            __FUNCTION__
        );
    }
}
} else { echo implode(",", $suggestions) . "\n"; } } else { echo "no suggestions\n"; } }

/**
 * Passes every word of $string (runs of word characters and apostrophes) to
 * the spellCheckWord callback.
 *
 * @param string $string Text to check.
 * @return string Result of preg_replace_callback() over the text.
 */
function spellCheck($string) {
    return preg_replace_callback('/\\b(\\w|\')+\\b/', 'spellCheckWord', $string);
}

// Script body: spell-check the article named by the first argument, or a
// random page when no argument is given.
$t = null;
if (isset($argv[0])) {
    $t = Title::newFromURL(urldecode($argv[0]));
} else {
    $rp = new RandomPage();
    $t = $rp->getRandomTitle();
}
if (!$t) {
    // Without a title there is nothing to look up; stop instead of calling
    // a method on a non-object below.
    echo "can't get a title to check\n";
    exit(1);
}
echo "Doing {$t->getFullURL()}\n";

$r = Revision::newFromTitle($t);
if (!$r) {
    echo "can't get revision for this bad boy\n";
    // Stop here: the original fell through and called getText() on the
    // missing revision.
    exit(1);
}

$text = $r->getText();
$newtext = WikihowArticleEditor::textify($text, array('remove_ext_links' => 1));
echo "text ...{$newtext}\n\n";

$pspell = pspell_new('en', 'american', '', 'utf-8', PSPELL_FAST);
spellCheck($newtext);
// NOTE(review): $bad is not assigned anywhere in the visible code; presumably
// the spellCheckWord callback maintains it as a global - confirm.
if ($bad == 0) {
    echo "No misspellings\n";
}