/**
 * Runs a web search for passages of an article and prints the external hits.
 *
 * When no "query" request value is given, the article's current revision is
 * stripped of its steps header and templates, flattened with
 * WikihowArticleEditor::textify(), split on full stops and shuffled; the first
 * two sentences with more than 5 words (cut to 15 words) become a quoted
 * "A" AND "B" query. Hits on wikihow.com itself are dropped and at most three
 * of the remaining ones are rendered, each with a "Copyvio" link.
 *
 * @param string|null $par Target article; falls back to the "target" request value.
 * @return void
 */
public function execute($par)
 {
     global $wgRequest, $wgOut;
     $target = isset($par) ? $par : $wgRequest->getVal('target');
     if (is_null($target)) {
         $wgOut->addHTML("<b>Error:</b> No parameter passed to Copyrightchecker.");
         return;
     }
     $query = $wgRequest->getVal('query');
     wfLoadExtensionMessages('Newarticleboost');
     $title = Title::newFromURL($target);
     // An unparsable target yields no title; never hand that to Revision.
     $rev = $title ? Revision::newFromTitle($title) : null;
     $wgOut->setArticleBodyOnly(true);
     if (!$query) {
         // Get the text and strip the steps header, any templates,
         // flatten it to HTML and strip the tags
         if (!$rev) {
             echo "Revision for article not found by copyright check";
             return;
         }
         $wikitext = $rev->getText();
         // Quote the localized message so it cannot inject regex syntax.
         $wikitext = preg_replace("/^==[ ]+" . preg_quote(wfMsg('steps'), '/') . "[ ]+==/mix", "", $wikitext);
         $wikitext = preg_replace("/{{[^}]*}}/im", "", $wikitext);
         $wikitext = WikihowArticleEditor::textify($wikitext);
         $parts = preg_split("@\\.@", $wikitext);
         // Random order so repeated checks sample different sentences.
         shuffle($parts);
         $queries = array();
         foreach ($parts as $p) {
             $p = trim($p);
             // split() was removed in PHP 7; a literal space needs no regex.
             $words = explode(" ", $p);
             if (count($words) > 5) {
                 if (count($words) > 15) {
                     $words = array_slice($words, 0, 15);
                     $p = implode(" ", $words);
                 }
                 $queries[] = $p;
                 if (count($queries) == 2) {
                     break;
                 }
             }
         }
         $query = '"' . implode('" AND "', $queries) . '"';
     }
     require_once dirname(__FILE__) . '/GoogleAjaxSearch.class.php';
     $results = GoogleAjaxSearch::getGlobalWebResults($query, 8, null);
     // Filter out results from wikihow.com (desktop and mobile, http and https).
     // is_array() is checked first: counting a non-array throws on PHP 8.
     $external = array();
     if (is_array($results)) {
         foreach ($results as $result) {
             if (preg_match('@^https?://(www|m)\\.wikihow\\.com/@i', $result['url'])) {
                 continue;
             }
             $external[] = $result;
         }
     }
     // The query may come straight from the request, so escape it for output.
     $safeQuery = htmlspecialchars($query, ENT_QUOTES);
     // Process results
     if (count($external) > 0) {
         $wgOut->addHTML(wfMsg("nap_copyrightlist", $safeQuery) . "<table width='100%'>");
         foreach (array_slice($external, 0, 3) as $match) {
             $c = json_decode($match['content']);
             // URLs come from a third-party service: escape for the
             // single-quoted attributes, and JSON-encode for the JS argument.
             $url = htmlspecialchars($match['url'], ENT_QUOTES);
             $jsUrl = htmlspecialchars(json_encode($match['url']), ENT_QUOTES);
             // NOTE(review): title and content are emitted raw, presumably
             // because the search API returns highlighted HTML - confirm.
             $wgOut->addHTML("<tr><td><a href='{$url}' target='new'>{$match['title']}</a>\n\t\t\t\t\t<br/>{$c}\n\t\t\t\t\t<br/><font size='-2'>{$url}</font></td><td style='width: 100px; text-align: right; vertical-align: top;'><a href='' onclick='return nap_copyVio({$jsUrl});'>Copyvio</a></td></tr>");
         }
         $wgOut->addHTML("</table>");
     } else {
         $wgOut->addHTML(wfMsg('nap_nocopyrightfound', $safeQuery));
     }
 }
Example #2
0
 /**
  * Scores a single edit on a 0-10 scale and optionally prints the breakdown.
  *
  * Points: 1 per 100 added/changed words (max 5), 1 per new image (max 2),
  * 1 for a new page, 1 if any new template other than {{reflist}} appears,
  * 1 if any new category appears. Reverts and reverted edits score 0.
  *
  * @param object $r           Object whose mComment holds the edit summary.
  * @param array  $d           Edit data: 'revhi' (new revision), optional 'revlo'
  *                            (old revision), 'newpage', 'nextcomment', 'nextuser'.
  * @param object $de          Object providing generateDiffBody($old, $new).
  * @param bool   $showdetails When true, the breakdown is written to $wgOut.
  * @return int|float Points for the edit, capped at 10.
  */
 function getPoints($r, $d, $de, $showdetails = false)
 {
     global $wgOut;
     $points = 0;
     $oldText = "";
     if (!empty($d['revlo'])) {
         $oldText = $d['revlo']->mText;
     }
     $newText = $d['revhi']->mText;
     // "A-z" also covers [ \ ] ^ _ and the backtick; only letters are wanted.
     $flatOldText = preg_replace("@[^a-zA-Z]@", "", WikihowArticleEditor::textify($oldText));
     // get the points based on number of new / changed words
     // Use the guarded $oldText: there is no 'revlo' revision for new pages.
     $diffhtml = $de->generateDiffBody($oldText, $newText);
     $addedwords = 0;
     preg_match_all('@<span class="diffchange diffchange-inline">[^>]*</span>@m', $diffhtml, $matches);
     foreach ($matches[0] as $m) {
         $m = WikihowArticleEditor::textify($m);
         preg_match_all("@\\b\\w+\\b@", $m, $words);
         $addedwords += count($words[0]);
     }
     preg_match_all('@<td class="diff-addedline">(.|\\n)*</td>@Um', $diffhtml, $matches);
     foreach ($matches[0] as $m) {
         if (preg_match("@diffchange-inline@", $m)) {
             // already accounted for in change-inline
             continue;
         }
         $m = WikihowArticleEditor::textify($m);
         // account for changes in formatting and punctuation
         // by flattening out the change piece of text and comparing to the
         // flattened old version of the text
         $flatM = preg_replace("@[^a-zA-Z]@", "", $m);
         if (!empty($flatM) && strpos($flatOldText, $flatM) !== false) {
             continue;
         }
         preg_match_all("@\\b\\w+\\b@", $m, $words);
         $addedwords += count($words[0]);
     }
     if ($showdetails) {
         $wgOut->addHTML("<h3>Points for edit (10 max):</h3><ul>");
     }
     if (preg_match("@Reverted@", $r->mComment)) {
         if ($showdetails) {
             $wgOut->addHTML("<li>No points : reverted edit.</li></ul><hr/>");
         }
         return 0;
     }
     // User names may contain regex metacharacters, so quote them.
     $nextComment = isset($d['nextcomment']) ? $d['nextcomment'] : '';
     if (preg_match("@Reverted edits by.*" . preg_quote($d['revhi']->mUserText, '@') . "@", $nextComment)) {
         if ($showdetails) {
             $wgOut->addHTML("<li>No points: This edit was reverted by {$d['nextuser']}\n</li></ul><hr/>");
         }
         return 0;
     }
     $wordpoints = min(floor($addedwords / 100), 5);
     if ($showdetails) {
         $wgOut->addHTML("<li>Approx # of new words: " . $addedwords . ": {$wordpoints} points (1 point per 100 words, max 5)</li>");
     }
     $points += $wordpoints;
     // new images: an image counts when its "[[Image:name" prefix is absent from the old text
     preg_match_all("@\\[\\[Image:[^\\]|\\|]*@", $newText, $images);
     $newimages = 0;
     foreach ($images[0] as $i) {
         if (strpos($oldText, $i) === false) {
             $newimages++;
         }
     }
     $newimagepoints = min($newimages, 2);
     $points += $newimagepoints;
     if ($showdetails) {
         $wgOut->addHTML("<li>Number of new images: " . $newimages . ": {$newimagepoints} points (1 point per image, max 2)</li>");
     }
     // new page points
     if (!empty($d['newpage'])) {
         if ($showdetails) {
             $wgOut->addHTML("<li>New page: 1 point</li>");
         }
         $points += 1;
     }
     // template points: one point in total, however many templates were added
     preg_match_all("@\\{\\{[^\\}]*\\}\\}@", $newText, $templates);
     foreach ($templates[0] as $t) {
         if (strpos($oldText, $t) === false && $t != "{{reflist}}") {
             if ($showdetails) {
                 $wgOut->addHTML("<li>Template added: 1 point</li>");
             }
             $points++;
             break;
         }
     }
     // category added points: likewise one point in total
     preg_match_all("@\\[\\[Category:[^\\]]*\\]\\]@", $newText, $cats);
     foreach ($cats[0] as $c) {
         if (strpos($oldText, $c) === false) {
             if ($showdetails) {
                 $wgOut->addHTML("<li>Category added: 1 point</li>");
             }
             $points++;
             break;
         }
     }
     $points = min($points, 10);
     if ($showdetails) {
         $wgOut->addHTML("</ul>");
         $wgOut->addHTML("<b>Total points: {$points}</b><hr/>");
     }
     return $points;
 }
 /**
  * Finds the step of an article that uses the given image and returns a short
  * "From Step N" caption for it.
  *
  * The image file name is normalised (path and "-crop-600--600px-" removed,
  * spaces turned into dashes) and searched case-insensitively in the steps
  * section of the revision text, with spaces in each step dashed the same way.
  *
  * @param object $r     Revision-like object providing getText().
  * @param string $image Image path or URL; only the last path segment is used.
  * @return string|null Caption HTML (cut to 250 bytes plus "..."), or null when
  *                     no step references the image.
  */
 function getStepText($r, $image)
 {
     global $wgParser;
     // Quoted so a localized header cannot inject regex syntax.
     $stepsHeader = '@==\\s*' . preg_quote(wfMsg('steps'), '@') . '\\s*==@';
     //grab only the filename
     $pathParts = preg_split('@/@', $image);
     $image_name = $pathParts[count($pathParts) - 1];
     //removing -crop-... stuff
     $image_name = preg_replace('@-crop-600--600px-@', '', $image_name);
     //replace spaces with dashes for the compare
     $image_name = preg_replace('@ @', '-', $image_name);
     $the_text = $r->getText();
     // Defaults for the "nothing found" paths, so nothing is read undefined.
     $steps = '';
     $stepnum = '';
     $text = null;
     for ($i = 1; $i < 10; $i++) {
         $section = $wgParser->getSection($the_text, $i);
         if (empty($section)) {
             break;
         }
         if (preg_match($stepsHeader, $section)) {
             // Strip the header with the same pattern that detected it, so
             // "==Steps==" is removed as reliably as "== Steps ==".
             $steps = preg_replace($stepsHeader, '', $section);
             break;
         }
     }
     //array of only the actual numbered steps
     $stepnums = preg_split('/^#[^*#]/m', $steps);
     //array includes steps w/in steps
     $steps = Wikitext::splitSteps($steps);
     // An empty needle would match everywhere (PHP 8) or warn (PHP < 8).
     if ($image_name !== '') {
         foreach ($steps as $s) {
             //replace spaces with dashes for the compare
             $s_comp = preg_replace('@ @', '-', $s);
             // stripos() returns 0 for a match at the very start, so compare
             // against false instead of relying on truthiness.
             if (stripos($s_comp, $image_name) === false) {
                 continue;
             }
             //get step number
             for ($i = 1; $i < count($stepnums); $i++) {
                 $the_step = preg_replace('@ @', '-', $stepnums[$i]);
                 if (stripos($the_step, $image_name) !== false) {
                     $stepnum = $i;
                     break;
                 }
             }
             $text = WikihowArticleEditor::textify($s);
             //remove bold
             $text = preg_replace("@\\'\\'\\'@", "", $text);
             //remove urls (both http and https)
             $text = preg_replace("/https?:\\/\\/[^ ]+ /", " ", $text);
             if ($this->bNewLayout_02) {
                 $text = '<div id="gs_text">From Step ' . $stepnum . '</div>';
             } else {
                 $text = '<span>From Step ' . $stepnum . '</span><br /><br />' . $text;
             }
             break;
         }
     }
     if (!$text && !$this->bNewLayout_02) {
         //oh, is this the intro image?
         $intro = Wikitext::getIntro($the_text);
         $image_name = preg_replace('@-@', ' ', $image_name);
         if ($image_name !== '' && stripos($intro, $image_name) !== false) {
             // Intentionally produces no text: the intro lookup is disabled.
             //$text = Wikitext::flatten($intro);
         }
     }
     // NOTE(review): the limit is counted in bytes and includes the caption
     // markup; multi-byte text may be cut mid-character.
     if ($text !== null && strlen($text) > 250) {
         $text = substr($text, 0, 250) . '...';
     }
     return $text;
 }
Example #4
0
/**
 * Checks a specific article for spelling mistakes.
 *
 * Clears the article's rows in spellchecker_page, checks every word of the
 * latest revision (minus the sources and related sections, <ref> tags, wiki
 * links and magic words) with wikiHowDictionary::spellCheckWord(), records
 * each flagged word in spellchecker_page and finally updates the article's
 * row in spellchecker. Does nothing further when the title or its revision
 * cannot be loaded.
 *
 * @param object $dbw            Database handle used for all writes.
 * @param int    $articleId      Page id of the article to check.
 * @param mixed  $pspell         Passed through to spellCheckWord().
 * @param mixed  $capsString     Passed through to spellCheckWord().
 * @param mixed  $whitelistArray Passed through to spellCheckWord().
 * @return void
 */
function spellCheckArticle(&$dbw, $articleId, &$pspell, &$capsString, &$whitelistArray)
{
    //first remove all mistakes from the mapping table
    $dbw->delete('spellchecker_page', array('sp_page' => $articleId), __FUNCTION__);
    $title = Title::newFromID($articleId);
    if (!$title) {
        return;
    }
    $revision = Revision::newFromTitle($title);
    if (!$revision) {
        // The original used "continue" here, which is a compile-time fatal
        // outside a loop on PHP 7+; leaving the function is what was meant.
        return;
    }
    $text = $revision->getText();
    //now need to remove the sections we're not going to check
    $wikiArticle = WikihowArticleEditor::newFromText($text);
    $sourceText = $wikiArticle->getSection(wfMsg('sources'));
    $newtext = str_replace($sourceText, "", $text);
    $relatedText = $wikiArticle->getSection(wfMsg('related'));
    $newtext = str_replace($relatedText, "", $newtext);
    // NOTE(review): the next two patterns are greedy, so on a line with
    // several refs/links the text between them is dropped as well.
    //remove reference tags
    $newtext = preg_replace('@<ref>[^<].*</ref>@', "", $newtext);
    //remove links
    $newtext = preg_replace('@\\[\\[[^\\]].*\\]\\]@', "", $newtext);
    //remove magic words
    $newtext = preg_replace('@__[^_]*__@', "", $newtext);
    //replace weird apostrophes
    $newtext = str_replace('’', "'", $newtext);
    $newtext = WikihowArticleEditor::textify($newtext);
    //u modifier allows for international characters
    preg_match_all('/\\b(\\w|\')+\\b/u', $newtext, $matches);
    $foundErrors = false;
    foreach ($matches[0] as $match) {
        $word_id = wikiHowDictionary::spellCheckWord($dbw, $match, $pspell, $capsString, $whitelistArray);
        if ($word_id > 0) {
            //insert into the mapping table
            $dbw->insert('spellchecker_page', array('sp_page' => $articleId, 'sp_word' => $word_id), __FUNCTION__, array('IGNORE'));
            $foundErrors = true;
        }
    }
    if ($foundErrors) {
        // Never concatenate raw values into SQL: force the id to an integer
        // and let the database layer quote the timestamp. One timestamp is
        // taken so the INSERT and UPDATE halves always agree.
        $pageId = intval($articleId);
        $now = $dbw->addQuotes(wfTimestampNow());
        $sql = "INSERT INTO spellchecker (sc_page, sc_timestamp, sc_dirty, sc_errors, sc_exempt) VALUES (" . $pageId . ", " . $now . ", 0, 1, 0) ON DUPLICATE KEY UPDATE sc_dirty = '0', sc_errors = '1', sc_timestamp = " . $now;
        $dbw->query($sql, __FUNCTION__);
    } else {
        $dbw->update('spellchecker', array('sc_errors' => 0, 'sc_dirty' => 0), array('sc_page' => $articleId), __FUNCTION__);
    }
}
Example #5
0
        } else {
            echo implode(",", $suggestions) . "\n";
        }
    } else {
        echo "no suggestions\n";
    }
}
/**
 * Passes every word of $string through the spellCheckWord() callback.
 *
 * A word is a run of word characters and apostrophes between word boundaries;
 * each one is replaced by whatever the callback returns for it.
 *
 * @param string $string Text to check.
 * @return string|null Text with each word replaced by the callback's result.
 */
function spellCheck($string)
{
    $wordPattern = '/\\b(\\w|\')+\\b/';
    $checked = preg_replace_callback($wordPattern, 'spellCheckWord', $string);
    return $checked;
}
// Pick the page to check: the one named on the command line, else a random one.
// NOTE(review): in plain PHP CLI $argv[0] is the script name, so the random
// branch is only reachable if the bootstrap shifted $argv - confirm.
$t = null;
if (isset($argv[0])) {
    $t = Title::newFromURL(urldecode($argv[0]));
} else {
    $rp = new RandomPage();
    $t = $rp->getRandomTitle();
}
if (!$t) {
    // Without a title the method calls below would fail on a non-object.
    echo "can't get a title to check\n";
    exit(1);
}
echo "Doing {$t->getFullURL()}\n";
$r = Revision::newFromTitle($t);
if (!$r) {
    echo "can't get revision for this bad boy\n";
    // Stop here: the original fell through and called getText() on nothing.
    exit(1);
}
$text = $r->getText();
$newtext = WikihowArticleEditor::textify($text, array('remove_ext_links' => 1));
echo "text ...{$newtext}\n\n";
$pspell = pspell_new('en', 'american', '', 'utf-8', PSPELL_FAST);
spellCheck($newtext);
// $bad is presumably a counter maintained by spellCheckWord(); treat it as
// zero when it was never set, rather than reading an undefined variable.
if (!isset($bad) || $bad == 0) {
    echo "No misspellings\n";
}