/**
 * Pick a random selection of article URLs from the page_randomizer table.
 *
 * All pr_title rows are loaded and shuffled, then walked in that order.
 * A row is kept when its title resolves, its wikitext loads, it has a
 * steps section, and parent::getNumStepsImages() reports at least
 * $minImages for that section.
 *
 * The size limit is tested only after a URL has been appended, so the
 * walk stops as soon as count >= $maxUrls.
 *
 * @param int $maxUrls   stop once this many URLs have been collected
 * @param int $minImages minimum image count required in the steps section
 * @return array list of partial URLs (Title::getPartialURL())
 */
public static function getSelection($maxUrls, $minImages)
 {
     $dbr = wfGetDB(DB_SLAVE);
     $query = 'SELECT pr_title FROM page_randomizer';
     $candidates = parent::loadRows($dbr, $query, '', __METHOD__);
     shuffle($candidates);

     $selected = array();
     foreach ($candidates as $candidate) {
         $title = Title::newFromDBkey($candidate['pr_title']);
         if (!$title) {
             continue;
         }

         $wikitext = parent::getWikitext($dbr, $title);
         if (!$wikitext) {
             continue;
         }

         list($stepsText, ) = Wikitext::getStepsSection($wikitext);
         if (!$stepsText) {
             continue;
         }

         $imageCount = parent::getNumStepsImages($stepsText);
         if (!($imageCount >= $minImages)) {
             continue;
         }

         $selected[] = $title->getPartialURL();
         if (count($selected) >= $maxUrls) {
             break;
         }
     }
     return $selected;
 }
Exemplo n.º 2
0
 /**
  * Walk the URL list, strip the intro images from each article and,
  * when $px > 0, try to reuse the intro image as a new final step.
  *
  * Each entry of $urls is updated in place with two keys:
  *   - 'err':        '' when nothing went wrong, a short failure message,
  *                   or whatever Wikitext::saveWikitext() returned;
  *   - 'final_step': 'x' when a final step was appended, 'x (alt)' when
  *                   the resulting steps text also contains a "===" heading,
  *                   '' when no final step was made.
  *
  * The article is only saved when removeIntroImages() actually changed the
  * intro; a freshly appended final step is written out by that same save.
  *
  * @param array  $urls entries with a 'title' element; modified by reference
  * @param int    $px   forwarded to makeFinalStep(); no final step unless > 0
  * @param string $text forwarded to makeFinalStep()
  */
 private static function removeIntroImagesUrls(&$urls, $px, $text)
 {
     $dbr = wfGetDB(DB_SLAVE);
     foreach ($urls as &$entry) {
         $error = '';
         $finalStepFlag = '';
         $title = $entry['title'];

         // Load the wikitext and its intro up front; both stay empty when
         // the title is missing or nothing could be loaded.
         $wikitext = $title ? Wikitext::getWikitext($dbr, $title) : '';
         $intro = $wikitext ? Wikitext::getIntro($wikitext) : '';

         if (!$title) {
             $error = 'Unable to load article';
         } elseif (!$intro) {
             $error = 'Unable to load wikitext';
         } else {
             // First, try to turn the intro image into the final step.
             if ($px > 0) {
                 $appendedStep = self::makeFinalStep($intro, $px, $text);
                 if ($appendedStep) {
                     list($stepsText, $sectionID) = Wikitext::getStepsSection($wikitext, true);
                     $stepsText .= $appendedStep;
                     $wikitext = Wikitext::replaceStepsSection($wikitext, $sectionID, $stepsText, true);
                     // 'x (alt)' flags success on an article that has sub-headed (alternate) methods.
                     $hasSubHeadings = preg_match("@[\r\n]+===[^=]*===@m", $stepsText);
                     $finalStepFlag = $hasSubHeadings ? 'x (alt)' : 'x';
                 }
             }

             $cleanedIntro = self::removeIntroImages($intro, $title);
             if (!$cleanedIntro || $cleanedIntro == $intro) {
                 $error = 'Either no intro image or no intro found';
             } else {
                 $wikitext = Wikitext::replaceIntro($wikitext, $cleanedIntro, true);
                 $comment = 'Removing intro images';
                 if ($finalStepFlag == 'x') {
                     $comment .= '; Made final step out of former intro image';
                 }
                 $error = Wikitext::saveWikitext($title, $wikitext, $comment);
                 if (empty($error)) {
                     // Mark the page so the intro image adder doesn't pick it up again.
                     $pageId = $title->getArticleID();
                     if ($pageId) {
                         $dbw = wfGetDB(DB_MASTER);
                         $dbw->update('imageadder', array('imageadder_hasimage' => 1), array('imageadder_page' => $pageId));
                     }
                 }
             }
         }

         $entry['err'] = $error;
         $entry['final_step'] = $finalStepFlag;
     }
 }
Exemplo n.º 3
0
 /**
  * Load an article's wikitext together with the text of its steps section.
  *
  * @param Title $title article to load
  * @return array array($wikitext, $stepsText); $stepsText is '' when no
  *   wikitext could be loaded
  */
 private static function getWikitext($title)
 {
     $wikitext = Wikitext::getWikitext(wfGetDB(DB_SLAVE), $title);
     if (!$wikitext) {
         return array($wikitext, '');
     }
     list($stepsText, ) = Wikitext::getStepsSection($wikitext, true);
     return array($wikitext, $stepsText);
 }
 /**
  * Load an article's wikitext plus the text and section id of its steps
  * section.
  *
  * Articles whose wikitext contains the '‐' character are rejected: an
  * empty array is returned for them (existing hack, kept as-is).
  *
  * @param Title $title article to load
  * @return array array($wikitext, $stepsText, $sectionID), or array() when
  *   the rejected character is present. $stepsText is '' and $sectionID is
  *   null when no wikitext could be loaded.
  */
 private static function getWikitext($title)
 {
     $dbr = wfGetDB(DB_SLAVE);
     $wikitext = Wikitext::getWikitext($dbr, $title);
     $stepsText = '';
     // Initialised so the return below never reads an undefined variable
     // when the wikitext could not be loaded.
     $sectionID = null;
     if ($wikitext) {
         //hack for illegal character
         // strpos() returns 0 for a match at the very start of the text, so
         // it must be compared against false rather than used as a boolean.
         if (strpos($wikitext, '‐') !== false) {
             return array();
         }
         list($stepsText, $sectionID) = Wikitext::getStepsSection($wikitext, true);
     }
     return array($wikitext, $stepsText, $sectionID);
 }
Exemplo n.º 5
0
/**
 * Build a short HTML summary of an article's steps.
 *
 * The steps section is extracted from $wikitext. If it contains alternate
 * methods, the first piece that is an alternate method with at least one
 * step is summarised instead of the whole section. Each "simple" step
 * (per Wikitext::isStepSimple) that yields a non-empty summary becomes a
 * numbered line; lines are separated by "<br>". After $maxSteps lines, a
 * link to $fullURL announcing the remaining steps replaces the rest.
 *
 * @param string $wikitext full article wikitext
 * @param int    $maxSteps maximum number of numbered steps to output
 * @param string $fullURL  article URL used for the "more steps" link
 * @return string HTML fragment, or '' when no steps section is found
 */
function synthesizeSummary($wikitext, $maxSteps, $fullURL)
{
    $stepsSec = Wikitext::getStepsSection($wikitext, true);
    if (!$stepsSec) {
        return '';
    }
    $stepsText = Wikitext::stripHeader($stepsSec[0]);
    if (Wikitext::countAltMethods($stepsText) > 0) {
        // Summarise only the first alternate method that actually has steps.
        foreach (Wikitext::splitAltMethods($stepsText) as $method) {
            if (Wikitext::isAltMethod($method) && Wikitext::countSteps($method) > 0) {
                $stepsText = $method;
                break;
            }
        }
    }
    $countSteps = Wikitext::countSteps($stepsText);
    $summaryOut = '';
    $count = 0;
    foreach (Wikitext::splitSteps($stepsText) as $step) {
        if (!Wikitext::isStepSimple($step, false)) {
            continue;
        }
        $summary = Wikitext::summarizeStep($step);
        $summary = Wikitext::removeRefsFromFlattened($summary);
        if (!$summary) {
            continue;
        }
        $count++;
        // No separator in front of the very first line.
        $break = $count > 1 ? "<br>" : '';
        if ($count <= $maxSteps) {
            $summaryOut .= "{$break}{$count}. {$summary}";
            continue;
        }
        // Limit reached: emit a link to the remaining steps (if any) and stop.
        $remaining = $countSteps - $maxSteps;
        $text = '';
        if ($remaining >= 2) {
            $text = "{$remaining} more steps at wikiHow";
        } elseif ($remaining == 1) {
            $text = "Another step at wikiHow";
        }
        if ($text) {
            // The attribute below is single-quoted, so single quotes must be
            // escaped too; before PHP 8.1 the default flags leave them alone.
            $href = htmlspecialchars($fullURL, ENT_QUOTES);
            $link = "<a href='{$href}'>{$text}</a>";
            $summaryOut .= "{$break}{$link}";
        }
        break;
    }
    return $summaryOut;
}
Exemplo n.º 6
0
 /**
  * Resize the images in an article's steps section and save the result.
  *
  * Failures (wikitext cannot be loaded, page is a redirect, or an error
  * reported by resizeEachImage()/saveWikitext()) are passed to logError()
  * together with the article URL; successes are passed to logIt().
  *
  * @param string $article URL form of the article title (Title::newFromURL)
  * @return string|null "$wgServer/$article" when the article was saved
  *   without error, null otherwise (including a missing title)
  */
 private function resizeImages($article)
 {
     global $wgServer;
     // The original tested an undefined local $dbw before assigning it;
     // the connection is always needed, so fetch it unconditionally.
     $dbw = wfGetDB(DB_MASTER);
     $err = '';
     // Initialised so the success check at the bottom is well-defined on
     // the "no wikitext" and "redirect" paths.
     $stepsText = '';
     $title = Title::newFromURL($article);
     if (!$title || !$title->exists()) {
         return;
     }
     $wikitext = Wikitext::getWikitext($dbw, $title);
     if (!$wikitext) {
         $err = 'Unable to load wikitext';
     } elseif (preg_match('@^#REDIRECT@m', $wikitext)) {
         $err = 'REDIRECT';
     } else {
         list($stepsText, $sectionID) = Wikitext::getStepsSection($wikitext, true);
         list($stepsText, $err) = self::resizeEachImage($stepsText);
         if ($stepsText) {
             $wikitext = Wikitext::replaceStepsSection($wikitext, $sectionID, $stepsText, true);
             $comment = 'Resized images to the {{largeimage}} size.';
             // Note: this replaces any error reported by resizeEachImage().
             $err = Wikitext::saveWikitext($title, $wikitext, $comment);
         }
     }
     $articleUrl = $wgServer . '/' . $article;
     if ($err) {
         // Tab-separated: error message, then article URL.
         $err .= chr(9) . $articleUrl;
         self::logError($err);
     } elseif ($stepsText) {
         self::logIt($articleUrl);
         return $articleUrl;
     }
 }
Exemplo n.º 7
0
 /**
  * Handle API calls to get the steps from an article.
  *
  * Reads the comma-separated "articleIds" request value and, for every
  * numeric id whose revision loads, outputs the intro and the steps text
  * as JSON keyed by article id. Lines after the last line starting with
  * '#' are dropped from the steps text; when no line starts with '#', only
  * the first line is kept. Articles whose resulting steps text is empty are
  * omitted from the output.
  */
 private function doAPI()
 {
     global $wgRequest, $wgOut, $wgContLang;
     // explode() on a string cast avoids passing null to a string function
     // when the parameter is absent.
     $articleIds = explode(',', (string) $wgRequest->getVal("articleIds"));
     $dbr = wfGetDB(DB_SLAVE);
     $wgOut->setArticleBodyOnly(true);
     $articles = array();
     foreach ($articleIds as $articleId) {
         if (!is_numeric($articleId)) {
             continue;
         }
         $r = Revision::loadFromPageId($dbr, $articleId);
         if (!$r) {
             continue;
         }
         $txt = $r->getText();
         $intro = Wikitext::getIntro($txt);
         $section = Wikitext::getStepsSection($txt, true);
         // Same guard the non-truncating doAPI() variant applies.
         if (!is_array($section) || !isset($section[0])) {
             continue;
         }
         $lines = explode("\n", $section[0]);
         // We remove extra lines technically in the 'steps' section, but
         // which don't actually contain steps.
         // Find the last line starting with a '#'. strncmp() is safe on
         // empty lines, where $line[0] would be an invalid string offset.
         $lastLine = 0;
         foreach ($lines as $n => $line) {
             if (strncmp($line, '#', 1) === 0) {
                 $lastLine = $n;
             }
         }
         // Truncate lines after the last line with a '#'.
         $text = implode("\n", array_slice($lines, 0, $lastLine + 1));
         if (strlen($text) > 0) {
             $articles[$articleId] = array("steps" => $text, "intro" => $intro, "altImageTags" => array($wgContLang->getNSText(NS_IMAGE)));
         }
     }
     $wgOut->addHTML(json_encode($articles));
 }
Exemplo n.º 8
0
 /**
  * Handle API calls to get the steps from an article.
  *
  * Reads the comma-separated "articleIds" request value and outputs, as
  * JSON keyed by article id, the intro and the steps section text of every
  * numeric id whose revision loads and whose steps lookup returns a
  * non-empty array.
  */
 private function doAPI()
 {
     global $wgRequest, $wgOut, $wgContLang;
     $requestedIds = preg_split("@,@", $wgRequest->getVal("articleIds"));
     $dbr = wfGetDB(DB_SLAVE);
     $wgOut->setArticleBodyOnly(true);

     $payload = array();
     foreach ($requestedIds as $pageId) {
         if (!is_numeric($pageId)) {
             continue;
         }
         $revision = Revision::loadFromPageId($dbr, $pageId);
         if (!$revision) {
             continue;
         }
         $wikitext = $revision->getText();
         $intro = Wikitext::getIntro($wikitext);
         $section = Wikitext::getStepsSection($wikitext, true);
         if (!is_array($section) || count($section) <= 0) {
             continue;
         }
         $payload[$pageId] = array(
             "steps" => $section[0],
             "intro" => $intro,
             "altImageTags" => array($wgContLang->getNSText(NS_IMAGE)),
         );
     }
     $wgOut->addHTML(json_encode($payload));
 }
Exemplo n.º 9
0
 /**
  * The process of adding and removing all articles from the randomizer
  * set.
  *
  * Every article returned by loadArticles() is classified: it is added to
  * the randomizer when at least one inclusion reason applies, and removed
  * otherwise (title or wikitext missing, excluded via template, or no
  * reason matched). The reasons are recorded for every article via
  * dbReplaceReasons().
  *
  * @param int $from unix timestamp indicates from when to process.  0
  *   means the epoch; in that case the randomizer table is cleared before
  *   the new rows are inserted instead of removing rows individually.
  */
 private static function processArticles($from)
 {
     $dbr = wfGetDB(DB_SLAVE);
     // Build the randomizer rows into a fresh array. The previous version
     // rewrote the rows through a by-reference foreach and then reused the
     // same variable name in the loop below; the dangling reference made
     // the last article get overwritten by the second-to-last one.
     $articles = array();
     foreach (self::loadArticles($dbr, $from) as $key => $loaded) {
         $articles[$key] = array('pr_id' => $loaded['page_id'], 'pr_namespace' => NS_MAIN, 'pr_title' => $loaded['page_title'], 'pr_random' => wfRandom(), 'pr_catinfo' => $loaded['page_catinfo'], 'pr_updated' => wfTimestampNow());
     }
     $featured = self::loadFeaturedArticles($dbr);
     $rising = self::loadRisingStars($dbr);
     $views = self::loadHighViews($dbr);
     $edits = self::loadHighEdits($dbr);
     $rated = self::loadHighlyRated($dbr);
     $add = array();
     $remove = array();
     $reasons = array();
     foreach ($articles as $article) {
         $reason = array();
         $toadd = true;
         $id = $article['pr_id'];
         $title = Title::newFromDBkey($article['pr_title']);
         $wikitext = $title ? self::getWikitext($dbr, $title) : '';
         if (!$wikitext) {
             // Either the title could not be built or no text was loaded.
             $toadd = false;
             $reason[] = 'does-not-exist';
         } else {
             $intro = Wikitext::getIntro($wikitext);
             list($steps, ) = Wikitext::getStepsSection($wikitext);
             if (self::excludeViaTemplates($intro)) {
                 $reason[] = 'excluded-via-template';
                 $toadd = false;
             } else {
                 $images = self::getNumStepsImages($steps);
                 if (isset($featured[$id])) {
                     $reason[] = 'featured';
                 }
                 if ($images && isset($rising[$id])) {
                     $reason[] = 'rising';
                 }
                 if ($images && isset($rated[$id])) {
                     $reason[] = 'highly-rated';
                 }
                 if ($images && isset($views[$id]) && isset($edits[$id])) {
                     $reason[] = 'views-and-edits';
                 }
                 if ($images && self::hasAlternateMethods($steps)) {
                     $reason[] = 'views-and-alternate-methods';
                 }
                 if ($images && isset($views[$id]) && self::getNumSteps($steps) >= 9) {
                     $reason[] = 'views-and-nine-steps';
                 }
                 // Reuses the count computed above instead of recounting.
                 if ($images >= 3) {
                     $reason[] = 'three-steps-images';
                 }
                 if ($images && isset($views[$id]) && self::hasVideo($wikitext)) {
                     $reason[] = 'views-and-video';
                 }
                 if (empty($reason)) {
                     $reason[] = 'no-match';
                     $toadd = false;
                 }
             }
         }
         if ($toadd) {
             $add[] = $article;
         } else {
             $remove[] = $article;
         }
         // Both text columns are cut to 255 characters.
         $reasons[] = array('dprr_id' => $id, 'dprr_namespace' => NS_MAIN, 'dprr_title' => substr($article['pr_title'], 0, 255), 'dprr_reasons' => substr(join(',', $reason), 0, 255));
     }
     $dbw = wfGetDB(DB_MASTER);
     if (!$from) {
         // do this right before we insert a bunch of new rows
         self::dbClearRandomizer($dbw);
     } else {
         self::dbSaveRandom($dbw, $remove, false);
     }
     self::dbSaveRandom($dbw, $add, true);
     self::dbReplaceReasons($dbw, $reasons);
 }
Exemplo n.º 10
0
 /**
  * Compute the significant edits of an article: for each revision up to
  * the good revision, how many characters of the good revision's steps
  * section it newly contributed.
  *
  * A stored result (getSigEdits) is returned when available. Otherwise the
  * revisions are read oldest-first up to and including the good revision,
  * each revision's steps section is diffed against the good revision's
  * steps section, and the characters shared with the good revision are
  * counted. A revision is recorded when it raises that count above the
  * previous maximum, unless the preceding revision shared nothing at all
  * with the good revision (see the comment in the loop). Non-empty results
  * are stored via saveSigEdits().
  *
  * @param int $articleId page id
  * @return array list of arrays with keys 'added', 'gr', 'rev', 'page',
  *   'user', 'username' (or whatever getSigEdits() returned)
  */
 public static function getEdits($articleId)
 {
     global $wgContLang;
     $dbr = wfGetDB(DB_SLAVE);
     $gr = $dbr->selectField('good_revision', array('gr_rev'), array('gr_page' => $articleId));
     $edits = self::getSigEdits($gr);
     if ($edits) {
         return $edits;
     }
     // MediaWiki's select() only recognises the upper-case 'ORDER BY'
     // option key; the lower-case spelling was silently ignored.
     $res = $dbr->select(array('revision', 'text'), array('rev_id', 'old_text', 'old_flags', 'rev_timestamp', 'rev_user', 'rev_user_text'), array('rev_page' => $articleId, 'rev_text_id = old_id'), __METHOD__, array('ORDER BY' => 'rev_timestamp ASC'));
     $txts = array();
     $grText = false;
     foreach ($res as $row) {
         $flags = explode(',', $row->old_flags);
         $rowText = Revision::decompressRevisionText($row->old_text, $flags);
         $stepsSection = Wikitext::getStepsSection($rowText);
         // rev_page is not among the selected columns; every row belongs to
         // $articleId by the WHERE clause, so use that directly.
         $txts[] = array('text' => $stepsSection[0], 'rev_page' => $articleId, 'rev_id' => $row->rev_id, 'rev_user' => $row->rev_user, 'rev_user_text' => $row->rev_user_text);
         if ($row->rev_id == $gr) {
             $grText = $stepsSection[0];
             break;
         }
     }
     if (!$txts) {
         // No revisions at all: nothing to diff and nothing to store.
         return array();
     }
     if (!$grText) {
         // Good revision not found (or it has no steps text): fall back to
         // the last revision that was read.
         $last = $txts[count($txts) - 1];
         $grText = $last['text'];
         $gr = $last['rev_id'];
     }
     $segmentedGr = $wgContLang->segmentForDiff($grText);
     $grArr = explode("\n", $segmentedGr);
     // Computed by the original code but not used afterwards; kept as-is.
     $grSize = strlen($segmentedGr) - sizeof($grArr) + 1;
     // Amount added
     $added = 0;
     $edits = array();
     $first = true;
     $lastAdds = 0;
     foreach ($txts as $txt) {
         $txtArr = explode("\n", $wgContLang->segmentForDiff($txt['text']));
         $diffs = new Diff($txtArr, $grArr);
         $adds = 0;
         // Iterating the Diff object visits its public properties; each one
         // is in turn iterated as a list of diff operations.
         foreach ($diffs as $diff) {
             foreach ($diff as $d) {
                 if ($d->type == 'copy') {
                     // Lines carried over unchanged into the good revision.
                     foreach ($d->closing as $cl) {
                         $adds += strlen($cl);
                     }
                 } elseif ($d->type == 'change') {
                     // For changed lines, count only the words that survived.
                     $wld = new WordLevelDiff($d->orig, $d->closing);
                     foreach ($wld->edits as $edit) {
                         if ($edit->type == 'copy') {
                             foreach ($edit->orig as $o) {
                                 $adds += strlen($o);
                             }
                         }
                     }
                 }
             }
         }
         if ($adds > $added) {
             $newAdded = $adds - $added;
             $added = $adds;
         } else {
             $newAdded = 0;
         }
         if ($newAdded > 0) {
             // First edit or didn't add steps
             // This prevents counting the steps section formatting fix as a contributor
             if ($first || $lastAdds != 0) {
                 // 'page' previously read a non-existent 'page_id' key.
                 $edits[] = array('added' => $newAdded, 'gr' => $gr, 'rev' => $txt['rev_id'], 'page' => $txt['rev_page'], 'user' => $txt['rev_user'], 'username' => $txt['rev_user_text']);
             }
         }
         $first = false;
         $lastAdds = $adds;
     }
     if ($edits) {
         self::saveSigEdits($edits);
     }
     return $edits;
 }
Exemplo n.º 11
0
<?php

require_once 'commandLine.inc';
# Export list of alternative methods for a list of articles to CSV file
// Input: a text file with one article title per line.
// Output (stdout): one CSV line per article that has "=== ... ===" headings
// in the steps section of its latest good revision: title,heading,heading,...
// NOTE(review): $argv[0] is used as the input file name, which presumes
// commandLine.inc has already shifted the script name off $argv - confirm.
$filename = $argv[0];
// file_get_contents() also copes with an empty file, where
// fread($f, filesize($filename)) would be called with a zero length.
$contents = file_get_contents($filename);
if ($contents === false) {
    fwrite(STDERR, "Unable to read {$filename}\n");
    exit(1);
}
// One connection for the whole run instead of one lookup per article.
$dbr = wfGetDB(DB_SLAVE);
// PREG_SPLIT_NO_EMPTY drops the empty entry produced by a trailing newline.
$pages = preg_split('@[\\r\\n]+@', $contents, -1, PREG_SPLIT_NO_EMPTY);
foreach ($pages as $page) {
    $t = Title::newFromText($page);
    if (!$t) {
        // Not a valid title; nothing to look up.
        continue;
    }
    $gr = GoodRevision::newFromTitle($t);
    if (!$gr) {
        continue;
    }
    $r = Revision::loadFromId($dbr, $gr->latestGood());
    if (!$r) {
        continue;
    }
    $text = Wikitext::getStepsSection($r->getText(), true);
    if (preg_match_all("@===([^=]+)===@", $text[0], $matches)) {
        print $page;
        foreach ($matches[1] as $m) {
            // [^=]+ can run across line breaks; a real heading cannot, so
            // skip any capture containing a CR or LF (previously only a
            // CRLF pair was detected).
            if (!preg_match("@[\r\n]@", $m)) {
                print ',' . $m;
            }
        }
        print "\n";
    }
}
Exemplo n.º 12
0
 /**
  * Build the meta description text for the requested style: part of the
  * intro, part of the first step, the stored edited description, or
  * 'original' (signalled by an empty description).
  *
  * For the intro and first-step styles the text is flattened, prefixed
  * with the 'howto' message for the title (except for the "intro, no
  * title" style) and trimmed. If the flattened text is empty, the 'howto'
  * message alone is used.
  *
  * @param mixed $style one of the self::DESC_STYLE_* constants
  * @return array array($success, $description); $success is false when the
  *   wikitext cannot be loaded or the style is unknown
  */
 private function buildDescription($style)
 {
     // Styles that need no wikitext at all.
     if ($style == self::DESC_STYLE_ORIGINAL) {
         return array(true, '');
     }
     if ($style == self::DESC_STYLE_EDITED) {
         return array(true, $this->row['ami_desc']);
     }

     $wikitext = $this->getArticleWikiText();
     if (!$wikitext) {
         return array(false, '');
     }

     $isIntroStyle = $style == self::DESC_STYLE_INTRO || $style == self::DESC_STYLE_INTRO_NO_TITLE;
     if ($isIntroStyle) {
         $desc = Wikitext::getIntro($wikitext);
         // A short intro gets the first step appended to pad it out.
         if (strlen($desc) < 2 * self::MAX_DESC_LENGTH) {
             list($stepsText, ) = Wikitext::getStepsSection($wikitext);
             if ($stepsText) {
                 $desc .= ' ' . Wikitext::cutFirstStep($stepsText);
             }
         }
     } elseif ($style == self::DESC_STYLE_STEP1) {
         // First step when there is a steps section, intro otherwise.
         list($stepsText, ) = Wikitext::getStepsSection($wikitext);
         $desc = $stepsText ? Wikitext::cutFirstStep($stepsText) : Wikitext::getIntro($wikitext);
     } else {
         // Unknown style.
         return array(false, '');
     }

     $desc = Wikitext::flatten($desc);
     $howto = wfMsg('howto', $this->titleText);
     if (!$desc) {
         $desc = $howto;
     } elseif ($style != self::DESC_STYLE_INTRO_NO_TITLE) {
         $desc = $howto . '. ' . $desc;
     }
     return array(true, self::trimDescription($desc));
 }
Exemplo n.º 13
0
 /**
  * Report whether an article counts as "having images".
  *
  * In intro-only mode the answer is "Yes" when the intro contains an
  * image. Otherwise it is "Yes" when the number of "[[Image:" occurrences
  * in the whole wikitext exceeds half the number of top-level steps
  * (lines starting with '#' not followed by '*') in the steps section.
  *
  * @param string $wikitext article wikitext (taken by reference, not modified)
  * @return string "Yes" or "No"
  */
 function hasImages(&$wikitext)
 {
     if ($this->introOnly) {
         $text = Wikitext::getIntro($wikitext);
         $firstImage = Wikitext::getFirstImageURL($text);
         return !empty($firstImage) ? "Yes" : "No";
     }
     // Defaults to 0 so the comparison below is well-defined when the
     // article has no steps section (previously an undefined variable).
     $num_steps = 0;
     list($stepsText, ) = Wikitext::getStepsSection($wikitext, true);
     if ($stepsText) {
         // has steps section, so assume valid candidate for detailed title
         $num_steps = preg_match_all('/^#[^*]/im', $stepsText, $matches);
     }
     $num_photos = preg_match_all('/\\[\\[Image:/im', $wikitext, $matches);
     return $num_photos > $num_steps / 2 ? "Yes" : "No";
 }
Exemplo n.º 14
0
 /**
  * Check whether the steps section of a revision contains an enlarged
  * photo, i.e. an image parameter of the form "|<width>px]]" with a width
  * from 500 to 9999.
  *
  * @param Revision $r revision to inspect (taken by reference, not modified)
  * @return int|false 1 when such a width is found, 0 otherwise (including
  *   an empty steps section); false if preg_match() itself fails
  */
 private function hasEnlargedWikiPhotos(&$r)
 {
     $section = Wikitext::getStepsSection($r->getText(), true);
     $stepsText = $section[0];
     if (!$stepsText) {
         return 0;
     }
     // Photo is enlarged if its width is 500px or more (up to 9999px).
     // The previous pattern, [5-9]\d{2,3}, only matched 500-999 and
     // 5000-9999, so widths from 1000 to 4999 were missed.
     return preg_match('/\\|(?:[5-9]\\d{2}|[1-9]\\d{3})px\\]\\]/im', $stepsText);
 }
// Collect the ids of all non-redirect pages in namespace 0.
$res = $dbr->select('page', 'page_id', array('page_namespace' => 0, 'page_is_redirect' => 0), __FILE__);
while ($row = $dbr->fetchObject($res)) {
    $titles[$row->page_id] = array();
}
$dbr->freeResult($res);
echo "Done grabbing all titles from db at " . microtime(true) . "\n";
$articles = array();
$count = 0;
// Matches an [[Image:...]] link with optional parameters.
$imagePattern = '@\\[\\[Image:([^\\]|]*)(\\|[^\\]]*)?\\]\\]@s';
// Keep the articles that have no image in the intro but at least one image
// in the steps section.
foreach ($titles as $id => $info) {
    $title = Title::newFromID($id);
    // A page can disappear between the query above and this lookup; skip
    // it instead of calling a method on null.
    $revision = $title ? Revision::newFromTitle($title) : null;
    if ($revision) {
        $wikitext = $revision->getText();
        $intro = Wikitext::getIntro($wikitext);
        $hasIntroImage = preg_match($imagePattern, $intro);
        if (!$hasIntroImage) {
            $section = Wikitext::getStepsSection($wikitext, true);
            $num_step_photos = preg_match_all($imagePattern, $section[0], $matches);
            if ($num_step_photos > 0) {
                $articles[] = $id;
            }
        }
    }
    $count++;
    if ($count % 1000 == 0) {
        echo "Done processing " . $count . " artciles\n";
    }
}
echo "Done processing all titles. Left with " . count($articles) . " titles. At " . microtime(true) . "\n";
// NOTE(review): $argv[0] is used as the output file name, which presumes the
// script name has already been shifted off $argv - confirm.
$fo = fopen($argv[0], 'w');
if (!$fo) {
    echo "Unable to open " . $argv[0] . " for writing\n";
    exit(1);
}
fwrite($fo, "<html><head></head><body>");
//now that we have all the data, spit out the info
foreach ($articles as $id) {
Exemplo n.º 16
0
 /**
  * Approve a submitted alternate method: append it as a new "=== ... ==="
  * sub-section at the end of the article's steps section and save the
  * article.
  *
  * When the edit succeeds, a 'methedit' log entry is added and the
  * submission row is deleted from MethodEditor::TABLE_NAME. The
  * "MethodEdited" hook runs whether or not the edit succeeded.
  *
  * @param int    $methodId  ama_id of the submitted method row
  * @param int    $articleId page id of the target article
  * @param string $altMethod heading text of the new method
  * @param string $altSteps  wikitext of the new method's steps
  * @return mixed result of Article::doEdit(), or null when the title or its
  *   revision cannot be loaded
  */
 private function keepMethod($methodId, $articleId, $altMethod, $altSteps)
 {
     global $wgUser, $wgParser;
     $title = Title::newFromID($articleId);
     if (!$title) {
         return;
     }
     $revision = Revision::newFromTitle($title);
     $article = new Article($title);
     if (!$revision || !$article) {
         return;
     }

     $wikitext = $revision->getText();
     list($stepsText, $sectionID) = Wikitext::getStepsSection($wikitext, true);
     $stepsWithMethod = $stepsText . "\n\n=== {$altMethod} ===\n{$altSteps}";
     $updatedWikitext = $wgParser->replaceSection($wikitext, $sectionID, $stepsWithMethod);
     $success = $article->doEdit($updatedWikitext, MethodEditor::EDIT_COMMENT);
     if ($success) {
         $logPage = new LogPage('methedit', false);
         $anchor = str_replace(" ", "_", $altMethod);
         $logMessage = wfMsg('editor-approved-logentry', $title->getFullText(), $altMethod, $anchor);
         $logPage->addEntry("Added", $title, $logMessage);
         // The submission has been applied, so drop it from the queue table.
         wfGetDB(DB_MASTER)->delete(MethodEditor::TABLE_NAME, array('ama_id' => $methodId));
     }
     wfRunHooks("MethodEdited", array($wgUser, $title, '0'));
     return $success;
 }
// For every page id, run replaceBrokenLinksInSection() over the intro and
// the steps section, and save the article when either of them changed.
foreach ($ids as $id) {
    $title = Title::newFromID($id);
    if (!$title) {
        continue;
    }
    $revision = Revision::newFromTitle($title);
    if (!$revision) {
        // No loadable revision: skip rather than call getText() on null.
        continue;
    }
    $stepsChanged = false;
    $introChanged = false;
    $wikiText = $revision->getText();
    $intro = Wikitext::getIntro($wikiText, true);
    if ($intro != "") {
        // $introChanged is set by reference when a link was replaced.
        $intro = replaceBrokenLinksInSection($intro, $introChanged, $title);
        if ($introChanged) {
            $wikiText = Wikitext::replaceIntro($wikiText, $intro, true);
        }
    }
    // The steps section is read after the intro has been written back, so
    // the section id refers to the current text.
    list($steps, $sectionID) = Wikitext::getStepsSection($wikiText, true);
    if ($steps != "") {
        $steps = replaceBrokenLinksInSection($steps, $stepsChanged, $title);
        if ($stepsChanged) {
            $wikiText = Wikitext::replaceStepsSection($wikiText, $sectionID, $steps, true);
        }
    }
    if ($stepsChanged || $introChanged) {
        // The Article object is only needed when there is something to save.
        $article = new Article($title);
        $article->doEdit($wikiText, "Removing broken links");
    }
}
function replaceBrokenLinksInSection($sectionText, &$changed, &$title)
{
    $matchesarray = array();
    $sectionText = preg_replace_callback("@<nowiki>[^<]*</nowiki>@i", 'handleNoWikiTags', $sectionText);