/**
 * Pick a random selection of article URLs from the page_randomizer table.
 *
 * Rows are visited in shuffled order. An article is kept when its title
 * resolves, its wikitext loads, it has a steps section and that section
 * holds at least $minImages images. Collection stops at $maxUrls entries.
 *
 * @param int $maxUrls   maximum number of partial URLs to collect
 * @param int $minImages minimum number of step images an article needs
 * @return array list of partial URLs
 */
public static function getSelection($maxUrls, $minImages) {
    $dbr = wfGetDB(DB_SLAVE);
    $sql = 'SELECT pr_title FROM page_randomizer';
    $candidates = parent::loadRows($dbr, $sql, '', __METHOD__);
    shuffle($candidates);

    $selected = array();
    foreach ($candidates as $candidate) {
        $title = Title::newFromDBkey($candidate['pr_title']);
        if (!$title) {
            continue;
        }

        $wikitext = parent::getWikitext($dbr, $title);
        if (!$wikitext) {
            continue;
        }

        list($steps, ) = Wikitext::getStepsSection($wikitext);
        if (!$steps) {
            continue;
        }

        $imageCount = parent::getNumStepsImages($steps);
        if ($imageCount >= $minImages) {
            $selected[] = $title->getPartialURL();
            if (count($selected) >= $maxUrls) {
                break;
            }
        }
    }

    return $selected;
}
/**
 * Cycle through the url list to call the removeIntroImages function.
 *
 * Every entry of $urls is updated in place with two keys:
 *  - 'err': empty string on success, otherwise a message or the value
 *    returned by Wikitext::saveWikitext()
 *  - 'final_step': '' when no final step was built, 'x' when one was built,
 *    'x (alt)' when one was built and the steps text has === sub-headers
 *
 * @param array  $urls entries carrying a 'title' element (by reference)
 * @param mixed  $px   when greater than 0, the intro is first offered to makeFinalStep()
 * @param string $text passed through to makeFinalStep()
 */
private static function removeIntroImagesUrls(&$urls, $px, $text) {
    $dbr = wfGetDB(DB_SLAVE);

    foreach ($urls as &$url) {
        $err = '';
        $final_step = '';
        $wikitext = '';
        $introText = '';

        $hasTitle = (bool)$url['title'];
        if ($hasTitle) {
            $wikitext = Wikitext::getWikitext($dbr, $url['title']);
            if ($wikitext) {
                $introText = Wikitext::getIntro($wikitext);
            }
        }

        if (!$hasTitle) {
            $err = 'Unable to load article';
        } elseif (!$introText) {
            $err = 'Unable to load wikitext';
        } else {
            // first, try to use the intro image for the final step
            if ($px > 0) {
                $appended = self::makeFinalStep($introText, $px, $text);
                if ($appended) {
                    list($stepsText, $sectionID) = Wikitext::getStepsSection($wikitext, true);
                    $stepsText = $stepsText . $appended;
                    $wikitext = Wikitext::replaceStepsSection($wikitext, $sectionID, $stepsText, true);
                    // success either way; "(alt)" flags that sub-headers are present
                    $hasAltMethods = preg_match("@[\r\n]+===[^=]*===@m", $stepsText);
                    $final_step = $hasAltMethods ? 'x (alt)' : 'x';
                }
            }

            $originalIntro = $introText;
            $introText = self::removeIntroImages($originalIntro, $url['title']);

            if (!$introText || $introText == $originalIntro) {
                $err = 'Either no intro image or no intro found';
            } else {
                $wikitext = Wikitext::replaceIntro($wikitext, $introText, true);

                $comment = 'Removing intro images';
                if ($final_step == 'x') {
                    $comment .= '; Made final step out of former intro image';
                }

                $err = Wikitext::saveWikitext($url['title'], $wikitext, $comment);
                if (empty($err)) {
                    // make sure the intro image adder doesn't grab it
                    $pageId = $url['title']->getArticleID();
                    if ($pageId) {
                        $dbw = wfGetDB(DB_MASTER);
                        $dbw->update(
                            'imageadder',
                            array('imageadder_hasimage' => 1),
                            array('imageadder_page' => $pageId)
                        );
                    }
                }
            }
        }

        $url['err'] = $err;
        $url['final_step'] = $final_step;
    }
}
/**
 * Load the wikitext of a title together with its steps section.
 *
 * @param Title $title title whose wikitext should be loaded
 * @return array array($wikitext, $stepsText); $stepsText is '' when no
 *   wikitext could be loaded
 */
private static function getWikitext($title) {
    $dbr = wfGetDB(DB_SLAVE);
    $wikitext = Wikitext::getWikitext($dbr, $title);

    if (!$wikitext) {
        return array($wikitext, '');
    }

    list($stepsText, ) = Wikitext::getStepsSection($wikitext, true);
    return array($wikitext, $stepsText);
}
/**
 * Grab the wikitext of a title along with its steps section and section id.
 *
 * @param Title $title title whose wikitext should be loaded
 * @return array array($wikitext, $stepsText, $sectionID), or an empty array
 *   when the wikitext contains the character rejected by the hack below.
 *   $stepsText is '' and $sectionID is null when no wikitext was loaded.
 */
private static function getWikitext($title) {
    $dbr = wfGetDB(DB_SLAVE);
    $wikitext = Wikitext::getWikitext($dbr, $title);

    $stepsText = '';
    // Defined up front so the return below never reads an undefined variable
    // when the wikitext could not be loaded.
    $sectionID = null;

    if ($wikitext) {
        list($stepsText, $sectionID) = Wikitext::getStepsSection($wikitext, true);

        // hack for illegal character
        // strpos() returns 0 for a match at the very start of the text, so
        // compare against false instead of relying on truthiness.
        if (strpos($wikitext, '‐') !== false) {
            return array();
        }
    }

    return array($wikitext, $stepsText, $sectionID);
}
/**
 * Build a short HTML summary out of the steps of an article.
 *
 * When the steps section has alternate methods, the first alternate method
 * that contains steps is summarized instead of the whole section. Simple
 * steps are summarized one per line ("1. ...", separated by <br>). Once
 * more than $maxSteps summaries exist, a link to $fullURL advertising the
 * remaining steps is appended instead and the loop ends.
 *
 * @param string $wikitext article wikitext
 * @param int    $maxSteps maximum number of summarized steps to output
 * @param string $fullURL  URL used for the "more steps" link
 * @return string HTML fragment, '' when there is no steps section
 */
function synthesizeSummary($wikitext, $maxSteps, $fullURL) {
    $stepsSec = Wikitext::getStepsSection($wikitext, true);
    if (!$stepsSec) {
        return '';
    }

    $stepsText = Wikitext::stripHeader($stepsSec[0]);
    if (Wikitext::countAltMethods($stepsText) > 0) {
        $altMethods = Wikitext::splitAltMethods($stepsText);
        foreach ($altMethods as $method) {
            if (Wikitext::isAltMethod($method) && Wikitext::countSteps($method) > 0) {
                $stepsText = $method;
                break;
            }
        }
    }

    $countSteps = Wikitext::countSteps($stepsText);
    $summaryOut = '';
    $count = 0;

    foreach (Wikitext::splitSteps($stepsText) as $step) {
        if (!Wikitext::isStepSimple($step, false)) {
            continue;
        }

        $summary = Wikitext::summarizeStep($step);
        $summary = Wikitext::removeRefsFromFlattened($summary);
        if (!$summary) {
            continue;
        }

        $count++;
        $break = $count > 1 ? "<br>" : '';

        if ($count <= $maxSteps) {
            $summaryOut .= "{$break}{$count}. {$summary}";
            continue;
        }

        // Step budget exhausted: point at the full article and stop.
        $remaining = $countSteps - $maxSteps;
        $text = '';
        if ($remaining >= 2) {
            $text = "{$remaining} more steps at wikiHow";
        } elseif ($remaining == 1) {
            $text = "Another step at wikiHow";
        }
        if ($text) {
            // The attribute below is single-quoted, so single quotes in the
            // URL must be escaped as well; ENT_QUOTES covers both quote kinds.
            $href = htmlspecialchars($fullURL, ENT_QUOTES);
            $link = "<a href='{$href}'>{$text}</a>";
            $summaryOut .= "{$break}{$link}";
        }
        break;
    }

    return $summaryOut;
}
/**
 * Resize the images in the steps section of one article and save the result.
 *
 * Failures ("Unable to load wikitext", "REDIRECT", or whatever
 * resizeEachImage() / Wikitext::saveWikitext() report) are passed to
 * logError() together with the article URL; successes go to logIt().
 *
 * @param string $article article name as accepted by Title::newFromURL()
 * @return string|null full URL of the article when it was saved without
 *   error, null otherwise (including when the title does not exist)
 */
private function resizeImages($article) {
    global $wgServer;

    // The original tested an undefined local before fetching the handle;
    // the connection is simply always fetched here.
    $dbw = wfGetDB(DB_MASTER);

    $err = '';
    $stepsText = '';

    $title = Title::newFromURL($article);
    if (!$title || !$title->exists()) {
        return;
    }

    $wikitext = Wikitext::getWikitext($dbw, $title);
    if (!$wikitext) {
        $err = 'Unable to load wikitext';
    } elseif (preg_match('@^#REDIRECT@m', $wikitext)) {
        $err = 'REDIRECT';
    } else {
        list($stepsText, $sectionID) = Wikitext::getStepsSection($wikitext, true);
        list($stepsText, $err) = self::resizeEachImage($stepsText);
        if ($stepsText) {
            $wikitext = Wikitext::replaceStepsSection($wikitext, $sectionID, $stepsText, true);
            $comment = 'Resized images to the {{largeimage}} size.';
            $err = Wikitext::saveWikitext($title, $wikitext, $comment);
        }
    }

    $articleUrl = $wgServer . '/' . $article;
    if ($err) {
        // tab-separated: error message, then article URL
        $err .= chr(9) . $articleUrl;
        self::logError($err);
    } elseif ($stepsText) {
        self::logIt($articleUrl);
        return $articleUrl;
    }
}
/**
 * Handle API calls to get the steps from an article.
 *
 * Reads the comma-separated "articleIds" request value and, for every
 * numeric id with a loadable revision, emits the intro and the steps
 * section. Lines of the steps section after the last line starting with
 * '#' are dropped. The result is written to $wgOut as JSON keyed by
 * article id; ids whose remaining steps text is empty are left out.
 */
private function doAPI() {
    global $wgRequest, $wgOut, $wgContLang;

    $articleIds = explode(',', (string)$wgRequest->getVal("articleIds"));
    $dbr = wfGetDB(DB_SLAVE);
    $wgOut->setArticleBodyOnly(true);

    $articles = array();
    foreach ($articleIds as $articleId) {
        if (!is_numeric($articleId)) {
            continue;
        }

        $r = Revision::loadFromPageId($dbr, $articleId);
        if (!$r) {
            continue;
        }

        $txt = $r->getText();
        $intro = Wikitext::getIntro($txt);

        $section = Wikitext::getStepsSection($txt, true);
        if (!is_array($section) || count($section) == 0) {
            // no steps section to report for this article
            continue;
        }
        $lines = explode("\n", (string)$section[0]);

        // We remove extra lines technically in the 'steps' section, but
        // which don't actually contain steps.
        // Find the last line starting with a '#'. strncmp() is safe on
        // empty lines, where indexing $line[0] raises a warning.
        $lastLine = 0;
        foreach ($lines as $n => $line) {
            if (strncmp($line, '#', 1) === 0) {
                $lastLine = $n;
            }
        }

        // Truncate lines after the last line with a '#'
        $text = implode("\n", array_slice($lines, 0, $lastLine + 1));

        if (strlen($text) > 0) {
            $articles[$articleId] = array(
                "steps" => $text,
                "intro" => $intro,
                "altImageTags" => array($wgContLang->getNSText(NS_IMAGE)),
            );
        }
    }

    $wgOut->addHTML(json_encode($articles));
}
/**
 * Handle API calls to get the steps from an article.
 *
 * The comma-separated "articleIds" request value is split up; for each
 * numeric id with a loadable revision and a non-empty steps-section result,
 * the steps text and intro are collected. The collection is written to
 * $wgOut as JSON keyed by article id.
 */
private function doAPI() {
    global $wgRequest, $wgOut, $wgContLang;

    $requested = preg_split("@,@", $wgRequest->getVal("articleIds"));
    $dbr = wfGetDB(DB_SLAVE);
    $wgOut->setArticleBodyOnly(true);

    $payload = array();
    foreach ($requested as $articleId) {
        if (!is_numeric($articleId)) {
            continue;
        }

        $revision = Revision::loadFromPageId($dbr, $articleId);
        if (!$revision) {
            continue;
        }

        $wikitext = $revision->getText();
        $intro = Wikitext::getIntro($wikitext);
        $section = Wikitext::getStepsSection($wikitext, true);

        if (!is_array($section) || count($section) <= 0) {
            continue;
        }

        $payload[$articleId] = array(
            "steps" => $section[0],
            "intro" => $intro,
            "altImageTags" => array($wgContLang->getNSText(NS_IMAGE)),
        );
    }

    $wgOut->addHTML(json_encode($payload));
}
/**
 * The process of adding and removing all articles from the randomizer
 * set.
 *
 * Every loaded article is turned into a page_randomizer row and then
 * classified: it is added when its wikitext loads, its intro is not
 * excluded via templates and at least one inclusion reason applies;
 * otherwise it is removed. The reasons are recorded for every article.
 *
 * @param int $from unix timestamp indicates from when to process. 0
 *   means the epoch.
 */
private static function processArticles($from) {
    $dbr = wfGetDB(DB_SLAVE);

    $articles = self::loadArticles($dbr, $from);
    foreach ($articles as &$article) {
        $article = array(
            'pr_id' => $article['page_id'],
            'pr_namespace' => NS_MAIN,
            'pr_title' => $article['page_title'],
            'pr_random' => wfRandom(),
            'pr_catinfo' => $article['page_catinfo'],
            'pr_updated' => wfTimestampNow(),
        );
    }
    // Break the reference left behind by the loop above. Without this, the
    // by-value loop below writes every element into the last array slot
    // through the dangling reference, so the last article is replaced by a
    // copy of the second-to-last one.
    unset($article);

    $featured = self::loadFeaturedArticles($dbr);
    $rising = self::loadRisingStars($dbr);
    $views = self::loadHighViews($dbr);
    $edits = self::loadHighEdits($dbr);
    $rated = self::loadHighlyRated($dbr);

    $add = array();
    $remove = array();
    $reasons = array();

    foreach ($articles as $article) {
        $reason = array();
        $toadd = true;
        $id = $article['pr_id'];

        $wikitext = '';
        $title = Title::newFromDBkey($article['pr_title']);
        if ($title) {
            $wikitext = self::getWikitext($dbr, $title);
        }

        if (!$wikitext) {
            // either the title is invalid or no wikitext could be loaded
            $toadd = false;
            $reason[] = 'does-not-exist';
        } else {
            $intro = Wikitext::getIntro($wikitext);
            list($steps, ) = Wikitext::getStepsSection($wikitext);

            if (self::excludeViaTemplates($intro)) {
                $reason[] = 'excluded-via-template';
                $toadd = false;
            } else {
                $images = self::getNumStepsImages($steps);

                if (isset($featured[$id])) {
                    $reason[] = 'featured';
                }
                if ($images && isset($rising[$id])) {
                    $reason[] = 'rising';
                }
                if ($images && isset($rated[$id])) {
                    $reason[] = 'highly-rated';
                }
                if ($images && isset($views[$id]) && isset($edits[$id])) {
                    $reason[] = 'views-and-edits';
                }
                if ($images && self::hasAlternateMethods($steps)) {
                    $reason[] = 'views-and-alternate-methods';
                }
                if ($images && isset($views[$id]) && self::getNumSteps($steps) >= 9) {
                    $reason[] = 'views-and-nine-steps';
                }
                // reuses the image count computed above instead of recounting
                if ($images >= 3) {
                    $reason[] = 'three-steps-images';
                }
                if ($images && isset($views[$id]) && self::hasVideo($wikitext)) {
                    $reason[] = 'views-and-video';
                }

                if (empty($reason)) {
                    $reason[] = 'no-match';
                    $toadd = false;
                }
            }
        }

        if ($toadd) {
            $add[] = $article;
        } else {
            $remove[] = $article;
        }

        // both text columns are cut to 255 bytes before saving
        $reasons[] = array(
            'dprr_id' => $id,
            'dprr_namespace' => NS_MAIN,
            'dprr_title' => substr($article['pr_title'], 0, 255),
            'dprr_reasons' => substr(join(',', $reason), 0, 255),
        );
    }

    $dbw = wfGetDB(DB_MASTER);
    if (!$from) {
        // do this right before we insert a bunch of new rows
        self::dbClearRandomizer($dbw);
    } else {
        self::dbSaveRandom($dbw, $remove, false);
    }
    self::dbSaveRandom($dbw, $add, true);
    self::dbReplaceReasons($dbw, $reasons);
}
/**
 * Work out which revisions contributed text that is still present in the
 * steps section of the article's good revision.
 *
 * Previously saved results (getSigEdits()) are returned as-is. Otherwise
 * the article's revisions are walked in timestamp order up to the good
 * revision (or to the last revision when the good revision is not found),
 * each one is diffed against the reference steps text, and a revision is
 * recorded whenever the amount of shared text grows.
 *
 * @param int $articleId page id of the article
 * @return array list of arrays with the keys 'added', 'gr', 'rev', 'page',
 *   'user' and 'username'; empty when nothing was found
 */
public static function getEdits($articleId) {
    global $wgContLang;

    $dbr = wfGetDB(DB_SLAVE);
    $gr = $dbr->selectField('good_revision', array('gr_rev'), array('gr_page' => $articleId));

    $edits = self::getSigEdits($gr);
    if ($edits) {
        return $edits;
    }

    // The option key has to be upper-case 'ORDER BY'; the lower-case
    // spelling is not recognised, leaving the rows unordered.
    $res = $dbr->select(
        array('revision', 'text'),
        array('rev_id', 'old_text', 'old_flags', 'rev_timestamp', 'rev_user', 'rev_user_text'),
        array('rev_page' => $articleId, 'rev_text_id = old_id'),
        __METHOD__,
        array('ORDER BY' => 'rev_timestamp asc')
    );

    $txts = array();
    $grText = false;
    foreach ($res as $row) {
        $flags = explode(',', $row->old_flags);
        $rowText = Revision::decompressRevisionText($row->old_text, $flags);
        $stepsSection = Wikitext::getStepsSection($rowText);
        $txts[] = array(
            'text' => $stepsSection[0],
            // rev_page is not among the selected columns; every row belongs
            // to $articleId because of the WHERE clause.
            'rev_page' => $articleId,
            'rev_id' => $row->rev_id,
            'rev_user' => $row->rev_user,
            'rev_user_text' => $row->rev_user_text,
        );
        if ($row->rev_id == $gr) {
            $grText = $stepsSection[0];
            break;
        }
    }

    if (!$txts) {
        // no revisions at all: nothing to diff, nothing to save
        return array();
    }

    if ($grText === false) {
        // good revision not found: fall back to the last revision read
        $last = $txts[count($txts) - 1];
        $grText = $last['text'];
        $gr = $last['rev_id'];
    }

    $segmentedGr = $wgContLang->segmentForDiff($grText);
    $grArr = explode("\n", $segmentedGr);
    $grSize = strlen($segmentedGr) - sizeof($grArr) + 1;

    // Amount added
    $added = 0;
    $edits = array();
    $first = true;
    $lastAdds = 0;

    foreach ($txts as $txt) {
        $txtArr = explode("\n", $wgContLang->segmentForDiff($txt['text']));
        $diffs = new Diff($txtArr, $grArr);

        // Count the bytes this revision shares with the reference text:
        // whole copied lines, plus copied words inside changed lines.
        $adds = 0;
        // NOTE(review): iterating the Diff object walks its visible
        // properties; each is treated as a list of edit operations - confirm
        // against the Diff class in use.
        foreach ($diffs as $diff) {
            foreach ($diff as $d) {
                if ($d->type == 'copy') {
                    foreach ($d->closing as $cl) {
                        $adds += strlen($cl);
                    }
                } elseif ($d->type == 'change') {
                    $wld = new WordLevelDiff($d->orig, $d->closing);
                    foreach ($wld->edits as $edit) {
                        if ($edit->type == 'copy') {
                            foreach ($edit->orig as $o) {
                                $adds += strlen($o);
                            }
                        }
                    }
                }
            }
        }

        if ($adds > $added) {
            $newAdded = $adds - $added;
            $added = $adds;
        } else {
            $newAdded = 0;
        }

        if ($newAdded > 0) {
            // First edit or didn't add steps
            // This prevents counting the steps section formatting fix as a contributor
            if ($first || $lastAdds != 0) {
                $edits[] = array(
                    'added' => $newAdded,
                    'gr' => $gr,
                    'rev' => $txt['rev_id'],
                    // the collected rows store the page id under 'rev_page'
                    'page' => $txt['rev_page'],
                    'user' => $txt['rev_user'],
                    'username' => $txt['rev_user_text'],
                );
            }
        }

        $first = false;
        $lastAdds = $adds;
    }

    if ($edits) {
        self::saveSigEdits($edits);
    }
    return $edits;
}
<?php
require_once 'commandLine.inc';

# Export list of alternative methods for a list of articles to CSV file.
# The input file named by $argv[0] holds one article title per line. For each
# title with a good revision, one line is printed: the title followed by the
# text of every "=== ... ===" header found in the steps section.

$filename = $argv[0];
$contents = file_get_contents($filename);
if ($contents === false) {
    fwrite(STDERR, "Unable to read {$filename}\n");
    exit(1);
}

// PREG_SPLIT_NO_EMPTY drops the empty entry a trailing newline would produce
$pages = preg_split('@[\\r\\n]+@', $contents, -1, PREG_SPLIT_NO_EMPTY);

$dbr = wfGetDB(DB_SLAVE);
foreach ($pages as $page) {
    $t = Title::newFromText($page);
    if (!$t) {
        // not a valid title; nothing to look up
        continue;
    }

    $gr = GoodRevision::newFromTitle($t);
    if (!$gr) {
        continue;
    }

    $lr = $gr->latestGood();
    $r = Revision::loadFromId($dbr, $lr);
    if (!$r) {
        continue;
    }

    $text = Wikitext::getStepsSection($r->getText(), true);
    if (preg_match_all("@===([^=]+)===@", $text[0], $matches)) {
        print $page;
        foreach ($matches[1] as $m) {
            // skip matches that span a line break
            if (!preg_match("@\r\n@", $m)) {
                print ',' . $m;
            }
        }
        print "\n";
    }
}
/**
 * Builds the meta description for the requested style: part of the intro,
 * part of the first step, the stored edited text, or 'original' which is
 * something like "wikiHow article on How to <title>".
 *
 * @param mixed $style one of the self::DESC_STYLE_* constants
 * @return array array($success, $description). The description is '' for
 *   the original style and on failure (no wikitext, unknown style).
 */
private function buildDescription($style) {
    if (self::DESC_STYLE_ORIGINAL == $style) {
        return array(true, '');
    }
    if (self::DESC_STYLE_EDITED == $style) {
        return array(true, $this->row['ami_desc']);
    }

    $wikitext = $this->getArticleWikiText();
    if (!$wikitext) {
        return array(false, '');
    }

    $introBased = self::DESC_STYLE_INTRO == $style
        || self::DESC_STYLE_INTRO_NO_TITLE == $style;

    if ($introBased) {
        // start from the intro
        $desc = Wikitext::getIntro($wikitext);

        // a short intro gets the first step appended
        if (strlen($desc) < 2 * self::MAX_DESC_LENGTH) {
            list($steps, ) = Wikitext::getStepsSection($wikitext);
            if ($steps) {
                $desc .= ' ' . Wikitext::cutFirstStep($steps);
            }
        }
    } elseif (self::DESC_STYLE_STEP1 == $style) {
        // first step of the steps section, falling back to the intro
        list($desc, ) = Wikitext::getStepsSection($wikitext);
        $desc = $desc
            ? Wikitext::cutFirstStep($desc)
            : Wikitext::getIntro($wikitext);
    } else {
        // unknown style
        return array(false, '');
    }

    $desc = Wikitext::flatten($desc);
    $howto = wfMsg('howto', $this->titleText);

    if (!$desc) {
        $desc = $howto;
    } elseif (self::DESC_STYLE_INTRO_NO_TITLE != $style) {
        $desc = $howto . '. ' . $desc;
    }

    return array(true, self::trimDescription($desc));
}
/**
 * Report whether an article counts as having images.
 *
 * In intro-only mode the answer depends on whether the intro has a first
 * image. Otherwise the number of "[[Image:" occurrences in the whole
 * wikitext has to exceed half the number of steps found in the steps
 * section (0 steps when there is no steps section).
 *
 * @param string $wikitext article wikitext (taken by reference, not modified here)
 * @return string "Yes" or "No"
 */
function hasImages(&$wikitext) {
    if ($this->introOnly) {
        $text = Wikitext::getIntro($wikitext);
        $firstImage = Wikitext::getFirstImageURL($text);
        return !empty($firstImage) ? "Yes" : "No";
    }

    // Initialised so the comparison below never reads an undefined variable
    // when the article has no steps section.
    $num_steps = 0;
    list($stepsText, ) = Wikitext::getStepsSection($wikitext, true);
    if ($stepsText) {
        // has steps section, so assume valid candidate for detailed title
        $num_steps = preg_match_all('/^#[^*]/im', $stepsText, $matches);
    }

    $num_photos = preg_match_all('/\\[\\[Image:/im', $wikitext, $matches);
    return $num_photos > $num_steps / 2 ? "Yes" : "No";
}
/**
 * Check whether the steps section of a revision contains an enlarged photo.
 *
 * A photo counts as enlarged when its size option is between 500px and
 * 9999px, i.e. the image link ends in "|<500-9999>px]]".
 *
 * @param Revision $r revision whose text is inspected (taken by reference)
 * @return int|false 1 when an enlarged photo is found, 0 when not (or when
 *   there is no steps text), false if preg_match() fails
 */
private function hasEnlargedWikiPhotos(&$r) {
    $section = Wikitext::getStepsSection($r->getText(), true);
    $stepsText = isset($section[0]) ? $section[0] : '';
    if (!$stepsText) {
        return 0;
    }

    // Three digits starting with 5-9 (500-999) or any four-digit width
    // (1000-9999). The previous pattern required the first digit to be 5-9
    // even for four-digit widths and so missed 1000px-4999px.
    return preg_match('/\\|(?:[5-9]\\d{2}|[1-9]\\d{3})px\\]\\]/im', $stepsText);
}
$res = $dbr->select('page', 'page_id', array('page_namespace' => 0, 'page_is_redirect' => 0), __FILE__);
while ($row = $dbr->fetchObject($res)) {
    $titles[$row->page_id] = array();
}
$dbr->freeResult($res);
echo "Done grabbing all titles from db at " . microtime(true) . "\n";

$articles = array();
$count = 0;

// Collect the ids of articles that have no image in the intro but at least
// one image in the steps section.
foreach ($titles as $id => $info) {
    $title = Title::newFromID($id);
    // A page can disappear between the query above and this lookup; skip it
    // instead of calling a method on null.
    $revision = $title ? Revision::newFromTitle($title) : null;

    if ($revision) {
        $wikitext = $revision->getText();
        $intro = Wikitext::getIntro($wikitext);
        $hasIntroImage = preg_match('@\\[\\[Image:([^\\]|]*)(\\|[^\\]]*)?\\]\\]@s', $intro);
        if (!$hasIntroImage) {
            $section = Wikitext::getStepsSection($wikitext, true);
            $num_step_photos = preg_match_all('@\\[\\[Image:([^\\]|]*)(\\|[^\\]]*)?\\]\\]@s', $section[0], $matches);
            if ($num_step_photos > 0) {
                $articles[] = $id;
            }
        }
    }

    // progress output every 1000 pages, whether or not the page was usable
    $count++;
    if ($count % 1000 == 0) {
        echo "Done processing " . $count . " artciles\n";
    }
}
echo "Done processing all titles. Left with " . count($articles) . " titles. At " . microtime(true) . "\n";

$fo = fopen($argv[0], 'w');
fwrite($fo, "<html><head></head><body>");

//now that we have all the data, spit out the info
foreach ($articles as $id) {
/**
 * Append an approved alternate method to the steps section of an article.
 *
 * The method is added as a "=== <name> ===" sub-section at the end of the
 * steps section and the article is saved. When the save succeeds, a
 * 'methedit' log entry is written and the method's row is deleted from
 * MethodEditor::TABLE_NAME. The MethodEdited hook runs whether or not the
 * save succeeded.
 *
 * @param int    $methodId  ama_id of the pending method row
 * @param int    $articleId page id of the target article
 * @param string $altMethod name of the alternate method
 * @param string $altSteps  wikitext of the method's steps
 * @return mixed result of Article::doEdit(); null when the title or its
 *   revision cannot be loaded
 */
private function keepMethod($methodId, $articleId, $altMethod, $altSteps) {
    global $wgUser, $wgParser;

    $title = Title::newFromID($articleId);
    if (!$title) {
        return;
    }

    $revision = Revision::newFromTitle($title);
    $article = new Article($title);
    if (!$revision || !$article) {
        return;
    }

    $wikitext = $revision->getText();
    $section = Wikitext::getStepsSection($wikitext, true);
    $stepsWithMethod = $section[0] . "\n\n=== {$altMethod} ===\n{$altSteps}";
    $updatedText = $wgParser->replaceSection($wikitext, $section[1], $stepsWithMethod);

    $success = $article->doEdit($updatedText, MethodEditor::EDIT_COMMENT);
    if ($success) {
        $logPage = new LogPage('methedit', false);
        $altMethodTransform = str_replace(" ", "_", $altMethod);
        $logMessage = wfMsg('editor-approved-logentry', $title->getFullText(), $altMethod, $altMethodTransform);
        $logPage->addEntry("Added", $title, $logMessage);

        $dbw = wfGetDB(DB_MASTER);
        $dbw->delete(MethodEditor::TABLE_NAME, array('ama_id' => $methodId));
    }

    wfRunHooks("MethodEdited", array($wgUser, $title, '0'));
    return $success;
}
// For every page id: run the intro and the steps section through
// replaceBrokenLinksInSection() and save the article when either changed.
foreach ($ids as $id) {
    $title = Title::newFromID($id);
    if (!$title) {
        continue;
    }

    $revision = Revision::newFromTitle($title);
    if (!$revision) {
        // no loadable revision for this title; nothing to fix
        continue;
    }

    $stepsChanged = false;
    $introChanged = false;
    $article = new Article($title);
    $wikiText = $revision->getText();

    $intro = Wikitext::getIntro($wikiText, true);
    if ($intro != "") {
        $intro = replaceBrokenLinksInSection($intro, $introChanged, $title);
        if ($introChanged) {
            $wikiText = Wikitext::replaceIntro($wikiText, $intro, true);
        }
    }

    // the steps section is read after the intro replacement so its section
    // id refers to the updated text
    list($steps, $sectionID) = Wikitext::getStepsSection($wikiText, true);
    if ($steps != "") {
        $steps = replaceBrokenLinksInSection($steps, $stepsChanged, $title);
        if ($stepsChanged) {
            $wikiText = Wikitext::replaceStepsSection($wikiText, $sectionID, $steps, true);
        }
    }

    if ($stepsChanged || $introChanged) {
        $article->doEdit($wikiText, "Removing broken links");
    }
}

function replaceBrokenLinksInSection($sectionText, &$changed, &$title) {
    $matchesarray = array();
    $sectionText = preg_replace_callback("@<nowiki>[^<]*</nowiki>@i", 'handleNoWikiTags', $sectionText);