/**
 * Build a short numbered HTML summary of an article's steps.
 *
 * The steps section is extracted from $wikitext. If it is split into alternate
 * methods, only the first method that has at least one step is summarized.
 * Each "simple" step that yields a non-empty summary becomes a numbered line
 * ("1. ...", "2. ..."), with lines separated by "<br>". Once more than
 * $maxSteps lines would be produced, a link to $fullURL is appended instead
 * ("N more steps at wikiHow" / "Another step at wikiHow") and processing stops.
 *
 * @param string $wikitext full article wikitext
 * @param int    $maxSteps maximum number of numbered summary lines to emit
 * @param string $fullURL  article URL used for the "more steps" link
 * @return string HTML fragment, or '' when no steps section is found
 */
function synthesizeSummary($wikitext, $maxSteps, $fullURL) {
    $stepsSec = Wikitext::getStepsSection($wikitext, true);
    if (!$stepsSec) {
        return '';
    }

    $stepsText = Wikitext::stripHeader($stepsSec[0]);

    // With alternate methods present, narrow the text down to the first
    // method that actually contains steps.
    if (Wikitext::countAltMethods($stepsText) > 0) {
        $altMethods = Wikitext::splitAltMethods($stepsText);
        foreach ($altMethods as $method) {
            if (Wikitext::isAltMethod($method) && Wikitext::countSteps($method) > 0) {
                $stepsText = $method;
                break;
            }
        }
    }

    $countSteps = Wikitext::countSteps($stepsText);
    $summaryOut = '';
    $steps = Wikitext::splitSteps($stepsText);
    $count = 0;

    foreach ($steps as $step) {
        if (!Wikitext::isStepSimple($step, false)) {
            continue;
        }

        $summary = Wikitext::summarizeStep($step);
        $summary = Wikitext::removeRefsFromFlattened($summary);
        if (!$summary) {
            continue;
        }

        $count++;
        $break = $count > 1 ? "<br>" : '';

        if ($count > $maxSteps) {
            // NOTE(review): $remaining is derived from the total step count,
            // while $count only advances for simple steps with a summary, so
            // the number shown may not match the steps actually skipped.
            $remaining = $countSteps - $maxSteps;
            $text = '';
            if ($remaining >= 2) {
                $text = "{$remaining} more steps at wikiHow";
            } elseif ($remaining == 1) {
                $text = "Another step at wikiHow";
            }
            if ($text) {
                // The attribute below is single-quoted, so single quotes in
                // the URL must be escaped too; the pre-8.1 default
                // (ENT_COMPAT) leaves them untouched.
                $href = htmlspecialchars($fullURL, ENT_QUOTES);
                $link = "<a href='{$href}'>{$text}</a>";
                $summaryOut .= "{$break}{$link}";
            }
            break;
        }

        $summaryOut .= "{$break}{$count}. {$summary}";
    }

    return $summaryOut;
}
/**
 * Check a single article for a first "Method"/"Part" sub-header and report it.
 *
 * The article URL is printed (followed by a newline) when the steps text has
 * at least one alternate method and contains a sub-header such as
 * "=== Method One ===" or "=== Part 1 ===" (matched case-insensitively).
 *
 * @param object $title article title; passed to self::getWikitext() and asked for its DB key
 * @return bool true when the article matched and its URL was printed, false otherwise
 */
private static function processSubheaders($title) {
    list($articleText, $stepsBody, $unusedSectionId) = self::getWikitext($title);
    if (!$articleText || !$stepsBody) {
        return false;
    }

    // Articles without parts/methods/ways are of no interest here.
    $altMethodCount = Wikitext::countAltMethods($stepsBody);
    if (!($altMethodCount > 0)) {
        return false;
    }

    $firstHeaderPattern = '@(^\\s*===\\s*)(Method|Part)(\\s*One|\\s*1)(\\s*===\\s*)@im';
    $hits = preg_match_all($firstHeaderPattern, $stepsBody, $matches);
    if (!$hits) {
        return false;
    }

    $articleUrl = 'http://www.wikihow.com/' . $title->getDBKey();
    print "{$articleUrl}\n";

    return true;
}
/**
 * Process a single article: strip the "Method "/"Part " label from its step
 * sub-headers, save the result and record what changed.
 *
 * Only articles whose steps contain more than one alternate method are
 * touched. When the rewrite changes the steps text, the steps section is
 * replaced, a "magic word" is added via self::addMagicWord(), the wikitext is
 * saved, and the URL, magic word and the first "===" header before and after
 * the rewrite are printed, logged and written to $csv as a tab-separated row.
 * The method then sleeps for self::SLEEPTIME microseconds.
 *
 * @param object   $title article title; passed to the wikitext helpers and asked for its DB key
 * @param resource $csv   open file handle the tab-separated row is written to
 * @return bool true when the article was updated and logged, false otherwise
 */
private static function processSubheaders($title, $csv) {
    list($wikitext, $stepsText, $sectionID) = self::getWikitext($title);
    if (!$wikitext || !$stepsText) {
        return false;
    }

    // We only care about articles with several parts/methods/ways.
    $subs = Wikitext::countAltMethods($stepsText);
    if ($subs <= 1) {
        return false;
    }

    $newstepsText = preg_replace(
        '@(^\\s*===\\s*)(Method |Part )(.*?:|.*?\\.|\\s*===|)@im',
        '$1',
        $stepsText
    );

    // preg_replace() returns null on a PCRE failure; that must not be
    // mistaken for "the text changed" and end up being saved.
    if ($newstepsText === null) {
        return false;
    }

    // Have we made a difference?
    if (strcmp($stepsText, $newstepsText) == 0) {
        return false;
    }

    // It has changed; update the sub headers.
    $newWikitext = Wikitext::replaceStepsSection($wikitext, $sectionID, $newstepsText, true);
    if (!$newWikitext) {
        return false;
    }

    // Sub headers have been updated, add the "magic word".
    list($newestWikitext, $magic_word) = self::addMagicWord($stepsText, $newWikitext);
    if (!$newestWikitext) {
        return false;
    }

    Wikitext::saveWikitext($title, $newestWikitext, self::$comment);
    $url = 'http://www.wikihow.com/' . $title->getDBKey();

    // Grab the first "===" header before and after the rewrite. The rewrite
    // pattern tolerates leading whitespace but this one does not, so a match
    // is not guaranteed; fall back to an empty cell instead of reading an
    // undefined offset.
    $oldHeader = preg_match('@^===.*===?@im', $stepsText, $m) ? $m[0] : '';
    $newHeader = preg_match('@^===.*===?@im', $newstepsText, $m) ? $m[0] : '';

    $data = array($url, $magic_word, $oldHeader, $newHeader);

    // Show it.
    print $url . ' ' . $magic_word . "\n";
    // Log it.
    self::logIt($url . ' ' . $magic_word);
    // Write it to the csv (tab-delimited).
    fputcsv($csv, $data, chr(9));

    // Pause between edits.
    usleep(self::SLEEPTIME);

    return true;
}
/**
 * Generate the page title text for an article under a given title test.
 *
 * - self::TITLE_CUSTOM returns $custom unchanged.
 * - self::TITLE_SITE_PREVIOUS builds "How to XXX: N Steps (with Pictures)"
 *   wrapped in the 'pagetitle' message.
 * - Every other value (including 5-9) builds "N Ways to XXX" when there are
 *   three or more alternate methods; otherwise a steps/tips/instructions
 *   variant chosen by $test. Titles longer than self::MAX_TITLE_LENGTH are
 *   shortened through progressively plainer fallbacks.
 *
 * Length is measured with strlen(), i.e. in bytes.
 *
 * @param object $title  article title object; getText() supplies the article name
 * @param mixed  $test   title test identifier (class constant or number 5-9)
 * @param string $custom custom title returned for self::TITLE_CUSTOM
 * @return string the generated title
 */
private static function genTitle($title, $test, $custom) {
    $titleTxt = $title->getText();
    $howto = wfMsg('howto', $titleTxt);

    list($wikitext, $stepsText) = self::getWikitext($title);

    switch ($test) {
        case self::TITLE_CUSTOM: // Custom
            $title = $custom;
            break;

        case self::TITLE_SITE_PREVIOUS: // How to XXX: N Steps (with Pictures) - wikiHow
            list($numSteps, $withPictures) = self::getTitleExtraInfo($wikitext, $stepsText, $test);
            $inner = $numSteps > 0 ? "{$howto}: {$numSteps} Steps" : $howto;
            $inner = $withPictures ? "{$inner} (with Pictures)" : $inner;
            $title = wfMsg('pagetitle', $inner);
            break;

        default: // How to XXX: N Steps (with Pictures) - wikiHow
                 // From Chris's Mar 25 email
        case 5: // default, but not "with Pictures"
        case 6: // n Tips on How to ... "with Pictures"
        case 7: // n Tips on How to ... but not "with Pictures"
        case 8: // How to ...: Step-by-Step Instructions "with Pictures"
        case 9: // How to ...: Step-by-Step Instructions but not "with Pictures"
            $methods = Wikitext::countAltMethods($stepsText);
            if ($methods >= 3) {
                $inner = "{$methods} Ways to {$titleTxt}";
                $title = wfMsg('pagetitle', $inner);
                if (strlen($title) > self::MAX_TITLE_LENGTH) {
                    $title = $inner;
                }
            } else {
                list($numSteps, $withPictures) = self::getTitleExtraInfo($wikitext, $stepsText, $test);
                $forceNoWithPictures = in_array($test, array(5, 7, 9));
                $withPictures = !$forceNoWithPictures ? $withPictures : false;

                if ($test == 6 || $test == 7) {
                    $inner = $numSteps > 0 ? "{$numSteps} Tips on {$howto}" : $howto;
                } elseif ($test == 8 || $test == 9) {
                    $inner = $numSteps > 0 ? "{$howto}: Step-by-Step Instructions" : $howto;
                } else {
                    $inner = $numSteps > 0 ? "{$howto}: {$numSteps} Steps" : $howto;
                }

                // Keep the variant without the "(with Pictures)" suffix so the
                // length fallback below can reuse it for every test.
                $innerNoPictures = $inner;
                $inner = $withPictures ? "{$inner} (with Pictures)" : $inner;

                // first, try articlename + metadata + wikihow
                $title = wfMsg('pagetitle', $inner);
                if (strlen($title) > self::MAX_TITLE_LENGTH) {
                    // next, try articlename + metadata
                    $title = $inner;
                    if ($numSteps > 0 && strlen($title) > self::MAX_TITLE_LENGTH) {
                        // next, try articlename + steps. Assign $title for all
                        // variants; previously tests 6-9 wrote to $inner,
                        // which is never read again, so this fallback was
                        // silently skipped for them.
                        $title = $innerNoPictures;
                    }
                    if (strlen($title) > self::MAX_TITLE_LENGTH) {
                        // next, try articlename + wikihow
                        $title = wfMsg('pagetitle', $howto);
                        if (strlen($title) > self::MAX_TITLE_LENGTH) {
                            // lastly, set title just as articlename
                            $title = $howto;
                        }
                    }
                }
            }
            break;

        // start of new Title Tests from Chris's March 29 email
        //case 12: // How to XXX: N Tips - wikiHow
        //case 13: // N Tips on How to XXX - wikiHow
        //case 14: // How to XXX: Step-by-Step Instructions
        //case 15: // How to XXX: N Methods - wikiHow
        //case 16: // N Ways to XXX - wikiHow
        //case 17: // How to XXX with Step-by-Step Pictures

        // start of new title tests from Chris's Oct 2 email
        /*case 18: // How to XXX with Step-by-Step Pictures
            $inner = '';
            $methods = Wikitext::countAltMethods($stepsText);
            if ($methods >= 4) {
                $inner = "$methods Ways to $titleTxt";
            } else {
                $steps = Wikitext::countSteps($stepsText);
                if (3 <= $steps && $steps < 15) {
                    $inner = "$steps Tips on $howto";
                }
            }
            if (!$inner) {
                $inner = "$howto: Step-by-Step Instructions";
            }
            $title = wfMsg('pagetitle', $inner);
            if (strlen($title) > self::MAX_TITLE_LENGTH) {
                $title = $inner;
            }
            break;*/
    }

    return $title;
}