Example #1
0
/**
 * Build a short, numbered HTML summary of an article's steps.
 *
 * Up to $maxSteps step summaries are emitted as "N. text" fragments joined
 * with "<br>". If a further summarizable step exists, a link to $fullURL
 * ("N more steps at wikiHow" / "Another step at wikiHow") is appended
 * instead of that step.
 *
 * When the steps section contains alternate methods, only the first method
 * that has at least one step is summarized.
 *
 * @param string $wikitext article wikitext
 * @param int    $maxSteps maximum number of step summaries to emit
 * @param string $fullURL  URL used for the "more steps" link
 * @return string HTML fragment, or '' when no steps section is found
 */
function synthesizeSummary($wikitext, $maxSteps, $fullURL)
{
    $stepsSec = Wikitext::getStepsSection($wikitext, true);
    if (!$stepsSec) {
        return '';
    }
    $stepsText = Wikitext::stripHeader($stepsSec[0]);

    // With alternate methods present, narrow down to the first one that has steps.
    if (Wikitext::countAltMethods($stepsText) > 0) {
        foreach (Wikitext::splitAltMethods($stepsText) as $method) {
            if (Wikitext::isAltMethod($method) && Wikitext::countSteps($method) > 0) {
                $stepsText = $method;
                break;
            }
        }
    }

    $countSteps = Wikitext::countSteps($stepsText);
    $summaryOut = '';
    $count = 0;
    foreach (Wikitext::splitSteps($stepsText) as $step) {
        if (!Wikitext::isStepSimple($step, false)) {
            continue;
        }
        $summary = Wikitext::removeRefsFromFlattened(Wikitext::summarizeStep($step));
        if (!$summary) {
            continue;
        }

        $count++;
        // every fragment after the first one starts on a new line
        $break = $count > 1 ? "<br>" : '';

        if ($count <= $maxSteps) {
            $summaryOut .= "{$break}{$count}. {$summary}";
            continue;
        }

        // Over the limit: replace this step with a link to the full article.
        $remaining = $countSteps - $maxSteps;
        $text = '';
        if ($remaining >= 2) {
            $text = "{$remaining} more steps at wikiHow";
        } elseif ($remaining == 1) {
            $text = "Another step at wikiHow";
        }
        if ($text) {
            // The attribute below is single-quoted, so single quotes MUST be
            // escaped as well; the default flags only do that on PHP >= 8.1.
            $href = htmlspecialchars($fullURL, ENT_QUOTES);
            $link = "<a href='{$href}'>{$text}</a>";
            $summaryOut .= "{$break}{$link}";
        }
        break;
    }
    return $summaryOut;
}
 /**
  * Report a single article whose steps contain a first "Method"/"Part" sub-header.
  *
  * The steps text is searched (case-insensitively, line by line) for a
  * "=== Method One ===", "=== Method 1 ===", "=== Part One ===" or
  * "=== Part 1 ===" style header. When one is found, the article URL is
  * printed followed by a newline.
  *
  * @param object $title title object; handed to self::getWikitext() and asked for its DB key
  * @return bool true when a matching sub-header exists (URL was printed), false otherwise
  */
 private static function processSubheaders($title)
 {
     list($wikitext, $stepsText, $sectionID) = self::getWikitext($title);
     if (!$wikitext || !$stepsText) {
         return false;
     }

     // without any parts/methods/ways there are no sub-headers to look at
     if (Wikitext::countAltMethods($stepsText) <= 0) {
         return false;
     }

     $headerPattern = '@(^\\s*===\\s*)(Method|Part)(\\s*One|\\s*1)(\\s*===\\s*)@im';
     $hits = preg_match_all($headerPattern, $stepsText, $matches);
     if (!$hits) {
         return false;
     }

     print 'http://www.wikihow.com/' . $title->getDBKey() . "\n";
     return true;
 }
 /**
  * Strip "Method …"/"Part …" labels from a single article's step sub-headers.
  *
  * For articles with more than one alternate method, every "===" sub-header
  * that starts with "Method " or "Part " has that label removed (together
  * with the text following it up to the first ':' or '.' on the line, when
  * present). If that changed anything, the steps section is replaced, the
  * result of self::addMagicWord() is saved, and the change is reported:
  * printed, logged via self::logIt(), and written as a tab-separated row
  * (url, magic word, first old header, first new header) to $csv.
  * The method then sleeps for self::SLEEPTIME microseconds.
  *
  * @param object   $title title object of the article to process
  * @param resource $csv   open file handle that receives the report row
  * @return bool true when the article was rewritten and saved, false otherwise
  */
 private static function processSubheaders($title, $csv)
 {
     list($wikitext, $stepsText, $sectionID) = self::getWikitext($title);
     if (!$wikitext || !$stepsText) {
         return false;
     }

     // only articles with several parts/methods/ways are rewritten
     if (Wikitext::countAltMethods($stepsText) <= 1) {
         return false;
     }

     $newstepsText = preg_replace('@(^\\s*===\\s*)(Method |Part )(.*?:|.*?\\.|\\s*===|)@im', '$1', $stepsText);
     // preg_replace() yields null on a PCRE error; treating that as "changed"
     // would overwrite the steps section with nothing, so bail out instead.
     if ($newstepsText === null || strcmp($stepsText, $newstepsText) == 0) {
         return false;
     }

     // it has changed; update the sub headers
     $newWikitext = Wikitext::replaceStepsSection($wikitext, $sectionID, $newstepsText, true);
     if (!$newWikitext) {
         return false;
     }

     // sub headers have been updated, add the "magic word"
     list($newestWikitext, $magic_word) = self::addMagicWord($stepsText, $newWikitext);
     if (!$newestWikitext) {
         return false;
     }

     Wikitext::saveWikitext($title, $newestWikitext, self::$comment);
     $url = 'http://www.wikihow.com/' . $title->getDBKey();

     // Grab the first sub-header before and after the rewrite for the report.
     // This pattern needs "===" at the very start of a line, so it may find
     // nothing even though the rewrite matched; fall back to '' in that case.
     $headerPattern = '@^===.*===?@im';
     $oldHeader = preg_match($headerPattern, $stepsText, $m) ? $m[0] : '';
     $newHeader = preg_match($headerPattern, $newstepsText, $m) ? $m[0] : '';
     $data = array($url, $magic_word, $oldHeader, $newHeader);

     $line = $url . "\t" . $magic_word;
     //show it
     print $line . "\n";
     //log it
     self::logIt($line);
     //write it to the csv (tab-delimited)
     fputcsv($csv, $data, chr(9));
     // pause between saved edits
     usleep(self::SLEEPTIME);
     return true;
 }
Example #4
0
 /**
  * Generate the page title for an article under a given title test.
  *
  * Variants handled:
  *  - self::TITLE_CUSTOM:        $custom is returned as-is.
  *  - self::TITLE_SITE_PREVIOUS: "How to XXX: N Steps (with Pictures) - wikiHow",
  *                               with no length limit applied.
  *  - default / 5:               "How to XXX: N Steps" (5: never "with Pictures")
  *  - 6 / 7:                     "N Tips on How to XXX" (7: never "with Pictures")
  *  - 8 / 9:                     "How to XXX: Step-by-Step Instructions" (9: never "with Pictures")
  *
  * For the default and 5-9 variants, articles with three or more alternate
  * methods get "N Ways to XXX" instead. These variants are also shortened
  * step by step until they fit into self::MAX_TITLE_LENGTH (measured with
  * strlen(), i.e. in bytes).
  *
  * Further variants (12-18, from later emails) only ever existed as
  * commented-out cases and are not implemented here.
  *
  * @param object $title  article title object; getText() is read and it is passed to self::getWikitext()
  * @param mixed  $test   title test identifier (see list above)
  * @param string $custom title to use when $test is self::TITLE_CUSTOM
  * @return string the generated page title
  */
 private static function genTitle($title, $test, $custom)
 {
     $titleTxt = $title->getText();
     $howto = wfMsg('howto', $titleTxt);
     list($wikitext, $stepsText) = self::getWikitext($title);
     switch ($test) {
         case self::TITLE_CUSTOM:
             // Custom
             $pageTitle = $custom;
             break;
         case self::TITLE_SITE_PREVIOUS:
             // How to XXX: N Steps (with Pictures) - wikiHow
             list($numSteps, $withPictures) = self::getTitleExtraInfo($wikitext, $stepsText, $test);
             $inner = $numSteps > 0 ? "{$howto}: {$numSteps} Steps" : $howto;
             $inner = $withPictures ? "{$inner} (with Pictures)" : $inner;
             $pageTitle = wfMsg('pagetitle', $inner);
             break;
         default:
             // How to XXX: N Steps (with Pictures) - wikiHow
             // From Chris's Mar 25 email
         case 5:
             // default, but not "with Pictures"
         case 6:
             // n Tips on How to ... "with Pictures"
         case 7:
             // n Tips on How to ... but not "with Pictures"
         case 8:
             // How to ...: Step-by-Step Instructions "with Pictures"
         case 9:
             // How to ...: Step-by-Step Instructions but not "with Pictures"
             $methods = Wikitext::countAltMethods($stepsText);
             if ($methods >= 3) {
                 $inner = "{$methods} Ways to {$titleTxt}";
                 $pageTitle = wfMsg('pagetitle', $inner);
                 if (strlen($pageTitle) > self::MAX_TITLE_LENGTH) {
                     $pageTitle = $inner;
                 }
                 break;
             }

             list($numSteps, $withPictures) = self::getTitleExtraInfo($wikitext, $stepsText, $test);
             // the odd-numbered variants never advertise pictures
             $forceNoWithPictures = in_array($test, array(5, 7, 9));
             $withPictures = !$forceNoWithPictures ? $withPictures : false;

             // articlename + step metadata, without the "(with Pictures)" suffix
             if (!($numSteps > 0)) {
                 $base = $howto;
             } elseif ($test == 6 || $test == 7) {
                 $base = "{$numSteps} Tips on {$howto}";
             } elseif ($test == 8 || $test == 9) {
                 $base = "{$howto}: Step-by-Step Instructions";
             } else {
                 $base = "{$howto}: {$numSteps} Steps";
             }
             $inner = $withPictures ? "{$base} (with Pictures)" : $base;

             // first, try articlename + metadata + wikihow
             $pageTitle = wfMsg('pagetitle', $inner);
             if (strlen($pageTitle) > self::MAX_TITLE_LENGTH) {
                 // next, try articlename + metadata
                 $pageTitle = $inner;
                 if ($numSteps > 0 && strlen($pageTitle) > self::MAX_TITLE_LENGTH) {
                     // next, try articlename + steps; this must update the
                     // title for every variant, not only the default one
                     $pageTitle = $base;
                 }
                 if (strlen($pageTitle) > self::MAX_TITLE_LENGTH) {
                     // next, try articlename + wikihow
                     $pageTitle = wfMsg('pagetitle', $howto);
                     if (strlen($pageTitle) > self::MAX_TITLE_LENGTH) {
                         // lastly, set title just as articlename
                         $pageTitle = $howto;
                     }
                 }
             }
             break;
     }
     return $pageTitle;
 }