/**
 * Check an article for plagiarism using the Copyscape internet text search.
 *
 * The text of the revision found for $t is flattened and submitted to
 * Copyscape. Every hit whose URL is not hosted on wikihow.com, whstatic.com
 * or youtube.com (or one of their subdomains) is reported. A per-hit
 * "minimum words matched" threshold was sketched here at some point but was
 * never enabled, so every external hit counts.
 *
 * @param mixed $t Passed straight to Revision::newFromTitle(); presumably a
 *                 Title object.
 * @return string 'No such article' when no revision can be loaded; otherwise
 *                an HTML fragment with one "Plagiarized" link per external
 *                hit, or '' when there are none. This is always a string,
 *                never a boolean.
 */
private static function copyCheck($t) {
    $revision = Revision::newFromTitle($t);
    if (!$revision) {
        return 'No such article';
    }

    $text = Wikitext::flatten($revision->getText());
    $res = copyscape_api_text_search_internet($text, 'ISO-8859-1', 2);
    if (empty($res['count']) || empty($res['result']) || !is_array($res['result'])) {
        return '';
    }

    // Hosts that legitimately carry our own text. The dots are escaped and
    // the host must end right after ".com", so look-alikes such as
    // "xwikihow.com" or "www.wikihow.com.example.org" are not whitelisted.
    // https and bare domains (no subdomain) are accepted as well.
    $ownHost = '@^https?://(?:[a-z0-9-]+\.)*(?:wikihow|whstatic|youtube)\.com(?:[:/?#]|$)@i';

    $result = '';
    foreach ($res['result'] as $match) {
        if (!isset($match['url']) || preg_match($ownHost, $match['url'])) {
            continue;
        }
        // The URL comes from a third party, so escape it before it goes into
        // both the href attribute and the link text.
        $url = htmlspecialchars($match['url'], ENT_QUOTES);
        $result .= '<b>Plagiarized:</b> <a href="' . $url . '">' . $url . '</a><br />';
    }

    return $result;
}
// Maintenance script: write every non-redirect main-namespace article whose
// flattened intro (section 0) is shorter than 50 characters to
// short-intros.csv, together with whether the intro uses an {{intro...}}
// template.
$dbr = wfGetDB(DB_SLAVE);
$sql = "SELECT page_title, page_id FROM page WHERE page_is_redirect=0 AND page_namespace=" . NS_MAIN;
$res = $dbr->query($sql, __FILE__);

$fp = fopen('short-intros.csv', 'w');
if (!$fp) {
    die("could not open file for write\n");
}
fputcsv($fp, array('page_id', 'URL', 'has_template', 'intro_length', 'intro'));

// Counts the rows written to the CSV (not the articles scanned); used only
// for the progress message below.
$i = 0;

foreach ($res as $row) {
    $title = Title::newFromDBkey($row->page_title);
    if (!$title) {
        print "Can't make title out of {$row->page_title}\n";
        continue;
    }

    // A page row can exist without a loadable revision; skip it instead of
    // dying on a null method call halfway through the export.
    $rev = Revision::newFromTitle($title);
    if (!$rev) {
        print "Can't load revision for {$row->page_title}\n";
        continue;
    }

    $wikitext = $rev->getText();
    $intro = Article::getSection($wikitext, 0);
    $flat = trim(Wikitext::flatten($intro));
    $len = mb_strlen($flat);
    if ($len < 50) {
        // check whether it has either the {{intro or {{introduction template
        $hasTemplate = strpos(strtolower($intro), '{{intro') !== false;
        $fields = array(
            $row->page_id,
            'http://www.wikihow.com/' . $title->getPartialURL(),
            $hasTemplate ? 'y' : 'n',
            $len,
            $flat,
        );
        fputcsv($fp, $fields);
        if (++$i % 100 == 0) {
            print "article {$i}\n";
        }
    }
}

fclose($fp);
/**
 * Return the flattened intro (section 0) of the revision found for a title.
 *
 * @param mixed $t Passed to Revision::newFromTitle(); presumably a Title
 *                 object. It is declared by reference only to keep the
 *                 existing signature; nothing is written through it.
 * @return string The flattened intro text, or '' when no revision can be
 *                loaded for the title.
 */
function getIntroText(&$t) {
    $revision = Revision::newFromTitle($t);
    if (!$revision) {
        // No revision to read from: report an empty intro rather than
        // failing on a null method call.
        return '';
    }

    $intro = Article::getSection($revision->getText(), 0);

    return Wikitext::flatten($intro);
}
/**
 * Run a mobile search and emit the matching main-namespace articles as JSON.
 *
 * On a cache miss the result is built, stored in memcache for one hour,
 * sent to the client with a JSON content type (regular page output is
 * disabled) and nothing is returned.
 *
 * NOTE(review): on a cache hit the cached JSON string is *returned* and
 * nothing is echoed, which is not symmetrical with the miss path. The callers
 * are not visible from here, so that behaviour is left untouched - confirm
 * which of the two the caller actually expects.
 *
 * @param string $q     Search query.
 * @param int    $start Offset handed to googleSearchResultTitles().
 * @param int    $limit Maximum number of results; capped at 50.
 * @return string|null  Cached JSON on a cache hit, otherwise nothing.
 */
public function mobileSearch($q, $start, $limit = 20) {
    global $wgOut, $wgMemc;

    // Don't return more than 50 search results at a time to prevent abuse
    if ($limit > 50) {
        $limit = 50;
    }

    $key = wfMemcKey("MobileSearch", str_replace(" ", "-", $q), $start, $limit);
    if ($val = $wgMemc->get($key)) {
        return $val;
    }

    $contents = $this->googleSearchResultTitles($q, $start, $limit, 0, self::SEARCH_MOBILE);

    $results = array();
    foreach ($contents as $t) {
        // Only return articles
        if ($t->getNamespace() != NS_MAIN) {
            continue;
        }

        // Results without a loadable revision have no intro and are left out.
        $r = Revision::newFromId($t->getLatestRevID());
        if (!$r) {
            continue;
        }

        $intro = Wikitext::getIntro($r->getText());
        $intro = trim(Wikitext::flatten($intro));

        // Cut on character boundaries: a byte-wise substr() can split a
        // multibyte character, and json_encode() refuses invalid UTF-8.
        // Assumes the flattened intro is UTF-8 - TODO confirm.
        $snippet = mb_substr($intro, 0, 180, 'UTF-8');

        // Put an ellipsis on the end (only two more dots if the snippet
        // already ends with one).
        $snippet .= substr($snippet, -1) === '.' ? ".." : "...";

        $results[] = array(
            'article' => array(
                'title' => $t->getText(),
                'url' => $t->getFullURL(),
                'imgurl' => wfGetPad(SkinWikihowskin::getGalleryImage($t, 103, 80)),
                'intro' => $snippet,
            ),
        );
    }

    $searchResults = array('results' => $results);
    $json = json_encode($searchResults);
    $wgMemc->set($key, $json, 3600); // 1 hour

    header("Content-type: application/json");
    $wgOut->disable(true);
    echo $json;
}
/**
 * Build the HTML "slide" for one related article: thumbnail, linked title and
 * the first 250 characters of its flattened intro.
 *
 * @param mixed $t Title-like object (needs exists(), getFullURL(), getText());
 *                 may be null/false.
 * @return string The slide markup, or '' when the title is empty, does not
 *                exist or has no loadable revision.
 */
function formatRelated($t) {
    global $wgParser;

    if (!$t || !$t->exists()) {
        return '';
    }

    $r = Revision::newFromTitle($t);
    if (!$r) {
        // exists() can be true while the revision still fails to load;
        // produce no slide instead of failing on a null method call.
        return '';
    }

    $intro = $wgParser->getSection($r->getText(), 0);
    $intro = Wikitext::flatten($intro);
    // Truncate on character boundaries so a multibyte character is never cut
    // in half. Assumes the flattened intro is UTF-8 - TODO confirm.
    if (mb_strlen($intro, 'UTF-8') > 250) {
        $intro = mb_substr($intro, 0, 250, 'UTF-8') . '...';
    }

    $img = SkinWikihowskin::getGalleryImage($t, 238, 139);

    // Title text may contain characters such as & or ', so escape it and the
    // URL for the HTML / single-quoted attribute context they are placed in.
    // NOTE(review): $intro and $img are still inserted as-is; whether
    // Wikitext::flatten() output is HTML-safe cannot be told from here.
    $url = htmlspecialchars($t->getFullURL(), ENT_QUOTES);
    $name = htmlspecialchars($t->getText(), ENT_QUOTES);

    return "<div class='slide_related'>\n\t\t\t\t\t\t<a href='{$url}'><img src='{$img}' alt='' width='238' height='139' class='gs_img' /></a>\n\t\t\t\t\t\t<h3><a href='{$url}'>{$name}</a></h3>\n\t\t\t\t\t\t<p>{$intro}</p>\n\t\t\t\t\t\t</div>";
}
/**
 * Re-assemble already-rendered article HTML into the "layout 02" page.
 *
 * The HTML is split on its <h2> headings. The part before the first heading
 * is replaced by an intro built from the current revision's wikitext; every
 * other section body is wrapped in a div keyed by its section name. The
 * top-level ordered list inside the steps div gets a numbered
 * <p class="step_head"> around the first sentence of each step. For anonymous
 * users ad placeholders are inserted after the intro, after the first and
 * last step, and after the tips (or first later) section. Finally the
 * article-info footer, share buttons, title and sidebar are put around the
 * body.
 *
 * @param string $article Rendered article HTML.
 * @return string The complete HTML for the article layout.
 */
public function parseArticle_02($article) {
    // $wgServer and $wgLanguageCode are read by the share buttons below, so
    // they must be imported here; without them the share URL had no host and
    // data-lang was empty.
    global $wgWikiHowSections, $wgTitle, $wgUser, $wgServer, $wgLanguageCode;

    $ads = $wgUser->getID() == 0;
    $sk = new SkinWikihowskin();

    // NOTE(review): the original code read these three locals without ever
    // defining them, so they always evaluated as empty. They are spelled out
    // here to keep exactly that behaviour without undefined-variable notices;
    // wire them to real values if they were meant to vary.
    $isMainPage = false;
    $action = '';
    $userstats = '';

    // Set to true when the "after last step" ad already took the slot of the
    // trailing ad.
    $no_third_ad = false;
    $article_bottom = '';

    // Localised section heading => canonical section key.
    $reverse_msgs = array();
    foreach ($wgWikiHowSections as $section) {
        $reverse_msgs[wfMsg($section)] = $section;
    }

    $parts = preg_split("@(<h2.*</h2>)@im", $article, 0, PREG_SPLIT_DELIM_CAPTURE);
    $body = '';
    $intro_img = '';
    for ($i = 0; $i < sizeof($parts); $i++) {
        if ($i == 0) {
            // Intro: pick the first image referenced in the rendered intro
            // for the sidebar, falling back to the default square logo.
            preg_match("/Image:(.*)\">/", $parts[$i], $matches);
            if (count($matches) > 0) {
                $img = $matches[1];
                $img = preg_replace('@%27@', "'", $img);
                $image = Title::makeTitle(NS_IMAGE, $img);
                if ($image) {
                    $file = wfFindFile($image);
                    if ($file) {
                        $thumb = $file->getThumbnail(200, -1, true, true);
                        $intro_img = '<a href="' . $image->getFullUrl() . '"><img border="0" width="200" class="mwimage101" src="' . wfGetPad($thumb->url) . '" alt="" /></a>';
                    }
                }
            }
            if ($intro_img == '') {
                $intro_img = '<img border="0" width="200" class="mwimage101" src="' . wfGetPad('/skins/WikiHow/images/wikihow_sq_200.png') . '" alt="" />';
            }

            // The intro text is rebuilt from wikitext rather than taken from
            // the rendered HTML. A title without a loadable revision simply
            // gets an empty intro.
            $r = Revision::newFromTitle($wgTitle);
            $intro_text = '';
            if ($r) {
                $intro_text = Wikitext::getIntro($r->getText());
                $intro_text = trim(Wikitext::flatten($intro_text));
            }

            $body .= '<br /><div id="color_div"></div><br />';
            $body .= '<div id="article_intro">' . $intro_text . '</div>';
            if ($ads) {
                $body .= '<div class="ad_noimage intro_ad">' . wikihowAds::getAdUnitPlaceholder('intro') . '</div>';
            }
        } else {
            if (stripos($parts[$i], "<h2") === 0 && $i < sizeof($parts) - 1) {
                // A heading followed by its section body: find the canonical
                // section key from the heading text.
                preg_match("@<span>.*</span>@", $parts[$i], $matches);
                $rev = "";
                if (sizeof($matches) > 0) {
                    $h2 = trim(strip_tags($matches[0]));
                    $rev = isset($reverse_msgs[$h2]) ? $reverse_msgs[$h2] : "";
                }
                // The steps heading itself is not output.
                if ($rev !== 'steps') {
                    $body .= $parts[$i];
                }
                // Consume the section body that follows the heading.
                $i++;
                if ($rev == "steps") {
                    $body .= "\n<div id=\"steps\" class='editable'>{$parts[$i]}</div>\n";
                } else {
                    if ($rev != "") {
                        $body .= "\n<div id=\"{$rev}\" class='article_inner editable'>{$parts[$i]}</div>\n";
                    } else {
                        $body .= "\n<div class='article_inner editable'>{$parts[$i]}</div>\n";
                    }
                }
            } else {
                $body .= $parts[$i];
            }
        }
    }

    $punct = "!\\.\\?\\:"; # valid ways of ending a sentence for the step head

    // Locate the steps div: $i is its start, $j the start of the next
    // id-carrying div (or the end of the body).
    $i = strpos($body, '<div id="steps"');
    $j = false;
    if ($i !== false) {
        $j = strpos($body, '<div id=', $i + 5);
    }
    if ($j === false) {
        $j = strlen($body);
    }
    if ($j !== false && $i !== false) {
        $steps = substr($body, $i, $j - $i);
        $parts = preg_split("@(<[/]?ul>|<[/]?ol>|<[/]?li>)@im", $steps, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
        $numsteps = preg_match_all('/<li>/m', $steps, $matches);
        $level = 0;
        $steps = "";
        $upper_tag = "";
        $levelstack = array();
        $tagstack = array();
        $current_tag = "";
        $current_li = 1;
        $donefirst = false; // used for ads to tell when we've put the ad after the first step
        if ($numsteps < 100) {
            // Compare against null explicitly: a plain truthiness test would
            // stop at a token that is the string "0" and drop everything
            // after it.
            while (($p = array_shift($parts)) !== null) {
                switch (strtolower($p)) {
                    case "<ol>":
                        $level++;
                        if ($level == 1) {
                            $p = '<ol class="steps_list">';
                            $upper_tag = "ol";
                        } else {
                            $p = " <div class='listbody'>{$p}";
                        }
                        if ($current_tag != "") {
                            $tagstack[] = $current_tag;
                        }
                        $current_tag = "ol";
                        $levelstack[] = $current_li;
                        $current_li = 1;
                        break;
                    case "<ul>":
                        if ($current_tag != "") {
                            $tagstack[] = $current_tag;
                        }
                        $current_tag = "ul";
                        $levelstack[] = $current_li;
                        $level++;
                        break;
                    case "</ol>":
                        $p .= '<div id="steps_end"></div>';
                        // intentional fall-through: closing an <ol> also pops
                        // the list state
                    case "</ul>":
                        $level--;
                        if ($level == 0) {
                            $upper_tag = "";
                        }
                        $current_tag = array_pop($tagstack);
                        $current_li = array_pop($levelstack);
                        break;
                    case "<li>":
                        $closecount = 0;
                        if ($level == 1 && $upper_tag == "ol") {
                            $li_number = $current_li++;
                            $p = '<li>';
                            # this is where things get interesting. Want to wrap the first sentence
                            # in the step head, but we need to handle cases where there are tags in
                            # the first sentence, so split based on HTML tags
                            $next = array_shift($parts);
                            $htmlparts = preg_split("@(<[^>]*>)@im", (string) $next, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                            $incaption = false;
                            $apply_b = false; // true once the step head <p> has been opened
                            while (($x = array_shift($htmlparts)) !== null) {
                                # if it's a tag, just append it and keep going
                                if (preg_match("@(<[^>]*>)@im", $x)) {
                                    $p .= $x;
                                    if ($x == "<span class='caption'>") {
                                        $incaption = true;
                                    } else {
                                        if ($x == "</span>" && $incaption) {
                                            $incaption = false;
                                        }
                                    }
                                    continue;
                                }
                                # open the step head at the first real text outside a caption and
                                # close it at the first sentence-ending punctuation
                                if (!$incaption) {
                                    if (!$apply_b && trim($x) != "") {
                                        $p .= '<p class="step_head"><span>' . str_pad($li_number, 2, '0', STR_PAD_LEFT) . '</span>';
                                        $apply_b = true;
                                    }
                                    if ($apply_b) {
                                        // preg_replace() already takes its count argument by
                                        // reference; a call-time "&" is a parse error on PHP >= 5.4.
                                        $x = preg_replace("@([{$punct}])@im", "\$1</p>", $x, 1, $closecount);
                                    }
                                }
                                $p .= $x;
                                if ($closecount > 0) {
                                    break;
                                }
                            }
                            # get anything left over
                            $p .= implode("", $htmlparts);
                            // Close the step head if no sentence end closed it. The tag that
                            // was opened is <p class="step_head">, so that is what gets
                            // closed, and only if it was opened at all.
                            if ($apply_b && $closecount == 0) {
                                $p .= "</p>";
                            }
                            if ($level == 1 && $current_li == 2 && $ads && !$donefirst) {
                                $p .= '<br class="clearall" />' . wikihowAds::getAdUnitPlaceholder(0);
                                $donefirst = true;
                            }
                        }
                        break;
                    case "</li>":
                        $p = "<div class='clearall'></div>{$p}"; //changed BR to DIV b/c IE doesn't work with the BR clear tag
                        break;
                } // switch
                $steps .= $p;
            } // while
        } else {
            // Too many steps to rewrite: pass the section through untouched.
            $steps = substr($body, $i, $j - $i);
            $steps = "<div id='steps_notmunged'>\n" . $steps . "\n</div>\n";
        }

        // The ad after the last step goes in front of the last </li>, so walk
        // the tokens in reverse and rebuild the string back to front.
        $parts = preg_split("@(<[/]?ul>|<[/]?ol>|<[/]?li>)@im", $steps, 0, PREG_SPLIT_DELIM_CAPTURE);
        $parts = array_reverse($parts);
        $steps = "";
        $level = 0;
        $gotit = false;
        $donelast = false;
        foreach ($parts as $p) {
            $lp = strtolower($p);
            if ($lp == "</ol>") {
                $level++;
                $gotit = false;
            } elseif ($lp == "</ul>") {
                $level++;
            } elseif (strpos($lp, "<li") !== false && $level == 1 && !$gotit) {
                // last step of the top-level list; nothing is changed on it
                // any more, it is only remembered
                $gotit = true;
            } elseif (strpos($lp, "<ul") !== false) {
                $level--;
            } elseif (strpos($lp, "<ol") !== false) {
                $level--;
            } elseif ($lp == "</li>" && !$donelast) {
                // ads after the last step
                if ($ads) {
                    if (substr($body, $j) == "") {
                        // Nothing follows the steps, so this ad replaces the
                        // one that would otherwise go below a later section.
                        $p = "<script>missing_last_ads = true;</script>" . wikihowAds::getAdUnitPlaceholder(1) . $p;
                        $no_third_ad = true;
                    } else {
                        $p = '<br />' . wikihowAds::getAdUnitPlaceholder(2) . $p;
                    }
                }
                $donelast = true;
            }
            $steps = $p . $steps;
        }
        $body = substr($body, 0, $i) . $steps . substr($body, $j);
    }

    /// ads below tips, walk the sections and put them after the tips
    if ($ads) {
        $foundtips = false;
        $anchorTag = "";
        foreach ($wgWikiHowSections as $s) {
            $isAtEnd = false;
            if ($s == "ingredients" || $s == "steps") {
                continue; // we skip these two top sections
            }
            $i = strpos($body, '<div id="' . $s . '"');
            if ($i === false) {
                continue; // we didn't find this section
            }
            $j = strpos($body, '<h2>', $i + strlen($s));
            if ($j === false) {
                $j = strlen($body); // go to the end
                $isAtEnd = true;
            }
            $section = substr($body, $i, $j - $i);
            if ($s == "video") {
                // special case for video
                $newsection = "<div id='video'><center>{$section}</center></div>";
                $body = str_replace($section, $newsection, $body);
                continue;
            }
            // No sections below this one means no anchor tag follows, so an
            // empty paragraph is added in its place.
            if ($isAtEnd) {
                $anchorTag = "<p></p>";
            }
            if ($s == "tips") {
                // tip ad is at the bottom of the tips section
                $body = str_replace($section, $section . $anchorTag . wikihowAds::getAdUnitPlaceholder('2a') . '<p><br /></p>', $body);
            } else {
                $body = str_replace($section, $section . $anchorTag . wikihowAds::getAdUnitPlaceholder(2), $body);
            }
            $foundtips = true;
            break;
        }
        if (!$foundtips && !$no_third_ad) {
            //must be the video section
            //need to put in the empty <p> tag since all the other sections have them for the anchor tags.
            $body .= "<p class='video_spacing'></p>" . wikihowAds::getAdUnitPlaceholder(2);
        }
    }

    $catlinks = $sk->getCategoryLinks(false);
    $authors = $sk->getAuthorFooter();
    if ($authors != "" || is_array($this->data['language_urls']) || $catlinks != "") {
        //k, now grab the bottom stuff
        $article_bottom .= '<br />' . wfGetSuggestedTitles($wgTitle) . '<br /> <h2 class="section_head" id="article_info_header">' . wfMsg('article_info') . '</h2> <div id="article_info" class="article_inner"> <p>' . self::getLastEdited() . '</p> <p>' . wfMsg('categories') . ':<br/>' . $catlinks . '</p> <p>' . $authors . '</p> </div><!--end article_info-->';
    }
    if ($wgUser->getID() == 0 && !$isMainPage && $action != 'edit' && $wgTitle->getNamespace() == NS_MAIN) {
        $article_bottom .= '<div class="final_ad">' . wikihowAds::getAdUnitPlaceholder(7) . '</div>';
    }
    $article_bottom .= ' <div id="final_question"> ' . $userstats . ' <p><b>' . $sk->pageStats() . '</b></p> <div id="page_rating">' . RateArticle::showForm() . '</div> <p></p> </div> <!--end last_question--> </div> <!-- article -->';

    //share buttons
    $url = urlencode($wgServer . "/" . $wgTitle->getPrefixedURL());
    $fb = '<div class="like_button"><fb:like href="' . $url . '" send="false" layout="button_count" width="86" show_faces="false"></fb:like></div>';
    $gp1 = '<div class="gplus1_button"><g:plusone size="medium" callback="plusone_vote"></g:plusone></div>';
    $tb = '<a href="http://twitter.com/share" data-lang="' . $wgLanguageCode . '" style="display:none; background-image: none; color: #ffffff;" class="twitter-share-button" data-count="horizontal" data-via="wikiHow" data-text="How to ' . htmlspecialchars($wgTitle->getText()) . '" data-related="JackHerrick:Founder of wikiHow">Tweet</a>';
    // Sysops who may delete the page get the same tweet button inside an
    // extra wrapper div.
    $tb_admin = '<div class="admin_state">' . $tb . '</div>';

    $the_buttons = '<div id="share_buttons_top">' . $fb;
    if ($wgUser->isSysop() && $wgTitle->userCan('delete')) {
        $the_buttons .= $tb_admin;
    } else {
        $the_buttons .= $tb;
    }
    $the_buttons .= $gp1 . '</div>';

    // Escaped the same way as the tweet text above: title text may contain
    // characters such as "&".
    $title = '<h1>How to ' . htmlspecialchars($wgTitle->getText()) . '</h1>';
    $edited = $sk->getAuthorHeader();

    $sidebar = '<div id="sidenav"><div id="showslideshow"></div><div id="pp_big_space">' . $intro_img . '</div></div>';
    $main = '<div id="article_main">' . $title . $the_buttons . $edited . $body . $article_bottom . '</div>';
    $article = '<div id="article_layout_' . self::ARTICLE_LAYOUT . '">' . $sidebar . $main . '</div>';

    return $article;
}
/**
 * Build the meta description text for the requested style.
 *
 * Nothing is stored by this method; it only produces the text. The styles are:
 *  - ORIGINAL: always succeeds with an empty description.
 *  - EDITED: succeeds with the 'ami_desc' value of the current row.
 *  - INTRO / INTRO_NO_TITLE: the intro, with the first step appended when the
 *    intro is shorter than twice MAX_DESC_LENGTH.
 *  - STEP1: the first step, or the intro when there is no steps section.
 * Except for INTRO_NO_TITLE, a non-empty description is prefixed with the
 * localised "howto" message for the title; an empty one is replaced by it.
 *
 * @param mixed $style One of the DESC_STYLE_* constants.
 * @return array Pair of (bool success, string description). Success is false
 *               when the article wikitext is unavailable or the style is not
 *               recognised.
 */
private function buildDescription($style) {
    // The two styles that need no article text at all.
    if (self::DESC_STYLE_ORIGINAL == $style) {
        return array(true, '');
    }
    if (self::DESC_STYLE_EDITED == $style) {
        return array(true, $this->row['ami_desc']);
    }

    $source = $this->getArticleWikiText();
    if (!$source) {
        return array(false, '');
    }

    switch ($style) {
        case self::DESC_STYLE_INTRO:
        case self::DESC_STYLE_INTRO_NO_TITLE:
            $raw = Wikitext::getIntro($source);
            // A short intro is padded out with the first step.
            if (strlen($raw) < 2 * self::MAX_DESC_LENGTH) {
                list($stepsSection, ) = Wikitext::getStepsSection($source);
                if ($stepsSection) {
                    $raw .= ' ' . Wikitext::cutFirstStep($stepsSection);
                }
            }
            break;

        case self::DESC_STYLE_STEP1:
            list($stepsSection, ) = Wikitext::getStepsSection($source);
            // No steps section: fall back to the intro.
            $raw = $stepsSection
                ? Wikitext::cutFirstStep($stepsSection)
                : Wikitext::getIntro($source);
            break;

        default:
            // Unknown style.
            return array(false, '');
    }

    $flat = Wikitext::flatten($raw);
    $prefix = wfMsg('howto', $this->titleText);

    if (!$flat) {
        $text = $prefix;
    } elseif (self::DESC_STYLE_INTRO_NO_TITLE != $style) {
        $text = $prefix . '. ' . $flat;
    } else {
        $text = $flat;
    }

    return array(true, self::trimDescription($text));
}