/**
  * Check an article for plagiarism using the Copyscape internet search.
  *
  * Every Copyscape hit whose URL is not on one of our own hosts
  * (wikihow.com, whstatic.com, youtube.com or a subdomain of them) is
  * reported.
  *
  * @param mixed $t Title of the article; handed to Revision::newFromTitle().
  * @return string Empty string when nothing was flagged; the literal
  *                'No such article' when no revision can be loaded; otherwise
  *                an HTML fragment with one "Plagiarized" link line per hit.
  */
 private static function copyCheck($t)
 {
     $rev = Revision::newFromTitle($t);
     if (!$rev) {
         return 'No such article';
     }
     $text = Wikitext::flatten($rev->getText());
     // NOTE(review): the text is declared as ISO-8859-1 to Copyscape; if the
     // wiki stores UTF-8 this is probably wrong -- confirm before changing.
     $res = copyscape_api_text_search_internet($text, 'ISO-8859-1', 2);
     // Treat a failed or empty API response as "no matches" instead of
     // indexing into something that may not be an array.
     if (!is_array($res) || empty($res['count']) || empty($res['result']) || !is_array($res['result'])) {
         return '';
     }
     // Hosts that are allowed to carry our content. The dots are escaped and
     // the host is terminated explicitly so that look-alikes such as
     // "evilwikihow.com" or "www.wikihow.com.evil.com" are not whitelisted,
     // while https and bare-domain URLs are.
     $ownSites = '@^https?://([a-z0-9-]+\.)*(wikihow|whstatic|youtube)\.com(?:[:/?#]|$)@i';
     $result = '';
     foreach ($res['result'] as $match) {
         if (!isset($match['url']) || preg_match($ownSites, $match['url'])) {
             continue;
         }
         // The URL comes from a remote service: escape it before it goes
         // into an attribute and into element content.
         $url = htmlspecialchars($match['url'], ENT_QUOTES);
         $result .= '<b>Plagiarized:</b> <a href="' . $url . '">' . $url . '</a><br />';
     }
     return $result;
 }
// Export every non-redirect main-namespace article whose flattened intro
// (section 0) is shorter than 50 characters to short-intros.csv.
$dbr = wfGetDB(DB_SLAVE);
$sql = "SELECT page_title, page_id FROM page WHERE page_is_redirect=0 AND page_namespace=" . NS_MAIN;
$res = $dbr->query($sql, __FILE__);
$fp = fopen('short-intros.csv', 'w');
if (!$fp) {
    die("could not open file for write\n");
}
fputcsv($fp, array('page_id', 'URL', 'has_template', 'intro_length', 'intro'));
// Number of rows written so far; drives the progress output below.
$i = 0;
foreach ($res as $row) {
    $title = Title::newFromDBkey($row->page_title);
    if (!$title) {
        print "Can't make title out of {$row->page_title}\n";
        continue;
    }
    $rev = Revision::newFromTitle($title);
    if (!$rev) {
        // A page row without a loadable revision must not abort the export.
        print "Can't load revision for {$row->page_title}\n";
        continue;
    }
    $wikitext = $rev->getText();
    $intro = Article::getSection($wikitext, 0);
    $flat = trim(Wikitext::flatten($intro));
    $len = mb_strlen($flat);
    if ($len < 50) {
        // check whether it has either the {{intro or {{introduction template
        $hasTemplate = stripos($intro, '{{intro') !== false;
        $fields = array($row->page_id, 'http://www.wikihow.com/' . $title->getPartialURL(), $hasTemplate ? 'y' : 'n', $len, $flat);
        fputcsv($fp, $fields);
        if (++$i % 100 == 0) {
            print "article {$i}\n";
        }
    }
}
fclose($fp);
Esempio n. 3
0
 /**
  * Return the flattened text of an article's intro (section 0).
  *
  * @param mixed $t Title of the article; handed to Revision::newFromTitle().
  * @return string Result of Wikitext::flatten() on section 0, or an empty
  *                string when no revision can be loaded for the title.
  */
 function getIntroText(&$t)
 {
     $r = Revision::newFromTitle($t);
     if (!$r) {
         // No revision to read: report "no intro" rather than calling
         // getText() on a non-object.
         return '';
     }
     $intro = Article::getSection($r->getText(), 0);
     return Wikitext::flatten($intro);
 }
Esempio n. 4
0
 /**
  * Search for articles and emit the hits as JSON for the mobile client.
  *
  * Only main-namespace titles that have a loadable latest revision are
  * included. Each hit carries title, url, imgurl and an intro excerpt of at
  * most 180 characters followed by an ellipsis. The encoded payload is cached
  * in memcached for one hour.
  *
  * @param string $q     Search query.
  * @param int    $start Offset handed to googleSearchResultTitles().
  * @param int    $limit Maximum number of hits; capped at 50.
  * @return mixed The cached JSON string on a cache hit; nothing on a cache
  *               miss (the JSON is echoed instead).
  */
 public function mobileSearch($q, $start, $limit = 20)
 {
     global $wgOut, $wgMemc;
     // Don't return more than 50 search results at a time to prevent abuse
     if ($limit > 50) {
         $limit = 50;
     }
     $key = wfMemcKey("MobileSearch", str_replace(" ", "-", $q), $start, $limit);
     if ($val = $wgMemc->get($key)) {
         // NOTE(review): a cache hit returns the JSON without echoing it,
         // while a miss echoes it and returns nothing -- confirm the caller
         // copes with both paths.
         return $val;
     }
     $contents = $this->googleSearchResultTitles($q, $start, $limit, 0, self::SEARCH_MOBILE);
     $results = array();
     foreach ($contents as $t) {
         // Only return articles
         if ($t->getNamespace() != NS_MAIN) {
             continue;
         }
         $result = array();
         $result['title'] = $t->getText();
         $result['url'] = $t->getFullURL();
         $result['imgurl'] = wfGetPad(SkinWikihowskin::getGalleryImage($t, 103, 80));
         $result['intro'] = null;
         if ($r = Revision::newFromid($t->getLatestRevID())) {
             $intro = Wikitext::getIntro($r->getText());
             $intro = trim(Wikitext::flatten($intro));
             // Cut on character boundaries: a byte-wise cut can split a
             // multi-byte UTF-8 character, and json_encode() fails on the
             // resulting invalid string.
             $excerpt = mb_substr($intro, 0, 180, 'UTF-8');
             // Put an ellipsis on the end (only two dots if the excerpt
             // already ends with one)
             $excerpt .= mb_substr($excerpt, -1, 1, 'UTF-8') == '.' ? ".." : "...";
             $result['intro'] = $excerpt;
         }
         if (!is_null($result['intro'])) {
             $results[] = array('article' => $result);
         }
     }
     $searchResults = array('results' => $results);
     $json = json_encode($searchResults);
     // 1 hour
     $wgMemc->set($key, $json, 3600);
     header("Content-type: application/json");
     $wgOut->disable(true);
     echo $json;
 }
 /**
  * Build the HTML "related article" slide for a title.
  *
  * The slide contains the gallery image, the linked title and the flattened
  * intro, truncated to 250 characters plus '...'.
  *
  * @param mixed $t Title of the related article (may be null).
  * @return string The slide HTML, or an empty string when the title is
  *                missing, does not exist, or has no loadable revision.
  */
 function formatRelated($t)
 {
     global $wgUser, $wgParser;
     if (!$t || !$t->exists()) {
         return '';
     }
     $r = Revision::newFromTitle($t);
     if (!$r) {
         // Title exists but no revision could be loaded: skip the slide
         // instead of calling getText() on a non-object.
         return '';
     }
     $intro = $wgParser->getSection($r->getText(), 0);
     $intro = Wikitext::flatten($intro);
     // Truncate on character boundaries so a multi-byte UTF-8 character is
     // never cut in half.
     if (mb_strlen($intro, 'UTF-8') > 250) {
         $intro = mb_substr($intro, 0, 250, 'UTF-8') . '...';
     }
     // NOTE(review): the skin object was never used here; the call is kept
     // only in case something relies on the skin being instantiated -- confirm
     // and remove.
     $wgUser->getSkin();
     // Escape everything that lands in an attribute or in element content.
     $img = htmlspecialchars(SkinWikihowskin::getGalleryImage($t, 238, 139), ENT_QUOTES);
     $url = htmlspecialchars($t->getFullURL(), ENT_QUOTES);
     $titleText = htmlspecialchars($t->getText());
     // NOTE(review): $intro is emitted as returned by Wikitext::flatten();
     // whether that output is already HTML-safe cannot be told from here.
     return "<div class='slide_related'>\n\t\t\t\t\t\t<a href='{$url}'><img src='{$img}' alt='' width='238' height='139' class='gs_img' /></a>\n\t\t\t\t\t\t<h3><a href='{$url}'>{$titleText}</a></h3>\n\t\t\t\t\t\t<p>{$intro}</p>\n\t\t\t\t\t\t</div>";
 }
Esempio n. 6
0
    /**
     * Re-lay-out the rendered HTML of an article.
     *
     * The HTML is split at its <h2> headings. The intro is rebuilt from the
     * current revision's wikitext, every section body is wrapped in a div,
     * the top-level ordered list of the steps section gets numbered
     * "step_head" paragraphs, ad placeholders are inserted for anonymous
     * users, and the article-info footer and share buttons are appended.
     *
     * @param string $article Rendered article HTML.
     * @return string The article wrapped in the article_layout_* container.
     */
    public function parseArticle_02($article)
    {
        global $wgWikiHowSections, $wgTitle, $wgUser, $wgServer, $wgLanguageCode;
        // Ads are only shown to anonymous users.
        $ads = $wgUser->getID() == 0;
        $sk = new SkinWikihowskin();
        // Maps a localized section heading back to its section key.
        $reverse_msgs = array();
        foreach ($wgWikiHowSections as $section) {
            $reverse_msgs[wfMsg($section)] = $section;
        }
        $parts = preg_split("@(<h2.*</h2>)@im", $article, 0, PREG_SPLIT_DELIM_CAPTURE);
        $body = '';
        $intro_img = '';
        $article_bottom = '';
        // Set when the "missing last ads" placeholder was used after the steps.
        $no_third_ad = false;
        for ($i = 0; $i < sizeof($parts); $i++) {
            if ($i == 0) {
                //intro
                preg_match("/Image:(.*)\">/", $parts[$i], $matches);
                if (count($matches) > 0) {
                    $img = $matches[1];
                    $img = preg_replace('@%27@', "'", $img);
                    $image = Title::makeTitle(NS_IMAGE, $img);
                    if ($image) {
                        $file = wfFindFile($image);
                        if ($file) {
                            $thumb = $file->getThumbnail(200, -1, true, true);
                            $intro_img = '<a href="' . $image->getFullUrl() . '"><img border="0" width="200" class="mwimage101" src="' . wfGetPad($thumb->url) . '" alt="" /></a>';
                        }
                    }
                }
                if ($intro_img == '') {
                    $intro_img = '<img border="0" width="200" class="mwimage101" src="' . wfGetPad('/skins/WikiHow/images/wikihow_sq_200.png') . '" alt="" />';
                }
                // The intro text comes from the wikitext, not from the parsed
                // HTML; fall back to an empty intro when no revision exists.
                $r = Revision::newFromTitle($wgTitle);
                $intro_text = $r ? trim(Wikitext::flatten(Wikitext::getIntro($r->getText()))) : '';
                $body .= '<br /><div id="color_div"></div><br />';
                $body .= '<div id="article_intro">' . $intro_text . '</div>';
                if ($ads) {
                    $body .= '<div class="ad_noimage intro_ad">' . wikihowAds::getAdUnitPlaceholder('intro') . '</div>';
                }
            } else {
                if (stripos($parts[$i], "<h2") === 0 && $i < sizeof($parts) - 1) {
                    preg_match("@<span>.*</span>@", $parts[$i], $matches);
                    $rev = "";
                    if (sizeof($matches) > 0) {
                        $h2 = trim(strip_tags($matches[0]));
                        $rev = isset($reverse_msgs[$h2]) ? $reverse_msgs[$h2] : "";
                    }
                    // The steps heading itself is dropped; all others are kept.
                    if ($rev !== 'steps') {
                        $body .= $parts[$i];
                    }
                    // Consume the section body that follows the heading.
                    $i++;
                    if ($rev == "steps") {
                        $body .= "\n<div id=\"steps\" class='editable'>{$parts[$i]}</div>\n";
                    } elseif ($rev != "") {
                        $body .= "\n<div id=\"{$rev}\" class='article_inner editable'>{$parts[$i]}</div>\n";
                    } else {
                        $body .= "\n<div class='article_inner editable'>{$parts[$i]}</div>\n";
                    }
                } else {
                    $body .= $parts[$i];
                }
            }
        }
        # valid ways of ending a sentence for bolding
        $punct = "!\\.\\?\\:";
        // [$i, $j) is the span of the steps div inside $body.
        $i = strpos($body, '<div id="steps"');
        $j = false;
        if ($i !== false) {
            $j = strpos($body, '<div id=', $i + 5);
        }
        if ($j === false) {
            $j = strlen($body);
        }
        if ($i !== false) {
            $steps = substr($body, $i, $j - $i);
            $parts = preg_split("@(<[/]?ul>|<[/]?ol>|<[/]?li>)@im", $steps, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
            $numsteps = preg_match_all('/<li>/m', $steps, $matches);
            $level = 0;
            $steps = "";
            $upper_tag = "";
            $levelstack = array();
            $tagstack = array();
            $current_tag = "";
            $current_li = 1;
            // used for ads to tell when we've put the ad after the first step
            $donefirst = false;
            if ($numsteps < 100) {
                // Compare against null explicitly: a token consisting of "0"
                // is falsy and would otherwise end the walk early.
                while (($p = array_shift($parts)) !== null) {
                    switch (strtolower($p)) {
                        case "<ol>":
                            $level++;
                            if ($level == 1) {
                                $p = '<ol class="steps_list">';
                                $upper_tag = "ol";
                            } else {
                                $p = "&nbsp;<div class='listbody'>{$p}";
                            }
                            if ($current_tag != "") {
                                $tagstack[] = $current_tag;
                            }
                            $current_tag = "ol";
                            $levelstack[] = $current_li;
                            $current_li = 1;
                            break;
                        case "<ul>":
                            if ($current_tag != "") {
                                $tagstack[] = $current_tag;
                            }
                            $current_tag = "ul";
                            $levelstack[] = $current_li;
                            $level++;
                            break;
                        case "</ol>":
                            $p .= '<div id="steps_end"></div>';
                            // intentional fall-through: closing bookkeeping is shared
                        case "</ul>":
                            $level--;
                            if ($level == 0) {
                                $upper_tag = "";
                            }
                            $current_tag = array_pop($tagstack);
                            $current_li = array_pop($levelstack);
                            break;
                        case "<li>":
                            $closecount = 0;
                            if ($level == 1 && $upper_tag == "ol") {
                                $li_number = $current_li++;
                                $p = '<li>';
                                # this is where things get interesting. Want to make first sentence stand out!
                                # but we need to handle cases where there are tags in the first sentence
                                # split based on HTML tags
                                $next = (string) array_shift($parts);
                                $htmlparts = preg_split("@(<[^>]*>)@im", $next, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                                $incaption = false;
                                // true once the step_head paragraph has been opened
                                $apply_b = false;
                                while (($x = array_shift($htmlparts)) !== null) {
                                    # if it's a tag, just append it and keep going
                                    if (preg_match("@(<[^>]*>)@im", $x)) {
                                        //tag
                                        $p .= $x;
                                        if ($x == "<span class='caption'>") {
                                            $incaption = true;
                                        } elseif ($x == "</span>" && $incaption) {
                                            $incaption = false;
                                        }
                                        continue;
                                    }
                                    # close the step_head paragraph if we hit the end of the sentence
                                    if (!$incaption) {
                                        if (!$apply_b && trim($x) != "") {
                                            $p .= '<p class="step_head"><span>' . str_pad($li_number, 2, '0', STR_PAD_LEFT) . '</span>';
                                            $apply_b = true;
                                        }
                                        if ($apply_b) {
                                            // The count argument is declared by-reference by
                                            // preg_replace itself; call-time "&" is a fatal
                                            // error since PHP 5.4.
                                            $x = preg_replace("@([{$punct}])@im", "\$1</p>", $x, 1, $closecount);
                                        }
                                    }
                                    $p .= $x;
                                    if ($closecount > 0) {
                                        break;
                                    }
                                }
                                # get anything left over
                                $p .= implode("", $htmlparts);
                                // close the step_head paragraph if no sentence end did it
                                if ($apply_b && $closecount == 0) {
                                    $p .= "</p>";
                                }
                                if ($level == 1 && $current_li == 2 && $ads && !$donefirst) {
                                    $p .= '<br class="clearall" />' . wikihowAds::getAdUnitPlaceholder(0);
                                    $donefirst = true;
                                }
                            }
                            break;
                        case "</li>":
                            //changed BR to DIV b/c IE doesn't work with the BR clear tag
                            $p = "<div class='clearall'></div>{$p}";
                            break;
                    }
                    // switch
                    $steps .= $p;
                }
                // while
            } else {
                // Too many steps to munge: pass the section through unchanged.
                $steps = substr($body, $i, $j - $i);
                $steps = "<div id='steps_notmunged'>\n" . $steps . "\n</div>\n";
            }
            // Walk the tokens in reverse so the ad after the last step lands
            // in front of the final </li>.
            $parts = preg_split("@(<[/]?ul>|<[/]?ol>|<[/]?li>)@im", $steps, 0, PREG_SPLIT_DELIM_CAPTURE);
            $parts = array_reverse($parts);
            $steps = "";
            $level = 0;
            $gotit = false;
            $donelast = false;
            foreach ($parts as $p) {
                $lp = strtolower($p);
                if ($lp == "</ol>") {
                    $level++;
                    $gotit = false;
                } elseif ($lp == "</ul>") {
                    $level++;
                } elseif (strpos($lp, "<li") !== false && $level == 1 && !$gotit) {
                    // last top-level OL step
                    $gotit = true;
                } elseif (strpos($lp, "<ul") !== false) {
                    $level--;
                } elseif (strpos($lp, "<ol") !== false) {
                    $level--;
                } elseif ($lp == "</li>" && !$donelast) {
                    // ads after the last step
                    if ($ads) {
                        if (substr($body, $j) == "") {
                            $p = "<script>missing_last_ads = true;</script>" . wikihowAds::getAdUnitPlaceholder(1) . $p;
                            $no_third_ad = true;
                        } else {
                            $p = '<br />' . wikihowAds::getAdUnitPlaceholder(2) . $p;
                        }
                    }
                    $donelast = true;
                }
                $steps = $p . $steps;
            }
            $body = substr($body, 0, $i) . $steps . substr($body, $j);
        }
        /// if numsteps == 100?
        /// ads below tips, walk the sections and put them after the tips
        if ($ads) {
            $foundtips = false;
            $anchorTag = "";
            foreach ($wgWikiHowSections as $s) {
                $isAtEnd = false;
                // we skip these two top sections
                if ($s == "ingredients" || $s == "steps") {
                    continue;
                }
                $i = strpos($body, '<div id="' . $s . '"');
                if ($i === false) {
                    // we didn't find this section
                    continue;
                }
                $j = strpos($body, '<h2>', $i + strlen($s));
                if ($j === false) {
                    // go to the end
                    $j = strlen($body);
                    $isAtEnd = true;
                }
                $section = substr($body, $i, $j - $i);
                if ($s == "video") {
                    // special case for video
                    $newsection = "<div id='video'><center>{$section}</center></div>";
                    $body = str_replace($section, $newsection, $body);
                    continue;
                }
                // need to account for the possibility of no sections below
                // this one and therefore no anchor tag
                if ($isAtEnd) {
                    $anchorTag = "<p></p>";
                }
                if ($s == "tips") {
                    //tip ad is now at the bottom of the tips section
                    $body = str_replace($section, $section . $anchorTag . wikihowAds::getAdUnitPlaceholder('2a') . '<p><br /></p>', $body);
                } else {
                    $body = str_replace($section, $section . $anchorTag . wikihowAds::getAdUnitPlaceholder(2), $body);
                }
                $foundtips = true;
                break;
            }
            if (!$foundtips && !$no_third_ad) {
                //must be the video section
                //need to put in the empty <p> tag since all the other sections have them for the anchor tags.
                $body .= "<p class='video_spacing'></p>" . wikihowAds::getAdUnitPlaceholder(2);
            }
        }
        // The original passed an undefined variable named $false here.
        $catlinks = $sk->getCategoryLinks(false);
        $authors = $sk->getAuthorFooter();
        if ($authors != "" || is_array($this->data['language_urls']) || $catlinks != "") {
            //k, now grab the bottom stuff
            $article_bottom .= '<br />' . wfGetSuggestedTitles($wgTitle) . '<br />
							<h2 class="section_head" id="article_info_header">' . wfMsg('article_info') . '</h2>
							<div id="article_info" class="article_inner">
								<p>' . self::getLastEdited() . '</p>
								<p>' . wfMsg('categories') . ':<br/>' . $catlinks . '</p>
								<p>' . $authors . '</p>
							</div><!--end article_info-->';
        }
        // NOTE(review): $isMainPage, $action and $userstats were never assigned
        // in this method. They are set here to the values the unset variables
        // evaluated to, so the output is unchanged -- confirm where they were
        // meant to come from.
        $isMainPage = false;
        $action = '';
        $userstats = '';
        if ($wgUser->getID() == 0 && !$isMainPage && $action != 'edit' && $wgTitle->getNamespace() == NS_MAIN) {
            $article_bottom .= '<div class="final_ad">' . wikihowAds::getAdUnitPlaceholder(7) . '</div>';
        }
        $article_bottom .= '
						<div id="final_question">
								' . $userstats . '
								<p><b>' . $sk->pageStats() . '</b></p>
								<div id="page_rating">' . RateArticle::showForm() . '</div>
								<p></p>
					   </div>  <!--end last_question-->
					</div> <!-- article -->';
        //share buttons
        $url = urlencode($wgServer . "/" . $wgTitle->getPrefixedURL());
        $fb = '<div class="like_button"><fb:like href="' . $url . '" send="false" layout="button_count" width="86" show_faces="false"></fb:like></div>';
        $gp1 = '<div class="gplus1_button"><g:plusone size="medium" callback="plusone_vote"></g:plusone></div>';
        $tb_admin = '<div class="admin_state"><a href="http://twitter.com/share" data-lang="' . $wgLanguageCode . '" style="display:none; background-image: none; color: #ffffff;" class="twitter-share-button" data-count="horizontal" data-via="wikiHow" data-text="How to ' . htmlspecialchars($wgTitle->getText()) . '" data-related="JackHerrick:Founder of wikiHow">Tweet</a></div>';
        $tb = '<a href="http://twitter.com/share" data-lang="' . $wgLanguageCode . '" style="display:none; background-image: none; color: #ffffff;" class="twitter-share-button" data-count="horizontal" data-via="wikiHow" data-text="How to ' . htmlspecialchars($wgTitle->getText()) . '" data-related="JackHerrick:Founder of wikiHow">Tweet</a>';
        $the_buttons = '<div id="share_buttons_top">' . $fb;
        if ($wgUser->isSysop() && $wgTitle->userCan('delete')) {
            $the_buttons .= $tb_admin;
        } else {
            $the_buttons .= $tb;
        }
        $the_buttons .= $gp1 . '</div>';
        $title = '<h1>How to ' . $wgTitle->getText() . '</h1>';
        $edited = $sk->getAuthorHeader();
        $sidebar = '<div id="sidenav"><div id="showslideshow"></div><div id="pp_big_space">' . $intro_img . '</div></div>';
        $main = '<div id="article_main">' . $title . $the_buttons . $edited . $body . $article_bottom . '</div>';
        $article = '<div id="article_layout_' . self::ARTICLE_LAYOUT . '">' . $sidebar . $main . '</div>';
        return $article;
    }
Esempio n. 7
0
 /**
  * Produce the meta description text for the given description style.
  *
  * The 'original' style yields an empty description and the 'edited' style
  * yields the stored ami_desc value. The remaining styles derive the text
  * from the article wikitext: the intro (topped up with the first step when
  * the intro is short) or the first step alone (falling back to the intro).
  *
  * @param mixed $style One of the DESC_STYLE_* constants.
  * @return array Two elements: a success flag and the description text.
  *               The flag is false when the wikitext is unavailable or the
  *               style is unknown.
  */
 private function buildDescription($style)
 {
     // Styles that do not need the article text at all.
     if ($style == self::DESC_STYLE_ORIGINAL) {
         return array(true, '');
     }
     if ($style == self::DESC_STYLE_EDITED) {
         return array(true, $this->row['ami_desc']);
     }

     $source = $this->getArticleWikiText();
     if (!$source) {
         return array(false, '');
     }

     $fromIntro = $style == self::DESC_STYLE_INTRO || $style == self::DESC_STYLE_INTRO_NO_TITLE;
     $fromFirstStep = !$fromIntro && $style == self::DESC_STYLE_STEP1;
     if (!$fromIntro && !$fromFirstStep) {
         // unknown style
         return array(false, '');
     }

     if ($fromIntro) {
         $text = Wikitext::getIntro($source);
         // A short intro is padded with the first step.
         if (strlen($text) < 2 * self::MAX_DESC_LENGTH) {
             list($stepsSection, ) = Wikitext::getStepsSection($source);
             if ($stepsSection) {
                 $text .= ' ' . Wikitext::cutFirstStep($stepsSection);
             }
         }
     } else {
         // First step only; use the intro when there is no steps section.
         list($text, ) = Wikitext::getStepsSection($source);
         $text = $text ? Wikitext::cutFirstStep($text) : Wikitext::getIntro($source);
     }

     $text = Wikitext::flatten($text);
     $titlePrefix = wfMsg('howto', $this->titleText);
     if (!$text) {
         // Nothing usable in the article: the title message is the description.
         $text = $titlePrefix;
     } elseif ($style != self::DESC_STYLE_INTRO_NO_TITLE) {
         $text = $titlePrefix . '. ' . $text;
     }

     return array(true, self::trimDescription($text));
 }