Ejemplo n.º 1
0
    /**
     * Build the HTML used to display a sample document.
     *
     * When self::showPdf() reports that the sample should be shown as a PDF,
     * this returns markup that embeds the PDF through the Google Docs viewer
     * (with a fallback image) and sets self::$pdf_carrot. Otherwise the
     * sample's .html file is read from disk, reduced to its <style> blocks
     * plus the contents of <body>, stored in memcache, and wrapped in the
     * sample container markup.
     *
     * @param string $doc_uri_path  URI path of the sample; 'sampledocs' is swapped for 'samplepdfs' to build the PDF URL
     * @param string $doc_file_path Directory that holds the sample's .html file
     * @param string $doc_name      Sample name (file name without extension)
     * @return string HTML for the sample
     */
    private static function getSampleHtml($doc_uri_path, $doc_file_path, $doc_name)
    {
        global $wgMemc, $IP;
        // NOTE(review): $doc_title is emitted into the markup unescaped below;
        // confirm getDisplayName() returns HTML-safe text.
        $doc_title = self::getDisplayName($doc_name);
        //check if we want to display the PDF instead
        if (self::showPdf($doc_name)) {
            $dv_display_pdf = 'http://www.wikihow.com/' . str_replace('sampledocs', 'samplepdfs', $doc_uri_path) . '/' . $doc_name . '.pdf';
            $dv_fallback_img = self::getFallbackImg($doc_name);
            $pdf_code = '<h1>' . $doc_title . '</h1>
						<div class="sample_ribbon pdf_ribbon"></div>
						<div class="sample_container pdf_container">
						<object id="pdfobject" data="http://docs.google.com/gview?url=' . $dv_display_pdf . '&embedded=true" width="720" height="600">
							   <!--fallback for IE and other non-PDF-embeddable browsers-->
							   <img src="' . $dv_fallback_img . '" id="fallback_img" alt="' . $doc_title . '" />
						</object></div>';
            self::$pdf_carrot = "pdf_carrot";
            return $pdf_code;
        }
        $memkey = wfMemcKey('sample_' . $doc_name);
        $html = $wgMemc->get($memkey);
        if (!$html) {
            require_once "{$IP}/extensions/wikihow/common/composer/vendor/electrolinux/phpquery/phpQuery/phpQuery.php";
            $file = "{$doc_file_path}/{$doc_name}.html";
            //sanitize: strip quote, backtick, "..", "./" and ":" (applied in this order)
            $file = str_replace(array("\"", "`", "..", "./", ":"), "", $file);
            if (file_exists($file)) {
                $html = file_get_contents($file);
                // This might be better in the maintenance file
                // since there is other html processing/stripping
                // Putting here for now so we don't have to reprocess all
                // the samples
                // The return value is not needed; the pq() calls below
                // presumably resolve against the document created here.
                PHPQuery::newDocument($html);
                // Start from an empty string so the appends below never
                // touch an undefined variable.
                $docHtml = '';
                foreach (pq("style") as $style) {
                    $docHtml .= pq($style)->htmlOuter();
                }
                $docHtml .= pq('body')->html();
                $html = $docHtml;
                //toss it into memcache
                $wgMemc->set($memkey, $html);
            }
        }
        // If the file was missing and nothing was cached, $html is falsy here
        // and interpolates as an empty body inside the container.
        $html = "<div class='sample_ribbon'></div>\n\n\t\t\t\t<div class='sample_container' id='sample_html'>\n\t\t\t\t<h1 id='sample_title'>{$doc_title}</h1>\n\t\t\t\t {$html}</div>";
        return $html;
    }
Ejemplo n.º 2
0
 /**
  * Post-process the rendered HTML of a non-article page.
  *
  * Loads $body into a phpQuery document, applies a series of DOM tweaks
  * (CSS marker classes, button styling, preference/talk/history/preview
  * re-arrangements, post-comment form, mark-patrolled link) and returns the
  * resulting outer HTML. If a 'PreWikihowProcessHTML' hook handler clears the
  * flag it is handed, $body is returned untouched.
  *
  * @param string $body   HTML to process
  * @param string $action Current action; appended to "bc_" as a CSS class and
  *                       checked for the "submit" prefix (edit preview)
  * @param array  $opts   Options; 'show-gray-container' adds the "minor_section" class
  * @return string Processed HTML
  */
 static function processHTML($body, $action = '', $opts = array())
 {
     global $wgUser, $wgTitle;
     // The flag goes to the hook by reference so a handler can veto processing.
     // ($wgTitle isn't used in the hook itself.)
     $shouldProcess = true;
     wfRunHooks('PreWikihowProcessHTML', array($wgTitle, &$shouldProcess));
     if (!$shouldProcess) {
         return $body;
     }
     $document = PHPQuery::newDocument($body);
     // run ShowGrayContainer hook for this
     $wantsGrayContainer = @$opts['show-gray-container'];
     if ($wantsGrayContainer) {
         pq("#bodycontents")->addClass("minor_section");
     }
     // tag the bodycontents section with the action so CSS can target it
     if ($action) {
         $actionClass = "bc_" . $action;
         pq("#bodycontents")->addClass($actionClass);
     }
     // every mw-htmlform-submit button becomes a primary button; it floats
     // now, so a clearing div has to follow it
     pq(".mw-htmlform-submit")->addClass("primary button buttonright");
     pq(".mw-htmlform-submit")->after("<div class='clearall'></div>");
     // ---- user preferences ----
     $echoEmailSettings = pq("#mw-prefsection-echo-emailsettingsind");
     pq("#mw-prefsection-echo")->append($echoEmailSettings);
     // ---- discussion / user talk ----
     // lift the template and archive blocks above the main part
     $templateTop = pq(".template_top")->addClass("wh_block");
     pq("#bodycontents")->before($templateTop);
     $archiveTable = pq(".archive_table")->addClass("wh_block");
     pq("#bodycontents")->before($archiveTable);
     // drop the empty <p><br></p> paragraphs
     $bodyHtml = pq("#bodycontents")->html();
     $cleanedHtml = preg_replace('/<p><br><\\/p>/', '', $bodyHtml);
     pq("#bodycontents")->html($cleanedHtml);
     // insert the post-comment form, or just its anchors when there is no
     // form or we are on the viewer's own talk page
     $postComment = new PostComment();
     $commentForm = $postComment->getForm(false, $wgTitle, true);
     if (!$commentForm || $wgTitle->getFullURL() == $wgUser->getUserPage()->getTalkPage()->getFullURL()) {
         $anchors = '<a name="postcomment"></a><a name="post"></a>';
         pq(".de:last")->prepend($anchors);
     } else {
         pq("#bodycontents")->append($commentForm);
     }
     // ---- history ----
     // nudge the top navigation down to sit right before the history form
     $topNavigation = pq(".navigation:first");
     pq("#history_form")->before($topNavigation);
     // ---- edit preview ----
     if (substr($action, 0, 6) == 'submit') {
         if ($action == 'submit2') {
             $target = "#editpage";
         } else {
             $target = "#editform";
         }
         $previewBlock = pq("#wikiPreview");
         $diffBlock = pq("#wikiDiff")->addClass("wh_block");
         pq("#wikiPreview")->remove();
         pq("#wikiDiff")->remove();
         // the user preference decides whether preview/diff go before or
         // after the edit form
         $placement = $wgUser->getOption('previewontop') ? 'before' : 'after';
         pq($target)->{$placement}($previewBlock);
         pq($target)->{$placement}($diffBlock);
     }
     $patrolLink = self::getMarkPatrolledLink();
     if ($patrolLink) {
         pq('#bodycontents')->append($patrolLink);
     }
     return $document->htmlOuter();
 }
    /**
     * Parse and transform the document from the old HTML for NS_MAIN articles to the new mobile
     * style. This should probably be pulled out and added to a subclass that can then be extended for
     * builders that focus on building NS_MAIN articles
     *
     * The article HTML is post-processed, wrapped in a full XHTML document and
     * loaded into a DOMDocument. Unwanted nodes are then removed, videos and
     * images are resized for the current device, related wikihows are pulled
     * out into boxes, and the serialized document is split on <h2> into named
     * sections.
     *
     * @param string $article Article HTML; passed by reference and replaced with
     *                        the result of WikihowArticleHTML::postProcess()
     * @return array array($sections, $intro, $firstImage): $sections is keyed by
     *               section type (values of $sectionMap) with 'name' and 'html'
     *               entries (plus 'boxes' for related wikihows); $intro and
     *               $firstImage come from ArticleHTMLParser::processMobileIntro()
     */
    protected function parseNonMobileArticle(&$article)
    {
        global $IP, $wgContLang, $wgLanguageCode;
        // Localized section heading => internal section key
        $sectionMap = array(
            wfMsg('Intro') => 'intro',
            wfMsg('Ingredients') => 'ingredients',
            wfMsg('Steps') => 'steps',
            wfMsg('Video') => 'video',
            wfMsg('Tips') => 'tips',
            wfMsg('Warnings') => 'warnings',
            wfMsg('relatedwikihows') => 'relatedwikihows',
            wfMsg('sourcescitations') => 'sources',
            wfMsg('thingsyoullneed') => 'thingsyoullneed',
            wfMsg('article_info') => 'article_info',
            wfMsg('user_completed_images') => 'user_completed_images',
        );
        $lang = MobileWikihow::getSiteLanguage();
        $imageNsText = $wgContLang->getNsText(NS_IMAGE);
        $device = $this->getDevice();
        // munge steps first
        $opts = array('no-ads' => true);
        $article = WikihowArticleHTML::postProcess($article, $opts);
        // Make doc correctly formed
        $articleText = <<<DONE
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="{$lang}" lang="{$lang}">
<head>
\t<meta http-equiv="Content-Type" content="text/html; charset='utf-8'" />
</head>
<body>
{$article}
</body>
</html>
DONE;
        require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";
        $doc = new DOMDocument('1.0', 'utf-8');
        // JSLikeHTMLElement is what provides the innerHTML property used below
        // (presumably; the class is defined in the file required above)
        $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
        $doc->strictErrorChecking = false;
        $doc->recover = true;
        //$doc->preserveWhiteSpace = false;
        //$wgOut->setarticlebodyonly(true);
        // Parser warnings for sloppy article HTML are deliberately suppressed
        @$doc->loadHTML($articleText);
        $doc->normalizeDocument();
        //echo $doc->saveHtml();exit;
        $xpath = new DOMXPath($doc);
        // The returned phpQuery object is not used directly in this method;
        // presumably the helpers called below rely on it being the current
        // phpQuery document.
        $pqDoc = PHPQuery::newDocument($doc);
        // Insert alternate images (or fork, as eliz calls it) that may exist.
        // Do this before other image processing later in this function so
        // these images will be dealt with as any other article image would.
        if (class_exists('WHVid')) {
            WHVid::handleAlternateMobileImages();
        }
        // Delete #featurestar node
        $node = $doc->getElementById('featurestar');
        if (!empty($node)) {
            $node->parentNode->removeChild($node);
        }
        $node = $doc->getElementById('newaltmethod');
        if (!empty($node)) {
            $node->parentNode->removeChild($node);
        }
        // Remove all "Edit" links
        $nodes = $xpath->query('//a[@id = "gatEditSection"]');
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
        // Resize youtube video
        $nodes = $xpath->query('//embed');
        foreach ($nodes as $node) {
            $src = $node->attributes->getNamedItem('src')->nodeValue;
            if (!$device['show-youtube'] || stripos($src, 'youtube.com') === false) {
                // Not a youtube embed (or the device can't show them): drop
                // the embed's parent element entirely
                $parent = $node->parentNode;
                $grandParent = $parent->parentNode;
                if ($grandParent && $parent) {
                    $grandParent->removeChild($parent);
                }
            } else {
                // Shrink both the <embed> and its parent element. Iterate by
                // value under a separate name so the outer loop variable is
                // not clobbered and no reference to a DOM property is taken.
                foreach (array($node, $node->parentNode) as $sizedNode) {
                    $widthAttr = $sizedNode->attributes->getNamedItem('width');
                    if (!$widthAttr) {
                        // no width attribute: nothing to shrink
                        continue;
                    }
                    $oldWidth = (int) $widthAttr->nodeValue;
                    $newWidth = $device['max-video-width'];
                    if ($newWidth < $oldWidth) {
                        $widthAttr->nodeValue = (string) $newWidth;
                        $heightAttr = $sizedNode->attributes->getNamedItem('height');
                        if ($heightAttr) {
                            // keep the aspect ratio
                            $oldHeight = (int) $heightAttr->nodeValue;
                            $newHeight = (int) round($newWidth * $oldHeight / $oldWidth);
                            $heightAttr->nodeValue = (string) $newHeight;
                        }
                    }
                }
            }
        }
        // Remove templates from intro so that they don't muck up
        // the text and images we extract
        $nodes = $xpath->query('//div[@class = "template_top"]');
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
        $introResult = ArticleHTMLParser::processMobileIntro($imageNsText);
        $intro = $introResult['html'];
        $firstImage = $introResult['image'];
        // Get rid of the <span> element to standardize the html for the
        // next dom query
        $nodes = $xpath->query('//div/span/a[@class = "image"]');
        foreach ($nodes as $a) {
            $parent = $a->parentNode;
            $grandParent = $parent->parentNode;
            $grandParent->replaceChild($a, $parent);
        }
        // Resize all resize-able images
        $nodes = $xpath->query('//div/a[@class = "image"]/img');
        $imgNum = 1;
        foreach ($nodes as $img) {
            $srcNode = $img->attributes->getNamedItem('src');
            $widthNode = $img->attributes->getNamedItem('width');
            $width = (int) $widthNode->nodeValue;
            $heightNode = $img->attributes->getNamedItem('height');
            $height = (int) $heightNode->nodeValue;
            $imageClasses = $img->parentNode->parentNode->attributes->getNamedItem('class')->nodeValue;
            /*
            if (!stristr($imageClasses, "tcenter")) {
            	$img->parentNode->parentNode->parentNode->attributes->getNamedItem('class')->nodeValue = '';
            	$img->parentNode->parentNode->parentNode->attributes->getNamedItem('style')->nodeValue = '';
            }
            */
            //			if( stristr($imageClasses, "tcenter") !== false) {
            // Centered images get the device's full width, everything else
            // the regular max width; height follows the aspect ratio.
            // NOTE(review): a width attribute of 0 would divide by zero here.
            if (stristr($imageClasses, "floatcenter") !== false) {
                $newWidth = $device['full-image-width'];
                $newHeight = (int) round($device['full-image-width'] * $height / $width);
            } else {
                $newWidth = $device['max-image-width'];
                $newHeight = (int) round($device['max-image-width'] * $height / $width);
            }
            // Work out the image page name from the link's href, or from its
            // onclick handler when there is no href
            $a = $img->parentNode;
            $href = $a->attributes->getNamedItem('href')->nodeValue;
            if (!$href) {
                $onclick = $a->attributes->getNamedItem('onclick')->nodeValue;
                $onclick = preg_replace('@.*",[ ]*"@', '', $onclick);
                $onclick = preg_replace('@".*@', '', $onclick);
                $imgName = preg_replace('@.*(Image|' . $imageNsText . '|' . urlencode($imageNsText) . '):@', '', $onclick);
            } else {
                $imgName = preg_replace('@^/(Image|' . $imageNsText . '|' . urlencode($imageNsText) . '):@', '', $href);
            }
            $title = Title::newFromText($imgName, NS_IMAGE);
            if (!$title) {
                // retry with the name URL-decoded
                $imgName = urldecode($imgName);
                $title = Title::newFromText($imgName, NS_IMAGE);
            }
            if ($title) {
                $image = wfFindFile($title);
                if ($image) {
                    list($thumb, $newWidth, $newHeight) = self::makeThumbDPI($image, $newWidth, $newHeight, $device['enlarge-thumb-high-dpi']);
                    $url = wfGetPad($thumb->getUrl());
                    $srcNode->nodeValue = $url;
                    $widthNode->nodeValue = $newWidth;
                    $heightNode->nodeValue = $newHeight;
                    // change surrounding div width and height
                    $div = $a->parentNode;
                    $styleNode = $div->attributes->getNamedItem('style');
                    //removing the set width/height
                    $styleNode->nodeValue = '';
                    //$div->attributes->getNamedItem('class')->nodeValue = '';
                    /*					if (preg_match('@^(.*width:)[0-9]+(px;\s*height:)[0-9]+(.*)$@', $styleNode->nodeValue, $m)) {
                    						$styleNode->nodeValue = $m[1] . $newWidth . $m[2] . $newHeight . $m[3];
                    					}
                    */
                    //add in our old class so all our logic still works
                    // (no separator is added: the XPath query further down
                    // matches the class attribute as an exact string)
                    $imgclass = $img->getAttribute('class');
                    $img->setAttribute('class', $imgclass . 'mwimage101');
                    //default width/height for the srcset
                    $bigWidth = 600;
                    $bigHeight = 800;
                    // change grandparent div width too
                    $grandparent = $div;
                    if ($grandparent && $grandparent->nodeName == 'div') {
                        $class = $grandparent->attributes->getNamedItem('class');
                        if ($class) {
                            $isThumb = stristr($class->nodeValue, 'mthumb') !== false;
                            $isRight = stristr($class->nodeValue, 'tright') !== false;
                            $isLeft = stristr($class->nodeValue, 'tleft') !== false;
                            $isCenter = stristr($class->nodeValue, 'tcenter') !== false;
                            if ($isThumb) {
                                if ($isRight) {
                                    $style = $grandparent->attributes->getNamedItem('style');
                                    $style->nodeValue = 'width:' . $newWidth . 'px;height:' . $newHeight . 'px;';
                                    $bigWidth = 300;
                                    $bigHeight = 500;
                                } elseif ($isCenter) {
                                    $style = $grandparent->attributes->getNamedItem('style');
                                    $style->nodeValue = 'width:' . $newWidth . 'px;height:' . $newHeight . 'px;';
                                    $bigWidth = 600;
                                    $bigHeight = 800;
                                } elseif ($isLeft) {
                                    //if its centered or on the left, give it double the width if too big
                                    $style = $grandparent->attributes->getNamedItem('style');
                                    $oldStyle = $style->nodeValue;
                                    $matches = array();
                                    preg_match('@(width:\\s*)[0-9]+@', $oldStyle, $matches);
                                    if ($matches[0]) {
                                        $curSize = intval(substr($matches[0], 6));
                                        //width: = 6
                                        if ($newWidth * 2 < $curSize) {
                                            $existingCSS = preg_replace('@(width:\\s*)[0-9]+@', 'width:' . $newWidth * 2, $oldStyle);
                                            $style->nodeValue = $existingCSS;
                                        }
                                    }
                                    $bigWidth = 300;
                                    $bigHeight = 500;
                                }
                            }
                        }
                    }
                    // Second, larger thumbnail used for srcset and the zoom details
                    list($thumb, $newWidth, $newHeight) = self::makeThumbDPI($image, $bigWidth, $bigHeight, $device['enlarge-thumb-high-dpi']);
                    $url = wfGetPad($thumb->getUrl());
                    $img->setAttribute('srcset', $url . ' ' . $newWidth . 'w');
                    //if we couldn't make it big enough, let's add a class
                    if ($newWidth < $bigWidth) {
                        $imgclass = $img->getAttribute('class');
                        $img->setAttribute('class', $imgclass . ' not_huge');
                    }
                    //add the hidden info
                    /*
                    $newDiv = new DOMElement( 'div', htmlentities('test') );
                    $a->appendChild($newDiv);
                    $newDiv->setAttribute('style', 'display:none;');
                    */
                    $a->setAttribute('id', 'image-zoom-' . $imgNum);
                    $a->setAttribute('class', 'image-zoom');
                    $a->setAttribute('href', '#');
                    global $wgServer;
                    // Build the credits-page URL: strip a leading "m."
                    // subdomain and add "www." to the bare domain
                    $href = $wgServer . $href;
                    $href = preg_replace('@\\bm\\.@', '', $href);
                    $href = preg_replace('@^http://wikihow\\.com@', 'http://www.wikihow.com', $href);
                    $details = array('url' => $url, 'width' => $newWidth, 'height' => $newHeight, 'credits_page' => $href);
                    // Hidden div carrying the JSON details for this image
                    $newDiv = new DOMElement('div', htmlentities(json_encode($details)));
                    $a->appendChild($newDiv);
                    $newDiv->setAttribute('style', 'display:none;');
                    $newDiv->setAttribute('id', 'image-details-' . $imgNum);
                    $imgNum++;
                } else {
                    //huh? can't find it? well, then let's not display it
                    $img->parentNode->parentNode->parentNode->parentNode->setAttribute('style', 'display:none;');
                }
            } else {
                //huh? can't find it? well, then let's not display it
                $img->parentNode->parentNode->parentNode->parentNode->setAttribute('style', 'display:none;');
            }
        }
        // Remove template from images, add new zoom one
        $nodes = $xpath->query('//img');
        foreach ($nodes as $node) {
            $src = $node->attributes ? $node->attributes->getNamedItem('src') : null;
            $src = $src ? $src->nodeValue : '';
            if (stripos($src, 'magnify-clip.png') !== false) {
                $parent = $node->parentNode;
                $parent->parentNode->removeChild($parent);
            }
        }
        // //get rid of the corners and watermarks
        // $nodes = $xpath->query('//div[@class = "corner top_left"
        // or @class = "corner bottom_left"
        // or @class = "corner top_right"
        // or @class = "corner bottom_right"
        // or @class = "wikihow_watermark"]');
        // foreach ($nodes as $node) {
        // $parent = $node->parentNode;
        // $parent->removeChild($node);
        // }
        //gotta swap in larger images if the client's width is big enough
        //(i.e. tablet et al)
        $nodes = $xpath->query('//img[@class = "mwimage101" 
								or @class = "mwimage101 not_huge"]');
        foreach ($nodes as $node) {
            //make a quick unique id for this
            $id = md5($node->attributes->getNamedItem('src')->nodeValue) . rand();
            $node->setAttribute('id', $id);
            //pass it to our custom function for swapping in larger images
            $swap_it = 'if (isBig) WH.mobile.swapEm("' . $id . '");';
            $scripttag = new DOMElement('script', htmlentities($swap_it));
            $node->appendChild($scripttag);
        }
        // Change the width attribute from any tables with a width set.
        // This often happen around video elements.
        $nodes = $xpath->query('//table/@width');
        foreach ($nodes as $node) {
            $width = preg_replace('@px\\s*$@', '', $node->nodeValue);
            if ($width > $device['screen-width'] - 20) {
                $node->nodeValue = $device['screen-width'] - 20;
            }
        }
        // Surround step content in its own div. We do this to support other features like checkmarks
        $nodes = $xpath->query('//div[@id="steps"]/ol/li');
        foreach ($nodes as $node) {
            $node->innerHTML = '<div class="step_content">' . $node->innerHTML . '</div>';
        }
        //remove quiz
        $nodes = $xpath->query('//div[@class = "quiz_cta"]');
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
        //remove quiz header
        $nodes = $xpath->query('//h3/span[text()="Quiz"]');
        foreach ($nodes as $node) {
            $parentNode = $node->parentNode;
            $parentNode->parentNode->removeChild($parentNode);
        }
        //remove edit link in h3 headers
        $nodes = $xpath->query('//h3/a[@class="editsection"]');
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
        //remove edit link in h4 headers
        $nodes = $xpath->query('//h4/a[@class="editsection"]');
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
        //pull out the first 6 related wikihows and format them
        $nodes = $xpath->query('//div[@id="relatedwikihows"]/ul/li');
        $count = 0;
        $related_boxes = array();
        foreach ($nodes as $node) {
            // stop once 6 boxes have been collected
            if ($count >= 6) {
                break;
            }
            //grab the title; skip list items that carry no link
            if (!preg_match('@href=\\"\\/(.*?)?\\"@', $node->innerHTML, $m)) {
                continue;
            }
            $title = Title::newFromText($m[1]);
            if (!$title) {
                continue;
            }
            $temp_box = $this->makeRelatedBox($title);
            if ($temp_box) {
                $related_boxes[] = $temp_box;
                // remember the node and its parent so a lone box can be put back
                $last_node = $node;
                $parent = $node->parentNode;
                $last_parent = $parent;
                $parent->removeChild($node);
                $count++;
            }
        }
        //only 1? not enough. throw it back
        if ($count == 1) {
            $related_boxes = array();
            $last_parent->appendChild($last_node);
        }
        // Inject html into the DOM tree for specific features (ie thumb ratings, ads, etc)
        $this->mobileParserBeforeHtmlSave($xpath);
        //self::walkTree($doc->documentElement, 1);
        $html = $doc->saveXML();
        $sections = array();
        $sectionsHtml = explode('<h2>', $html);
        // remove leftovers from intro section (everything before the first <h2>)
        unset($sectionsHtml[0]);
        foreach ($sectionsHtml as $i => &$section) {
            $section = '<h2>' . $section;
            $count = 0;
            $heading = '';
            // Captures the heading text while stripping the <h2> from the section
            $replFunc = function ($matches) use(&$heading) {
                $heading = trim($matches[1]);
                return '';
            };
            $output = preg_replace_callback('@^<h2>[^\\n]*<span class="mw-headline"[^>]*>[ \\t]*([^<]+)</span></h2>@', $replFunc, $section, 1, $count);
            if ($count > 0) {
                $section = $output;
                // only headings listed in $sectionMap are kept
                if (isset($sectionMap[$heading])) {
                    $key = $sectionMap[$heading];
                    $sections[$key] = array('name' => $heading, 'html' => $section);
                }
            }
        }
        // break the reference left behind by the by-reference foreach
        unset($section);
        // Remove </body></html> from html. Done here, while $sections only
        // holds parsed sections, so the strip always reaches the section that
        // carries the document tail and never touches a non-existent index.
        foreach ($sections as $key => $parsed) {
            $sections[$key]['html'] = preg_replace('@</body>(\\s|\\n)*</html>(\\s|\\n)*$@', '', $parsed['html']);
        }
        // Remove Video section if there is no longer a youtube video
        if (isset($sections['video'])) {
            if (!preg_match('@<object@i', $sections['video']['html'])) {
                unset($sections['video']);
            }
        }
        // Add the related boxes
        if (isset($sections['relatedwikihows']) && !empty($related_boxes)) {
            $sections['relatedwikihows']['boxes'] = $related_boxes;
        }
        // Add article info
        $sections['article_info']['name'] = wfMsg('article_info');
        $sections['article_info']['html'] = $this->getArticleInfo($this->t);
        // add user created images
        if (class_exists('UCIPatrol') && UCIPatrol::showUCI($this->t)) {
            $sections['user_completed_images']['name'] = wfMsg('user_completed_images');
            $sections['user_completed_images']['html'] = $this->getUserCompletedImages($this->t);
        }
        // Add a simple form for uploading images of completed items to the article
        if ($wgLanguageCode == 'en' && isset($sections['steps']) && isset($device['show-upload-images']) && $device['show-upload-images']) {
            require_once "{$IP}/extensions/wikihow/mobile/MobileUciHtmlBuilder.class.php";
            $userCompletedImages = new MobileUciHtmlBuilder();
            $sections['steps']['html'] .= $userCompletedImages->createByHtml($this->t);
        }
        return array($sections, $intro, $firstImage);
    }
Ejemplo n.º 4
0
 /**
  * Parse article HTML into a list of structured sections.
  *
  * The HTML is loaded into a DOM document, unwanted nodes (feature star,
  * edit links, embeds, templates, table of contents, quizzes) are removed,
  * the intro is extracted, and the remaining markup is split on <h2> and
  * handed to a type-specific processor per section.
  *
  * @param string $articleHtml Article HTML (passed by reference; not modified here)
  * @return array List of sections. The first entry is the intro (type
  *               "intro"); every other entry has 'heading' and 'type' plus
  *               type-specific keys ('methods', 'video', 'html', 'articles'
  *               or whatever the list processors return)
  */
 private function parseArticleHtml(&$articleHtml)
 {
     // Localized section heading => internal section type
     $sectionMap = array(
         wfMsg('Intro') => 'intro',
         wfMsg('Ingredients') => 'ingredients',
         wfMsg('Steps') => 'steps',
         wfMsg('Video') => 'video',
         wfMsg('Tips') => 'tips',
         wfMsg('Warnings') => 'warnings',
         wfMsg('relatedwikihows') => 'relatedwikihows',
         wfMsg('sourcescitations') => 'sources',
         wfMsg('thingsyoullneed') => 'thingsyoullneed',
         wfMsg('article_info') => 'article_info',
     );
     $doc = self::htmlToDoc($articleHtml);
     $xpath = new DOMXPath($doc);
     // Delete #featurestar node
     $node = $doc->getElementById('featurestar');
     if (!empty($node)) {
         $node->parentNode->removeChild($node);
     }
     // Remove #newaltmethod node
     $node = $doc->getElementById('newaltmethod');
     if (!empty($node)) {
         $node->parentNode->removeChild($node);
     }
     // Remove all "Edit" links
     $nodes = $xpath->query('//a[@id = "gatEditSection"]');
     foreach ($nodes as $node) {
         $node->parentNode->removeChild($node);
     }
     // Remember the youtube link (if any) and drop the embeds. Initialised
     // up front so the video branch below is safe when the article has no
     // <embed> at all; with several embeds the last one wins.
     $youtubeLink = '';
     $nodes = $xpath->query('//embed');
     foreach ($nodes as $node) {
         // Get youtube link
         $src = $node->attributes->getNamedItem('src')->nodeValue;
         if (!$this->device['show-youtube'] || stripos($src, 'youtube.com') === false) {
             $youtubeLink = '';
         } else {
             $youtubeLink = $src;
         }
         // Delete video section node
         $parent = $node->parentNode;
         $grandParent = $parent->parentNode;
         if ($grandParent && $parent) {
             $grandParent->removeChild($parent);
         }
     }
     // Remove templates from intro so that they don't muck up
     // the text and images we extract
     $nodes = $xpath->query('//div[@class = "template_top"]');
     foreach ($nodes as $node) {
         $node->parentNode->removeChild($node);
     }
     // create a php query doc to operate on in pq() calls
     $pqDoc = PHPQuery::newDocument($doc);
     // remove table of contents
     if (pq('table#toc')->length) {
         $toc = pq('table#toc');
         $toc->parent()->remove();
         $toc->remove();
     }
     // creates sections array to be filled up
     $sections = array();
     // get intro data and remove it
     $intro = $this->processIntro("h2");
     $intro['type'] = "intro";
     $introDoc = $this->processGeneric($intro['html']);
     $intro['html'] = $introDoc->html();
     $sections[] = $intro;
     // Get rid of the <span> element to standardize the html for the
     // next dom query
     $nodes = $xpath->query('//div/span/a[@class = "image"]');
     foreach ($nodes as $a) {
         $parent = $a->parentNode;
         $grandParent = $parent->parentNode;
         $grandParent->replaceChild($a, $parent);
     }
     // Change the width attribute from any tables with a width set.
     // This often happen around video elements.
     $nodes = $xpath->query('//table/@width');
     foreach ($nodes as $node) {
         $width = preg_replace('@px\\s*$@', '', $node->nodeValue);
         if ($width > $this->device['screen-width'] - 20) {
             $node->nodeValue = $this->device['screen-width'] - 20;
         }
     }
     // Surround step content in its own div. We do this to support other features like checkmarks
     $nodes = $xpath->query('//div[@id="steps"]/ol/li');
     foreach ($nodes as $node) {
         $node->innerHTML = '<div class="step_content">' . $node->innerHTML . '</div>';
     }
     // Remove quiz
     $nodes = $xpath->query('//div[@class = "quiz_cta_2"]');
     foreach ($nodes as $node) {
         $node->parentNode->removeChild($node);
     }
     $nodes = $xpath->query('//div[@class = "quiz_cta"]');
     foreach ($nodes as $node) {
         $node->parentNode->removeChild($node);
     }
     // Remove quiz header
     $nodes = $xpath->query('//h3/span[text()="Quiz"]');
     foreach ($nodes as $node) {
         $parentNode = $node->parentNode;
         $parentNode->parentNode->removeChild($parentNode);
     }
     $html = self::docToHtml($doc);
     // NOTE(review): no first-image name is ever computed in this method, so
     // getImageObj() has always received null here and neither result is
     // used afterwards. The null is now explicit; the calls are kept in case
     // they have side effects -- confirm and remove if not.
     $firstImageName = null;
     $firstImage = self::getImageObj($firstImageName);
     $introImage = AppDataFormatter::getImageDetails($firstImage);
     $sectionsHtml = explode('<h2>', $html);
     // remove leftovers from intro section (everything before the first <h2>)
     unset($sectionsHtml[0]);
     foreach ($sectionsHtml as $sectionHtml) {
         $sectionHtml = '<h2>' . $sectionHtml;
         // chunks without a recognisable <h2><span>heading are skipped
         if (!preg_match('@^<h2[^>]*>\\s*<span[^>]*>\\s*([^<]+)@i', $sectionHtml, $m)) {
             continue;
         }
         $heading = trim($m[1]);
         // The first map entry whose localized name occurs in the heading
         // decides the type; unknown headings keep a null type
         $type = null;
         foreach ($sectionMap as $key => $value) {
             if (strpos($heading, $key) !== FALSE) {
                 $type = $value;
                 break;
             }
         }
         // strip the heading itself, leaving only the section body
         $sectionHtml = preg_replace('@^<h2[^>]*>\\s*<span[^>]*>\\s*([^<]+)</span>(\\s|\\n)*</h2>@i', '', $sectionHtml);
         $section = array('heading' => $heading, 'type' => $type);
         $sectionDoc = $this->processGeneric($sectionHtml);
         if ($type == 'steps') {
             $section['methods'] = $this->processSteps($sectionDoc);
         } elseif ($type == 'video') {
             // a video section is only emitted when a youtube link was found
             if (!$youtubeLink) {
                 continue;
             }
             $section['video'] = $youtubeLink;
             $vid = pq('#video td');
             $section['html'] = trim($vid->html());
         } elseif (in_array($type, array('thingsyoullneed', 'ingredients', 'tips', 'warnings'))) {
             $list = $this->processListWithHeaders($sectionDoc);
             $section = array_merge($section, $list);
         } elseif ($type == 'relatedwikihows') {
             $list = $this->processList($sectionDoc, self::LIST_TYPE_TYPICAL);
             $section = array_merge($section, $list);
             if (isset($section['list']) && is_array($section['list'])) {
                 $articles = $this->processRelatedWikihows($section['list']);
                 if ($articles) {
                     // structured articles replace the raw list/html
                     unset($section['html']);
                     unset($section['list']);
                     $section['articles'] = $articles;
                 }
             }
         } elseif ($type == 'sources') {
             $list = $this->processListHTML($sectionDoc);
             $section = array_merge($section, $list);
         } else {
             // generic section: keep its html, but drop it when empty
             $text = trim($sectionDoc->text());
             $section['html'] = $sectionDoc->html();
             if (empty($text) || empty($section['html'])) {
                 continue;
             }
             if (empty($section['type'])) {
                 unset($section['type']);
             }
         }
         $sections[] = $section;
     }
     return $sections;
 }
Ejemplo n.º 5
0
 /**
  * Post-process the rendered HTML of a non-article page.
  *
  * Loads $body into a phpQuery document, adds CSS marker classes, moves the
  * talk-page template/archive blocks, appends the post-comment form (or its
  * anchors), repositions the history navigation and the edit preview, and
  * returns the resulting outer HTML. If a 'PreWikihowProcessHTML' hook
  * handler clears the flag it is handed, $body is returned untouched.
  *
  * @param string $body   HTML to process
  * @param string $action Current action; appended to "bc_" as a CSS class and
  *                       checked for the "submit" prefix (edit preview)
  * @param array  $opts   Options; 'show-gray-container' adds the "minor_section" class
  * @return string Processed HTML
  */
 static function processHTML($body, $action = '', $opts = array())
 {
     global $wgUser, $wgTitle;
     $processHTML = true;
     // Pass the global title: there is no local $title in this scope. The
     // flag goes by reference so a hook handler can veto processing.
     wfRunHooks('PreWikihowProcessHTML', array($wgTitle, &$processHTML));
     if (!$processHTML) {
         return $body;
     }
     // NOTE(review): the skin object is not used in this method; the call is
     // kept only in case getSkin() initialises state that later code relies
     // on -- confirm and remove if it does not.
     $wgUser->getSkin();
     $doc = PHPQuery::newDocument($body);
     //run ShowGrayContainer hook for this
     if (!empty($opts['show-gray-container'])) {
         pq("#bodycontents")->addClass("minor_section");
     }
     //let's mark each bodycontents section so we can target it with CSS
     if ($action) {
         pq("#bodycontents")->addClass("bc_" . $action);
     }
     //DISCUSSION/USER TALK//////////////////////
     //move some pieces above the main part
     pq("#bodycontents")->before(pq(".template_top")->addClass("wh_block"));
     pq("#bodycontents")->before(pq(".archive_table")->addClass("wh_block"));
     //remove those useless paragraph line breaks
     $bc = preg_replace('/<p><br><\\/p>/', '', pq("#bodycontents")->html());
     pq("#bodycontents")->html($bc);
     //insert postcomment form; fall back to its bare anchors when there is
     //no form or we are on the viewer's own talk page
     $pcf = Postcomment::getForm(false, null, true);
     if ($pcf && $wgTitle->getFullURL() != $wgUser->getUserPage()->getTalkPage()->getFullURL()) {
         $pc_form = $pcf;
     } else {
         $pc_form = '<a name="postcomment"></a><a name="post"></a>';
     }
     pq("#bodycontents")->append($pc_form);
     //HISTORY//////////////////////
     //move top nav down a smidge
     pq("#history_form")->before(pq(".navigation:first"));
     //EDIT PREVIEW//////////////////////
     if (substr($action, 0, 6) == 'submit') {
         $name = $action == 'submit2' ? "#editpage" : "#editform";
         // grab both blocks before detaching them from their current spot
         $preview = pq("#wikiPreview");
         $changes = pq("#wikiDiff")->addClass("wh_block");
         pq("#wikiPreview")->remove();
         pq("#wikiDiff")->remove();
         //preview before or after based on user preference
         if ($wgUser->getOption('previewontop')) {
             pq($name)->before($preview);
             pq($name)->before($changes);
         } else {
             pq($name)->after($preview);
             pq($name)->after($changes);
         }
     }
     return $doc->htmlOuter();
 }