/**
 * Flattens a list section that may be interrupted by headings into one ordered array.
 *
 * Walks the grandchildren of $doc in document order:
 *  - every h3 or p whose tag-stripped text is non-empty becomes array("heading" => text);
 *  - every li found directly under a ul/ol becomes array("html" => inner html), after
 *    modifyImageTags() has been applied to it. Items from separate uls/ols end up in
 *    the same flat list.
 * Every direct child of a processed ul/ol is removed from the document afterwards,
 * whether or not it was an li.
 *
 * @param mixed $doc object exposing children(), iterated with pq() — presumably a
 *                   phpQuery object; confirm against callers
 * @return array array("list" => array(...)) holding the heading/html entries
 **/
private function processListWithHeaders($doc) {
    $list = array("list" => array());

    foreach ($doc->children()->children() as $child) {
        if (pq($child)->is('h3') || pq($child)->is('p')) {
            // Headings are plain text. The former trim(..., "<a>") call used "<a>" as a
            // character mask, which chewed leading/trailing "a" characters off the text
            // and skipped whitespace trimming before the emptiness check.
            $text = trim(strip_tags(pq($child)->html()));
            if ($text !== '') {
                $list["list"][] = array("heading" => $text);
            }
        }

        if (!pq($child)->is('ul') && !pq($child)->is('ol')) {
            continue;
        }

        foreach (pq($child)->children() as $ulChild) {
            // No call-time "&": that syntax is a fatal error since PHP 5.4. If the
            // callee declares its parameter by reference it still receives one.
            ArticleHTMLParser::removeEmptyNodes($ulChild);

            if (pq($ulChild)->is('li')) {
                //modify any image tags
                $this->modifyImageTags($ulChild);

                $html = trim(pq($ulChild)->html());
                // Strict comparison so an item whose whole content is "0" is kept.
                if ($html !== '') {
                    $list["list"][] = array("html" => $html);
                }
            }

            // The node has been consumed (or is not an li); take it out of the document.
            pq($ulChild)->remove();
        }
    }

    return $list;
}
/**
 * Parse and transform the document from the old HTML for NS_MAIN articles to the new mobile
 * style. This should probably be pulled out and added to a subclass that can then be extended for
 * builders that focus on building NS_MAIN articles
 *
 * Steps, in order: post-process the article HTML, wrap it in a full XHTML document and load
 * it into a DOMDocument, strip desktop-only nodes (feature star, edit links, quiz, templates),
 * resize videos/images/tables for the current device, pull the related-article boxes out of
 * the DOM, then serialise and split the result on <h2> into named sections.
 *
 * @param string $article article HTML; passed by reference and replaced with the output of
 *                        WikihowArticleHTML::postProcess()
 * @return array array($sections, $intro, $firstImage) where $sections maps a section key
 *               (see $sectionMap) to array('name' => ..., 'html' => ...[, 'boxes' => ...])
 */
protected function parseNonMobileArticle(&$article) {
    global $IP, $wgContLang, $wgLanguageCode, $wgServer;

    // Localised section heading => internal section key.
    $sectionMap = array(
        wfMsg('Intro') => 'intro',
        wfMsg('Ingredients') => 'ingredients',
        wfMsg('Steps') => 'steps',
        wfMsg('Video') => 'video',
        wfMsg('Tips') => 'tips',
        wfMsg('Warnings') => 'warnings',
        wfMsg('relatedwikihows') => 'relatedwikihows',
        wfMsg('sourcescitations') => 'sources',
        wfMsg('thingsyoullneed') => 'thingsyoullneed',
        wfMsg('article_info') => 'article_info',
        wfMsg('user_completed_images') => 'user_completed_images',
    );

    $lang = MobileWikihow::getSiteLanguage();
    $imageNsText = $wgContLang->getNsText(NS_IMAGE);
    $device = $this->getDevice();

    // munge steps first
    $opts = array('no-ads' => true);
    $article = WikihowArticleHTML::postProcess($article, $opts);

    // Make doc correctly formed
    $articleText = <<<DONE
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="{$lang}" lang="{$lang}">
<head>
\t<meta http-equiv="Content-Type" content="text/html; charset='utf-8'" />
</head>
<body>
{$article}
</body>
</html>
DONE;

    require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";
    $doc = new DOMDocument('1.0', 'utf-8');
    $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    $doc->strictErrorChecking = false;
    $doc->recover = true;
    // Article markup is frequently not valid HTML; parser warnings are deliberately muted.
    @$doc->loadHTML($articleText);
    $doc->normalizeDocument();
    $xpath = new DOMXPath($doc);
    // Return value is not used in this method; presumably the call makes $doc the
    // current phpQuery document for the helpers below — confirm before removing.
    $pqDoc = PHPQuery::newDocument($doc);

    // Insert alternate images (or fork, as eliz calls it) that may exist.
    // Do this before other image processing later in this function so
    // these images will be dealt with as any other article image would.
    if (class_exists('WHVid')) {
        WHVid::handleAlternateMobileImages();
    }

    // Delete #featurestar and #newaltmethod nodes
    foreach (array('featurestar', 'newaltmethod') as $elementId) {
        $node = $doc->getElementById($elementId);
        if (!empty($node)) {
            $node->parentNode->removeChild($node);
        }
    }

    // Remove all "Edit" links
    $nodes = $xpath->query('//a[@id = "gatEditSection"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node);
    }

    // Resize youtube video
    $nodes = $xpath->query('//embed');
    foreach ($nodes as $node) {
        // getAttribute() yields '' for a missing src instead of dereferencing null.
        $src = $node->getAttribute('src');
        if (!$device['show-youtube'] || stripos($src, 'youtube.com') === false) {
            // Not a video we can show: drop the element wrapping the embed.
            $parent = $node->parentNode;
            $grandParent = $parent ? $parent->parentNode : null;
            if ($grandParent && $parent) {
                $grandParent->removeChild($parent);
            }
        } else {
            // Shrink both the <embed> and its wrapper. A separate loop variable is used
            // so the outer $node is not overwritten, and no references are taken:
            // DOM nodes are objects already.
            foreach (array($node, $node->parentNode) as $target) {
                $widthAttr = ($target && $target->attributes)
                    ? $target->attributes->getNamedItem('width')
                    : null;
                if (!$widthAttr) {
                    continue;
                }
                $oldWidth = (int) $widthAttr->nodeValue;
                $newWidth = $device['max-video-width'];
                if ($newWidth < $oldWidth) {
                    $widthAttr->nodeValue = (string) $newWidth;
                    $heightAttr = $target->attributes->getNamedItem('height');
                    if ($heightAttr) {
                        // Keep the aspect ratio.
                        $oldHeight = (int) $heightAttr->nodeValue;
                        $newHeight = (int) round($newWidth * $oldHeight / $oldWidth);
                        $heightAttr->nodeValue = (string) $newHeight;
                    }
                }
            }
        }
    }

    // Remove templates from intro so that they don't muck up
    // the text and images we extract
    $nodes = $xpath->query('//div[@class = "template_top"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node);
    }

    $introResult = ArticleHTMLParser::processMobileIntro($imageNsText);
    $intro = $introResult['html'];
    $firstImage = $introResult['image'];

    // Get rid of the <span> element to standardize the html for the
    // next dom query
    $nodes = $xpath->query('//div/span/a[@class = "image"]');
    foreach ($nodes as $a) {
        $parent = $a->parentNode;
        $grandParent = $parent->parentNode;
        $grandParent->replaceChild($a, $parent);
    }

    // Resize all resize-able images
    $nodes = $xpath->query('//div/a[@class = "image"]/img');
    $imgNum = 1;
    foreach ($nodes as $img) {
        $a = $img->parentNode;
        $div = $a->parentNode;

        $width = (int) $img->getAttribute('width');
        $height = (int) $img->getAttribute('height');
        $imageClasses = $div->getAttribute('class');

        $newWidth = (stristr($imageClasses, "floatcenter") !== false)
            ? $device['full-image-width']
            : $device['max-image-width'];
        // A missing or zero width attribute would otherwise divide by zero.
        $newHeight = $width > 0 ? (int) round($newWidth * $height / $width) : 0;

        // Work out the image page name from the link target, or from the onclick
        // handler when the link carries no href.
        $href = $a->getAttribute('href');
        if (!$href) {
            $onclick = $a->getAttribute('onclick');
            $onclick = preg_replace('@.*",[ ]*"@', '', $onclick);
            $onclick = preg_replace('@".*@', '', $onclick);
            $imgName = preg_replace('@.*(Image|' . $imageNsText . '|' . urlencode($imageNsText) . '):@', '', $onclick);
        } else {
            $imgName = preg_replace('@^/(Image|' . $imageNsText . '|' . urlencode($imageNsText) . '):@', '', $href);
        }

        $title = Title::newFromText($imgName, NS_IMAGE);
        if (!$title) {
            $imgName = urldecode($imgName);
            $title = Title::newFromText($imgName, NS_IMAGE);
        }

        $image = $title ? wfFindFile($title) : null;
        if (!$image) {
            //huh? can't find it? well, then let's not display it
            $img->parentNode->parentNode->parentNode->parentNode->setAttribute('style', 'display:none;');
            continue;
        }

        list($thumb, $newWidth, $newHeight) = self::makeThumbDPI($image, $newWidth, $newHeight, $device['enlarge-thumb-high-dpi']);
        $url = wfGetPad($thumb->getUrl());
        $img->setAttribute('src', $url);
        $img->setAttribute('width', $newWidth);
        $img->setAttribute('height', $newHeight);

        // change surrounding div width and height: remove the set width/height
        $styleNode = $div->attributes->getNamedItem('style');
        if ($styleNode) {
            $styleNode->nodeValue = '';
        }

        //add in our old class so all our logic still works
        $imgclass = $img->getAttribute('class');
        $img->setAttribute('class', $imgclass . 'mwimage101');

        //default width/height for the srcset
        $bigWidth = 600;
        $bigHeight = 800;

        // Thumbnail wrappers get an explicit size depending on their alignment.
        if ($div->nodeName == 'div') {
            $class = $div->attributes->getNamedItem('class');
            if ($class) {
                $isThumb = stristr($class->nodeValue, 'mthumb') !== false;
                $isRight = stristr($class->nodeValue, 'tright') !== false;
                $isLeft = stristr($class->nodeValue, 'tleft') !== false;
                $isCenter = stristr($class->nodeValue, 'tcenter') !== false;
                if ($isThumb) {
                    $style = $div->attributes->getNamedItem('style');
                    if ($isRight) {
                        if ($style) {
                            $style->nodeValue = 'width:' . $newWidth . 'px;height:' . $newHeight . 'px;';
                        }
                        $bigWidth = 300;
                        $bigHeight = 500;
                    } elseif ($isCenter) {
                        if ($style) {
                            $style->nodeValue = 'width:' . $newWidth . 'px;height:' . $newHeight . 'px;';
                        }
                        $bigWidth = 600;
                        $bigHeight = 800;
                    } elseif ($isLeft) {
                        //if its centered or on the left, give it double the width if too big
                        // NOTE(review): the style was blanked a few lines up on this same
                        // div, so this match looks like it can never succeed — confirm
                        // which element was meant before relying on it.
                        $oldStyle = $style ? $style->nodeValue : '';
                        $matches = array();
                        preg_match('@(width:\\s*)[0-9]+@', $oldStyle, $matches);
                        if (!empty($matches[0])) {
                            $curSize = intval(substr($matches[0], 6)); //width: = 6
                            if ($newWidth * 2 < $curSize) {
                                $style->nodeValue = preg_replace('@(width:\\s*)[0-9]+@', 'width:' . ($newWidth * 2), $oldStyle);
                            }
                        }
                        $bigWidth = 300;
                        $bigHeight = 500;
                    }
                }
            }
        }

        // Larger rendition advertised through srcset.
        list($thumb, $newWidth, $newHeight) = self::makeThumbDPI($image, $bigWidth, $bigHeight, $device['enlarge-thumb-high-dpi']);
        $url = wfGetPad($thumb->getUrl());
        $img->setAttribute('srcset', $url . ' ' . $newWidth . 'w');

        //if we couldn't make it big enough, let's add a class
        if ($newWidth < $bigWidth) {
            $imgclass = $img->getAttribute('class');
            $img->setAttribute('class', $imgclass . ' not_huge');
        }

        // Turn the link into a zoom trigger and attach the hidden details payload.
        $a->setAttribute('id', 'image-zoom-' . $imgNum);
        $a->setAttribute('class', 'image-zoom');
        $a->setAttribute('href', '#');

        $href = $wgServer . $href;
        $href = preg_replace('@\\bm\\.@', '', $href);
        $href = preg_replace('@^http://wikihow\\.com@', 'http://www.wikihow.com', $href);
        $details = array('url' => $url, 'width' => $newWidth, 'height' => $newHeight, 'credits_page' => $href);
        $newDiv = new DOMElement('div', htmlentities(json_encode($details)));
        $a->appendChild($newDiv);
        $newDiv->setAttribute('style', 'display:none;');
        $newDiv->setAttribute('id', 'image-details-' . $imgNum);
        $imgNum++;
    }

    // Remove template from images, add new zoom one
    $nodes = $xpath->query('//img');
    foreach ($nodes as $node) {
        $src = $node->attributes ? $node->attributes->getNamedItem('src') : null;
        $src = $src ? $src->nodeValue : '';
        if (stripos($src, 'magnify-clip.png') !== false) {
            $parent = $node->parentNode;
            $parent->parentNode->removeChild($parent);
        }
    }

    //gotta swap in larger images if the client's width is big enough
    //(i.e. tablet et al)
    $nodes = $xpath->query('//img[@class = "mwimage101" or @class = "mwimage101 not_huge"]');
    foreach ($nodes as $node) {
        //make a quick unique id for this
        $id = md5($node->getAttribute('src')) . rand();
        $node->setAttribute('id', $id);
        //pass it to our custom function for swapping in larger images
        $swap_it = 'if (isBig) WH.mobile.swapEm("' . $id . '");';
        $scripttag = new DOMElement('script', htmlentities($swap_it));
        $node->appendChild($scripttag);
    }

    // Change the width attribute from any tables with a width set.
    // This often happen around video elements.
    $nodes = $xpath->query('//table/@width');
    foreach ($nodes as $node) {
        $width = preg_replace('@px\\s*$@', '', $node->nodeValue);
        if ($width > $device['screen-width'] - 20) {
            $node->nodeValue = $device['screen-width'] - 20;
        }
    }

    // Surround step content in its own div. We do this to support other features like checkmarks
    $nodes = $xpath->query('//div[@id="steps"]/ol/li');
    foreach ($nodes as $node) {
        $node->innerHTML = '<div class="step_content">' . $node->innerHTML . '</div>';
    }

    //remove quiz
    $nodes = $xpath->query('//div[@class = "quiz_cta"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node);
    }

    //remove quiz header
    $nodes = $xpath->query('//h3/span[text()="Quiz"]');
    foreach ($nodes as $node) {
        $parentNode = $node->parentNode;
        $parentNode->parentNode->removeChild($parentNode);
    }

    //remove edit link in h3 headers
    $nodes = $xpath->query('//h3/a[@class="editsection"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node);
    }

    //remove edit link in h4 headers
    $nodes = $xpath->query('//h4/a[@class="editsection"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node);
    }

    //pull out the first 6 related wikihows and format them
    $nodes = $xpath->query('//div[@id="relatedwikihows"]/ul/li');
    $count = 0;
    $related_boxes = array();
    $last_node = null;
    $last_parent = null;
    foreach ($nodes as $node) {
        // NOTE(review): the comment above says six, but this check lets a seventh box
        // through. Left unchanged; confirm the intended cap before tightening it.
        if ($count > 6) {
            break;
        }
        //grab the title; skip items without a local link instead of reading $m[1] blindly
        if (!preg_match('@href=\\"\\/(.*?)?\\"@', $node->innerHTML, $m)) {
            continue;
        }
        $title = Title::newFromText($m[1]);
        if (!$title) {
            continue;
        }
        $temp_box = $this->makeRelatedBox($title);
        if ($temp_box) {
            $related_boxes[] = $temp_box;
            $last_node = $node;
            $last_parent = $node->parentNode;
            $last_parent->removeChild($node);
            $count++;
        }
    }

    //only 1? not enough. throw it back
    if ($count == 1) {
        $related_boxes = array();
        $last_parent->appendChild($last_node);
    }

    // Inject html into the DOM tree for specific features (ie thumb ratings, ads, etc)
    $this->mobileParserBeforeHtmlSave($xpath);

    $html = $doc->saveXML();
    $sections = array();
    $sectionsHtml = explode('<h2>', $html);
    unset($sectionsHtml[0]); // remove leftovers from intro section

    foreach ($sectionsHtml as $chunk) {
        $chunk = '<h2>' . $chunk;
        $matched = 0;
        $heading = '';
        // Captures the headline text and strips the whole <h2> from the chunk.
        $replFunc = function ($matches) use (&$heading) {
            $heading = trim($matches[1]);
            return '';
        };
        $output = preg_replace_callback('@^<h2>[^\\n]*<span class="mw-headline"[^>]*>[ \\t]*([^<]+)</span></h2>@', $replFunc, $chunk, 1, $matched);
        if ($matched > 0 && isset($sectionMap[$heading])) {
            $sections[$sectionMap[$heading]] = array('name' => $heading, 'html' => $output);
        }
    }

    // Remove </body></html> from html. Only the final chunk of the document can end
    // with it, so the end-anchored pattern is simply applied to every parsed section.
    // This runs before the synthetic sections below are appended: the earlier
    // "count - 2" lookup hit the wrong entry once user_completed_images was added and
    // created a bogus '' section when nothing but article_info existed.
    foreach (array_keys($sections) as $key) {
        $sections[$key]['html'] = preg_replace('@</body>(\\s|\\n)*</html>(\\s|\\n)*$@', '', $sections[$key]['html']);
    }

    // Remove Video section if there is no longer a youtube video
    if (isset($sections['video']) && !preg_match('@<object@i', $sections['video']['html'])) {
        unset($sections['video']);
    }

    // Add the related boxes
    if (isset($sections['relatedwikihows']) && !empty($related_boxes)) {
        $sections['relatedwikihows']['boxes'] = $related_boxes;
    }

    // Add article info
    $sections['article_info']['name'] = wfMsg('article_info');
    $sections['article_info']['html'] = $this->getArticleInfo($this->t);

    // add user created images
    if (class_exists('UCIPatrol') && UCIPatrol::showUCI($this->t)) {
        $sections['user_completed_images']['name'] = wfMsg('user_completed_images');
        $sections['user_completed_images']['html'] = $this->getUserCompletedImages($this->t);
    }

    // Add a simple form for uploading images of completed items to the article
    if ($wgLanguageCode == 'en' && isset($sections['steps']) && isset($device['show-upload-images']) && $device['show-upload-images']) {
        require_once "{$IP}/extensions/wikihow/mobile/MobileUciHtmlBuilder.class.php";
        $userCompletedImages = new MobileUciHtmlBuilder();
        $sections['steps']['html'] .= $userCompletedImages->createByHtml($this->t);
    }

    return array($sections, $intro, $firstImage);
}