/**
 * Parse a rendered page, strip the pieces that are configured for removal
 * and wrap the remaining content in the template for the active output
 * format (XHTML, WML or JSON).
 *
 * Side effects visible in this method: it replaces $this->doc, may set
 * self::$zeroRatedBanner, self::$logoutHtml and self::$loginHtml, and sends
 * Content-Type / Content-Disposition headers for WML and JSON output.
 *
 * @param $html string Full HTML of the page to transform (treated as UTF-8)
 * @return string The application markup, or the JSON(P) payload when
 *  self::$format is 'json'. An empty string is returned when no output
 *  branch applies to the current content format.
 */
public function DOMParse($html) {
    global $wgScript;
    wfProfileIn(__METHOD__);

    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");

    // Collect libxml parse errors internally while loading, then put the
    // caller's error-handling mode back instead of forcing it to "off".
    $previousLibxmlErrorMode = libxml_use_internal_errors(true);
    $this->doc = new DOMDocument();
    $this->doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    if (!$previousLibxmlErrorMode) {
        // Nobody upstream asked for buffered errors, so drop the ones this
        // parse produced rather than letting them accumulate.
        libxml_clear_errors();
    }
    libxml_use_internal_errors($previousLibxmlErrorMode);

    // NOTE(review): preserveWhiteSpace is assigned after loadHTML(); it
    // presumably does not influence the parse that already happened.
    $this->doc->preserveWhiteSpace = false;
    $this->doc->strictErrorChecking = false;
    $this->doc->encoding = 'UTF-8';

    // parseItemsToRemove() is defined elsewhere; default every bucket that is
    // read below so a missing key never reaches a foreach.
    $itemToRemoveRecords = $this->parseItemsToRemove();
    if (!is_array($itemToRemoveRecords)) {
        $itemToRemoveRecords = array();
    }
    $itemToRemoveRecords += array(
        'TAG' => array(),
        'ID' => array(),
        'CLASS' => array(),
        'TAG_CLASS' => array(),
    );

    // Zero-rated banner: prefer the regular banner, fall back to the red one.
    $zeroRatedBannerElement = $this->doc->getElementById('zero-rated-banner');
    if (!$zeroRatedBannerElement) {
        $zeroRatedBannerElement = $this->doc->getElementById('zero-rated-banner-red');
    }
    if ($zeroRatedBannerElement) {
        self::$zeroRatedBanner = $this->doc->saveXML($zeroRatedBannerElement, LIBXML_NOEMPTYTAG);
    }

    if (self::$isBetaGroupMember) {
        $ptLogout = $this->doc->getElementById('pt-logout');
        if ($ptLogout) {
            $ptLogoutLink = $ptLogout->firstChild;
            // saveXML() with a null node serialises the whole document, so
            // only capture the link when it actually exists.
            if ($ptLogoutLink) {
                self::$logoutHtml = $this->doc->saveXML($ptLogoutLink, LIBXML_NOEMPTYTAG);
            }
        }

        $ptAnonLogin = $this->doc->getElementById('pt-anonlogin');
        if (!$ptAnonLogin) {
            $ptAnonLogin = $this->doc->getElementById('pt-login');
        }
        if ($ptAnonLogin) {
            $ptAnonLoginLink = $ptAnonLogin->firstChild;
            if ($ptAnonLoginLink) {
                if ($ptAnonLoginLink->hasAttributes()) {
                    $ptAnonLoginLinkHref = $ptAnonLoginLink->getAttributeNode('href');
                    $ptAnonLoginLinkTitle = $ptAnonLoginLink->getAttributeNode('title');
                    if ($ptAnonLoginLinkTitle) {
                        $ptAnonLoginLinkTitle->nodeValue = self::$messages['mobile-frontend-login'];
                    }
                    if ($ptAnonLoginLinkHref) {
                        // NOTE(review): search and replacement are identical, so
                        // this only re-assigns the current value. The intended
                        // (un)escaping cannot be determined from this method;
                        // left untouched on purpose - confirm against history.
                        $ptAnonLoginLinkHref->nodeValue = str_replace("&", "&", $ptAnonLoginLinkHref->nodeValue);
                    }
                    $ptAnonLoginLinkText = $ptAnonLoginLink->firstChild;
                    if ($ptAnonLoginLinkText) {
                        $ptAnonLoginLinkText->nodeValue = self::$messages['mobile-frontend-login'];
                    }
                }
                // Same null-node hazard as for the logout link above.
                self::$loginHtml = $this->doc->saveXML($ptAnonLoginLink, LIBXML_NOEMPTYTAG);
            }
        }
    }

    // On the beta login page the first heading is blanked; the login form
    // element is remembered because a rendered login node is appended to it
    // further down, after the removal passes.
    $isBetaLoginPage = self::$title->isSpecial('Userlogin') && self::$isBetaGroupMember;
    $userlogin = null;
    if ($isBetaLoginPage) {
        $userlogin = $this->doc->getElementById('userloginForm');
        if ($userlogin instanceof DOMElement) {
            $firstHeading = $this->doc->getElementById('firstHeading');
            if ($firstHeading) {
                $firstHeading->nodeValue = '';
            }
        }
    }

    // With images disabled, media tags and their wrappers are queued for
    // removal too. The CLASS entries are compared against the complete
    // class attribute value by the XPath query below.
    if (self::$disableImages == 1) {
        $itemToRemoveRecords['TAG'][] = "img";
        $itemToRemoveRecords['TAG'][] = "audio";
        $itemToRemoveRecords['TAG'][] = "video";
        $itemToRemoveRecords['CLASS'][] = "thumb tright";
        $itemToRemoveRecords['CLASS'][] = "thumb tleft";
        $itemToRemoveRecords['CLASS'][] = "thumbcaption";
        $itemToRemoveRecords['CLASS'][] = "gallery";
    }

    // Tags
    // Nodes cannot be removed from a DOMNodeList while it is being iterated
    // in a foreach: the internal iterator gets out of step and nodes are
    // skipped. Queue the nodes first, then remove them in a second pass.
    $tagToRemoveNodeIdAttributeValues = array('zero-language-search');
    $domElemsToRemove = array();
    foreach ($itemToRemoveRecords['TAG'] as $tagToRemove) {
        $tagToRemoveNodes = $this->doc->getElementsByTagName($tagToRemove);
        foreach ($tagToRemoveNodes as $tagToRemoveNode) {
            if (!$tagToRemoveNode) {
                continue;
            }
            // getAttribute() yields '' when the element has no id attribute.
            $tagToRemoveNodeIdAttributeValue = $tagToRemoveNode->getAttribute('id');
            // Elements whose id is on the keep-list survive the tag sweep.
            if (!in_array($tagToRemoveNodeIdAttributeValue, $tagToRemoveNodeIdAttributeValues, true)) {
                $domElemsToRemove[] = $tagToRemoveNode;
            }
        }
    }
    foreach ($domElemsToRemove as $domElement) {
        if ($domElement->parentNode) {
            $domElement->parentNode->removeChild($domElement);
        }
    }

    // Elements with named IDs
    foreach ($itemToRemoveRecords['ID'] as $itemToRemove) {
        $itemToRemoveNode = $this->doc->getElementById($itemToRemove);
        if ($itemToRemoveNode && $itemToRemoveNode->parentNode) {
            $itemToRemoveNode->parentNode->removeChild($itemToRemoveNode);
        }
    }

    // CSS Classes
    $xpath = new DOMXpath($this->doc);
    foreach ($itemToRemoveRecords['CLASS'] as $classToRemove) {
        $classMatches = $xpath->query('//*[@class="' . $classToRemove . '"]');
        foreach ($classMatches as $classMatch) {
            if ($classMatch->parentNode) {
                $classMatch->parentNode->removeChild($classMatch);
            }
        }
    }

    // Tags with CSS Classes, given as "tag.class"
    foreach ($itemToRemoveRecords['TAG_CLASS'] as $classToRemove) {
        $parts = explode('.', $classToRemove);
        if (count($parts) < 2) {
            // No "." separator: there is no class part to match against.
            continue;
        }
        $tagClassMatches = $xpath->query('//' . $parts[0] . '[@class="' . $parts[1] . '"]');
        foreach ($tagClassMatches as $tagClassMatch) {
            if ($tagClassMatch->parentNode) {
                $tagClassMatch->parentNode->removeChild($tagClassMatch);
            }
        }
    }

    // Red links (class "new"): replace each <a> with a <span> that carries
    // the same text and every attribute except href.
    $redLinks = $xpath->query('//a[@class="new"]');
    foreach ($redLinks as $redLink) {
        // PHP Bug #36795 - createElement() does not escape its value, so a
        // bare "&" raises "unterminated entity reference". A text node is
        // escaped on creation and sidesteps the problem.
        $spanNode = $this->doc->createElement("span");
        $spanNode->appendChild($this->doc->createTextNode($redLink->nodeValue));
        if ($redLink->hasAttributes()) {
            foreach ($redLink->attributes as $attribute) {
                if ($attribute->name !== 'href') {
                    $spanNode->setAttribute($attribute->name, $attribute->value);
                }
            }
        }
        $redLink->parentNode->replaceChild($spanNode, $redLink);
    }

    if ($isBetaLoginPage && $userlogin instanceof DOMElement) {
        $login = $this->renderLogin();
        $loginNode = $this->doc->importNode($login, true);
        $userlogin->appendChild($loginNode);
    }

    // NOTE(review): when no element with id "content" exists, saveXML()
    // receives null and serialises the whole document - confirm that this
    // fallback is wanted.
    $content = $this->doc->getElementById('content');
    $contentHtml = $this->doc->saveXML($content, LIBXML_NOEMPTYTAG);

    if (self::$isMainPage) {
        $contentHtml = $this->DOMParseMainPage($contentHtml);
    }

    $htmlTitle = htmlspecialchars(self::$htmlTitle);

    // Default so the final return is defined even when neither the WML nor
    // the XHTML branch below produces output.
    $applicationHtml = '';

    if (strlen($contentHtml) > 4000 && $this->contentFormat == 'XHTML'
        && self::$device['supports_javascript'] === true
        && empty(self::$search) && !self::$isMainPage
    ) {
        $contentHtml = $this->headingTransform($contentHtml);
    } elseif ($this->contentFormat == 'WML') {
        header('Content-Type: text/vnd.wap.wml');
        $contentHtml = $this->headingTransform($contentHtml);

        // Content removal for WML rendering: drop the opening and closing
        // tags of these elements while keeping what is between them.
        $wmlStrippedTags = array('span', 'div', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'sub');
        foreach ($wmlStrippedTags as $wmlStrippedTag) {
            $contentHtml = preg_replace('#</?' . $wmlStrippedTag . '[^>]*>#is', '', $contentHtml);
        }

        // Wml for searching. The "&" in the href is written as an entity
        // because this string is emitted as XML markup.
        $searchWml = '<p><input emptyok="true" format="*M" type="text" name="search" value="" size="16" />'
            . '<do type="accept" label="' . self::$messages['mobile-frontend-search-submit'] . '">'
            . '<go href="' . $wgScript . '?title=Special%3ASearch&amp;search=$(search)"></go></do></p>';
        $contentHtml = $searchWml . $contentHtml;

        // Content wrapping
        $contentHtml = $this->createWMLCard($contentHtml);
        $applicationWmlTemplate = new ApplicationWmlTemplate();
        $options = array(
            'mainPageUrl' => self::$mainPageUrl,
            'randomPageUrl' => self::$randomPageUrl,
            'dir' => self::$dir,
            'code' => self::$code,
            'contentHtml' => $contentHtml,
            'homeButton' => self::$messages['mobile-frontend-home-button'],
            'randomButton' => self::$messages['mobile-frontend-random-button'],
        );
        $applicationWmlTemplate->setByArray($options);
        $applicationHtml = $applicationWmlTemplate->getHTML();
    }

    if ($this->contentFormat == 'XHTML' && self::$format != 'json') {
        $noticeHtml = '';
        if (!empty(self::$displayNoticeId) && intval(self::$displayNoticeId) === 1) {
            $thanksNoticeTemplate = new ThanksNoticeTemplate();
            $thanksNoticeTemplate->set('messages', self::$messages);
            $noticeHtml = $thanksNoticeTemplate->getHTML();
        }

        $searchTemplate = $this->getSearchTemplate();
        $searchWebkitHtml = $searchTemplate->getHTML();
        $footerTemplate = $this->getFooterTemplate();
        $footerHtml = $footerTemplate->getHTML();
        // Any empty() notice result is normalised to an empty string.
        $noticeHtml = !empty($noticeHtml) ? $noticeHtml : '';

        $applicationTemplate = $this->getApplicationTemplate();
        $options = array(
            'noticeHtml' => $noticeHtml,
            'htmlTitle' => $htmlTitle,
            'searchWebkitHtml' => $searchWebkitHtml,
            'contentHtml' => $contentHtml,
            'footerHtml' => $footerHtml,
        );
        $applicationTemplate->setByArray($options);
        $applicationHtml = $applicationTemplate->getHTML();
    }

    if (self::$format === 'json') {
        header('Content-Type: application/javascript');
        header('Content-Disposition: attachment; filename="data.js";');
        $json_data = array();
        $json_data['title'] = htmlspecialchars(self::$title->getText());
        $json_data['html'] = $contentHtml;

        $json = FormatJson::encode($json_data);

        // With a callback name the payload is wrapped as callback(json).
        if (!empty(self::$callback)) {
            $json = urlencode(htmlspecialchars(self::$callback)) . '(' . $json . ')';
        }

        wfProfileOut(__METHOD__);
        return $json;
    }

    wfProfileOut(__METHOD__);
    return $applicationHtml;
}