/**
 * Registers the custom node classes of this document type on a DOM document.
 *
 * Every built-in DOM node class listed below is mapped to the class named by
 * the corresponding late-static-bound constant, so subclasses can swap
 * implementations by overriding the constants.
 *
 * @param \DOMDocument $doc document to register the node classes on
 */
protected function registerClasses(\DOMDocument $doc)
{
    $classMap = array(
        '\\DOMElement'      => static::ELEMENT_CLASS,
        '\\DOMAttr'         => static::ATTR_CLASS,
        '\\DOMText'         => static::TEXT_CLASS,
        '\\DOMCdataSection' => static::CDATA_CLASS,
        '\\DOMComment'      => static::COMMENT_CLASS,
        // '\DOMDocumentFragment' => static::FRAGMENT_CLASS is intentionally not registered.
    );

    foreach ($classMap as $baseClass => $extendedClass) {
        $doc->registerNodeClass($baseClass, $extendedClass);
    }
}
/**
 * Constructor
 *
 * Opens the epub archive, reads META-INF/container.xml to find the package
 * (OPF) file, then loads that file into $this->xml and $this->xpath.
 *
 * @param string $file path to epub file to work on
 * @throws Exception if the archive, the container data or the metadata
 *                   could not be loaded
 */
public function __construct($file)
{
    // open file
    $this->file = $file;
    $zip = new ZipArchive();
    // ZipArchive::open() returns true on success and a non-zero integer
    // error code on failure. Both are truthy, so a plain "!" never fires;
    // the result has to be compared strictly against true.
    if (@$zip->open($this->file) !== true) {
        throw new Exception('Failed to read epub file');
    }

    // read container data
    $data = $zip->getFromName('META-INF/container.xml');
    if ($data == false) {
        throw new Exception('Failed to access epub container data');
    }
    $xml = new DOMDocument();
    $xml->registerNodeClass('DOMElement', 'EPubDOMElement');
    $xml->loadXML($data);
    $xpath = new EPubDOMXPath($xml);
    $nodes = $xpath->query('//n:rootfiles/n:rootfile[@media-type="application/oebps-package+xml"]');

    // A container without a matching rootfile would otherwise end in a
    // fatal "member function on null" instead of the documented Exception.
    $rootfile = $nodes ? $nodes->item(0) : null;
    if (!$rootfile) {
        throw new Exception('Failed to locate epub package file in container data');
    }
    $this->meta = $rootfile->attr('full-path');

    // load metadata
    $data = $zip->getFromName($this->meta);
    if (!$data) {
        throw new Exception('Failed to access epub metadata');
    }
    $this->xml = new DOMDocument();
    $this->xml->registerNodeClass('DOMElement', 'EPubDOMElement');
    // loadXML() returns false on malformed input; surface that as the
    // documented exception rather than continuing with an empty document.
    if (!$this->xml->loadXML($data)) {
        throw new Exception('Failed to access epub metadata');
    }
    $this->xml->formatOutput = true;
    $this->xpath = new EPubDOMXPath($this->xml);

    $zip->close();
}
/**
 * Parses the template contents as XML, runs component processing on the
 * root element and returns the resulting inner XML with CDATA broken out.
 *
 * @return mixed result of _breakOutCdata() for the processed inner XML
 * @throws Ajde_Exception when loading the XHTML raises an ErrorException
 */
public function parse()
{
    // Fetch the XHTML source first
    $source = $this->_getContents();

    $document = new DOMDocument();
    $document->registerNodeClass('DOMElement', 'Ajde_Template_Parser_Xhtml_Element');
    $document->preserveWhiteSpace = false;
    $document->formatOutput = true;

    try {
        $document->loadXML($source);
    } catch (ErrorException $parseError) {
        // TODO: decide whether returning false would be preferable here
        throw new Ajde_Exception('Xhtml Parser error: ' . $parseError->getMessage());
    }

    /* @var $rootElement DOMNode */
    $rootElement = $document->documentElement;

    // Remember the namespaces declared on the root element
    $this->_defaultNS = $rootElement->lookupNamespaceURI(null);
    $this->_acNS = $rootElement->lookupNamespaceURI(Ajde_Component::AC_XMLNS);
    $this->_avNS = $rootElement->lookupNamespaceURI(Ajde_Component::AV_XMLNS);

    // Component processing, then inner XML of the root (exclusive),
    // then break out the CDATA sections
    return $this->_breakOutCdata(
        $this->innerXml(
            $this->_process($rootElement)
        )
    );
}
/**
 * Creates a finder over the given document source.
 *
 * Example:
 * new ElementFinder("<html><div>test </div></html>", ElementFinder::DOCUMENT_HTML);
 *
 * @param string       $data         non-empty document source
 * @param null|integer $documentType document type; defaults to static::DOCUMENT_HTML
 * @param null|integer $options      libxml option bitmask; defaults to
 *                                   LIBXML_NOCDATA | LIBXML_NOERROR
 * @throws \InvalidArgumentException when $data is not a non-empty string
 */
public function __construct($data, $documentType = null, $options = null)
{
    if (!is_string($data) || empty($data)) {
        throw new \InvalidArgumentException('Expect not empty string');
    }

    $this->dom = new \DOMDocument();
    $this->dom->registerNodeClass('DOMElement', Element::class);

    $documentType = $documentType !== null ? $documentType : static::DOCUMENT_HTML;
    $this->setDocumentType($documentType);

    // Default options. The flags must be combined with bitwise OR: the two
    // constants share no bits, so the previous bitwise AND always produced 0
    // and silently disabled both options.
    $options = $options !== null ? $options : LIBXML_NOCDATA | LIBXML_NOERROR;
    $this->setDocumentOption($options);

    $this->setData($data);

    // set default expression to xpath
    $this->expressionTranslator = new XpathExpression();
}
/**
 * Test fixture: builds a source document with a <docbook> root made of
 * locateable elements, a target document with an ODT <text> root, and the
 * text processor under test.
 */
public function setUp()
{
    // Source side: elements are created as ezcDocumentLocateableDomElement
    $source = new DOMDocument();
    $source->registerNodeClass('DOMElement', 'ezcDocumentLocateableDomElement');
    $docbookRoot = $source->createElement('docbook');
    $this->sourceRoot = $source->appendChild($docbookRoot);

    // Target side: plain DOM with a namespaced root element
    $target = new DOMDocument();
    $textRoot = $target->createElementNS(ezcDocumentOdt::NS_ODT_TEXT, 'text');
    $this->targetRoot = $target->appendChild($textRoot);

    $this->proc = new ezcDocumentOdtTextProcessor();
}
/**
 * Builds a document whose elements are MyElement instances, appends an
 * initialised <foo> and <bar> element, and hands back the document together
 * with the <foo> element. Entry and exit are traced on stdout.
 *
 * @return array two-item list: the document and the <foo> element
 */
function foo()
{
    echo "Enter foo()\n";

    $document = new DOMDocument();
    $document->registerNodeClass('DOMElement', 'MyElement');

    $fooElement = $document->createElement('foo')->init();
    $document->appendChild($fooElement);

    // The <bar> element is deliberately never stored in a local variable.
    $document->appendChild(
        $document->createElement('bar')->init()
    );

    echo "Leave foo()\n";

    return array($document, $fooElement);
}
/**
 * Constructor
 *
 * Opens the epub through the given zip handler class, reads the container
 * file (METADATA_FILE) to find the package (OPF) file, then loads that file
 * into $this->xml and $this->xpath. The zip handler stays open in $this->zip.
 *
 * @param string $file     path to epub file to work on
 * @param string $zipClass class to handle zip
 * @throws Exception if the archive, the container data or the metadata
 *                   could not be loaded
 */
public function __construct($file, $zipClass = 'clsTbsZip')
{
    // open file
    $this->file = $file;
    $this->zip = new $zipClass();
    if (!$this->zip->Open($this->file)) {
        throw new Exception('Failed to read epub file');
    }

    // read container data
    if (!$this->zip->FileExists(METADATA_FILE)) {
        throw new Exception("Unable to find metadata.xml");
    }
    $data = $this->zip->FileRead(METADATA_FILE);
    if ($data == false) {
        throw new Exception('Failed to access epub container data');
    }
    $xml = new DOMDocument();
    $xml->registerNodeClass('DOMElement', 'EPubDOMElement');
    $xml->loadXML($data);
    $xpath = new EPubDOMXPath($xml);
    $nodes = $xpath->query('//n:rootfiles/n:rootfile[@media-type="application/oebps-package+xml"]');

    // A container without a matching rootfile would otherwise end in a
    // fatal "member function on null" instead of the documented Exception.
    $rootfile = $nodes ? $nodes->item(0) : null;
    if (!$rootfile) {
        throw new Exception('Failed to locate epub package file in container data');
    }
    $this->meta = $rootfile->attr('full-path');

    // load metadata
    if (!$this->zip->FileExists($this->meta)) {
        throw new Exception("Unable to find " . $this->meta);
    }
    $data = $this->zip->FileRead($this->meta);
    if (!$data) {
        throw new Exception('Failed to access epub metadata');
    }
    $this->xml = new DOMDocument();
    $this->xml->registerNodeClass('DOMElement', 'EPubDOMElement');
    // loadXML() returns false on malformed input; surface that as the
    // documented exception rather than continuing with an empty document.
    if (!$this->xml->loadXML($data)) {
        throw new Exception('Failed to access epub metadata');
    }
    $this->xml->formatOutput = true;
    $this->xpath = new EPubDOMXPath($this->xml);
}
/**
 * Render given document
 *
 * Stores the hyphenator and tokenizer (falling back to the default
 * implementations when none is passed), registers custom fonts, processes a
 * reloaded copy of the document and returns what the driver saves.
 *
 * @param ezcDocumentDocbook $document
 * @param ezcDocumentPdfHyphenator $hyphenator
 * @param ezcDocumentPdfTokenizer $tokenizer
 * @return string
 */
public function render(ezcDocumentDocbook $document, ezcDocumentPdfHyphenator $hyphenator = null, ezcDocumentPdfTokenizer $tokenizer = null)
{
    if ($hyphenator === null) {
        $hyphenator = new ezcDocumentPdfDefaultHyphenator();
    }
    $this->hyphenator = $hyphenator;

    if ($tokenizer === null) {
        $tokenizer = new ezcDocumentPdfDefaultTokenizer();
    }
    $this->tokenizer = $tokenizer;

    $this->document = $document;

    // Make custom fonts known to the driver
    $this->registerFonts();

    // The custom element class is needed for style inferencing. Registering
    // it on the existing document seems not to work in all cases, so the
    // XML is serialised and loaded again into a fresh DOMDocument that has
    // the class registered up front.
    $sourceDom = $document->getDomDocument();
    $locateableDom = new DOMDocument();
    $locateableDom->registerNodeClass('DOMElement', 'ezcDocumentLocateableDomElement');
    $serialized = $sourceDom->saveXml();
    $locateableDom->loadXml($serialized);

    $this->process($locateableDom);

    return $this->driver->save();
}
// -----------------------------------------------------------
// 1. User-level doesn't override implicit native magic props.
// -----------------------------------------------------------

/**
 * DOMElement subclass with a user-level __get() backed by the $props array.
 */
class MyElement extends DOMElement {
    private $props = array('userMagic' => 'userMagic');

    /**
     * Echoes a trace of the requested name, then returns the matching entry
     * of $props. Names that are not in $props fall through without a return
     * statement.
     */
    public function __get($name) {
        echo "__get {$name}: ";
        if (array_key_exists($name, $this->props)) {
            return $this->props[$name];
        }
    }
}

$dom = new DOMDocument();
$dom->registerNodeClass('DOMElement', 'MyElement');
$dom->appendChild($dom->createElement('Foo', 'Bar'));
var_dump($dom->documentElement->nodeValue); // Implementation-level
var_dump($dom->documentElement->userMagic); // User-level, handled
var_dump($dom->documentElement->nonExisting); // User-level, unhandled

// -----------------------------------------------------------
// 2. Explicit override of the native magic prop.
// -----------------------------------------------------------

// NOTE(review): this class definition is cut off in this file right after
// the constructor's opening brace; the remainder is not visible here.
class MyElementExplicit extends DOMElement {
    private $nodeValue;
    public function __construct() {
<?php
// -----------------------------------------------------------
// 1. User-level doesn't override implicit native magic props.
// -----------------------------------------------------------

/**
 * DOMText subclass whose __get() answers every name it receives with the
 * string "__get: <name>".
 */
class MyTextNode extends DOMText {
    public function __get($name) {
        return "__get: {$name}";
    }
}

$dom = new DOMDocument();
$dom->registerNodeClass('DOMText', 'MyTextNode');
// createElement('Foo', 'Bar') gives the element a text child, which is what
// firstChild refers to below.
$node = $dom->appendChild($dom->createElement('Foo', 'Bar'));
var_dump($node->firstChild->textContent); // Implementation-level
var_dump($node->firstChild->nonExisting); // User-level __get

// -----------------------------------------------------------
// 2. Explicit override of the native magic prop.
// -----------------------------------------------------------

// NOTE(review): this class definition is cut off in this file right after
// the opening brace of __get(); the remainder is not visible here.
class MyTextExplicit extends DOMText {
    private $textContent;

    // Unsets the declared property in the constructor.
    // NOTE(review): presumably so that reads of textContent reach __get() — confirm.
    public function __construct() {
        unset($this->textContent);
    }
    public function __get($name) {
/**
 * Constructor
 *
 * Builds the document through the parent constructor and registers the
 * element class used for nodes created in it.
 *
 * @codeCoverageIgnore
 *
 * @link https://www.php.net/manual/en/domdocument.construct.php DOMDocument::__construct()
 *
 * @param string $version
 *            [optional] The version number of the document as part of the XML declaration.
 * @param string $encoding
 *            [optional] The encoding of the document as part of the XML declaration.
 * @param string $element
 *            [optional] Class used to create elements in the document. Must extend <code>DOMElement</code>.
 *            Only the first 256 characters are used; a falsy value selects the default element class.
 */
public function __construct($version = '1.0', $encoding = 'UTF-8', $element = null)
{
    // Cap the class name length; fall back to the default when nothing usable remains
    $elementClass = @substr($element, 0, 256);
    if (!$elementClass) {
        $elementClass = '\\BLW\\Model\\DOM\\Element';
    }

    // Parent Constructor
    parent::__construct($version, $encoding);

    // Update Element class.
    parent::registerNodeClass('DOMElement', $elementClass);
}
/**
 * Parse and transform the document from the old HTML for NS_MAIN articles to the new mobile
 * style. This should probably be pulled out and added to a subclass that can then be extended for
 * builders that focus on building NS_MAIN articles.
 *
 * The article HTML is post-processed, wrapped in an XHTML skeleton and loaded
 * into a DOM. Unwanted nodes are then stripped, embeds and images are resized
 * for the current device, related articles are collected, and the serialised
 * result is split into sections on <h2> headings.
 *
 * @param string $article article HTML; taken by reference and overwritten
 *                        with the post-processed HTML
 * @return array three-item list: sections (keyed by section id, each with
 *               'name' and 'html'), the intro text, and the first image name
 *               ('' when none was found)
 */
protected function parseNonMobileArticle(&$article)
{
    global $IP, $wgContLang, $wgLanguageCode, $wgServerName;

    $sectionMap = array(
        wfMsg('Intro') => 'intro',
        wfMsg('Ingredients') => 'ingredients',
        wfMsg('Steps') => 'steps',
        wfMsg('Video') => 'video',
        wfMsg('Tips') => 'tips',
        wfMsg('Warnings') => 'warnings',
        wfMsg('relatedwikihows') => 'relatedwikihows',
        wfMsg('sourcescitations') => 'sources',
        wfMsg('thingsyoullneed') => 'thingsyoullneed',
        wfMsg('article_info') => 'article_info',
    );

    $lang = MobileWikihow::getSiteLanguage();
    $imageNsText = $wgContLang->getNsText(NS_IMAGE);
    $device = $this->getDevice();

    // These are read further down even when the loops that normally assign
    // them never run, so give them defined values up front.
    $introNode = null;
    $firstImage = '';
    $title = null;

    // munge steps first
    $opts = array('no-ads' => true);
    $article = WikihowArticleHTML::postProcess($article, $opts);

    // Make doc correctly formed
    $articleText = <<<DONE
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="{$lang}" lang="{$lang}">
<head>
\t<meta http-equiv="Content-Type" content="text/html; charset='utf-8'" />
</head>
<body>
{$article}
</body>
</html>
DONE;

    require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";
    $doc = new DOMDocument('1.0', 'utf-8');
    $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    $doc->strictErrorChecking = false;
    $doc->recover = true;
    @$doc->loadHTML($articleText);
    $doc->normalizeDocument();
    $xpath = new DOMXPath($doc);

    // Delete #featurestar node
    $node = $doc->getElementById('featurestar');
    if (!empty($node)) {
        $node->parentNode->removeChild($node);
    }
    $node = $doc->getElementById('newaltmethod');
    if (!empty($node)) {
        $node->parentNode->removeChild($node);
    }

    // Remove all "Edit" links
    $nodes = $xpath->query('//a[@id = "gatEditSection"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node);
    }

    // Resize youtube video
    $nodes = $xpath->query('//embed');
    foreach ($nodes as $node) {
        $src = $node->attributes->getNamedItem('src')->nodeValue;
        if (!$device['show-youtube'] || stripos($src, 'youtube.com') === false) {
            $parent = $node->parentNode;
            $grandParent = $parent->parentNode;
            if ($grandParent && $parent) {
                $grandParent->removeChild($parent);
            }
        } else {
            // Resize the <embed> and its parent. A separate loop variable is
            // used so the outer $node is not overwritten and no reference to
            // a DOM property is taken.
            foreach (array($node, $node->parentNode) as $resizable) {
                $widthAttr = $resizable->attributes->getNamedItem('width');
                $oldWidth = (int) $widthAttr->nodeValue;
                $newWidth = $device['max-video-width'];
                if ($newWidth < $oldWidth) {
                    $widthAttr->nodeValue = (string) $newWidth;
                    $heightAttr = $resizable->attributes->getNamedItem('height');
                    $oldHeight = (int) $heightAttr->nodeValue;
                    // keep the aspect ratio
                    $newHeight = (int) round($newWidth * $oldHeight / $oldWidth);
                    $heightAttr->nodeValue = (string) $newHeight;
                }
            }
        }
    }

    // Remove templates from intro so that they don't muck up
    // the text and images we extract
    $nodes = $xpath->query('//div[@class = "template_top"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node);
    }

    // Grab intro text (only the first paragraph qualifies)
    $intro = '';
    $nodes = $xpath->query('//body/div/p');
    foreach ($nodes as $i => $node) {
        $text = $node->textContent;
        if (!empty($text) && $i == 0) {
            $introNode = $node;
            $intro = Wikitext::removeRefsFromFlattened($text);
            break;
        }
    }

    if ($introNode) {
        // Grab first image from article
        $imgs = $xpath->query('.//img', $introNode->parentNode);
        foreach ($imgs as $img) {
            // parent is an <a> tag
            $parent = $img->parentNode;
            if ($parent->nodeName == 'a') {
                $href = $parent->attributes->getNamedItem('href')->nodeValue;
                if (preg_match('@(Image|' . $imageNsText . '):@', $href)) {
                    $firstImage = preg_replace('@^.*(Image|' . $imageNsText . '):([^:]*)([#].*)?$@', '$2', $href);
                    $firstImage = urldecode($firstImage);
                    break;
                }
            }
        }

        // Remove intro node
        $parent = $introNode->parentNode;
        $parent->removeChild($introNode);
    }

    // Get rid of the <span> element to standardize the html for the
    // next dom query
    $nodes = $xpath->query('//div/span/a[@class = "image"]');
    foreach ($nodes as $a) {
        $parent = $a->parentNode;
        $grandParent = $parent->parentNode;
        $grandParent->replaceChild($a, $parent);
    }

    // Resize all resize-able images
    $nodes = $xpath->query('//div/a[@class = "image"]/img');
    $imgNum = 1;
    foreach ($nodes as $img) {
        $srcNode = $img->attributes->getNamedItem('src');
        $widthNode = $img->attributes->getNamedItem('width');
        $width = (int) $widthNode->nodeValue;
        $heightNode = $img->attributes->getNamedItem('height');
        $height = (int) $heightNode->nodeValue;

        $imageClasses = $img->parentNode->parentNode->attributes->getNamedItem('class')->nodeValue;

        // NOTE(review): a width attribute of 0 makes these divisions fail — confirm inputs.
        if (stristr($imageClasses, "tcenter") !== false) {
            $newWidth = $device['full-image-width'];
            $newHeight = (int) round($device['full-image-width'] * $height / $width);
        } else {
            $newWidth = $device['max-image-width'];
            $newHeight = (int) round($device['max-image-width'] * $height / $width);
        }

        // Work out the image name from the link's href, or from its onclick
        // handler when there is no href
        $a = $img->parentNode;
        $href = $a->attributes->getNamedItem('href')->nodeValue;
        if (!$href) {
            $onclick = $a->attributes->getNamedItem('onclick')->nodeValue;
            $onclick = preg_replace('@.*",[ ]*"@', '', $onclick);
            $onclick = preg_replace('@".*@', '', $onclick);
            $imgName = preg_replace('@.*(Image|' . $imageNsText . '|' . urlencode($imageNsText) . '):@', '', $onclick);
        } else {
            $imgName = preg_replace('@^/(Image|' . $imageNsText . '|' . urlencode($imageNsText) . '):@', '', $href);
        }

        $title = Title::newFromURL($imgName, NS_IMAGE);
        if (!$title) {
            $imgName = urldecode($imgName);
            $title = Title::newFromURL($imgName, NS_IMAGE);
        }

        if ($title) {
            $image = RepoGroup::singleton()->findFile($title);
            if ($image) {
                list($thumb, $newWidth, $newHeight) = self::makeThumbDPI($image, $newWidth, $newHeight, $device['enlarge-thumb-high-dpi']);
                $url = wfGetPad($thumb->getUrl());
                $srcNode->nodeValue = $url;
                $widthNode->nodeValue = $newWidth;
                $heightNode->nodeValue = $newHeight;

                // change surrounding div width and height
                $div = $a->parentNode;
                $styleNode = $div->attributes->getNamedItem('style');
                //removing the set width/height
                $styleNode->nodeValue = '';

                //default width/height for the srcset
                $bigWidth = 600;
                $bigHeight = 800;

                // change grandparent div width too
                $grandparent = $div;
                if ($grandparent && $grandparent->nodeName == 'div') {
                    $class = $grandparent->attributes->getNamedItem('class');
                    if ($class) {
                        $isThumb = stristr($class->nodeValue, 'mthumb') !== false;
                        $isRight = stristr($class->nodeValue, 'tright') !== false;
                        $isLeft = stristr($class->nodeValue, 'tleft') !== false;
                        $isCenter = stristr($class->nodeValue, 'tcenter') !== false;
                        if ($isThumb) {
                            if ($isRight) {
                                $style = $grandparent->attributes->getNamedItem('style');
                                $style->nodeValue = 'width:' . $newWidth . 'px;height:' . $newHeight . 'px;';
                                $bigWidth = 300;
                                $bigHeight = 500;
                            } elseif ($isCenter) {
                                $style = $grandparent->attributes->getNamedItem('style');
                                $style->nodeValue = 'width:' . $newWidth . 'px;height:' . $newHeight . 'px;';
                                $bigWidth = 600;
                                $bigHeight = 800;
                            } elseif ($isLeft) {
                                //if its centered or on the left, give it double the width if too big
                                $style = $grandparent->attributes->getNamedItem('style');
                                $oldStyle = $style->nodeValue;
                                $matches = array();
                                preg_match('@(width:\\s*)[0-9]+@', $oldStyle, $matches);
                                // $matches stays empty when the style has no width
                                if (!empty($matches[0])) {
                                    $curSize = intval(substr($matches[0], 6)); //width: = 6
                                    if ($newWidth * 2 < $curSize) {
                                        $existingCSS = preg_replace('@(width:\\s*)[0-9]+@', 'width:' . $newWidth * 2, $oldStyle);
                                        $style->nodeValue = $existingCSS;
                                    }
                                }
                                $bigWidth = 300;
                                $bigHeight = 500;
                            }
                        }
                    }
                }

                // Larger rendition for the srcset attribute
                list($thumb, $newWidth, $newHeight) = self::makeThumbDPI($image, $bigWidth, $bigHeight, $device['enlarge-thumb-high-dpi']);
                $url = wfGetPad($thumb->getUrl());
                $img->setAttribute('srcset', $url . ' ' . $newWidth . 'w');

                //if we couldn't make it big enough, let's add a class
                if ($newWidth < $bigWidth) {
                    $imgclass = $img->getAttribute('class');
                    $img->setAttribute('class', $imgclass . ' not_huge');
                }

                $a->setAttribute('id', 'image-zoom-' . $imgNum);
                $a->setAttribute('class', 'image-zoom');
                $a->setAttribute('href', '#');

                // Build the absolute credits URL. The host is already
                // prefixed at this point, so only the scheme is added; the
                // previous code also concatenated an undefined $serverName.
                $href = $wgServerName . $href;
                if (!preg_match("/^http:\\/\\//", $href)) {
                    $href = "http://" . $href;
                }
                // NOTE(review): this strips every "m." occurrence, not only a leading mobile subdomain — confirm intent.
                $href = preg_replace("/\\m\\./", "", $href);
                $href = preg_replace("/^http:\\/\\/wikihow\\.com/", "http://www.wikihow.com", $href);

                //add the hidden info
                $details = array('url' => $url, 'width' => $newWidth, 'height' => $newHeight, 'credits_page' => $href);
                $newDiv = new DOMElement('div', htmlentities(json_encode($details)));
                $a->appendChild($newDiv);
                $newDiv->setAttribute('style', 'display:none;');
                $newDiv->setAttribute('id', 'image-details-' . $imgNum);
                $imgNum++;
            } else {
                //huh? can't find it? well, then let's not display it
                $img->parentNode->parentNode->parentNode->parentNode->setAttribute('style', 'display:none;');
            }
        } else {
            //huh? can't find it? well, then let's not display it
            $img->parentNode->parentNode->parentNode->parentNode->setAttribute('style', 'display:none;');
        }
    }

    // Remove template from images, add new zoom one
    $nodes = $xpath->query('//img');
    foreach ($nodes as $node) {
        $src = $node->attributes ? $node->attributes->getNamedItem('src') : null;
        $src = $src ? $src->nodeValue : '';
        if (stripos($src, 'magnify-clip.png') !== false) {
            $parent = $node->parentNode;
            $parent->parentNode->removeChild($parent);
        }
    }

    //get rid of the corners and watermarks
    $nodes = $xpath->query('//div[@class = "corner top_left" or @class = "corner bottom_left" or @class = "corner top_right" or @class = "corner bottom_right" or @class = "wikihow_watermark"]');
    foreach ($nodes as $node) {
        $parent = $node->parentNode;
        $parent->removeChild($node);
    }

    //gotta swap in larger images if the client's width is big enough
    //(i.e. tablet et al)
    $nodes = $xpath->query('//img[@class = "mwimage101" or @class = "mwimage101 not_huge"]');
    foreach ($nodes as $node) {
        //make a quick unique id for this
        $id = md5($node->attributes->getNamedItem('src')->nodeValue) . rand();
        $node->setAttribute('id', $id);
        //pass it to our custom function for swapping in larger images
        $swap_it = 'if (isBig) WH.mobile.swapEm("' . $id . '");';
        $scripttag = new DOMElement('script', htmlentities($swap_it));
        $node->appendChild($scripttag);
    }

    // Change the width attribute from any tables with a width set.
    // This often happen around video elements.
    $nodes = $xpath->query('//table/@width');
    foreach ($nodes as $node) {
        $width = preg_replace('@px\\s*$@', '', $node->nodeValue);
        if ($width > $device['screen-width'] - 20) {
            $node->nodeValue = $device['screen-width'] - 20;
        }
    }

    // Surround step content in its own div. We do this to support other features like checkmarks
    $nodes = $xpath->query('//div[@id="steps"]/ol/li');
    foreach ($nodes as $node) {
        $node->innerHTML = '<div class="step_content">' . $node->innerHTML . '</div>';
    }

    //remove quiz
    $nodes = $xpath->query('//div[@class = "quiz_cta"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node);
    }

    //remove quiz header
    $nodes = $xpath->query('//h3/span[text()="Quiz"]');
    foreach ($nodes as $node) {
        $parentNode = $node->parentNode;
        $parentNode->parentNode->removeChild($parentNode);
    }

    //pull out the first related wikihows and format them
    // NOTE(review): "$count > 6" lets up to 7 boxes through although the original comment said 6 — confirm.
    $nodes = $xpath->query('//div[@id="relatedwikihows"]/ul/li');
    $count = 0;
    $related_boxes = array();
    foreach ($nodes as $node) {
        if ($count > 6) {
            break;
        }
        //grab the title
        preg_match('@href=\\"\\/(.*?)?\\"@', $node->innerHTML, $m);
        $title = Title::newFromText($m[1]);
        if (!$title) {
            continue;
        }
        $temp_box = $this->makeRelatedBox($title);
        if ($temp_box) {
            $related_boxes[] = $temp_box;
            $last_node = $node;
            $parent = $node->parentNode;
            $last_parent = $parent;
            $parent->removeChild($node);
            $count++;
        }
    }

    //only 1? not enough. throw it back
    if ($count == 1) {
        $related_boxes = array();
        $last_parent->appendChild($last_node);
    }

    // Inject html into the DOM tree for specific features (ie thumb ratings, ads, etc)
    $this->mobileParserBeforeHtmlSave($xpath);

    // Serialise and split into sections on <h2>
    $html = $doc->saveXML();
    $sections = array();
    $sectionsHtml = explode('<h2>', $html);
    unset($sectionsHtml[0]); // remove leftovers from intro section
    foreach ($sectionsHtml as $i => &$section) {
        $section = '<h2>' . $section;
        if (preg_match('@^<h2[^>]*>\\s*<span[^>]*>\\s*([^<]+)@i', $section, $m)) {
            $heading = trim($m[1]);
            $section = preg_replace('@^<h2[^>]*>\\s*<span[^>]*>\\s*([^<]+)</span>(\\s|\\n)*</h2>@i', '', $section);
            if (isset($sectionMap[$heading])) {
                $key = $sectionMap[$heading];
                $sections[$key] = array('name' => $heading, 'html' => $section);
            }
        }
    }
    unset($section); // drop the dangling by-reference loop variable

    // Remove Video section if there is no longer a youtube video
    if (isset($sections['video'])) {
        if (!preg_match('@<object@i', $sections['video']['html'])) {
            unset($sections['video']);
        }
    }

    // Add the related boxes
    if (isset($sections['relatedwikihows']) && !empty($related_boxes)) {
        $sections['relatedwikihows']['boxes'] = $related_boxes;
    }

    // Add article info
    // NOTE(review): $title is whatever the image/related loops assigned last
    // (null when neither ran) — confirm this is the title getArticleInfo() expects.
    $sections['article_info']['name'] = wfMsg('article_info');
    $sections['article_info']['html'] = $this->getArticleInfo($title);

    // Remove </body></html> from the section that precedes article_info.
    // With fewer than two sections there is no such section; indexing
    // count - 2 would then create a bogus ''-keyed entry by reference.
    if (count($sections) > 1) {
        $keys = array_keys($sections);
        $last =& $sections[$keys[count($sections) - 2]]['html'];
        $last = preg_replace('@</body>(\\s|\\n)*</html>(\\s|\\n)*$@', '', $last);
        unset($last);
    }

    // Add a simple form for uploading images of completed items to the article
    if ($wgLanguageCode == 'en' && isset($sections['steps']) && isset($device['show-upload-images']) && $device['show-upload-images']) {
        require_once "{$IP}/extensions/wikihow/mobile/MobileUciHtmlBuilder.class.php";
        $userCompletedImages = new MobileUciHtmlBuilder();
        $sections['steps']['html'] .= $userCompletedImages->createByHtml($this->t);
    }

    return array($sections, $intro, $firstImage);
}
<?php

/**
 * Empty DOMElement subclass used as the registered element class below.
 */
class SampleElement extends \DOMElement
{
}

// Register the subclass, load a one-element document and dump the class of
// the first child node of the document.
$document = new DOMDocument();
$document->registerNodeClass('DOMElement', 'SampleElement');
$document->loadXML('<?xml version="1.0" ?><root />');

$children = $document->childNodes;
$firstChild = $children->item(0);
var_dump(get_class($firstChild));
/**
 * Creates an XML 1.0 / UTF-8 document whose elements are instances of
 * \Sped\Components\Xml\Element.
 */
public function __construct()
{
    $xmlVersion = '1.0';
    $xmlEncoding = 'UTF-8';
    parent::__construct($xmlVersion, $xmlEncoding);

    $baseClass = '\\DOMElement';
    $elementClass = '\\Sped\\Components\\Xml\\Element';
    parent::registerNodeClass($baseClass, $elementClass);
}
/**
 * MANGASTREAM VERSION
 * Eg. $obj->download('http://mangastream.com/read/one_piece_green/59213718/1');
 *
 * Fetches one reader page, collects the style, image source and link of the
 * <div> layers that make up the page image, hands them to combine_data(),
 * and then calls itself for the link found in the first image layer. The
 * script is terminated with die when URL parameter 6 equals "end".
 *
 * NOTE(review): the function recurses once per page and only stops through
 * die; urlParameters(), filtro() and combine_data() are defined elsewhere and
 * their exact behaviour is not visible here.
 *
 * @param string $manga_url
 */
function download($manga_url)
{
    if (urlParameters(6, $manga_url) == "end") {
        die;
    }
    $html = file_get_contents($manga_url);
    $doc = new DOMDocument();
    $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    @$doc->loadHTML($html);
    $doc->preserveWhiteSpace = false;
    $i = 0;
    /* Look up the chapter number in the document (value of the first <option>) */
    // NOTE(review): item() returns null when the page has no <option>; the next line would then fail.
    $option = $doc->getElementsByTagName('option')->item($i);
    $chapter = $option->nodeValue;
    /* Walk the document until we find what would be the fitted layer:
       the first of the first 20 <div>s whose style contains "position:relative" */
    // NOTE(review): item($i) is null when the page has fewer <div>s than scanned — confirm pages always have enough.
    do {
        $elem = $doc->getElementsByTagName('div')->item($i);
        $str = $elem->getAttribute('style');
        if (strstr($str, "position:relative")) {
            break;
        }
        $i++;
    } while ($i < 20);
    // Remember the index of the frame layer
    $ireal = $i;
    /* Once we have it we can build an array with all the data we will need */
    $elem = $doc->getElementsByTagName('div')->item($i);
    $data[$i]['style'] = $elem->getAttribute('style');
    // Collect the following <div>s until one without a style attribute is reached
    do {
        $i++;
        $elem = $doc->getElementsByTagName('div')->item($i);
        if ($elem->getAttribute('style') == "") {
            break;
        }
        $a = $elem->getElementsByTagName('a');
        $img = $elem->getElementsByTagName('img');
        $data[$i]['img'] = $img->item(0)->getAttribute('src');
        $data[$i]['style'] = $elem->getAttribute('style');
        $data[$i]['a'] = $a->item(0)->getAttribute('href');
    } while ($elem->getAttribute('style') != "");
    // $imax is the index of the first <div> that is NOT part of the image
    $imax = $i;
    $i = $ireal;
    /* At this point we only have the styles of the layers that form the image, the URL of the next page and the link to the image :-(
     * We filter the styles to obtain width, height and the exact top and left positions of each image; the first layer only serves as the frame */
    $data[$i]['width'] = filtro("width", "px", $data[$i]['style']);
    $data[$i]['height'] = filtro("height", "px", $data[$i]['style']);
    for ($i++; $i < $imax; $i++) {
        $data[$i]['width'] = filtro("width", "px", $data[$i]['style']);
        $data[$i]['height'] = filtro("height", "px", $data[$i]['style']);
        $data[$i]['top'] = filtro("top", "px", $data[$i]['style']);
        $data[$i]['left'] = filtro("left", "px", $data[$i]['style']);
    }
    if (urlParameters(6, $manga_url) != "end") {
        //echo "<br>FILE_NAME 6: ".urlParameters(6, $manga_url)." <br>URLPARAMETERS DIR 4: ".urlParameters(4, $manga_url);
        // $file is not used after this call in the visible code
        $file = combine_data($data, $ireal, $imax, urlParameters(4, $manga_url) . "-" . $chapter, urlParameters(6, $manga_url));
        //echo "<br>http://mangastream.com".$data[$ireal+1]['a'];
        // Reset the execution time limit before continuing with the linked page
        set_time_limit(20);
        $this->download("http://mangastream.com" . $data[$ireal + 1]['a']);
    } else {
        die;
    }
}
/**
 * Post-processes article HTML for the "03" layout and renders it through the
 * main template.
 *
 * The steps are munged, empty corner/clear divs are padded so they survive
 * DOM parsing, the featured-article star is removed, "rounders" boxes are
 * enlarged and wrapped in four nested divs, and step images are moved after
 * their check box. The resulting markup is passed to EasyTemplate together
 * with the usual globals.
 *
 * @param string $article article HTML
 * @return mixed result of EasyTemplate::html() for the main layout template
 */
function parseArticle_03($article)
{
    // $wgLanguageCode was used below without being declared global.
    global $wgTitle, $wgUser, $wgRequest, $wgServer, $wgLang, $wgArticle, $wgParser, $wgOut, $IP, $wgLanguageCode;

    $article = self::mungeSteps($article);
    $sk = $wgUser->getSkin();

    // Share widgets. They are built but currently not passed on, because the
    // line assembling $share is disabled (see below).
    $url = urlencode($wgServer . "/" . $wgTitle->getPrefixedURL());
    $img = urlencode(WikihowShare::getPinterestImage($wgTitle));
    $desc = urlencode(wfMsg('howto', $wgTitle->getText()) . WikihowShare::getPinterestTitleInfo());
    $fb = '<div class="like_button"><fb:like href="' . $url . '" send="false" layout="button_count" width="100" show_faces="false"></fb:like></div>';
    $gp1 = '<div class="gplus1_button"><g:plusone size="medium" callback="plusone_vote"></g:plusone></div>';
    $pinterest = '<div id="pinterest"><a href="http://pinterest.com/pin/create/button/?url=' . $url . '&media=' . $img . '&description=' . $desc . '" class="pin-it-button" count-layout="horizontal">Pin It</a></div>';
    // German includes "how to " in the title text
    $howto = wfMsg('howto', htmlspecialchars($wgTitle->getText()));
    $tb = '<div class="admin_state"><a href="http://twitter.com/share" data-lang="' . $wgLanguageCode . '" style="display:none; background-image: none; color: #ffffff;" class="twitter-share-button" data-count="horizontal" data-via="wikiHow" data-text="' . $howto . '" data-related="JackHerrick:Founder of wikiHow">Tweet</a></div>';

    // Give empty corner divs (both quoting styles) a space as content
    foreach (array('top_right', 'top_left', 'bottom_right', 'bottom_left') as $corner) {
        $article = str_replace('<div class="corner ' . $corner . '"></div>', '<div class="corner ' . $corner . '"> </div>', $article);
        $article = str_replace("<div class='corner " . $corner . "'></div>", "<div class='corner " . $corner . "'> </div>", $article);
    }
    $article = str_replace('<div style="clear:both"></div>', '<div style="clear:both"> </div>', $article);
    $article = str_replace("’", "'", $article);

    $introImage = "";
    require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";
    $doc = new DOMDocument('1.0', 'utf-8');
    $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    $doc->strictErrorChecking = false;
    $doc->recover = true;
    @$doc->loadHTML($article);
    $doc->normalizeDocument();
    $xpath = new DOMXPath($doc);

    //removing the featured article star (first match only)
    // NOTE(review): assumes the star is followed by at least two siblings — confirm.
    $nodes = $xpath->query('//div[@id="featurestar"]');
    foreach ($nodes as $node) {
        $node->parentNode->removeChild($node->nextSibling->nextSibling);
        $node->parentNode->removeChild($node);
        break;
    }

    $nodes = $xpath->query('//div[@class="rounders"]');
    foreach ($nodes as $node) {
        $style = $node->getAttribute("style");

        // Widen the box by 21px. substr()'s third argument is a length, so it
        // must be ($end - value start); the previous negated expression only
        // produced the right number by accident for some style strings.
        $start = strpos($style, "width:");
        if ($start !== false) {
            $end = strpos($style, "px", $start);
            if ($end !== false) {
                $valueStart = $start + 6; // strlen("width:")
                $width = intval(substr($style, $valueStart, $end - $valueStart));
                $newWidth = $width + 21;
                $style = substr($style, 0, $valueStart) . $newWidth . substr($style, $end);
            }
        }

        // Heighten the box by 19px. The previous code stored the result in
        // $newheight but wrote the undefined $newHeight, emptying the value.
        $start = strpos($style, "height:");
        if ($start !== false) {
            $end = strpos($style, "px", $start);
            if ($end !== false) {
                $valueStart = $start + 7; // strlen("height:")
                $height = intval(substr($style, $valueStart, $end - $valueStart));
                $newHeight = $height + 19;
                $style = substr($style, 0, $valueStart) . $newHeight . substr($style, $end);
            }
        }
        $node->setAttribute("style", $style);

        // Wrap the first child in div.top > div.bottom > div.left > div.right
        $childNode = $node->firstChild;
        $node->removeChild($childNode);
        $newNode = $doc->createElement("div");
        $newNode->setAttribute('class', 'top');
        $node->appendChild($newNode);
        $newNode2 = $doc->createElement("div");
        $newNode2->setAttribute('class', 'bottom');
        $newNode->appendChild($newNode2);
        $newNode3 = $doc->createElement("div");
        $newNode3->setAttribute('class', 'left');
        $newNode2->appendChild($newNode3);
        $newNode4 = $doc->createElement("div");
        $newNode4->setAttribute('class', 'right');
        $newNode3->appendChild($newNode4);
        $newNode4->appendChild($childNode);
    }

    // Move each step image so that it follows the step's check box
    // NOTE(review): assumes every such step contains a div.check — confirm.
    $nodes = $xpath->query('//ol[@class="steps_list_2"]/li/div[@class="mwimg"]');
    foreach ($nodes as $node) {
        $checkNode = $xpath->evaluate($node->parentNode->getNodePath() . '/div[@class="check"]')->item(0);
        $node->parentNode->removeChild($node);
        $checkNode->parentNode->insertBefore($node, $checkNode->nextSibling);
    }

    $article = $doc->saveHTML();
    // Strip the wrapper that loadHTML()/saveHTML() added. The whitespace
    // between the doctype and <html> is matched loosely so the removal does
    // not depend on the exact line break the serialiser emits.
    $article = preg_replace('@<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4\\.0 Transitional//EN" "http://www\\.w3\\.org/TR/REC-html40/loose\\.dtd">\\s*<html><body>@', "", $article);
    $article = str_replace('</body></html>', "", $article);

    // Share markup is disabled; pass an explicit null instead of an
    // undefined variable.
    //$share = $fb . $gp1 . $pinterest;
    $share = null;

    $mainVars = array(
        'wgTitle' => $wgTitle,
        'wgUser' => $wgUser,
        'article' => $article,
        'sk' => $sk,
        'wgRequest' => $wgRequest,
        'share' => $share,
        'wgLang' => $wgLang,
        'wgArticle' => $wgArticle,
        'introImage' => $introImage,
        'navigation' => self::getNavigation(),
    );

    return EasyTemplate::html('main_' . self::ARTICLE_LAYOUT . '.tmpl.php', $mainVars);
}
/**
 * Creates the document and registers this object's own class as the
 * DOMDocument node class.
 *
 * @param string $version  The version number of the document as part of the XML declaration.
 * @param string $encoding The encoding of the document as part of the XML declaration.
 */
public function __construct($version = '1.0', $encoding = 'UTF-8')
{
    parent::__construct($version, $encoding);

    // Without this registration node->ownerDocument would hand back a plain
    // \DOMDocument object instead of an instance of the concrete class.
    $concreteClass = get_class($this);
    parent::registerNodeClass('DOMDocument', $concreteClass);
}
/**
 * Returns a copy of the given DocBook DOM whose elements are
 * ezcDocumentLocateableDomElement instances.
 *
 * The copy is produced by serialising the input and parsing it again,
 * because registering the element class on the existing document seems not
 * to work in all cases.
 *
 * @param DOMDocument $docBook
 * @return DOMDocument
 */
private function makeLocateable(DOMDocument $docBook)
{
    $locateable = new DOMDocument();
    $locateable->registerNodeClass('DOMElement', 'ezcDocumentLocateableDomElement');

    $serialized = $docBook->saveXml();
    $locateable->loadXml($serialized);

    return $locateable;
}
<?php
// Demonstrates reading and writing innerHTML on elements created as
// JSLikeHTMLElement instances.
require_once '../JSLikeHTMLElement.php';

header('Content-Type: text/plain');

$document = new DOMDocument();
$document->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
$document->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>');

$divs = $document->getElementsByTagName('div');
$container = $divs->item(0);

// Read innerHTML: prints '<p>Para 1</p><p>Para 2</p>'
echo $container->innerHTML;
echo "\n\n";

// Write innerHTML, then read it back:
// prints '<a href="http://fivefilters.org">FiveFilters.org</a>'
$replacement = '<a href="http://fivefilters.org">FiveFilters.org</a>';
$container->innerHTML = $replacement;
echo $container->innerHTML;
echo "\n\n";

// Print the whole document, including the change made above
echo $document->saveXML();
/**
 * Expands every <article id="..."> element of a request document with the
 * XSLT-transformed markup of the matching eLife article.
 *
 * For each article the source XML is downloaded from the eLife S3 bucket,
 * normalised through the HTML parser, transformed with
 * ./elife_xmltohtml.xsl and post-processed (author tooltips, institution
 * clean-up, display-formula labels). If the article element contains
 * <query xpath="..."> children, only the nodes matched by each query are
 * imported (wrapped in <data>); otherwise the whole transformed document is
 * appended. Failures are reported per article by writing an "ERROR: ..."
 * message into the article element.
 *
 * Relies on the JSLikeHTMLElement class and on a DOMRemove() function that
 * are expected to be defined elsewhere.
 *
 * @param string $messageXML Request document, UTF-8 encoded XML.
 * @return string The request document with results filled in, without the
 *                XML declaration.
 */
function xmltohtml($messageXML)
{
    // Removes an XML declaration. The "?" has to be escaped: unescaped it
    // only makes the "<" optional and leaves a stray "<?" behind.
    $stripXmlDeclaration = function ($xml) {
        return preg_replace('/<\?xml version[^>]*>/', '', $xml);
    };

    // Renders the children of a node: text nodes verbatim, anything else
    // wrapped in <span class="nlm-{node name}">.
    $renderChildren = function ($parent) {
        $html = '';
        foreach ($parent->childNodes as $childNode) {
            if ($childNode->nodeType == XML_TEXT_NODE) {
                $html .= $childNode->nodeValue;
            } else {
                $html .= '<span class="nlm-' . $childNode->nodeName . '">' . $childNode->innerHTML . '</span>';
            }
        }
        return $html;
    };

    $messageXML = mb_convert_encoding($messageXML, 'HTML-ENTITIES', "UTF-8");

    // Stream context used for the article downloads
    $opts = array('http' => array('method' => "GET", 'header' => "Accept-language: en\r\n" . "Cookie: foo=bar\r\n" . "Content-Type: text/xml; charset=UTF-8"));
    $context = stream_context_create($opts);

    // Request/response document
    $doc = new DOMDocument('1.0', 'UTF-8');
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput = false;
    $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    $doc->loadXML($messageXML);

    $xslFilePath = './elife_xmltohtml.xsl';
    $xsl = new DOMDocument();
    if (!$xsl->load($xslFilePath)) {
        // Nothing can be transformed without the stylesheet: report once on
        // the root element (when there is one) and stop.
        $error = 'ERROR: Failed to load XSLT: ' . $xslFilePath;
        if ($doc->documentElement === null) {
            return $error;
        }
        $doc->documentElement->nodeValue = $error;
        return $stripXmlDeclaration($doc->saveXML());
    }

    $xpath = new DOMXPath($doc);
    $articles = $xpath->query('//article');
    foreach ($articles as $article) {
        $articleID = $article->getAttribute('id');
        if (!$articleID) {
            $article->nodeValue = 'ERROR: invalid article ID';
            continue;
        }

        $xmlFileURL = "https://s3.amazonaws.com/elife-cdn/elife-articles/{$articleID}/elife{$articleID}.xml";
        $fetched = file_get_contents($xmlFileURL, false, $context);
        if ($fetched === false || trim($fetched) === '') {
            $article->nodeValue = 'ERROR: Something went wrong when reading XML from AWS';
            continue;
        }

        // Turn non-ASCII characters into entities, drop the XML and DTD
        // declarations and wrap the article in a minimal HTML page.
        $articleXML = mb_convert_encoding($fetched, 'HTML-ENTITIES', "UTF-8");
        $articleXML = $stripXmlDeclaration($articleXML);
        $articleXML = preg_replace('/<!DOCTYPE [^>]*>/', '', $articleXML);
        $articleXML = '<html><head><meta http-equiv=Content-Type content="text/html; charset=utf-8"></meta></head><body>' . $articleXML . '</body></html>';

        // Load the XML as HTML to avoid the named entity/hex entity
        // difference between articles, then re-parse the result as XML.
        $aDoc = new DOMDocument('1.0', 'UTF-8');
        $aDoc->loadHTML($articleXML);
        $articleXML = $aDoc->saveXML();
        $articleXML = preg_replace('/<!DOCTYPE [^>]*>/', '', $articleXML);
        if (!$aDoc->loadXML($articleXML)) {
            $article->nodeValue = 'ERROR: Something went wrong during loading XML';
            continue;
        }

        $proc = new XSLTProcessor();
        if (!$proc->importStylesheet($xsl)) {
            $article->nodeValue = 'ERROR: Something went wrong when loading XSLT';
            continue;
        }
        $newDoc = $proc->transformToDoc($aDoc);
        if (!$newDoc) {
            $article->nodeValue = 'ERROR: Something went wrong during transformation';
            continue;
        }
        // The transformed document is a brand-new DOMDocument; its elements
        // only expose innerHTML once the element class is registered on it.
        $newDoc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
        $newDocXpath = new DOMXPath($newDoc);

        // Add tooltip
        $contribs = $newDocXpath->query('//contrib[@contrib-type="author"]');
        foreach ($contribs as $contrib) {
            $tooltip = '|<div class="author-tooltip">';

            $names = $newDocXpath->query('.//name', $contrib);
            foreach ($names as $name) {
                $tooltip .= '<div class="author-tooltip-name">' . $renderChildren($name) . '</div>';
            }

            // Affiliations referenced by the author, separated by ";"
            $tooltip .= '<div class="author-tooltip-affiliation">';
            $affRefs = $newDocXpath->query('.//xref[@ref-type = "aff"]', $contrib);
            for ($i = 0, $len = $affRefs->length; $i < $len; $i++) {
                $rid = $affRefs->item($i)->getAttribute('rid');
                // goto the affiliation by using the rid
                $affs = $newDocXpath->query('//aff[@id="' . $rid . '"]');
                foreach ($affs as $aff) {
                    $tooltip .= '<span class="nlm-aff">' . $renderChildren($aff) . '</span>';
                }
                if ($i != $len - 1) {
                    $tooltip .= ';';
                }
            }
            $tooltip .= '</div>';

            // Footnotes referenced by the author
            $fnRefs = $newDocXpath->query('.//xref[@ref-type = "fn"]', $contrib);
            for ($i = 0, $len = $fnRefs->length; $i < $len; $i++) {
                $rid = $fnRefs->item($i)->getAttribute('rid');
                $fns = $newDocXpath->query('//fn[@id="' . $rid . '"]');
                foreach ($fns as $fn) {
                    // Reset per footnote so an unrecognised fn-type neither
                    // reads undefined variables nor inherits the values of
                    // the previous footnote.
                    $class = '';
                    $label = '';
                    $fnType = $fn->getAttribute('fn-type');
                    if ($fnType == 'con') {
                        // Author contribution
                        $class = 'author-tooltip-contrib';
                        $label = 'Contribution: ';
                    } elseif ($fnType == 'conflict') {
                        // Competing interests
                        $class = 'author-tooltip-conflict';
                        $label = 'Competing Interests: ';
                    }
                    $tooltip .= '<div class="' . $class . '"><span class="author-tooltip-label">' . $label . '</span>';
                    $tooltip .= '<span class="author-tooltip-text">';
                    // get p tags inside fn; fall back to the footnote's own
                    // content when it has no paragraphs
                    $pTags = $newDocXpath->query('.//p', $fn);
                    if ($pTags->length == 0) {
                        $tooltip .= $fn->innerHTML;
                    }
                    foreach ($pTags as $pTag) {
                        $tooltip .= '<span class="nlm-p">' . trim($pTag->innerHTML) . '</span>';
                    }
                    $tooltip .= '</span></div>';
                }
            }
            $tooltip .= '</div>';
            $contrib->setAttribute('tooltip', $tooltip);
        }

        $institutions = $newDocXpath->query('//*[@class="elife-institution"]');
        $i = 0;
        foreach ($institutions as $institution) {
            // Drop an institution when the same text shows up again later in
            // the list. The scan starts after the current entry; starting at
            // the entry itself would match every institution against itself.
            $removenode = false;
            for ($j = $i + 1; $j < $institutions->length; $j++) {
                if ($institution->nodeValue == $institutions->item($j)->nodeValue) {
                    $removenode = true;
                    break;
                }
            }
            $i++;

            // get the first node and if it contains only ", " then remove it
            if ($institution->hasChildNodes()) {
                $firstChild = $institution->childNodes->item(0);
                if ($firstChild->nodeType == XML_TEXT_NODE && trim($firstChild->nodeValue) == ',') {
                    DOMRemove($firstChild);
                }
            }

            // collapse doubled commas in the direct text nodes
            $directTextNodes = $newDocXpath->query('./text()', $institution);
            foreach ($directTextNodes as $directTextNode) {
                $directTextNode->nodeValue = preg_replace('/\\s*\\,\\s*\\,\\s*/u', ', ', $directTextNode->nodeValue);
            }

            if ($removenode && $institution->parentNode !== null) {
                $parent = $institution->parentNode;
                // The node following the institution is removed with it;
                // there may be none when the institution is the last child.
                if ($institution->nextSibling !== null) {
                    $parent->removeChild($institution->nextSibling);
                }
                $parent->removeChild($institution);
            }
        }

        // Move equation to the end of disp-formula
        $dispFormulas = $newDocXpath->query('//span[contains(@class,"disp-formula")]');
        foreach ($dispFormulas as $dispFormula) {
            $label = $newDocXpath->query('.//span[contains(@class, "disp-formula-label")]', $dispFormula);
            if ($label->length != 0) {
                $dispFormula->appendChild($label->item(0));
            }
        }

        // get the queries and return appropriate html snippets
        $queries = $xpath->query('.//query', $article);
        if ($queries->length == 0) {
            $rootNode = $newDoc->documentElement;
            if ($rootNode !== null) {
                $article->appendChild($doc->importNode($rootNode, true));
            }
        } else {
            foreach ($queries as $query) {
                $dataBlocks = $newDocXpath->query($query->getAttribute('xpath'));
                if ($dataBlocks === false) {
                    // malformed XPath expression in the request
                    continue;
                }
                foreach ($dataBlocks as $dataBlock) {
                    $dataNode = $doc->createElement('data');
                    $dataNode->appendChild($doc->importNode($dataBlock, true));
                    $query->appendChild($dataNode);
                }
            }
        }
    }

    return $stripXmlDeclaration($doc->saveXML());
}
/**
 * DOMAttr subclass with a marker method, used to show which class the DOM
 * extension instantiates for attribute nodes.
 */
class myAttribute extends DOMAttr
{
    public function testit()
    {
        return "HELLO Attribute";
    }
}

/**
 * DOMElement subclass with a marker method, used to show which class the
 * DOM extension instantiates for element nodes.
 */
class myElement extends DOMElement
{
    public function testit()
    {
        return "HELLO Element";
    }
}

$document = new DOMDocument();
$document->registerNodeClass('DOMAttr', 'myAttribute');
$document->registerNodeClass('DOMElement', 'myElement');
$document->appendChild(new DOMElement('root'));

$rootElement = $document->documentElement;
$rootElement->setAttribute('a', 'a1');

// Class and marker output for the root element
echo get_class($rootElement) . "\n";
echo $rootElement->testit(), "\n";

// Class and marker output for its attribute
$attr = $rootElement->getAttributeNode('a');
echo get_class($attr) . "\n";
echo $attr->testit(), "\n";

// Release the attribute object, clear the DOMAttr registration and fetch
// the attribute again.
// NOTE(review): the final testit() call presumably fails on purpose, since
// the re-fetched node should no longer be a myAttribute; confirm.
unset($attr);
$document->registerNodeClass('DOMAttr', NULL);
$attr = $rootElement->getAttributeNode('a');
echo get_class($attr) . "\n";
echo $attr->testit(), "\n";
/**
 * Wraps an article HTML fragment in a complete XHTML page and parses it
 * into a DOMDocument whose elements are JSLikeHTMLElement instances.
 *
 * The page skeleton carries the current $wgLanguageCode in its lang
 * attributes. Parser warnings are suppressed and recovery mode is enabled
 * before parsing.
 *
 * @param string $articleHtml HTML fragment to place inside <body>.
 * @return DOMDocument The parsed, normalised document.
 */
private static function htmlToDoc($articleHtml)
{
    global $wgLanguageCode;

    // Give the parser a well-formed page around the fragment
    $page = <<<XHTML_PAGE
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="{$wgLanguageCode}" lang="{$wgLanguageCode}">
<head>
\t<meta http-equiv="Content-Type" content="text/html; charset='utf-8'" />
</head>
<body>
{$articleHtml}
</body>
</html>
XHTML_PAGE;

    $parsed = new DOMDocument('1.0', 'utf-8');
    $parsed->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    $parsed->strictErrorChecking = false;
    $parsed->recover = true;

    @$parsed->loadHTML($page);
    $parsed->normalizeDocument();

    return $parsed;
}
/**
 * Replaces the translatable text of an HTML page with numbered
 * placeholders of the form {{$N$}} and collects the extracted strings.
 *
 * Extraction runs in this order:
 *  1. attributes named in data-swete-translate-attrs, plus alt and title;
 *  2. elements carrying a translate attribute (optionally split on the
 *     delimiters listed in data-swete-delimiters);
 *  3. remaining text nodes, grouped with adjacent text/inline siblings;
 *  4. the content of the keywords and description meta tags.
 *
 * The collected strings end up in $this->strings (each passed through
 * cleanString()); the placeholder number is the index into that array.
 *
 * @param string $html The page markup.
 * @return string The markup with translatable content replaced by placeholders.
 */
public function extractStrings($html)
{
    $dom = null;
    if ($this->useHtml5Parser) {
        $intro = substr($html, 0, 255);
        if (stripos($intro, '<!DOCTYPE html>') !== false) {
            // this is html5 so we'll use the html5 parser
            require_once 'lib/HTML5.php';
            $options = new StdClass();
            $options->decorateDocument = function (DOMDocument $dom) {
                $dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
            };
            $dom = HTML5::loadHTML($html, $options);
            // noscripts contents are treated like text which causes problems when
            // filters/replacements are run on them. Let's just remove them.
            // The list returned by getElementsByTagName() is live, so iterate
            // over a snapshot; removing while iterating the live list skips nodes.
            foreach (iterator_to_array($dom->getElementsByTagName('noscript')) as $noscript) {
                $noscript->parentNode->removeChild($noscript);
            }
        }
    }

    if (!isset($dom)) {
        $dom = new DOMDocument();
        $dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
        // The XML processing instruction forces the parser to read the
        // markup as UTF-8; it is removed again right after loading.
        @$dom->loadHTML('<?xml encoding="UTF-8">' . $html);
        foreach (iterator_to_array($dom->childNodes) as $item) {
            if ($item->nodeType == XML_PI_NODE) {
                $dom->removeChild($item);
            }
        }
        $dom->encoding = 'UTF-8';
    }

    $strings = array();
    $this->strings =& $strings;
    $stringsIndex = array();
    $xpath = new DOMXPath($dom);
    $this->translateDates($xpath);

    // 1. Attributes
    $translateAttrs = $xpath->query('//*[@data-swete-translate-attrs or @alt or @title]');
    $otherAtts = array('title', 'alt');
    foreach ($translateAttrs as $el) {
        if ($el->hasAttribute('data-swete-translate-attrs')) {
            $attNames = explode(' ', $el->getAttribute('data-swete-translate-attrs'));
        } else {
            $attNames = array();
        }
        foreach ($otherAtts as $attName) {
            if ($el->hasAttribute($attName)) {
                $attNames[] = $attName;
            }
        }
        // An attribute can be listed explicitly and also be alt/title.
        // Handle it once, otherwise the second pass would extract the
        // placeholder written by the first.
        foreach (array_unique($attNames) as $attName) {
            $attVal = $el->getAttribute($attName);
            if ($attVal and trim($attVal)) {
                $index = count($strings);
                $strings[] = trim(_n($attVal));
                $stringsIndex[trim(_n($attVal))] = $index;
                $el->setAttribute($attName, '{{$' . $index . '$}}');
                $index++;
            }
        }
    }

    // 2. Elements explicitly marked with a translate attribute
    $translatables = $xpath->query('//*[@translate]');
    foreach ($translatables as $tr) {
        $index = count($strings);
        $trStr = trim(_n($tr->innerHTML));
        if ($tr->hasAttribute('data-swete-delimiters')) {
            $delim = trim($tr->getAttribute('data-swete-delimiters'));
            if ($delim) {
                // The first character of the attribute separates the
                // delimiters that follow it.
                $quotedDelimiters = array();
                foreach (explode($delim[0], $delim) as $delimiter) {
                    if (!trim($delimiter)) {
                        continue;
                    }
                    $quotedDelimiters[] = preg_quote($delimiter, '/');
                }
                if ($quotedDelimiters) {
                    // A single capture group around the alternation keeps the
                    // split result strictly alternating text/delimiter, which
                    // the odd/even test below depends on. One group per
                    // alternative would add an empty entry for each unmatched
                    // group.
                    $pattern = '/(' . implode('|', $quotedDelimiters) . ')/';
                    $toks = preg_split($pattern, $trStr, -1, PREG_SPLIT_DELIM_CAPTURE);
                    $innerHTML = array();
                    foreach ($toks as $tokIdx => $tok) {
                        if (!trim($tok)) {
                            $innerHTML[] = $tok;
                        } elseif ($tokIdx % 2 === 1) {
                            // It is a delimiter
                            $innerHTML[] = $tok;
                        } else {
                            $strings[] = trim(_n($tok));
                            $stringsIndex[trim(_n($tok))] = $index;
                            $innerHTML[] = '{{$' . $index . '$}}';
                            $index++;
                            if ($tok[strlen($tok) - 1] === ' ') {
                                $innerHTML[] = ' ';
                            }
                        }
                    }
                    $tr->innerHTML = implode('', $innerHTML);
                    $trStr = '';
                }
            }
        }
        if ($trStr) {
            $strings[] = trim(_n($trStr));
            $stringsIndex[trim(_n($trStr))] = $index;
            $tr->innerHTML = '{{$' . $index . '$}}';
            $index++;
        }
        $gchildren = $xpath->query('./text()', $tr);
        foreach ($gchildren as $gchild) {
            $gchild->isCovered = 1;
        }
    }

    // Text directly inside notranslate elements is marked as handled
    $untranslatables = $xpath->query('//*[@notranslate]');
    foreach ($untranslatables as $tr) {
        $gchildren = $xpath->query('./text()', $tr);
        foreach ($gchildren as $gchild) {
            $gchild->isCovered = 1;
        }
    }

    // 3. Remaining text nodes. Copy the result into an array first because
    // the loop below removes and replaces nodes.
    $textX = $xpath->query('//text()[not(ancestor::script | ancestor::style | ancestor::*[@notranslate] | ancestor::*[@translate])]');
    $text = array();
    foreach ($textX as $x) {
        $text[] = $x;
    }

    // True when a sibling cannot be part of a text group: it is neither
    // text nor inline, or it is marked notranslate / data-swete-block.
    $inlineTags = self::$inlineTags;
    $endsGroup = function ($node) use ($inlineTags) {
        if ($node->nodeType != XML_TEXT_NODE and !in_array(strtolower(@$node->tagName), $inlineTags) and !($node instanceof DOMElement and $node->hasAttribute('data-swete-inline'))) {
            return true;
        }
        if ($node instanceof DOMElement and $node->hasAttribute('notranslate')) {
            return true;
        }
        if ($node instanceof DOMElement and $node->hasAttribute('data-swete-block')) {
            return true;
        }
        return false;
    };

    // Serialised nodes use the XML short form (<b/>); expand it for every
    // tag that is not a void element. Built once, as a closure:
    // create_function() no longer exists in PHP 8.
    $expandEmptyTags = function ($m) {
        $xhtml_tags = array("br", "hr", "input", "frame", "img", "area", "link", "col", "base", "basefont", "param");
        return in_array($m[1], $xhtml_tags) ? "<$m[1]$m[2]/>" : "<$m[1]$m[2]></$m[1]>";
    };

    foreach ($text as $tx) {
        if (!$tx instanceof DOMNode) {
            continue;
        }
        if (!isset($tx->parentNode)) {
            continue;
        }
        if (!$tx->parentNode instanceof DOMElement) {
            continue;
        }
        // the data-swete-translate is a little different than the notranslate attribute
        // the notranslate attribute confers block level status to its owner tag.
        // data-swete-translate simply marks a segment of text as not to be translated
        // (or to be translated) within the flow of the document. Therefore we don't
        // use a text node whose parent has the data-swete-translate as an anchor
        // to start building a group of text. But we will allow a tag with this
        // to be included in a group of text (that contains content before and/or after).
        // The SweteTools::encode() method will take care of variablizing the content
        // at translation time.
        if ($tx->parentNode->hasAttribute('data-swete-translate') and $tx->parentNode->getAttribute('data-swete-translate') === '0') {
            continue;
        }
        if (!trim($tx->nodeValue)) {
            continue;
        }
        if (in_array(strtolower($tx->parentNode->tagName), array('comment', 'script', 'style', 'code'))) {
            continue;
        }
        if ($this->isCovered($tx)) {
            continue;
        }

        $group = array();
        $siblings = $tx->parentNode->childNodes;
        if ($siblings->length > 0) {
            // Locate the text node among its siblings
            $pos = -1;
            foreach ($siblings as $idx => $child) {
                if ($child === $tx) {
                    $pos = $idx;
                    break;
                }
            }
            $mypos = $pos;

            // Walk backwards to the first sibling that can still belong to
            // the same run of text/inline nodes
            for ($i = $pos; $i >= 0; $i--) {
                $node = $siblings->item($i);
                if ($endsGroup($node)) {
                    break;
                }
                $pos = $i;
            }

            // Only build the group from the text node that starts the run
            if ($mypos == $pos or $this->isFirstText($tx->parentNode, $mypos, $pos)) {
                $startIdx = $pos;
                for ($i = $startIdx; $i < $siblings->length; $i++) {
                    $node = $siblings->item($i);
                    if (!$node) {
                        break;
                    }
                    if ($endsGroup($node)) {
                        break;
                    }
                    $group[] = $node;
                }
            }
        } else {
            $group[] = $tx;
        }

        // Requires PHP 5.3.6 or higher: passing a node to saveXML()
        $combinedText = array();
        foreach ($group as $item) {
            $combinedText[] = preg_replace_callback('#<(\\w+)([^>]*)\\s*/>#s', $expandEmptyTags, $dom->saveXML($item));
        }
        $combinedText = implode('', $combinedText);

        // Surrounding whitespace stays in the document, outside the placeholder
        $leadingWhiteSpace = '';
        $trailingWhiteSpace = '';
        if (preg_match('#^[\\p{Z}\\s]+#', $combinedText, $m1)) {
            $leadingWhiteSpace = $m1[0];
        }
        if (preg_match('#[\\p{Z}\\s]+$#', $combinedText, $m1)) {
            $trailingWhiteSpace = $m1[0];
        }

        $combinedText = _n($this->replaceStrings($combinedText));
        if (!trim(str_ireplace(' ', '', $combinedText))) {
            continue;
        }

        // Identical strings share one placeholder
        if (isset($stringsIndex[$combinedText])) {
            $index = $stringsIndex[$combinedText];
        } else {
            $index = count($strings);
            $strings[] = $combinedText;
            $stringsIndex[$combinedText] = $index;
        }

        foreach ($group as $gnode) {
            $gchildren = @$xpath->query('./text()', $gnode);
            if (!$gchildren) {
                continue;
            }
            foreach ($gchildren as $gchild) {
                $gchild->isCovered = 1;
            }
        }

        // Collapse the group into its first node, then swap that node for
        // the placeholder text
        for ($i = 1; $i < count($group); $i++) {
            if (@$group[$i]->parentNode) {
                $group[$i]->parentNode->removeChild($group[$i]);
            }
        }
        if (!@$group[0]) {
            continue;
        }
        if (!@$group[0]->parentNode) {
            continue;
        }
        $textNodeContent = $leadingWhiteSpace . '{{$' . $index . '$}}' . $trailingWhiteSpace;
        $group[0]->parentNode->replaceChild($dom->createTextNode($textNodeContent), $group[0]);
    }

    // 4. Now we need to translate the keywords and the description
    foreach ($xpath->query('//meta[@name="keywords" or @name="description"]') as $el) {
        if (!$el->hasAttribute('content')) {
            continue;
        }
        $content = _n($el->getAttribute('content'));
        if (isset($stringsIndex[$content])) {
            $index = $stringsIndex[$content];
        } else {
            $index = count($strings);
            $strings[] = $content;
            $stringsIndex[$content] = $index;
        }
        $el->setAttribute('content', '{{$' . $index . '$}}');
    }

    $this->strings = array_map(array($this, 'cleanString'), $this->strings);

    return $dom->saveHTML();
}
/**
 * Renders the thumb_html template for an article body and returns an XPath
 * object over the parsed result.
 *
 * While the body is post-processed and the template rendered, the global
 * $wgTitle is pointed at the title returned by $r->getTitle(); the previous
 * value is put back before returning.
 *
 * @param string $bodyHtml Article body HTML, handed to WikihowArticleHTML::postProcess().
 * @param object $r        Object providing getTitle().
 * @return DOMXPath XPath over the parsed template output.
 */
public static function getXPath(&$bodyHtml, &$r)
{
    global $wgWikiHowSections, $IP, $wgTitle;

    $siteLanguage = MobileWikihow::getSiteLanguage();

    // munge steps first
    $opts = array('no-ads' => true);
    require_once "{$IP}/skins/WikiHowSkin.php";

    $previousTitle = $wgTitle;
    $wgTitle = $r->getTitle();

    $processedBody = WikihowArticleHTML::postProcess($bodyHtml, $opts);
    $vars = array(
        'bodyHtml' => $processedBody,
        'lang' => $siteLanguage,
    );

    EasyTemplate::set_path(dirname(__FILE__) . '/');
    $html = EasyTemplate::html('thumb_html.tmpl.php', $vars);

    // Parse leniently: no strict checking, recovery on, warnings suppressed
    require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";
    $parsed = new DOMDocument('1.0', 'utf-8');
    $parsed->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    $parsed->strictErrorChecking = false;
    $parsed->recover = true;
    @$parsed->loadHTML($html);
    $parsed->normalizeDocument();

    $result = new DOMXPath($parsed);

    $wgTitle = $previousTitle;

    return $result;
}
<?php
/** Empty DOMNode subclass used as a registration target. */
class MyNode extends DOMNode
{
}

/** Empty DOMElement subclass used as a registration target. */
class MyElement extends DOMElement
{
}

$document = new DOMDocument();

// Dump what registerNodeClass() returns for each base class / subclass pair
$nodeResult = $document->registerNodeClass('DOMNode', 'MyNode');
var_dump($nodeResult);

$elementResult = $document->registerNodeClass('DOMElement', 'MyElement');
var_dump($elementResult);