Esempio n. 1
0
 /**
  * Registers the extended node classes on the given document.
  *
  * Every built-in DOM node class listed in the map below is replaced by the
  * class named in the matching late-static-bound constant of this class.
  *
  * @param \DOMDocument $doc document on which the node classes are registered
  */
 protected function registerClasses(\DOMDocument $doc)
 {
     // built-in DOM class => replacement class, in registration order
     $nodeClassMap = array(
         '\\DOMElement' => static::ELEMENT_CLASS,
         '\\DOMAttr' => static::ATTR_CLASS,
         '\\DOMText' => static::TEXT_CLASS,
         '\\DOMCdataSection' => static::CDATA_CLASS,
         '\\DOMComment' => static::COMMENT_CLASS,
     );
     foreach ($nodeClassMap as $baseClass => $extendedClass) {
         $doc->registerNodeClass($baseClass, $extendedClass);
     }
     // Registration for DOMDocumentFragment is currently disabled:
     #$doc->registerNodeClass('\DOMDocumentFragment', static::FRAGMENT_CLASS);
 }
Esempio n. 2
0
 /**
  * Constructor
  *
  * Opens the epub archive, reads META-INF/container.xml to find the path of
  * the OPF package file (stored in $this->meta) and loads that file into
  * $this->xml / $this->xpath.
  *
  * @param string $file path to epub file to work on
  * @throws Exception if the archive, the container data or the metadata could not be loaded
  */
 public function __construct($file)
 {
     // open file
     $this->file = $file;
     $zip = new ZipArchive();
     // ZipArchive::open() returns true on success and an integer error code
     // on failure. Those codes are truthy, so negating the result would
     // never detect a failed open; compare against true instead.
     if (@$zip->open($this->file) !== true) {
         throw new Exception('Failed to read epub file');
     }
     // read container data
     $data = $zip->getFromName('META-INF/container.xml');
     if ($data == false) {
         throw new Exception('Failed to access epub container data');
     }
     $xml = new DOMDocument();
     $xml->registerNodeClass('DOMElement', 'EPubDOMElement');
     $xml->loadXML($data);
     $xpath = new EPubDOMXPath($xml);
     $nodes = $xpath->query('//n:rootfiles/n:rootfile[@media-type="application/oebps-package+xml"]');
     // query() yields false on an invalid expression and an empty list when
     // the container declares no package file; both would otherwise end in
     // a fatal "member function on null" instead of the documented Exception.
     $rootfile = $nodes ? $nodes->item(0) : null;
     if ($rootfile === null) {
         throw new Exception('Failed to locate epub package file in container data');
     }
     $this->meta = $rootfile->attr('full-path');
     // load metadata
     $data = $zip->getFromName($this->meta);
     if (!$data) {
         throw new Exception('Failed to access epub metadata');
     }
     $this->xml = new DOMDocument();
     $this->xml->registerNodeClass('DOMElement', 'EPubDOMElement');
     // loadXML() returns false when the package file is not parseable
     if (!$this->xml->loadXML($data)) {
         throw new Exception('Failed to access epub metadata');
     }
     $this->xml->formatOutput = true;
     $this->xpath = new EPubDOMXPath($this->xml);
     $zip->close();
 }
Esempio n. 3
0
 /**
  * Parses the XHTML returned by _getContents(), runs the component
  * processing on its root element and returns the resulting inner XML with
  * the CDATA sections broken out.
  *
  * @return mixed whatever _breakOutCdata() returns for the processed markup
  * @throws Ajde_Exception when loading the XHTML raises an ErrorException
  */
 public function parse()
 {
     // Fetch the markup before any DOM objects are created
     $source = $this->_getContents();

     $document = new DOMDocument();
     $document->registerNodeClass('DOMElement', 'Ajde_Template_Parser_Xhtml_Element');
     $document->preserveWhiteSpace = false;
     $document->formatOutput = true;

     try {
         $document->loadXML($source);
     } catch (ErrorException $parseError) {
         // TODO: decide whether this should return false instead of throwing
         throw new Ajde_Exception('Xhtml Parser error: ' . $parseError->getMessage());
     }

     /* @var $rootElement DOMNode */
     $rootElement = $document->documentElement;

     // Remember the namespace URIs resolved from the root element
     $this->_defaultNS = $rootElement->lookupNamespaceURI(null);
     $this->_acNS = $rootElement->lookupNamespaceURI(Ajde_Component::AC_XMLNS);
     $this->_avNS = $rootElement->lookupNamespaceURI(Ajde_Component::AV_XMLNS);

     // Component processing, then the inner XML of the root element
     // (exclusive), then CDATA break-out
     $processedRoot = $this->_process($rootElement);
     $innerMarkup = $this->innerXml($processedRoot);

     return $this->_breakOutCdata($innerMarkup);
 }
Esempio n. 4
0
 /**
  * Creates a finder over the given markup.
  *
  * Example:
  * new ElementFinder("<html><div>test </div></html>", ElementFinder::DOCUMENT_HTML);
  *
  * @param string $data markup to load; must be a non-empty string
  * @param null|integer $documentType document type; null selects static::DOCUMENT_HTML
  * @param null|integer $options libxml option flags; null selects LIBXML_NOCDATA | LIBXML_NOERROR
  * @throws \InvalidArgumentException if $data is not a non-empty string
  */
 public function __construct($data, $documentType = null, $options = null)
 {
     if (!is_string($data) || empty($data)) {
         throw new \InvalidArgumentException('Expect not empty string');
     }
     $this->dom = new \DomDocument();
     $this->dom->registerNodeClass('DOMElement', Element::class);
     $documentType = $documentType !== null ? $documentType : static::DOCUMENT_HTML;
     $this->setDocumentType($documentType);
     # default options
     // libxml flags are bit masks and have to be combined with bitwise OR.
     // The previous bitwise AND of two distinct flags evaluated to 0, so
     // neither default option was ever applied.
     $options = $options !== null ? $options : (LIBXML_NOCDATA | LIBXML_NOERROR);
     $this->setDocumentOption($options);
     $this->setData($data);
     # set default expression to xpath
     $this->expressionTranslator = new XpathExpression();
 }
Esempio n. 5
0
 /**
  * Prepares the fixture: a source document with a <docbook> root built from
  * locateable elements, a target document with an ODT text-namespace <text>
  * root, and a fresh text processor.
  */
 public function setUp()
 {
     // Source side: elements are created as ezcDocumentLocateableDomElement
     $source = new DOMDocument();
     $source->registerNodeClass('DOMElement', 'ezcDocumentLocateableDomElement');
     $this->sourceRoot = $source->appendChild(
         $source->createElement('docbook')
     );

     // Target side: plain DOMDocument with a namespaced root
     $target = new DOMDocument();
     $this->targetRoot = $target->appendChild(
         $target->createElementNS(ezcDocumentOdt::NS_ODT_TEXT, 'text')
     );

     $this->proc = new ezcDocumentOdtTextProcessor();
 }
Esempio n. 6
0
/**
 * Builds a document with MyElement-based <foo> and <bar> children, printing
 * a marker line on entry and on exit.
 *
 * @return array the document and the value returned by init() for the <foo> element
 */
function foo()
{
    echo "Enter foo()\n";

    $document = new DOMDocument();
    $document->registerNodeClass('DOMElement', 'MyElement');

    $fooNode = $document->createElement('foo')->init();
    $document->appendChild($fooNode);

    // The <bar> element is deliberately not stored in a local variable so
    // that its wrapper object lives exactly as long as the temporary does.
    $document->appendChild(
        $document->createElement('bar')->init()
    );

    echo "Leave foo()\n";

    return array($document, $fooNode);
}
Esempio n. 7
0
 /**
  * Constructor
  *
  * Opens the epub through the given zip handler class, reads METADATA_FILE
  * to find the path of the OPF package file (stored in $this->meta) and
  * loads that file into $this->xml / $this->xpath. The zip handler is kept
  * in $this->zip.
  *
  * @param string $file path to epub file to work on
  * @param string $zipClass class to handle zip
  * @throws Exception if the archive, the container data or the metadata could not be loaded
  */
 public function __construct($file, $zipClass = 'clsTbsZip')
 {
     // open file
     $this->file = $file;
     $this->zip = new $zipClass();
     if (!$this->zip->Open($this->file)) {
         throw new Exception('Failed to read epub file');
     }
     // read container data
     if (!$this->zip->FileExists(METADATA_FILE)) {
         throw new Exception("Unable to find metadata.xml");
     }
     $data = $this->zip->FileRead(METADATA_FILE);
     if ($data == false) {
         throw new Exception('Failed to access epub container data');
     }
     $xml = new DOMDocument();
     $xml->registerNodeClass('DOMElement', 'EPubDOMElement');
     $xml->loadXML($data);
     $xpath = new EPubDOMXPath($xml);
     $nodes = $xpath->query('//n:rootfiles/n:rootfile[@media-type="application/oebps-package+xml"]');
     // query() yields false on an invalid expression and an empty list when
     // the container declares no package file; both would otherwise end in
     // a fatal "member function on null" instead of the documented Exception.
     $rootfile = $nodes ? $nodes->item(0) : null;
     if ($rootfile === null) {
         throw new Exception('Failed to locate epub package file in container data');
     }
     $this->meta = $rootfile->attr('full-path');
     // load metadata
     if (!$this->zip->FileExists($this->meta)) {
         throw new Exception("Unable to find " . $this->meta);
     }
     $data = $this->zip->FileRead($this->meta);
     if (!$data) {
         throw new Exception('Failed to access epub metadata');
     }
     $this->xml = new DOMDocument();
     $this->xml->registerNodeClass('DOMElement', 'EPubDOMElement');
     // loadXML() returns false when the package file is not parseable
     if (!$this->xml->loadXML($data)) {
         throw new Exception('Failed to access epub metadata');
     }
     $this->xml->formatOutput = true;
     $this->xpath = new EPubDOMXPath($this->xml);
 }
Esempio n. 8
0
File: main.php Progetto: bmdevel/ezc
 /**
  * Render given document
  *
  * Stores the document, hyphenator and tokenizer on the renderer, registers
  * the custom fonts, processes a re-parsed copy of the document and returns
  * whatever the driver's save() produces.
  *
  * @param ezcDocumentDocbook $document document to render
  * @param ezcDocumentPdfHyphenator $hyphenator hyphenator to use; a default one is created when null
  * @param ezcDocumentPdfTokenizer $tokenizer tokenizer to use; a default one is created when null
  * @return string
  */
 public function render(ezcDocumentDocbook $document, ezcDocumentPdfHyphenator $hyphenator = null, ezcDocumentPdfTokenizer $tokenizer = null)
 {
     if ($hyphenator === null) {
         $hyphenator = new ezcDocumentPdfDefaultHyphenator();
     }
     $this->hyphenator = $hyphenator;

     if ($tokenizer === null) {
         $tokenizer = new ezcDocumentPdfDefaultTokenizer();
     }
     $this->tokenizer = $tokenizer;

     $this->document = $document;

     // Register custom fonts in driver
     $this->registerFonts();

     // The custom element class is needed for style inferencing. The XML is
     // serialized and parsed again into a fresh DOMDocument that has the
     // class registered up front, because registering it on the existing
     // document seems not to work in all cases.
     $originalDom = $document->getDomDocument();
     $locateableDom = new DOMDocument();
     $locateableDom->registerNodeClass('DOMElement', 'ezcDocumentLocateableDomElement');
     $locateableDom->loadXml($originalDom->saveXml());

     $this->process($locateableDom);

     return $this->driver->save();
 }
// -----------------------------------------------------------
// 1. User-level doesn't override implicit native magic props.
// -----------------------------------------------------------
/**
 * DOMElement subclass with a user-level __get() handler that announces each
 * lookup it receives and serves values from a private lookup table.
 */
class MyElement extends DOMElement
{
    /** @var array property name => value served by __get() */
    private $props = array('userMagic' => 'userMagic');

    /**
     * Prints the requested name, then returns the stored value for it.
     *
     * @param string $name property being read
     * @return mixed the value from $props, or null when the name is unknown
     */
    public function __get($name)
    {
        echo "__get {$name}: ";

        return array_key_exists($name, $this->props) ? $this->props[$name] : null;
    }
}
// Build a document whose elements are created as MyElement instances and
// give it a single <Foo>Bar</Foo> root element.
$dom = new DOMDocument();
$dom->registerNodeClass('DOMElement', 'MyElement');
$dom->appendChild($dom->createElement('Foo', 'Bar'));
// Implementation-level: nodeValue is a native DOM property
var_dump($dom->documentElement->nodeValue);
// User-level, handled: 'userMagic' is a key of MyElement::$props
var_dump($dom->documentElement->userMagic);
// User-level, unhandled: 'nonExisting' is not a key of MyElement::$props
var_dump($dom->documentElement->nonExisting);
// -----------------------------------------------------------
// 2. Explicit override of the native magic prop.
// -----------------------------------------------------------
class MyElementExplicit extends DOMElement
{
    private $nodeValue;
    public function __construct()
    {
Esempio n. 10
0
<?php

// -----------------------------------------------------------
// 1. User-level doesn't override implicit native magic props.
// -----------------------------------------------------------
/**
 * DOMText subclass whose user-level __get() handler echoes back the name of
 * the property that was requested.
 */
class MyTextNode extends DOMText
{
    /**
     * Builds the reply for a property read that reaches the user-level handler.
     *
     * @param string $name property being read
     * @return string the requested name prefixed with "__get: "
     */
    public function __get($name)
    {
        $reply = "__get: {$name}";

        return $reply;
    }
}
// Build a document whose text nodes are created as MyTextNode instances;
// <Foo>Bar</Foo> gives the element one text child.
$dom = new DOMDocument();
$dom->registerNodeClass('DOMText', 'MyTextNode');
$node = $dom->appendChild($dom->createElement('Foo', 'Bar'));
// Implementation-level: textContent is a native DOM property
var_dump($node->firstChild->textContent);
// User-level: an unknown property goes through MyTextNode::__get()
var_dump($node->firstChild->nonExisting);
// -----------------------------------------------------------
// 2. Explicit override of the native magic prop.
// -----------------------------------------------------------
class MyTextExplicit extends DOMText
{
    private $textContent;
    public function __construct()
    {
        unset($this->textContent);
    }
    public function __get($name)
    {
Esempio n. 11
0
 /**
  * Constructor
  *
  * Creates the document and registers the class used for its elements.
  *
  * @codeCoverageIgnore
  *
  * @link https://www.php.net/manual/en/domdocument.construct.php DOMDocument::__construct()
  *
  * @param string $version
  *            [optional] The version number of the document as part of the XML declaration.
  * @param string $encoding
  *            [optional] The encoding of the document as part of the XML declaration.
  * @param string $element
  *            [optional] Class used to create elements in the document. Must extend <code>DOMElement</code>.
  *            Only the first 256 bytes are used; a falsy value selects the default element class.
  */
 public function __construct($version = '1.0', $encoding = 'UTF-8', $element = null)
 {
     // Truncate the requested class name and fall back to the default
     // element class whenever the result is falsy.
     $elementClass = @substr($element, 0, 256);
     if (!$elementClass) {
         $elementClass = '\\BLW\\Model\\DOM\\Element';
     }

     // Parent Constructor
     parent::__construct($version, $encoding);

     // Update Element class.
     parent::registerNodeClass('DOMElement', $elementClass);
 }
Esempio n. 12
0
    /**
     * Parse and transform the document from the old HTML for NS_MAIN articles to the new mobile
     * style. This should probably be pulled out and added to a subclass that can then be extended for
     * builders that focus on building NS_MAIN articles
     *
     * The article HTML is post-processed, wrapped in a full XHTML document and loaded into a DOM.
     * The DOM is then cleaned up (edit links, templates, corners, quiz), media is resized for the
     * current device, the intro paragraph and first image are extracted, related articles are
     * turned into boxes, and the serialized result is split into sections on <h2>.
     *
     * @param string $article article HTML; passed by reference and overwritten with the
     *                        result of WikihowArticleHTML::postProcess()
     * @return array array($sections, $intro, $firstImage): sections keyed by the values of the
     *               section map (each with 'name' and 'html'), the intro text, and the name of
     *               the first intro image ('' when none was found)
     */
    protected function parseNonMobileArticle(&$article)
    {
        global $IP, $wgContLang, $wgLanguageCode;
        $sectionMap = array(wfMsg('Intro') => 'intro', wfMsg('Ingredients') => 'ingredients', wfMsg('Steps') => 'steps', wfMsg('Video') => 'video', wfMsg('Tips') => 'tips', wfMsg('Warnings') => 'warnings', wfMsg('relatedwikihows') => 'relatedwikihows', wfMsg('sourcescitations') => 'sources', wfMsg('thingsyoullneed') => 'thingsyoullneed', wfMsg('article_info') => 'article_info');
        $lang = MobileWikihow::getSiteLanguage();
        $imageNsText = $wgContLang->getNsText(NS_IMAGE);
        $device = $this->getDevice();
        // $title is assigned inside the image and related-article loops and read again when the
        // article info is built; define it up front so that read never hits an undefined variable.
        // NOTE(review): getArticleInfo() receives whichever title those loops left behind -
        // confirm that this is intended.
        $title = null;
        // munge steps first
        $opts = array('no-ads' => true);
        $article = WikihowArticleHTML::postProcess($article, $opts);
        // Make doc correctly formed
        $articleText = <<<DONE
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="{$lang}" lang="{$lang}">
<head>
\t<meta http-equiv="Content-Type" content="text/html; charset='utf-8'" />
</head>
<body>
{$article}
</body>
</html>
DONE;
        require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";
        $doc = new DOMDocument('1.0', 'utf-8');
        $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
        $doc->strictErrorChecking = false;
        $doc->recover = true;
        @$doc->loadHTML($articleText);
        $doc->normalizeDocument();
        $xpath = new DOMXPath($doc);
        // Delete #featurestar node
        $node = $doc->getElementById('featurestar');
        if (!empty($node)) {
            $node->parentNode->removeChild($node);
        }
        $node = $doc->getElementById('newaltmethod');
        if (!empty($node)) {
            $node->parentNode->removeChild($node);
        }
        // Remove all "Edit" links
        $nodes = $xpath->query('//a[@id = "gatEditSection"]');
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
        // Resize youtube video
        $nodes = $xpath->query('//embed');
        foreach ($nodes as $node) {
            $url = '';
            // An <embed> without a src attribute is treated like a non-youtube embed
            $srcAttr = $node->attributes->getNamedItem('src');
            $src = $srcAttr ? $srcAttr->nodeValue : '';
            if (!$device['show-youtube'] || stripos($src, 'youtube.com') === false) {
                $parent = $node->parentNode;
                $grandParent = $parent->parentNode;
                if ($grandParent && $parent) {
                    $grandParent->removeChild($parent);
                }
            } else {
                // Shrink both the <embed> and its parent element. DOM nodes are object handles,
                // so a plain array is enough; a separate loop variable keeps $node intact.
                foreach (array($node, $node->parentNode) as $sizedNode) {
                    $widthAttr = $sizedNode->attributes->getNamedItem('width');
                    if (!$widthAttr) {
                        // nothing to shrink without a width attribute
                        continue;
                    }
                    $oldWidth = (int) $widthAttr->nodeValue;
                    $newWidth = $device['max-video-width'];
                    if ($newWidth < $oldWidth) {
                        $widthAttr->nodeValue = (string) $newWidth;
                        $heightAttr = $sizedNode->attributes->getNamedItem('height');
                        if ($heightAttr) {
                            $oldHeight = (int) $heightAttr->nodeValue;
                            $newHeight = (int) round($newWidth * $oldHeight / $oldWidth);
                            $heightAttr->nodeValue = (string) $newHeight;
                        }
                    }
                }
            }
        }
        // Remove templates from intro so that they don't muck up
        // the text and images we extract
        $nodes = $xpath->query('//div[@class = "template_top"]');
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
        // Grab intro text
        $intro = '';
        // Defined up front: the query may match nothing, and both values are used below
        $introNode = null;
        $firstImage = '';
        $nodes = $xpath->query('//body/div/p');
        foreach ($nodes as $i => $node) {
            $text = $node->textContent;
            if (!empty($text) && $i == 0) {
                $introNode = $node;
                $intro = Wikitext::removeRefsFromFlattened($text);
                break;
            }
        }
        if ($introNode) {
            // Grab first image from article
            $imgs = $xpath->query('.//img', $introNode->parentNode);
            foreach ($imgs as $img) {
                // parent is an <a> tag
                $parent = $img->parentNode;
                if ($parent->nodeName == 'a') {
                    $href = $parent->attributes->getNamedItem('href')->nodeValue;
                    if (preg_match('@(Image|' . $imageNsText . '):@', $href)) {
                        $firstImage = preg_replace('@^.*(Image|' . $imageNsText . '):([^:]*)([#].*)?$@', '$2', $href);
                        $firstImage = urldecode($firstImage);
                        break;
                    }
                }
            }
            // Remove intro node
            $parent = $introNode->parentNode;
            $parent->removeChild($introNode);
        }
        // Get rid of the <span> element to standardize the html for the
        // next dom query
        $nodes = $xpath->query('//div/span/a[@class = "image"]');
        foreach ($nodes as $a) {
            $parent = $a->parentNode;
            $grandParent = $parent->parentNode;
            $grandParent->replaceChild($a, $parent);
        }
        // Resize all resize-able images
        $nodes = $xpath->query('//div/a[@class = "image"]/img');
        $imgNum = 1;
        foreach ($nodes as $img) {
            $srcNode = $img->attributes->getNamedItem('src');
            $widthNode = $img->attributes->getNamedItem('width');
            $width = (int) $widthNode->nodeValue;
            $heightNode = $img->attributes->getNamedItem('height');
            $height = (int) $heightNode->nodeValue;
            $imageClasses = $img->parentNode->parentNode->attributes->getNamedItem('class')->nodeValue;
            if (stristr($imageClasses, "tcenter") !== false) {
                $newWidth = $device['full-image-width'];
            } else {
                $newWidth = $device['max-image-width'];
            }
            // A missing or zero width attribute must not trigger a division by zero
            $newHeight = $width !== 0 ? (int) round($newWidth * $height / $width) : 0;
            $a = $img->parentNode;
            // Links without an href carry the image name in their onclick handler instead
            $hrefAttr = $a->attributes->getNamedItem('href');
            $href = $hrefAttr ? $hrefAttr->nodeValue : '';
            if (!$href) {
                $onclickAttr = $a->attributes->getNamedItem('onclick');
                $onclick = $onclickAttr ? $onclickAttr->nodeValue : '';
                $onclick = preg_replace('@.*",[ ]*"@', '', $onclick);
                $onclick = preg_replace('@".*@', '', $onclick);
                $imgName = preg_replace('@.*(Image|' . $imageNsText . '|' . urlencode($imageNsText) . '):@', '', $onclick);
            } else {
                $imgName = preg_replace('@^/(Image|' . $imageNsText . '|' . urlencode($imageNsText) . '):@', '', $href);
            }
            $title = Title::newFromURL($imgName, NS_IMAGE);
            if (!$title) {
                $imgName = urldecode($imgName);
                $title = Title::newFromURL($imgName, NS_IMAGE);
            }
            if ($title) {
                $image = RepoGroup::singleton()->findFile($title);
                if ($image) {
                    list($thumb, $newWidth, $newHeight) = self::makeThumbDPI($image, $newWidth, $newHeight, $device['enlarge-thumb-high-dpi']);
                    $url = wfGetPad($thumb->getUrl());
                    $srcNode->nodeValue = $url;
                    $widthNode->nodeValue = $newWidth;
                    $heightNode->nodeValue = $newHeight;
                    // change surrounding div width and height
                    $div = $a->parentNode;
                    $styleNode = $div->attributes->getNamedItem('style');
                    //removing the set width/height
                    if ($styleNode) {
                        $styleNode->nodeValue = '';
                    }
                    //default width/height for the srcset
                    $bigWidth = 600;
                    $bigHeight = 800;
                    // change grandparent div width too
                    $grandparent = $div;
                    if ($grandparent && $grandparent->nodeName == 'div') {
                        $class = $grandparent->attributes->getNamedItem('class');
                        if ($class) {
                            $isThumb = stristr($class->nodeValue, 'mthumb') !== false;
                            $isRight = stristr($class->nodeValue, 'tright') !== false;
                            $isLeft = stristr($class->nodeValue, 'tleft') !== false;
                            $isCenter = stristr($class->nodeValue, 'tcenter') !== false;
                            if ($isThumb) {
                                // The style attribute may be absent; only write to it when present
                                $style = $grandparent->attributes->getNamedItem('style');
                                if ($isRight) {
                                    if ($style) {
                                        $style->nodeValue = 'width:' . $newWidth . 'px;height:' . $newHeight . 'px;';
                                    }
                                    $bigWidth = 300;
                                    $bigHeight = 500;
                                } elseif ($isCenter) {
                                    if ($style) {
                                        $style->nodeValue = 'width:' . $newWidth . 'px;height:' . $newHeight . 'px;';
                                    }
                                    $bigWidth = 600;
                                    $bigHeight = 800;
                                } elseif ($isLeft) {
                                    //if its centered or on the left, give it double the width if too big
                                    // NOTE(review): the style value was blanked a few lines above, so
                                    // this width match looks like it can never succeed - confirm.
                                    if ($style) {
                                        $oldStyle = $style->nodeValue;
                                        $matches = array();
                                        preg_match('@(width:\\s*)[0-9]+@', $oldStyle, $matches);
                                        if (!empty($matches[0])) {
                                            $curSize = intval(substr($matches[0], 6));
                                            //width: = 6
                                            if ($newWidth * 2 < $curSize) {
                                                $existingCSS = preg_replace('@(width:\\s*)[0-9]+@', 'width:' . $newWidth * 2, $oldStyle);
                                                $style->nodeValue = $existingCSS;
                                            }
                                        }
                                    }
                                    $bigWidth = 300;
                                    $bigHeight = 500;
                                }
                            }
                        }
                    }
                    list($thumb, $newWidth, $newHeight) = self::makeThumbDPI($image, $bigWidth, $bigHeight, $device['enlarge-thumb-high-dpi']);
                    $url = wfGetPad($thumb->getUrl());
                    $img->setAttribute('srcset', $url . ' ' . $newWidth . 'w');
                    //if we couldn't make it big enough, let's add a class
                    if ($newWidth < $bigWidth) {
                        $imgclass = $img->getAttribute('class');
                        $img->setAttribute('class', $imgclass . ' not_huge');
                    }
                    //add the hidden info
                    $a->setAttribute('id', 'image-zoom-' . $imgNum);
                    $a->setAttribute('class', 'image-zoom');
                    $a->setAttribute('href', '#');
                    global $wgServerName;
                    $href = $wgServerName . $href;
                    if (!preg_match("/^http:\\/\\//", $href)) {
                        // The undefined $serverName that used to be concatenated here always
                        // evaluated to an empty string, so only the scheme is prepended.
                        $href = "http://" . $href;
                    }
                    // Strip every "m." occurrence. The former pattern escaped the "m" (\m), which
                    // is an unrecognized escape and fails to compile under PCRE2 (PHP >= 7.3);
                    // the literal "m" matches exactly what the old pattern matched under PCRE1.
                    $href = preg_replace("/m\\./", "", $href);
                    $href = preg_replace("/^http:\\/\\/wikihow\\.com/", "http://www.wikihow.com", $href);
                    $details = array('url' => $url, 'width' => $newWidth, 'height' => $newHeight, 'credits_page' => $href);
                    $newDiv = new DOMElement('div', htmlentities(json_encode($details)));
                    $a->appendChild($newDiv);
                    $newDiv->setAttribute('style', 'display:none;');
                    $newDiv->setAttribute('id', 'image-details-' . $imgNum);
                    $imgNum++;
                } else {
                    //huh? can't find it? well, then let's not display it
                    $img->parentNode->parentNode->parentNode->parentNode->setAttribute('style', 'display:none;');
                }
            } else {
                //huh? can't find it? well, then let's not display it
                $img->parentNode->parentNode->parentNode->parentNode->setAttribute('style', 'display:none;');
            }
        }
        // Remove template from images, add new zoom one
        $nodes = $xpath->query('//img');
        foreach ($nodes as $node) {
            $src = $node->attributes ? $node->attributes->getNamedItem('src') : null;
            $src = $src ? $src->nodeValue : '';
            if (stripos($src, 'magnify-clip.png') !== false) {
                $parent = $node->parentNode;
                $parent->parentNode->removeChild($parent);
            }
        }
        //get rid of the corners and watermarks
        $nodes = $xpath->query('//div[@class = "corner top_left"
                                or @class = "corner bottom_left"
                                or @class = "corner top_right"
                                or @class = "corner bottom_right"
                                or @class = "wikihow_watermark"]');
        foreach ($nodes as $node) {
            $parent = $node->parentNode;
            $parent->removeChild($node);
        }
        //gotta swap in larger images if the client's width is big enough
        //(i.e. tablet et al)
        $nodes = $xpath->query('//img[@class = "mwimage101"
                                or @class = "mwimage101 not_huge"]');
        foreach ($nodes as $node) {
            //make a quick unique id for this
            $id = md5($node->attributes->getNamedItem('src')->nodeValue) . rand();
            $node->setAttribute('id', $id);
            //pass it to our custom function for swapping in larger images
            $swap_it = 'if (isBig) WH.mobile.swapEm("' . $id . '");';
            $scripttag = new DOMElement('script', htmlentities($swap_it));
            $node->appendChild($scripttag);
        }
        // Change the width attribute from any tables with a width set.
        // This often happen around video elements.
        $nodes = $xpath->query('//table/@width');
        foreach ($nodes as $node) {
            $width = preg_replace('@px\\s*$@', '', $node->nodeValue);
            if ($width > $device['screen-width'] - 20) {
                $node->nodeValue = $device['screen-width'] - 20;
            }
        }
        // Surround step content in its own div. We do this to support other features like checkmarks
        $nodes = $xpath->query('//div[@id="steps"]/ol/li');
        foreach ($nodes as $node) {
            $node->innerHTML = '<div class="step_content">' . $node->innerHTML . '</div>';
        }
        //remove quiz
        $nodes = $xpath->query('//div[@class = "quiz_cta"]');
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
        //remove quiz header
        $nodes = $xpath->query('//h3/span[text()="Quiz"]');
        foreach ($nodes as $node) {
            $parentNode = $node->parentNode;
            $parentNode->parentNode->removeChild($parentNode);
        }
        //pull out the first related wikihows and format them
        // NOTE(review): the "> 6" limit lets up to seven boxes through - confirm the intended count.
        $nodes = $xpath->query('//div[@id="relatedwikihows"]/ul/li');
        $count = 0;
        $related_boxes = array();
        $last_node = null;
        $last_parent = null;
        foreach ($nodes as $node) {
            if ($count > 6) {
                break;
            }
            //grab the title
            if (!preg_match('@href=\\"\\/(.*?)?\\"@', $node->innerHTML, $m)) {
                // no link in this list item, so there is no title to look up
                continue;
            }
            $title = Title::newFromText($m[1]);
            if (!$title) {
                continue;
            }
            $temp_box = $this->makeRelatedBox($title);
            if ($temp_box) {
                $related_boxes[] = $temp_box;
                $last_node = $node;
                $parent = $node->parentNode;
                $last_parent = $parent;
                $parent->removeChild($node);
                $count++;
            }
        }
        //only 1? not enough. throw it back
        if ($count == 1) {
            $related_boxes = array();
            $last_parent->appendChild($last_node);
        }
        // Inject html into the DOM tree for specific features (ie thumb ratings, ads, etc)
        $this->mobileParserBeforeHtmlSave($xpath);
        $html = $doc->saveXML();
        $sections = array();
        $sectionsHtml = explode('<h2>', $html);
        unset($sectionsHtml[0]);
        // remove leftovers from intro section
        foreach ($sectionsHtml as $i => &$section) {
            $section = '<h2>' . $section;
            if (preg_match('@^<h2[^>]*>\\s*<span[^>]*>\\s*([^<]+)@i', $section, $m)) {
                $heading = trim($m[1]);
                $section = preg_replace('@^<h2[^>]*>\\s*<span[^>]*>\\s*([^<]+)</span>(\\s|\\n)*</h2>@i', '', $section);
                if (isset($sectionMap[$heading])) {
                    $key = $sectionMap[$heading];
                    $sections[$key] = array('name' => $heading, 'html' => $section);
                }
            }
        }
        // Break the reference so later writes to $section cannot alter $sectionsHtml
        unset($section);
        // Remove Video section if there is no longer a youtube video
        if (isset($sections['video'])) {
            if (!preg_match('@<object@i', $sections['video']['html'])) {
                unset($sections['video']);
            }
        }
        // Add the related boxes
        if (isset($sections['relatedwikihows']) && !empty($related_boxes)) {
            $sections['relatedwikihows']['boxes'] = $related_boxes;
        }
        // Add article info
        $sections['article_info']['name'] = wfMsg('article_info');
        $sections['article_info']['html'] = $this->getArticleInfo($title);
        // Remove </body></html> from the section that precedes the last one. With a single
        // section there is no such entry, and taking a reference to it would silently create
        // an empty-keyed section, so at least two sections are required.
        if (count($sections) > 1) {
            $keys = array_keys($sections);
            $last =& $sections[$keys[count($sections) - 2]]['html'];
            $last = preg_replace('@</body>(\\s|\\n)*</html>(\\s|\\n)*$@', '', $last);
            unset($last);
        }
        // Add a simple form for uploading images of completed items to the article
        if ($wgLanguageCode == 'en' && isset($sections['steps']) && isset($device['show-upload-images']) && $device['show-upload-images']) {
            require_once "{$IP}/extensions/wikihow/mobile/MobileUciHtmlBuilder.class.php";
            $userCompletedImages = new MobileUciHtmlBuilder();
            $sections['steps']['html'] .= $userCompletedImages->createByHtml($this->t);
        }
        return array($sections, $intro, $firstImage);
    }
Esempio n. 13
0
<?php

/**
 * Empty DOMElement subclass; it exists only so the script below can show
 * which class the document instantiates for element nodes.
 */
class SampleElement extends \DOMElement
{
}

// Register the subclass before parsing so parsed elements are created with it.
$dom = new DOMDocument();
$dom->registerNodeClass('DOMElement', 'SampleElement');
$dom->loadXML('<?xml version="1.0" ?><root />');

// Dump the class name of the document's first child node.
$list = $dom->childNodes;
$firstNode = $list->item(0);
$firstNodeClass = get_class($firstNode);
var_dump($firstNodeClass);
Esempio n. 14
0
 /**
  * Initialises the parent with XML version 1.0 and UTF-8 encoding, then
  * registers \Sped\Components\Xml\Element as the class to use in place of
  * \DOMElement.
  */
 public function __construct()
 {
     $xmlVersion = '1.0';
     $charset = 'UTF-8';
     parent::__construct($xmlVersion, $charset);

     $baseClass = '\\DOMElement';
     $elementClass = '\\Sped\\Components\\Xml\\Element';
     parent::registerNodeClass($baseClass, $elementClass);
 }
Esempio n. 15
0
 /**
  * MANGASTREAM VERSION
  *
  * Fetches one reader page, collects the <div> layers that make up the page
  * image (the first one is only the frame), passes them to combine_data()
  * and then recurses into the page linked from the first layer.
  *
  * Eg. $obj->download('http://mangastream.com/read/one_piece_green/59213718/1');
  *
  * The function returns early (instead of failing with a fatal error) when
  * the page cannot be fetched or does not contain the expected layers.
  *
  * @param string $manga_url URL of the reader page to process
  */
 function download($manga_url)
 {
     if (urlParameters(6, $manga_url) == "end") {
         die;
     }
     $html = file_get_contents($manga_url);
     if ($html === false || $html === '') {
         // Nothing to parse; loadHTML() would fail on an empty document.
         return;
     }
     $doc = new DOMDocument();
     $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
     @$doc->loadHTML($html);
     $doc->preserveWhiteSpace = false;
     // The document is not modified below, so the div list can be fetched once.
     $divs = $doc->getElementsByTagName('div');
     $data = array();
     $i = 0;
     /* Look up the chapter number in the document (first <option>) */
     $option = $doc->getElementsByTagName('option')->item($i);
     $chapter = $option !== null ? $option->nodeValue : '';
     /* Walk the first divs until we find the relatively positioned layer (the frame) */
     do {
         $elem = $divs->item($i);
         if ($elem === null) {
             // Ran out of divs before finding the frame.
             break;
         }
         if (strpos($elem->getAttribute('style'), "position:relative") !== false) {
             break;
         }
         $i++;
     } while ($i < 20);
     $ireal = $i;
     /* Once we have it, build an array with all the data we will need */
     $elem = $divs->item($i);
     if ($elem === null) {
         return;
     }
     $data[$i]['style'] = $elem->getAttribute('style');
     while (true) {
         $i++;
         $elem = $divs->item($i);
         // The layers end at the first div without a style (or at the end of the list).
         if ($elem === null || $elem->getAttribute('style') == "") {
             break;
         }
         $link = $elem->getElementsByTagName('a')->item(0);
         $image = $elem->getElementsByTagName('img')->item(0);
         if ($link === null || $image === null) {
             // Not an image layer; treat it as the end of the layers.
             break;
         }
         $data[$i]['img'] = $image->getAttribute('src');
         $data[$i]['style'] = $elem->getAttribute('style');
         $data[$i]['a'] = $link->getAttribute('href');
     }
     $imax = $i;
     $i = $ireal;
     /* At this point we only have the styles of the layers that form the image, the url of the next page and the link to the image.
      * Filter the styles to obtain width, height and the exact top/left position of each image; the first layer is only the frame */
     $data[$i]['width'] = filtro("width", "px", $data[$i]['style']);
     $data[$i]['height'] = filtro("height", "px", $data[$i]['style']);
     for ($i++; $i < $imax; $i++) {
         $data[$i]['width'] = filtro("width", "px", $data[$i]['style']);
         $data[$i]['height'] = filtro("height", "px", $data[$i]['style']);
         $data[$i]['top'] = filtro("top", "px", $data[$i]['style']);
         $data[$i]['left'] = filtro("left", "px", $data[$i]['style']);
     }
     if (urlParameters(6, $manga_url) != "end") {
         combine_data($data, $ireal, $imax, urlParameters(4, $manga_url) . "-" . $chapter, urlParameters(6, $manga_url));
         if (!isset($data[$ireal + 1]['a'])) {
             // No layer was collected, so there is no link to a next page.
             return;
         }
         set_time_limit(20);
         $this->download("http://mangastream.com" . $data[$ireal + 1]['a']);
     } else {
         die;
     }
 }
Esempio n. 16
0
    /**
     * Post-processes rendered article HTML and renders it through the
     * main_<ARTICLE_LAYOUT>.tmpl.php template.
     *
     * Steps performed on the HTML:
     *  - pads empty corner / clear divs with &nbsp;
     *  - removes the first "featurestar" div and the node two siblings after it
     *  - enlarges each "rounders" div (width +21px, height +19px) and wraps
     *    its first child in nested top/bottom/left/right divs
     *  - moves step images ("mwimg") so they directly follow the "check" div
     *    of the same list item
     *
     * @param string $article rendered article HTML
     * @return string result of EasyTemplate::html() for the layout template
     */
    function parseArticle_03($article)
    {
        // $wgLanguageCode was used below without being declared global.
        global $wgTitle, $wgUser, $wgRequest, $wgServer, $wgLang, $wgArticle, $wgParser, $wgOut, $IP, $wgLanguageCode;
        $article = self::mungeSteps($article);
        $sk = $wgUser->getSkin();
        // Share-button markup. It is currently not passed to the template
        // (see $share below) but is kept so it can be re-enabled.
        $url = urlencode($wgServer . "/" . $wgTitle->getPrefixedURL());
        $img = urlencode(WikihowShare::getPinterestImage($wgTitle));
        $desc = urlencode(wfMsg('howto', $wgTitle->getText()) . WikihowShare::getPinterestTitleInfo());
        $fb = '<div class="like_button"><fb:like href="' . $url . '" send="false" layout="button_count" width="100" show_faces="false"></fb:like></div>';
        $gp1 = '<div class="gplus1_button"><g:plusone size="medium" callback="plusone_vote"></g:plusone></div>';
        $pinterest = '<div id="pinterest"><a href="http://pinterest.com/pin/create/button/?url=' . $url . '&media=' . $img . '&description=' . $desc . '" class="pin-it-button" count-layout="horizontal">Pin It</a></div>';
        // German includes "how to " in the title text
        $howto = wfMsg('howto', htmlspecialchars($wgTitle->getText()));
        $tb = '<div class="admin_state"><a href="http://twitter.com/share" data-lang="' . $wgLanguageCode . '" style="display:none; background-image: none; color: #ffffff;" class="twitter-share-button" data-count="horizontal" data-via="wikiHow" data-text="' . $howto . '" data-related="JackHerrick:Founder of wikiHow">Tweet</a></div>';
        // Share buttons are disabled; the template receives null, exactly as
        // it did when the variable was left undefined.
        //$share =  $fb . $gp1 . $pinterest;
        $share = null;
        // Give empty corner divs (both quoting styles) a non-breaking space.
        foreach (array('top_right', 'top_left', 'bottom_right', 'bottom_left') as $corner) {
            $article = str_replace('<div class="corner ' . $corner . '"></div>', '<div class="corner ' . $corner . '">&nbsp;</div>', $article);
            $article = str_replace("<div class='corner " . $corner . "'></div>", "<div class='corner " . $corner . "'>&nbsp;</div>", $article);
        }
        $article = str_replace('<div style="clear:both"></div>', '<div style="clear:both">&nbsp;</div>', $article);
        $article = str_replace("’", "'", $article);
        $introImage = "";
        require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";
        $doc = new DOMDocument('1.0', 'utf-8');
        $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
        $doc->strictErrorChecking = false;
        $doc->recover = true;
        @$doc->loadHTML($article);
        $doc->normalizeDocument();
        $xpath = new DOMXPath($doc);
        //removing the featured article star (first match only)
        $star = $xpath->query('//div[@id="featurestar"]')->item(0);
        if ($star !== null && $star->parentNode !== null) {
            $afterNext = $star->nextSibling !== null ? $star->nextSibling->nextSibling : null;
            if ($afterNext !== null) {
                $star->parentNode->removeChild($afterNext);
            }
            $star->parentNode->removeChild($star);
        }
        $nodes = $xpath->query('//div[@class="rounders"]');
        foreach ($nodes as $node) {
            $style = $node->getAttribute("style");
            // Enlarge the pixel value of each dimension found in the style.
            foreach (array('width:' => 21, 'height:' => 19) as $property => $growBy) {
                $start = strpos($style, $property);
                if ($start === false) {
                    continue;
                }
                $valueStart = $start + strlen($property);
                $end = strpos($style, "px", $valueStart);
                if ($end === false) {
                    continue;
                }
                // Length must be end - start; the reversed (negative) length
                // used before cut the value relative to the end of the string.
                $value = intval(substr($style, $valueStart, $end - $valueStart));
                $style = substr($style, 0, $valueStart) . ($value + $growBy) . substr($style, $end);
            }
            $node->setAttribute("style", $style);
            $childNode = $node->firstChild;
            if ($childNode !== null) {
                $node->removeChild($childNode);
            }
            // Build div.top > div.bottom > div.left > div.right and put the
            // original first child into the innermost wrapper.
            $parent = $node;
            foreach (array('top', 'bottom', 'left', 'right') as $wrapperClass) {
                $wrapper = $doc->createElement("div");
                $wrapper->setAttribute('class', $wrapperClass);
                $parent->appendChild($wrapper);
                $parent = $wrapper;
            }
            if ($childNode !== null) {
                $parent->appendChild($childNode);
            }
        }
        //grabbing the intro image
        /*$nodes = $xpath->query('//div[@class="mwimg"]');
        		foreach ($nodes as $node) {
        			$introImage = "<div class='mwimg'>" . $node->innerHTML . "</div>";
        			$node->parentNode->removeChild($node);
        			break;
        		}*/
        $nodes = $xpath->query('//ol[@class="steps_list_2"]/li/div[@class="mwimg"]');
        foreach ($nodes as $node) {
            $checkNode = $xpath->query('div[@class="check"]', $node->parentNode)->item(0);
            if ($checkNode === null) {
                // No check div in this step: leave the image where it is
                // instead of detaching it and then failing.
                continue;
            }
            $node->parentNode->removeChild($node);
            $checkNode->parentNode->insertBefore($node, $checkNode->nextSibling);
        }
        $article = $doc->saveHTML();
        $article = str_replace('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><body>', "", $article);
        $article = str_replace('</body></html>', "", $article);
        $mainVars = array('wgTitle' => $wgTitle, 'wgUser' => $wgUser, 'article' => $article, 'sk' => $sk, 'wgRequest' => $wgRequest, 'share' => $share, 'wgLang' => $wgLang, 'wgArticle' => $wgArticle, 'introImage' => $introImage, 'navigation' => self::getNavigation());
        return EasyTemplate::html('main_' . self::ARTICLE_LAYOUT . '.tmpl.php', $mainVars);
    }
Esempio n. 17
0
 /**
  * Creates the document and registers this object's own class as the
  * replacement for DOMDocument.
  *
  * @param string $version  Version number written to the XML declaration.
  * @param string $encoding Encoding written to the XML declaration.
  */
 public function __construct($version = '1.0', $encoding = 'UTF-8')
 {
     parent::__construct($version, $encoding);

     // Without this registration, node->ownerDocument would come back as a
     // plain \DOMDocument object.
     $ownClass = get_class($this);
     parent::registerNodeClass('DOMDocument', $ownClass);
 }
Esempio n. 18
0
 /**
  * Returns a re-parsed copy of the given DocBook document whose elements
  * are created with the ezcDocumentLocateableDomElement class.
  *
  * The document is serialized and loaded again into a new DOMDocument that
  * has the custom element class registered up front, because registering
  * the class on the existing document seems not to work in all cases.
  *
  * @param DOMDocument $docBook
  * @return DOMDocument
  */
 private function makeLocateable(DOMDocument $docBook)
 {
     $serialized = $docBook->saveXML();

     $locateable = new DOMDocument();
     $locateable->registerNodeClass('DOMElement', 'ezcDocumentLocateableDomElement');
     $locateable->loadXML($serialized);

     return $locateable;
 }
<?php

// Demo script: read and write innerHTML on an element of a document that
// uses JSLikeHTMLElement as its element class. Output is plain text.
require_once '../JSLikeHTMLElement.php';
header('Content-Type: text/plain');

$doc = new DOMDocument();
$doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
$doc->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>');
$elem = $doc->getElementsByTagName('div')->item(0);

// Read innerHTML; prints '<p>Para 1</p><p>Para 2</p>'
echo $elem->innerHTML, "\n\n";

// Replace the content through innerHTML and read it back;
// prints '<a href="http://fivefilters.org">FiveFilters.org</a>'
$elem->innerHTML = '<a href="http://fivefilters.org">FiveFilters.org</a>';
echo $elem->innerHTML, "\n\n";

// Finally print the whole document, including the change made above.
echo $doc->saveXML();
/**
 * Fills every <article id="..."> element of a request document with content
 * taken from the XSLT-transformed eLife article XML.
 *
 * For each article the source XML is downloaded, passed through the HTML
 * parser, transformed with ./elife_xmltohtml.xsl and post-processed (author
 * tooltips, institution clean-up, formula labels moved to the end). If the
 * article contains <query xpath="..."> elements, every node matching the
 * XPath in the transformed document is imported into the query inside a
 * <data> wrapper; otherwise the whole transformed document element is
 * appended to the article. A failure for a single article is reported by
 * writing an "ERROR: ..." message into that article element.
 *
 * @param string $messageXML request XML containing <article> elements
 * @return string serialized result document without its XML declaration, or
 *                an "ERROR: ..." message if the stylesheet cannot be loaded
 */
function xmltohtml($messageXML)
{
    $messageXML = mb_convert_encoding($messageXML, 'HTML-ENTITIES', "UTF-8");
    // Create a stream context for downloading the article XML
    $opts = array('http' => array('method' => "GET", 'header' => "Accept-language: en\r\n" . "Cookie: foo=bar\r\n" . "Content-Type: text/xml; charset=UTF-8"));
    $context = stream_context_create($opts);
    ## create html instance
    $doc = new DOMDocument('1.0', 'UTF-8');
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput = false;
    $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    $xslFilePath = './elife_xmltohtml.xsl';
    $xsl = new DOMDocument();
    $xslResult = $xsl->load($xslFilePath);
    if (!$xslResult) {
        // Previously this wrote to the document element of the still empty
        // document and used `continue` outside of any loop; report the
        // problem to the caller instead.
        return 'ERROR: Failed to load XSLT: ' . $xslFilePath;
    }
    $doc->loadXML($messageXML);
    $xpath = new DOMXPath($doc);
    $articles = $xpath->query('//article');
    foreach ($articles as $article) {
        $articleID = $article->getAttribute('id');
        if (!$articleID) {
            $article->nodeValue = 'ERROR: invalid article ID';
            continue;
        }
        $xmlFileURL = "https://s3.amazonaws.com/elife-cdn/elife-articles/{$articleID}/elife{$articleID}.xml";
        $rawXML = file_get_contents($xmlFileURL, false, $context);
        if ($rawXML === false) {
            $article->nodeValue = 'ERROR: Something went wrong when reading XML from AWS';
            continue;
        }
        ## make it utf-8 compliant
        $articleXML = mb_convert_encoding($rawXML, 'HTML-ENTITIES', "UTF-8");
        // Remove the XML declaration. The "?" must be escaped, otherwise it
        // makes the "<" optional and a stray "<?" is left behind.
        $articleXML = preg_replace('/<\?xml version[^>]*>/', '', $articleXML);
        // Remove the DTD declaration
        $articleXML = preg_replace('/<!DOCTYPE [^>]*>/', '', $articleXML);
        $articleXML = '<html><head><meta http-equiv=Content-Type content="text/html; charset=utf-8"></meta></head><body>' . mb_convert_encoding($articleXML, 'HTML-ENTITIES', "UTF-8") . '</body></html>';
        $aDoc = new DOMDocument('1.0', 'UTF-8');
        ## load the xml as html to avoid the named entity/hex entity diff, for e.g., 00444 and 07370, former has named entity while later has hexa
        $aDoc->loadHTML($articleXML);
        $articleXML = $aDoc->saveXML();
        if (preg_match('/^[\\s\\r\\t\\n]+$/', $articleXML)) {
            $article->nodeValue = 'ERROR: Something went wrong when reading XML from AWS';
            continue;
        }
        $articleXML = preg_replace('/<!DOCTYPE [^>]*>/', '', $articleXML);
        $aDocResult = $aDoc->loadXML($articleXML);
        if (!$aDocResult) {
            $article->nodeValue = 'ERROR: Something went wrong during loading XML';
            continue;
        }
        $proc = new XSLTProcessor();
        if (!$proc->importStylesheet($xsl)) {
            $article->nodeValue = 'ERROR: Something went wrong when loading XSLT';
            continue;
        }
        $newDoc = $proc->transformToDOC($aDoc);
        if (!$newDoc) {
            $article->nodeValue = 'ERROR: Something went wrong during transformation';
            continue;
        }
        $newDocXpath = new DOMXPath($newDoc);
        // Add tooltip
        // NOTE(review): JSLikeHTMLElement is registered on $doc only; $newDoc
        // comes from the XSLT processor, so ->innerHTML on its nodes may not
        // be available - confirm.
        $contribs = $newDocXpath->query('//contrib[@contrib-type="author"]');
        foreach ($contribs as $contrib) {
            $tooltip = '|<div class="author-tooltip">';
            $names = $newDocXpath->query('.//name', $contrib);
            foreach ($names as $name) {
                $tooltip .= '<div class="author-tooltip-name">';
                foreach ($name->childNodes as $childNode) {
                    if ($childNode->nodeType == 3) {
                        $tooltip .= $childNode->nodeValue;
                    } else {
                        $tooltip .= '<span class="nlm-' . $childNode->nodeName . '">' . $childNode->innerHTML . '</span>';
                    }
                }
                $tooltip .= '</div>';
            }
            $tooltip .= '<div class="author-tooltip-affiliation">';
            $affRefs = $newDocXpath->query('.//xref[@ref-type = "aff"]', $contrib);
            for ($i = 0, $len = $affRefs->length; $i < $len; $i++) {
                $affRef = $affRefs->item($i);
                $rid = $affRef->getAttribute('rid');
                // goto the affiliation by using the rid
                $affs = $newDocXpath->query('//aff[@id="' . $rid . '"]');
                foreach ($affs as $aff) {
                    $tooltip .= '<span class="nlm-aff">';
                    // get all the nodes inside the aff and make span class
                    // class name: nlm-(node name)
                    $childNodes = $aff->childNodes;
                    foreach ($childNodes as $childNode) {
                        if ($childNode->nodeType == 3) {
                            $tooltip .= $childNode->nodeValue;
                        } else {
                            $tooltip .= '<span class="nlm-' . $childNode->nodeName . '">' . $childNode->innerHTML . '</span>';
                        }
                    }
                    $tooltip .= '</span>';
                }
                if ($i != $len - 1) {
                    $tooltip .= ';';
                }
            }
            $tooltip .= '</div>';
            $fnRefs = $newDocXpath->query('.//xref[@ref-type = "fn"]', $contrib);
            for ($i = 0, $len = $fnRefs->length; $i < $len; $i++) {
                $fnRef = $fnRefs->item($i);
                $rid = $fnRef->getAttribute('rid');
                // goto the footnote by using the rid
                $fns = $newDocXpath->query('//fn[@id="' . $rid . '"]');
                foreach ($fns as $fn) {
                    // Reset per footnote so that a footnote of another type
                    // does not reuse (or lack) the values of a previous one.
                    $class = '';
                    $label = '';
                    $fnType = $fn->getAttribute('fn-type');
                    if ($fnType == 'con') {
                        // Author contribution
                        $class = 'author-tooltip-contrib';
                        $label = 'Contribution: ';
                    } elseif ($fnType == 'conflict') {
                        // Competing interests
                        $class = 'author-tooltip-conflict';
                        $label = 'Competing Interests: ';
                    }
                    $tooltip .= '<div class="' . $class . '"><span class="author-tooltip-label">' . $label . '</span>';
                    $tooltip .= '<span class="author-tooltip-text">';
                    // get p tags inside fn
                    $pTags = $newDocXpath->query('.//p', $fn);
                    if ($pTags->length == 0) {
                        // No <p> children: use the footnote's own content
                        // ($pTag is not defined at this point).
                        $tooltip .= $fn->innerHTML;
                    }
                    foreach ($pTags as $pTag) {
                        $tooltip .= '<span class="nlm-p">' . trim($pTag->innerHTML) . '</span>';
                    }
                    $tooltip .= '</span></div>';
                }
            }
            $tooltip .= '</div>';
            $contrib->setAttribute('tooltip', $tooltip);
        }
        $institutions = $newDocXpath->query('//*[@class="elife-institution"]');
        $i = 0;
        foreach ($institutions as $institution) {
            // Flag the institution for removal when the same text appears
            // again later in the list. The scan starts after the current
            // node; starting at the node itself flagged every institution.
            $removenode = false;
            for ($j = $i + 1; $j < $institutions->length; $j++) {
                if ($institution->nodeValue == $institutions->item($j)->nodeValue) {
                    $removenode = true;
                    break;
                }
            }
            $i++;
            // get the first node and if it contains only ", " then remove it
            if ($institution->hasChildNodes()) {
                if ($institution->childNodes->item(0)->nodeType == 3 && trim($institution->childNodes->item(0)->nodeValue) == ',') {
                    DOMRemove($institution->childNodes->item(0));
                }
            }
            // get all direct text nodes.
            $directTextNodes = $newDocXpath->query('./text()', $institution);
            foreach ($directTextNodes as $directTextNode) {
                $directTextNode->nodeValue = preg_replace('/\\s*\\,\\s*\\,\\s*/u', ', ', $directTextNode->nodeValue);
            }
            // The node may already be detached if it was removed as the
            // sibling of an earlier duplicate.
            if ($removenode && $institution->parentNode !== null) {
                if ($institution->nextSibling !== null) {
                    $institution->parentNode->removeChild($institution->nextSibling);
                }
                $institution->parentNode->removeChild($institution);
            }
        }
        // Move the label to the end of disp-formula
        $dispFormulas = $newDocXpath->query('//span[contains(@class,"disp-formula")]');
        foreach ($dispFormulas as $dispFormula) {
            $label = $newDocXpath->query('.//span[contains(@class, "disp-formula-label")]', $dispFormula);
            if ($label->length != 0) {
                $dispFormula->appendChild($label->item(0));
            }
        }
        ## get the queries and return appropriate html snippets
        $queries = $xpath->query('.//query', $article);
        if ($queries->length == 0) {
            $rootNode = $newDoc->documentElement;
            $importedNode = $doc->importNode($rootNode, true);
            $article->appendChild($importedNode);
        } else {
            foreach ($queries as $query) {
                $dataBlocks = $newDocXpath->query($query->getAttribute('xpath'));
                foreach ($dataBlocks as $dataBlock) {
                    $dataNode = $doc->createElement('data');
                    $importedNode = $doc->importNode($dataBlock, true);
                    $dataNode->appendChild($importedNode);
                    $query->appendChild($dataNode);
                }
                //end of foreach datablock
            }
            //end of foreach query
        }
    }
    //end of foreach article
    // Strip the XML declaration ("?" escaped, see above).
    return preg_replace('/<\?xml version[^>]*>/', '', $doc->saveXML());
}
Esempio n. 21
0
/**
 * DOMAttr subclass with a marker method, used to check which class the
 * document hands out for attribute nodes.
 */
class myAttribute extends DOMAttr
{
    function testit()
    {
        return "HELLO Attribute";
    }
}
/**
 * DOMElement subclass with a marker method, used to check which class the
 * document hands out for element nodes.
 */
class myElement extends DOMElement
{
    function testit()
    {
        return "HELLO Element";
    }
}
// Register both subclasses before any node is created through the document.
$doc = new DOMDocument();
$doc->registerNodeClass('DOMAttr', 'myAttribute');
$doc->registerNodeClass('DOMElement', 'myElement');
$doc->appendChild(new DOMElement('root'));
$root = $doc->documentElement;
$root->setAttribute('a', 'a1');
// Print the class of the root element and call its marker method.
echo get_class($root), "\n";
print $root->testit() . "\n";
// Same check for the attribute node.
$attr = $root->getAttributeNode('a');
echo get_class($attr), "\n";
print $attr->testit() . "\n";
// Drop the attribute object, then call registerNodeClass() with NULL as the
// extended class for DOMAttr and fetch the attribute again.
unset($attr);
$doc->registerNodeClass('DOMAttr', NULL);
$attr = $root->getAttributeNode('a');
echo get_class($attr), "\n";
// NOTE(review): if $attr is a plain DOMAttr at this point, this call fails
// because DOMAttr has no testit() - presumably the intended demonstration.
print $attr->testit() . "\n";
Esempio n. 22
0
    /**
     * Wraps an article HTML fragment in a complete XHTML page (doctype, head
     * with a content-type meta tag, body) and parses it into a DOMDocument.
     *
     * The document uses JSLikeHTMLElement as its element class, is loaded
     * with parser warnings suppressed and is normalized before it is
     * returned. The page language attributes come from the global
     * $wgLanguageCode.
     *
     * @param string $articleHtml HTML fragment to place inside <body>
     * @return DOMDocument the parsed document
     */
    private static function htmlToDoc($articleHtml)
    {
        global $wgLanguageCode;

        // Prepare the parser first; none of this depends on the page text.
        $parsed = new DOMDocument('1.0', 'utf-8');
        $parsed->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
        $parsed->strictErrorChecking = false;
        $parsed->recover = true;

        // Make doc correctly formed
        $page = <<<DONE
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="{$wgLanguageCode}" lang="{$wgLanguageCode}">
<head>
\t<meta http-equiv="Content-Type" content="text/html; charset='utf-8'" />
</head>
<body>
{$articleHtml}
</body>
</html>
DONE;

        @$parsed->loadHTML($page);
        $parsed->normalizeDocument();

        return $parsed;
    }
    public function extractStrings($html)
    {
        $dom = null;
        if ($this->useHtml5Parser) {
            $intro = substr($html, 0, 255);
            if (stripos($intro, '<!DOCTYPE html>') !== false) {
                // this is html5 so we'll use the html5
                require_once 'lib/HTML5.php';
                $options = new StdClass();
                $options->decorateDocument = function (DOMDocument $dom) {
                    $dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
                };
                $dom = HTML5::loadHTML($html, $options);
                // noscripts contents are treated like text which causes problems when
                // filters/replacements are run on them.  Let's just remove them
                $noscripts = $dom->getElementsByTagName('noscript');
                foreach ($noscripts as $noscript) {
                    $noscript->parentNode->removeChild($noscript);
                }
            }
        }
        //$dom = str_get_html($html);
        if (!isset($dom)) {
            $dom = new DOMDocument();
            $dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
            @$dom->loadHtml('<?xml encoding="UTF-8">' . $html);
            // dirty fix
            foreach ($dom->childNodes as $item) {
                if ($item->nodeType == XML_PI_NODE) {
                    $dom->removeChild($item);
                }
            }
            // remove hack
            $dom->encoding = 'UTF-8';
            // insert proper
        }
        //print_r($dom);
        $strings = array();
        $this->strings =& $strings;
        $stringsIndex = array();
        $xpath = new DOMXPath($dom);
        $this->translateDates($xpath);
        //$text = $xpath->query('//text()[normalize-space() and not(ancestor::script | ancestor::style)]');
        //$translatables = $dom->find('[translate]');
        $translateAttrs = $xpath->query('//*[@data-swete-translate-attrs or @alt or @title]');
        $otherAtts = array('title', 'alt');
        foreach ($translateAttrs as $el) {
            if ($el->hasAttribute('data-swete-translate-attrs')) {
                $attNames = explode(' ', $el->getAttribute('data-swete-translate-attrs'));
            } else {
                $attNames = array();
            }
            foreach ($otherAtts as $attName) {
                if ($el->hasAttribute($attName)) {
                    $attNames[] = $attName;
                }
            }
            foreach ($attNames as $attName) {
                $attVal = $el->getAttribute($attName);
                if ($attVal and trim($attVal)) {
                    $index = count($strings);
                    $strings[] = trim(_n($attVal));
                    $stringsIndex[trim(_n($attVal))] = $index;
                    $el->setAttribute($attName, '{{$' . $index . '$}}');
                    $index++;
                }
            }
        }
        $translatables = $xpath->query('//*[@translate]');
        foreach ($translatables as $tr) {
            $index = count($strings);
            //$strings[] = trim(_n($tr->innertext));
            //$strings[] = trim(_n($tr->innerHTML));
            $trStr = trim(_n($tr->innerHTML));
            if ($tr->hasAttribute('data-swete-delimiters')) {
                $delim = trim($tr->getAttribute('data-swete-delimiters'));
                if ($delim) {
                    $delimSplitter = $delim[0];
                    $delimiters = explode($delimSplitter, $delim);
                    $delimiters2 = array();
                    foreach ($delimiters as $delimiterIdx => $delimiter) {
                        if (!trim($delimiter)) {
                            continue;
                        }
                        $delimiters2[] = '(' . preg_quote($delimiter, '/') . ')';
                    }
                    $delimiters = $delimiters2;
                    $pattern = '/' . implode('|', $delimiters) . '/';
                    $toks = preg_split($pattern, $trStr, -1, PREG_SPLIT_DELIM_CAPTURE);
                    $innerHTML = array();
                    foreach ($toks as $tokIdx => $tok) {
                        if (!trim($tok)) {
                            $innerHTML[] = $tok;
                        } else {
                            if ($tokIdx % 2 === 1) {
                                // It is a delimiter
                                $innerHTML[] = $tok;
                            } else {
                                $strings[] = trim(_n($tok));
                                $stringsIndex[trim(_n($tok))] = $index;
                                $innerHTML[] = '{{$' . $index . '$}}';
                                $index++;
                                if ($tok[strlen($tok) - 1] === ' ') {
                                    $innerHTML[] = ' ';
                                }
                            }
                        }
                    }
                    $tr->innerHTML = implode('', $innerHTML);
                    $trStr = '';
                }
            }
            if ($trStr) {
                $strings[] = trim(_n($trStr));
                $stringsIndex[trim(_n($trStr))] = $index;
                $tr->innerHTML = '{{$' . $index . '$}}';
                $index++;
            }
            $gchildren = $xpath->query('./text()', $tr);
            foreach ($gchildren as $gchild) {
                $gchild->isCovered = 1;
            }
        }
        //$untranslatables = $dom->find('[notranslate]');
        $untranslatables = $xpath->query('//*[@notranslate]');
        foreach ($untranslatables as $tr) {
            //error_log('Found untranslatable: '.$tr->outertext);
            //$gchildren = $tr->find('text');
            $gchildren = $xpath->query('./text()', $tr);
            //error_log(count($gchildren).' found');
            //foreach ($gchildren as $gchild) $gchild->isCovered = 1;
            foreach ($gchildren as $gchild) {
                $gchild->isCovered = 1;
            }
        }
        $textX = $xpath->query('//text()[not(ancestor::script | ancestor::style | ancestor::*[@notranslate] | ancestor::*[@translate])]');
        $text = array();
        foreach ($textX as $x) {
            $text[] = $x;
        }
        //echo "Found ".$text->length;
        foreach ($text as $tx) {
            if (!$tx instanceof DOMNode) {
                continue;
            }
            if (!isset($tx->parentNode)) {
                continue;
            }
            if (!$tx->parentNode instanceof DOMElement) {
                continue;
            }
            // the data-swete-translate is a little different than the notranslate attribute
            // the notranslate attribute confers block level status to its owner tag.
            // data-swete-translate simply marks a segment of text as not to be translated
            // (or to be translated) within the flow of the document.  Therefore we don't
            // use a text node whose parent has the data-swete-translate as an anchor
            // to start building a group of text.  But we will allow a tag with this
            // to be included in a group of text (that contains content before and/or after).
            // The SweteTools::encode() method will take care of variablizing the content
            // at translation time.
            if ($tx->parentNode->hasAttribute('data-swete-translate') and $tx->parentNode->getAttribute('data-swete-translate') === '0') {
                continue;
            }
            //if ( !trim($tx->innertext) ) continue;
            if (!trim($tx->nodeValue)) {
                continue;
            }
            //if ( in_array($tx->parent->tag , array('comment','script','style','code') )) continue;
            if (in_array(strtolower($tx->parentNode->tagName), array('comment', 'script', 'style', 'code'))) {
                continue;
            }
            if ($this->isCovered($tx)) {
                //echo "This one's covered!!!";
                continue;
            }
            //echo "[".$tx->nodeValue."]";
            //continue;
            $group = array();
            $start = $tx;
            //if ( $tx->parent->children ){
            if (!isset($tx->parentNode)) {
                //error_log("skipping ".$tx->nodeValue);
                continue;
            }
            if ($tx->parentNode->childNodes->length > 0) {
                $pos = -1;
                //foreach ( $tx->parent->nodes as $idx=>$child ){
                foreach ($tx->parentNode->childNodes as $idx => $child) {
                    if ($child === $tx) {
                        $pos = $idx;
                        break;
                    }
                }
                $mypos = $pos;
                for ($i = $pos; $i >= 0; $i--) {
                    //$node = $tx->parent->nodes[$i];
                    $node = $tx->parentNode->childNodes->item($i);
                    //if ( $node->tag != 'text' and !in_array($node->tag, self::$inlineTags) ){
                    if ($node->nodeType != XML_TEXT_NODE and !in_array(strtolower(@$node->tagName), self::$inlineTags) and !($node instanceof DOMElement and $node->hasAttribute('data-swete-inline'))) {
                        break;
                    }
                    //if ( $node->notranslate ){
                    if ($node instanceof DOMElement and $node->hasAttribute('notranslate')) {
                        break;
                    }
                    if ($node instanceof DOMElement and $node->hasAttribute('data-swete-block')) {
                        break;
                    }
                    $pos = $i;
                }
                //if ( $mypos == $pos or $this->isFirstText($tx->parent, $mypos, $pos)){
                if ($mypos == $pos or $this->isFirstText($tx->parentNode, $mypos, $pos)) {
                    $startIdx = $pos;
                    //for ( $i=$startIdx; $i<count($tx->parent->nodes); $i++ ){
                    for ($i = $startIdx; $i < $tx->parentNode->childNodes->length; $i++) {
                        //$node = $tx->parent->nodes[$i];
                        $node = $tx->parentNode->childNodes->item($i);
                        if (!$node) {
                            break;
                        }
                        //if ( $node->tag != 'text' and !in_array($node->tag, self::$inlineTags) ){
                        if ($node->nodeType != XML_TEXT_NODE and !in_array(strtolower(@$node->tagName), self::$inlineTags) and !($node instanceof DOMElement and $node->hasAttribute('data-swete-inline'))) {
                            break;
                        }
                        //if ( $node->notranslate ){
                        if ($node instanceof DOMElement and $node->hasAttribute('notranslate')) {
                            break;
                        }
                        if ($node instanceof DOMElement and $node->hasAttribute('data-swete-block')) {
                            break;
                        }
                        //if ( $node->tag != 'text' ){
                        //	if ( preg_match('/^<'.$node->tag.'[^>]*>/', $node->outertext, $matches) ){
                        //
                        //		$node->outertext = preg_replace('/^<'.$node->tag.'([^>]*)>/', '<'.$node->tag.' id="{{R'.count($this->replacements).'R}}">', $node->outertext);
                        //		$this->replacements[] = $matches[0];
                        //	}
                        //
                        //}
                        $group[] = $node;
                    }
                }
            } else {
                $group[] = $tx;
            }
            $combinedText = array();
            foreach ($group as $item) {
                //$combinedText[] = trim($item->outertext);
                // REquires PHP 5.3.6 or higher.. passing element to saveHtml()
                $combinedText[] = preg_replace_callback('#<(\\w+)([^>]*)\\s*/>#s', create_function('$m', '
					$xhtml_tags = array("br", "hr", "input", "frame", "img", "area", "link", "col", "base", "basefont", "param");
					return in_array($m[1], $xhtml_tags) ? "<$m[1]$m[2]/>" : "<$m[1]$m[2]></$m[1]>";
					'), $dom->saveXml($item));
            }
            //var_dump($combinedText);
            $combinedText = implode('', $combinedText);
            $leadingWhiteSpace = '';
            $trailingWhiteSpace = '';
            if (preg_match('#^[\\p{Z}\\s]+#', $combinedText, $m1)) {
                $leadingWhiteSpace = $m1[0];
            }
            //echo 'Checking for trailing space: ['.$combinedText.']'."\n";
            if (preg_match('#[\\p{Z}\\s]+$#', $combinedText, $m1)) {
                //echo "Trailing white space found in '$combinedText'\n";
                $trailingWhiteSpace = $m1[0];
            } else {
                //echo "No trailing whitespace found.".ord($combinedText{strlen($combinedText)-1});
            }
            $combinedText = _n($this->replaceStrings($combinedText));
            if (!trim(str_ireplace('&nbsp;', '', $combinedText))) {
                continue;
            }
            if (isset($stringsIndex[$combinedText])) {
                $index = $stringsIndex[$combinedText];
            } else {
                $index = count($strings);
                $strings[] = $combinedText;
                $stringsIndex[$combinedText] = $index;
            }
            foreach ($group as $gnode) {
                //$gchildren = $gnode->find('text');
                $gchildren = @$xpath->query('./text()', $gnode);
                if (!$gchildren) {
                    continue;
                }
                foreach ($gchildren as $gchild) {
                    $gchild->isCovered = 1;
                }
            }
            //$group[0]->outertext = '{{$'.$index.'$}}';
            //$group[0]->nodeValue = '{{$'.$index.'$}}';
            for ($i = 1; $i < count($group); $i++) {
                //$group[$i]->outertext = '';
                //if ( !@$group[$i] ) continue;
                if (@$group[$i]->parentNode) {
                    $group[$i]->parentNode->removeChild($group[$i]);
                }
            }
            if (!@$group[0]) {
                continue;
            }
            if (!@$group[0]->parentNode) {
                continue;
            }
            $textNodeContent = $leadingWhiteSpace . '{{$' . $index . '$}}' . $trailingWhiteSpace;
            //echo 'Content:['.$textNodeContent.']'."\n";
            $group[0]->parentNode->replaceChild($dom->createTextNode($textNodeContent), $group[0]);
        }
        // Now we need to translate the keywords and the description
        //foreach ($dom->find('meta') as $el){
        foreach ($xpath->query('//meta[@name="keywords" or @name="description"]') as $el) {
            //$content = _n($el->content);
            if (!$el->hasAttribute('content')) {
                continue;
            }
            $content = _n($el->getAttribute('content'));
            //if ( $content and in_array(strtolower(strval($el->name)), array('keywords','description')) ){
            if (isset($stringsIndex[$content])) {
                $index = $stringsIndex[$content];
            } else {
                $index = count($strings);
                $strings[] = $content;
                $stringsIndex[$content] = $index;
            }
            //$el->content = '{{$'.$index.'$}}';
            $el->setAttribute('content', '{{$' . $index . '$}}');
            //}
        }
        $this->strings = array_map(array($this, 'cleanString'), $this->strings);
        //return $dom->save();
        return $dom->saveHtml();
    }
Esempio n. 24
0
 /**
  * Render the mobile thumbnail template for an article body and return a
  * DOMXPath over the resulting HTML document.
  *
  * While rendering, the global $wgTitle is temporarily replaced with the title
  * obtained from $r, and is put back before this method returns or throws.
  *
  * @param string $bodyHtml Article body HTML; handed to
  *                         WikihowArticleHTML::postProcess() with ads disabled.
  * @param object $r        Object exposing getTitle() (presumably a revision or
  *                         article object; confirm against callers).
  * @return DOMXPath XPath helper bound to the parsed, normalized document, whose
  *                  elements are instantiated as JSLikeHTMLElement.
  * @throws Exception Anything thrown while rendering or parsing is rethrown
  *                   after the original $wgTitle has been restored.
  */
 public static function getXPath(&$bodyHtml, &$r)
 {
     // $wgWikiHowSections is not read directly below, but the files pulled in
     // with require_once execute in this function's scope, so the global
     // declaration is kept in case they rely on it.
     global $wgWikiHowSections, $IP, $wgTitle;
     $lang = MobileWikihow::getSiteLanguage();
     // munge steps first
     $opts = array('no-ads' => true);
     require_once "{$IP}/skins/WikiHowSkin.php";
     $oldTitle = $wgTitle;
     $wgTitle = $r->getTitle();
     try {
         // Build the template variables explicitly rather than by implicit
         // array creation, so nothing left in scope by the included file can
         // leak into the template.
         $vars = array(
             'bodyHtml' => WikihowArticleHTML::postProcess($bodyHtml, $opts),
             'lang' => $lang,
         );
         EasyTemplate::set_path(dirname(__FILE__) . '/');
         $html = EasyTemplate::html('thumb_html.tmpl.php', $vars);
         require_once "{$IP}/extensions/wikihow/mobile/JSLikeHTMLElement.php";
         $doc = new DOMDocument('1.0', 'utf-8');
         $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
         $doc->strictErrorChecking = false;
         $doc->recover = true;
         // Parser warnings about malformed markup are deliberately silenced.
         @$doc->loadHTML($html);
         $doc->normalizeDocument();
         $xpath = new DOMXPath($doc);
     } catch (Exception $e) {
         // Do not leave the process-wide title pointing at the wrong article
         // when rendering fails. Catch-and-rethrow is used instead of
         // `finally` so the code stays valid on PHP versions that predate it.
         $wgTitle = $oldTitle;
         throw $e;
     }
     $wgTitle = $oldTitle;
     return $xpath;
 }
Esempio n. 25
0
<?php

/**
 * Empty DOMNode subclass; it adds no members and exists only to be passed to
 * DOMDocument::registerNodeClass() as a replacement class.
 */
class MyNode extends DOMNode {}
/**
 * Empty DOMElement subclass; it adds no members and exists only to be passed
 * to DOMDocument::registerNodeClass() as a replacement class.
 */
class MyElement extends DOMElement {}
// Create an empty document and ask it to instantiate the custom subclasses
// declared above in place of the built-in node classes. Each var_dump() prints
// the value returned by the corresponding registerNodeClass() call.
$dom = new DOMDocument();
// Register a replacement for the base DOMNode class itself.
var_dump($dom->registerNodeClass('DOMNode', 'MyNode'));
// Register a replacement for DOMElement.
var_dump($dom->registerNodeClass('DOMElement', 'MyElement'));