Example #1
 /**
  * Checks the encoding of the supplied value.
  *
  * When the 'dom/encoding/check' setting returned by Sylma::read() is truthy and
  * $sValue is not valid UTF-8, the parent's throwException() is called with the
  * message 'Bad encoding'.
  *
  * NOTE(review): only the encoding check is visible in this excerpt; what the
  * constructor does with $sValue afterwards is not shown here.
  *
  * @param string $sValue Value whose encoding is checked (presumably the node's text content; confirm against callers)
  */
 public function __construct($sValue)
     // The check is optional: it only runs when the setting is enabled.
     if (Sylma::read('dom/encoding/check') && !mb_check_encoding($sValue, 'UTF-8')) {
         // NOTE(review): the disabled conversion below refers to $sContent, which is not a variable of this method.
         //$sValue = utf8_encode($sContent); // TODO , result not always in utf-8
         $this->getParent()->throwException('Bad encoding');
Example #2
  * Gets line number for where the node is defined.
  * The method `DOMNode::getLineNo()` does not return the correct line number for text nodes. So this method
  * can return either the original line number returned by libxml or the fixed and correct line number
  * created by the `XMLDocument::fixLineNumbers()` method.
  * @return int Either the original line number returned by libxml or the fixed and correct line number
  * created by the `XMLDocument::fixLineNumbers()` method.
 /**
  * Returns the line number on which this node is defined.
  *
  * If the owner document's line-number container holds an entry for this node,
  * that entry is returned; otherwise the value of the parent implementation
  * (the line number reported by libxml) is used.
  *
  * @return int
  */
 public function getLineNo()
 {
     $lineNumbers = $this->ownerDocument->getLineNumberContainer();
     $hasFixedLine = $lineNumbers && $lineNumbers->contains($this);

     return $hasFixedLine ? $lineNumbers->offsetGet($this) : parent::getLineNo();
 }
Example #3
 /**
  * Builds a debug dump of this frame.
  *
  * The dump lists the node name, frame id, class, a text preview or the CSS class,
  * the parent and sibling nodes, the decorator chain, position, containing block,
  * margins and style. When the decorator is a Block_Frame_Decorator the contents
  * and geometry of its line boxes are appended as well.
  *
  * The result is HTML; when running under the CLI SAPI the markup is stripped.
  *
  * @return string
  */
 function __toString()
 {
     $str = "<b>" . $this->_node->nodeName . ":</b><br/>";
     $str .= "Id: " . $this->get_id() . "<br/>";
     $str .= "Class: " . get_class($this) . "<br/>";

     if ($this->is_text_node()) {
         // Truncate the raw text first and escape afterwards, so the cut can never
         // land in the middle of an HTML entity such as "&amp;".
         $text = $this->_node->nodeValue;
         $preview = htmlspecialchars(mb_substr($text, 0, 70)) . (mb_strlen($text) > 70 ? "..." : "");
         $str .= "<pre>'" . $preview . "'</pre>";
     } elseif ($css_class = $this->_node->getAttribute("class")) {
         // The attribute value comes from the rendered document; escape it like the text above.
         $str .= "CSS class: '" . htmlspecialchars($css_class) . "'<br/>";
     }

     if ($this->_parent) {
         $str .= "\nParent:" . $this->_parent->_node->nodeName . " (" . spl_object_hash($this->_parent->_node) . ") " . "<br/>";
     }

     if ($this->_prev_sibling) {
         $str .= "Prev: " . $this->_prev_sibling->_node->nodeName . " (" . spl_object_hash($this->_prev_sibling->_node) . ") " . "<br/>";
     }

     if ($this->_next_sibling) {
         $str .= "Next: " . $this->_next_sibling->_node->nodeName . " (" . spl_object_hash($this->_next_sibling->_node) . ") " . "<br/>";
     }

     // Walk the decorator chain until a decorator returns itself (or nothing).
     // Identity (!==) is required here: a loose != between two distinct objects of
     // the same class compares them property by property instead of by instance.
     $d = $this->get_decorator();
     while ($d && $d !== $d->get_decorator()) {
         $str .= "Decorator: " . get_class($d) . "<br/>";
         $d = $d->get_decorator();
     }

     $str .= "Position: " . pre_r($this->_position, true);
     $str .= "\nContaining block: " . pre_r($this->_containing_block, true);
     $str .= "\nMargin width: " . pre_r($this->get_margin_width(), true);
     $str .= "\nMargin height: " . pre_r($this->get_margin_height(), true);
     $str .= "\nStyle: <pre>" . $this->_style->__toString() . "</pre>";

     if ($this->_decorator instanceof Block_Frame_Decorator) {
         $str .= "Lines:<pre>";
         foreach ($this->_decorator->get_line_boxes() as $line) {
             foreach ($line->get_frames() as $frame) {
                 if ($frame instanceof Text_Frame_Decorator) {
                     $str .= "\ntext: ";
                     $str .= "'" . htmlspecialchars($frame->get_text()) . "'";
                 } else {
                     $str .= "\nBlock: " . $frame->get_node()->nodeName . " (" . spl_object_hash($frame->get_node()) . ")";
                 }
             }

             // Geometry of the line box itself, after the frames it contains.
             $str .= "\ny => " . $line->y . "\n" . "w => " . $line->w . "\n" . "h => " . $line->h . "\n" . "left => " . $line->left . "\n" . "right => " . $line->right . "\n";
         }
         $str .= "</pre>";
     }

     $str .= "\n";

     // On the command line there is no browser to render the markup: turn the
     // line breaks into newlines and drop every remaining tag.
     if (php_sapi_name() === "cli") {
         $str = strip_tags(str_replace(array("<br/>", "<b>", "</b>"), array("\n", "", ""), $str));
     }

     return $str;
 }
 /**
  * Stores the frame id and mirrors it onto the underlying DOM node.
  *
  * The id is always kept in $this->_id. It is additionally written to the
  * node's "frame_id" attribute, but only for element nodes, because attributes
  * can only be set on DOMElement objects.
  *
  * @param mixed $id Identifier to assign to this frame
  */
 function set_id($id)
 {
     $this->_id = $id;

     // Non-element nodes cannot carry attributes (and cannot be targeted by
     // CSS rules either), so there is nothing more to do for them.
     if ($this->_node->nodeType != XML_ELEMENT_NODE) {
         return;
     }

     $this->_node->setAttribute("frame_id", $id);
 }
Example #5
 /**
  * Sets the text content of the wrapped DOM node.
  *
  * The entities &lt;, &gt; and &amp; in $text are decoded once, the result is
  * placed in a new text node, and that node replaces the first child of the
  * wrapped node. If the wrapped node has no children the text node is appended.
  *
  * @param string $text New content; may contain the entities &lt;, &gt; and &amp;
  */
 function set_content($text)
 {
     // the following replace has been added to conform with PHP4.
     // A set_content("&amp;") brought a get_content() = "&" there,
     // whereas PHP5 gives a get_content() = "&amp;"
     // "&amp;" is decoded last so that "&amp;lt;" becomes "&lt;" and not "<".
     $text = str_replace(array("&lt;", "&gt;", "&amp;"), array("<", ">", "&"), $text);

     // Create the node with its data: an empty DOMText would discard $text.
     $text_node = new DOMText($text);

     $first_child = $this->myDOMNode->firstChild;
     if (is_object($first_child)) {
         $this->myDOMNode->replaceChild($text_node, $first_child);
     } else {
         // No child to replace yet: add the text as the only child.
         $this->myDOMNode->appendChild($text_node);
     }
 }
Example #6
File: HTML.php Project: rikaix/zf2
  * Highlight text in text node
  * @param DOMText $node
  * @param array   $wordsToHighlight
  * @param callback $callback   Callback method, used to transform (highlighting) text.
  * @param array    $params     Array of additional callback parameters (the first, non-optional parameter is the text to transform)
  * @throws \Zend\Search\Lucene\Exception\RuntimeException
 /**
  * Highlights the matched words inside a single text node.
  *
  * The node's text is tokenized with the default analyzer. Every token whose
  * term text is a key of $wordsToHighlight is cut out of the text node, handed
  * to $callback, and replaced in the document by the nodes parsed from the HTML
  * fragment the callback returns.
  *
  * @param \DOMText $node             Text node to process; it is split in place
  * @param array    $wordsToHighlight Words to highlight, looked up by array key
  * @param callback $callback         Called with the matched text followed by $params; returns an HTML fragment
  * @param array    $params           Additional callback parameters
  * @throws \Zend\Search\Lucene\Exception\RuntimeException When the returned fragment cannot be loaded as HTML
  */
 protected function _highlightTextNode(\DOMText $node, $wordsToHighlight, $callback, $params)
 {
     $analyzer = Analyzer\Analyzer::getDefault();
     $analyzer->setInput($node->nodeValue, 'UTF-8');

     $matchedTokens = array();
     while (($token = $analyzer->nextToken()) !== null) {
         if (isset($wordsToHighlight[$token->getTermText()])) {
             $matchedTokens[] = $token;
         }
     }

     if (count($matchedTokens) == 0) {
         // Nothing to highlight in this node.
         return;
     }

     // Process the matches back to front: splitting shortens $node, so only
     // offsets located before the current match remain usable afterwards.
     $matchedTokens = array_reverse($matchedTokens);

     foreach ($matchedTokens as $token) {
         // Cut text after matched token, so the matched node holds the word only
         $node->splitText($token->getEndOffset());

         // Cut matched node
         $matchedWordNode = $node->splitText($token->getStartOffset());

         // Retrieve HTML string representation for highlighted word
         $fullCallbackparamsList = $params;
         array_unshift($fullCallbackparamsList, $matchedWordNode->nodeValue);
         $highlightedWordNodeSetHTML = call_user_func_array($callback, $fullCallbackparamsList);

         // Transform HTML string to a DOM representation and automatically transform retrieved string
         // into valid XHTML (It's automatically done by loadHTML() method).
         // The fragment is wrapped in a document that declares UTF-8 so it is decoded with that charset.
         $highlightedWordNodeSetDomDocument = new \DOMDocument('1.0', 'UTF-8');
         $success = @$highlightedWordNodeSetDomDocument->loadHTML('<html><head><meta http-equiv="Content-type" content="text/html; charset=UTF-8"/></head><body>' . $highlightedWordNodeSetHTML . '</body></html>');
         if (!$success) {
             throw new RuntimeException("Error occured while loading highlighted text fragment: '{$highlightedWordNodeSetHTML}'.");
         }

         $highlightedWordNodeSetXpath = new \DOMXPath($highlightedWordNodeSetDomDocument);
         $highlightedWordNodeSet = $highlightedWordNodeSetXpath->query('/html/body')->item(0)->childNodes;

         // Copy every node of the parsed fragment into the target document, in front of the matched word.
         for ($count = 0; $count < $highlightedWordNodeSet->length; $count++) {
             $nodeToImport = $highlightedWordNodeSet->item($count);
             $node->parentNode->insertBefore($this->_doc->importNode($nodeToImport, true), $matchedWordNode);
         }

         // The highlighted copy now stands in for the original word; drop the original
         // so the word does not appear twice.
         $node->parentNode->removeChild($matchedWordNode);
     }
 }
Example #7
  * Highlight text in text node
  * @param DOMText $node
  * @param array   $wordsToHighlight
  * @param string  $color
 /**
  * Highlights the matched words inside a single text node with a background colour.
  *
  * The node's text is tokenized with the default analyzer using the document's
  * encoding. Every token whose term text is a key of $wordsToHighlight is cut
  * out of the text node and replaced by a <b> element styled with $color.
  *
  * @param DOMText $node             Text node to process; it is split in place
  * @param array   $wordsToHighlight Words to highlight, looked up by array key
  * @param string  $color            CSS colour used as the highlight background
  */
 public function _highlightTextNode(DOMText $node, $wordsToHighlight, $color)
 {
     $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
     $analyzer->setInput($node->nodeValue, $this->_doc->encoding);

     $matchedTokens = array();
     while (($token = $analyzer->nextToken()) !== null) {
         if (isset($wordsToHighlight[$token->getTermText()])) {
             $matchedTokens[] = $token;
         }
     }

     if (count($matchedTokens) == 0) {
         // Nothing to highlight in this node.
         return;
     }

     // Process the matches back to front: splitting shortens $node, so only
     // offsets located before the current match remain usable afterwards.
     $matchedTokens = array_reverse($matchedTokens);

     foreach ($matchedTokens as $token) {
         // Cut text after matched token, so the matched node holds the word only
         $node->splitText($token->getEndOffset());

         // Cut matched node
         $matchedWordNode = $node->splitText($token->getStartOffset());

         // Add the word as a text node rather than as createElement()'s value
         // argument: that argument is parsed as markup, so a "&" in it is not kept literally.
         $highlightedNode = $this->_doc->createElement('b');
         $highlightedNode->appendChild($this->_doc->createTextNode($matchedWordNode->nodeValue));
         $highlightedNode->setAttribute('style', 'color:black;background-color:' . $color);

         $node->parentNode->replaceChild($highlightedNode, $matchedWordNode);
     }
 }
  * Highlight text in text node
  * @param DOMText $node
  * @param array   $wordsToHighlight
  * @param callback $callback   Callback method, used to transform (highlighting) text.
  * @param array    $params     Array of additional callback parameters (the first, non-optional parameter is the text to transform)
  * @throws Zend_Search_Lucene_Exception
 /**
  * Highlights the matched words inside a single text node.
  *
  * The node's text is tokenized with the default analyzer. Every token whose
  * term text is a key of $wordsToHighlight is cut out of the text node, handed
  * to $callback, and replaced in the document by the nodes parsed from the HTML
  * fragment the callback returns.
  *
  * @param DOMText  $node             Text node to process; it is split in place
  * @param array    $wordsToHighlight Words to highlight, looked up by array key
  * @param callback $callback         Called with the matched text followed by $params; returns an HTML fragment
  * @param array    $params           Additional callback parameters
  * @throws Zend_Search_Lucene_Exception When the returned fragment cannot be loaded as HTML
  */
 protected function _highlightTextNode(DOMText $node, $wordsToHighlight, $callback, $params)
 {
     /** Zend_Search_Lucene_Analysis_Analyzer */
     require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

     $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
     $analyzer->setInput($node->nodeValue, 'UTF-8');

     $matchedTokens = array();
     while (($token = $analyzer->nextToken()) !== null) {
         if (isset($wordsToHighlight[$token->getTermText()])) {
             $matchedTokens[] = $token;
         }
     }

     if (count($matchedTokens) == 0) {
         // Nothing to highlight in this node.
         return;
     }

     // Process the matches back to front: splitting shortens $node, so only
     // offsets located before the current match remain usable afterwards.
     $matchedTokens = array_reverse($matchedTokens);

     foreach ($matchedTokens as $token) {
         // Cut text after matched token, so the matched node holds the word only
         $node->splitText($token->getEndOffset());

         // Cut matched node
         $matchedWordNode = $node->splitText($token->getStartOffset());

         // Retrieve HTML string representation for highlighted word
         $fullCallbackparamsList = $params;
         array_unshift($fullCallbackparamsList, $matchedWordNode->nodeValue);
         $highlightedWordNodeSetHtml = call_user_func_array($callback, $fullCallbackparamsList);

         // Transform HTML string to a DOM representation and automatically transform retrieved string
         // into valid XHTML (It's automatically done by loadHTML() method).
         // The fragment is wrapped in a document that declares UTF-8; a bare fragment
         // carries no charset information, so non-ASCII text would be misdecoded.
         $highlightedWordNodeSetDomDocument = new DOMDocument('1.0', 'UTF-8');
         $success = @$highlightedWordNodeSetDomDocument->loadHTML('<html><head><meta http-equiv="Content-type" content="text/html; charset=UTF-8"/></head><body>' . $highlightedWordNodeSetHtml . '</body></html>');
         if (!$success) {
             require_once 'Zend/Search/Lucene/Exception.php';
             throw new Zend_Search_Lucene_Exception("Error occured while loading highlighted text fragment: '{$highlightedWordNodeSetHtml}'.");
         }

         $highlightedWordNodeSetXpath = new DOMXPath($highlightedWordNodeSetDomDocument);
         $highlightedWordNodeSet = $highlightedWordNodeSetXpath->query('/html/body')->item(0)->childNodes;

         // Copy every node of the parsed fragment into the target document, in front of the matched word.
         for ($count = 0; $count < $highlightedWordNodeSet->length; $count++) {
             $nodeToImport = $highlightedWordNodeSet->item($count);
             $node->parentNode->insertBefore($this->_doc->importNode($nodeToImport, true), $matchedWordNode);
         }

         // The highlighted copy now stands in for the original word; drop the original
         // so the word does not appear twice.
         $node->parentNode->removeChild($matchedWordNode);
     }
 }
Example #9
// Exercises the DOMCharacterData API through DOMComment, DOMCDataSection and
// DOMText, printing the observed value after each step.

/* DOMComment: length follows the data property */
$comment = new DOMComment('Testing character data and extending nodes');
echo "Comment Length: ", $comment->length, "\n";
$comment->data = 'Updated comment';
echo "New Comment Length: ", $comment->length, "\n";
echo "New Comment Data: ", $comment->data, "\n";

/* DOMCDataSection: read a substring, replace one character, read it again */
$cdata = new DOMCDataSection('Chars: <>&"');
echo "Substring: ", $cdata->substringData(7, 4), "\n";
$cdata->replaceData(10, 1, "'");
echo "New Substring: ", $cdata->substringData(7, 4), "\n";

/* DOMCharacterData using DOMComment: a sequence of in-place edits */
$comment = new DOMComment('instructions');
echo "Comment Value: ", $comment->data, "\n";
$comment->data = 'some more instructions';
echo "New Comment Value: ", $comment->data, "\n";
$comment->insertData(10, 'pi ');
$comment->replaceData(18, 5, 'i');
$comment->insertData(20, 'g');
$comment->deleteData(13, 2);
$comment->deleteData(10, 3);
$comment->insertData(10, 'comment ');
echo "Updated Comment Value: ", $comment->data, "\n";

/* DOMText: split the node twice and inspect the pieces */
$text = new DOMText('some text characters');
echo "Whole Text: ", $text->wholeText, "\n";
$text2 = $text->splitText(9);
echo "Split text: ", $text2->wholeText, "\n";
// The second split shortens $text2 to its first character; the remainder goes to $text3.
$text3 = $text2->splitText(1);
echo "Is Whitespace?: ", $text2->isElementContentWhitespace() ? 'YES' : 'NO';
Example #10
  * This method attempts to return a better 'innerText' representation than DOMNode::textContent
  * @param DOMElement|DOMText $el
  * @param bool $implied when parsing for implied name for h-*, rules may be slightly different
  * @see: https://github.com/glennjones/microformat-shiv/blob/dev/lib/text.js
 /**
  * Returns a better 'innerText' representation of a node than DOMNode::textContent.
  *
  * Rules applied, in order:
  * - excluded elements (script, style, ...) contribute an empty string;
  * - img yields its alt text, or its resolved src when not parsing an implied name;
  * - area yields its alt text and abbr its title, when non-empty;
  * - otherwise the text of the node and of all its descendants is concatenated,
  *   followed by a space for block-level elements (and, for implied names, for
  *   elements DOMDocument does not recognise) or by a newline for br.
  *
  * Tag names are compared case-insensitively.
  *
  * @param DOMElement|DOMText $el
  * @param bool $implied when parsing for implied name for h-*, rules may be slightly different
  * @return string|null The collected text, or null when nothing was collected
  * @see https://github.com/glennjones/microformat-shiv/blob/dev/lib/text.js
  */
 public function innerText($el, $implied = false)
 {
     $out = '';

     $blockLevelTags = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'hr', 'pre', 'table', 'address', 'article', 'aside', 'blockquote', 'caption', 'col', 'colgroup', 'dd', 'div', 'dt', 'dir', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'header', 'hgroup', 'li', 'map', 'menu', 'nav', 'optgroup', 'option', 'section', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'td', 'ul', 'ol', 'dl', 'details');
     // NOTE(review): 'noframe' and 'frames' are not HTML element names ('noframes' is); left as-is pending confirmation.
     $excludeTags = array('noframe', 'noscript', 'script', 'style', 'frames', 'frameset');
     // PHP DOMDocument doesn't correctly handle whitespace around elements it doesn't recognise.
     $unsupportedTags = array('data');

     // Normalise the tag name once so that every comparison below is case-insensitive.
     // Nodes without a tagName property (e.g. text nodes) get null.
     $tagName = isset($el->tagName) ? strtolower($el->tagName) : null;

     if ($tagName !== null) {
         if (in_array($tagName, $excludeTags, true)) {
             return $out;
         }

         if ($tagName === 'img') {
             if ($el->getAttribute('alt') !== '') {
                 return $el->getAttribute('alt');
             }
             if (!$implied && $el->getAttribute('src') !== '') {
                 return $this->resolveUrl($el->getAttribute('src'));
             }
         } elseif ($tagName === 'area' && $el->getAttribute('alt') !== '') {
             return $el->getAttribute('alt');
         } elseif ($tagName === 'abbr' && $el->getAttribute('title') !== '') {
             return $el->getAttribute('title');
         }
     }

     // if node is a text node get its text
     if (isset($el->nodeType) && $el->nodeType === XML_TEXT_NODE) {
         $out .= $el->textContent;
     }

     // get the text of the child nodes
     if ($el->childNodes) {
         foreach ($el->childNodes as $child) {
             $text = $this->innerText($child, $implied);
             if ($text !== null) {
                 $out .= $text;
             }
         }
     }

     if ($tagName !== null) {
         if (in_array($tagName, $blockLevelTags, true)) {
             // if its a block level tag add an additional space at the end
             $out .= ' ';
         } elseif ($implied && in_array($tagName, $unsupportedTags, true)) {
             $out .= ' ';
         } elseif ($tagName === 'br') {
             // else if its a br, replace with newline
             $out .= "\n";
         }
     }

     return $out === '' ? null : $out;
 }