/**
 * Creates the node from a string value, optionally validating its encoding first.
 *
 * When the 'dom/encoding/check' setting read through Sylma::read() is truthy and
 * the value is not valid UTF-8, 'Bad encoding' is reported through the
 * throwException() method of the object returned by getParent(). The parent
 * constructor is then called with the unchanged value.
 *
 * @param string $sValue Text content for the new node.
 */
public function __construct($sValue)
{
    $bCheckEncoding = Sylma::read('dom/encoding/check');

    if ($bCheckEncoding && !mb_check_encoding($sValue, 'UTF-8')) {
        // TODO: converting with utf8_encode() was tried here, but the result
        // is not always UTF-8, so the value is rejected instead.
        $this->getParent()->throwException('Bad encoding');
    }

    parent::__construct($sValue);
}
/**
 * Returns the line number on which this node is defined.
 *
 * `DOMNode::getLineNo()` does not report the correct line for text nodes, so
 * the owner document may carry a container of corrected line numbers (built by
 * `XMLDocument::fixLineNumbers()`). If that container exists and has an entry
 * for this node, the corrected value is returned; otherwise the value reported
 * by libxml is returned.
 *
 * @return int The corrected line number when available, else the libxml one.
 */
public function getLineNo()
{
    $lineNumbers = $this->ownerDocument->getLineNumberContainer();

    // No container, or no entry for this node: use libxml's own answer.
    if (!$lineNumbers || !$lineNumbers->contains($this)) {
        return parent::getLineNo();
    }

    return $lineNumbers->offsetGet($this);
}
/**
 * Builds a human-readable debugging dump of this frame.
 *
 * The dump lists the node name, frame id, class, a text preview (text nodes)
 * or CSS class (other nodes), the parent and sibling nodes, the chain of
 * decorators, position, containing block, margin sizes, style and, when the
 * decorator is a Block_Frame_Decorator, the content of each line box.
 *
 * The dump is HTML. Under the "cli" SAPI, "<br/>" is turned into a newline,
 * "<b>"/"</b>" are removed and any remaining tags are stripped.
 *
 * @return string
 */
function __toString()
{
    $node = $this->_node;

    // Header: node name, frame id and frame class.
    $out = "<b>" . $node->nodeName . ":</b><br/>";
    $out .= "Id: " . $this->get_id() . "<br/>";
    $out .= "Class: " . get_class($this) . "<br/>";

    if ($this->is_text_node()) {
        // Preview at most 70 characters of the escaped text.
        $escaped = htmlspecialchars($node->nodeValue);
        $preview = mb_substr($escaped, 0, 70);
        $ellipsis = "";
        if (mb_strlen($escaped) > 70) {
            $ellipsis = "...";
        }
        $out .= "<pre>'" . $preview . $ellipsis . "'</pre>";
    } else {
        $css_class = $node->getAttribute("class");
        if ($css_class) {
            $out .= "CSS class: '" . $css_class . "'<br/>";
        }
    }

    // Neighbouring frames, identified by node name and object hash.
    if ($this->_parent) {
        $parentNode = $this->_parent->_node;
        $out .= "\nParent:" . $parentNode->nodeName . " (" . spl_object_hash($parentNode) . ") <br/>";
    }

    if ($this->_prev_sibling) {
        $prevNode = $this->_prev_sibling->_node;
        $out .= "Prev: " . $prevNode->nodeName . " (" . spl_object_hash($prevNode) . ") <br/>";
    }

    if ($this->_next_sibling) {
        $nextNode = $this->_next_sibling->_node;
        $out .= "Next: " . $nextNode->nodeName . " (" . spl_object_hash($nextNode) . ") <br/>";
    }

    // Follow the decorator chain until it ends or a decorator compares equal
    // to its own decorator.
    for ($decorator = $this->get_decorator(); $decorator; $decorator = $inner) {
        $inner = $decorator->get_decorator();
        if ($decorator == $inner) {
            break;
        }
        $out .= "Decorator: " . get_class($decorator) . "<br/>";
    }

    // Geometry and style.
    $out .= "Position: " . pre_r($this->_position, true);
    $out .= "\nContaining block: " . pre_r($this->_containing_block, true);
    $out .= "\nMargin width: " . pre_r($this->get_margin_width(), true);
    $out .= "\nMargin height: " . pre_r($this->get_margin_height(), true);
    $out .= "\nStyle: <pre>" . $this->_style->__toString() . "</pre>";

    if ($this->_decorator instanceof Block_Frame_Decorator) {
        $out .= "Lines:<pre>";

        foreach ($this->_decorator->get_line_boxes() as $line) {
            foreach ($line->get_frames() as $frame) {
                if ($frame instanceof Text_Frame_Decorator) {
                    $out .= "\ntext: ";
                    $out .= "'" . htmlspecialchars($frame->get_text()) . "'";
                    continue;
                }

                $frameNode = $frame->get_node();
                $out .= "\nBlock: " . $frameNode->nodeName . " (" . spl_object_hash($frameNode) . ")";
            }

            // Metrics of the line box itself.
            $out .= "\ny => " . $line->y . "\n";
            $out .= "w => " . $line->w . "\n";
            $out .= "h => " . $line->h . "\n";
            $out .= "left => " . $line->left . "\n";
            $out .= "right => " . $line->right . "\n";
        }

        $out .= "</pre>";
    }

    $out .= "\n";

    if (php_sapi_name() === "cli") {
        // Plain-text output for the console.
        $plain = str_replace(array("<br/>", "<b>", "</b>"), array("\n", "", ""), $out);
        $out = strip_tags($plain);
    }

    return $out;
}
/**
 * Stores the frame id and mirrors it onto the underlying DOM node.
 *
 * The id is always kept in $this->_id. It is additionally written to the
 * node's "frame_id" attribute when the node is an element.
 *
 * @param mixed $id Identifier to assign to this frame.
 */
function set_id($id)
{
    $this->_id = $id;

    // Attributes can only be set on DOMElement objects (nodeType == 1). Those
    // are also the only nodes CSS rules can be assigned to, so skipping every
    // other node type is acceptable.
    if ($this->_node->nodeType != XML_ELEMENT_NODE) {
        return;
    }

    $this->_node->setAttribute("frame_id", $id);
}
/**
 * Sets the text content of the wrapped DOM node.
 *
 * A new text node holding $text replaces the first child of the wrapped node,
 * or is appended when the wrapped node has no first child.
 *
 * For PHP4 domxml compatibility the entities "&lt;", "&gt;" and "&amp;" in
 * $text are decoded first: under PHP4 a set_content("&amp;") led to
 * get_content() returning "&", whereas PHP5 would return "&amp;".
 *
 * @param string $text New text content; may contain the three entities above.
 */
function set_content($text)
{
    // Decode one level of entities. str_replace() applies the pairs in order,
    // so "&amp;" is handled last and "&amp;lt;" becomes "&lt;", not "<".
    $text = str_replace(
        array("&lt;", "&gt;", "&amp;"),
        array("<", ">", "&"),
        $text
    );

    $text_node = new DOMText();
    $text_node->appendData($text);

    if (is_object($this->myDOMNode->firstChild)) {
        $this->myDOMNode->replaceChild($text_node, $this->myDOMNode->firstChild);
    } else {
        $this->myDOMNode->appendChild($text_node);
    }
}
/**
 * Highlight text in text node
 *
 * The node text is tokenised with the default analyzer. Every token whose term
 * text is a key of $wordsToHighlight is cut out of the text node and replaced
 * by the DOM nodes parsed from the HTML string the callback returns for it.
 *
 * @param DOMText  $node
 * @param array    $wordsToHighlight Words to highlight, looked up by key.
 * @param callback $callback         Callback method, used to transform (highlighting) text.
 * @param array    $params           Additional callback parameters; the text to transform
 *                                   is always passed as the first argument.
 * @throws \Zend\Search\Lucene\Exception\RuntimeException
 */
protected function _highlightTextNode(\DOMText $node, $wordsToHighlight, $callback, $params)
{
    $analyzer = Analyzer\Analyzer::getDefault();
    $analyzer->setInput($node->nodeValue, 'UTF-8');

    // Collect the tokens that have to be highlighted.
    $hits = array();
    for ($token = $analyzer->nextToken(); $token !== null; $token = $analyzer->nextToken()) {
        if (isset($wordsToHighlight[$token->getTermText()])) {
            $hits[] = $token;
        }
    }

    if (!$hits) {
        return;
    }

    // Process matches from last to first: each split leaves $node holding the
    // text before the match, so the offsets of earlier tokens still apply.
    for ($i = count($hits) - 1; $i >= 0; $i--) {
        $token = $hits[$i];

        // Detach everything after the match, then the match itself.
        $node->splitText($token->getEndOffset());
        $wordNode = $node->splitText($token->getStartOffset());

        // Ask the callback for the HTML of the highlighted word.
        $arguments = array_merge(array($wordNode->nodeValue), $params);
        $fragmentHtml = call_user_func_array($callback, $arguments);

        // Parse the HTML into DOM nodes; loadHTML() also normalises the
        // fragment into valid markup.
        $fragmentDoc = new \DOMDocument('1.0', 'UTF-8');
        $loaded = @$fragmentDoc->loadHTML(
            '<html><head><meta http-equiv="Content-type" content="text/html; charset=UTF-8"/></head><body>'
            . $fragmentHtml
            . '</body></html>'
        );
        if (!$loaded) {
            throw new RuntimeException(
                "Error occured while loading highlighted text fragment: '" . $fragmentHtml . "'."
            );
        }

        $xpath = new \DOMXPath($fragmentDoc);
        $fragmentNodes = $xpath->query('/html/body')->item(0)->childNodes;

        // Copy every parsed node in front of the matched word, then drop the word.
        foreach ($fragmentNodes as $fragmentNode) {
            $node->parentNode->insertBefore($this->_doc->importNode($fragmentNode, true), $wordNode);
        }

        $node->parentNode->removeChild($wordNode);
    }
}
/**
 * Highlight text in text node
 *
 * The node text is tokenised with the default analyzer, using the encoding of
 * the owning document. Every token whose term text is a key of
 * $wordsToHighlight is replaced by a <b> element with a black foreground and
 * the given background colour.
 *
 * @param DOMText $node
 * @param array   $wordsToHighlight Words to highlight, looked up by key.
 * @param string  $color            Value for the CSS background-color property.
 */
public function _highlightTextNode(DOMText $node, $wordsToHighlight, $color)
{
    $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $analyzer->setInput($node->nodeValue, $this->_doc->encoding);

    // Collect the tokens that have to be highlighted.
    $hits = array();
    for ($token = $analyzer->nextToken(); $token !== null; $token = $analyzer->nextToken()) {
        if (isset($wordsToHighlight[$token->getTermText()])) {
            $hits[] = $token;
        }
    }

    if (!$hits) {
        return;
    }

    // Process matches from last to first: each split leaves $node holding the
    // text before the match, so the offsets of earlier tokens still apply.
    for ($i = count($hits) - 1; $i >= 0; $i--) {
        $token = $hits[$i];

        // Detach everything after the match, then the match itself.
        $node->splitText($token->getEndOffset());
        $wordNode = $node->splitText($token->getStartOffset());

        $boldNode = $this->_doc->createElement('b', $wordNode->nodeValue);
        $boldNode->setAttribute('style', 'color:black;background-color:' . $color);

        $node->parentNode->replaceChild($boldNode, $wordNode);
    }
}
/**
 * Highlight text in text node
 *
 * The node text is tokenised with the default analyzer. Every token whose term
 * text is a key of $wordsToHighlight is cut out of the text node and replaced
 * by the DOM nodes parsed from the HTML string the callback returns for it.
 *
 * @param DOMText  $node
 * @param array    $wordsToHighlight Words to highlight, looked up by key.
 * @param callback $callback         Callback method, used to transform (highlighting) text.
 * @param array    $params           Array of additional callback parameters (first
 *                                   non-optional parameter is a text to transform)
 * @throws Zend_Search_Lucene_Exception
 */
protected function _highlightTextNode(DOMText $node, $wordsToHighlight, $callback, $params)
{
    /** Zend_Search_Lucene_Analysis_Analyzer */
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

    $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $analyzer->setInput($node->nodeValue, 'UTF-8');

    $matchedTokens = array();

    while (($token = $analyzer->nextToken()) !== null) {
        if (isset($wordsToHighlight[$token->getTermText()])) {
            $matchedTokens[] = $token;
        }
    }

    if (count($matchedTokens) == 0) {
        return;
    }

    // Process matches from last to first: each split leaves $node holding the
    // text before the match, so the offsets of earlier tokens still apply.
    $matchedTokens = array_reverse($matchedTokens);

    foreach ($matchedTokens as $token) {
        // Cut text after matched token
        $node->splitText($token->getEndOffset());

        // Cut matched node
        $matchedWordNode = $node->splitText($token->getStartOffset());

        // Retrieve HTML string representation for highlighted word
        $fullCallbackparamsList = $params;
        array_unshift($fullCallbackparamsList, $matchedWordNode->nodeValue);
        $highlightedWordNodeSetHtml = call_user_func_array($callback, $fullCallbackparamsList);

        // Transform HTML string to a DOM representation and automatically transform retrieved string
        // into valid XHTML (It's automatically done by loadHTML() method).
        //
        // The fragment is wrapped in a document skeleton that declares UTF-8 in a
        // meta tag. Given a bare fragment, loadHTML() finds no charset declaration
        // and does not read the bytes as UTF-8, which corrupts non-ASCII words.
        // NOTE(review): the explicit <body> may change how libxml normalises
        // fragments that are bare text - confirm against callers' expectations.
        $highlightedWordNodeSetDomDocument = new DOMDocument('1.0', 'UTF-8');
        // "@" is kept on purpose: parser warnings for sloppy fragments are
        // expected; real failures are reported through the return value.
        $success = @$highlightedWordNodeSetDomDocument->loadHTML(
            '<html><head><meta http-equiv="Content-type" content="text/html; charset=UTF-8"/></head><body>'
            . $highlightedWordNodeSetHtml
            . '</body></html>'
        );
        if (!$success) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception("Error occured while loading highlighted text fragment: '{$highlightedWordNodeSetHtml}'.");
        }

        $highlightedWordNodeSetXpath = new DOMXPath($highlightedWordNodeSetDomDocument);
        $highlightedWordNodeSet = $highlightedWordNodeSetXpath->query('/html/body')->item(0)->childNodes;

        // Copy every parsed node in front of the matched word, then drop the word.
        for ($count = 0; $count < $highlightedWordNodeSet->length; $count++) {
            $nodeToImport = $highlightedWordNodeSet->item($count);
            $node->parentNode->insertBefore(
                $this->_doc->importNode($nodeToImport, true),
                $matchedWordNode
            );
        }

        $node->parentNode->removeChild($matchedWordNode);
    }
}
/* DOMComment: attach a comment to $charnode (presumably created earlier in the
   script, outside this excerpt) and print its length before and after the
   data property is overwritten. */
$comment = new DOMComment('Testing character data and extending nodes');
$charnode->appendChild($comment);
echo "Comment Length: " . $comment->length . "\n";
$comment->data = 'Updated comment';
echo "New Comment Length: " . $comment->length . "\n";
echo "New Comment Data: " . $comment->data . "\n";

/* DOMCDataSection */
/* Print the four characters starting at offset 7, replace the single character
   at offset 10 with an apostrophe, then print the same range again. */
$cdata = new DOMCDataSection('Chars: <>&"');
$charnode->appendChild($cdata);
echo "Substring: " . $cdata->substringData(7, 4) . "\n";
$cdata->replaceData(10, 1, "'");
echo "New Substring: " . $cdata->substringData(7, 4) . "\n";

/* DOMCharacterData using DOMComment */
/* A comment that is not attached to any node is edited through the
   character-data methods: data assignment, insertData, replaceData and
   deleteData. */
$comment = new DOMComment('instructions');
echo "Comment Value: " . $comment->data . "\n";
$comment->data = 'some more instructions';
echo "New Comment Value: " . $comment->data . "\n";
/* Chain of offset-based edits. Going by DOM CharacterData offset semantics the
   data should end up as 'some more comment strings' - TODO confirm against the
   expected output of this script. */
$comment->insertData(10, 'pi ');
$comment->replaceData(18, 5, 'i');
$comment->insertData(20, 'g');
$comment->deleteData(13, 2);
$comment->deleteData(10, 3);
$comment->insertData(10, 'comment ');
echo "Updated Comment Value: " . $comment->data . "\n";

/* DOMText */
/* Split an unattached text node at offset 9, split the resulting tail again at
   offset 1, and print wholeText / isElementContentWhitespace() for the pieces. */
$text = new DOMText('some text characters');
echo "Whole Text: " . $text->wholeText . "\n";
$text2 = $text->splitText(9);
echo "Split text: " . $text2->wholeText . "\n";
$text3 = $text2->splitText(1);
echo "Is Whitespace?: " . ($text2->isElementContentWhitespace() ? 'YES' : 'NO');
/**
 * This method attempts to return a better 'innerText' representation than DOMNode::textContent
 *
 * Rules applied, in order:
 *  - elements listed in $excludeTags yield an empty string;
 *  - img yields its non-empty alt, else (unless $implied) its resolved non-empty src;
 *  - area yields its non-empty alt, abbr its non-empty title;
 *  - otherwise the text of the node and of all its children is concatenated,
 *    with a trailing space after block-level elements (and, when $implied,
 *    after elements in $unsupportedTags) and a newline for br.
 *
 * Tag names are compared case-insensitively in every rule.
 *
 * @param DOMElement|DOMText $el
 * @param bool $implied when parsing for implied name for h-*, rules may be slightly different
 * @return string|null The collected text; '' for excluded elements; null when
 *                     nothing was collected.
 * @see: https://github.com/glennjones/microformat-shiv/blob/dev/lib/text.js
 */
public function innerText($el, $implied = false)
{
    $out = '';

    // NOTE(review): 'testarea' (textarea?), 'noframe' (noframes?) and 'frames'
    // (frame?) look like typos, and 'hr' is listed twice. The lists are left
    // untouched here because correcting them would change the produced text.
    $blockLevelTags = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'hr', 'pre', 'table',
        'address', 'article', 'aside', 'blockquote', 'caption', 'col', 'colgroup', 'dd', 'div',
        'dt', 'dir', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'header', 'hgroup', 'hr',
        'li', 'map', 'menu', 'nav', 'optgroup', 'option', 'section', 'tbody', 'testarea',
        'tfoot', 'th', 'thead', 'tr', 'td', 'ul', 'ol', 'dl', 'details');

    $excludeTags = array('noframe', 'noscript', 'script', 'style', 'frames', 'frameset');

    // PHP DOMDocument doesn't correctly handle whitespace around elements it doesn't recognise.
    $unsupportedTags = array('data');

    // Normalise the tag name once so that every rule below is case-insensitive.
    // Nodes without a tagName (for example text nodes) get null.
    $tagName = isset($el->tagName) ? strtolower($el->tagName) : null;

    if ($tagName !== null) {
        if (in_array($tagName, $excludeTags, true)) {
            return $out;
        }

        if ($tagName === 'img') {
            if ($el->getAttribute('alt') !== '') {
                return $el->getAttribute('alt');
            }
            if (!$implied && $el->getAttribute('src') !== '') {
                return $this->resolveUrl($el->getAttribute('src'));
            }
        } elseif ($tagName === 'area' && $el->getAttribute('alt') !== '') {
            return $el->getAttribute('alt');
        } elseif ($tagName === 'abbr' && $el->getAttribute('title') !== '') {
            return $el->getAttribute('title');
        }
    }

    // if node is a text node get its text
    if (isset($el->nodeType) && $el->nodeType === XML_TEXT_NODE) {
        $out .= $el->textContent;
    }

    // get the text of the child nodes
    if ($el->childNodes && $el->childNodes->length > 0) {
        foreach ($el->childNodes as $child) {
            $text = $this->innerText($child, $implied);
            if ($text !== null) {
                $out .= $text;
            }
        }
    }

    if ($tagName !== null) {
        if (in_array($tagName, $blockLevelTags, true)) {
            // if its a block level tag add an additional space at the end
            $out .= ' ';
        } elseif ($implied && in_array($tagName, $unsupportedTags, true)) {
            $out .= ' ';
        } elseif ($tagName === 'br') {
            // else if its a br, replace with newline
            $out .= "\n";
        }
    }

    return $out === '' ? null : $out;
}