/**
 * Flatten a tidy node and all of its descendants into a list of string tokens.
 *
 * A named node contributes "<prefix><name attrs>start", then the tokens of
 * its children (each carrying the extended prefix), then
 * "<prefix><name attrs>end". A node without a name contributes one token per
 * space-separated word of its value, each wrapped in double quotes and
 * preceded by the current prefix.
 *
 * @param \tidyNode $tidyNode node to tokenize
 * @param string    $prefix   path of the enclosing tags, prepended to every token
 *
 * @return array list of token strings in document order
 */
private function toArray(\tidyNode $tidyNode, $prefix = '')
{
    $result = array();

    if (trim($tidyNode->name) === '') {
        // Unnamed node: split its value on single spaces and skip the empty
        // pieces produced by consecutive spaces.
        $text = trim($tidyNode->value);
        if ($text !== '') {
            foreach (explode(' ', $text) as $word) {
                if ($word !== '') {
                    $result[] = $prefix . '"' . $word . '"';
                }
            }
        }

        return $result;
    }

    // The is_array() guard skips nodes whose attribute property is not an array.
    $attributesString = '';
    if (is_array($tidyNode->attribute)) {
        foreach ($tidyNode->attribute as $name => $value) {
            $attributesString .= ' ' . $name . '="' . $value . '"';
        }
    }

    $prefix .= '<' . $tidyNode->name . $attributesString . '>';
    $result[] = $prefix . 'start';

    if ($tidyNode->hasChildren()) {
        foreach ($tidyNode->child as $childNode) {
            // Append token by token instead of array_merge()-ing inside the
            // loop: merging re-copies everything collected so far for every
            // child, which is quadratic on wide trees.
            foreach ($this->toArray($childNode, $prefix) as $token) {
                $result[] = $token;
            }
        }
    }

    $result[] = $prefix . 'end';

    return $result;
}
/**
 * Debug helper: var_dump() every descendant of a tidy node.
 *
 * First dumps whether $node has children. Then every child is dumped, and
 * the function recurses into those children that have children themselves.
 *
 * @param tidyNode $node node whose subtree is dumped
 *
 * @return void
 */
function dump_nodes(tidyNode $node)
{
    $hasKids = $node->hasChildren();
    var_dump($hasKids);

    if (!$hasKids) {
        return;
    }

    foreach ($node->child as $kid) {
        var_dump($kid);

        // Only descend into non-leaf children, so no "bool(false)" line is
        // printed for leaves.
        if ($kid->hasChildren()) {
            dump_nodes($kid);
        }
    }
}
/**
 * Collect the href values of all anchor elements at or below a tidy node.
 *
 * @param tidyNode   $node node to start from
 * @param array|null $urls accumulator, passed by reference; any value that is
 *                         not an array is replaced by an empty array
 *
 * @return array the accumulator after the subtree has been visited
 */
function dump_nodes(tidyNode $node, &$urls = NULL)
{
    if (!is_array($urls)) {
        $urls = array();
    }

    // Anchors without an href (e.g. <a name="top">) and nodes without any
    // attributes are skipped, instead of raising a notice and storing NULL.
    if (isset($node->id)
        && $node->id === TIDY_TAG_A
        && isset($node->attribute['href'])
    ) {
        $urls[] = $node->attribute['href'];
    }

    if ($node->hasChildren()) {
        foreach ($node->child as $c) {
            dump_nodes($c, $urls);
        }
    }

    return $urls;
}
/**
 * Print a tidy node and its descendants as an indented tree.
 *
 * Each line is written through do_leaf() together with the current indent.
 * Children are printed with the indent increased by 3.
 *
 * @param tidyNode $node   node to print
 * @param int      $indent indent value handed to do_leaf() for this level
 *
 * @return void
 */
function dump_tree(tidyNode $node, $indent = 0)
{
    /* Put something there if the node name is empty */
    $nodename = trim(strtoupper($node->name));
    $nodename = empty($nodename) ? "[EMPTY]" : $nodename;

    /* Generate the Node, and a pretty name for it */
    do_leaf(" + {$nodename} (" . node_type($node->type) . ")\n", $indent);

    /* Check to see if this node is a text node. Text nodes are
       generated by start/end tags and contain the text in between.
       i.e. <B>foo</B> will create a text node with $node->value
       equal to 'foo' */
    if ($node->type == TIDY_NODETYPE_TEXT) {
        do_leaf(" |\n", $indent);
        do_leaf(" +---- Value: '{$node->value}'\n", $indent);
    }

    /* The attribute property is NULL for nodes without attributes, so test
       it with empty() rather than count(): count(NULL) warns on PHP 7.2+
       and throws a TypeError on PHP 8. */
    if (!empty($node->attribute)) {
        do_leaf(" |\n", $indent);
        do_leaf(" +---- Attributes\n", $indent);

        foreach ($node->attribute as $name => $value) {
            /* NOTE(review): the @ suppression is kept from the original;
               do_leaf() is not visible here, so it is unclear what it was
               guarding against - confirm and remove if unnecessary. */
            @do_leaf(" +-- {$name}\n", $indent);
            do_leaf(" | +-- Value: {$value}\n", $indent);
        }
    }

    /* Recurse along the children to generate the remaining nodes */
    if ($node->hasChildren()) {
        foreach ($node->child as $child) {
            dump_tree($child, $indent + 3);
        }
    }
}
/**
 * Convert blockquotes to quotes ("> ") and strip tags.
 *
 * This function uses tidy or DOM to recursively walk the DOM tree of the HTML mail.
 * Only the text of leaf nodes ends up in the result; 'blockquote', 'br' and 'div'
 * elements additionally influence quote prefixes and line breaks, every other
 * element just contributes the text of its descendants.
 *
 * @see http://php.net/manual/de/tidy.root.php
 * @see http://php.net/manual/en/book.dom.php
 *
 * @param tidyNode|DOMNode $_node        node whose children are converted
 * @param integer          $_quoteIndent current blockquote nesting depth (number of quote prefixes per text leaf)
 * @param string           $_eol         line break sequence appended to the result
 * @return string the plain text built from the children of $_node ('' if it has none)
 *
 * @todo we can transform more tags here, i.e. the <strong>BOLDTEXT</strong> tag could be replaced with *BOLDTEXT*
 * @todo think about removing the tidy code
 * @todo reduce complexity
 */
public static function addQuotesAndStripTags($_node, $_quoteIndent = 0, $_eol = "\n")
{
    $result = '';

    // DOMNode and the other supported node type (tidyNode per the docblock)
    // expose the same information under different member names.
    $hasChildren = $_node instanceof DOMNode ? $_node->hasChildNodes() : $_node->hasChildren();
    $nameProperty = $_node instanceof DOMNode ? 'nodeName' : 'name';
    $valueProperty = $_node instanceof DOMNode ? 'nodeValue' : 'value';

    // TRUE while a line break for an enclosing 'div' is still pending
    $divNewline = FALSE;

    if ($hasChildren) {
        $lastChild = NULL;
        $children = $_node instanceof DOMNode ? $_node->childNodes : $_node->child;

        if ($_node->{$nameProperty} == 'div') {
            $divNewline = TRUE;
        }

        foreach ($children as $child) {
            // text leaf: a DOM node named '#text', or a non-DOM node with an empty name
            // NOTE: $nameProperty / $valueProperty were derived from $_node and are reused for $child
            $isTextLeaf = $child instanceof DOMNode ? $child->{$nameProperty} == '#text' : !$child->{$nameProperty};
            if ($isTextLeaf) {
                // leaf -> add quotes and append to content string
                if ($_quoteIndent > 0) {
                    $result .= str_repeat(self::QUOTE, $_quoteIndent) . $child->{$valueProperty};
                } else {
                    if (Tinebase_Core::isLogLevel(Zend_Log::TRACE)) {
                        Tinebase_Core::getLogger()->trace(__METHOD__ . '::' . __LINE__ . ' ' . "value: " . $child->{$valueProperty} . " / name: " . $_node->{$nameProperty} . "\n");
                    }
                    // unquoted text inside a div: emit the pending div line break once, before the text
                    if ($divNewline) {
                        $result .= $_eol . str_repeat(self::QUOTE, $_quoteIndent);
                        $divNewline = FALSE;
                    }
                    $result .= $child->{$valueProperty};
                }
            } else {
                if ($child->{$nameProperty} == 'blockquote') {
                    // opening blockquote
                    $_quoteIndent++;
                } else {
                    if ($child->{$nameProperty} == 'br') {
                        if (Tinebase_Core::isLogLevel(Zend_Log::TRACE)) {
                            Tinebase_Core::getLogger()->trace(__METHOD__ . '::' . __LINE__ . ' ' . "value: " .
                                $child->{$valueProperty} . " / name: " . $_node->{$nameProperty} . "\n");
                        }
                        // reset quoted state on newline
                        if ($lastChild !== NULL && $lastChild->{$nameProperty} == 'br') {
                            // add quotes to repeating newlines
                            $result .= str_repeat(self::QUOTE, $_quoteIndent);
                        }
                        $result .= $_eol;
                        // the br already produced a line break, so the pending div line break is dropped
                        $divNewline = FALSE;
                    }
                }
            }

            // recurse into every child; for a child without children this adds ''
            $result .= self::addQuotesAndStripTags($child, $_quoteIndent, $_eol);

            if ($child->{$nameProperty} == 'blockquote') {
                // closing blockquote
                $_quoteIndent--;
                // add newline after last closing blockquote
                if ($_quoteIndent == 0) {
                    $result .= $_eol;
                }
            }

            // remembered to detect consecutive 'br' elements in the next iteration
            $lastChild = $child;
        }

        // add newline if closing div
        if ($divNewline) {
            $result .= $_eol . str_repeat(self::QUOTE, $_quoteIndent);
        }
    }

    return $result;
}
/**
 * Recursive helper that fills the token array ($this->_aTokens).
 *
 * For every node that has children, one entry array(type, value, line) is
 * appended, then the children are visited in order.
 *
 * NOTE(review): nodes without children add no entry at all - confirm that
 * skipping leaf nodes is intended.
 *
 * @param tidyNode $oNode node to start from
 *
 * @return void
 */
protected function _tokenHelper($oNode)
{
    if (!$oNode->hasChildren()) {
        return;
    }

    $aToken = array($oNode->type, $oNode->value, $oNode->line);
    $this->_aTokens[] = $aToken;

    foreach ($oNode->child as $oChild) {
        $this->_tokenHelper($oChild);
    }
}
/**
 * Convert a tidy node to plain text, writing into $output_.
 *
 * Comment nodes are ignored. If a converter method name is registered in
 * $this->m_tagConverters for the node's name, that method is called with the
 * node and the output reference, and is responsible for the whole node.
 * Otherwise the children are processed recursively in order.
 *
 * @param \tidyNode $node_   node to convert
 * @param mixed     $output_ output accumulator, passed by reference to the converters
 *
 * @return void
 */
protected function nodeToPlainText(\tidyNode $node_, &$output_)
{
    if ($node_->isComment()) {
        return;
    }

    if (isset($this->m_tagConverters[$node_->name])) {
        // Dynamic method call, so $output_ keeps being passed by reference.
        $converterMethod = $this->m_tagConverters[$node_->name];
        $this->{$converterMethod}($node_, $output_);

        return;
    }

    if (!$node_->hasChildren()) {
        return;
    }

    foreach ($node_->child as $childNode) {
        $this->nodeToPlainText($childNode, $output_);
    }
}