/**
 * Get this DOM node's plain text.
 *
 * Resolution order:
 *  1. If an inner-content entry (HDOM_INFO_INNER) is set on the node, it is returned as-is.
 *  2. Text nodes return their stored text passed through the owning DOM's restore_noise().
 *  3. Comment and unknown nodes contribute no text.
 *  4. script and style elements (tag matched case-insensitively) contribute no text.
 *  5. Otherwise the text() of every child node is concatenated in order; for a
 *     "span" tag the DOM's default_span_text is appended afterwards.
 *
 * @return string
 */
public function text()
{
    if (isset($this->_[HDOM_INFO_INNER])) {
        return $this->_[HDOM_INFO_INNER];
    }

    // Loose comparisons are deliberate: they match what a switch statement does.
    $type = $this->nodetype;
    if ($type == HDOM_TYPE_TEXT) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }
    if ($type == HDOM_TYPE_COMMENT || $type == HDOM_TYPE_UNKNOWN) {
        return '';
    }

    // Script and style bodies are never reported as plain text.
    if (strcasecmp($this->tag, 'script') === 0 || strcasecmp($this->tag, 'style') === 0) {
        return '';
    }

    // In rare cases (always element nodes - observed for some span and p tags)
    // $this->nodes is NULL. The cause is not understood yet, so such a node is
    // simply treated as having no text instead of being iterated.
    if ($this->nodes === null) {
        return '';
    }

    $plain = '';
    foreach ($this->nodes as $child) {
        $plain .= $child->text();
    }

    // Spans get the configured trailing text appended so that consecutive spans
    // do not run into each other in the plain-text output.
    if ($this->tag == 'span') {
        $plain .= $this->dom->default_span_text;
    }

    return $plain;
}
/**
 * Build the opening-tag markup for this node.
 *
 * Nodes that carry a DomInfo::TEXT entry (text, comment, unknown) return that
 * entry passed through the owning DOM's restore_noise(). For every other node
 * the tag name and its attributes are assembled, using the whitespace and
 * quote style recorded per attribute position in DomInfo::SPACE and
 * DomInfo::QUOTE.
 *
 * @return string
 */
private function makeup()
{
    // text, comment, unknown
    if (isset($this->_[DomInfo::TEXT])) {
        return $this->dom->restore_noise($this->_[DomInfo::TEXT]);
    }

    $markup = '<' . $this->tag;

    // Position counter: it advances for every attribute, including skipped ones,
    // so it stays aligned with the per-attribute SPACE / QUOTE entries.
    $pos = -1;
    foreach ($this->attr as $name => $value) {
        ++$pos;

        // Attributes set to null or false have been removed: emit nothing.
        if ($value === null || $value === false) {
            continue;
        }

        $markup .= $this->_[DomInfo::SPACE][$pos][0];

        // Value-less attribute (nowrap, checked, selected, ...): name only.
        if ($value === true) {
            $markup .= $name;
            continue;
        }

        // Loose comparisons are deliberate: they match what a switch statement does.
        $quoteType = $this->_[DomInfo::QUOTE][$pos];
        if ($quoteType == QuoteType::DOUBLE) {
            $quote = '"';
        } elseif ($quoteType == QuoteType::SINGLE) {
            $quote = '\'';
        } else {
            $quote = '';
        }

        $markup .= $name
            . $this->_[DomInfo::SPACE][$pos][1]
            . '='
            . $this->_[DomInfo::SPACE][$pos][2]
            . $quote . $value . $quote;
    }

    return $this->dom->restore_noise($markup) . $this->_[DomInfo::ENDSPACE] . '>';
}