/**
 * Remove every node matched by the tag blacklist
 *
 * The entries of $this->tag_blacklist are joined with " | " and evaluated
 * as a single XPath union expression against the parsed document.
 *
 * @access public
 * @param  string  $data  Input data
 * @return string         Serialized document without the matched nodes, or an
 *                        empty string when XmlParser::getDomDocument() returns false
 */
public function removeBlacklistedTags($data)
{
    $dom = XmlParser::getDomDocument($data);

    if ($dom === false) {
        return '';
    }

    // An empty blacklist would produce an empty XPath expression, which
    // DOMXPath::query() rejects with a warning: nothing to remove in that case.
    if (! empty($this->tag_blacklist)) {
        $xpath = new DOMXpath($dom);
        $nodes = $xpath->query(implode(' | ', $this->tag_blacklist));

        // query() returns false when the expression is malformed
        if ($nodes !== false) {
            foreach ($nodes as $node) {
                // A node without a parent (e.g. the document node) cannot be detached
                if ($node->parentNode !== null) {
                    $node->parentNode->removeChild($node);
                }
            }
        }
    }

    return $dom->saveXML();
}
/**
 * Strip unwanted elements from $this->content
 *
 * First removes every element whose tag name is listed in $this->stripTags,
 * then every element whose class or id attribute contains one of the
 * $this->stripAttributes values and for which shouldRemove() returns true.
 * The content is left untouched when XmlParser::getDomDocument() returns false.
 *
 * @access public
 */
public function stripGarbage()
{
    $document = XmlParser::getDomDocument($this->content);

    if ($document === false) {
        return;
    }

    $finder = new DOMXPath($document);

    // Pass 1: drop elements by tag name
    foreach ($this->stripTags as $tag) {
        $matches = $finder->query('//' . $tag);

        if ($matches === false || $matches->length < 1) {
            continue;
        }

        Logger::setMessage(get_called_class() . ': Strip tag: "' . $tag . '"');

        foreach ($matches as $element) {
            $element->parentNode->removeChild($element);
        }
    }

    // Pass 2: drop elements by class/id substring, subject to shouldRemove()
    foreach ($this->stripAttributes as $attribute) {
        $expression = '//*[contains(@class, "' . $attribute . '") or contains(@id, "' . $attribute . '")]';
        $matches = $finder->query($expression);

        if ($matches === false || $matches->length < 1) {
            continue;
        }

        Logger::setMessage(get_called_class() . ': Strip attribute: "' . $attribute . '"');

        foreach ($matches as $element) {
            if (! $this->shouldRemove($document, $element)) {
                continue;
            }

            $element->parentNode->removeChild($element);
        }
    }

    // Serialize from the root element only
    $this->content = $document->saveXML($document->documentElement);
}
/**
 * Strip unwanted tags and attributes from a piece of markup.
 *
 * Delegates the actual removal to stripTags() and stripAttributes() and
 * serializes the result starting at the document element.
 *
 * @param string $content Markup to clean up
 *
 * @return string Cleaned markup, or the input unchanged when
 *                XmlParser::getDomDocument() returns false
 */
public function stripGarbage($content)
{
    $document = XmlParser::getDomDocument($content);

    // Nothing to clean when no DOM could be built
    if ($document === false) {
        return $content;
    }

    $finder = new DOMXPath($document);

    $this->stripTags($finder);
    $this->stripAttributes($document, $finder);

    return $document->saveXML($document->documentElement);
}
/**
 * A document that declares an internal DTD subset must be returned as a
 * DOMDocument by XmlParser::getDomDocument() and validate against that DTD.
 */
public function testScanXmlWithDTD()
{
    $xml = <<<XML
<?xml version="1.0"?>
<!DOCTYPE results [
<!ELEMENT results (result+)>
<!ELEMENT result (#PCDATA)>
]>
<results>
    <result>test</result>
</results>
XML;

    $result = XmlParser::getDomDocument($xml);

    // assertInstanceOf reports the actual type on failure, unlike assertTrue
    $this->assertInstanceOf('DOMDocument', $result);
    $this->assertTrue($result->validate());
}