示例#1
0
/**
 * Parses a post body as an HTML fragment, runs it through process() and
 * returns the serialized HTML.
 *
 * @param string $postText  Raw post markup to clean up.
 * @param string $name      Forwarded unchanged to HTML5_Parser::parseFragment().
 * @param bool   $noSmilies Stored in the global $postNoSmilies before parsing
 *                          (presumably read by the parser/process step to
 *                          disable smilies -- confirm against those helpers).
 *
 * @return string Serialized HTML of the processed document, or "" when the
 *                fragment produced no nodes.
 */
function cleanUpPost($postText, $name = "", $noSmilies = false)
{
    global $filter_tags, $bbcode, $postNoSmilies;
    $postNoSmilies = $noSmilies;
    require_once 'HTML5/Parser.php';
    $nodes = HTML5_Parser::parseFragment($postText, null, null, $filter_tags, $bbcode, $name);
    // An empty fragment yields an empty node list; item(0) is then null and
    // must not be dereferenced.
    $firstNode = $nodes->item(0);
    if (!$firstNode) {
        return "";
    }
    $document = $firstNode->ownerDocument;
    // The DOM tree is empty. Ignore it.
    if (!$document) {
        return "";
    }
    process($document);
    return $document->saveHTML();
}
示例#2
0
 /**
  * Checks that HTML5_Parser::parseFragment() hands back a DOMNodeList
  * for a small fragment mixing an element and trailing text.
  */
 public function testParseFragment()
 {
     $this->assertIsA(
         HTML5_Parser::parseFragment('<b>asdf</b> foo'),
         'DOMNodeList'
     );
 }
/**
 * Replaces WordPress media elements in an article with the markup produced
 * by cc_wordpress_figure() and returns the re-serialized article body.
 *
 * Every <img>, <audio>, <video> and <object> element carrying a
 * "wp-<type>-<id>" class is looked up by its id (and optional "size-<name>"
 * class). When cc_wordpress_figure() returns markup containing a <figure>
 * element, that element replaces the original node. Elements without a
 * "wp-<type>-<id>" class are left untouched.
 *
 * @param string $article Article HTML.
 *
 * @return string The contents of the parsed document's <body>, serialized
 *                as XML with a few string-level workarounds applied.
 */
function cc_wordpress_article_filter($article)
{
    require_once 'lib/html5lib/Parser.php';
    // sorry, but parseFragment() returns a DomNodeList, which is as inflexible as it gets
    $dom = HTML5_Parser::parse($article);
    $tagnames = array('img', 'audio', 'video', 'object');
    foreach ($tagnames as $tagname) {
        // getElementsByTagName() returns a live list. Replacing nodes while
        // iterating it shifts the remaining entries, so work on a snapshot.
        $elements = iterator_to_array($dom->getElementsByTagName($tagname), false);
        foreach ($elements as $element) {
            $class = $element->getAttribute('class');
            // relevant class name example: wp-image-18
            if (!preg_match('/wp-(image|audio|video|object)-([0-9]*)/', $class, $matches)) {
                // not a WordPress attachment; nothing to look up
                continue;
            }
            $id = $matches[2];
            // relevant class name example: size-medium
            // Capture only up to the next whitespace so that following
            // class names do not end up in the size.
            $size = preg_match('/size-(\S*)/', $class, $matches) ? $matches[1] : null;
            // TODO: make cc_wordpress_figure() take and return a DOM fragment
            $figure_html = cc_wordpress_figure($id, $size, false);
            // only replace node if we actually got something
            if (!$figure_html) {
                continue;
            }
            // Locate the <figure>: it may be a top-level node of the parsed
            // fragment or nested inside one. Text nodes are skipped.
            $figure = null;
            foreach (HTML5_Parser::parseFragment($figure_html) as $node) {
                if (!($node instanceof DOMElement)) {
                    continue;
                }
                if ($node->nodeName === 'figure') {
                    $figure = $node;
                } else {
                    $figure = $node->getElementsByTagName('figure')->item(0);
                }
                if ($figure) {
                    break;
                }
            }
            if (!$figure) {
                // markup without a <figure> element; keep the original node
                continue;
            }
            // a document context change is needed before appending the node
            $figure = $dom->importNode($figure, true);
            $element->parentNode->replaceChild($figure, $element);
        }
    }
    // hackish but reliable way to serialize the DOM
    // TODO: fix this mess
    $XML = $dom->saveXML($dom->getElementsByTagName('body')->item(0));
    $XML = str_replace('<body>', '', $XML);
    $XML = str_replace('</body>', '', $XML);
    // work around a bug regarding <style> elements including CSS '>' selectors
    $XML = str_replace('&gt;', '>', $XML);
    // work around the IE bug that some elements are serialized with a null namespace
    $XML = str_replace('embedNode.value = helperNode.innerHTML;', 'embedNode.value = helperNode.innerHTML.replace(/<:/g,"<").replace(/<.:/g,"</");', $XML);
    return $XML;
}