/**
 * Load an HTML file and serialize its <body> element.
 *
 * @param string $path Location of the HTML file, handed to DOMDocument::loadHTMLFile()
 * @return mixed The result of dom_save_html() for the document's first <body> node
 */
function getHtmlBody($path)
{
    $document = new DOMDocument();
    $document->loadHTMLFile($path);

    // Only the first <body> element of the parsed document is serialized.
    $bodyElements = $document->getElementsByTagName('body');

    return dom_save_html($document, $bodyElements->item(0));
}
/**
 * Sanitize text for display and expand glossary terms inside it.
 *
 * Processing order:
 *  1. every [m]...[/m] span is passed through math_unescape();
 *  2. the result goes through mathfilter() and then the global $purifier;
 *  3. if $_SESSION['glossary_terms_regexp'] is set, each text node of the
 *     purified HTML is handed to glossary_expand() and, when the text was
 *     changed, replaced by the returned markup wrapped in a <span>.
 *
 * @param string $text    The raw text to escape
 * @param string $mathimg Third argument forwarded to mathfilter()
 *                        (presumably the math image directory - confirm against mathfilter)
 * @return string The purified HTML, with glossary markup when glossary terms are active
 */
function standard_text_escape($text, $mathimg = '../../courses/mathimg/')
{
    global $purifier;

    $text = preg_replace_callback('/\\[m\\].*?\\[\\/m\\]/s', 'math_unescape', $text);
    $html = $purifier->purify(mathfilter($text, 12, $mathimg));

    // Without glossary terms in the session there is nothing to expand.
    if (!isset($_SESSION['glossary_terms_regexp'])) {
        return $html;
    }

    $dom = new DOMDocument();
    // workaround because DOM doesn't handle utf8 encoding correctly.
    @$dom->loadHTML('<div>' . mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8') . '</div>');

    $xpath = new DOMXpath($dom);
    $textNodes = $xpath->query('//text()');
    foreach ($textNodes as $textNode) {
        if (empty($textNode->data)) {
            continue;
        }

        $new_contents = glossary_expand($textNode->data);
        // Strict comparison: loose != treats numeric-looking strings such as
        // "1e3" and "1000" as equal and would skip a real change.
        if ($new_contents === $textNode->data) {
            continue;
        }

        $newdoc = new DOMDocument();
        $loaded = $newdoc->loadXML(
            '<span>' . $new_contents . '</span>',
            LIBXML_NONET | LIBXML_DTDLOAD | LIBXML_DTDATTR
        );
        $span = $loaded ? $newdoc->getElementsByTagName('span')->item(0) : null;
        if ($span === null) {
            // The expanded text was not well-formed XML (e.g. it contained a
            // bare "&" or "<"). Keep the original text node instead of passing
            // null to importNode(), which raises a TypeError on PHP 8.
            continue;
        }

        $newnode = $dom->importNode($span, true);
        $textNode->parentNode->replaceChild($newnode, $textNode);
    }

    $base_node = $dom->getElementsByTagName('div')->item(0);

    // Strip the wrapper <div> added above, then apply the iframe hack:
    // self-closed <iframe .../> tags are rewritten as <iframe ...></iframe>.
    return preg_replace(
        array('|^<div>(.*)</div>$|s', '#(<iframe [^>]+)/>#'),
        array('\\1', '\\1></iframe>'),
        dom_save_html($dom, $base_node)
    );
}
$newdoc->loadXML('<span>' . $new_contents . '</span>', LIBXML_NONET|LIBXML_DTDLOAD|LIBXML_DTDATTR); $newnode = $dom->importNode($newdoc->getElementsByTagName('span')->item(0), true); $textNode->parentNode->replaceChild($newnode, $textNode); } } } } foreach (array('link', 'style', 'script') as $tagname) { foreach ($dom->getElementsByTagName($tagname) as $element) { $ebook_head .= str_replace(array('<![CDATA[', ']]>'), array('', ''), dom_save_html($dom, $element)); } } $body_node = $dom->getElementsByTagName('body')->item(0); foreach ($body_node->childNodes as $element) { $ebook_body .= str_replace(' ', '', dom_save_html($dom, $element)); } unset($dom); $t->set_var('ebook_head', $ebook_head); $t->set_var('ebook_body', $ebook_body); $t->set_block('page', 'chapter_select_options', 'option_var'); if (!$show_orphan_file) { foreach ($sections as $section_info) { $t->set_var('chapter_title', ($section_info['indent'] ? ' ' : '') . q(ellipsize($section_info['title'], 40))); $t->set_var('chapter_id', $section_info['id']); if ($section_info['current']) { $t->set_var('chapter_selected', ' selected="selected"'); } else { $t->set_var('chapter_selected', '');
/**
 * Make sure URLs appearing in href and src attributes in HTML include a host.
 *
 * Only <a href="..."> and <img src="..."> are rewritten. A URL that already
 * has a scheme or a host is left untouched; any other URL is prefixed with
 * $urlServer minus its trailing $urlAppend part. The result for the most
 * recent input is memoized, so repeated calls with the same HTML are cheap.
 *
 * The snippet is wrapped in a <div> for parsing and the return value is
 * whatever dom_save_html() produces for that wrapper node.
 *
 * @param string $html - The HTML snippet to canonicalize
 * @return string - The canonicalized HTML
 */
function add_host_to_urls($html)
{
    global $urlServer, $urlAppend;
    static $html_memo, $out_memo;

    // Strict comparison: with loose != two different numeric-looking inputs
    // (e.g. "1e3" and "1000") compare equal and the stale result was returned.
    if (isset($html_memo) && $html_memo === $html) {
        return $out_memo;
    }
    $html_memo = $html;

    // Scheme and host part of the server URL, i.e. $urlServer without $urlAppend.
    $url_start = substr($urlServer, 0, strlen($urlServer) - strlen($urlAppend));

    $dom = new DOMDocument();
    @$dom->loadHTML('<div>' . mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8') . '</div>');

    foreach (array('a' => 'href', 'img' => 'src') as $tag_name => $attribute) {
        foreach ($dom->getElementsByTagName($tag_name) as $element) {
            $url = $element->getAttribute($attribute);
            if (!$url) {
                continue;
            }

            $url_info = parse_url($url);
            if (isset($url_info['scheme']) || isset($url_info['host'])) {
                continue;
            }

            if ($url[0] === '/') {
                $absolute = $url_start . $url;
            } else {
                // Without a separator the URL would be glued onto the host
                // name ("http://host" . "images/a.png" or "@other.host/x"),
                // changing the authority part of the link.
                // NOTE(review): such URLs are anchored at the server root;
                // confirm whether $urlServer would be the better base.
                $absolute = rtrim($url_start, '/') . '/' . $url;
            }
            $element->setAttribute($attribute, $absolute);
        }
    }

    $base_node = $dom->getElementsByTagName('div')->item(0);
    $out_memo = dom_save_html($dom, $base_node);

    return $out_memo;
}