/**
 * Inserts a <style type="text/css"> element directly before the element whose
 * id is "signature_agglomeration" in an HTML body.
 *
 * The stylesheet text is read from $this->style_content.
 *
 * @param array $args Must carry 'type' and 'body'. Only bodies whose type is
 *                    the string 'html' are processed.
 *
 * @return array|null array('body' => serialized document) when the style
 *                    element was inserted; null when the type is not 'html',
 *                    the body is missing/empty/unparseable, or the marker
 *                    element is not present.
 */
function replace_body($args)
{
    if (!isset($args['type'], $args['body']) || $args['type'] !== 'html') {
        return null;
    }

    $html_body = $args['body'];

    // loadHTML() warns on an empty string before PHP 8 and throws ValueError
    // from PHP 8 on; there is nothing to rewrite in an empty body anyway.
    if (!is_string($html_body) || trim($html_body) === '') {
        return null;
    }

    $doc = new DOMDocument();

    // Collect parser diagnostics internally instead of emitting a PHP warning
    // for every piece of malformed markup, then restore the caller's setting.
    $previous_setting = libxml_use_internal_errors(true);
    $loaded = $doc->loadHTML($html_body);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);

    if (!$loaded) {
        return null;
    }

    $marker = $doc->getElementById('signature_agglomeration');
    if ($marker === null || $marker->parentNode === null) {
        return null;
    }

    // The value argument of createElement() is not escaped, so a bare "&" in
    // the CSS would be parsed as a broken entity reference and truncate the
    // stylesheet. A text node carries the content verbatim.
    $style = $doc->createElement('style');
    $style->setAttribute('type', 'text/css');
    $style->appendChild($doc->createTextNode((string) $this->style_content));

    $marker->parentNode->insertBefore($style, $marker);

    return array('body' => $doc->saveHTML());
}
<?php if (isset($_GET['id']) && trim($_GET["id"]) == 'working') { $handbookFile = 'working.htm'; } else { $handbookFile = 'current-version.htm'; } $input = new DOMDocument(); $input->loadHTMLFile($handbookFile); $menu = $input->createElement("ul"); $menu->setAttribute("id", "anchor-menu"); $content = $input->getElementByID("content"); $lastMenuItem = null; $subMenu = null; foreach ($content->childNodes as $child) { if ($child->nodeName == 'h3') { $li = $input->createElement("li"); $anchor = $input->createElement("a", $child->nodeValue); $anchor->setAttribute('href', "#" . $child->getAttribute('id')); $li->appendChild($anchor); $menu->appendChild($li); $lastMenuItem = $li; $subMenu = null; } if ($child->nodeName == 'h4') { $li = $input->createElement("li"); $anchor = $input->createElement("a", $child->nodeValue); $anchor->setAttribute('href', "#" . $child->getAttribute('id')); $li->appendChild($anchor); if ($subMenu == null) { $subMenu = $input->createElement("ul");
} $tags = $doc->getElementsByTagName('title'); foreach ($tags as $tag) { $page_title = trim($tag->textContent); } if (!empty($page_title)) { $safe_name = safename($page_title); } $dirname = "/tmp/{$safe_name}"; mkdir($dirname); $page_title = preg_replace('/[^a-zA-Z0-9_\\-]+/', ' ', $base_url->path) . ' - ' . $base_url->host; } // Identification setup complete, start scraping. // Discard parts of the page that may be distracting. // Wordpress etc. if ($unwanted = $doc->getElementByID('sidebar')) { $unwanted->parentNode->removeChild($unwanted); print_log("Discarded sidebar for cleaner parsing."); } $tags = $doc->getElementsByTagName('img'); foreach ($tags as $tag) { $suggested_image_title = ""; // If the image is surrounded by an a tag // And that links directly to a jpeg, get that instead $tag_parent = $tag->parentNode; while ($tag_parent && $tag_parent->nodeName != 'a') { $tag_parent = $tag_parent->parentNode; } if ($tag_parent && $tag_parent->nodeName == 'a') { $tag_src = $base_url->resolve($tag_parent->getAttribute('href')); } else {
// Exercises namespaced attribute access, namespaced element lookup and ID
// attribute flagging. $node and $dom are defined outside this excerpt;
// presumably $dom is the document that owns $node — confirm against the
// preceding code.

// Print the 'lang' attribute (empty namespace URI) of $node twice: once via
// the string accessor and once via the attribute node's value.
echo "Language: " . $node->getAttributeNS('', 'lang') . "\n";
$lang = $node->getAttributeNodeNS('', 'lang');
echo "Language: " . $lang->value . "\n";

echo "\n-- Elements --\n";

// Report how many 'row' elements are found below $node.
$rows = $node->getElementsByTagName('row');
echo "Row Count: " . $rows->length . "\n";

// Append two elements that share the local name 'myelement' and the namespace
// URI 'urn::dummyns' but use different prefixes (newns / newns2).
$element_ns = new DOMElement('newns:myelement', 'default content', 'urn::dummyns');
$node->appendChild($element_ns);
$element_ns = new DOMElement('newns2:myelement', 'second default content', 'urn::dummyns');
$node->appendChild($element_ns);

// Look the elements up by namespace URI and local name, independent of prefix.
$myelements = $node->getElementsByTagNameNS('urn::dummyns', 'myelement');
$mylen = $myelements->length;
echo "myelements Count: " . $mylen . "\n";

echo "\n-- IDs --\n";

// Set idatt="n1" on $node and flag that attribute as an ID.
$node->setAttribute('idatt', 'n1');
$node->setIdAttribute('idatt', TRUE);

// Give every matched element a namespaced idatt attribute with the values
// n2, n3, ... (index + 2) and flag each one as an ID.
for ($x = 0; $x < $mylen; $x++) {
    $current = $myelements->item($x);
    // NOTE(review): the trailing . "\n" builds a string that is discarded;
    // it has no effect on the attribute or on the output.
    $current->setAttributeNS('urn::dummyns', 'newns:idatt', 'n' . ($x + 2)) . "\n";
    $current->setIdAttributeNS('urn::dummyns', 'idatt', TRUE);
}

// Look up 'n1' while $node's idatt is still flagged as an ID.
echo 'Element Name: ' . (($elem = $dom->getElementByID('n1')) ? $elem->localName : 'Not Found') . "\n";

// Clear the ID flag on $node's idatt through its attribute node, then repeat
// the same lookup.
$idatt = $node->getAttributeNode('idatt');
$node->setIdAttributeNode($idatt, FALSE);
echo 'Element Name: ' . (($elem = $dom->getElementByID('n1')) ? $elem->localName : 'Not Found') . "\n";

// Look up 'n3', the value given to the element at index 1 in the loop above,
// while the namespaced ID flags are still set.
echo 'Element Name: ' . (($elem = $dom->getElementByID('n3')) ? $elem->nodeName : 'Not Found') . "\n";

// Clear the namespaced ID flag on every matched element.
// NOTE(review): this loop reuses $node, so once it has run at least once
// $node refers to the last item visited, not the node used above.
for ($x = 0; $x < $mylen; $x++) {
    $node = $myelements->item($x);
    $node->setIdAttributeNS('urn::dummyns', 'idatt', FALSE);
}

// Repeat the 'n3' lookup after the flags have been cleared.
echo 'Element Name: ' . (($elem = $dom->getElementByID('n3')) ? $elem->nodeName : 'Not Found') . "\n";