/**
 * Wrap the pending CAS 2.0 response in a `serviceResponse` element and
 * serialize the document.
 *
 * The element obtained from `$this->createElement()` becomes the only child
 * of `$this->document`; `$this->response`, when not empty, is nested in it.
 *
 * @return string CAS 2.0+ server response as an XML string.
 */
public function prepare()
{
    $envelope = $this->createElement('serviceResponse');

    if (!empty($this->response)) {
        $envelope->appendChild($this->response);
    }

    // Empty the response document before attaching the new root element.
    for ($child = $this->document->firstChild; $child; $child = $this->document->firstChild) {
        $this->document->removeChild($child);
    }

    $this->document->appendChild($envelope);

    return $this->document->saveXML();
}
/**
 * Tries to convert the given HTML into a plain text format - best suited for
 * e-mail display, etc.
 *
 * <p>In particular, it tries to maintain the following features:
 * <ul>
 *   <li>Links are maintained, with the 'href' copied over
 *   <li>Information in the <head> is lost
 * </ul>
 *
 * @param string $html  the input HTML
 * @param int    $width width handed to the renderer (defaults to 74)
 * @return string the HTML converted, as best as possible, to text; when the
 *                markup cannot be parsed the newline-normalised input is
 *                returned unchanged
 */
function convert_html_to_text($html, $width = 74)
{
    $html = fix_newlines($html);

    // Work on a copy so the injected encoding hint never leaks back to the
    // caller through the failure path.
    $markup = $html;
    if (strpos($markup, '<?xml ') === false) {
        $markup = '<?xml encoding="utf-8"?>' . $markup;
    }

    $doc = new DOMDocument('1.0', 'utf-8');
    if (!@$doc->loadHTML($markup)) {
        return $html;
    }

    // Thanks, http://us3.php.net/manual/en/domdocument.loadhtml.php#95251
    // dirty fix -- remove the inserted processing instruction
    foreach ($doc->childNodes as $item) {
        if ($item->nodeType == XML_PI_NODE) {
            $doc->removeChild($item);
            break;
        }
    }

    $elements = identify_node($doc);

    // identify_node() may hand back something that is not a renderable
    // object; in that case there is nothing to style or render.
    if (!is_object($elements)) {
        return trim($elements);
    }

    // Add the default stylesheet
    $elements->getRoot()->addStylesheet(HtmlStylesheet::fromArray(array(
        'html' => array('white-space' => 'pre'),
        'p' => array('margin-bottom' => '1em'),
        'pre' => array('white-space' => 'pre'),
    )));

    $options = array();

    return trim($elements->render($width, $options));
}
/**
 * {@inheritdoc}
 *
 * Every entry of `$this->config` is copied onto the new document as a
 * property before parsing.
 *
 * @param string       $html     Markup to parse.
 * @param string|false $encoding Charset announced to the parser. Pass false
 *                               to parse the markup without any charset
 *                               hint. An empty value falls back to the
 *                               document's own encoding, if one is set.
 *
 * @return \DOMDocument
 */
public function parseHtml($html, $encoding = 'UTF-8')
{
    $document = new \DOMDocument();

    foreach ($this->config as $name => $value) {
        $document->{$name} = $value;
    }

    // Only a usable charset name is written to the document: assigning
    // false/null/'' is rejected by DOMDocument and would also wipe out the
    // fallback value read just below.
    if ($encoding) {
        $document->encoding = $encoding;
    }

    $charset = ($encoding === false) ? '' : ($encoding ?: $document->encoding);

    if ((string) $charset === '') {
        // No charset known (or charset handling disabled): parse as-is.
        // @codingStandardsIgnoreStart
        @$document->loadHTML($html);
        // @codingStandardsIgnoreEnd

        return $document;
    }

    // Tell the parser which charset to use
    // @codingStandardsIgnoreStart
    @$document->loadHTML('<?xml encoding="' . $charset . '" ?>' . $html);
    // @codingStandardsIgnoreEnd

    // Remove the hint again. The child list is copied first because removing
    // nodes from the live list while iterating it can skip entries.
    foreach (iterator_to_array($document->childNodes) as $item) {
        if ($item->nodeType == XML_PI_NODE) {
            $document->removeChild($item);
        }
    }

    return $document;
}
/**
 * Add a CSS class to every element of the given type found in $content.
 *
 * - `\Element` instances: the class is added when the object is of type
 *   `\AbstractElement\<$elementType>`, then every entry of its `contents`
 *   is processed recursively with the same settings.
 * - Raw HTML strings (only when $fixRawHtml is true): the string is parsed
 *   and every tag named after the `tag` constant of the element class gets
 *   the class appended; the re-serialized markup is returned.
 * - Anything else is returned untouched.
 *
 * @param mixed  $content     Element object, raw HTML string or other value.
 * @param string $elementType Short class name below `\AbstractElement\`.
 * @param string $class       CSS class to add.
 * @param bool   $fixRawHtml  Whether raw HTML strings are rewritten too.
 *
 * @return mixed The processed content.
 */
static function fixChildrenClass($content, $elementType, $class, $fixRawHtml = true)
{
    $classPath = '\\AbstractElement\\' . $elementType;

    // is this an object that extends AbstractElement?
    if (is_a($content, '\\Element')) {
        // is this of the right element type?
        if (is_a($content, $classPath)) {
            $content->addClass($class);
        }
        if (isset($content->contents)) {
            foreach ($content->contents as $index => $value) {
                // Forward $fixRawHtml so nested raw HTML honours the
                // caller's choice instead of silently using the default.
                $content->contents[$index] = AbstractElement\Helper::fixChildrenClass($value, $elementType, $class, $fixRawHtml);
            }
        }

        return $content;
    }

    // An empty string cannot be parsed by DOMDocument, so it is skipped.
    if (is_string($content) && $content !== '' && $fixRawHtml) {
        $dom = new \DOMDocument();
        $dom->loadHtml($content);

        $reflectionClass = new \ReflectionClass($classPath);
        $elements = $dom->getElementsByTagName($reflectionClass->getConstant('tag'));
        foreach ($elements as $element) {
            $element->setAttribute('class', $element->getAttribute('class') . ' ' . $class);
        }

        // Strip the doctype and the <html> wrapper added by the parser.
        $dom->removeChild($dom->doctype);
        $dom->replaceChild($dom->firstChild->firstChild, $dom->firstChild);

        return $dom->saveHTML();
    }

    // Not something we rewrite: hand it back unchanged so that recursive
    // callers do not overwrite their children with null.
    return $content;
}
/**
 * Set target="_blank" on every link of an HTML fragment that points to a
 * host other than the blog's own.
 *
 * Links without a host component (relative URLs, anchors, unparsable hrefs)
 * are left alone.
 *
 * @param string $content HTML fragment (expected to be UTF-8).
 *
 * @return string The rewritten fragment, trimmed.
 */
function parse($content)
{
    $doc = new DOMDocument();
    // The leading XML declaration makes libxml decode the fragment as UTF-8;
    // without it non-ASCII characters are read as ISO-8859-1 and come out
    // garbled. The declaration node is discarded together with the other
    // top-level nodes further down.
    @$doc->loadHTML('<?xml encoding="UTF-8"><div>' . $content . '</div>');

    $links = $doc->getElementsByTagName('a');

    $blogurl = get_bloginfo("url");
    $components = parse_url($blogurl);
    $host = isset($components["host"]) ? $components["host"] : '';

    /* Set target attribute of all external links to "_blank" */
    foreach ($links as $link) {
        $href = $link->getAttribute("href");
        $components = parse_url($href);

        if (!isset($components["host"])) {
            continue;
        }

        // Host names are case-insensitive.
        if (strcasecmp($components["host"], $host) !== 0) {
            $link->setAttribute("target", "_blank");
        }
    }

    /*
     * Extract the HTML fragment.
     * Credits: http://stackoverflow.com/questions/29493678/loadhtml-libxml-html-noimplied-on-an-html-fragment-generates-incorrect-tags
     */
    $temporary_wrapper = $doc->getElementsByTagName('div')->item(0);
    $temporary_wrapper = $temporary_wrapper->parentNode->removeChild($temporary_wrapper);

    // Drop everything the parser put at document level ...
    while ($doc->firstChild) {
        $doc->removeChild($doc->firstChild);
    }

    // ... and move the wrapper's children there instead.
    while ($temporary_wrapper->firstChild) {
        $doc->appendChild($temporary_wrapper->firstChild);
    }

    /* Return HTML */
    return trim($doc->saveHTML());
}
/**
 * Rewrite every <img> of an HTML string for lazy loading.
 *
 * For each image a <noscript> copy of the untouched tag is inserted in front
 * of it; the image itself gets its original source moved to `data-src`, a
 * 1x1 placeholder as `src` and the additional class `lazy`.
 *
 * @param string $content HTML to process.
 * @param mixed  $tags    Not used by this function.
 *
 * @return string The serialized document, or $content unchanged when the
 *                markup cannot be parsed or serialized.
 */
function preg_replace_html($content, $tags)
{
    $document = new DOMDocument();

    $parsed = @$document->loadHTML('<?xml encoding="UTF-8">' . $content);
    if (!$parsed) {
        return $content;
    }

    // Drop the encoding hint again (first processing instruction only).
    foreach ($document->childNodes as $child) {
        if ($child->nodeType === XML_PI_NODE) {
            $document->removeChild($child);
            break;
        }
    }
    $document->encoding = 'UTF-8';

    $placeholder = 'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7';

    // The node list is live and grows with every clone inserted below, so
    // iterate over a snapshot of the images that exist right now.
    foreach (iterator_to_array($document->getElementsByTagName('img')) as $image) {
        $fallback = $document->createElement('noscript');
        $fallback->appendChild($image->cloneNode());
        $image->parentNode->insertBefore($fallback, $image);

        $image->setAttribute('data-src', $image->getAttribute('src'));
        $image->setAttribute('src', $placeholder);
        $image->setAttribute('class', trim($image->getAttribute('class') . ' lazy'));
    }

    $markup = $document->saveHTML();

    return $markup ? $markup : $content;
}
/**
 * Convert HTML into a textual representation.
 *
 * The result is kept in `$this->text`; once that is non-empty, later calls
 * return it without converting again.
 *
 * @return string Text representation
 */
public function convert()
{
    if (empty($this->text)) {
        $markup = $this->preprocess($this->html);

        $this->document = new DOMDocument();

        // Route parser diagnostics to the class' own no-op style handler
        // for the duration of the load only.
        set_error_handler(array(__CLASS__, 'silence_errors'));
        $this->document->loadHTML($markup);
        restore_error_handler();

        // Remove the DOCTYPE
        // Seems to cause segfaulting if we don't do this
        $leading = $this->document->firstChild;
        if ($leading instanceof DOMDocumentType) {
            $this->document->removeChild($leading);
        }

        $body = $this->document->getElementsByTagName('body')->item(0);

        $this->text = $this->parse_children($body);
        // Collapse runs of three or more newlines into a single blank line.
        $this->text = preg_replace("#\n{3,}#", "\n\n", $this->text);
        $this->text = $this->wrap($this->text, 80);
        $this->text .= $this->generate_links();
    }

    return $this->text;
}
/**
 * Parse an HTML string and record whether it boils down to a single root
 * node.
 *
 * The markup is loaded into a DOMDocument (UTF-8 input converted to HTML
 * entities first), the instance is registered through jqm_var() under a
 * generated namespace, and the doctype added by the parser is removed.
 *
 * Root detection only applies when the first element matched by $tagname is
 * <html> and the source did not contain an <html> tag itself: if the first
 * child of <html> has exactly one child, `$this->__schema['root']` is true
 * and `rootPath` holds the node path of that child's first child (or of the
 * child itself when it is empty). Otherwise `root` is false and `rootPath`
 * is null.
 *
 * @param string $document HTML source (any scalar is accepted).
 * @param string $tagname  Tag name used for the initial selection.
 *
 * @throws Exception When $document is not a scalar.
 */
public function __construct($document, $tagname = '*')
{
    if (!is_scalar($document)) {
        throw new Exception('Not a valid {JQMDoc} object');
    }

    $this->_namespace = microtime(true) . uniqid();
    jqm_var($this->_namespace, $this);

    // Detect if $document is a valid full document
    $hasHTML = stripos($document, '<html') !== false;
    $this->__documentMap = array();

    $DOM = new DOMDocument();
    $DOM->recover = true;
    $DOM->preserveWhiteSpace = true;
    $DOM->substituteEntities = true;
    $DOM->formatOutput = true;
    $DOM->encoding = 'utf-8';
    $DOM->loadHTML(mb_convert_encoding($document, 'HTML-ENTITIES', 'UTF-8'));
    $DOM->normalizeDocument();

    $html = $DOM->getElementsByTagName($tagname);

    // Determine root / pieced map
    $hasRoot = false;
    // Always defined, so the schema assignment below never reads an
    // undefined variable when no root is detected.
    $root = null;

    // The selection may be empty (no element matches $tagname).
    $first = $html->item(0);
    if ($first !== null && $first->childNodes->length > 0 && $first->tagName == 'html') {
        $container = $first->childNodes->item(0);
        $children = $container->childNodes;

        if (!$hasHTML && $children !== null && $children->length == 1) {
            $hasRoot = true;
            $only = $children->item(0);
            $root = $only->firstChild ? $only->firstChild->getNodePath() : $only->getNodePath();
        }
    }

    $this->__schema['root'] = $hasRoot;
    $this->__schema['rootPath'] = $root;
    $this->__mapLength = false;
    $this->_length = false;
    $this->length = false;

    if ($DOM->doctype) {
        $DOM->removeChild($DOM->doctype);
    }

    $this->_DOM = $DOM;
    $this->_selector = $tagname;
}
/**
 * Add width/height attributes to the images of an article.
 *
 * For every <img> with a src attribute the first 32 KiB of the file are
 * fetched with cURL, written to a temporary file and measured with
 * getimagesize(). When at least one image could be measured, the article
 * content is replaced by the re-serialized document (without doctype).
 *
 * Nothing is done when NO_CURL is defined, cURL is unavailable or the
 * content cannot be parsed.
 *
 * @param array $article Article row; only the "content" key is used.
 *
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article)
{
    if (defined('NO_CURL') || !function_exists("curl_init")) {
        return $article;
    }

    $charset_hack = '<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> </head>';

    $doc = new DOMDocument();
    // loadHTML() reports failure through its return value; the document
    // object itself is always truthy.
    if (!$doc->loadHTML($charset_hack . $article["content"])) {
        return $article;
    }

    $found = false;
    $xpath = new DOMXpath($doc);
    $images = $xpath->query('(//img[@src])');

    foreach ($images as $img) {
        $src = $img->getAttribute("src");

        $ch = curl_init($src);
        if (!$ch) {
            continue;
        }

        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
        // The image header is enough to read the dimensions.
        curl_setopt($ch, CURLOPT_RANGE, "0-32768");

        @($result = curl_exec($ch));
        $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        // 206 = partial content, the expected answer to a range request.
        if (!$result || ($http_code != 200 && $http_code != 206)) {
            continue;
        }

        $filename = tempnam(sys_get_temp_dir(), "ttsizecheck");
        if (!$filename) {
            continue;
        }

        $fh = fopen($filename, "w");
        if ($fh) {
            fwrite($fh, $result);
            fclose($fh);

            @($info = getimagesize($filename));

            if ($info && $info[0] > 0 && $info[1] > 0) {
                $img->setAttribute("width", $info[0]);
                $img->setAttribute("height", $info[1]);
                $found = true;
            }
        }

        // tempnam() already created the file, so remove it even when it
        // could not be opened for writing.
        unlink($filename);
    }

    if ($found) {
        // Remove the doctype, but never any other leading node.
        if ($doc->doctype) {
            $doc->removeChild($doc->doctype);
        }
        $article["content"] = $doc->saveHTML();
    }

    return $article;
}
/**
 * Tag the first link of a menu item with a `data-bwrkonepage-id` attribute
 * of the form `bwrk_onepage_<uid>`, where the uid is read from
 * `$this->I['uid']`.
 *
 * The item is returned unchanged when it is empty or contains no <a>
 * element.
 *
 * @param string $item Menu item HTML.
 * @param int    $key  Not used by this method.
 *
 * @return string
 */
public function extProc_beforeAllWrap($item, $key)
{
    if (empty($item)) {
        return $item;
    }

    $dom = new \DOMDocument();
    $dom->loadHTML(mb_convert_encoding($item, 'HTML-ENTITIES', 'UTF-8'));

    $anchor = $dom->getElementsByTagName('a')->item(0);
    if ($anchor === null) {
        // Nothing to annotate; keep the markup exactly as it came in.
        return $item;
    }

    $anchor->setAttribute('data-bwrkonepage-id', 'bwrk_onepage_' . $this->I['uid']);

    // Strip the doctype and the <html><body> wrapper added by the parser,
    // keeping the first node of the body.
    $dom->removeChild($dom->doctype);
    $dom->replaceChild($dom->firstChild->firstChild->firstChild, $dom->firstChild);

    return $dom->saveHTML();
}
/**
 * Append product recommendations to Autocomplete block html
 *
 * The recommendations node is imported into the parsed block and appended
 * to its first <ul>. The HTML is returned unchanged when there are no
 * recommendations for the current query or the block contains no <ul>.
 *
 * @param string $html
 * @return string
 */
protected function _appendTopRecommendations($html)
{
    $recommendationsModel = Mage::getModel('autocompleterecommendations/recommendation');
    $query = Mage::helper('catalogsearch')->getQuery();
    $productRecommendationsHtml = $recommendationsModel->getProductRecommendationsHtml($query);

    if (!$productRecommendationsHtml) {
        return $html;
    }

    $dom = new DOMDocument('1.0', 'utf8');
    $dom->loadHTML($html);

    $ul = $dom->getElementsByTagName('ul')->item(0);
    if ($ul === null) {
        // No list to append to; leave the block as it is.
        return $html;
    }

    $productRecommendationsDom = $recommendationsModel->getRecommendationsDom($productRecommendationsHtml);
    $productRecommendationsDom = $dom->importNode($productRecommendationsDom, true);
    $ul->appendChild($productRecommendationsDom);

    // Strip the doctype and the <html><body> wrapper added by the parser,
    // keeping the first node of the body.
    if ($dom->doctype) {
        $dom->removeChild($dom->doctype);
    }
    $dom->replaceChild($dom->firstChild->firstChild->firstChild, $dom->firstChild);

    return $dom->saveHTML();
}
/**
 * Point Tumblr images of an article at their 1280px rendition.
 *
 * For every <img> whose src contains "media.tumblr.com" and ends in a
 * three-digit size suffix (`_500.jpg`, `_400.png`, ...), the `_1280` variant
 * is probed with a header-only request; the src is swapped when the server
 * answers 200. When at least one image was swapped, the article content is
 * replaced by the re-serialized document (without doctype).
 *
 * Nothing is done when cURL is unavailable, open_basedir is set or the
 * content cannot be parsed.
 *
 * @param array $article Article row; only the "content" key is used.
 *
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article)
{
    if (!function_exists("curl_init") || ini_get("open_basedir")) {
        return $article;
    }

    $charset_hack = '<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> </head>';

    $doc = new DOMDocument();
    // loadHTML() reports failure through its return value; the document
    // object itself is always truthy.
    if (!$doc->loadHTML($charset_hack . $article["content"])) {
        return $article;
    }

    $found = false;
    $xpath = new DOMXpath($doc);
    $images = $xpath->query('(//img[contains(@src, \'media.tumblr.com\')])');

    foreach ($images as $img) {
        $src = $img->getAttribute("src");

        // The dot before the extension is escaped so that only a real
        // "<size>.<ext>" suffix is rewritten.
        $test_src = preg_replace("/_\\d{3}\\.(jpg|gif|png)/", "_1280.\$1", $src);

        if ($src == $test_src) {
            continue;
        }

        $ch = curl_init($test_src);
        if (!$ch) {
            continue;
        }

        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, true);
        curl_setopt($ch, CURLOPT_NOBODY, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);

        @($result = curl_exec($ch));
        $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        if ($result && $http_code == 200) {
            $img->setAttribute("src", $test_src);
            $found = true;
        }
    }

    if ($found) {
        // Remove the doctype, but never any other leading node.
        if ($doc->doctype) {
            $doc->removeChild($doc->doctype);
        }
        $article["content"] = $doc->saveHTML();
    }

    return $article;
}
/**
 * Force every link in an HTML fragment to open in a new window.
 *
 * The fragment is parsed inside a temporary <div> (so bare text is not
 * wrapped in <p> by the parser), each <a> gets target="_blank", and the
 * children of the <div> are serialized back. libxml diagnostics are
 * collected internally; everything that is not a plain warning is passed to
 * fof_log(). The previous libxml error mode is restored before returning.
 *
 * @param string $content HTML fragment (UTF-8).
 *
 * @return string The rewritten fragment.
 */
function fof_item_targets($content)
{
    /* quiet warnings */
    $previous_mode = libxml_use_internal_errors(true);

    $document = new DOMDocument();
    $wrapped = '<div>' . mb_convert_encoding($content, 'HTML-ENTITIES', "UTF-8") . '</div>';
    $document->loadHtml($wrapped);

    /* strip <!DOCTYPE> which DOMDocument adds */
    $document->removeChild($document->firstChild);

    /* strip <html><body> which DOMDocument adds, leaving only our div */
    $html_element = $document->firstChild;
    $document->replaceChild($html_element->firstChild->firstChild, $html_element);

    /* replace or add link targets */
    $finder = new DOMXpath($document);
    foreach ($finder->query('//a') as $anchor) {
        $anchor->setAttribute('target', '_blank');
    }

    /* emit every node inside our div, in document order */
    $pieces = array();
    for ($child = $document->firstChild->firstChild; $child; $child = $child->nextSibling) {
        $pieces[] = $document->saveHTML($child);
    }
    $content_out = implode('', $pieces);

    foreach (libxml_get_errors() as $error) {
        /* warnings are not worth logging */
        if ($error->level !== LIBXML_ERR_WARNING) {
            fof_log(__FUNCTION__ . ': ' . $error->message);
        }
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previous_mode);

    return $content_out;
}
/**
 * Load the view file, let every view element rewrite the parsed document
 * and store the resulting markup.
 *
 * @param bool $replaceHtml When true, the doctype and the <html><body>
 *                          wrapper that loadHTML() adds are stripped, so
 *                          that only the first node of the body remains.
 *
 * @return string The trimmed markup, also kept in `$this->_html`.
 */
public function create($replaceHtml = false)
{
    // the view file path is relative to the root directory
    $this->_html = file_get_contents($this->getViewFile());

    $document = new \DOMDocument();
    $document->loadHTML($this->_html);

    if ($replaceHtml) {
        // remove doc type
        $document->removeChild($document->firstChild);

        // remove <html><body></body></html>
        $htmlElement = $document->firstChild;
        $document->replaceChild($htmlElement->firstChild->firstChild, $htmlElement);
    }

    foreach ($this->getViewElements() as $viewElement) {
        $viewElement->replaceHtml($document, $this->getViewElementValue());
    }

    $this->_html = trim($document->saveHTML());

    return $this->_html;
}
/**
 * Parse an HTML string as UTF-8 and hand the document to loadDom().
 *
 * The markup is prefixed with an XML encoding declaration so that libxml
 * reads it as UTF-8; the declaration is removed again after parsing. An
 * empty string results in an empty document being loaded.
 *
 * libxml diagnostics are collected internally while this method runs and
 * discarded; the error mode that was active before the call is restored
 * afterwards.
 *
 * @param string $htmlString HTML without reliable charset information.
 *
 * @return void
 */
public function loadHtmlNoCharset($htmlString = '')
{
    $dom = new DOMDocument('1.0', 'UTF-8');
    $dom->preserveWhiteSpace = false;

    $hasMarkup = strlen((string) $htmlString) > 0;
    $previousMode = null;

    if ($hasMarkup) {
        $previousMode = libxml_use_internal_errors(true);

        $dom->loadHTML('<?xml encoding="UTF-8">' . $htmlString);

        // dirty fix: drop the encoding declaration again
        foreach ($dom->childNodes as $item) {
            if ($item->nodeType == XML_PI_NODE) {
                $dom->removeChild($item);
                break;
            }
        }

        $dom->encoding = 'UTF-8';
        libxml_clear_errors();
    }

    $this->loadDom($dom);

    if ($hasMarkup) {
        // Do not leave the process-wide libxml error mode switched on for
        // unrelated code that runs after this call.
        libxml_clear_errors();
        libxml_use_internal_errors($previousMode);
    }
}
/**
 * Show the alt text of a mrlovenstein.com comic below the image.
 *
 * Articles from other sites are ignored. For an article that has not been
 * marked in its plugin data yet, a <br> and the alt text of the first image
 * carrying an alt attribute are appended to that image's parent, the
 * content is replaced by the re-serialized document and the article is
 * marked as processed. An already marked article gets its stored content
 * back, when there is one.
 *
 * @param array $article Article row, modified in place.
 *
 * @return bool true when the article belongs to this comic, false otherwise.
 */
function process(&$article)
{
    $owner_uid = $article["owner_uid"];

    if (strpos($article["link"], "mrlovenstein.com") === FALSE) {
        return false;
    }

    $marker = "af_comics,{$owner_uid}:";

    if (strpos($article["plugin_data"], $marker) !== FALSE) {
        // Already processed earlier: reuse what was stored back then.
        if (isset($article["stored"]["content"])) {
            $article["content"] = $article["stored"]["content"];
        }

        return true;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($article["content"]);

    $finder = new DOMXPath($doc);
    $image = $finder->query('(//img[@alt])')->item(0);

    if ($image) {
        $container = $image->parentNode;

        // line break, then the alt text
        $container->appendChild($doc->createElement("br"));
        $container->appendChild($doc->createTextNode($image->getAttribute("alt")));

        if ($container) {
            $doc->removeChild($doc->firstChild);
            $article["content"] = $doc->saveHTML();
            $article["plugin_data"] = $marker . $article["plugin_data"];
        }
    }

    return true;
}
/**
 * Builds the event of guide example 6.2 through the object API, serializes
 * it and compares the XML with the sample file shipped with the tests
 * (with the default namespace declaration stripped from the sample).
 */
public function testCreateCdbXMLGuideExample6Dot2()
{
    // --- Event attributes -------------------------------------------------
    $event = new CultureFeed_Cdb_Item_Event();
    $event->setAvailableFrom('2010-02-25T00:00:00');
    $event->setAvailableTo('2010-08-09T00:00:00');
    $event->setCdbId('ea37cae2-c91e-4810-89ab-e060432d2b78');
    $event->setCreatedBy('mverdoodt');
    $event->setCreationDate('2010-07-05T18:28:18');
    $event->setExternalId('SKB Import:SKB00001_216413');
    $event->setIsParent(false);
    $event->setLastUpdated('2010-07-28T13:58:55');
    $event->setLastUpdatedBy('mverdoodt');
    $event->setOwner('SKB Import');
    $event->setPctComplete(80);
    $event->setPublished(true);
    $event->setValidator('SKB');
    $event->setWfStatus('approved');
    $event->setAgeFrom(18);
    $event->setPrivate(false);

    // --- Calendar ---------------------------------------------------------
    $timestamps = new CultureFeed_Cdb_Data_Calendar_TimestampList();
    $timestamps->add(new CultureFeed_Cdb_Data_Calendar_Timestamp('2010-08-01', '21:00:00.0000000'));
    $event->setCalendar($timestamps);

    // --- Categories -------------------------------------------------------
    $categoryList = new CultureFeed_Cdb_Data_CategoryList();
    $categoryList->add(new CultureFeed_Cdb_Data_Category(
        CultureFeed_Cdb_Data_Category::CATEGORY_TYPE_EVENT_TYPE,
        '0.50.4.0.0',
        'Concert'
    ));
    $categoryList->add(new CultureFeed_Cdb_Data_Category(
        CultureFeed_Cdb_Data_Category::CATEGORY_TYPE_THEME,
        '1.8.2.0.0',
        'Jazz en blues'
    ));
    $categoryList->add(new CultureFeed_Cdb_Data_Category(
        CultureFeed_Cdb_Data_Category::CATEGORY_TYPE_PUBLICSCOPE,
        '6.2.0.0.0',
        'Regionaal'
    ));
    $event->setCategories($categoryList);

    // --- Contact info -----------------------------------------------------
    $contact = new CultureFeed_Cdb_Data_ContactInfo();
    $contact->addMail(new CultureFeed_Cdb_Data_Mail('*****@*****.**', null, null));
    $contact->addPhone(new CultureFeed_Cdb_Data_Phone('0487-62.22.31'));
    $website = new CultureFeed_Cdb_Data_Url('http://www.bonnefooi.be');
    $website->setMain();
    $contact->addUrl($website);
    $event->setContactInfo($contact);

    // --- Details (nl) -----------------------------------------------------
    $detailList = new CultureFeed_Cdb_Data_EventDetailList();

    $dutch = new CultureFeed_Cdb_Data_EventDetail();
    $dutch->setLanguage('nl');
    $dutch->setTitle('The Bonnefooi Acoustic Jam');
    $dutch->setCalendarSummary('zo 01/08/10 om 21:00');

    $performerList = new CultureFeed_Cdb_Data_PerformerList();
    $performerList->add(new CultureFeed_Cdb_Data_Performer('Muzikant', 'Matt, the Englishman in Brussels'));
    $dutch->setPerformers($performerList);

    $dutch->setLongDescription('Weggelaten voor leesbaarheid...');

    $image = new CultureFeed_Cdb_Data_File();
    $image->setMain();
    $image->setCopyright('Bonnefooi');
    $image->setHLink('http://www.bonnefooi.be/images/sized/site/images/uploads/Jeroen_Jamming-453x604.jpg');
    $image->setMediaType(CultureFeed_Cdb_Data_File::MEDIA_TYPE_IMAGEWEB);
    $image->setTitle('Jeroen Jamming');
    $dutch->getMedia()->add($image);

    $free = new CultureFeed_Cdb_Data_Price(0);
    $free->setTitle('The Bonnefooi Acoustic Jam');
    $dutch->setPrice($free);

    $dutch->setShortDescription('Korte omschrijving.');
    $detailList->add($dutch);

    // --- Details (en) -----------------------------------------------------
    $english = new CultureFeed_Cdb_Data_EventDetail();
    $english->setLanguage('en');
    $english->setShortDescription('Short description.');
    $detailList->add($english);

    $event->setDetails($detailList);

    // @todo Add headings.

    $event->addKeyword('Free Jazz, Acoustisch');

    // --- Location ---------------------------------------------------------
    $venueAddress = new CultureFeed_Cdb_Data_Address();
    $street = new CultureFeed_Cdb_Data_Address_PhysicalAddress();
    $street->setCity('Brussel');
    $street->setCountry('BE');
    $street->setHouseNumber(8);
    $street->setStreet('Steenstraat');
    $street->setZip(1000);
    $venueAddress->setPhysicalAddress($street);

    $venue = new CultureFeed_Cdb_Data_Location($venueAddress);
    $venue->setLabel('Café Bonnefooi');
    $venue->setCdbid('920e9755-94a0-42c1-8c8c-9d17f693d0be');
    $event->setLocation($venue);

    // --- Organiser --------------------------------------------------------
    $host = new CultureFeed_Cdb_Data_Organiser();
    $host->setLabel('Café Bonnefooi');
    $event->setOrganiser($host);

    // --- Languages --------------------------------------------------------
    $spoken = new CultureFeed_Cdb_Data_LanguageList();
    $spoken->add(new CultureFeed_Cdb_Data_Language('Nederlands', CultureFeed_Cdb_Data_Language::TYPE_SPOKEN));
    $spoken->add(new CultureFeed_Cdb_Data_Language('Frans', CultureFeed_Cdb_Data_Language::TYPE_SPOKEN));
    $event->setLanguages($spoken);

    // --- Serialize the event ----------------------------------------------
    // The event is first appended to a throw-away <cdbxml> element in the
    // cdbxml namespace and then promoted to document element.
    $actualDom = new DOMDocument('1.0', 'UTF-8');
    $actualDom->preserveWhiteSpace = false;
    $actualDom->formatOutput = true;

    $wrapper = $actualDom->createElementNS(CultureFeed_Cdb_Xml::namespaceUri(), 'cdbxml');
    $actualDom->appendChild($wrapper);
    $event->appendToDOM($wrapper);

    $finder = new DOMXPath($actualDom);
    $matches = $finder->query('//event');
    $this->assertEquals(1, $matches->length);

    $eventElement = $matches->item(0);
    $actualDom->removeChild($wrapper);
    $actualDom->appendChild($eventElement);

    // @todo Put xmlns attribute first.
    $xml = $actualDom->saveXML();

    // --- Load and normalise the expected sample ---------------------------
    $sample = file_get_contents($this->samplePath('cdbxml-guide-example-6-2.xml'));
    $sample = str_replace('xmlns="http://www.cultuurdatabank.com/XMLSchema/CdbXSD/3.2/FINAL" ', '', $sample);

    $expectedDom = new DOMDocument('1.0', 'UTF-8');
    $expectedDom->preserveWhiteSpace = false;
    $expectedDom->formatOutput = true;
    $expectedDom->loadXML($sample);
    $expectedDom->preserveWhiteSpace = false;
    $expectedDom->formatOutput = true;

    $expected_xml = $expectedDom->saveXML();

    $this->assertEquals($expected_xml, $xml);
}
/**
 * Log a click to the temporary click-log and report whether it was a
 * double click.
 *
 * The log is the XML file `~localstat.xml` in the registry's `temp_dir`,
 * structured as access > document<id> > h<md5 of client ip> > file<id>,
 * with the time of the last click stored in the `lastAccess` attribute of
 * the file node. The client address is read from
 * `$_SERVER['REMOTE_ADDR']` (empty string when missing). When the log's
 * global timestamp is older than the longest double-click interval, the
 * whole log is started afresh.
 *
 * @param mixed $documentId id of documents table
 * @param mixed $fileId     id of document_files table (-1 is treated like
 *                          an html access)
 * @param int   $time       timestamp of the click
 *
 * @return bool is it a double click
 *
 * @throws Opus_Model_Exception when the log cannot be loaded or saved
 */
public function logClick($documentId, $fileId, $time)
{
    $ip = array_key_exists('REMOTE_ADDR', $_SERVER) ? $_SERVER['REMOTE_ADDR'] : '';

    $tempDir = Zend_Registry::getInstance()->get('temp_dir');
    $logFile = $tempDir . '~localstat.xml';

    // initialize log data
    $ipTag = "h" . md5($ip);
    // TODO determine file type of file id
    $filetype = 'pdf';
    $longestInterval = max($this->doubleClickIntervalHtml, $this->doubleClickIntervalPdf);

    $dom = new DOMDocument();
    if (file_exists($logFile) === false) {
        $dom->appendChild($dom->createElement('access'));
    } else {
        $dom->load($logFile);
    }

    $xmlAccess = $dom->getElementsByTagName("access")->item(0);
    if (is_null($xmlAccess)) {
        throw new Opus_Model_Exception('Error loading click-log "' . $logFile . '"');
    }

    // if global file access timestamp too old, the whole log file can be removed
    $xmlTime = $dom->getElementsByTagName("time")->item(0);
    if ($xmlTime != null && $time - $xmlTime->nodeValue > $longestInterval) {
        $dom->removeChild($dom->getElementsByTagName("access")->item(0));
        $xmlAccess = $dom->createElement('access');
        $dom->appendChild($xmlAccess);
    }

    // replace the global timestamp with the current one
    $xmlTime = $xmlAccess->getElementsByTagName('time')->item(0);
    if ($xmlTime != null) {
        $xmlAccess->removeChild($xmlTime);
    }
    $xmlAccess->appendChild($dom->createElement('time', $time));

    // get document id, create if not exists
    $documentTag = 'document' . $documentId;
    $xmlDocumentId = $dom->getElementsByTagName($documentTag)->item(0);
    if ($xmlDocumentId == null) {
        $xmlDocumentId = $dom->createElement($documentTag);
        $xmlAccess->appendChild($xmlDocumentId);
    }

    // get ip node
    $xmlIp = $xmlDocumentId->getElementsByTagName($ipTag)->item(0);
    if ($xmlIp == null) {
        $xmlIp = $dom->createElement($ipTag);
        $xmlDocumentId->appendChild($xmlIp);
    }

    // get file id, create if not exists
    $fileTag = 'file' . $fileId;
    $xmlFileId = $xmlIp->getElementsByTagName($fileTag)->item(0);
    if ($xmlFileId == null) {
        $xmlFileId = $dom->createElement($fileTag);
        $xmlIp->appendChild($xmlFileId);
    }

    // read last access for this file id
    $fileIdTime = $xmlFileId->getAttribute('lastAccess');

    // No lastAccess (new entry) or one that is too far away is never a
    // double click; otherwise the interval of the file type decides.
    $doubleClick = false;
    if (!($fileIdTime == null || $time - $fileIdTime > $longestInterval)) {
        $elapsed = $time - $fileIdTime;

        if ($elapsed <= $this->doubleClickIntervalHtml && ($filetype == 'html' || $fileId == -1)) {
            // html file double click
            $doubleClick = true;
        } elseif ($elapsed <= $this->doubleClickIntervalPdf && $filetype == 'pdf' && $fileId != -1) {
            // pdf file double click
            $doubleClick = true;
        }
    }

    $xmlFileId->setAttribute('lastAccess', $time);

    if ($dom->save($logFile) === false) {
        throw new Opus_Model_Exception('Error saving click-log "' . $logFile . '"');
    }

    return $doubleClick;
}
$set->appendChild($node); $filename = "{$ac['srcdir']}/.manual.{$ac['PARTIAL']}.xml"; $dom->save($filename); echo "done.\n"; echo "Partial manual saved to {$filename}. To build it, run 'phd -d {$filename}'\n"; exit(0); } // }}} $mxml = $ac["OUTPUT_FILENAME"]; if ($dom->validate()) { echo "done.\n"; printf("\nAll good. Saving %s... ", basename($ac["OUTPUT_FILENAME"])); flush(); if ($ac["SEGFAULT_SPEED"] == "yes") { $t = $dom->doctype; $dom->removeChild($t); } $dom->save($mxml); echo "done.\n"; echo "All you have to do now is run 'phd -d {$mxml}'\n"; echo "If the script hangs here, you can abort with ^C.\n"; echo <<<CAT _ _..._ __ \\)` (` / / `\\ | d b | =\\ Y =/--..-="````"-. '.=__.-' `\\ o/ /\\ \\ | | \\ \\ / ) \\ .--""`\\ < \\ '-' /
/**
 * Run the alt and title attributes of every image in an HTML fragment
 * through wpml_ctt_prepare_string().
 *
 * @param mixed  $template Passed through to wpml_ctt_prepare_string().
 * @param string $string   HTML fragment (expected to be UTF-8).
 * @param mixed  $lang     Passed through to wpml_ctt_prepare_string().
 *
 * @return string The rewritten fragment, or $string unchanged when it
 *                cannot be parsed.
 */
private function add_language_name_to_images($template, $string, $lang)
{
    $doc = new DOMDocument();

    // The <div> is a dirty hack to avoid an additional <p> around text
    // without tags. The XML declaration makes libxml decode the fragment as
    // UTF-8; without it non-ASCII characters are read as ISO-8859-1 and come
    // out garbled.
    $loaded = @$doc->loadHTML('<?xml encoding="UTF-8"><div>' . $string . '</div>');
    if (!$loaded) {
        return $string;
    }

    $images = $doc->getElementsByTagName('img');
    foreach ($images as $image) {
        if ($image->hasAttribute('alt')) {
            $image->setAttribute('alt', wpml_ctt_prepare_string($template, $image->getAttribute('alt'), $lang));
        }
        if ($image->hasAttribute('title')) {
            $image->setAttribute('title', wpml_ctt_prepare_string($template, $image->getAttribute('title'), $lang));
        }
    }

    // removes the doctype and the encoding declaration; the child list is
    // copied first because it is modified while being walked
    foreach (iterator_to_array($doc->childNodes) as $node) {
        if ($node->nodeType == XML_DOCUMENT_TYPE_NODE || $node->nodeType == XML_PI_NODE) {
            $doc->removeChild($node);
        }
    }

    // removes html, body and div tags
    $result = str_replace(
        array('<html>', '</html>', '<body><div>', '</div></body>'),
        array('', '', '', ''),
        $doc->saveHTML()
    );

    return $result;
}
/**
 * Removes dom nodes, eg: <script> elements
 *
 * The markup is parsed inside a <div class="form-group"> wrapper, every
 * node matched by the XPath expression is removed and the wrapper with the
 * remaining content is serialized.
 *
 * @param string $html        HTML fragment
 * @param string $xpathString XPath expression selecting the nodes to remove
 * @return string
 */
private function removeDomNodes($html, $xpathString)
{
    $document = new DOMDocument();

    // Libxml constants not available on all servers (Libxml < 2.7.8), so
    // LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD cannot be used here.
    $document->loadHtml('<div class="form-group">' . $html . '</div>');

    // remove <!DOCTYPE
    $document->removeChild($document->doctype);

    // remove <html><body></body></html>
    $htmlElement = $document->firstChild;
    $document->replaceChild($htmlElement->firstChild->firstChild, $htmlElement);

    // remove the required nodes, re-running the query after each removal
    $finder = new DOMXPath($document);
    while (true) {
        $match = $finder->query($xpathString)->item(0);
        if (!$match) {
            break;
        }
        $match->parentNode->removeChild($match);
    }

    return $document->saveHTML();
}
/**
 * Return a document object based on a string containing the contents of
 * a web page
 *
 * A page without any "<html" in it is first turned into a simplified
 * document: the head keeps only title/meta/base tags, the body only a fixed
 * list of structural and inline tags. The page is then parsed as UTF-8.
 *
 * @param string $page a web page
 *
 * @return object document object
 */
static function dom($page)
{
    /* crude check for an <html> tag; without one, build a simplified
       html document from what we got */
    if (!stristr($page, "<html")) {
        $allowed_in_head = "<title><meta><base>";
        $allowed_in_body = "<frameset><frame><noscript><img><span><b><i><em><strong><h1><h2><h3><h4><h5><h6><p><div><a><table><tr><td><th><dt><dir><dl><dd>";

        $head = strip_tags($page, $allowed_in_head);
        $body = strip_tags($page, $allowed_in_body);

        $page = "<html><head>{$head}</head><body>{$body}</body></html>";
    }

    $dom = new DOMDocument();

    // this hack modified from php.net: announce the charset up front ...
    @$dom->loadHTML('<?xml encoding="UTF-8">' . $page);

    // ... and remove the announcement again
    foreach ($dom->childNodes as $item) {
        if ($item->nodeType != XML_PI_NODE) {
            continue;
        }
        $dom->removeChild($item);
    }

    // insert proper
    $dom->encoding = "UTF-8";

    return $dom;
}
/**
 * Reduce `$this->content` to the first node matched by an XPath expression.
 *
 * The content is parsed as UTF-8, the first match is serialized, and that
 * markup is parsed and serialized once more as a document of its own before
 * being stored back into `$this->content`.
 *
 * @param string|null $xpath XPath expression; nothing happens when empty.
 *
 * @return bool true when the content was rewritten, false when no
 *              expression was given.
 */
public function stripExtra($xpath = null)
{
    if (empty($xpath)) {
        return false;
    }

    // Parses markup as UTF-8: the encoding declaration is prepended for the
    // parser and removed again afterwards (dirty fix).
    $loadUtf8 = function ($markup) {
        $document = new DOMDocument();
        @$document->loadHTML('<?xml encoding="UTF-8">' . $markup);

        foreach ($document->childNodes as $child) {
            if ($child->nodeType == XML_PI_NODE) {
                $document->removeChild($child);
            }
        }

        // insert proper
        $document->encoding = 'UTF-8';

        return $document;
    };

    $source = $loadUtf8($this->content);
    $finder = new DOMXPath($source);
    $selected = $source->saveHTML($finder->query($xpath)->item(0));

    $this->content = $loadUtf8($selected)->saveHTML();

    return true;
}
/**
 * Round-trip HTML through DOMDocument as UTF-8.
 *
 * An XML encoding declaration is prepended so the parser reads the markup
 * as UTF-8; the declaration is removed again before the document is
 * serialized.
 *
 * @param string $html Markup to normalise.
 *
 * @return string The serialized document.
 */
private function fixEncoding($html)
{
    $document = new \DOMDocument();
    @$document->loadHTML('<?xml encoding="UTF-8">' . $html);

    foreach ($document->childNodes as $child) {
        if ($child->nodeType != XML_PI_NODE) {
            continue;
        }
        $document->removeChild($child);
    }

    $document->encoding = 'UTF-8';

    return $document->saveHTML();
}
/**
 * Translate template markup into PHP/HTML source.
 *
 * Processing order:
 *  1. <% IGNORE %>...<% IGNORE_END %> blocks are stashed and replaced by
 *     placeholder comments.
 *  2. <%= ... %> and <% ... %> code blocks (including `render` layout tags)
 *     are converted to PHP snippets, stashed and replaced by placeholders.
 *  3. For the `html` extension, form tags carrying data-helper="simphpfy"
 *     get their action/method filled in, and input/select/textarea elements
 *     are handed to parseInput().
 *  4. When variable parsing is enabled, $name$ and ${name}$ are turned into
 *     echo statements.
 *  5. All placeholders are expanded back into the content.
 *
 * @param string $content raw template source
 *
 * @return string the parsed template
 *
 * @throws InvalidTemplateException on a malformed render tag or form helper
 */
private function parse($content)
{
    $extension = $this->getExtension();
    $path = $this->getPath();
    $parseVariable = $this->isParseVariable();
    $options = $this->getOptions();

    /*
     * Everything inside a <% IGNORE %>...<% IGNORE_END %> block
     * will be ignored from being parsed
     */
    $ignoreBlock = array();
    $content = preg_replace_callback('@(<% *IGNORE *%>(.*?)<% *IGNORE_END *%>)@s', function ($matches) use(&$ignoreBlock) {
        $ignoreBlock[] = $matches[2];
        return '<!--SimPHPfyIgnoreBlock#' . count($ignoreBlock) . '#SimPHPfyIgnoreBlock-->';
    }, $content);

    /*
     * Code block is in the form <% code %>, code can be any valid
     * PHP statements.
     * A special form of code block is <%= ${variable_name} %>
     * which is equivalent to echo a variable
     *
     * Every code snippet is stored inside the $codeBlock Array to
     * prevent variable parsing from corrupting the whole document
     */
    $codeBlock = array();
    $content = preg_replace_callback('@(<%= *(.*?[^\\\\]) *%>)@s', function ($matches) use(&$codeBlock) {
        $codeSnippet = $matches[2];
        if (preg_match('@^\\${([a-zA-Z_\\x7f-\\xff][\\[\\]\'"a-zA-Z0-9_\\x7f-\\xff*]*(\\[[\'"][a-zA-Z0-9_\\x7f-\\xff*]*[\'"]\\])*)}$@', $codeSnippet)) {
            $codeSnippet = "<?php if (isset({$codeSnippet})) { echo {$codeSnippet}; } ?>";
        } elseif (preg_match('@^\\$([a-zA-Z_\\x7f-\\xff][a-zA-Z0-9_\\x7f-\\xff*]*(\\[[\'"][a-zA-Z0-9_\\x7f-\\xff*]*[\'"]\\])*)$@', $codeSnippet)) {
            $codeSnippet = "<?php if (isset({$codeSnippet})) { echo {$codeSnippet}; } ?>";
        }
        $codeBlock[] = $codeSnippet;
        return '<!--SimPHPfyCodeBlock#' . count($codeBlock) . '#SimPHPfyCodeBlock-->';
    }, $content);

    $content = preg_replace_callback('@(<% *(.*?[^\\\\]) *%>)@s', function ($matches) use(&$codeBlock, $path) {
        $codeSnippet = $matches[2];
        if ($codeSnippet == 'IGNORE_MINIFY' || $codeSnippet == 'IGNORE_MINIFY_END') {
            return $matches[0];
        } elseif (preg_match('@^render.*$@', $codeSnippet)) {
            /*
             * Layout rendering
             */
            $codeSnippet = '<' . $codeSnippet . ' />';
            $dom = new DOMDocument();
            @$dom->loadXML($codeSnippet);

            /*
             * extract the attributes of render tag
             */
            $render = $dom->getElementsByTagName('render')->item(0);
            if ($render === null) {
                // The tag could not be parsed as XML; report it as a template
                // error instead of failing on a null element below.
                throw new InvalidTemplateException(array($path, 'Malformed render tag'));
            }

            // <% render file='header.html' directory='' static='' dynamic='' %>
            $file = $directory = $dynamic = $format = $parsedDirectory = '';
            if (($file = $render->getAttribute('file')) == '') {
                throw new InvalidTemplateException(array($path, 'Missing `file` for layout rendering'));
            }

            $directory = ASSERT;
            $parsedDirectory = TEMP_VIEW . 'Assert' . DS;
            // The assignment must be parenthesised: `!=` binds tighter than
            // `=`, so without the parentheses $dirAttr would hold a boolean.
            if (($dirAttr = $render->getAttribute('directory')) != '') {
                $directory .= $dirAttr . DS;
                $parsedDirectory = TEMP_VIEW . $dirAttr . DS;
            }

            /*
             * If `controller` attribute is specified, the layout is
             * inside the View/:Controller
             */
            if (($controllerAttr = $render->getAttribute('controller')) != '') {
                $directory = VIEW . $controllerAttr . DS;
                $parsedDirectory = TEMP_VIEW . $controllerAttr . DS;
            }

            if (($staticAttr = $render->getAttribute('static')) != '') {
                $dynamic = !$staticAttr;
            }

            /*
             * `dynamic` attribute always overwrites `static` attribute,
             * though it is not recommended to set both attributes
             */
            if (($dynamicAttr = $render->getAttribute('dynamic')) != '') {
                $dynamic = $dynamicAttr;
            }

            /*
             * The behaviour of render. When a render code is parsed,
             * should it return a URL to the rendered file or directly
             * render that file, possibilities: 'url' | 'direct'
             * default: 'direct'
             */
            if (($behaviourAttr = $render->getAttribute('behaviour')) == '') {
                $behaviour = 'direct';
            } else {
                $behaviour = $behaviourAttr;
            }

            $format = $render->getAttribute('format');

            // Build the PHP source of the options array passed to Template::render
            $optionArray = 'array(';
            $i = 0;
            if ($format != '') {
                $optionArray .= "'format' => {$format}";
                $i++;
            }
            if ($parsedDirectory != '') {
                $optionArray .= ($i++ == 0 ? '' : ', ') . "'parsedDirectory' => '{$parsedDirectory}'";
            }
            if ($dynamic !== '') {
                $optionArray .= ($i++ == 0 ? '' : ', ') . '\'dynamic\' => ' . ($dynamic ? 'TRUE' : 'FALSE');
            } else {
                $dynamic = TRUE;
            }
            $optionArray .= ')';

            $extension = substr(strrchr($file, "."), 1);
            if ($behaviour == 'direct' || $dynamic) {
                if ($extension == 'html') {
                    $codeSnippet = "<?php include Template::render('{$file}', '{$directory}', {$optionArray}); ?>";
                } elseif ($extension == 'js') {
                    $codeSnippet = "<script><?php include Template::render('{$file}', '{$directory}', {$optionArray}); ?></script>";
                } elseif ($extension == 'css') {
                    $codeSnippet = "<style><?php include Template::render('{$file}', '{$directory}', {$optionArray}); ?></style>";
                }
            } else {
                if ($extension == 'html') {
                    $codeSnippet = "<?php include Template::render('{$file}', '{$directory}', {$optionArray}); ?>";
                } else {
                    $optionArray = substr($optionArray, 0, -1) . ($i++ == 0 ? '' : ', ') . '\'relative\' => TRUE)';
                    if ($extension == 'js') {
                        $codeSnippet = "<script src=\"<?php echo Template::render('{$file}', '{$directory}', {$optionArray}); ?>\"></script>";
                    } elseif ($extension == 'css') {
                        /*
                         * The href needs the rendered location printed, the
                         * same way the script src above does; including the
                         * file here would dump its content into the attribute.
                         */
                        $codeSnippet = "<link rel=\"stylesheet\" href=\"<?php echo Template::render('{$file}', '{$directory}', {$optionArray}); ?>\" />";
                    }
                }
            }
        } else {
            $codeSnippet = '<?php ' . str_replace('\\%>', '%>', $codeSnippet) . ' ?>';
        }
        $codeBlock[] = $codeSnippet;
        return '<!--SimPHPfyCodeBlock#' . count($codeBlock) . '#SimPHPfyCodeBlock-->';
    }, $content);

    /*
     * Perform HTML specific parsing
     */
    if ($extension == 'html') {
        $htmlBlock = array();

        /*
         * guarantee the form tag is not inside a comment by examining the
         * DOM Tree instead of pure regular expression
         */
        $dom = new DOMDocument();
        @$dom->loadHTML($content);
        $forms = $dom->getElementsByTagName('form');
        for ($i = 0; $i < $forms->length; $i++) {
            /*
             * Re-create the DOM because the previous iteration has
             * changed the content of $content
             */
            $dom = new DOMDocument();
            @$dom->loadHTML($content);
            $form = $dom->getElementsByTagName('form')->item($i);
            if ($form->textContent == '') {
                /*
                 * Create dummy block to make the form tag recognizable
                 */
                $dummyString = '<!--SimPHPfyDummyBlock#' . $i . '#SimPHPfyDummyBlock-->';
                $textNode = $dom->createTextNode($dummyString);
                $form->appendChild($textNode);
                $content = $dom->saveHTML();
                $pattern = '@(< *form.*?>)(' . $dummyString . ')@';
            } else {
                /*
                 * Save the form HTML content into the $htmlBlock
                 */
                $htmlContent = '';
                while ($form->hasChildNodes()) {
                    /*
                     * Loop through the children so the parent form tag
                     * itself is not included in the stored $htmlBlock
                     */
                    $node = $form->childNodes->item(0);
                    $htmlContent .= $dom->saveXML($node);
                    $form->removeChild($node);
                }
                $htmlBlock[] = $htmlContent;
                $commentNode = $dom->createComment('SimPHPfyHTMLBlock#' . count($htmlBlock) . '#SimPHPfyHTMLBlock');
                $htmlBlockString = '<!--SimPHPfyHTMLBlock#' . count($htmlBlock) . '#SimPHPfyHTMLBlock-->';
                $form->appendChild($commentNode);
                $content = $dom->saveHTML();
                $pattern = '@(< *form.*?>)(' . $htmlBlockString . ')@';
            }

            $content = preg_replace_callback($pattern, function ($matches) use($options, $path) {
                $formDom = new DOMDocument();
                @$formDom->loadHTML($matches[1]);
                $formTags = @$formDom->getElementsByTagName('form');
                $formTag = $formTags->item(0);
                $controller = $action = '';

                /*
                 * data-controller and data-action attributes determine which
                 * controller and action the form should send to
                 *
                 * A table of the possible values of data-* is as follows:
                 * data-controller  data-action  result
                 * String           String       $controller/$action
                 * Omitted          String       :Controller/$action
                 * String           Omitted      Disallowed
                 * Omitted          Omitted      :Controller/:Action
                 */
                if ($formTag->getAttribute('data-helper') == 'simphpfy') {
                    if ($formTag->getAttribute('data-controller') == '') {
                        if ($formTag->getAttribute('data-action') == '') {
                            if (isset($options['controller'])) {
                                $controller = $options['controller']->getController();
                                $action = $options['controller']->getAction();
                            } else {
                                throw new InvalidTemplateException(array($path, 'missing controller and/or action for form helper'));
                            }
                        } else {
                            if (isset($options['controller'])) {
                                $controller = $options['controller']->getController();
                                $action = $formTag->getAttribute('data-action');
                            } else {
                                throw new InvalidTemplateException(array($path, 'missing controller for form helper'));
                            }
                        }
                    } else {
                        if ($formTag->getAttribute('data-action') == '') {
                            throw new InvalidTemplateException(array($path, 'malformed form helper'));
                        } else {
                            $controller = $formTag->getAttribute('data-controller');
                            $action = $formTag->getAttribute('data-action');
                        }
                    }

                    if ($controller && $action) {
                        $actionAttr = DIRECTORY_PREFIX . $controller . DS . $action;
                        if (($id = $formTag->getAttribute('data-id')) != '') {
                            $actionAttr .= DS . $id;
                        }
                        $formTag->setAttribute('action', $actionAttr);
                    } else {
                        throw new InvalidTemplateException(array($path, 'malformed form helper'));
                    }

                    // check if method is PUT or DELETE
                    if ($formTag->getAttribute('data-method') != '') {
                        $method = strtoupper($formTag->getAttribute('data-method'));
                        if ($method == "PUT" || $method == "DELETE") {
                            $formTag->setAttribute('method', 'POST');
                        } elseif ($method == 'GET' || $method == 'POST') {
                            $formTag->setAttribute('method', $method);
                        }
                        // the requested method travels in a hidden _method field
                        $inputMethod = $formDom->createElement('input');
                        $inputMethod->setAttribute('type', 'hidden');
                        $inputMethod->setAttribute('name', '_method');
                        $inputMethod->setAttribute('value', $method);
                        $formTag->appendChild($inputMethod);
                    }

                    // remove <!DOCTYPE
                    $formDom->removeChild($formDom->doctype);
                    // remove <html><body></body></html>
                    $formDom->replaceChild($formDom->firstChild->firstChild->firstChild, $formDom->firstChild);
                    return str_replace('</form>', '', $formDom->saveHTML()) . $matches[2];
                } else {
                    return $matches[0];
                }
            }, $content);
        }

        $content = preg_replace_callback('@<!--SimPHPfyHTMLBlock#([0-9]+)#SimPHPfyHTMLBlock-->@', function ($matches) use($htmlBlock) {
            return $htmlBlock[(int) $matches[1] - 1];
        }, $content);

        $dom = new DOMDocument();
        @$dom->loadHTML($content);
        $this->parseInput($dom->getElementsByTagName('input'), $codeBlock);
        $this->parseInput($dom->getElementsByTagName('select'), $codeBlock);
        $this->parseInput($dom->getElementsByTagName('textarea'), $codeBlock);
        $content = $dom->saveHTML();
    }

    if ($parseVariable) {
        /*
         * ${name}$ and $name$ become echo statements unless the opening
         * dollar sign is escaped with a backslash. A lookbehind is used so
         * the character in front of the variable is kept in the output and
         * a variable at the very start of the content is recognised too.
         */
        $content = preg_replace('@(?<!\\\\)\\$\\{([a-zA-Z_\\x7f-\\xff][a-zA-Z0-9_\\x7f-\\xff]*)\\}\\$@', '<?php echo $\\1; ?>', $content);
        $content = preg_replace('@(?<!\\\\)\\$([a-zA-Z_\\x7f-\\xff][a-zA-Z0-9_\\x7f-\\xff]*)\\$@', '<?php echo $\\1; ?>', $content);
    }

    // Expand the placeholders back into their stored content
    $content = preg_replace_callback('@<!--SimPHPfyIgnoreBlock#([0-9]+)#SimPHPfyIgnoreBlock-->@', function ($matches) use($ignoreBlock) {
        return $ignoreBlock[(int) $matches[1] - 1];
    }, $content);
    $content = preg_replace_callback('@<!--SimPHPfyCodeBlock#([0-9]+)#SimPHPfyCodeBlock-->@', function ($matches) use($codeBlock) {
        return $codeBlock[(int) $matches[1] - 1];
    }, $content);
    $content = preg_replace('@<!--SimPHPfyDummyBlock#([0-9]+)#SimPHPfyDummyBlock-->@', '', $content);

    return $content;
}
/**
 * Convert xml/html code to a DOMXpath object.
 *
 * HTML input is prefixed with a pseudo XML declaration so the HTML parser
 * uses the requested encoding; that injected processing instruction is
 * removed again after loading. XML input is loaded with a well-formed XML
 * declaration, which is added only when the code does not already start
 * with one.
 *
 * Parse errors are suppressed; on failure the returned XPath wraps an empty
 * document.
 *
 * @param String code     The html/xml code to be parsed.
 * @param String type     html|xml
 * @param String encoding
 *
 * @return \DOMXPath
 */
protected static function toXpath($code, $type = 'html', $encoding = 'UTF-8')
{
    $doc = new \DOMDocument();
    if ($type == 'xml') {
        // The HTML-style hack prefix is not a well-formed XML declaration
        // (no version, no closing marker) and makes loadXML() reject every
        // document, so XML gets a proper declaration instead.
        if (strncmp((string) $code, '<?xml', 5) !== 0) {
            $code = '<?xml version="1.0" encoding="' . $encoding . '"?>' . $code;
        }
        @$doc->loadXML($code);
    } else {
        @$doc->loadHTML('<?xml encoding="' . $encoding . '">' . $code);
        foreach ($doc->childNodes as $item) {
            if ($item->nodeType == XML_PI_NODE) {
                $doc->removeChild($item); #remove encoding node
                // childNodes is a live list; stop once the injected PI
                // (always the first one) has been removed.
                break;
            }
        }
    }
    return new \DOMXPath($doc);
}
[expect] root nodeType: 1 child nodeType: 1 [file] <?php /*
 * Regression script: a node obtained through XPath is read again after the
 * tree it belongs to has been detached from the document with removeChild().
 * The expected output above lists the node name followed by "nodeType: 1"
 * for each of the two cases.
 *
 * Case 1: Node is preserved from removeChild.
 * $node is the root element itself, i.e. the very node being removed.
 */
$dom = new DOMDocument();
$dom->loadXML('<root><child/></root>');
$xpath = new DOMXpath($dom);
$node = $xpath->query('/root')->item(0);
echo $node->nodeName . "\n";
// the removed node is fetched through $GLOBALS rather than the local variable
$dom->removeChild($GLOBALS['dom']->firstChild);
echo "nodeType: " . $node->nodeType . "\n";
/*
 * Case 2: Node gets destroyed during removeChild.
 * $node is the <child> element, a descendant of the root that is removed;
 * here both the document and the removed node are reached through $GLOBALS.
 */
$dom->loadXML('<root><child/></root>');
$xpath = new DOMXpath($dom);
$node = $xpath->query('//child')->item(0);
echo $node->nodeName . "\n";
$GLOBALS['dom']->removeChild($GLOBALS['dom']->firstChild);
echo "nodeType: " . $node->nodeType . "\n";
/**
 * Collect and strip the top-level <?xyl-meta?> processing-instructions.
 *
 * Each matching instruction has its data wrapped in an Xml\Attribute and
 * appended to $this->_metas, and is then removed from the document.
 *
 * @param   \DOMDocument  $ownerDocument    Document that owns the PIs.
 * @return  void
 */
protected function computeMeta(\DOMDocument $ownerDocument)
{
    $finder = new \DOMXPath($ownerDocument);
    $instructions = $finder->query('/processing-instruction(\'xyl-meta\')');
    unset($finder);

    // The count is read once up front; an empty result simply skips the loop.
    $total = $instructions->length;
    $position = 0;

    while ($position < $total) {
        $instruction = $instructions->item($position);
        $this->_metas[] = new Xml\Attribute($instruction->data);
        $ownerDocument->removeChild($instruction);
        ++$position;
    }
}
/**
 * Re-render one HTML element, processing the markup nested inside it.
 *
 * The element is parsed with DOMDocument and unwrapped from the html/body
 * shell the parser adds. When it carries markdown="1", its inner HTML is
 * passed through $this->text() and the attribute is dropped; otherwise each
 * child element whose name is not listed in $this->textLevelElements is
 * processed recursively and everything else is copied as is.
 *
 * References for the individual workarounds:
 *  - libxml error handling: http://stackoverflow.com/q/1148928/200145
 *  - entity encoding:       http://stackoverflow.com/q/11309194/200145
 *  - loading the markup:    http://stackoverflow.com/q/4879946/200145
 *
 * @param string $elementMarkup markup of a single element
 *
 * @return string the element's markup with its content replaced
 */
protected function processTag($elementMarkup)
{
    libxml_use_internal_errors(true);

    $document = new DOMDocument();
    $encodedMarkup = mb_convert_encoding($elementMarkup, 'HTML-ENTITIES', 'UTF-8');
    $document->loadHTML($encodedMarkup);

    // Drop the doctype, then promote html > body > element to document root.
    $document->removeChild($document->doctype);
    $wrapper = $document->firstChild;
    $document->replaceChild($wrapper->firstChild->firstChild, $wrapper);

    $root = $document->documentElement;
    $innerText = '';

    if ($root->getAttribute('markdown') !== '1') {
        foreach ($root->childNodes as $child) {
            $childMarkup = $document->saveHTML($child);
            $needsRecursion = $child instanceof DOMElement
                && !in_array($child->nodeName, $this->textLevelElements);
            $innerText .= $needsRecursion ? $this->processTag($childMarkup) : $childMarkup;
        }
    } else {
        foreach ($root->childNodes as $child) {
            $innerText .= $document->saveHTML($child);
        }
        $root->removeAttribute('markdown');
        $innerText = "\n" . $this->text($innerText) . "\n";
    }

    // A placeholder is serialized and swapped out afterwards because we
    // don't want the inner markup to get encoded.
    $root->nodeValue = 'placeholder';
    $serialized = $document->saveHTML($root);

    return str_replace('placeholder', $innerText, $serialized);
}
/**
 * Convert an HTML fragment by running its <body> through processBlock().
 *
 * The markup is parsed as UTF-8 with libxml errors collected internally (the
 * previous error-handling setting is restored afterwards). Before
 * conversion, every in-page link target (href starting with '#') found in
 * the body is mapped to a 'ref:'-prefixed slug and stored in self::$_refs.
 *
 * @param string $html
 * @param array $options OPTIONAL
 * @return string result of processBlock() for the body, or an empty string
 *                when the parsed document has no body element
 */
public static function filter($html, array $options = null)
{
    $errors = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    libxml_clear_errors();
    libxml_use_internal_errors($errors);

    foreach ($doc->childNodes as $item) {
        if ($item->nodeType == XML_PI_NODE) {
            $doc->removeChild($item);
            // childNodes is a live list, so iterating on after a removal is
            // unreliable; the injected PI is always the first one.
            break;
        }
    }
    $doc->encoding = 'UTF-8';

    $body = $doc->getElementsByTagName('body')->item(0);

    // developer switch: set to a truthy value to dump intermediate output
    $debug = 0;
    if ($debug) {
        header('Content-Type: text/plain; charset=utf-8');
        echo $doc->saveHTML(), "\n\n";
    }

    if ($body) {
        $elems = array($body);
        $refs = array();
        $filter = new Zefram_Filter_Slug(); // FIXME dependency!

        // extract all referenced ids of elements, they will be used for internal links creation
        // (breadth-first walk over the element nodes below body)
        while ($elem = array_shift($elems)) {
            foreach ($elem->childNodes as $item) {
                if ($item->nodeType === XML_ELEMENT_NODE) {
                    $elems[] = $item;
                }
            }
            if ($elem->nodeType === XML_ELEMENT_NODE && strtoupper($elem->tagName) === 'A') {
                $href = trim($elem->getAttribute('href'));
                if (strlen($href) && $href[0] === '#') {
                    $id = substr($href, 1);
                    $refs[$id] = 'ref:' . $filter->filter(str_ireplace('ref:', '', $id));
                }
            }
        }
        self::$_refs = $refs;

        // TODO create IDs map
        $latex = self::processBlock($body, self::TRIM);
        if ($debug) {
            header('Content-Type: text/plain; charset=utf-8');
            echo $latex;
            exit;
        }
        return $latex;
    }
    return '';
}