/**
 * Transform an XML file with an XSLT stylesheet and return the result as HTML.
 *
 * Both documents are read with DOMDocument::load(); the transformed document
 * is serialized with DOMDocument::saveHTML() after enabling formatOutput.
 *
 * @param string $xml_path Location of the XML source document.
 * @param string $xsl_path Location of the XSLT stylesheet.
 * @return string HTML serialization of the transformation result.
 * @throws RuntimeException When either document cannot be loaded, the
 *                          stylesheet cannot be imported, or the
 *                          transformation itself fails.
 */
function printXML($xml_path, $xsl_path)
{
    $xml_dom = new DOMDocument();
    if (!$xml_dom->load($xml_path)) {
        throw new RuntimeException('Failed to load XML document: ' . $xml_path);
    }

    $xsl_dom = new DOMDocument();
    if (!$xsl_dom->load($xsl_path)) {
        throw new RuntimeException('Failed to load XSLT stylesheet: ' . $xsl_path);
    }

    $proc = new XSLTProcessor();
    if (!$proc->importStylesheet($xsl_dom)) {
        throw new RuntimeException('Failed to import XSLT stylesheet: ' . $xsl_path);
    }

    // transformToDoc() yields false on failure; without this check the
    // property assignment below would be attempted on a boolean.
    $doc = $proc->transformToDoc($xml_dom);
    if (!$doc) {
        throw new RuntimeException('XSLT transformation failed for: ' . $xml_path);
    }

    $doc->formatOutput = true;

    return $doc->saveHTML();
}
/**
 * Replace every non-ASCII character of a UTF-8 string with a decimal numeric
 * character reference.
 *
 * Numeric references are valid in both XML and HTML, whereas named HTML
 * entities are undefined for an XML parser that has no DTD.
 *
 * @param string $text UTF-8 text.
 * @return string ASCII-only text.
 */
function _xmltohtml_encode_entities($text)
{
    return mb_encode_numericentity((string) $text, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
}

/**
 * Serialize the child nodes of a node as XML markup.
 *
 * Works on plain DOM nodes, so no custom node class has to be registered on
 * the owning document.
 *
 * @param DOMNode $node Node whose children are serialized.
 * @return string Concatenated markup of all children ('' when there are none).
 */
function _xmltohtml_inner_markup(DOMNode $node)
{
    $markup = '';
    if ($node->hasChildNodes()) {
        foreach ($node->childNodes as $child) {
            $markup .= $node->ownerDocument->saveXML($child);
        }
    }

    return $markup;
}

/**
 * Render the children of a node for a tooltip: text nodes are copied as-is,
 * every other child becomes <span class="nlm-{nodeName}"> wrapping its markup.
 *
 * @param DOMNode $node Parent node.
 * @return string Tooltip markup fragment.
 */
function _xmltohtml_children_as_spans(DOMNode $node)
{
    $markup = '';
    foreach ($node->childNodes as $childNode) {
        if ($childNode->nodeType == XML_TEXT_NODE) {
            $markup .= $childNode->nodeValue;
        } else {
            $markup .= '<span class="nlm-' . $childNode->nodeName . '">'
                . _xmltohtml_inner_markup($childNode) . '</span>';
        }
    }

    return $markup;
}

/**
 * Build the tooltip markup for one author: names, affiliations referenced via
 * xref[@ref-type="aff"], and the contribution / competing-interest footnotes
 * referenced via xref[@ref-type="fn"].
 *
 * @param DOMXPath   $xpath   XPath bound to the transformed article document.
 * @param DOMElement $contrib The contrib element of the author.
 * @return string Tooltip markup, prefixed with "|".
 */
function _xmltohtml_author_tooltip(DOMXPath $xpath, DOMElement $contrib)
{
    $tooltip = '|<div class="author-tooltip">';

    foreach ($xpath->query('.//name', $contrib) as $name) {
        $tooltip .= '<div class="author-tooltip-name">' . _xmltohtml_children_as_spans($name) . '</div>';
    }

    $tooltip .= '<div class="author-tooltip-affiliation">';
    $affRefs = $xpath->query('.//xref[@ref-type = "aff"]', $contrib);
    for ($i = 0, $len = $affRefs->length; $i < $len; $i++) {
        $rid = $affRefs->item($i)->getAttribute('rid');
        // Resolve the affiliation through its id; query() yields false when
        // the rid makes the expression invalid (e.g. it contains a quote).
        $affs = $xpath->query('//aff[@id="' . $rid . '"]');
        if ($affs !== false) {
            foreach ($affs as $aff) {
                $tooltip .= '<span class="nlm-aff">' . _xmltohtml_children_as_spans($aff) . '</span>';
            }
        }
        if ($i != $len - 1) {
            $tooltip .= ';';
        }
    }
    $tooltip .= '</div>';

    $fnRefs = $xpath->query('.//xref[@ref-type = "fn"]', $contrib);
    for ($i = 0, $len = $fnRefs->length; $i < $len; $i++) {
        $rid = $fnRefs->item($i)->getAttribute('rid');
        $fns = $xpath->query('//fn[@id="' . $rid . '"]');
        if ($fns === false) {
            continue;
        }
        foreach ($fns as $fn) {
            $fnType = $fn->getAttribute('fn-type');
            if ($fnType == 'con') {
                // Author contribution
                $class = 'author-tooltip-contrib';
                $label = 'Contribution: ';
            } elseif ($fnType == 'conflict') {
                // Competing interests
                $class = 'author-tooltip-conflict';
                $label = 'Competing Interests: ';
            } else {
                // Any other footnote type has no tooltip section; skipping it
                // avoids emitting it under the class/label of a previous one.
                continue;
            }

            $tooltip .= '<div class="' . $class . '"><span class="author-tooltip-label">' . $label . '</span>';
            $tooltip .= '<span class="author-tooltip-text">';
            $pTags = $xpath->query('.//p', $fn);
            if ($pTags->length == 0) {
                // No paragraphs: fall back to the footnote's own content.
                $tooltip .= _xmltohtml_inner_markup($fn);
            }
            foreach ($pTags as $pTag) {
                $tooltip .= '<span class="nlm-p">' . trim(_xmltohtml_inner_markup($pTag)) . '</span>';
            }
            $tooltip .= '</span></div>';
        }
    }

    $tooltip .= '</div>';

    return $tooltip;
}

/**
 * Clean up the elements carrying class "elife-institution": drop a leading
 * text node that is only a comma, collapse doubled commas in direct text
 * nodes, and remove an institution whose text is repeated by a later one.
 *
 * @param DOMXPath $xpath XPath bound to the transformed article document.
 * @return void
 */
function _xmltohtml_tidy_institutions(DOMXPath $xpath)
{
    $institutions = $xpath->query('//*[@class="elife-institution"]');
    $count = $institutions->length;

    for ($i = 0; $i < $count; $i++) {
        $institution = $institutions->item($i);

        // Compare only with LATER entries; starting at $i itself would match
        // the node against itself and flag every institution for removal.
        $isDuplicate = false;
        for ($j = $i + 1; $j < $count; $j++) {
            if ($institution->nodeValue == $institutions->item($j)->nodeValue) {
                $isDuplicate = true;
                break;
            }
        }

        if ($institution->hasChildNodes()) {
            $first = $institution->childNodes->item(0);
            if ($first->nodeType == XML_TEXT_NODE && trim($first->nodeValue) == ',') {
                // DOMRemove() is defined outside this block; presumably it
                // detaches the given node - verify against its definition.
                DOMRemove($first);
            }
        }

        foreach ($xpath->query('./text()', $institution) as $directTextNode) {
            $directTextNode->nodeValue = preg_replace('/\\s*\\,\\s*\\,\\s*/u', ', ', $directTextNode->nodeValue);
        }

        if ($isDuplicate && $institution->parentNode !== null) {
            $parent = $institution->parentNode;
            // The node following a duplicate is removed with it (presumably a
            // separator - confirm against the stylesheet output).
            if ($institution->nextSibling !== null) {
                $parent->removeChild($institution->nextSibling);
            }
            $parent->removeChild($institution);
        }
    }
}

/**
 * Resolve every <article id="..."> of a request document into HTML.
 *
 * For each article the eLife XML is downloaded from the CDN, normalized by a
 * round trip through the HTML parser, transformed with ./elife_xmltohtml.xsl
 * and post-processed (author tooltips, institution clean-up, formula labels).
 * If the article has <query xpath="..."> children, the matching nodes are
 * appended to each query inside <data> elements; otherwise the whole
 * transformed document is appended to the article. Failures are reported as
 * "ERROR: ..." text inside the affected article (or the root element when the
 * stylesheet itself cannot be loaded).
 *
 * @param string $messageXML Request document (UTF-8 XML).
 * @return string The request document with results, without XML declaration.
 */
function xmltohtml($messageXML)
{
    $messageXML = _xmltohtml_encode_entities($messageXML);

    // Stream context used for the article download.
    $opts = array(
        'http' => array(
            'method' => "GET",
            'header' => "Accept-language: en\r\n" . "Cookie: foo=bar\r\n" . "Content-Type: text/xml; charset=UTF-8",
        ),
    );
    $context = stream_context_create($opts);

    $doc = new DOMDocument('1.0', 'UTF-8');
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput = false;
    // Load the request first so a root element exists for error reporting.
    $doc->loadXML($messageXML);

    $declarationPattern = '/<\?xml version[^>]*>/';

    $xslFilePath = './elife_xmltohtml.xsl';
    $xsl = new DOMDocument();
    if (!$xsl->load($xslFilePath)) {
        if ($doc->documentElement !== null) {
            $doc->documentElement->nodeValue = 'ERROR: Failed to load XSLT: ' . $xslFilePath;
        }

        return preg_replace($declarationPattern, '', $doc->saveXML());
    }

    $xpath = new DOMXPath($doc);
    $proc = null;              // created lazily, then shared by all articles
    $stylesheetImported = false;

    foreach ($xpath->query('//article') as $article) {
        $articleID = $article->getAttribute('id');
        if (!$articleID) {
            $article->nodeValue = 'ERROR: invalid article ID';
            continue;
        }

        $xmlFileURL = "https://s3.amazonaws.com/elife-cdn/elife-articles/{$articleID}/elife{$articleID}.xml";
        $rawXML = file_get_contents($xmlFileURL, false, $context);
        if ($rawXML === false || trim($rawXML) === '') {
            $article->nodeValue = 'ERROR: Something went wrong when reading XML from AWS';
            continue;
        }

        $articleXML = _xmltohtml_encode_entities($rawXML);
        // The XML and DTD declarations must go before the content is wrapped in an HTML body.
        $articleXML = preg_replace($declarationPattern, '', $articleXML);
        $articleXML = preg_replace('/<!DOCTYPE [^>]*>/', '', $articleXML);
        $articleXML = '<html><head><meta http-equiv=Content-Type content="text/html; charset=utf-8"></meta></head><body>'
            . $articleXML . '</body></html>';

        // Load the XML as HTML so named and hexadecimal entities end up the
        // same (e.g. articles 00444 and 07370 differ in which form they use).
        $aDoc = new DOMDocument('1.0', 'UTF-8');
        $aDoc->loadHTML($articleXML);
        $articleXML = preg_replace('/<!DOCTYPE [^>]*>/', '', $aDoc->saveXML());
        if (!$aDoc->loadXML($articleXML)) {
            $article->nodeValue = 'ERROR: Something went wrong during loading XML';
            continue;
        }

        if ($proc === null) {
            $proc = new XSLTProcessor();
            $stylesheetImported = $proc->importStylesheet($xsl);
        }
        if (!$stylesheetImported) {
            $article->nodeValue = 'ERROR: Something went wrong when loading XSLT';
            continue;
        }

        $newDoc = $proc->transformToDoc($aDoc);
        if (!$newDoc) {
            $article->nodeValue = 'ERROR: Something went wrong during transformation';
            continue;
        }

        $newDocXpath = new DOMXPath($newDoc);

        // Add tooltip
        foreach ($newDocXpath->query('//contrib[@contrib-type="author"]') as $contrib) {
            $contrib->setAttribute('tooltip', _xmltohtml_author_tooltip($newDocXpath, $contrib));
        }

        _xmltohtml_tidy_institutions($newDocXpath);

        // Move the label to the end of its disp-formula
        foreach ($newDocXpath->query('//span[contains(@class,"disp-formula")]') as $dispFormula) {
            $label = $newDocXpath->query('.//span[contains(@class, "disp-formula-label")]', $dispFormula);
            if ($label->length != 0) {
                $dispFormula->appendChild($label->item(0));
            }
        }

        // Answer the queries, or return the whole document when there are none.
        $queries = $xpath->query('.//query', $article);
        if ($queries->length == 0) {
            if ($newDoc->documentElement !== null) {
                $article->appendChild($doc->importNode($newDoc->documentElement, true));
            }
            continue;
        }

        foreach ($queries as $query) {
            $dataBlocks = $newDocXpath->query($query->getAttribute('xpath'));
            if ($dataBlocks === false) {
                // Invalid XPath supplied by the request: nothing to append.
                continue;
            }
            foreach ($dataBlocks as $dataBlock) {
                $dataNode = $doc->createElement('data');
                $dataNode->appendChild($doc->importNode($dataBlock, true));
                $query->appendChild($dataNode);
            }
        }
    }

    return preg_replace($declarationPattern, '', $doc->saveXML());
}
/**
 * Apply a chain of XSLT stylesheets to an XML document, caching the result.
 *
 * The result is cached through Zend_Cache under an id derived from the XML
 * content, the report language and the "xslt_*" request parameters, and is
 * tagged with getMyTag() of every stylesheet so that a changed stylesheet
 * directory invalidates only the entries built from it. Diagnostics are
 * appended to $GLOBALS['xmlinfomsg'] / $GLOBALS['xmlerrmsg'].
 *
 * @param DOMDocument  $xml           original XML
 * @param array|string $styles        list of XSL styles, resolved against $GLOBALS['xml_path']
 * @param mixed        $modifyContent modification time of the content
 * @param mixed        $modifySource  modification time of the source
 * @return string serialized top-level nodes of the result, without XML declaration
 */
function applyTransformation(DOMDocument $xml, $styles, $modifyContent, $modifySource)
{
    if (!is_array($styles)) {
        $styles = array($styles);
    }
    if (empty($styles)) {
        $GLOBALS['xmlerrmsg'] .= "No styles found with given rule. Add some xslt files into plugin configuration after the rule\n";
    }
    $GLOBALS['xmlinfomsg'] .= "Applying transformation with following styles: " . implode(", ", $styles) . "\n";

    $backendOptions = array('cache_dir' => CACHE_DIR);
    $cache = Zend_Cache::factory(
        'Core',
        'File',
        array('lifetime' => null, 'automatic_serialization' => true),
        $backendOptions,
        false,
        false,
        true
    );
    if (isset($_GET['nocache'])) {
        $cache->clean(Zend_Cache::CLEANING_MODE_ALL);
    }

    // Store the last-change time of every entry of the xml directory as an
    // associative array file_name => change_time.
    $timestamp_cache_id = 'xml_timestamps';
    if (!($created = $cache->load($timestamp_cache_id))) {
        // Better to drop the whole cache.
        $cache->clean(Zend_Cache::CLEANING_MODE_ALL);
        $created = array();
        $files = explode('-;-', getAllFolders());
        $files[0] = $GLOBALS['xml_path'];
        foreach ($files as $file) {
            $created[$file] = filectime($file);
        }
        $cache->save($created, $timestamp_cache_id);
    }

    // Change time of the timestamp cache file; stays null when the file is missing.
    $lastStamp = null;
    $stampFile = CACHE_DIR . '/zend_cache---' . $timestamp_cache_id;
    if (file_exists($stampFile)) {
        $lastStamp = date('Y-m-d H:i:s', filectime($stampFile));
    }

    // NOTE(review): this comparison assumes $modifyContent / $modifySource use
    // the same 'Y-m-d H:i:s' format as $lastStamp - confirm against the caller.
    $timeOfChange = ($modifyContent > $modifySource) ? $modifyContent : $modifySource;
    if ($timeOfChange > $lastStamp) {
        $cache->clean(Zend_Cache::CLEANING_MODE_ALL);
    }

    // Request parameters prefixed with "xslt_" are passed on to the stylesheets.
    $xml_params = array();
    foreach ($_GET as $key => $val) {
        if (substr($key, 0, 5) == "xslt_" && is_string($val)) {
            $xml_params[substr($key, 5)] = $val;
        }
    }

    // The language has to be known BEFORE the cache id is built; otherwise
    // all languages share one cache entry.
    // @todo the user-supplied lang is not checked against the allowed languages
    $lang = isset($_REQUEST['lang']) ? $_REQUEST['lang'] : '';
    if (!empty($lang)) {
        $GLOBALS['xmlinfomsg'] .= "Language from URL {$lang}\n";
    }
    if (empty($lang)) {
        $user = JFactory::getUser();
        $language = $user->getParam('language');
        $lang = substr((string) $language, 0, 2);
        if (!empty($lang)) {
            $GLOBALS['xmlinfomsg'] .= "Setting user language {$lang}\n";
        }
    }
    // The default transformation language is set in the xslt transformation.

    // Cache id: hash of the XML content, the language and the stylesheet
    // parameters (names included, so different parameters with equal values
    // do not collide).
    $cache_id = md5($xml->saveXML() . $lang . serialize($xml_params));

    // Compare the last-change time of every assigned stylesheet directory
    // with the stored one; on mismatch drop the cache entries tagged with
    // that stylesheet.
    // NOTE(review): the stored values come from filectime() while this uses
    // filemtime() - confirm which one is intended.
    foreach ($styles as $style) {
        $sablona = $GLOBALS['xml_path'] . dirname($style);
        $ctime = filemtime($sablona);
        if (!isset($created[$sablona]) || $ctime != $created[$sablona]) {
            $cache->clean(Zend_Cache::CLEANING_MODE_MATCHING_TAG, array(getMyTag($style)));
            // Remember the new time.
            $created[$sablona] = $ctime;
        }
    }
    // Persist the (possibly updated) timestamps once; this also refreshes the
    // change time of the timestamp cache file that $lastStamp is read from.
    if (!empty($styles)) {
        $cache->save($created, $timestamp_cache_id);
    }

    $result = $cache->load($cache_id);
    if (!$result) {
        $tags = array();
        $current = $xml;
        $failed = false;

        foreach ($styles as $style) {
            // Tag the cache entry with the stylesheet used.
            $tags[] = getMyTag($style);

            $xsl = new DOMDocument();
            $processor = new XSLTProcessor();
            if (!$xsl->load($GLOBALS['xml_path'] . $style) || !$processor->importStyleSheet($xsl)) {
                $GLOBALS['xmlerrmsg'] .= "Failed to load XSLT style {$style}\n";
                $failed = true;
                continue;
            }

            if (!empty($lang)) {
                $processor->setParameter('', 'reportLang', $lang);
            }
            foreach ($xml_params as $key => $val) {
                $processor->setParameter('', $key, $val);
            }

            $transformed = $processor->transformToDoc($current);
            if (!$transformed) {
                $GLOBALS['xmlerrmsg'] .= "Transformation with XSLT style {$style} failed\n";
                $failed = true;
                continue;
            }
            $current = $transformed;
        }

        // Serializing node by node leaves out the leading XML declaration.
        $result = '';
        foreach ($current->childNodes as $node) {
            $result .= $current->saveXML($node) . "\n";
        }

        // An incomplete result must not be served from the cache later on.
        if (!$failed) {
            $cache->save($result, $cache_id, $tags);
        }
    }

    return $result;
}