Пример #1
0
 /**
  * Transform an XML file with an XSLT stylesheet and return the result as HTML.
  *
  * @param string $xml_path Location of the XML document to transform.
  * @param string $xsl_path Location of the XSLT stylesheet to apply.
  * @return string|false The transformed document as serialized by DOMDocument::saveHTML().
  * @throws RuntimeException If either document cannot be loaded, the stylesheet
  *                          cannot be imported, or the transformation fails.
  */
 function printXML($xml_path, $xsl_path)
 {
     // Route libxml/libxslt diagnostics into the internal buffer so they can be
     // attached to the exception instead of being emitted as PHP warnings.
     $previous = libxml_use_internal_errors(true);
     $fail = function ($message) {
         $error = libxml_get_last_error();
         if ($error) {
             $message .= ': ' . trim($error->message);
         }
         return new RuntimeException($message);
     };
     try {
         $xml_dom = new DOMDocument();
         if (!$xml_dom->load($xml_path)) {
             throw $fail("Unable to load XML document '{$xml_path}'");
         }
         $xsl_dom = new DOMDocument();
         if (!$xsl_dom->load($xsl_path)) {
             throw $fail("Unable to load XSL stylesheet '{$xsl_path}'");
         }
         $proc = new XSLTProcessor();
         if (!$proc->importStylesheet($xsl_dom)) {
             throw $fail("Unable to import XSL stylesheet '{$xsl_path}'");
         }
         // transformToDoc() returns false on failure; the original then tried to
         // set a property on that false value.
         $doc = $proc->transformToDOC($xml_dom);
         if (!$doc) {
             throw $fail("XSLT transformation of '{$xml_path}' failed");
         }
         $doc->formatOutput = true;
         return $doc->saveHTML();
     } finally {
         // Only discard buffered errors when the buffer was enabled by this call.
         if (!$previous) {
             libxml_clear_errors();
         }
         libxml_use_internal_errors($previous);
     }
 }
/**
 * Expand each <article id="..."> element of a message document with HTML
 * generated from that article's XML.
 *
 * For every article the XML is downloaded, normalised through the HTML parser,
 * transformed with ./elife_xmltohtml.xsl and post-processed (author tooltips,
 * institution clean-up, formula labels). When the article has <query xpath="...">
 * children, every match in the transformed document is appended to its query
 * inside a <data> element; otherwise the whole transformed root is appended to
 * the article. A failure for one article replaces that article's content with
 * an 'ERROR: ...' text and processing continues with the next article.
 *
 * @param string $messageXML UTF-8 XML containing the <article> elements.
 * @return string The message document serialized without its XML declaration,
 *                or an 'ERROR: ...' string when the message cannot be parsed.
 */
function xmltohtml($messageXML)
{
    // The original pattern '/<?xml version.../' made "<" optional and therefore
    // left a stray "<?" behind; the question mark has to be escaped.
    $xmlDeclPattern = '/<\?xml version[^>]*>/';
    $doctypePattern = '/<!DOCTYPE [^>]*>/';

    // NOTE(review): the 'HTML-ENTITIES' mbstring target is deprecated as of PHP 8.2;
    // it is kept here so the produced markup does not change.
    $messageXML = mb_convert_encoding($messageXML, 'HTML-ENTITIES', "UTF-8");
    // Stream context used for every article download.
    $opts = array('http' => array('method' => "GET", 'header' => "Accept-language: en\r\n" . "Cookie: foo=bar\r\n" . "Content-Type: text/xml; charset=UTF-8"));
    $context = stream_context_create($opts);

    $doc = new DOMDocument('1.0', 'UTF-8');
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput = false;
    $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    // Parse the message before anything else so a root element exists that can
    // carry a fatal error message.
    if (!$doc->loadXML($messageXML)) {
        return 'ERROR: Failed to load message XML';
    }

    $xslFilePath = './elife_xmltohtml.xsl';
    $xsl = new DOMDocument();
    if (!$xsl->load($xslFilePath)) {
        // The original used `continue` outside of any loop here (a compile error)
        // and wrote to a root element that did not exist yet.
        $doc->documentElement->nodeValue = 'ERROR: Failed to load XSLT: ' . $xslFilePath;
        return preg_replace($xmlDeclPattern, '', $doc->saveXML());
    }

    // Renders the children of a node for the tooltip: text is copied verbatim,
    // every other child becomes <span class="nlm-(node name)">.
    $renderChildren = function ($parent) {
        $html = '';
        foreach ($parent->childNodes as $childNode) {
            if ($childNode->nodeType == XML_TEXT_NODE) {
                $html .= $childNode->nodeValue;
            } else {
                $html .= '<span class="nlm-' . $childNode->nodeName . '">' . $childNode->innerHTML . '</span>';
            }
        }
        return $html;
    };

    $xpath = new DOMXPath($doc);
    $articles = $xpath->query('//article');
    foreach ($articles as $article) {
        $articleID = $article->getAttribute('id');
        if (!$articleID) {
            $article->nodeValue = 'ERROR: invalid article ID';
            continue;
        }
        // The id comes from the incoming message, so encode it before using it
        // as a URL path segment.
        $safeID = rawurlencode($articleID);
        $xmlFileURL = "https://s3.amazonaws.com/elife-cdn/elife-articles/{$safeID}/elife{$safeID}.xml";
        // Check the download itself: the original only tested the re-serialized
        // document, which always contains markup and so never looked empty.
        $rawXML = file_get_contents($xmlFileURL, false, $context);
        if ($rawXML === false || trim($rawXML) === '') {
            $article->nodeValue = 'ERROR: Something went wrong when reading XML from AWS';
            continue;
        }
        // Turn non-ASCII characters into entities, then strip the XML
        // declaration and the DTD declaration so the text can be embedded.
        $articleXML = mb_convert_encoding($rawXML, 'HTML-ENTITIES', "UTF-8");
        $articleXML = preg_replace($xmlDeclPattern, '', $articleXML);
        $articleXML = preg_replace($doctypePattern, '', $articleXML);
        $articleXML = '<html><head><meta http-equiv=Content-Type content="text/html; charset=utf-8"></meta></head><body>' . $articleXML . '</body></html>';
        $aDoc = new DOMDocument('1.0', 'UTF-8');
        // Load the XML as HTML to avoid the named entity/hex entity difference,
        // e.g. 00444 uses named entities while 07370 uses hexadecimal ones.
        $aDoc->loadHTML($articleXML);
        $articleXML = preg_replace($doctypePattern, '', $aDoc->saveXML());
        if (!$aDoc->loadXML($articleXML)) {
            $article->nodeValue = 'ERROR: Something went wrong during loading XML';
            continue;
        }
        $proc = new XSLTProcessor();
        if (!$proc->importStylesheet($xsl)) {
            $article->nodeValue = 'ERROR: Something went wrong when loading XSLT';
            continue;
        }
        $newDoc = $proc->transformToDOC($aDoc);
        if (!$newDoc) {
            $article->nodeValue = 'ERROR: Something went wrong during transformation';
            continue;
        }
        // NOTE(review): innerHTML is presumably provided by JSLikeHTMLElement; node
        // classes are registered per document, so register it on the result too.
        $newDoc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
        $newDocXpath = new DOMXPath($newDoc);

        // Add a tooltip attribute to every author.
        $contribs = $newDocXpath->query('//contrib[@contrib-type="author"]');
        foreach ($contribs as $contrib) {
            $tooltip = '|<div class="author-tooltip">';
            $names = $newDocXpath->query('.//name', $contrib);
            foreach ($names as $name) {
                $tooltip .= '<div class="author-tooltip-name">' . $renderChildren($name) . '</div>';
            }
            $tooltip .= '<div class="author-tooltip-affiliation">';
            $affRefs = $newDocXpath->query('.//xref[@ref-type = "aff"]', $contrib);
            for ($i = 0, $len = $affRefs->length; $i < $len; $i++) {
                $rid = $affRefs->item($i)->getAttribute('rid');
                // Follow the rid to the referenced affiliation(s).
                $affs = $newDocXpath->query('//aff[@id="' . $rid . '"]');
                foreach ($affs as $aff) {
                    $tooltip .= '<span class="nlm-aff">' . $renderChildren($aff) . '</span>';
                }
                // Separate consecutive affiliations.
                if ($i != $len - 1) {
                    $tooltip .= ';';
                }
            }
            $tooltip .= '</div>';
            $fnRefs = $newDocXpath->query('.//xref[@ref-type = "fn"]', $contrib);
            for ($i = 0, $len = $fnRefs->length; $i < $len; $i++) {
                $rid = $fnRefs->item($i)->getAttribute('rid');
                // Follow the rid to the referenced footnote(s).
                $fns = $newDocXpath->query('//fn[@id="' . $rid . '"]');
                foreach ($fns as $fn) {
                    // Reset per footnote: the original left these undefined, or
                    // carried over from the previous footnote, for other fn-types.
                    $class = '';
                    $label = '';
                    $fnType = $fn->getAttribute('fn-type');
                    if ($fnType == 'con') {
                        // Author contribution
                        $class = 'author-tooltip-contrib';
                        $label = 'Contribution: ';
                    } elseif ($fnType == 'conflict') {
                        // Competing interests
                        $class = 'author-tooltip-conflict';
                        $label = 'Competing Interests: ';
                    }
                    $tooltip .= '<div class="' . $class . '"><span class="author-tooltip-label">' . $label . '</span>';
                    $tooltip .= '<span class="author-tooltip-text">';
                    $pTags = $newDocXpath->query('.//p', $fn);
                    if ($pTags->length == 0) {
                        // No paragraphs: use the footnote content itself (the
                        // original read an undefined $pTag here).
                        $tooltip .= $fn->innerHTML;
                    }
                    foreach ($pTags as $pTag) {
                        $tooltip .= '<span class="nlm-p">' . trim($pTag->innerHTML) . '</span>';
                    }
                    $tooltip .= '</span></div>';
                }
            }
            $tooltip .= '</div>';
            $contrib->setAttribute('tooltip', $tooltip);
        }

        // Clean up institutions and drop an institution when the same text
        // occurs again later in the document.
        $institutions = $newDocXpath->query('//*[@class="elife-institution"]');
        $total = $institutions->length;
        for ($i = 0; $i < $total; $i++) {
            $institution = $institutions->item($i);
            // Start at $i + 1: the original started at $i, compared every node
            // with itself and therefore removed all institutions.
            $removenode = false;
            for ($j = $i + 1; $j < $total; $j++) {
                if ($institution->nodeValue == $institutions->item($j)->nodeValue) {
                    $removenode = true;
                    break;
                }
            }
            // If the first child is a text node holding only "," remove it.
            if ($institution->hasChildNodes()) {
                $first = $institution->childNodes->item(0);
                if ($first->nodeType == XML_TEXT_NODE && trim($first->nodeValue) == ',') {
                    DOMRemove($first);
                }
            }
            // Collapse doubled commas in the direct text nodes.
            $directTextNodes = $newDocXpath->query('./text()', $institution);
            foreach ($directTextNodes as $directTextNode) {
                $directTextNode->nodeValue = preg_replace('/\\s*\\,\\s*\\,\\s*/u', ', ', $directTextNode->nodeValue);
            }
            // The node may already be detached (removed as an earlier node's
            // sibling) and may have no following sibling.
            if ($removenode && $institution->parentNode) {
                $parent = $institution->parentNode;
                if ($institution->nextSibling) {
                    $parent->removeChild($institution->nextSibling);
                }
                $parent->removeChild($institution);
            }
        }

        // Move the label to the end of its disp-formula.
        $dispFormulas = $newDocXpath->query('//span[contains(@class,"disp-formula")]');
        foreach ($dispFormulas as $dispFormula) {
            $label = $newDocXpath->query('.//span[contains(@class, "disp-formula-label")]', $dispFormula);
            if ($label->length != 0) {
                $dispFormula->appendChild($label->item(0));
            }
        }

        // Answer the queries, or return the whole transformed article when
        // there are none.
        $queries = $xpath->query('.//query', $article);
        if ($queries->length == 0) {
            $rootNode = $newDoc->documentElement;
            if ($rootNode) {
                $article->appendChild($doc->importNode($rootNode, true));
            }
        } else {
            foreach ($queries as $query) {
                $dataBlocks = $newDocXpath->query($query->getAttribute('xpath'));
                if ($dataBlocks === false) {
                    // Invalid XPath expression supplied by the message.
                    continue;
                }
                foreach ($dataBlocks as $dataBlock) {
                    $dataNode = $doc->createElement('data');
                    $dataNode->appendChild($doc->importNode($dataBlock, true));
                    $query->appendChild($dataNode);
                }
            }
        }
    }
    return preg_replace($xmlDeclPattern, '', $doc->saveXML());
}
Пример #3
0
/**
 * Apply one or more XSLT stylesheets to an XML document and return the result,
 * using a Zend_Cache file cache keyed by the XML content, the language and the
 * xslt_* request parameters.
 *
 * Progress and error messages are appended to $GLOBALS['xmlinfomsg'] and
 * $GLOBALS['xmlerrmsg'].
 *
 * @param DOMDocument  $xml           Original XML document.
 * @param array|string $styles        XSL stylesheet(s), relative to $GLOBALS['xml_path'].
 * @param mixed        $modifyContent Modification time of the content; the later of this
 *                                    and $modifySource is compared with the cache stamp.
 * @param mixed        $modifySource  Modification time of the source (see $modifyContent).
 * @return string The transformed document, serialized without the XML declaration.
 */
function applyTransformation(DOMDocument $xml, $styles, $modifyContent, $modifySource)
{
    if (!is_array($styles)) {
        $styles = array($styles);
    }
    if (empty($styles)) {
        $GLOBALS['xmlerrmsg'] .= "No styles found with given rule. Add some xslt files into plugin configuration after the rule\n";
    }
    $GLOBALS['xmlinfomsg'] .= "Applying transformation with following styles: " . implode(", ", $styles) . "\n";
    $backendOptions = array('cache_dir' => CACHE_DIR);
    $cache = Zend_Cache::factory('Core', 'File', array('lifetime' => null, 'automatic_serialization' => true), $backendOptions, false, false, true);
    if (isset($_GET['nocache'])) {
        $cache->clean(Zend_Cache::CLEANING_MODE_ALL);
    }
    // Store the last-change time of all files in the xml directory as an
    // associative array file_name => change_time.
    $timestamp_cache_id = 'xml_timestamps';
    if (!($created = $cache->load($timestamp_cache_id))) {
        // To be safe, clear the whole cache.
        $cache->clean(Zend_Cache::CLEANING_MODE_ALL);
        $created = array();
        $files = getAllFolders();
        $files = explode('-;-', $files);
        $files[0] = $GLOBALS['xml_path'];
        foreach ($files as $file) {
            $created[$file] = filectime($file);
        }
        $cache->save($created, $timestamp_cache_id);
    }
    // Initialised explicitly: the original read an undefined variable when the
    // stamp file did not exist (null keeps the same comparison result).
    $lastStamp = null;
    $stampFile = CACHE_DIR . '/zend_cache---' . $timestamp_cache_id;
    if (file_exists($stampFile)) {
        $lastStamp = date('Y-m-d H:i:s', filectime($stampFile));
    }
    $timeOfChange = $modifyContent > $modifySource ? $modifyContent : $modifySource;
    // NOTE(review): $lastStamp is a 'Y-m-d H:i:s' string, so this comparison is only
    // meaningful if both modification arguments use that format - confirm with callers.
    if ($timeOfChange > $lastStamp) {
        $cache->clean(Zend_Cache::CLEANING_MODE_ALL);
    }
    // Collect the xslt_* request parameters; non-scalar values (e.g. xslt_a[]=1)
    // cannot be passed to the processor and are ignored.
    $xml_params = array();
    foreach ($_GET as $key => $val) {
        $key = (string) $key;
        if (substr($key, 0, 5) == "xslt_" && is_scalar($val)) {
            $xml_params[substr($key, 5)] = (string) $val;
        }
    }
    // Determine the language BEFORE building the cache id. The original used
    // $lang in the id before assigning it, so every language shared one entry.
    // @todo the user-supplied lang is not checked against the allowed languages
    $lang = isset($_REQUEST['lang']) && is_string($_REQUEST['lang']) ? $_REQUEST['lang'] : '';
    if (!empty($lang)) {
        $GLOBALS['xmlinfomsg'] .= "Language from URL {$lang}\n";
    }
    if (empty($lang)) {
        $user = JFactory::getUser();
        $language = $user->getParam('language');
        $lang = substr((string) $language, 0, 2);
        if (!empty($lang)) {
            $GLOBALS['xmlinfomsg'] .= "Setting user language {$lang}\n";
        }
    }
    // The cache id is a hash of the XML content, the language and the parameters.
    $cache_id = md5($xml->saveXML() . $lang . implode("_", $xml_params));
    // Check the last-change time of the assigned XSLT templates and, when it does
    // not match, delete the cache entries tagged with that template.
    foreach ($styles as $style) {
        $sablona = $GLOBALS['xml_path'] . dirname($style);
        // NOTE(review): the stored values above come from filectime() while this
        // reads filemtime() - confirm which one is intended.
        $ctime = filemtime($sablona);
        $known = isset($created[$sablona]) ? $created[$sablona] : null;
        if ($ctime != $known) {
            $cache->clean(Zend_Cache::CLEANING_MODE_MATCHING_TAG, array(getMyTag($style)));
            // Update the stored time.
            $created[$sablona] = $ctime;
        }
    }
    // Save the timestamps once instead of once per style.
    if (!empty($styles)) {
        $cache->save($created, $timestamp_cache_id);
    }
    // The default transformation language is set in the XSLT transformation.
    if (!($result = $cache->load($cache_id))) {
        $tags = array();
        $current = $xml;
        $failed = false;
        foreach ($styles as $style) {
            // Tag the cache entry with the template that was used.
            $tags[] = getMyTag($style);
            $xsl = new DOMDocument();
            $processor = new XSLTProcessor();
            if (!$xsl->load($GLOBALS['xml_path'] . $style) || !$processor->importStyleSheet($xsl)) {
                $GLOBALS['xmlerrmsg'] .= "Cannot load stylesheet {$style}\n";
                $failed = true;
                continue;
            }
            if (!empty($lang)) {
                $processor->setParameter('', 'reportLang', $lang);
            }
            foreach ($xml_params as $key => $val) {
                $processor->setParameter('', $key, $val);
            }
            // transformToDoc() returns false on failure; keep the last good
            // document instead of continuing with false.
            $transformed = $processor->transformToDOC($current);
            if (!$transformed) {
                $GLOBALS['xmlerrmsg'] .= "Transformation with stylesheet {$style} failed\n";
                $failed = true;
                continue;
            }
            $current = $transformed;
        }
        // Serializing node by node leaves out the leading XML declaration.
        $result = '';
        foreach ($current->childNodes as $node) {
            $result .= $current->saveXML($node) . "\n";
        }
        // Do not cache a result produced by an incomplete stylesheet chain.
        if (!$failed) {
            $cache->save($result, $cache_id, $tags);
        }
    }
    return $result;
}