public static function translateTopicTitleForDocLinks($title, $fromNamespace = NULL, $ver = NULL, $topic = NULL) {
	/*
	 * Translate a wiki-link title into the slash-separated URL form used for PonyDocs topics.
	 *
	 * Titles whose first colon-separated piece is not the PonyDocs documentation namespace
	 * are returned trimmed but otherwise untouched. PonyDocs titles are expanded according
	 * to the number of colon-separated pieces they contain:
	 *
	 *   Documentation:Topic                          (2 pieces; needs $ver and $topic)
	 *   Documentation:Product:Manual:Topic           (4 pieces; needs $ver for same-product links)
	 *   Documentation:Product:Manual:Topic:Version   (5 pieces)
	 *
	 * Every form is rewritten to Documentation/Product/Version/Manual/Topic.
	 *
	 * $title          Raw link title; surrounding whitespace is trimmed.
	 * $fromNamespace  Namespace the link appears in. When it is the PonyDocs namespace and
	 *                 $title contains no colon, the namespace is prepended to the title.
	 * $ver            Object providing getProductName() and getVersionName(), or NULL.
	 * $topic          Object providing getTitle(), or NULL.
	 *
	 * Returns the translated title string, or false (after logging a warning) when the
	 * title has an unsupported number of pieces or a required $ver/$topic is missing.
	 */
	if (PONYDOCS_DEBUG) {
		error_log("DEBUG [PonyDocs] [" . __METHOD__ . "] Raw title: " . $title);
	}

	// Get rid of whitespace around the title
	$title = trim($title);

	// A bare title used from inside the PonyDocs namespace implicitly lives in that namespace
	if (strpos($title, ':') === false && $fromNamespace == PONYDOCS_DOCUMENTATION_NAMESPACE_NAME) {
		$title = $fromNamespace . ':' . $title;
	}

	// Default: hand the title back unchanged
	$toUrl = $title;
	$pieces = explode(':', $title);

	// Only titles that actually START with the PonyDocs namespace get special parsing.
	// (A plain substring test would also catch e.g. "Category:Documentation".)
	if ($pieces[0] === PONYDOCS_DOCUMENTATION_NAMESPACE_NAME) {
		switch (count($pieces)) {
			case 2:
				// [[Documentation:Topic]] -> Documentation/Product/Version/Manual/Topic
				// Product and version come from $ver, the manual from the linking topic.
				if ($ver === NULL || $topic === NULL) {
					error_log("WARNING [PonyDocs] [" . __METHOD__ . "] If no Product, Manual, and Version specified in PonyDocs title, must include version and topic objects when calling translateTopicTitleForDocLinks().");
					return false;
				}
				$topicMetaData = PonyDocsArticleFactory::getArticleMetadataFromTitle($topic->getTitle());
				$toUrl = $pieces[0] . '/' . $ver->getProductName() . '/' . $ver->getVersionName() . '/' . $topicMetaData['manual'] . '/' . $pieces[1];
				break;

			case 4:
				// [[Documentation:Product:Manual:Topic]] -> Documentation/Product/Version/Manual/Topic
				// Without a version object the source product is unknown and treated as ''.
				$fromProduct = ($ver !== NULL) ? (string)$ver->getProductName() : '';
				// Strict string comparison: loose != would treat numeric-looking product
				// names such as "1.0" and "1" as the same product.
				if ($fromProduct !== $pieces[1]) {
					// Link into a different product: point at that product's latest version
					$toVersion = "latest";
				} elseif ($ver === NULL) {
					error_log("WARNING [PonyDocs] [" . __METHOD__ . "] If Version is not specified in title, must include version object when calling translateTopicTitleForDocLinks().");
					return false;
				} else {
					$toVersion = $ver->getVersionName();
				}
				$toUrl = $pieces[0] . '/' . $pieces[1] . '/' . $toVersion . '/' . $pieces[2] . '/' . $pieces[3];
				break;

			case 5:
				// [[Documentation:Product:Manual:Topic:Version]] -> Documentation/Product/Version/Manual/Topic
				$toUrl = $pieces[0] . '/' . $pieces[1] . '/' . $pieces[4] . '/' . $pieces[2] . '/' . $pieces[3];
				break;

			default:
				// Not a valid number of pieces in title
				error_log("WARNING [PonyDocs] [" . __METHOD__ . "] Wrong number of pieces in PonyDocs title.");
				return false;
		}
	}

	if (PONYDOCS_DEBUG) {
		error_log("DEBUG [PonyDocs] [" . __METHOD__ . "] Final title: " . $toUrl);
	}

	return $toUrl;
}
/**
 * Generates an HTML string which represents the entire manual for a given product and version.
 *
 * Every topic listed in the manual's table of contents is parsed to HTML, adjusted for PDF
 * rendering (headings demoted one level, <pre>/<code> restyled, intra-manual links turned
 * into "#topic" anchors, all other links unwrapped, table attributes added) and appended
 * to one document, with an <h1> emitted whenever the TOC section changes.
 *
 * Side effects: $wgArticlePath, $wgScriptPath, $wgUploadPath and $wgScript are rewritten to
 * absolute URLs (prefixed with $wgServer) and are NOT restored afterwards; the HTML title
 * of $wgOut is set to each topic's prefixed title in turn.
 *
 * @param $product PonyDocsProduct
 * @param $manual PonyDocsProductManual
 * @param $version PonyDocsProductVersion
 *
 * @return string HTML String representation of manual contents
 */
public function getManualHTML($product, $manual, $version) {
	global $wgOut, $wgUser, $wgParser;
	global $wgServer, $wgArticlePath, $wgScriptPath, $wgUploadPath, $wgScript;

	// Grab parser options for the logged in user, without section-edit links.
	$opt = ParserOptions::newFromUser($wgUser);
	$opt->setEditSection(false);

	// Any potential titles to exclude
	$exclude = array();

	// Determine articles to gather, grouped by TOC section
	$articles = array();
	$toc = new PonyDocsTOC($manual, $version, $product);
	$tocData = $toc->loadContent();
	$manualToc = $tocData[0];
	foreach ($manualToc as $tocEntry) {
		if ($tocEntry['level'] > 0 && strlen($tocEntry['title']) > 0) {
			$title = Title::newFromText($tocEntry['title']);
			if (!$title) {
				// Title text could not be turned into a Title object; skip it
				// rather than dereferencing null further down.
				continue;
			}
			$articles[$tocEntry['section']][] = $title;
		}
	}

	// Format the article(s) as a single HTML document with absolute URL's
	$html = <<<EOT
<!doctype html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml" xmlns:og="http://ogp.me/ns#" xmlns:fb="http://ogp.me/ns/fb#" charset="utf-8">
<head>
<meta charset="UTF-8">
<title></title>
<style>
html,body {
margin: 0px;
padding: 0px;
width: 210mm;
max-width: 210mm;
overflow-x: hidden;
}
pre {
\twidth: 100%;
\toverflow-x: hidden;
}
</style>
</head>
<body>
EOT;

	// Make the path globals absolute so the parser emits absolute URLs. The prefix is
	// only added when it is not already there, so calling this method more than once
	// in a request does not stack "$wgServer$wgServer...".
	$server = (string)$wgServer;
	$makeAbsolute = function ($path) use ($server) {
		$path = (string)$path;
		if ($server !== '' && strpos($path, $server) === 0) {
			return $path;
		}
		return $server . $path;
	};
	$wgArticlePath = $makeAbsolute($wgArticlePath);
	$wgScriptPath = $makeAbsolute($wgScriptPath);
	$wgUploadPath = $makeAbsolute($wgUploadPath);
	$wgScript = $makeAbsolute($wgScript);

	/*
	 * Whitespace handling inside <pre> blocks (WEB-5926), derived from
	 * http://stackoverflow.com/questions/1517102/replace-newlines-with-br-tags-but-only-inside-pre-tags
	 *   space -> &nbsp;      only when preceded by a whitespace character
	 *   \n    -> <br/>
	 *   \t    -> 8 * &nbsp;
	 */
	$preSearch = array("/(?<=\\s) /", "/\n/", "/\t/");
	$preReplace = array('&nbsp;', '<br/>', str_repeat('&nbsp;', 8));

	// Demote headings one level and restyle code/pre. Order matters: str_replace applies
	// the pairs in sequence, so deeper headings are shifted before shallower ones.
	$strSearch = array('<h5>', '</h5>', '<h4>', '</h4>', '<h3>', '</h3>', '<h2>', '</h2>', '<h1>', '</h1>', '<code>', '</code>', '<pre>', '</pre>');
	$strReplace = array('<h6>', '</h6>', '<h5>', '</h5>', '<h4><font size="3"><b><i>', '</i></b></font></h4>', '<h3>', '</h3>', '<h2>', '</h2>', '<code><font size="2">', '</font></code>', '<code><font size="2">', '</font></code>');

	/*
	 * HTML regex tweaking prior to sending to PDF library
	 *
	 * 1 - replace intramanual links with just the anchor hash of topic name (e.g. href="#topicname")
	 * 2 - remove all non-intramanual links - strip anchor tags with href attribute whose href value doesn't start
	 *     with #
	 * 3 - wrap all span tags having id attribute with <a name="[topicname]_[span_id_attr_value]"> ... </a>
	 * 4 - all anchor links' href values that contain two # characters, replace the second with _
	 * 5 - make images have absolute URLs
	 * 6 - non-printable areas
	 * 7 - comment
	 * 8 - cell padding
	 * 9 - th bgcolor
	 * 10 - td valign, align and font size
	 */
	// Everything interpolated into pattern 1 is literal text, so quote it: names such as
	// "6.0" must not have their "." act as a wildcard.
	$manualUrlPattern = '(' . preg_quote($server, '|') . ')+/'
		. preg_quote(PONYDOCS_DOCUMENTATION_NAMESPACE_NAME, '|') . '/'
		. preg_quote($product->getShortName(), '|') . '/'
		. preg_quote($version->getVersionName(), '|') . '/'
		. preg_quote($manual->getShortName(), '|') . '/';
	$regexSearch = array(
		'|<a([^\\>]+)href="' . $manualUrlPattern . '([^"]*)"([^\\<]*)>|',
		'|<a[^\\>]+href="(?!#)[^"]*"[^>]*>(.*?)</a>|',
		'|<span[^\\>]+id="([^"]*)"[^>]*>(.*?)</span>|',
		'|<a([^\\>]+)href="#([^"]*)#([^"]*)"([^>]*)>(.*?)</a>|',
		// Stop at the closing quote so several images on one line are each rewritten
		'|(<img[^>]+?src=")(/[^"]*")|',
		'|<div\\s*class=[\'"]?noprint["\']?>.+?</div>|s',
		'|@{4}([^@]+?)@{4}|s',
		'/(<table[^>]*)/',
		'/(<th[^>]*)/',
		'/(<td[^>]*)>([^<]*)/',
	);

	// Table vars
	$tableExtra = ' cellpadding="6"';
	$thExtra = ' bgcolor="#C0C0C0"';
	$tdExtra = ' valign="center" align="left"';

	// Callbacks that lower-case href/name values (wkhtmltopdf is case sensitive for
	// internal links) while keeping every other attribute of the tag.
	$lowerHref = function ($matches) {
		return '<a' . $matches[1] . 'href="' . strtolower($matches[2]) . '"' . $matches[3] . '>';
	};
	$lowerName = function ($matches) {
		return '<a' . $matches[1] . 'name="' . strtolower($matches[2]) . '"' . $matches[3] . '>';
	};

	// Last section for which an <h1> was written; starts empty so an empty section name
	// never produces a heading.
	$currentSection = '';
	foreach ($articles as $section => $sectionTitles) {
		foreach ($sectionTitles as $title) {
			$ttext = $title->getPrefixedText();
			if (in_array($ttext, $exclude)) {
				continue;
			}

			// Array keys that look like integers come back as ints; compare as strings
			// so a section keyed 0 is not mistaken for "no section yet".
			if ((string)$section !== $currentSection) {
				$html .= '<h1>' . $section . '</h1>';
				$currentSection = (string)$section;
			}

			$page = new Article($title, 0);
			$wikitext = $page->fetchContent() . '__NOTOC__';
			$wgOut->setHTMLTitle($ttext); // use this so DISPLAYTITLE magic works
			$parserOutput = $wgParser->parse($wikitext, $title, $opt, true, true);
			$text = $parserOutput->getText();

			// parse article title string and add topic name anchor tag for intramanual linking
			$articleMeta = PonyDocsArticleFactory::getArticleMetadataFromTitle($title);
			$text = '<a name="' . $articleMeta['topic'] . '"></a>' . $text;

			// Split on <pre ... /pre>; with PREG_SPLIT_DELIM_CAPTURE the <pre> blocks land
			// at the odd indexes. A space is prepended first (guarantee split will be in
			// even positions) and chopped off again afterwards.
			$parts = preg_split("/(< \\s* pre .* \\/ \\s* pre \\s* >)/Umsxu", ' ' . $text, -1, PREG_SPLIT_DELIM_CAPTURE);
			if ($parts !== false) {
				foreach ($parts as $idx => $part) {
					if ($idx % 2) {
						$parts[$idx] = preg_replace($preSearch, $preReplace, $part);
					}
				}
				$text = substr(implode('', $parts), 1);
			}
			// else: the split failed (e.g. invalid UTF-8 with the /u modifier); keep the
			// text as parsed instead of discarding the whole topic.

			// String search and replace
			$text = str_replace($strSearch, $strReplace, $text);

			// Replacement 3 depends on the topic name, so this array is built per article.
			$regexReplace = array(
				'<a${1}href="#${3}"${4}>',
				'${1}',
				'<a name="' . $articleMeta['topic'] . '_${1}">${0}</a>',
				'<a${1}href="#${2}_${3}"${4}>${5}</a>',
				'${1}' . $server . '${2}',
				'',
				'<!--$1-->',
				'${1}' . $tableExtra,
				'${1}' . $thExtra,
				'${1}' . $tdExtra . '>${2}',
			);
			$text = preg_replace($regexSearch, $regexReplace, $text);

			// Make all anchor tags uniformly lower case. The quantifier sits INSIDE the
			// first group so attributes preceding href/name are captured and preserved.
			$text = preg_replace_callback('|<a([^\\>]+)href="([^"]*)"([^\\<]*)>|', $lowerHref, $text);
			$text = preg_replace_callback('|<a([^\\>]+)name="([^"]*)"([^>]*)>|', $lowerName, $text);

			$html .= $text . "\n";
		}
	}

	$html .= "</body></html>";
	return $html;
}