Example #1
0
}
// Validate the two components of the document path. The error is raised when
// preg_match() does NOT match, so $badCharactersPattern (defined outside this
// excerpt) presumably describes the *allowed* form of a value rather than the
// forbidden characters -- TODO confirm against its definition.
if (!preg_match($badCharactersPattern, $documentPathParts[0]) || !preg_match($badCharactersPattern, $documentPathParts[1])) {
    webServiceError('documentPath contains bad characters. Was "' . revealXml($documentPath) . '"');
}
// The same check is applied to the posted "pipeline" and "autopipeline" values.
$pipeline = $unescapedPosts["pipeline"];
if (!preg_match($badCharactersPattern, $pipeline)) {
    webServiceError('pipeline contains bad characters. Was "' . revealXml($pipeline) . '"');
}
$autopipeline = $unescapedPosts["autopipeline"];
if (!preg_match($badCharactersPattern, $autopipeline)) {
    webServiceError('autopipeline contains bad characters. Was "' . revealXml($autopipeline) . '"');
}
// Wrap the submitted markup in an XHTML <html>/<body> envelope so that it forms
// a single document for the XSLT transforms below.
$allhtml = '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><body>' . $allhtml . '</body></html>';
$transformDirectory = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'core' . DIRECTORY_SEPARATOR . 'transform' . DIRECTORY_SEPARATOR;
$htmlPagesXsltPath = $transformDirectory . 'htmlpages-to-html.xsl';
// Judging by its name, characterEntityToNCR() rewrites named character entities
// as numeric character references -- its definition is not visible here.
$allhtml = characterEntityToNCR($allhtml);
$html = xsltTransform($allhtml, $htmlPagesXsltPath);
// A falsy transform result is reported together with the offending document,
// with spaces and newlines made visible in the HTML error output.
if (!$html) {
    webServiceError("Unable to rebuild document, either because I was given invalid XML or there was a programming error.<hr />Document was:<blockquote>" . str_replace("\n", "<br />", str_replace(" ", '&nbsp;', revealXml($allhtml))) . "</blockquote>");
}
//displayXmlString($html);
// Second stage: transform the rebuilt HTML with html-to-docbook-body.xsl.
$docbookBodyXsltPath = $transformDirectory . 'html-to-docbook-body.xsl';
$docbookBody = xsltTransform($html, $docbookBodyXsltPath);
$docbookBody = removeXmlDeclaration($docbookBody);
// Strip the opening and closing <docvert-remove-me> tags from the transform
// output, keeping whatever content they enclosed.
$docbookBody = preg_replace("/<docvert-remove-me[^>]*?>/", '', $docbookBody);
$docbookBody = preg_replace("/<\\/docvert-remove-me[^>]*?>/", '', $docbookBody);
//displayXmlString($docbookBody);
//displayXmlString(file_get_contents($docbookBodyXsltPath));
// Build the preview paths: writable/<documentPathParts[0]>/<documentPathParts[1]>/test.html
$allDocumentsPreviewDirectory = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'writable' . DIRECTORY_SEPARATOR . $documentPathParts[0] . DIRECTORY_SEPARATOR;
$previewDirectory = $allDocumentsPreviewDirectory . $documentPathParts[1] . DIRECTORY_SEPARATOR;
$unitTestResults = $previewDirectory . 'test.html';
Example #2
0
/**
 * Fetches a list of web pages, tidies each one into XHTML, runs the combined
 * document through a generator pipeline and streams the result to the client
 * as an OpenDocument Text attachment ("output.odt").
 *
 * Only entries of $pages that start with "http://" or "https://" are fetched;
 * anything else is silently skipped.
 *
 * Failures are reported through webServiceError(), which is assumed to end the
 * request (its definition is not part of this excerpt).
 *
 * @param array  $pages             URLs of the pages to include, in order.
 * @param string $generatorPipeline Name of a directory under "generator-pipeline"
 *                                  that contains a pipeline.xml.
 * @return void Output is written with header() and readfile().
 */
function generateDocument($pages, $generatorPipeline)
{
    // The pipeline name is used below as a single directory name, so reject
    // anything that could leave the "generator-pipeline" directory: forward or
    // back slashes, NUL bytes, and ".." (which needs no slash to climb a level).
    if (preg_match('/[\/\\\\\x00]|\.\./', $generatorPipeline)) {
        webServiceError('&error-disallowed-characters;');
    }
    $userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:bignumber) Docvert';
    $httpContextOptions = array('http' => array('header' => 'User-Agent: ' . $userAgent));
    $httpContext = stream_context_create($httpContextOptions);
    // The return value is not used here; the call is retained in case
    // getWritableDirectory() has side effects -- TODO confirm and remove if not.
    getWritableDirectory();
    $disallowDocumentGeneration = getGlobalConfigItem('doNotAllowDocumentGeneration');
    if ($disallowDocumentGeneration == 'true') {
        webServiceError('&document-generation-disabled;');
    }
    $pageXml = '<c:document xmlns="http://www.w3.org/1999/xhtml" xmlns:c="container">' . "\n";
    $pageTemplate = "\n\t" . '<c:page url="{{url}}" {{baseUrl}}>{{page}}</c:page>' . "\n";
    $config = array('indent' => true, 'output-xhtml' => true, 'wrap' => 200);
    if (!class_exists('tidy')) {
        webServiceError('&tidy-is-not-installed;');
    }
    $tidy = new tidy();
    // Captures the href value of a <base> tag whether it is double-quoted,
    // single-quoted or unquoted. The branch-reset group (?|...) makes every
    // alternative report its value as capture group 1.
    $baseTagPattern = '/<base\b[^>]*?\bhref\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s>]+))/is';
    $styleTagPattern = "/<style.*?<\\/style>/is";
    $scriptTagPattern = "/<script.*?<\\/script>/is";
    //as strangely used on news.yahoo.com
    $questionMarkPattern = "/<\\?.*?\\?>/is";
    foreach ($pages as $page) {
        if (trim($page) != '' && (stringStartsWith($page, 'http://') || stringStartsWith($page, 'https://'))) {
            // use_include_path is a bool parameter; passing null is deprecated.
            $pageHtml = file_get_contents($page, false, $httpContext);
            if ($pageHtml === false) {
                // A failed fetch is treated as an empty page, which is what the
                // implicit false-to-string conversion used to produce.
                $pageHtml = '';
            }
            $tidy->parseString($pageHtml, $config, 'utf8');
            $tidy->cleanRepair();
            $baseUrl = '';
            //supporting that ugly old hack of <base>
            if (preg_match($baseTagPattern, $pageHtml, $matches) === 1) {
                // The href comes from HTML source, so decode its entities first
                // and then re-escape it for use inside an XML attribute.
                $baseHref = html_entity_decode($matches[1], ENT_QUOTES, 'UTF-8');
                $baseUrl = 'baseUrl="' . htmlspecialchars($baseHref, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '"';
            }
            $tidiedPageContents = characterEntityToNCR(removeDoctype(removeXmlComments($tidy)));
            $tidiedPageContents = preg_replace($styleTagPattern, '', $tidiedPageContents);
            $tidiedPageContents = preg_replace($scriptTagPattern, '', $tidiedPageContents);
            $tidiedPageContents = preg_replace($questionMarkPattern, '', $tidiedPageContents);
            // strtr() substitutes all placeholders in one pass, so placeholder-like
            // text inside the URL or the fetched page is never expanded again.
            // The URL is escaped because "&" in a query string would otherwise
            // make the attribute (and the whole document) malformed XML.
            $pageXml .= strtr($pageTemplate, array(
                '{{url}}' => htmlspecialchars($page, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
                '{{baseUrl}}' => $baseUrl,
                '{{page}}' => $tidiedPageContents
            ));
        }
    }
    $pageXml .= '</c:document>';
    $temporaryDirectory = getTemporaryDirectory();
    $pipelineDirectory = dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'generator-pipeline' . DIRECTORY_SEPARATOR . $generatorPipeline . DIRECTORY_SEPARATOR;
    $pipelinePath = $pipelineDirectory . 'pipeline.xml';
    if (!file_exists($pipelinePath)) {
        webServiceError('&generation-pipeline-not-found; ' . revealXml($pipelinePath));
    }
    // Keep only what lies between <pipeline> and </pipeline>;
    // 10 is the length of the opening "<pipeline>" tag.
    $pipelineString = file_get_contents($pipelinePath);
    $pipelineString = substr($pipelineString, strpos($pipelineString, '<pipeline>') + 10);
    $pipelineString = substr($pipelineString, 0, strpos($pipelineString, '</pipeline>'));
    $pipelineStages = xmlStringToArray($pipelineString);
    $pipelineSettings = array("pipeline" => $generatorPipeline, "autopipeline" => $generatorPipeline);
    processAPipelineLevel($pipelineStages, $pageXml, $pipelineDirectory, $temporaryDirectory, $temporaryDirectory, $pipelineSettings);
    // Zip the temporary directory into output.odt and send it as a download.
    $openDocumentPath = $temporaryDirectory . 'output.odt';
    zipFiles($temporaryDirectory, $openDocumentPath);
    header('Content-disposition: attachment; filename=' . basename($openDocumentPath));
    header('Content-type: application/vnd.oasis.opendocument.text');
    readfile($openDocumentPath);
}