/**
 * Converts OpenDocument 1.0 content XML into DocBook XML.
 *
 * The document's styles are spliced in right after the opening
 * <office:document-content> tag, the result is run through the two
 * "fix-opendocument-content" stylesheets and the Sun
 * "opendocument-content-to-docbook.xsl" stylesheet, the metadata is spliced in
 * right after the opening <db:book> tag, and finally "fix-docbook.xsl" is
 * applied.
 *
 * @param string $currentXml XML of the document to convert.
 * @return string The transformed XML. For OpenOffice 1.x or unrecognised
 *                documents webServiceError() is called; if that call returns,
 *                the input is handed back unchanged.
 */
function process($currentXml)
{
    $currentDirectory = dirname($this->docvertTransformDirectory);
    $documentType = detectDocumentType($currentXml);
    switch ($documentType) {
        case 'OpenDocument1.0':
            // preg_replace() treats "\" and "$" in the replacement string as
            // back-reference syntax. The spliced-in XML is literal text, so
            // both characters are escaped before it is used as a replacement.
            $replacementEscapes = array('\\' => '\\\\', '$' => '\\$');

            $styles = strtr((string) $this->getStyles(), $replacementEscapes);
            // '${0}' (rather than '$0') keeps the back-reference unambiguous
            // even if the inserted text were to start with a digit.
            $currentXml = preg_replace('/<office:document-content[^>]*?>/s', '${0}' . $styles, $currentXml);

            $currentXml = xsltTransform($currentXml, $this->docvertTransformDirectory . 'fix-opendocument-content.xsl');
            $currentXml = xsltTransform($currentXml, $this->docvertTransformDirectory . 'fix-opendocument-content-stage2.xsl');
            $currentXml = xsltTransform(
                $currentXml,
                $currentDirectory . DIRECTORY_SEPARATOR . 'lib' . DIRECTORY_SEPARATOR . 'sun-xsl' . DIRECTORY_SEPARATOR . 'opendocument-content-to-docbook.xsl'
            );

            $metaData = strtr((string) $this->getMetaData(), $replacementEscapes);
            $currentXml = preg_replace('/<db:book[^>]*?>/s', '${0}' . $metaData, $currentXml);

            $currentXml = xsltTransform($currentXml, $this->docvertTransformDirectory . 'fix-docbook.xsl');
            break;
        case 'OpenOffice1.x':
            webServiceError('&error-process-transformopendocumenttodocbook-openofficefile;');
            break;
        default:
            webServiceError(
                '&error-process-transformopendocumenttodocbook-unsupported-file; [' . revealXml($currentXml) . ']',
                500,
                array('documentType' => $documentType)
            );
            break;
    }
    return $currentXml;
}
/**
 * Opens an FTP connection and logs in.
 *
 * @param string     $host     FTP server host name.
 * @param string     $username Login name.
 * @param string     $password Login password.
 * @param int|string $port     TCP port; anything that is not a positive
 *                             number falls back to the FTP default, 21.
 * @return resource|mixed The FTP connection handle. webServiceError() is
 *                        called when connecting or logging in fails.
 */
function getFtpConnection($host, $username, $password, $port)
{
    // The port argument used to be accepted but silently ignored.
    $portNumber = (int) $port;
    if ($portNumber <= 0) {
        $portNumber = 21;
    }

    $connectionId = ftp_connect($host, $portNumber);

    // A failed connect must not be handed to ftp_login(); report it through
    // the same error the failed-login path uses.
    if ($connectionId === false || !@ftp_login($connectionId, $username, $password)) {
        webServiceError('&error-ftp-authentication;', 500, array('username' => $username, 'host' => $host));
    }
    return $connectionId;
}
/**
 * Emits the current XML to the client.
 *
 * Web clients get the XML through displayXmlString(); every other client type
 * receives it as the body of a webServiceError() call with status 200.
 *
 * @param string $currentXml XML to output.
 * @return void
 */
public function process($currentXml)
{
    if (DOCVERT_CLIENT_TYPE != 'web') {
        webServiceError($currentXml, 200);
        return;
    }
    displayXmlString($currentXml);
}
/**
 * Applies the pipeline stylesheet named by the "withFile" attribute.
 *
 * The stylesheet is looked up inside the pipeline directory, and the element's
 * attributes (plus the current loop depth as "loopdepth") are passed to it as
 * XSLT parameters.
 *
 * @param string $currentXml XML to transform.
 * @return mixed Whatever xsltTransform() returns for the stylesheet.
 */
public function process($currentXml)
{
    $hasStylesheetAttribute = array_key_exists('withFile', $this->elementAttributes);
    if ($hasStylesheetAttribute === false) {
        webServiceError('&error-process-transform-lacks-withfile;');
    }

    $stylesheetPath = $this->pipelineDirectory . $this->elementAttributes['withFile'];
    if (file_exists($stylesheetPath) === false) {
        webServiceError('&error-process-transform-withfile-missing-file;', 500, array('xslPath' => $stylesheetPath));
    }

    // The loop depth is stored on the attributes themselves so that it is
    // passed to the stylesheet with them.
    $this->elementAttributes['loopdepth'] = $this->loopDepth;

    $transformed = xsltTransform($currentXml, $stylesheetPath, $this->elementAttributes);
    return $transformed;
}
/**
 * Writes the current XML to the file named by the "toFile" attribute.
 *
 * The attribute value is first passed through
 * replaceCustomFilenamePlaceholders() and processDepthTemplate(); the
 * resulting name is created inside the content directory.
 *
 * @param string $currentXml XML to write.
 * @return string The XML, unchanged.
 */
public function process($currentXml)
{
    if (!array_key_exists('toFile', $this->elementAttributes)) {
        webServiceError('&error-process-serialize-no-with-file;');
    }
    $fileTemplate = $this->elementAttributes['toFile'];

    // Pulls in the custom filename helpers from the parent directory.
    include_once dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'custom-filenames.php';

    $fileTemplate = replaceCustomFilenamePlaceholders($fileTemplate, $this->depthArray);
    $targetName = processDepthTemplate($fileTemplate, $this->depthArray);

    file_put_contents($this->contentDirectory . DIRECTORY_SEPARATOR . $targetName, $currentXml);
    return $currentXml;
}
/**
 * Applies an XSLT stylesheet to an XML string.
 *
 * The processor is selected by getXsltEnabledStatus(): "php5" uses
 * XSLTProcessor/DOMDocument, "php4" uses the legacy xslt_* functions, and any
 * other value is reported through webServiceError().
 *
 * @param string     $xmlString     XML document to transform.
 * @param string     $xsltPath      Filesystem path of the stylesheet.
 * @param array|null $xsltArguments Optional name => value stylesheet parameters.
 * @return string|null The transformation result. It can be empty or null when
 *                     the stylesheet produces no output; failures are
 *                     reported through webServiceError().
 */
function xsltTransform($xmlString, $xsltPath, $xsltArguments = null)
{
    if (!file_exists($xsltPath)) {
        webServiceError('&error-xslt-path-not-found;', 500, array('path' => $xsltPath));
    }
    $result = null;
    switch (getXsltEnabledStatus()) {
        case 'php5':
            // Load and parse failures used to be ignored in this branch, which
            // let a broken stylesheet or document continue through the
            // pipeline as bogus output. They are now reported the same way
            // the php4 branch reports processor errors.
            $xsltDocument = new DOMDocument();
            if (!$xsltDocument->load($xsltPath)) {
                webServiceError('&error-xslt-processor-error;', 500, array('path' => $xsltPath, 'errorMessage' => xsltTransformDescribeLibxmlError()));
            }
            $xslt = new XSLTProcessor();
            $xslt->importStyleSheet($xsltDocument);
            if (is_array($xsltArguments)) {
                foreach ($xsltArguments as $key => $value) {
                    $xslt->setParameter('', $key, $value);
                }
            }
            $xmlDocument = new DOMDocument();
            if (!$xmlDocument->loadXML($xmlString)) {
                webServiceError('&error-xslt-processor-error;', 500, array('path' => $xsltPath, 'errorMessage' => xsltTransformDescribeLibxmlError()));
            }
            $result = $xslt->transformToXML($xmlDocument);
            // Only a strict false signals failure; an empty result is a
            // legitimate outcome (performTest() relies on it).
            if ($result === false) {
                webServiceError('&error-xslt-processor-error;', 500, array('path' => $xsltPath, 'errorMessage' => xsltTransformDescribeLibxmlError()));
            }
            break;
        case 'php4':
            $xsltproc = xslt_create();
            $xmlString = array('/_xml' => $xmlString);
            $xsltPath = 'file://' . $xsltPath;
            $result = @xslt_process($xsltproc, 'arg:/_xml', $xsltPath, NULL, $xmlString, $xsltArguments) or webServiceError('&error-xslt-processor-error;', 500, array('path' => $xsltPath, 'errorMessage' => xslt_error($xsltproc)));
            if (empty($result) or xslt_error($xsltproc) != null) {
                webServiceError('&error-xslt-processor-error;', 500, array('path' => $xsltPath, 'errorMessage' => xslt_error($xsltproc)));
            }
            xslt_free($xsltproc);
            break;
        default:
            $phpVersion = getPhpVersion();
            if ($phpVersion >= 5) {
                webServiceError('&error-xslt-not-available;');
            } else {
                webServiceError('&error-php5-required;', 500, array('phpVersion' => $phpVersion));
            }
    }
    return $result;
}

/**
 * Returns the message of the most recent libxml error, or an empty string
 * when libxml has not recorded one.
 *
 * @return string
 */
function xsltTransformDescribeLibxmlError()
{
    $lastError = libxml_get_last_error();
    if ($lastError === false || $lastError === null) {
        return '';
    }
    return trim($lastError->message);
}
/**
 * Runs the test stylesheet named by the "withFile" attribute against the XML
 * and logs whatever non-blank output it produces.
 *
 * A "withFile" value containing "internal://" is resolved against the Docvert
 * transform directory (with that marker removed); any other value is resolved
 * against the pipeline directory.
 *
 * @param string $currentXml XML to test (taken by reference).
 * @return void
 */
function performTest(&$currentXml)
{
    if (!array_key_exists('withFile', $this->elementAttributes)) {
        webServiceError('&error-process-test-withfile;');
    }

    $withFile = $this->elementAttributes['withFile'];
    $isInternal = stripos($withFile, 'internal://') !== false;
    $stylesheetPath = $isInternal
        ? $this->docvertTransformDirectory . str_ireplace('internal://', '', $withFile)
        : $this->pipelineDirectory . $withFile;

    if (!file_exists($stylesheetPath)) {
        webServiceError('&error-process-test-missing-xsl;', 500, array('xslPath' => $stylesheetPath));
    }

    $report = xsltTransform($currentXml, $stylesheetPath, array('loopdepth' => $this->loopDepth));
    if (trim($report)) {
        $this->logError($report, 'raw');
    }
}
/**
 * Copies a preview directory to a remote location, using the copier that
 * matches the upload location's protocol.
 *
 * @param array  $uploadLocation   Upload settings; its 'protocol' entry selects the copier.
 * @param string $previewDirectory Local directory to upload.
 * @param string $remoteDirectory  Destination directory on the remote side.
 * @return mixed The selected copier's return value. For an unknown protocol
 *               webServiceError() is called and nothing is returned.
 */
function uploadToUploadLocation($uploadLocation, $previewDirectory, $remoteDirectory)
{
    $protocol = $uploadLocation['protocol'];

    if ($protocol == 'ftp') {
        return copyViaFtpRecursively($uploadLocation, $previewDirectory, $remoteDirectory, "active");
    }
    if ($protocol == 'ftp-pasv') {
        return copyViaFtpRecursively($uploadLocation, $previewDirectory, $remoteDirectory, "passive");
    }
    if ($protocol == 'webdav' || $protocol == 'webdav-tls') {
        return copyViaWebDAVRecursively($uploadLocation, $previewDirectory, $remoteDirectory);
    }
    if ($protocol == 'bloggerapi' || $protocol == 'bloggerapi-ssl') {
        return copyViaBloggerAPI($uploadLocation, $previewDirectory, $remoteDirectory);
    }

    webServiceError('&error-unknown-protocol; ' . $uploadLocation['protocol']);
}
/**
 * Copies the files matched by the "withFiles" attribute into the content
 * directory.
 *
 * "withFiles" is a comma-separated list of glob patterns relative to the
 * pipeline directory. Every match is copied recursively, keeping only its
 * base name; a pattern that glob() cannot evaluate is logged as an error.
 *
 * @param string $currentXml XML being processed.
 * @return string The XML, unchanged.
 */
public function process($currentXml)
{
    if (!array_key_exists('withFiles', $this->elementAttributes)) {
        webServiceError('&error-process-includedependentfiles-withfiles;');
    }

    foreach (explode(',', $this->elementAttributes['withFiles']) as $rawPattern) {
        $pattern = trim($rawPattern);
        if (!$pattern) {
            continue;
        }

        $globPattern = $this->pipelineDirectory . $pattern;
        $foundPaths = glob($globPattern);
        if ($foundPaths === false) {
            $this->logError(array('&error-unable-to-read-directory;', array('path' => $globPattern)), 'error');
            continue;
        }

        foreach ($foundPaths as $sourcePath) {
            $targetPath = $this->contentDirectory . DIRECTORY_SEPARATOR . basename($sourcePath);
            $this->copyRecursively($sourcePath, $targetPath);
        }
    }
    return $currentXml;
}
/**
 * Finds a writable temporary directory and remembers it for later calls.
 *
 * Candidates, in order: the TMPDIR, TMP and TEMP environment variables, the
 * platform's conventional locations ("/tmp/" or "\temp\" and
 * "\windows\temp\"), and finally sys_get_temp_dir() when available. The first
 * writable candidate is stored in the OPERATING_SYSTEM_TEMPORARY_DIRECTORY
 * constant and returned.
 *
 * @return string|null Directory path ending in a directory separator.
 *                     webServiceError() is called when no candidate is
 *                     writable.
 */
function getOperatingSystemsTemporaryDirectory()
{
    if (defined('OPERATING_SYSTEM_TEMPORARY_DIRECTORY')) {
        return OPERATING_SYSTEM_TEMPORARY_DIRECTORY;
    }

    $directoriesToCheck = array();
    foreach (array('TMPDIR', 'TMP', 'TEMP') as $variableName) {
        $value = null;
        if (isset($_ENV[$variableName])) {
            $value = $_ENV[$variableName];
        } else {
            // $_ENV is empty unless variables_order contains "E", so the
            // process environment is consulted directly as well.
            $fromEnvironment = getenv($variableName);
            if ($fromEnvironment !== false) {
                $value = $fromEnvironment;
            }
        }
        if (is_string($value) && trim($value) !== '') {
            // Normalised to end in a separator, like the hard-coded candidates.
            $directoriesToCheck[] = rtrim($value, '/' . DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
        }
    }

    if (DIRECTORY_SEPARATOR == '/') {
        $directoriesToCheck[] = '/tmp/';
    } else {
        $directoriesToCheck[] = '\\temp\\';
        $directoriesToCheck[] = '\\windows\\temp\\';
    }

    if (function_exists('sys_get_temp_dir')) {
        $directoriesToCheck[] = rtrim(sys_get_temp_dir(), '/' . DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
    }

    foreach ($directoriesToCheck as $directoryToCheck) {
        if (is_writable($directoryToCheck)) {
            define('OPERATING_SYSTEM_TEMPORARY_DIRECTORY', $directoryToCheck);
            return OPERATING_SYSTEM_TEMPORARY_DIRECTORY;
        }
    }
    webServiceError('&error-config-file-not-writable;', 500, array('path' => implode(', ', $directoriesToCheck)));
}
$docbookBody = preg_replace("/<docvert-remove-me[^>]*?>/", '', $docbookBody); $docbookBody = preg_replace("/<\\/docvert-remove-me[^>]*?>/", '', $docbookBody); //displayXmlString($docbookBody); //displayXmlString(file_get_contents($docbookBodyXsltPath)); $allDocumentsPreviewDirectory = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'writable' . DIRECTORY_SEPARATOR . $documentPathParts[0] . DIRECTORY_SEPARATOR; $previewDirectory = $allDocumentsPreviewDirectory . $documentPathParts[1] . DIRECTORY_SEPARATOR; $unitTestResults = $previewDirectory . 'test.html'; if (file_exists($unitTestResults)) { silentlyUnlink($unitTestResults); } $zipsInPreviewDirectory = glob($allDocumentsPreviewDirectory . '*.zip'); if (count($zipsInPreviewDirectory)) { foreach ($zipsInPreviewDirectory as $zipInPreviewDirectory) { silentlyUnlink($zipInPreviewDirectory); if (file_exists($zipInPreviewDirectory)) { webServiceError('Docvert internal error: unable to remove ZIP file at "' . $zipInPreviewDirectory . '"'); } } $zipFilePath = $zipsInPreviewDirectory[0]; } else { $zipFileName = chooseNameOfZipFile($allDocumentsPreviewDirectory); $zipFilePath = $allDocumentsPreviewDirectory . $zipFileName; } $filesInPreviewDirectory = glob($previewDirectory . '*'); foreach ($filesInPreviewDirectory as $fileInPreviewDirectory) { if (!stringStartsWith(basename($fileInPreviewDirectory), "docvert") && !stringEndsWith(basename($fileInPreviewDirectory), "wmf") && !stringEndsWith(basename($fileInPreviewDirectory), "gif") && !stringEndsWith(basename($fileInPreviewDirectory), "png") && !stringEndsWith(basename($fileInPreviewDirectory), "jpeg") && !stringEndsWith(basename($fileInPreviewDirectory), "jpg") && !stringEndsWith(basename($fileInPreviewDirectory), "svg")) { //print 'Delete: '.$fileInPreviewDirectory.'<br />'; silentlyUnlink($fileInPreviewDirectory); } else { //print 'Retain: '.$fileInPreviewDirectory.'<br />'; }
/**
 * Downloads the images a scraped page references and rewrites image and link
 * URLs inside the XML.
 *
 * "extract-pages-html-images-and-links.xsl" is expected to emit one line per
 * URL in the form "type<TAB>baseUrl<TAB>url". Relative URLs are resolved
 * against the base URL. Images are fetched and stored under "Pictures/" in
 * the content directory, and their references are replaced with the local
 * path plus c:width / c:height attributes. Links are replaced with their
 * absolute form.
 *
 * @param string $currentXml XML containing the scraped pages.
 * @return string XML with rewritten image and link references.
 */
function process($currentXml)
{
    $extractImagesPath = $this->docvertTransformDirectory . 'extract-pages-html-images-and-links.xsl';
    $htmlUrls = trim(xsltTransform($currentXml, $extractImagesPath));
    $htmlUrlLines = explode("\n", $htmlUrls);
    $missingImagePlaceholderImagePath = dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'files' . DIRECTORY_SEPARATOR . '404image.gif';

    foreach ($htmlUrlLines as $htmlUrlLine) {
        if (trim($htmlUrlLine) == '') {
            continue;
        }
        $urlLineParts = explode("\t", $htmlUrlLine);
        if (count($urlLineParts) < 3) {
            // Not a "type, base URL, URL" triple; nothing can be resolved.
            continue;
        }
        $urlType = $urlLineParts[0];
        $baseUrl = $urlLineParts[1];
        $possiblyRelativeUrl = $urlLineParts[2];

        $fullUrl = '';
        if (stringStartsWith($possiblyRelativeUrl, "http://") || stringStartsWith($possiblyRelativeUrl, "https://") || stringStartsWith($possiblyRelativeUrl, "mailto:")) {
            $fullUrl = $possiblyRelativeUrl;
        } else {
            $connectionPart = getUrlConnectionPart($baseUrl);
            $localPartDirectory = getUrlLocalPartDirectory($baseUrl);
            if (stringStartsWith($possiblyRelativeUrl, '/')) {
                // Host-relative URL: only the scheme and host are prepended.
                $fullUrl = $connectionPart . $possiblyRelativeUrl;
            } else {
                $relativePath = resolveRelativeUrl($localPartDirectory . $possiblyRelativeUrl);
                if (!stringStartsWith($relativePath, '/')) {
                    $relativePath = '/' . $relativePath;
                }
                $fullUrl = $connectionPart . $relativePath;
            }
        }

        if (!file_exists($missingImagePlaceholderImagePath)) {
            // The placeholder previously read an undefined variable, so the
            // reported path was always empty.
            webServiceError('&dynamic-error-process-downloadimagesandsetlinks-missing-placeholder;', 500, array('fourOhFourImagePath' => $missingImagePlaceholderImagePath));
        }

        $fullUrl = html_entity_decode($fullUrl);
        switch ($urlType) {
            case 'image':
                $imageData = file_get_contents($fullUrl);
                if ($imageData == null) {
                    // Download failed or returned nothing: use the placeholder.
                    $imageData = file_get_contents($missingImagePlaceholderImagePath);
                }
                $picturesDirectory = $this->contentDirectory . DIRECTORY_SEPARATOR . 'Pictures';
                if (!file_exists($picturesDirectory)) {
                    mkdir($picturesDirectory);
                }
                if (!function_exists('imagecreatefromstring')) {
                    webServiceError('&error-process-downloadimagesandsetlinks-missing-gd;');
                }
                $imageResource = imagecreatefromstring($imageData);
                if (!$imageResource) {
                    // GD cannot decode the data; take the dimensions from the placeholder.
                    $imageResource = imagecreatefromstring(file_get_contents($missingImagePlaceholderImagePath));
                }
                $imageWidth = imagesx($imageResource);
                $imageHeight = imagesy($imageResource);

                $fileExtension = substr($fullUrl, strrpos($fullUrl, '.') + 1);
                switch ($fileExtension) {
                    case 'jpg':
                    case 'jpeg':
                    case 'gif':
                    case 'png':
                        break;
                    default:
                        $fileExtension = 'jpg';
                }

                $openDocumentPath = 'Pictures/' . md5($fullUrl) . '.' . $fileExtension;
                file_put_contents($this->contentDirectory . DIRECTORY_SEPARATOR . $openDocumentPath, $imageData);

                // The replacement closes the src attribute early and appends
                // the dimensions as extra attributes.
                $imageUrlReplacement = $openDocumentPath . '" c:width="' . $imageWidth . '" c:height="' . $imageHeight;
                //FIXME: assumes image @src has double-quote and not single
                $currentXml = str_replace('"' . $possiblyRelativeUrl . '"', '"' . $imageUrlReplacement . '"', $currentXml);
                break;
            case 'link':
                $currentXml = str_replace('"' . $possiblyRelativeUrl . '"', '"' . htmlentities($fullUrl) . '"', $currentXml);
                break;
        }
    }
    return $currentXml;
}
/**
 * Follows HTTP "Location" redirects with HEAD requests until a URL stops
 * redirecting.
 *
 * @param string    $url                         URL to start from.
 * @param int|false $maximumNumberOfOfRedirects Redirect budget; false means 10.
 * @return string|null The last URL reached, or null when a request fails.
 *                     webServiceError() is called once the redirect budget
 *                     is used up.
 */
function followUrlRedirects($url, $maximumNumberOfOfRedirects = false)
{
    $redirectsLeft = ($maximumNumberOfOfRedirects === false) ? 10 : $maximumNumberOfOfRedirects;
    $visitedUrls = array();
    $timeOutInSeconds = 5;
    $settled = false;

    do {
        $requestedUrl = $url;
        $redirectedThisRound = false;
        $domainAndPort = getUrlDomainAndPortPart($url);
        $visitedUrls[] = $url;
        $localPart = getUrlLocalPart($url);

        $headers = pullpage('HEAD', $domainAndPort[0], $domainAndPort[1], $localPart, false, false, false, false, null, null, false, $timeOutInSeconds);
        if ($headers === false) {
            return null;
        }

        foreach (explode("\n", $headers) as $headerLine) {
            // A header line is "Name: value"; lines without a colon are ignored.
            $colonAt = strpos($headerLine, ':');
            if ($colonAt === false) {
                continue;
            }
            $headerName = substr($headerLine, 0, $colonAt);
            $headerValue = substr($headerLine, $colonAt + 1);
            if (strtolower(trim($headerName)) != "location") {
                continue;
            }

            $url = trim($headerValue);
            $isAbsolute = stringStartsWith($url, 'http://') || stringStartsWith($url, 'https://');
            if (!$isAbsolute) {
                // Relative redirect target: prefix it with the current host.
                $url = 'http://' . $domainAndPort[0] . $url;
            }
            if ($requestedUrl != $url) {
                $redirectedThisRound = true;
            }
        }

        if ($redirectedThisRound == false) {
            $settled = true;
        }
        if ($redirectsLeft <= 0) {
            webServiceError("&error-maximum-number-of-redirects-followed;", 500, array('redirectPaths' => implode("</li><li>", $visitedUrls)));
        }
        $redirectsLeft--;
    } while ($settled == false);

    return $url;
}
/**
 * Converts a WMF/EMF image referenced by the document into a PDF.
 *
 * The image's width and height are read from its draw:frame in the document,
 * a minimal ODT containing only that image is assembled from the template
 * files and zipped, the ODT is streamed to PyODConverter, and the returned
 * PDF is saved next to the original image.
 *
 * @param string $imagePath  Filesystem path of the WMF/EMF image.
 * @param string $currentXml Document XML (taken by reference, only read here).
 * @return array|false array('width', 'height', 'path') for the generated PDF,
 *                     or false when the document does not mention the image.
 */
function wmfOrEmfToPdf($imagePath, &$currentXml)
{
    //Step 1. Detect width/height of image.
    $imageName = basename($imagePath);
    $imageOffset = strpos($currentXml, $imageName);
    if ($imageOffset === False) {
        return False; //image not in document, don't worry about it.
    }
    $xpath = "//*[@xlink:href='" . $imageName . "']//parent::draw:frame";
    $dom = simplexml_load_string($currentXml);
    if ($dom === false) {
        // Unparseable XML used to cause a fatal error on ->xpath() below.
        webServiceError('&error-process-convertimages-no-dom;', 500, array('xpath' => $xpath));
    }
    $imageMatch = $dom->xpath($xpath);
    if (count($imageMatch) == 0) {
        webServiceError('&error-process-convertimages-no-dom;', 500, array('xpath' => $xpath));
    }
    $imageMatch = $imageMatch[0];
    $attributes = $imageMatch->attributes('svg', true);
    $width = (string) $attributes['width'];
    $height = (string) $attributes['height'];

    //Step 2. Make an ODT file containing only the WMF/EMF
    // (ugh.. I know, but it works and it's reliable because we benefit from OpenOffice's years of
    // reverse-engineering the EMF/WMF formats so really we should get over it)
    //step 2a -- make a working directory for our OpenDocument file and copy the files in
    $workingDirectory = getTemporaryDirectoryInsideDirectory($this->contentDirectory);
    mkdir($workingDirectory . DIRECTORY_SEPARATOR . 'Pictures');
    $destinationImagePath = $workingDirectory . DIRECTORY_SEPARATOR . 'Pictures' . DIRECTORY_SEPARATOR . basename($imagePath);
    copy($imagePath, $destinationImagePath);

    $odtTemplateDirectory = DOCVERT_DIR . 'core' . DIRECTORY_SEPARATOR . 'files' . DIRECTORY_SEPARATOR;

    // The page is sized to the image.
    $stylesXml = file_get_contents($odtTemplateDirectory . 'styles.xml');
    $stylesXml = str_replace('{{page-width}}', $width, $stylesXml);
    $stylesXml = str_replace('{{page-height}}', $height, $stylesXml);
    file_put_contents($workingDirectory . DIRECTORY_SEPARATOR . 'styles.xml', $stylesXml);

    copy($odtTemplateDirectory . 'settings.xml', $workingDirectory . DIRECTORY_SEPARATOR . 'settings.xml');
    copy($odtTemplateDirectory . 'meta.xml', $workingDirectory . DIRECTORY_SEPARATOR . 'meta.xml');
    copy($odtTemplateDirectory . 'manifest.rdf', $workingDirectory . DIRECTORY_SEPARATOR . 'manifest.rdf');
    copy($odtTemplateDirectory . 'mimetype', $workingDirectory . DIRECTORY_SEPARATOR . 'mimetype');

    $contentXml = file_get_contents($odtTemplateDirectory . 'content.xml');
    $imageTemplate = '<text:p><draw:frame text:anchor-type="as-char" svg:width="{{width}}" svg:height="{{height}}" draw:z-index="1"><draw:image xlink:href="{{path}}" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad"/></draw:frame></text:p>';
    $imageString = str_replace('{{width}}', $width, $imageTemplate);
    $imageString = str_replace('{{height}}', $height, $imageString);
    $imageString = str_replace('{{path}}', 'Pictures/' . basename($destinationImagePath), $imageString);
    $contentXml = str_replace('<!--{{content}}-->', $imageString, $contentXml);
    file_put_contents($workingDirectory . DIRECTORY_SEPARATOR . 'content.xml', $contentXml);

    mkdir($workingDirectory . DIRECTORY_SEPARATOR . 'META-INF');
    $manifestXml = file_get_contents($odtTemplateDirectory . 'manifest.xml');
    $manifestItemTemplate = ' <manifest:file-entry manifest:media-type="" manifest:full-path="{{path}}"/>';
    $manifestItem = str_replace('{{path}}', 'Pictures/' . basename($imagePath), $manifestItemTemplate);
    $manifestXml = str_replace('<!--{{content}}-->', $manifestItem, $manifestXml);
    file_put_contents($workingDirectory . DIRECTORY_SEPARATOR . 'META-INF' . DIRECTORY_SEPARATOR . 'manifest.xml', $manifestXml);

    //step 2b zip it into an ODT
    $zipPath = $this->contentDirectory . DIRECTORY_SEPARATOR . basename($imagePath) . '.odt';
    $zipPath = zipFiles($workingDirectory, $zipPath);
    $zipData = file_get_contents($zipPath);
    silentlyUnlink($zipPath);
    silentlyUnlink($workingDirectory);

    //Step 3 . Stream it to PyODConverter. Make a PDF and save it.
    $pyodConverterPath = DOCVERT_DIR . 'core' . DIRECTORY_SEPARATOR . 'lib' . DIRECTORY_SEPARATOR . 'pyodconverter' . DIRECTORY_SEPARATOR . 'pyodconverter.py';
    if (!file_exists($pyodConverterPath)) {
        die("Can't find PyODconverter at " . htmlentities($pyodConverterPath));
    }
    // The path is quoted so that an installation directory containing spaces
    // or shell metacharacters still yields a valid command line.
    $command = escapeshellarg($pyodConverterPath) . ' --stream --pdf';
    $response = shellCommand($command, 20, $zipData, false);

    $pdfMagicBytes = '%PDF';
    if (substr($response['stdOut'], 0, strlen($pdfMagicBytes)) != $pdfMagicBytes) {
        die("Expected a PDF response but didn't receive one. Received back " . htmlentities(print_r($response, true)));
    }

    $imagePathInfo = pathinfo($imagePath);
    $pdfPath = dirname($imagePath) . DIRECTORY_SEPARATOR . basename($imagePath, '.' . $imagePathInfo['extension']) . '.pdf';
    file_put_contents($pdfPath, $response['stdOut']);
    return array('width' => $width, 'height' => $height, 'path' => $pdfPath);
}
/**
 * Builds an OpenDocument text file from a list of web pages and streams it to
 * the client as a download.
 *
 * Each http(s) page is fetched, cleaned with Tidy, stripped of style, script
 * and processing-instruction blocks, and wrapped in a <c:page> element. The
 * combined XML is run through the chosen generator pipeline and the resulting
 * temporary directory is zipped and sent as "output.odt".
 *
 * @param array|string $pages             URLs of the pages to include.
 * @param string       $generatorPipeline Name of a directory directly under
 *                                        "generator-pipeline".
 * @return void
 */
function generateDocument($pages, $generatorPipeline)
{
    // The pipeline name becomes a path component, so it must be a single
    // directory name. The previous pattern (any character followed by "/")
    // still let ".." and backslash-separated paths through.
    $isUnsafePipelineName = !is_string($generatorPipeline)
        || $generatorPipeline === '.'
        || $generatorPipeline === '..'
        || strpos($generatorPipeline, '/') !== false
        || strpos($generatorPipeline, '\\') !== false
        || strpos($generatorPipeline, "\0") !== false;
    if ($isUnsafePipelineName) {
        webServiceError('&error-disallowed-characters;');
    }

    $userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:bignumber) Docvert';
    $httpContextOptions = array('http' => array('header' => 'User-Agent: ' . $userAgent));
    $httpContext = stream_context_create($httpContextOptions);

    // NOTE(review): the return value is unused; the call is kept in case it
    // has side effects (e.g. verifying the directory) -- confirm.
    getWritableDirectory();

    $disallowDocumentGeneration = getGlobalConfigItem('doNotAllowDocumentGeneration');
    if ($disallowDocumentGeneration == 'true') {
        webServiceError('&document-generation-disabled;');
    }

    $pageXml = '<c:document xmlns="http://www.w3.org/1999/xhtml" xmlns:c="container">' . "\n";
    $pageTemplate = "\n\t" . '<c:page url="{{url}}" {{baseUrl}}>{{page}}</c:page>' . "\n";
    $config = array('indent' => true, 'output-xhtml' => true, 'wrap' => 200);
    if (!class_exists('tidy')) {
        webServiceError('&tidy-is-not-installed;');
    }
    $tidy = new tidy();
    $baseTagPattern = "/<base[^>]*?href=([^>]*?)>/is";

    foreach ((array) $pages as $page) {
        if (!is_string($page)) {
            continue;
        }
        if (trim($page) != '' && (stringStartsWith($page, 'http://') || stringStartsWith($page, 'https://'))) {
            $pageHtml = file_get_contents($page, false, $httpContext);
            $tidy->parseString($pageHtml, $config, 'utf8');
            $tidy->cleanRepair();

            // The URL goes into an XML attribute; "&", quotes and angle
            // brackets must be escaped to keep the document well-formed.
            $thisPage = str_replace('{{url}}', htmlspecialchars($page, ENT_QUOTES, 'UTF-8'), $pageTemplate);

            $baseUrl = ''; //supporting that ugly old hack of <base>
            preg_match($baseTagPattern, $pageHtml, $matches);
            if (count($matches) > 0) {
                $baseUrl = 'baseUrl="' . substr($matches[1], 1, -2) . '"';
            }
            $thisPage = str_replace('{{baseUrl}}', $baseUrl, $thisPage);

            $tidiedPageContents = characterEntityToNCR(removeDoctype(removeXmlComments($tidy)));
            $styleTagPattern = "/<style.*?<\\/style>/is";
            $tidiedPageContents = preg_replace($styleTagPattern, '', $tidiedPageContents);
            $scriptTagPattern = "/<script.*?<\\/script>/is";
            $tidiedPageContents = preg_replace($scriptTagPattern, '', $tidiedPageContents);
            $questionMarkPattern = "/<\\?.*?\\?>/is"; //as strangely used on news.yahoo.com
            $tidiedPageContents = preg_replace($questionMarkPattern, '', $tidiedPageContents);

            $thisPage = str_replace('{{page}}', $tidiedPageContents, $thisPage);
            $pageXml .= $thisPage;
        }
    }
    $pageXml .= '</c:document>';

    $temporaryDirectory = getTemporaryDirectory();
    $pipelineDirectory = dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'generator-pipeline' . DIRECTORY_SEPARATOR . $generatorPipeline . DIRECTORY_SEPARATOR;
    $pipelinePath = $pipelineDirectory . 'pipeline.xml';
    if (!file_exists($pipelinePath)) {
        webServiceError('&generation-pipeline-not-found; ' . revealXml($pipelinePath));
    }

    // Only the contents of the <pipeline> element are parsed into stages.
    $pipelineString = file_get_contents($pipelinePath);
    $pipelineString = substr($pipelineString, strpos($pipelineString, '<pipeline>') + 10);
    $pipelineString = substr($pipelineString, 0, strpos($pipelineString, '</pipeline>'));
    $pipelineStages = xmlStringToArray($pipelineString);
    $pipelineSettings = array("pipeline" => $generatorPipeline, "autopipeline" => $generatorPipeline);
    processAPipelineLevel($pipelineStages, $pageXml, $pipelineDirectory, $temporaryDirectory, $temporaryDirectory, $pipelineSettings);

    $openDocumentPath = $temporaryDirectory . 'output.odt';
    zipFiles($temporaryDirectory, $openDocumentPath);
    header('Content-disposition: attachment; filename=' . basename($openDocumentPath));
    header('Content-type: application/vnd.oasis.opendocument.text');
    readfile($openDocumentPath);
}
<?php
// Docvert needs PHP 5 or newer. version_compare() catches every older
// release rather than only versions whose string starts with "4".
if (version_compare(PHP_VERSION, '5.0.0', '<')) {
    webServiceError('&error-ensure-php5;', 500, array('phpVersion' => PHP_VERSION));
}
<?php
// Web entry point for document generation: validates the request parameters
// and hands them to generateDocument().
ob_start();
$appDir = dirname(__FILE__) . DIRECTORY_SEPARATOR;
define('DOCVERT_DIR', $appDir);
define('DOCVERT_CLIENT_TYPE', 'web');
include_once DOCVERT_DIR . 'core/lib.php';

// "pages" must be a non-empty array. count() on a scalar request value is a
// TypeError on PHP 8 and a warning on older versions.
if (!isset($_REQUEST['pages']) || !is_array($_REQUEST['pages']) || count($_REQUEST['pages']) == 0) {
    webServiceError('&error-generator-expected-pages-parameter;');
}

// "generatorPipeline" must be a single string; an array-valued parameter
// (generatorPipeline[]=...) is rejected as well.
if (!isset($_REQUEST['generatorPipeline']) || !is_string($_REQUEST['generatorPipeline'])) {
    webServiceError('&error-generator-expected-generatorpipeline-parameter;');
}

return generateDocument($_REQUEST['pages'], $_REQUEST['generatorPipeline']);
/**
 * Renders the current step of the "generate a document from web pages" wizard.
 *
 * The step is selected by $_REQUEST['step']:
 *  - "2": fetches the requested URL, collects the links on that page, follows
 *         their redirects and lists them as checkboxes, grouped by how likely
 *         they are to belong to the same site section;
 *  - "3": lists the chosen pages so they can be ordered;
 *  - "4": shows the final page order and the available generator pipelines;
 *  - anything else (or no step): the initial form.
 *
 * All request-derived and scraped values are HTML-escaped before they are
 * inserted into the templates.
 *
 * @return string HTML fragment for the step.
 */
function showGenerationStep()
{
    $disallowDocumentGeneration = getGlobalConfigItem('doNotAllowDocumentGeneration');
    if ($disallowDocumentGeneration == 'true') {
        return $this->getThemeFragment('generation-disabled.htmlf');
    }
    if (!isset($_REQUEST['step'])) {
        return $this->getThemeFragment('generation-step1.htmlf');
    }

    switch ($_REQUEST['step']) {
        case '4':
            if (!isset($_REQUEST['pages']) || !is_array($_REQUEST['pages'])) {
                webServiceError('&error-webpage-generation-no-pages;');
            }
            $template = $this->getThemeFragment('generation-step4.htmlf');
            $hiddenFormChosenPages = array();
            $listItems = array();
            foreach ($_REQUEST['pages'] as $page) {
                $safePage = htmlspecialchars((string) $page, ENT_QUOTES, 'UTF-8');
                $listItems[] = "\n\t\t\t\t" . '<li>' . $safePage . '</li>';
                $hiddenFormChosenPages[] = "\n\t\t\t\t" . '<input type="hidden" name="pages[]" value="' . $safePage . '"/>';
            }
            $template = str_replace('{{page-order}}', implode($listItems), $template);
            $template = str_replace('{{hidden-form-chosen-pages}}', implode($hiddenFormChosenPages), $template);

            $generatorPipelines = glob($this->docvertRootDirectory . 'generator-pipeline' . DIRECTORY_SEPARATOR . '*');
            $generatorPipelinesArray = array();
            if ($generatorPipelines !== false) {
                foreach ($generatorPipelines as $generatorPipeline) {
                    $generatorName = htmlspecialchars(basename($generatorPipeline), ENT_QUOTES, 'UTF-8');
                    $generatorPipelinesArray[] = '<option value="' . $generatorName . '">' . $generatorName . '</option>';
                }
            }
            return str_replace('{{generator-pipelines}}', implode('', $generatorPipelinesArray), $template);

        case '3':
            // Same guard as step 4: this step also iterates the chosen pages.
            if (!isset($_REQUEST['pages']) || !is_array($_REQUEST['pages'])) {
                webServiceError('&error-webpage-generation-no-pages;');
            }
            $template = $this->getThemeFragment('generation-step3.htmlf');
            $listItems = array();
            foreach ($_REQUEST['pages'] as $page) {
                $safePage = htmlspecialchars((string) $page, ENT_QUOTES, 'UTF-8');
                $listItems[] = "\n\t\t\t\t" . '<option value="' . $safePage . '">' . $safePage . '</option>';
            }
            return str_replace('{{chosen-scrape-urls}}', implode($listItems), $template);

        case '2':
            if (!isset($_REQUEST['url']) || !is_string($_REQUEST['url'])) {
                webServiceError('&error-webpage-generation-url;');
            }
            $originalUrl = $_REQUEST['url'];
            if (trim($originalUrl) == '') {
                webServiceError('&error-webpage-generation-no-url-given;');
            }
            if (!stringStartsWith($originalUrl, 'http')) {
                $originalUrl = 'http://' . $originalUrl;
            }
            $originalUrl = str_replace(array("\n", "\r", "\t", " "), '', $originalUrl);

            include_once dirname(__FILE__) . '/http.php';
            if (trim(getUrlLocalPart($originalUrl)) == '') {
                $resolvedUrl = followUrlRedirects($originalUrl . '/');
            } else {
                $resolvedUrl = followUrlRedirects($originalUrl);
            }
            // followUrlRedirects() returns null when the request fails; the
            // old strict comparison with false never matched that.
            if ($resolvedUrl === false || $resolvedUrl === null) {
                webServiceError('&error-webpage-cannot-get-url;', 500, array('url' => $originalUrl));
            }
            $originalUrl = $resolvedUrl;

            $page = file_get_contents($originalUrl);
            if ($page === false) {
                webServiceError('&error-webpage-cannot-get-url;', 500, array('url' => $originalUrl));
            }

            // A <base href> on the page overrides the URL links are resolved against.
            $baseTagPattern = "/<base[^>]*?href=([^>]*?)>/is";
            preg_match($baseTagPattern, $page, $matches);
            if (count($matches) > 0) {
                $originalUrl = trim($matches[1]);
                $originalUrl = substr($originalUrl, 1, strlen($originalUrl) - 2);
            }

            // Kept separately: the loops below reuse $url as their iteration
            // variable, which used to leak into the {{scrape-url}} placeholder.
            $scrapeUrl = $originalUrl;
            $connectionPart = getUrlConnectionPart($originalUrl);
            $localPartDirectory = getUrlLocalPartDirectory($originalUrl);

            $matches = null;
            preg_match_all('/href="(.*?)"/', $page, $matches);

            $skippedExtensions = array(
                'avi', 'mov', 'mpg', 'css', 'jpeg', 'jpg', 'gif', 'png', 'bmp', 'apng',
                'tiff', 'ico', 'js', 'gz', 'tar', 'zip', 'bin', 'sit', 'mp3', 'mp4',
                'wav', 'swf', 'fla', 'rss', 'atom', 'pdf', 'xls', 'doc', 'txt', 'pps',
            );

            // Array keys are used to de-duplicate the collected URLs.
            $urls = array($originalUrl => true);
            foreach ($matches[1] as $match) {
                // Attribute values are entity-encoded in the page source;
                // decode them to get the real URL before resolving it.
                $link = html_entity_decode($match, ENT_QUOTES, 'UTF-8');
                if (stringStartsWith($link, '/')) {
                    $link = $connectionPart . $link;
                } elseif (!stringStartsWith($link, "http://") && !stringStartsWith($link, "https://") && !stringStartsWith($link, "mailto:")) {
                    $link = $connectionPart . resolveRelativeUrl($localPartDirectory . $link);
                }
                if (containsString($link, '#')) {
                    $link = substringBefore($link, '#');
                }
                if (stringEndsWith($link, '?')) {
                    $link = substringBefore($link, '?');
                }
                if (!stringStartsWith($link, 'http')) {
                    continue;
                }
                $fileExtension = substr($link, strrpos($link, '.') + 1);
                if (!in_array($fileExtension, $skippedExtensions, true)) {
                    $urls[$link] = true;
                }
            }
            $urls = array_keys($urls);

            $mostLikelyUrls = array();
            $possibleUrls = array();
            $unlikelyUrls = array();
            $numberOfSlashesInOriginalUrl = strlen($originalUrl) - strlen(str_replace('/', '', $originalUrl));
            foreach ($urls as $url) {
                $url = followUrlRedirects($url);
                if (trim((string) $url) == '') {
                    continue;
                }
                $numberOfSlashesInUrl = strlen($url) - strlen(str_replace('/', '', $url));
                if (stringStartsWith($url, $connectionPart . $localPartDirectory) && $numberOfSlashesInUrl == $numberOfSlashesInOriginalUrl) {
                    $mostLikelyUrls[] = $url;
                } elseif (stringStartsWith($url, $connectionPart)) {
                    $possibleUrls[] = $url;
                } else {
                    $unlikelyUrls[] = $url;
                }
            }
            asort($unlikelyUrls);

            // Only the most likely URLs are pre-selected.
            $urlGroups = array(
                array($mostLikelyUrls, ' checked="checked"'),
                array($possibleUrls, ''),
                array($unlikelyUrls, ''),
            );
            $links = array();
            $itemId = 0;
            foreach ($urlGroups as $urlGroup) {
                foreach ($urlGroup[0] as $url) {
                    $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
                    $links[] = '<li class="orderingItem"><label for="urlId' . $itemId . '"><input type="checkbox" name="pages[]" value="' . $safeUrl . '" id="urlId' . $itemId . '"' . $urlGroup[1] . '/><span class="title">' . $safeUrl . '</span></label></li>' . "\n";
                    $itemId++;
                }
            }

            $step2Template = $this->getThemeFragment('generation-step2.htmlf');
            $step2Template = str_replace('{{scrape-results}}', implode('', $links), $step2Template);
            $step2Template = str_replace('{{scrape-url}}', htmlspecialchars($scrapeUrl, ENT_QUOTES, 'UTF-8'), $step2Template);
            return $step2Template;

        default:
            return $this->getThemeFragment('generation-step1.htmlf');
    }
}
/**
 * Run a shell command. This function returns STD_ERR as well unlike
 * PHPs inbuilt shell commands like shell_exec() or passthru() or system().
 *
 * Without $dataToStdIn the command is started through popen() with stderr
 * merged into stdout, and the combined output is returned as a string. With
 * $dataToStdIn the command is started through proc_open(), the data is
 * written to its stdin, and stdout, stderr and the exit status are returned
 * separately.
 *
 * @param string      $command          Shell command line.
 * @param int|null    $timeoutInSeconds Read timeout; null means 120. 0 (only
 *                                      without stdin data) starts the command
 *                                      and returns without reading output.
 * @param string|null $dataToStdIn      Data to feed to the command's stdin.
 * @param bool        $haltOnError      Call webServiceError() when the command
 *                                      cannot be started or, with stdin data,
 *                                      exits with a non-zero status.
 * @return string|array|null Combined output (no stdin data), or
 *                           array('stdOut', 'statusCode', 'stdErr') (with
 *                           stdin data). null when the command could not be
 *                           started or the timeout was 0.
 */
function shellCommand($command, $timeoutInSeconds = null, $dataToStdIn = null, $haltOnError = false)
{
    if ($timeoutInSeconds === null) {
        $timeoutInSeconds = 120;
    }
    $usesStdIn = (bool) $dataToStdIn;
    $pipes = array();

    if ($usesStdIn) {
        $descriptor = array(0 => array("pipe", "r"), 1 => array("pipe", "w"), 2 => array("pipe", "w"));
        $currentWorkingDirectory = getOperatingSystemsTemporaryDirectory();
        $environmentVariables = array();
        $process = proc_open($command, $descriptor, $pipes, $currentWorkingDirectory, $environmentVariables);
    } else {
        $process = popen("({$command})2>&1&", "r");
    }

    // Checked before any pipe is touched: writing to $pipes[0] after a failed
    // proc_open() used to raise errors before this guard was reached.
    if (!is_resource($process)) {
        if ($haltOnError) {
            webServiceError($command);
        }
        if (!$usesStdIn) {
            return null;
        }
        return array('stdOut' => null, 'statusCode' => -1, 'stdErr' => null);
    }

    if ($usesStdIn) {
        fwrite($pipes[0], $dataToStdIn);
        fclose($pipes[0]);
        stream_set_timeout($pipes[1], $timeoutInSeconds);
        stream_set_timeout($pipes[2], $timeoutInSeconds);
        $readPipes = array($pipes[1], $pipes[2]);
    } else {
        if ($timeoutInSeconds == 0) {
            // Fire and forget: the command was started in the background.
            pclose($process);
            return null;
        }
        $readPipes = array($process);
    }

    $response = array();
    $endTime = microtime(true) + (double) $timeoutInSeconds;
    foreach ($readPipes as $pipe) {
        if (!is_resource($pipe)) {
            continue;
        }
        $returnValue = null;
        while (!feof($pipe)) {
            $returnValue .= fgets($pipe, 8);
            $streamInfo = stream_get_meta_data($pipe);
            if ($streamInfo['timed_out'] === true || microtime(true) > $endTime) {
                $returnValue .= 'Docvert timeout';
                break;
            }
        }
        $response[] = $returnValue;
        // proc_open() pipes are plain streams; only popen() handles are
        // closed with pclose().
        if ($usesStdIn) {
            fclose($pipe);
        } else {
            pclose($pipe);
        }
    }

    if (!$usesStdIn) {
        return $response[0];
    }

    $statusCode = proc_close($process);
    if ($statusCode !== 0 && $haltOnError) {
        // Report what the command printed; the pipe handles previously
        // imploded here carried no information.
        webServiceError($statusCode . ' ' . implode(' ', $response));
    }
    return array('stdOut' => $response[0], 'statusCode' => $statusCode, 'stdErr' => $response[1]);
}
} } if (!$files || !$converter || !$pipeline || !$outputZip || $extraParameters) { $commandLineHelp = "\n" . $extraParameters; $commandLineHelp .= '&error-command-line-help;'; $errorPlaceholders['commandLineFiles'] = ''; $errorPlaceholders['commandLineConverter'] = ''; $errorPlaceholders['commandLinePipeline'] = ''; $errorPlaceholders['commandLineOutputZip'] = ''; if (!$files) { $errorPlaceholders['commandLineFiles'] = ' --input-files '; } if (!$converter) { $errorPlaceholders['commandLineConverter'] = ' --converter'; } if (!$pipeline) { $errorPlaceholders['commandLinePipeline'] = ' --pipeline'; } if (!$outputZip) { $errorPlaceholders['commandLineOutputZip'] = ' --output-zip'; } webServiceError($commandLineHelp, 400, $errorPlaceholders); } //print_r($files); //print $converter; //print $pipeline; //print $autoPipeline; //print $afterConversion; //print $setupOpenOfficeOrg; //print $outputZip; processConversion($files, $converter, $pipeline, $autoPipeline, $afterConversion, $setupOpenOfficeOrg, $outputZip);