/**
 * Converts OpenDocument 1.0 content XML into DocBook XML.
 *
 * The markup from getStyles() is spliced in directly after the opening
 * <office:document-content> tag, the result is run through the two
 * fix-opendocument-content stylesheets and the lib/sun-xsl conversion
 * stylesheet, the markup from getMetaData() is spliced in after the opening
 * <db:book> tag, and fix-docbook.xsl is applied last. Every other detected
 * document type is reported through webServiceError().
 *
 * @param string $currentXml XML to convert.
 * @return string The converted XML; the input comes back unchanged if
 *                webServiceError() returns for a rejected document type.
 */
function process($currentXml)
 {
     $parentDirectory = dirname($this->docvertTransformDirectory);
     $detectedType = detectDocumentType($currentXml);
     if ($detectedType == 'OpenDocument1.0') {
         $styleMarkup = $this->getStyles();
         $currentXml = preg_replace('/<office:document-content[^>]*?>/s', "\$0" . $styleMarkup, $currentXml);
         // Two clean-up passes precede the actual DocBook conversion.
         foreach (array('fix-opendocument-content.xsl', 'fix-opendocument-content-stage2.xsl') as $fixStylesheet) {
             $currentXml = xsltTransform($currentXml, $this->docvertTransformDirectory . $fixStylesheet);
         }
         $sunStylesheet = implode(DIRECTORY_SEPARATOR, array($parentDirectory, 'lib', 'sun-xsl', 'opendocument-content-to-docbook.xsl'));
         $currentXml = xsltTransform($currentXml, $sunStylesheet);
         $metadataMarkup = $this->getMetaData();
         $currentXml = preg_replace('/<db:book[^>]*?>/s', "\$0" . $metadataMarkup, $currentXml);
         $currentXml = xsltTransform($currentXml, $this->docvertTransformDirectory . 'fix-docbook.xsl');
     } elseif ($detectedType == 'OpenOffice1.x') {
         webServiceError('&error-process-transformopendocumenttodocbook-openofficefile;');
     } else {
         webServiceError('&error-process-transformopendocumenttodocbook-unsupported-file; [' . revealXml($currentXml) . ']', 500, array('documentType' => $detectedType));
     }
     return $currentXml;
 }
Пример #2
0
/**
 * Opens an FTP connection to a host and logs in with the given credentials.
 *
 * The port argument is honoured when it is a positive number; otherwise the
 * standard FTP port 21 is used. A failed connection and a rejected login are
 * both reported through webServiceError() with the authentication error key.
 *
 * @param string     $host     Host name or address of the FTP server.
 * @param string     $username Login name.
 * @param string     $password Login password.
 * @param int|string $port     TCP port of the FTP server.
 * @return mixed The connection handle returned by ftp_connect(), or false if
 *               connecting failed and webServiceError() returned.
 */
function getFtpConnection($host, $username, $password, $port)
{
    // Fall back to the default FTP port when no usable port was supplied.
    $portNumber = (is_numeric($port) && (int) $port > 0) ? (int) $port : 21;
    $connectionId = ftp_connect($host, $portNumber);
    // ftp_login() must never see a failed (false) connection handle, so the
    // connection result is checked first and short-circuits the login call.
    if ($connectionId === false || !@ftp_login($connectionId, $username, $password)) {
        webServiceError('&error-ftp-authentication;', 500, array('username' => $username, 'host' => $host));
    }
    return $connectionId;
}
Пример #3
0
 /**
  * Outputs the current XML for inspection.
  *
  * Web clients get it through displayXmlString(); every other client type
  * gets it as the message of a webServiceError() call with status 200.
  *
  * @param string $currentXml XML to show.
  * @return void
  */
 public function process($currentXml)
 {
     if (DOCVERT_CLIENT_TYPE != 'web') {
         webServiceError($currentXml, 200);
         return;
     }
     displayXmlString($currentXml);
 }
Пример #4
0
 /**
  * Applies the XSLT stylesheet named by the 'withFile' attribute.
  *
  * The stylesheet path is resolved against the pipeline directory. A missing
  * attribute or a missing file is reported through webServiceError(). The
  * current loop depth is stored in the element attributes under 'loopdepth'
  * and all attributes are handed to the stylesheet as arguments.
  *
  * @param string $currentXml XML to transform.
  * @return mixed Whatever xsltTransform() returns for the stylesheet.
  */
 public function process($currentXml)
 {
     if (array_key_exists('withFile', $this->elementAttributes) === false) {
         webServiceError('&error-process-transform-lacks-withfile;');
     }
     $stylesheetPath = $this->pipelineDirectory . $this->elementAttributes['withFile'];
     if (file_exists($stylesheetPath) === false) {
         webServiceError('&error-process-transform-withfile-missing-file;', 500, array('xslPath' => $stylesheetPath));
     }
     // Stored on the object, not on a copy, so the attribute stays set afterwards.
     $this->elementAttributes['loopdepth'] = $this->loopDepth;
     $transformedXml = xsltTransform($currentXml, $stylesheetPath, $this->elementAttributes);
     return $transformedXml;
 }
Пример #5
0
 /**
  * Writes the current XML to the file named by the 'toFile' attribute.
  *
  * The attribute value is passed through replaceCustomFilenamePlaceholders()
  * and processDepthTemplate() (both given the depth array) to obtain the
  * file name, which is then created inside the content directory. A missing
  * attribute is reported through webServiceError().
  *
  * @param string $currentXml XML to serialize.
  * @return string The same XML, unchanged.
  */
 public function process($currentXml)
 {
     $hasTargetFile = array_key_exists('toFile', $this->elementAttributes);
     if ($hasTargetFile === false) {
         webServiceError('&error-process-serialize-no-with-file;');
     }
     $filenameTemplate = $this->elementAttributes['toFile'];
     // custom-filenames.php lives one directory above this file's directory.
     include_once dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'custom-filenames.php';
     $filenameTemplate = replaceCustomFilenamePlaceholders($filenameTemplate, $this->depthArray);
     $outputName = processDepthTemplate($filenameTemplate, $this->depthArray);
     $outputPath = $this->contentDirectory . DIRECTORY_SEPARATOR . $outputName;
     file_put_contents($outputPath, $currentXml);
     return $currentXml;
 }
Пример #6
0
/**
 * Runs an XSLT stylesheet over an XML string.
 *
 * The engine is chosen from getXsltEnabledStatus(): 'php5' uses
 * XSLTProcessor with DOMDocument, 'php4' uses the xslt_* functions, and any
 * other status is reported through webServiceError().
 *
 * @param string     $xmlString     XML document to transform.
 * @param string     $xsltPath      Filesystem path of the stylesheet.
 * @param array|null $xsltArguments Optional stylesheet parameters (name => value).
 * @return mixed The transformation output, or null when no engine is
 *               available and webServiceError() returns.
 */
function xsltTransform($xmlString, $xsltPath, $xsltArguments = null)
{
    if (!file_exists($xsltPath)) {
        webServiceError('&error-xslt-path-not-found;', 500, array('path' => $xsltPath));
    }
    $engine = getXsltEnabledStatus();

    if ($engine == 'php5') {
        $processor = new XSLTProcessor();
        $stylesheet = new DOMDocument();
        $stylesheet->load($xsltPath);
        $processor->importStyleSheet($stylesheet);
        if (is_array($xsltArguments)) {
            foreach ($xsltArguments as $parameterName => $parameterValue) {
                $processor->setParameter('', $parameterName, $parameterValue);
            }
        }
        $sourceDocument = new DOMDocument();
        $sourceDocument->loadXML($xmlString);
        return $processor->transformToXML($sourceDocument);
    }

    if ($engine == 'php4') {
        $handle = xslt_create();
        // The XML is handed over as a named argument buffer.
        $argumentBuffers = array('/_xml' => $xmlString);
        $stylesheetUri = 'file://' . $xsltPath;
        $result = @xslt_process($handle, 'arg:/_xml', $stylesheetUri, NULL, $argumentBuffers, $xsltArguments);
        if (!$result) {
            webServiceError('&error-xslt-processor-error;', 500, array('path' => $stylesheetUri, 'errorMessage' => xslt_error($handle)));
        }
        if (empty($result) or xslt_error($handle) != null) {
            webServiceError('&error-xslt-processor-error;', 500, array('path' => $stylesheetUri, 'errorMessage' => xslt_error($handle)));
        }
        xslt_free($handle);
        return $result;
    }

    // No XSLT engine: say whether the PHP version or the extension is the problem.
    $phpVersion = getPhpVersion();
    if ($phpVersion >= 5) {
        webServiceError('&error-xslt-not-available;');
    } else {
        webServiceError('&error-php5-required;', 500, array('phpVersion' => $phpVersion));
    }
    return null;
}
Пример #7
0
 /**
  * Runs the test stylesheet named by the 'withFile' attribute and logs its output.
  *
  * A value containing 'internal://' (case-insensitive) is resolved against
  * the Docvert transform directory with that marker removed; any other value
  * is resolved against the pipeline directory. A missing attribute or file is
  * reported through webServiceError(). Output that is non-empty after
  * trimming is logged as a 'raw' error.
  *
  * @param string $currentXml XML to test (taken by reference, not modified here).
  * @return void
  */
 function performTest(&$currentXml)
 {
     if (!array_key_exists('withFile', $this->elementAttributes)) {
         webServiceError('&error-process-test-withfile;');
     }
     $isInternal = stripos($this->elementAttributes['withFile'], 'internal://') !== FALSE;
     $xslPath = $isInternal
         ? $this->docvertTransformDirectory . str_ireplace('internal://', '', $this->elementAttributes['withFile'])
         : $this->pipelineDirectory . $this->elementAttributes['withFile'];
     if (!file_exists($xslPath)) {
         webServiceError('&error-process-test-missing-xsl;', 500, array('xslPath' => $xslPath));
     }
     $testReport = xsltTransform($currentXml, $xslPath, array('loopdepth' => $this->loopDepth));
     if (trim($testReport)) {
         $this->logError($testReport, 'raw');
     }
 }
Пример #8
0
/**
 * Uploads a preview directory using the protocol of the upload location.
 *
 * 'ftp' and 'ftp-pasv' go to copyViaFtpRecursively() in "active" and
 * "passive" mode respectively, 'webdav'/'webdav-tls' go to
 * copyViaWebDAVRecursively(), and 'bloggerapi'/'bloggerapi-ssl' go to
 * copyViaBloggerAPI(). Any other protocol is reported through
 * webServiceError().
 *
 * @param array  $uploadLocation   Upload settings; the 'protocol' key selects the handler.
 * @param string $previewDirectory Local directory to upload.
 * @param string $remoteDirectory  Destination directory.
 * @return mixed The selected handler's return value, or null for an unknown protocol.
 */
function uploadToUploadLocation($uploadLocation, $previewDirectory, $remoteDirectory)
{
    $protocol = $uploadLocation['protocol'];
    if ($protocol == 'ftp') {
        return copyViaFtpRecursively($uploadLocation, $previewDirectory, $remoteDirectory, "active");
    }
    if ($protocol == 'ftp-pasv') {
        return copyViaFtpRecursively($uploadLocation, $previewDirectory, $remoteDirectory, "passive");
    }
    if ($protocol == 'webdav' || $protocol == 'webdav-tls') {
        return copyViaWebDAVRecursively($uploadLocation, $previewDirectory, $remoteDirectory);
    }
    if ($protocol == 'bloggerapi' || $protocol == 'bloggerapi-ssl') {
        return copyViaBloggerAPI($uploadLocation, $previewDirectory, $remoteDirectory);
    }
    webServiceError('&error-unknown-protocol; ' . $uploadLocation['protocol']);
}
Пример #9
0
 /**
  * Copies the files named by the 'withFiles' attribute into the content directory.
  *
  * The attribute is a comma-separated list of glob patterns relative to the
  * pipeline directory. Each match is copied with copyRecursively() to the
  * content directory under its base name. A pattern for which glob() fails
  * is logged as an error; a missing attribute is reported through
  * webServiceError().
  *
  * @param string $currentXml XML passing through this stage.
  * @return string The same XML, unchanged.
  */
 public function process($currentXml)
 {
     if (!array_key_exists('withFiles', $this->elementAttributes)) {
         webServiceError('&error-process-includedependentfiles-withfiles;');
     }
     foreach (explode(',', $this->elementAttributes['withFiles']) as $rawPattern) {
         $pattern = trim($rawPattern);
         if (!$pattern) {
             // Empty list entries (e.g. from a trailing comma) are ignored.
             continue;
         }
         $globPattern = $this->pipelineDirectory . $pattern;
         $matchedPaths = glob($globPattern);
         if ($matchedPaths === False) {
             $this->logError(array('&error-unable-to-read-directory;', array('path' => $globPattern)), 'error');
             continue;
         }
         foreach ($matchedPaths as $matchedPath) {
             $copyTarget = $this->contentDirectory . DIRECTORY_SEPARATOR . basename($matchedPath);
             $this->copyRecursively($matchedPath, $copyTarget);
         }
     }
     return $currentXml;
 }
Пример #10
0
/**
 * Returns a writable temporary directory, remembering the choice in a constant.
 *
 * Candidates are tried in order: $_ENV['TMPDIR'], $_ENV['TMP'], then '/tmp/'
 * when the directory separator is '/', otherwise '\temp\' and
 * '\windows\temp\'. The first writable candidate is stored in the constant
 * OPERATING_SYSTEM_TEMPORARY_DIRECTORY and returned; later calls return the
 * constant directly. If no candidate is writable the problem is reported
 * through webServiceError().
 *
 * @return string|null The chosen directory, or null if none is writable and
 *                     webServiceError() returns.
 */
function getOperatingSystemsTemporaryDirectory()
{
    if (defined('OPERATING_SYSTEM_TEMPORARY_DIRECTORY')) {
        return OPERATING_SYSTEM_TEMPORARY_DIRECTORY;
    }
    $candidates = array();
    if (isset($_ENV)) {
        foreach (array('TMPDIR', 'TMP') as $variableName) {
            if (isset($_ENV[$variableName])) {
                $candidates[] = $_ENV[$variableName];
            }
        }
    }
    $platformDefaults = DIRECTORY_SEPARATOR == '/'
        ? array('/tmp/')
        : array('\\temp\\', '\\windows\\temp\\');
    $candidates = array_merge($candidates, $platformDefaults);
    foreach ($candidates as $candidate) {
        if (!is_writable($candidate)) {
            continue;
        }
        define('OPERATING_SYSTEM_TEMPORARY_DIRECTORY', $candidate);
        return OPERATING_SYSTEM_TEMPORARY_DIRECTORY;
    }
    webServiceError('&error-config-file-not-writable;', 500, array('path' => implode(', ', $candidates)));
}
Пример #11
0
// Strip the opening and closing <docvert-remove-me> wrapper tags from the DocBook body.
$docbookBody = preg_replace("/<docvert-remove-me[^>]*?>/", '', $docbookBody);
$docbookBody = preg_replace("/<\\/docvert-remove-me[^>]*?>/", '', $docbookBody);
//displayXmlString($docbookBody);
//displayXmlString(file_get_contents($docbookBodyXsltPath));
// Preview files live under <this directory>/writable/<part 0>/<part 1>/.
$allDocumentsPreviewDirectory = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'writable' . DIRECTORY_SEPARATOR . $documentPathParts[0] . DIRECTORY_SEPARATOR;
$previewDirectory = $allDocumentsPreviewDirectory . $documentPathParts[1] . DIRECTORY_SEPARATOR;
// Remove a test.html left in the preview directory, if there is one.
$unitTestResults = $previewDirectory . 'test.html';
if (file_exists($unitTestResults)) {
    silentlyUnlink($unitTestResults);
}
// Delete every existing ZIP in the parent preview directory; a ZIP that still
// exists after the delete attempt is reported as an internal error.
$zipsInPreviewDirectory = glob($allDocumentsPreviewDirectory . '*.zip');
if (count($zipsInPreviewDirectory)) {
    foreach ($zipsInPreviewDirectory as $zipInPreviewDirectory) {
        silentlyUnlink($zipInPreviewDirectory);
        if (file_exists($zipInPreviewDirectory)) {
            webServiceError('Docvert internal error: unable to remove ZIP file at "' . $zipInPreviewDirectory . '"');
        }
    }
    // Reuse the path of the first ZIP that was just deleted.
    $zipFilePath = $zipsInPreviewDirectory[0];
} else {
    // No previous ZIP: have chooseNameOfZipFile() supply a file name.
    $zipFileName = chooseNameOfZipFile($allDocumentsPreviewDirectory);
    $zipFilePath = $allDocumentsPreviewDirectory . $zipFileName;
}
// Clear out the preview directory, keeping files whose name starts with
// "docvert" or ends with wmf, gif, png, jpeg, jpg or svg.
$filesInPreviewDirectory = glob($previewDirectory . '*');
foreach ($filesInPreviewDirectory as $fileInPreviewDirectory) {
    if (!stringStartsWith(basename($fileInPreviewDirectory), "docvert") && !stringEndsWith(basename($fileInPreviewDirectory), "wmf") && !stringEndsWith(basename($fileInPreviewDirectory), "gif") && !stringEndsWith(basename($fileInPreviewDirectory), "png") && !stringEndsWith(basename($fileInPreviewDirectory), "jpeg") && !stringEndsWith(basename($fileInPreviewDirectory), "jpg") && !stringEndsWith(basename($fileInPreviewDirectory), "svg")) {
        //print 'Delete: '.$fileInPreviewDirectory.'<br />';
        silentlyUnlink($fileInPreviewDirectory);
    } else {
        //print 'Retain: '.$fileInPreviewDirectory.'<br />';
    }
Пример #12
0
 /**
  * Downloads the images referenced by the pages and rewrites image and link URLs.
  *
  * extract-pages-html-images-and-links.xsl is expected to emit one line per
  * URL with three tab-separated fields: type ('image' or 'link'), base URL,
  * and the possibly relative URL. Each URL is made absolute. Images are
  * fetched and stored as Pictures/<md5 of the URL>.<extension> inside the
  * content directory, and their quoted reference in the XML is replaced by
  * the stored path plus c:width/c:height attributes. Links have their quoted
  * reference replaced by the absolute URL. An image that cannot be fetched
  * is replaced by the 404 placeholder image.
  *
  * @param string $currentXml XML containing the scraped pages.
  * @return string The XML with image and link references rewritten.
  */
 function process($currentXml)
 {
     $extractImagesPath = $this->docvertTransformDirectory . 'extract-pages-html-images-and-links.xsl';
     $htmlUrls = trim(xsltTransform($currentXml, $extractImagesPath));
     $htmlUrlLines = explode("\n", $htmlUrls);
     $missingImagePlaceholderImagePath = dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'files' . DIRECTORY_SEPARATOR . '404image.gif';
     foreach ($htmlUrlLines as $htmlUrlLine) {
         if (trim($htmlUrlLine) == '') {
             continue;
         }
         $urlLineParts = explode("\t", $htmlUrlLine);
         if (count($urlLineParts) < 3) {
             // A line without all three fields has no URL to rewrite; processing
             // it would make the replacements below match every empty "" in the XML.
             continue;
         }
         $urlType = $urlLineParts[0];
         $baseUrl = $urlLineParts[1];
         $possiblyRelativeUrl = $urlLineParts[2];
         $fullUrl = '';
         if (stringStartsWith($possiblyRelativeUrl, "http://") || stringStartsWith($possiblyRelativeUrl, "https://") || stringStartsWith($possiblyRelativeUrl, "mailto:")) {
             $fullUrl = $possiblyRelativeUrl;
         } else {
             $connectionPart = getUrlConnectionPart($baseUrl);
             $localPartDirectory = getUrlLocalPartDirectory($baseUrl);
             if (stringStartsWith($possiblyRelativeUrl, '/')) {
                 // Host-relative URL: prefix the connection part of the base URL.
                 $fullUrl = $connectionPart . $possiblyRelativeUrl;
             } else {
                 // Directory-relative URL: resolve against the base URL's directory.
                 $relativePath = resolveRelativeUrl($localPartDirectory . $possiblyRelativeUrl);
                 if (!stringStartsWith($relativePath, '/')) {
                     $relativePath = '/' . $relativePath;
                 }
                 $fullUrl = $connectionPart . $relativePath;
             }
         }
         if (!file_exists($missingImagePlaceholderImagePath)) {
             webServiceError('&dynamic-error-process-downloadimagesandsetlinks-missing-placeholder;', 500, array('fourOhFourImagePath' => $missingImagePlaceholderImagePath));
         }
         $fullUrl = html_entity_decode($fullUrl);
         switch ($urlType) {
             case 'image':
                 $imageData = file_get_contents($fullUrl);
                 if ($imageData == null) {
                     // Download failed or returned nothing: fall back to the placeholder.
                     $imageData = file_get_contents($missingImagePlaceholderImagePath);
                 }
                 $picturesDirectory = $this->contentDirectory . DIRECTORY_SEPARATOR . 'Pictures';
                 if (!file_exists($picturesDirectory)) {
                     mkdir($picturesDirectory);
                 }
                 if (!function_exists('imagecreatefromstring')) {
                     webServiceError('&error-process-downloadimagesandsetlinks-missing-gd;');
                 }
                 // The image is decoded only to learn its pixel dimensions.
                 $imageResource = imagecreatefromstring($imageData);
                 if (!$imageResource) {
                     $imageResource = imagecreatefromstring(file_get_contents($missingImagePlaceholderImagePath));
                 }
                 $imageWidth = imagesx($imageResource);
                 $imageHeight = imagesy($imageResource);
                 $fileExtension = substr($fullUrl, strrpos($fullUrl, '.') + 1);
                 switch ($fileExtension) {
                     case 'jpg':
                     case 'jpeg':
                     case 'gif':
                     case 'png':
                         break;
                     default:
                         $fileExtension = 'jpg';
                 }
                 $openDocumentPath = 'Pictures/' . md5($fullUrl) . '.' . $fileExtension;
                 file_put_contents($this->contentDirectory . DIRECTORY_SEPARATOR . $openDocumentPath, $imageData);
                 // The replacement closes the src value early so that the width and
                 // height attributes land inside the same element.
                 $imageUrlReplacement = $openDocumentPath . '" c:width="' . $imageWidth . '" c:height="' . $imageHeight;
                 //FIXME: assumes image @src has double-quote and not single
                 $currentXml = str_replace('"' . $possiblyRelativeUrl . '"', '"' . $imageUrlReplacement . '"', $currentXml);
                 break;
             case 'link':
                 $currentXml = str_replace('"' . $possiblyRelativeUrl . '"', '"' . htmlentities($fullUrl) . '"', $currentXml);
                 break;
         }
     }
     return $currentXml;
 }
Пример #13
0
/**
 * Follows HTTP redirects for a URL and returns the final URL.
 *
 * A HEAD request is issued through pullpage() for the current URL and the
 * response is scanned for Location headers. A Location that differs from the
 * requested URL counts as a redirect and is followed. The loop ends as soon
 * as a response carries no such redirect. Exceeding the allowed number of
 * redirects is reported through webServiceError().
 *
 * @param string    $url                        URL to resolve.
 * @param int|false $maximumNumberOfOfRedirects Maximum redirects to follow; false means 10.
 * @return string|null The final URL, or null (not false) when a HEAD request
 *                     fails or the redirect limit is exceeded and
 *                     webServiceError() returns.
 */
function followUrlRedirects($url, $maximumNumberOfOfRedirects = false)
{
    if ($maximumNumberOfOfRedirects === false) {
        $maximumNumberOfOfRedirects = 10;
    }
    $redirectPath = array();
    $numberOfRedirectsRemaining = $maximumNumberOfOfRedirects;
    $timeOutInSeconds = 5;
    while (true) {
        $previousUrl = $url;
        $wasRedirectedThisTime = false;
        $domainAndPort = getUrlDomainAndPortPart($url);
        $redirectPath[] = $url;
        $localPart = getUrlLocalPart($url);
        $result = pullpage('HEAD', $domainAndPort[0], $domainAndPort[1], $localPart, false, false, false, false, null, null, false, $timeOutInSeconds);
        if ($result === false) {
            return null;
        }
        foreach (explode("\n", $result) as $line) {
            $headParts = explode(':', $line);
            if (count($headParts) > 1) {
                // Everything after the first colon is the header value (it may
                // itself contain colons, as URLs do).
                $subject = array_shift($headParts);
                $value = implode(':', $headParts);
                if (strtolower(trim($subject)) == "location") {
                    $url = trim($value);
                    if (!stringStartsWith($url, 'http://') && !stringStartsWith($url, 'https://')) {
                        // NOTE(review): a relative Location is rebuilt with the http scheme
                        // and without the port, so https or non-default ports are lost — confirm.
                        $url = 'http://' . $domainAndPort[0] . $url;
                    }
                    if ($previousUrl != $url) {
                        $wasRedirectedThisTime = true;
                    }
                }
            }
        }
        if (!$wasRedirectedThisTime) {
            // Final destination reached; the redirect budget no longer matters.
            return $url;
        }
        if ($numberOfRedirectsRemaining <= 0) {
            webServiceError("&error-maximum-number-of-redirects-followed;", 500, array('redirectPaths' => implode("</li><li>", $redirectPath)));
            return null;
        }
        $numberOfRedirectsRemaining--;
    }
}
Пример #14
0
 /**
  * Converts a WMF/EMF image referenced by the document into a PDF file.
  *
  * The image's frame size is read from the document XML, a minimal ODT
  * containing only that image is assembled from the template files under
  * core/files, and the ODT is piped to pyodconverter.py, whose output is
  * saved as a .pdf next to the original image.
  *
  * @param string $imagePath  Path of the WMF/EMF file.
  * @param string $currentXml Document XML (taken by reference, not modified here).
  * @return array|false Array with 'width', 'height' and 'path' of the PDF, or
  *                     False when the image's file name does not occur in the XML.
  */
 function wmfOrEmfToPdf($imagePath, &$currentXml)
 {
     //Step 1. Detect width/height of image.
     $imageName = basename($imagePath);
     $imageOffset = strpos($currentXml, $imageName);
     if ($imageOffset === False) {
         //image not in document, don't worry about it.
         return False;
     }
     //header('Content-type: text/xml'); die($currentXml);
     $dom = simplexml_load_string($currentXml);
     // Find the draw:frame that encloses the element referencing this image.
     $xpath = "//*[@xlink:href='" . $imageName . "']//parent::draw:frame";
     $imageMatch = $dom->xpath($xpath);
     if (count($imageMatch) == 0) {
         webServiceError('&error-process-convertimages-no-dom;', 500, array('xpath' => $xpath));
     }
     $imageMatch = $imageMatch[0];
     // 'svg' is given as a namespace prefix (second argument true).
     $attributes = $imageMatch->attributes('svg', true);
     $width = (string) $attributes['width'];
     $height = (string) $attributes['height'];
     //Step 2. Make an ODT file containing only the WMF/EMF
     // (ugh.. I know, but it works and it's reliable because we benefit from OpenOffice's years of
     // reverse-engineering the EMF/WMF formats so really we should get over it)
     //step 2a -- make a working directory for our OpenDocument file and copy the files in
     $workingDirectory = getTemporaryDirectoryInsideDirectory($this->contentDirectory);
     mkdir($workingDirectory . DIRECTORY_SEPARATOR . 'Pictures');
     $destinationImagePath = $workingDirectory . DIRECTORY_SEPARATOR . 'Pictures' . DIRECTORY_SEPARATOR . basename($imagePath);
     copy($imagePath, $destinationImagePath);
     $odtTemplateDirectory = DOCVERT_DIR . 'core' . DIRECTORY_SEPARATOR . 'files' . DIRECTORY_SEPARATOR;
     // The page size in styles.xml is set to the image's frame size.
     $stylesXml = file_get_contents($odtTemplateDirectory . 'styles.xml');
     $stylesXml = str_replace('{{page-width}}', $width, $stylesXml);
     $stylesXml = str_replace('{{page-height}}', $height, $stylesXml);
     file_put_contents($workingDirectory . DIRECTORY_SEPARATOR . 'styles.xml', $stylesXml);
     // These template files are copied without modification.
     copy($odtTemplateDirectory . 'settings.xml', $workingDirectory . DIRECTORY_SEPARATOR . 'settings.xml');
     copy($odtTemplateDirectory . 'meta.xml', $workingDirectory . DIRECTORY_SEPARATOR . 'meta.xml');
     copy($odtTemplateDirectory . 'manifest.rdf', $workingDirectory . DIRECTORY_SEPARATOR . 'manifest.rdf');
     copy($odtTemplateDirectory . 'mimetype', $workingDirectory . DIRECTORY_SEPARATOR . 'mimetype');
     // content.xml receives a single paragraph holding the image frame.
     $contentXml = file_get_contents($odtTemplateDirectory . 'content.xml');
     $imageTemplate = '<text:p><draw:frame text:anchor-type="as-char" svg:width="{{width}}" svg:height="{{height}}" draw:z-index="1"><draw:image xlink:href="{{path}}" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad"/></draw:frame></text:p>';
     $imageString = str_replace('{{width}}', $width, $imageTemplate);
     $imageString = str_replace('{{height}}', $height, $imageString);
     $imageString = str_replace('{{path}}', 'Pictures/' . basename($destinationImagePath), $imageString);
     $contentXml = str_replace('<!--{{content}}-->', $imageString, $contentXml);
     file_put_contents($workingDirectory . DIRECTORY_SEPARATOR . 'content.xml', $contentXml);
     // The manifest gets one file entry for the image, with an empty media type.
     mkdir($workingDirectory . DIRECTORY_SEPARATOR . 'META-INF');
     $manifestXml = file_get_contents($odtTemplateDirectory . 'manifest.xml');
     $manifestItemTemplate = ' <manifest:file-entry manifest:media-type="" manifest:full-path="{{path}}"/>';
     $manifestItem = str_replace('{{path}}', 'Pictures/' . basename($imagePath), $manifestItemTemplate);
     $manifestXml = str_replace('<!--{{content}}-->', $manifestItem, $manifestXml);
     file_put_contents($workingDirectory . DIRECTORY_SEPARATOR . 'META-INF' . DIRECTORY_SEPARATOR . 'manifest.xml', $manifestXml);
     //step 2b zip it into an ODT
     $zipPath = $this->contentDirectory . DIRECTORY_SEPARATOR . basename($imagePath) . '.odt';
     $zipPath = zipFiles($workingDirectory, $zipPath);
     // Keep the ODT bytes in memory, then clean up the temporary file and directory.
     // NOTE(review): presumably silentlyUnlink() also removes directories — confirm.
     $zipData = file_get_contents($zipPath);
     silentlyUnlink($zipPath);
     silentlyUnlink($workingDirectory);
     //Step 3 . Stream it to PyODConverter. Make a PDF and save it.
     $pyodConverterPath = DOCVERT_DIR . 'core' . DIRECTORY_SEPARATOR . 'lib' . DIRECTORY_SEPARATOR . 'pyodconverter' . DIRECTORY_SEPARATOR . 'pyodconverter.py';
     if (!file_exists($pyodConverterPath)) {
         die("Can't find PyODconverter at " . htmlentities($pyodConverterPath));
     }
     // NOTE(review): the script path is not shell-escaped; presumably 20 is a timeout
     // in seconds and $zipData is fed to the process's standard input — confirm
     // against shellCommand().
     $command = $pyodConverterPath . ' --stream --pdf';
     $response = shellCommand($command, 20, $zipData, false);
     // A PDF must begin with the bytes "%PDF"; anything else is treated as a failure.
     $pdfMagicBytes = '%PDF';
     if (substr($response['stdOut'], 0, strlen($pdfMagicBytes)) != $pdfMagicBytes) {
         die("Expected a PDF response was didn't receive one. Received back " . htmlentities(print_r($response, true)));
     }
     // Save the PDF beside the original image, swapping the extension for .pdf.
     $imagePathInfo = pathinfo($imagePath);
     $pdfPath = dirname($imagePath) . DIRECTORY_SEPARATOR . basename($imagePath, '.' . $imagePathInfo['extension']) . '.pdf';
     file_put_contents($pdfPath, $response['stdOut']);
     return array('width' => $width, 'height' => $height, 'path' => $pdfPath);
 }
Пример #15
0
/**
 * Scrapes web pages into an OpenDocument file and streams it to the client.
 *
 * Every http:// or https:// page is downloaded, cleaned with tidy, stripped
 * of its doctype, XML comments, <style>, <script> and <?...?> sections, and
 * wrapped in a <c:page> element of a container document. That document is
 * run through the pipeline.xml of the named generator pipeline, the
 * temporary directory is zipped to output.odt, and the file is sent with
 * download headers.
 *
 * @param array  $pages             URLs of the pages to include.
 * @param string $generatorPipeline Name of a directory under generator-pipeline/.
 * @return void
 */
function generateDocument($pages, $generatorPipeline)
{
    // The pipeline name is used as a directory name below, so it must be a
    // plain name: no path separators, no NUL byte, and not '.' or '..'.
    $isUnsafePipelineName = !is_string($generatorPipeline)
        || strpbrk($generatorPipeline, "/\\\0") !== false
        || $generatorPipeline === '.'
        || $generatorPipeline === '..';
    if ($isUnsafePipelineName) {
        webServiceError('&error-disallowed-characters;');
    }
    $userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:bignumber) Docvert';
    $httpContextOptions = array('http' => array('header' => 'User-Agent: ' . $userAgent));
    $httpContext = stream_context_create($httpContextOptions);
    // NOTE(review): the return value is not used; the call is kept in case it
    // has side effects — confirm against getWritableDirectory().
    getWritableDirectory();
    $disallowDocumentGeneration = getGlobalConfigItem('doNotAllowDocumentGeneration');
    if ($disallowDocumentGeneration == 'true') {
        webServiceError('&document-generation-disabled;');
    }
    $pageXml = '<c:document xmlns="http://www.w3.org/1999/xhtml" xmlns:c="container">' . "\n";
    $pageTemplate = "\n\t" . '<c:page url="{{url}}" {{baseUrl}}>{{page}}</c:page>' . "\n";
    $config = array('indent' => true, 'output-xhtml' => true, 'wrap' => 200);
    if (!class_exists('tidy')) {
        webServiceError('&tidy-is-not-installed;');
    }
    $tidy = new tidy();
    $baseTagPattern = "/<base[^>]*?href=([^>]*?)>/is";
    foreach ($pages as $page) {
        if (trim($page) != '' && (stringStartsWith($page, 'http://') || stringStartsWith($page, 'https://'))) {
            $pageHtml = file_get_contents($page, false, $httpContext);
            if ($pageHtml === false) {
                // A failed download still yields an (empty) page entry.
                $pageHtml = '';
            }
            $tidy->parseString($pageHtml, $config, 'utf8');
            $tidy->cleanRepair();
            $thisPage = str_replace('{{url}}', $page, $pageTemplate);
            $baseUrl = '';
            //supporting that ugly old hack of <base>
            preg_match($baseTagPattern, $pageHtml, $matches);
            if (count($matches) > 0) {
                $baseUrl = 'baseUrl="' . substr($matches[1], 1, -2) . '"';
            }
            $thisPage = str_replace('{{baseUrl}}', $baseUrl, $thisPage);
            $tidiedPageContents = characterEntityToNCR(removeDoctype(removeXmlComments($tidy)));
            $styleTagPattern = "/<style.*?<\\/style>/is";
            $tidiedPageContents = preg_replace($styleTagPattern, '', $tidiedPageContents);
            $scriptTagPattern = "/<script.*?<\\/script>/is";
            $tidiedPageContents = preg_replace($scriptTagPattern, '', $tidiedPageContents);
            $questionMarkPattern = "/<\\?.*?\\?>/is";
            //as strangely used on news.yahoo.com
            $tidiedPageContents = preg_replace($questionMarkPattern, '', $tidiedPageContents);
            $thisPage = str_replace('{{page}}', $tidiedPageContents, $thisPage);
            $pageXml .= $thisPage;
        }
    }
    $pageXml .= '</c:document>';
    $temporaryDirectory = getTemporaryDirectory();
    $pipelineDirectory = dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR . 'generator-pipeline' . DIRECTORY_SEPARATOR . $generatorPipeline . DIRECTORY_SEPARATOR;
    $pipelinePath = $pipelineDirectory . 'pipeline.xml';
    if (!file_exists($pipelinePath)) {
        webServiceError('&generation-pipeline-not-found; ' . revealXml($pipelinePath));
    }
    // Only the markup between <pipeline> and </pipeline> is parsed into stages.
    $pipelineString = file_get_contents($pipelinePath);
    $pipelineString = substr($pipelineString, strpos($pipelineString, '<pipeline>') + 10);
    $pipelineString = substr($pipelineString, 0, strpos($pipelineString, '</pipeline>'));
    $pipelineStages = xmlStringToArray($pipelineString);
    $pipelineSettings = array("pipeline" => $generatorPipeline, "autopipeline" => $generatorPipeline);
    processAPipelineLevel($pipelineStages, $pageXml, $pipelineDirectory, $temporaryDirectory, $temporaryDirectory, $pipelineSettings);
    $openDocumentPath = $temporaryDirectory . 'output.odt';
    zipFiles($temporaryDirectory, $openDocumentPath);
    header('Content-disposition: attachment; filename=' . basename($openDocumentPath));
    header('Content-type: application/vnd.oasis.opendocument.text');
    readfile($openDocumentPath);
}
Пример #16
0
<?php

// Refuse to run when the PHP version string begins with "4".
if (strncmp(PHP_VERSION, "4", 1) === 0) {
    webServiceError('&error-ensure-php5;', 500, array('phpVersion' => PHP_VERSION));
}
Пример #17
0
<?php

// Web entry point for document generation: bootstraps Docvert, validates the
// request parameters and hands them to generateDocument().
ob_start();
$appDir = dirname(__FILE__) . DIRECTORY_SEPARATOR;
define('DOCVERT_DIR', $appDir);
define('DOCVERT_CLIENT_TYPE', 'web');
include_once DOCVERT_DIR . 'core/lib.php';
// 'pages' must be a non-empty array: generateDocument() iterates over it, and
// count() on a scalar request value is an error on current PHP versions.
if (!isset($_REQUEST['pages']) || !is_array($_REQUEST['pages']) || count($_REQUEST['pages']) == 0) {
    webServiceError('&error-generator-expected-pages-parameter;');
}
if (!isset($_REQUEST['generatorPipeline'])) {
    webServiceError('&error-generator-expected-generatorpipeline-parameter;');
}
return generateDocument($_REQUEST['pages'], $_REQUEST['generatorPipeline']);
Пример #18
0
 function showGenerationStep()
 {
     $docvertDir = dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR;
     $disallowDocumentGeneration = getGlobalConfigItem('doNotAllowDocumentGeneration');
     if ($disallowDocumentGeneration == 'true') {
         return $this->getThemeFragment('generation-disabled.htmlf');
     }
     if (isset($_REQUEST['step'])) {
         switch ($_REQUEST['step']) {
             case '4':
                 if (!isset($_REQUEST['pages'])) {
                     webServiceError('&error-webpage-generation-no-pages;');
                 }
                 $template = $this->getThemeFragment('generation-step4.htmlf');
                 $hiddenFormChosenPages = array();
                 $listItems = array();
                 foreach ($_REQUEST['pages'] as $page) {
                     $listItems[] = "\n\t\t\t\t" . '<li>' . $page . '</li>';
                     $hiddenFormChosenPages[] = "\n\t\t\t\t" . '<input type="hidden" name="pages[]" value="' . $page . '"/>';
                 }
                 $template = str_replace('{{page-order}}', implode($listItems), $template);
                 $template = str_replace('{{hidden-form-chosen-pages}}', implode($hiddenFormChosenPages), $template);
                 $generatorPipelines = glob($this->docvertRootDirectory . 'generator-pipeline' . DIRECTORY_SEPARATOR . '*');
                 $generatorPipelinesArray = array();
                 foreach ($generatorPipelines as $generatorPipeline) {
                     $generatorName = basename($generatorPipeline);
                     $generatorPipelinesArray[] = '<option value="' . $generatorName . '">' . $generatorName . '</option>';
                 }
                 return str_replace('{{generator-pipelines}}', implode('', $generatorPipelinesArray), $template);
             case '3':
                 $template = $this->getThemeFragment('generation-step3.htmlf');
                 $listItems = array();
                 foreach ($_REQUEST['pages'] as $page) {
                     $listItems[] = "\n\t\t\t\t" . '<option value="' . $page . '">' . $page . '</option>';
                 }
                 return str_replace('{{chosen-scrape-urls}}', implode($listItems), $template);
             case '2':
                 if (!isset($_REQUEST['url'])) {
                     webServiceError('&error-webpage-generation-url;');
                 }
                 $originalUrl = $_REQUEST['url'];
                 if (trim($originalUrl) == '') {
                     webServiceError('&error-webpage-generation-no-url-given;');
                 }
                 if (!stringStartsWith($originalUrl, 'http')) {
                     $originalUrl = 'http://' . $originalUrl;
                 }
                 $originalUrl = str_replace(array("\n", "\r", "\t", " "), '', $originalUrl);
                 include_once dirname(__FILE__) . '/http.php';
                 if (trim(getUrlLocalPart($originalUrl)) == '') {
                     $originalUrl = followUrlRedirects($originalUrl . '/');
                 } else {
                     $originalUrl = followUrlRedirects($originalUrl);
                 }
                 if ($originalUrl === false) {
                     webServiceError('&error-webpage-cannot-get-url;', 500, array('url' => $originalUrl));
                 }
                 $page = file_get_contents($originalUrl);
                 $baseTagPattern = "/<base[^>]*?href=([^>]*?)>/is";
                 preg_match($baseTagPattern, $page, $matches);
                 if (count($matches) > 0) {
                     $originalUrl = trim($matches[1]);
                     $originalUrl = substr($originalUrl, 1, strlen($originalUrl) - 2);
                 }
                 $url = $originalUrl;
                 $connectionPart = getUrlConnectionPart($url);
                 $getUrlLocalPart = getUrlLocalPart($url);
                 $localPartDirectory = getUrlLocalPartDirectory($url);
                 $links = array();
                 $matches = null;
                 preg_match_all('/href="(.*?)"/', $page, $matches);
                 $matches = $matches[1];
                 $urls = array();
                 $urls[$originalUrl] = 'value that does not matter';
                 foreach ($matches as $match) {
                     $link = $match;
                     if (stringStartsWith($link, '/')) {
                         $link = $connectionPart . $link;
                     } elseif (stringStartsWith($link, "http://") || stringStartsWith($link, "https://")) {
                     } elseif (stringStartsWith($link, "mailto:")) {
                     } else {
                         $link = $connectionPart . resolveRelativeUrl($localPartDirectory . $link);
                     }
                     if (containsString($link, '#')) {
                         $link = substringBefore($link, '#');
                     }
                     if (stringEndsWith($link, '?')) {
                         $link = substringBefore($link, '?');
                     }
                     if (stringStartsWith($link, 'http')) {
                         $fileExtension = substr($link, strrpos($link, '.') + 1);
                         switch ($fileExtension) {
                             case 'avi':
                             case 'mov':
                             case 'mpg':
                             case 'css':
                             case 'jpeg':
                             case 'jpg':
                             case 'gif':
                             case 'png':
                             case 'bmp':
                             case 'apng':
                             case 'tiff':
                             case 'ico':
                             case 'js':
                             case 'gz':
                             case 'tar':
                             case 'zip':
                             case 'bin':
                             case 'sit':
                             case 'mp3':
                             case 'mp4':
                             case 'wav':
                             case 'swf':
                             case 'fla':
                             case 'rss':
                             case 'atom':
                             case 'pdf':
                             case 'xls':
                             case 'doc':
                             case 'txt':
                             case 'pps':
                                 break;
                             default:
                                 $urls[$link] = 'value that does not matter';
                         }
                     }
                 }
                 $urls = array_keys($urls);
                 $mostLikelyUrls = array();
                 $possibleUrls = array();
                 $unlikelyUrls = array();
                 $numberOfSlashesInOriginalUrl = strlen($originalUrl) - strlen(str_replace('/', '', $originalUrl));
                 foreach ($urls as $url) {
                     $url = followUrlRedirects($url);
                     if (trim($url) != '') {
                         $numberOfSlashesInUrl = strlen($url) - strlen(str_replace('/', '', $url));
                         if (stringStartsWith($url, $connectionPart . $localPartDirectory) && $numberOfSlashesInUrl == $numberOfSlashesInOriginalUrl) {
                             $mostLikelyUrls[] = $url;
                         } elseif (stringStartsWith($url, $connectionPart)) {
                             $possibleUrls[] = $url;
                         } else {
                             $unlikelyUrls[] = $url;
                         }
                     }
                 }
                 asort($unlikelyUrls);
                 $itemId = 0;
                 foreach ($mostLikelyUrls as $url) {
                     $links[] = '<li class="orderingItem"><label for="urlId' . $itemId . '"><input type="checkbox" name="pages[]" value="' . $url . '" id="urlId' . $itemId . '" checked="checked"/><span class="title">' . $url . '</label></span></li>' . "\n";
                     $itemId++;
                 }
                 foreach ($possibleUrls as $url) {
                     $links[] = '<li class="orderingItem"><label for="urlId' . $itemId . '"><input type="checkbox" name="pages[]" value="' . $url . '" id="urlId' . $itemId . '"/><span class="title">' . $url . '</label></span></li>' . "\n";
                     $itemId++;
                 }
                 foreach ($unlikelyUrls as $url) {
                     $links[] = '<li class="orderingItem"><label for="urlId' . $itemId . '"><input type="checkbox" name="pages[]" value="' . $url . '" id="urlId' . $itemId . '"/><span class="title">' . $url . '</label></span></li>' . "\n";
                     $itemId++;
                 }
                 $step2Template = $this->getThemeFragment('generation-step2.htmlf');
                 $step2Template = str_replace('{{scrape-results}}', implode('', $links), $step2Template);
                 $step2Template = str_replace('{{scrape-url}}', $url, $step2Template);
                 return $step2Template;
             default:
                 return $this->getThemeFragment('generation-step1.htmlf');
         }
     } else {
         return $this->getThemeFragment('generation-step1.htmlf');
     }
 }
Пример #19
0
/**
 * Run a shell command and capture its output. Unlike PHP's built-in
 * shell_exec(), passthru() or system(), STDERR is captured as well.
 *
 * Two modes, selected by $dataToStdIn:
 *  - Falsy $dataToStdIn: the command is started through popen() as
 *    "(command)2>&1&", so STDOUT and STDERR arrive merged on one stream
 *    and the merged text is returned as a string.
 *  - Truthy $dataToStdIn: the command is started through proc_open() with
 *    an empty environment and the operating system's temporary directory
 *    as working directory; the data is written to its STDIN and STDOUT /
 *    STDERR are read separately.
 *
 * NOTE(review): in the proc_open() mode all of STDIN is written before any
 * output is read, and STDOUT is drained before STDERR. A command that
 * produces a lot of output before consuming its input could presumably
 * block on a full pipe; confirm against real workloads.
 *
 * @param string      $command          Command line passed to the shell as-is; it is not escaped here.
 * @param int|null    $timeoutInSeconds Overall read deadline shared by all pipes (default 120). In the
 *                                      popen() mode a value of 0 launches the command and returns
 *                                      immediately without reading any output.
 * @param string|null $dataToStdIn      Data to feed to the command's STDIN, or a falsy value for none.
 * @param bool        $haltOnError      When true, webServiceError() is called if the process cannot be
 *                                      started, or (proc_open() mode only) if it exits non-zero.
 * @return string|array|null Merged output string in popen() mode (null if the process could not be
 *                           started or the timeout is 0); in proc_open() mode an array with the keys
 *                           'stdOut', 'statusCode' and 'stdErr' ('statusCode' is -1 on start failure).
 *                           The text 'Docvert timeout' is appended to a stream's output when the
 *                           deadline is hit while reading it.
 */
function shellCommand($command, $timeoutInSeconds = null, $dataToStdIn = null, $haltOnError = false)
{
    if ($timeoutInSeconds === null) {
        $timeoutInSeconds = 120;
    }
    $usesStdIn = (bool) $dataToStdIn;
    $pipes = array();
    if ($usesStdIn) {
        $descriptor = array(0 => array("pipe", "r"), 1 => array("pipe", "w"), 2 => array("pipe", "w"));
        $currentWorkingDirectory = getOperatingSystemsTemporaryDirectory();
        $environmentVariables = array();
        $process = proc_open($command, $descriptor, $pipes, $currentWorkingDirectory, $environmentVariables);
    } else {
        $process = popen("({$command})2>&1&", "r");
    }
    // Verify the process actually started BEFORE touching any pipe; writing
    // to or closing a failed handle would raise errors of its own.
    if (!is_resource($process)) {
        if ($haltOnError) {
            webServiceError($command);
        }
        if (!$usesStdIn) {
            return null;
        }
        return array('stdOut' => null, 'statusCode' => -1, 'stdErr' => null);
    }
    if ($usesStdIn) {
        fwrite($pipes[0], $dataToStdIn);
        // Closing STDIN signals end-of-input to the child process.
        fclose($pipes[0]);
        stream_set_timeout($pipes[1], $timeoutInSeconds);
        stream_set_timeout($pipes[2], $timeoutInSeconds);
    } else {
        if ($timeoutInSeconds == 0) {
            // Fire-and-forget: the command was backgrounded with "&".
            pclose($process);
            return;
        }
        // The popen() handle doubles as the single (merged) output stream.
        $pipes[] = $process;
    }
    $response = array();
    // One deadline for the whole call, shared by every pipe read below.
    $endTime = microtime(true) + (double) $timeoutInSeconds;
    foreach ($pipes as $pipe) {
        // Skips the already-closed STDIN pipe in proc_open() mode.
        if (!is_resource($pipe)) {
            continue;
        }
        $returnValue = null;
        while (!feof($pipe)) {
            // Small reads keep the deadline check below responsive.
            $returnValue .= fgets($pipe, 8);
            $streamInfo = stream_get_meta_data($pipe);
            if ($streamInfo['timed_out'] === true || microtime(true) > $endTime) {
                $returnValue .= 'Docvert timeout';
                break;
            }
        }
        $response[] = $returnValue;
        // popen() handles are released with pclose(); proc_open() pipes are
        // ordinary streams and are released with fclose().
        if ($usesStdIn) {
            fclose($pipe);
        } else {
            pclose($pipe);
        }
    }
    $stdOut = isset($response[0]) ? $response[0] : null;
    if (!$usesStdIn) {
        return $stdOut;
    }
    $stdErr = isset($response[1]) ? $response[1] : null;
    $statusCode = proc_close($process);
    if ($statusCode !== 0 && $haltOnError) {
        // Report the captured output; the pipe resources are closed by now
        // and carry no useful information.
        webServiceError($statusCode . ' ' . implode(' ', $response));
    }
    return array('stdOut' => $stdOut, 'statusCode' => $statusCode, 'stdErr' => $stdErr);
}
Пример #20
0
    }
}
// Report a usage error unless every required option is present and
// $extraParameters is empty (presumably leftover/unrecognised arguments;
// confirm against the argument parsing earlier in this script).
if (!($files && $converter && $pipeline && $outputZip && !$extraParameters)) {
    $commandLineHelp = "\n" . $extraParameters . '&error-command-line-help;';
    // Each placeholder carries the name of a missing option, or an empty
    // string when that option was supplied.
    $errorPlaceholders['commandLineFiles'] = $files ? '' : ' --input-files ';
    $errorPlaceholders['commandLineConverter'] = $converter ? '' : ' --converter';
    $errorPlaceholders['commandLinePipeline'] = $pipeline ? '' : ' --pipeline';
    $errorPlaceholders['commandLineOutputZip'] = $outputZip ? '' : ' --output-zip';
    webServiceError($commandLineHelp, 400, $errorPlaceholders);
}
// Debugging aids for inspecting the parsed options:
//print_r($files);
//print $converter;
//print $pipeline;
//print $autoPipeline;
//print $afterConversion;
//print $setupOpenOfficeOrg;
//print $outputZip;

// All required options are present: run the conversion.
processConversion(
    $files,
    $converter,
    $pipeline,
    $autoPipeline,
    $afterConversion,
    $setupOpenOfficeOrg,
    $outputZip
);