Beispiel #1
0
 /**
  * Primary processing routine for the source.
  *
  * Converts every file in $this->sourceFiles with XWPFToHTMLConverter,
  * collects the body HTML of each resulting page and the CSS of each file,
  * then writes one HTML/CSS pair per page and a single book-level style
  * sheet. Afterwards the cover image is set, the source assets are imported
  * and the page count is stored.
  *
  * The stored page count is the number of pages written across ALL source
  * files, not just the pages of the file converted last.
  *
  * @return bool true once all steps have run
  */
 private function processBook()
 {
     // Preset collectors
     $pageContents = array();
     $pageCssContents = array();
     // Work through all source files
     foreach ($this->sourceFiles as $sourceFile) {
         // Init Apache POI. `new` either yields an object or throws, so no
         // falsy check is needed on the result.
         $converter = new XWPFToHTMLConverter($this->workingDir, $this->progress);
         // Set docx file to parse
         $converter->setDocFileToParse($sourceFile);
         // Convert everything to HTML
         $converter->convertToHTML();
         // Get HTML pages
         $pages = $converter->getHTMLPages();
         $this->progress->adjustMaxSteps(count($pages) * 2 + count($this->sourceAssets) + 1);
         foreach ($pages as $page) {
             // Styles go to the style sheets written below, not inline
             $page->setStyleInline(false);
             $pageContents[] = $page->getBodyHTML();
             $this->progress->incrementStep();
         }
         // Get CSS
         $pageCssContents[] = $converter->mainStyleSheet->getPagesCSS();
         // Save image assets
         $this->createImageAssets();
     }
     // Save page HTML; page numbers are 1-based
     foreach ($pageContents as $index => $contents) {
         $pageNumber = $index + 1;
         $this->savePageHTML($contents, $pageNumber);
         // Per-page CSS is written empty; all rules go into the book CSS
         $this->savePageCSS('', $pageNumber);
         $this->progress->incrementStep();
     }
     // Save book CSS
     $this->saveBookCSS("body {margin: 0px; padding: 0px;} \n\n" . implode("\n", $pageCssContents));
     $this->progress->incrementStep();
     $this->setCoverImage();
     $this->importSourceAssets();
     // Count the pages collected from every source file. Counting $pages
     // would only cover the last file and fail when there are no files.
     $this->updatePageCount(count($pageContents));
     return true;
 }
Beispiel #2
0
 /**
  * Builds the HTML element for the current run.
  *
  * If the run has embedded pictures, the result of processing the first
  * picture is returned immediately. Otherwise the run text is placed in a
  * container element, followed by a <br /> when the text is a single
  * carriage return or BEL mark, and the run style is set as its class.
  *
  * @return HTMLElement
  */
 public function parseRun()
 {
     // Pictures take precedence over text: the first one decides the result
     $embeddedPictures = java_values($this->run->getEmbeddedPictures());
     if (count($embeddedPictures) > 0) {
         foreach ($embeddedPictures as $embeddedPicture) {
             $directoryPath = XWPFToHTMLConverter::getDirectoryPath();
             $pictureHandler = new XWPFPicture($embeddedPicture, $this->mainStyleSheet, $directoryPath);
             return $pictureHandler->processPicture();
         }
     }
     // Choose the element that will wrap the text
     if ($this->getSubscript() != 1) {
         $runContainer = $this->selectSubscriptContainer();
     } else {
         $runContainer = new HTMLElement(HTMLElement::SPAN);
     }
     $text = $this->getText();
     // A one-character run holding "\r" or the BEL mark gets a line break
     if (strlen($text) == 1) {
         $onlyChar = substr($text, -1, 1);
         if ($onlyChar == "\r" || ord($onlyChar) == HWPFWrapper::BEL_MARK) {
             $text .= '<br />';
         }
     }
     // Resolve the style before filling the container
     $styleClass = $this->processRunStyle($this->run);
     $runContainer->setInnerText($text);
     $runContainer->setClass($styleClass);
     return $runContainer;
 }
Beispiel #3
0
    <form>
        <input type="text" id="file" name="file" />
        <input type="submit" title="process" />
    </form>
</div>

<?php 
// Demo script: runs a single hard-coded .docx file through XWPFToHTMLConverter.
include_once 'Word/XWPFToHTMLConverter.php';
// Progress argument handed to the converter; an empty string in this script
$progress = "";
// Working directory and source document (hard-coded absolute paths)
$workingDir = "/home/peter/Documents";
$sourceFile = "/home/peter/Documents/Strikethrough.docx";
// Start timestamp in seconds (float) for timing the run
$start = microtime(true);
// Init Apache POI
$converter = new XWPFToHTMLConverter($workingDir, $progress);
// NOTE(review): `new` yields an object or throws, so this branch looks
// unreachable. It also reads $this->bookId, and $this is only defined when
// this file is included from inside a method — confirm before relying on it.
if (!$converter) {
    throw new Exception('[WordProcessor::routine] ' . 'Book ID ' . $this->bookId . ' cannot be processed as a working directory cannot be found.');
}
// Set docx file to parse
$converter->setDocFileToParse($sourceFile);
// Convert everything to HTML
$converter->convertToHTML();
// Flag that gates the table-of-contents handling that follows
$hasToc = $converter->hasTOC();
if ($hasToc) {
    $tocNumbering = $converter->getTocNumbering();
    $TOC = $converter->getTableOfContents();
    //    foreach($TOC as $entry){
    //        if(strlen($entry['num']) == 0){
    //            var_dump($entry);
    //        }
 /**
  * Parses a characterRun element of HWPF document.
  *
  * If the run has embedded pictures, the result of processing the first
  * picture is returned. Otherwise the run text is escaped for XHTML and
  * placed in an A element (when the run style value is "InternetLink") or a
  * SPAN element, optionally nested in the bold/italic containers supplied by
  * processCharacterRunStyle().
  *
  * Line breaks are turned into <br /> only AFTER the text has been escaped.
  * Escaping afterwards would turn the generated tags into literal text, and
  * would stop a lone carriage return from being recognised as a line break.
  *
  * @param   object  $characterRun  Character run
  * @return  HTMLElement
  */
 private function parseCharacterRun($characterRun)
 {
     // Even non text elements will have a text property. Keep it raw: the
     // line-break test and the escaping below both need the unmodified text.
     $rawText = java_values($characterRun->getText(0));
     // Get and process pictures if there are any
     $pictures = java_values($characterRun->getEmbeddedPictures());
     if (count($pictures) > 0) {
         foreach ($pictures as $picture) {
             $path = XWPFToHTMLConverter::getDirectoryPath();
             $pictureContainer = new XWPFPicture($picture, $this->mainStyleSheet, $path);
             // Only the first embedded picture is processed and returned
             return $pictureContainer->processPicture();
         }
     }
     //Get Run xml
     $charXml = java_values($characterRun->getCTR()->ToString());
     //Check for section numbering
     $this->checkSectionNumbering($characterRun, $charXml);
     // Turn the "w:" prefix into a plain "w" so elements can be addressed
     // as wrPr/wrStyle in the XPath below
     $charXml = str_replace('w:', 'w', $charXml);
     $xml = new SimpleXMLElement($charXml);
     //Get the value of hyperlink
     $link = $xml->xpath("wrPr/wrStyle");
     //Check if the hyperlink xml tag exists
     if (!empty($link)) {
         $linkValue = $link[0]['wval'];
     } else {
         $linkValue = "none";
     }
     //Check if is valid internet link
     if ($linkValue == 'InternetLink') {
         //Create empty A tag for the hyperlink and the text
         $container = new HTMLElement(HTMLElement::A);
         $container->setAttribute('href', java_values($characterRun->getText(0)));
     } else {
         /* In every other case, if we got here we do simple text parsing */
         // Create empty text element
         $container = new HTMLElement(HTMLElement::SPAN);
     }
     $styleClass = $this->processCharacterRunStyle($characterRun, $xml);
     // NOTE(review): this replaces any non-anchor container with a fresh SPAN;
     // it looks redundant with the else branch above — confirm before removing.
     if ($container->getTagName() != 'a') {
         $container = new HTMLElement(HTMLElement::SPAN);
     }
     // A run consisting solely of "\r" or the BEL mark stands for a new line
     $addNewLine = false;
     if (strlen($rawText) == 1) {
         $onlyChar = substr($rawText, -1, 1);
         $addNewLine = $onlyChar == "\r" || ord($onlyChar) == HWPFWrapper::BEL_MARK;
     }
     //escape text for xhtml
     $text = XhtmlEntityConverter::convertToNumericalEntities(htmlentities($rawText, ENT_COMPAT | ENT_XHTML));
     if ($addNewLine) {
         $text .= '<br />';
     } else {
         // Convert embedded line breaks now that escaping is done, so the
         // inserted <br /> tags stay real markup
         $text = nl2br($text);
     }
     $boldContainer = $styleClass['containers']['bold'];
     $italicContainer = $styleClass['containers']['italic'];
     //TODO check why this fails with large documents
     //if($container->getTagName() == '') $container->setInnerElement($text);
     if ($boldContainer and $italicContainer) {
         //Set Bold and italic semantic tags
         $container->addInnerElement($boldContainer);
         $boldContainer->addInnerElement($italicContainer);
         $italicContainer->setInnerText($text);
     } elseif ($boldContainer) {
         //Set bold strong tag
         $container->addInnerElement($boldContainer);
         $boldContainer->setInnerText($text);
     } elseif ($italicContainer) {
         // Set italic em tag
         $container->addInnerElement($italicContainer);
         $italicContainer->setInnerText($text);
     } else {
         // Set inner text to span tag
         $container->setInnerText($text);
     }
     // Get and set class name on container
     $container->setClass($styleClass['style'] . ' textframe cke_focus');
     // Return container
     return $container;
 }