/**
 * Primary processing routine for the source.
 *
 * For every source file: converts it to HTML pages, collects each page's
 * body HTML and the converter's page CSS, and triggers image asset creation.
 * Afterwards the collected pages and the combined book CSS are saved, the
 * cover image is set, the source assets are imported and the page count is
 * stored.
 *
 * @return bool Always true when the routine runs to completion.
 */
private function processBook()
{
    // Collectors filled across ALL source files.
    $pageContents = array();
    $pageCssContents = array();

    foreach ($this->sourceFiles as $sourceFile) {
        // Init Apache POI. `new` either yields an object or throws, so a
        // falsy check on the result can never trigger and is not needed.
        $converter = new XWPFToHTMLConverter($this->workingDir, $this->progress);

        // Parse the docx file and convert everything to HTML.
        $converter->setDocFileToParse($sourceFile);
        $converter->convertToHTML();

        $pages = $converter->getHTMLPages();
        $this->progress->adjustMaxSteps(count($pages) * 2 + count($this->sourceAssets) + 1);

        foreach ($pages as $page) {
            // Styles are collected separately below, so keep them out of the markup.
            $page->setStyleInline(false);
            $pageContents[] = $page->getBodyHTML();
            $this->progress->incrementStep();
        }

        // One CSS chunk per source file; joined into the book CSS later.
        $pageCssContents[] = $converter->mainStyleSheet->getPagesCSS();

        // Save image assets
        $this->createImageAssets();
    }

    // Save page HTML. $pageContents is a 0-based list, page numbers are 1-based.
    foreach ($pageContents as $index => $contents) {
        $pageNumber = $index + 1;
        $this->savePageHTML($contents, $pageNumber);
        $this->savePageCSS('', $pageNumber);
        $this->progress->incrementStep();
    }

    // Save book CSS
    $this->saveBookCSS("body {margin: 0px; padding: 0px;} \n\n" . implode("\n", $pageCssContents));
    $this->progress->incrementStep();

    $this->setCoverImage();

    // Count every page that was saved, not just the pages of the last source
    // file: $pages only holds the final iteration's result and is undefined
    // when there are no source files at all.
    $numPages = count($pageContents);

    $this->importSourceAssets();
    $this->updatePageCount($numPages);

    return true;
}
include_once 'Word/XWPFToHTMLConverter.php'; //Set document directory $progress = ""; $workingDir = "/home/peter/Documents"; $sourceFile = "/home/peter/Documents/Strikethrough.docx"; //Initiate time counter $start = microtime(true); // Init Apache POI $converter = new XWPFToHTMLConverter($workingDir, $progress); if (!$converter) { throw new Exception('[WordProcessor::routine] ' . 'Book ID ' . $this->bookId . ' cannot be processed as a working directory cannot be found.'); } // Set docx file to parse $converter->setDocFileToParse($sourceFile); // Convert everything to HTML $converter->convertToHTML(); $hasToc = $converter->hasTOC(); if ($hasToc) { $tocNumbering = $converter->getTocNumbering(); $TOC = $converter->getTableOfContents(); // foreach($TOC as $entry){ // if(strlen($entry['num']) == 0){ // var_dump($entry); // } // } $contentStructure = array(); $headlineList = $converter->getHeadLineList(); // if(!empty($headlineList)) { // // $currentChapter = ''; // $prevItem = '';