$splitter = new EPubChapterSplitter(); $splitter->setSplitSize(15000); // For this test, we split at approx 15k. Default is 250000 had we left it alone. $log->logLine("new EPubChapterSplitter()"); /* Using the # as regexp delimiter here, it makes writing the regexp easier. * in this case we could have just searched for "Chapter ", or if we were using regexp '#^<h1>Chapter #i', * using regular text (no regexp delimiters) will look for the text after the first tag. Meaning had we used * "Chapter ", any paragraph or header starting with "Chapter " would have matched. The regexp equivalent of * "Chapter " is '#^<.+?>Chapter #' * Essentially, the search string is looking for lines starting with... */ $log->logLine("Split chapter 5"); // $html2 = $splitter->splitChapter($chapter5, true, "#^\<.+?\>Chapter \d*#i"); // $html2 = $splitter->splitChapter($chapter5, true, "Chapter "); $searchString = '/<h1/i'; $html2 = $splitter->splitChapter($chapter5, true, $searchString); // $html2 is an array where the keys are the entire line, including start and end tags of the hit. // and the value is the segment for the match. // The returned array can just be parsed to the addChapter like this: // $book->addChapter($cName, "Chapter005.html", $html2, true); // and EPub will add the parts automatically. // However, often you'd want to try to get a measure of control over the process $log->logLine("Add Chapter 5"); $idx = 0; while (list($k, $v) = each($html2)) { $idx++; // Because we used a string search in the splitter, the returned hits are put in the key part of the array. // The entire HTML tag of the line matching the chapter search. // Strip start and end tags. This Regexp will keep the tag name as well as the data between them. preg_match('#^<(\\w+)\\s*.*?>(.+)</\\s*\\1>$#i', $k, $cName); // This is simply to clean up the chapter name, it can't contain any HTML.
/**
 * Add a chapter to the book.
 *
 * As a chapter should not exceed 250kB, you can pass an array with multiple parts as $chapterData.
 * These will still only show up as a single chapter in the book TOC.
 *
 * Depending on the arguments, one of four things happens:
 *  - non-empty string data: the data is stored as $fileName and registered as "chapter<N>";
 *  - array data: every element is stored as "<name>_<part>.<extension>", and the TOC entry points at part 1;
 *  - no data and a $fileName containing "#": only a TOC entry is created;
 *  - no data and $fileName equal to "TOC.xhtml": the "toc" item is added to the spine and a TOC entry is created.
 *
 * @param string       $chapterName        Name of the chapter, will be used in the TOC.
 * @param string       $fileName           Filename to use for the chapter, must be unique for the book.
 * @param string|array $chapterData        Chapter text in XHTML, or an array of valid XHTML parts for the chapter. A file should NOT exceed 250kB.
 * @param bool         $autoSplit          Should the chapter be split if it is longer than $this->splitDefaultSize? Default=FALSE, only used if $chapterData is a string.
 * @param int          $externalReferences How to handle external references, EPub::EXTERNAL_REF_IGNORE, EPub::EXTERNAL_REF_ADD or EPub::EXTERNAL_REF_REMOVE_IMAGES? See documentation for <code>processChapterExternalReferences</code> for explanation. Default is EPub::EXTERNAL_REF_IGNORE.
 * @param string       $baseDir            Default is "", meaning it is pointing to the document root. NOT used if $externalReferences is set to EPub::EXTERNAL_REF_IGNORE.
 *
 * @return NavPoint|bool FALSE if the book is finalized or none of the cases above applied, else the new NavPoint.
 */
function addChapter($chapterName, $fileName, $chapterData = null, $autoSplit = false, $externalReferences = EPub::EXTERNAL_REF_IGNORE, $baseDir = "")
{
    if ($this->isFinalized) {
        return false;
    }

    // Normalise the path and strip any leading "/" or "." characters.
    $fileName = RelativePath::getRelativePath($fileName);
    $fileName = preg_replace('#^[/\\.]+#i', "", $fileName);

    $navPoint = false;
    $chapter = $chapterData;

    if ($autoSplit && is_string($chapterData) && mb_strlen($chapterData) > $this->splitDefaultSize) {
        $splitter = new EPubChapterSplitter();
        $splitter->setSplitSize($this->splitDefaultSize);

        // Only switch to the multi-part path when the splitter really produced more than one part.
        $chapterArray = $splitter->splitChapter($chapterData);
        if (count($chapterArray) > 1) {
            $chapter = $chapterArray;
        }
    }

    if (!empty($chapter) && is_string($chapter)) {
        if ($externalReferences !== EPub::EXTERNAL_REF_IGNORE) {
            $htmlDirInfo = pathinfo($fileName);
            $htmlDir = preg_replace('#^[/\\.]+#i', "", $htmlDirInfo["dirname"] . "/");
            $this->processChapterExternalReferences($chapter, $externalReferences, $baseDir, $htmlDir);
        }

        if ($this->encodeHTML === true) {
            $chapter = StringHelper::encodeHtml($chapter);
        }

        $this->chapterCount++;
        $chapterId = "chapter" . $this->chapterCount;

        $this->addFile($fileName, $chapterId, $chapter, "application/xhtml+xml");
        $this->extractIdAttributes($chapterId, $chapter);
        $this->opf->addItemRef($chapterId);

        $navPoint = new NavPoint(StringHelper::decodeHtmlEntities($chapterName), $fileName, $chapterId);
        $this->ncx->addNavPoint($navPoint);
        $this->ncx->chapterList[$chapterName] = $navPoint;
    } elseif (is_array($chapter)) {
        $this->log->logLine("addChapter: \$chapterName: {$chapterName} ; \$fileName: {$fileName} ; ");

        $fileNameParts = pathinfo($fileName);
        // pathinfo() omits the 'extension' key when the file name has none.
        $extension = isset($fileNameParts['extension']) ? $fileNameParts['extension'] : "";
        $name = $fileNameParts['filename'];

        $partCount = 0;
        $this->chapterCount++;

        // foreach replaces the removed each(); it also always starts at the first
        // element, regardless of where the caller left the array's internal pointer.
        foreach ($chapter as $v) {
            if ($this->encodeHTML === true) {
                $v = StringHelper::encodeHtml($v);
            }

            if ($externalReferences !== EPub::EXTERNAL_REF_IGNORE) {
                $this->processChapterExternalReferences($v, $externalReferences, $baseDir);
            }

            $partCount++;
            $partName = $name . "_" . $partCount;

            $this->addFile($partName . "." . $extension, $partName, $v, "application/xhtml+xml");
            $this->extractIdAttributes($partName, $v);
            $this->opf->addItemRef($partName);
        }

        // The single TOC entry for the whole chapter points at the first part.
        $partName = $name . "_1." . $extension;
        $navPoint = new NavPoint(StringHelper::decodeHtmlEntities($chapterName), $partName, $partName);
        $this->ncx->addNavPoint($navPoint);
        $this->ncx->chapterList[$chapterName] = $navPoint;
    } elseif (!isset($chapterData) && strpos($fileName, "#") > 0) {
        // No file and no itemref are added in this branch; only a navigation point is created.
        $this->chapterCount++;

        $id = preg_split("/[#]/", $fileName);
        // NOTE(review): the lookup below only runs when $this->isLogging is set, which
        // looks unrelated to resolving the anchor - confirm this condition is intended.
        if (sizeof($id) == 2 && $this->isLogging) {
            // Text before the first "." of the file part. Always reduced to a string;
            // previously a name without a dot was handed on as a one-element array.
            $name = preg_split('/[\\.]/', $id[0]);
            $name = $name[0];

            $rv = $this->opf->getItemByHref($name, true);

            if ($rv != false) {
                /** @var Item $item */
                foreach ($rv as $item) {
                    // Point the link at the first returned item that has this index point.
                    if ($item->hasIndexPoint($id[1])) {
                        $fileName = $item->getHref() . "#" . $id[1];
                        break;
                    }
                }
            }
        }

        $navPoint = new NavPoint(StringHelper::decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
        $this->ncx->addNavPoint($navPoint);
        $this->ncx->chapterList[$chapterName] = $navPoint;
    } elseif (!isset($chapterData) && $fileName == "TOC.xhtml") {
        $this->chapterCount++;
        $this->opf->addItemRef("toc");

        $navPoint = new NavPoint(StringHelper::decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
        $this->ncx->addNavPoint($navPoint);
        $this->ncx->chapterList[$chapterName] = $navPoint;
        $this->tocNavAdded = true;
    }

    return $navPoint;
}
/**
 * Add a chapter to the book.
 *
 * As a chapter should not exceed 250kB, you can pass an array with multiple parts as $chapterData.
 * These will still only show up as a single chapter in the book TOC.
 *
 * Depending on the arguments, one of four things happens:
 *  - non-empty string data: the data is stored as $fileName and registered as "chapter<N>";
 *  - array data: every element is stored as "<name>_<part>.<extension>", and the TOC entry points at part 1;
 *  - no data and a $fileName containing "#": only a TOC entry is created;
 *  - no data and $fileName equal to "TOC.xhtml": the "toc" item is added to the spine and a TOC entry is created.
 *
 * @param string       $chapterName        Name of the chapter, will be used in the TOC.
 * @param string       $fileName           Filename to use for the chapter, must be unique for the book.
 * @param string|array $chapterData        Chapter text in XHTML, or an array of valid XHTML parts for the chapter. A file should NOT exceed 250kB.
 * @param bool         $autoSplit          Should the chapter be split if it is longer than $this->splitDefaultSize? Default=FALSE, only used if $chapterData is a string.
 * @param int          $externalReferences How to handle external references, EPub::EXTERNAL_REF_IGNORE, EPub::EXTERNAL_REF_ADD or EPub::EXTERNAL_REF_REMOVE_IMAGES? See documentation for <code>processChapterExternalReferences</code> for explanation. Default is EPub::EXTERNAL_REF_IGNORE.
 * @param string       $baseDir            Default is "", meaning it is pointing to the document root. NOT used if $externalReferences is set to EPub::EXTERNAL_REF_IGNORE.
 *
 * @return NavPoint|bool FALSE if the book is finalized or none of the cases above applied, else the new NavPoint.
 */
function addChapter($chapterName, $fileName, $chapterData = null, $autoSplit = false, $externalReferences = EPub::EXTERNAL_REF_IGNORE, $baseDir = "")
{
    if ($this->isFinalized) {
        return false;
    }

    // Normalise the path and strip any leading "/" or "." characters.
    $fileName = RelativePath::getRelativePath($fileName);
    $fileName = preg_replace('#^[/\\.]+#i', "", $fileName);

    $navPoint = false;
    $chapter = $chapterData;

    if ($autoSplit && is_string($chapterData) && mb_strlen($chapterData) > $this->splitDefaultSize) {
        $splitter = new EPubChapterSplitter();
        // Split at the same size that triggered the split; without this the splitter
        // falls back to its own default, which can differ from $this->splitDefaultSize.
        $splitter->setSplitSize($this->splitDefaultSize);

        // Only switch to the multi-part path when the splitter really produced more than one part.
        $chapterArray = $splitter->splitChapter($chapterData);
        if (count($chapterArray) > 1) {
            $chapter = $chapterArray;
        }
    }

    if (!empty($chapter) && is_string($chapter)) {
        if ($externalReferences !== EPub::EXTERNAL_REF_IGNORE) {
            $htmlDirInfo = pathinfo($fileName);
            $htmlDir = preg_replace('#^[/\\.]+#i', "", $htmlDirInfo["dirname"] . "/");
            $this->processChapterExternalReferences($chapter, $externalReferences, $baseDir, $htmlDir);
        }

        if ($this->encodeHTML === true) {
            $chapter = $this->encodeHtml($chapter);
        }

        $this->chapterCount++;
        $chapterId = "chapter" . $this->chapterCount;

        $this->addFile($fileName, $chapterId, $chapter, "application/xhtml+xml");
        $this->opf->addItemRef($chapterId);

        $navPoint = new NavPoint($this->decodeHtmlEntities($chapterName), $fileName, $chapterId);
        $this->ncx->addNavPoint($navPoint);
        $this->ncx->chapterList[$chapterName] = $navPoint;
    } elseif (is_array($chapter)) {
        $fileNameParts = pathinfo($fileName);
        // pathinfo() omits the 'extension' key when the file name has none.
        $extension = isset($fileNameParts['extension']) ? $fileNameParts['extension'] : "";
        $name = $fileNameParts['filename'];

        $partCount = 0;
        $this->chapterCount++;

        // foreach replaces the removed each(); it also always starts at the first
        // element, regardless of where the caller left the array's internal pointer.
        foreach ($chapter as $v) {
            if ($this->encodeHTML === true) {
                $v = $this->encodeHtml($v);
            }

            if ($externalReferences !== EPub::EXTERNAL_REF_IGNORE) {
                $this->processChapterExternalReferences($v, $externalReferences, $baseDir);
            }

            $partCount++;
            $partName = $name . "_" . $partCount;

            $this->addFile($partName . "." . $extension, $partName, $v, "application/xhtml+xml");
            $this->opf->addItemRef($partName);
        }

        // The single TOC entry for the whole chapter points at the first part.
        $partName = $name . "_1." . $extension;
        $navPoint = new NavPoint($this->decodeHtmlEntities($chapterName), $partName, $partName);
        $this->ncx->addNavPoint($navPoint);
        $this->ncx->chapterList[$chapterName] = $navPoint;
    } elseif (!isset($chapterData) && strpos($fileName, "#") > 0) {
        // No file and no itemref are added in this branch; only a navigation point is created.
        $this->chapterCount++;

        $navPoint = new NavPoint($this->decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
        $this->ncx->addNavPoint($navPoint);
        $this->ncx->chapterList[$chapterName] = $navPoint;
    } elseif (!isset($chapterData) && $fileName == "TOC.xhtml") {
        $this->chapterCount++;
        $this->opf->addItemRef("toc");

        $navPoint = new NavPoint($this->decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
        $this->ncx->addNavPoint($navPoint);
        $this->ncx->chapterList[$chapterName] = $navPoint;
        $this->tocNavAdded = true;
    }

    return $navPoint;
}
$book->backLevel(); // More advanced use of the splitter: // Still using Chapter 4, but as you can see, "Chapter 4" also contains a header for Chapter 5. $splitter = new EPubChapterSplitter(); $splitter->setSplitSize(15000); // For this test, we split at approx 15k. Default is 250000 had we left it alone. $log->logLine("new EPubChapterSplitter()"); /* Using the # as regexp delimiter here, it makes writing the regexp easier. * in this case we could have just searched for "Chapter ", or if we were using regexp '#^<h1>Chapter #i', * using regular text (no regexp delimiters) will look for the text after the first tag. Meaning had we used * "Chapter ", any paragraph or header starting with "Chapter " would have matched. The regexp equivalent of * "Chapter " is '#^<.+?>Chapter #' * Essentially, the search string is looking for lines starting with... */ $log->logLine("Add Chapter 5"); $html2 = $splitter->splitChapter($chapter5, true, "Chapter "); /* '#^<.+?>Chapter \d*#i'); */ $log->logLine("Split chapter 5"); $idx = 0; while (list($k, $v) = each($html2)) { $idx++; // Because we used a string search in the splitter, the returned hits are put in the key part of the array. // The entire HTML tag of the line matching the chapter search. // find the text inside the tags preg_match('#^<(\\w+)\\ *.*?>(.+)</\\ *\\1>$#i', $k, $cName); // because of the back reference, the tag name is in $cName[1], and the content is in $cName[2] // Change any line breaks in the chapter name to " - " $cName = preg_replace('#<br.+?>#i', " - ", $cName[2]); // Remove any other tags $cName = preg_replace('#<.+?>#i', " ", $cName); // clean the chapter name by removing any double spaces left behind to single space.