EPubChapterSplitter::splitChapter, PHPePub\Core PHP代码示例

示例#1

0

显示文件

文件： EPub.Example1.php 项目： elevenone/PHPePub

$splitter = new EPubChapterSplitter();
$splitter->setSplitSize(15000);
// For this test, we split at approx 15k. Default is 250000 had we left it alone.
$log->logLine("new EPubChapterSplitter()");
/* Using the # as regexp delimiter here, it makes writing the regexp easier.
 *  in this case we could have just searched for "Chapter ", or if we were using regexp '#^<h1>Chapter #i',
 *  using regular text (no regexp delimiters) will look for the text after the first tag. Meaning had we used
 *  "Chapter ", any paragraph or header starting with "Chapter " would have matched. The regexp equivalent of
 *  "Chapter " is '#^<.+?>Chapter #'
 * Essentially, the search string is looking for lines starting with...
 */
$log->logLine("Split chapter 5");
// $html2 = $splitter->splitChapter($chapter5, true, "#^\<.+?\>Chapter \d*#i");
// $html2 = $splitter->splitChapter($chapter5, true, "Chapter ");
$searchString = '/<h1/i';
$html2 = $splitter->splitChapter($chapter5, true, $searchString);
// $html2 is an array where the keys are the entire line, including start and end tags of the hit.
// and the value is the segment for the match.
// The returned array can simply be passed to addChapter like this:
//     $book->addChapter($cName, "Chapter005.html", $html2, true);
// and EPub will add the parts automatically.
// However, often you'd want to try to get a measure of control over the process
$log->logLine("Add Chapter 5");
$idx = 0;
while (list($k, $v) = each($html2)) {
    $idx++;
    // Because we used a string search in the splitter, the returned hits are put in the key part of the array.
    // The entire HTML tag of the line matching the chapter search.
    // Strip start and end tags. This Regexp will keep the tag name as well as the data between them.
    preg_match('#^<(\\w+)\\s*.*?>(.+)</\\s*\\1>$#i', $k, $cName);
    // This is simply to clean up the chapter name, it can't contain any HTML.

示例#2

0

显示文件

文件： EPub.php 项目： grandt/phpepub

 /**
  * Add a chapter to the book.
  *
  * As a chapter file should not exceed 250kB, you can pass an array with multiple parts as $chapterData.
  * These will still only show up as a single chapter in the book TOC.
  *
  * Depending on the arguments, one of the following happens:
  *  - $chapterData is a non-empty string: it is stored as one file under $fileName.
  *  - $chapterData is an array (or a string that was auto-split into more than one part): every part is
  *    stored as "<name>_<n>.<extension>", and the nav point refers to part 1.
  *  - $chapterData is not set and $fileName contains a "#" after its first character: only a nav point is added.
  *  - $chapterData is not set and $fileName is "TOC.xhtml": the "toc" item is referenced and a nav point is added.
  * In every other case nothing is added and FALSE is returned.
  *
  * @param string            $chapterName        Name of the chapter, will be used in the TOC
  * @param string            $fileName           Filename to use for the chapter, must be unique for the book.
  * @param string|array|null $chapterData        Chapter text in XHTML, or an array of valid XHTML parts for the chapter. Each file should NOT exceed 250kB.
  * @param bool              $autoSplit          Should the chapter be split if it exceeds the default split size? Default=FALSE, only used if $chapterData is a string.
  * @param int               $externalReferences How to handle external references, EPub::EXTERNAL_REF_IGNORE, EPub::EXTERNAL_REF_ADD or EPub::EXTERNAL_REF_REMOVE_IMAGES? See documentation for <code>processChapterExternalReferences</code> for explanation. Default is EPub::EXTERNAL_REF_IGNORE.
  * @param string            $baseDir            Default is "", meaning it is pointing to the document root. NOT used if $externalReferences is set to EPub::EXTERNAL_REF_IGNORE.
  *
  * @return mixed $success            FALSE if the book is finalized or nothing could be added, else the new NavPoint.
  */
 function addChapter($chapterName, $fileName, $chapterData = null, $autoSplit = false, $externalReferences = EPub::EXTERNAL_REF_IGNORE, $baseDir = "")
 {
     if ($this->isFinalized) {
         return false;
     }
     $fileName = RelativePath::getRelativePath($fileName);
     // Strip any leading slashes, backslashes and dots from the file name.
     $fileName = preg_replace('#^[/\\.]+#i', "", $fileName);
     $navPoint = false;
     $chapter = $chapterData;
     if ($autoSplit && is_string($chapterData) && mb_strlen($chapterData) > $this->splitDefaultSize) {
         $splitter = new EPubChapterSplitter();
         $splitter->setSplitSize($this->splitDefaultSize);
         $chapterArray = $splitter->splitChapter($chapterData);
         // Only switch to the multi-part path when the splitter really produced more than one part.
         if (count($chapterArray) > 1) {
             $chapter = $chapterArray;
         }
     }
     if (!empty($chapter) && is_string($chapter)) {
         if ($externalReferences !== EPub::EXTERNAL_REF_IGNORE) {
             $htmlDirInfo = pathinfo($fileName);
             $htmlDir = preg_replace('#^[/\\.]+#i', "", $htmlDirInfo["dirname"] . "/");
             $this->processChapterExternalReferences($chapter, $externalReferences, $baseDir, $htmlDir);
         }
         if ($this->encodeHTML === true) {
             $chapter = StringHelper::encodeHtml($chapter);
         }
         $this->chapterCount++;
         $this->addFile($fileName, "chapter" . $this->chapterCount, $chapter, "application/xhtml+xml");
         $this->extractIdAttributes("chapter" . $this->chapterCount, $chapter);
         $this->opf->addItemRef("chapter" . $this->chapterCount);
         $navPoint = new NavPoint(StringHelper::decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
         $this->ncx->addNavPoint($navPoint);
         $this->ncx->chapterList[$chapterName] = $navPoint;
     } elseif (is_array($chapter)) {
         $this->log->logLine("addChapter: \$chapterName: {$chapterName} ; \$fileName: {$fileName} ; ");
         $fileNameParts = pathinfo($fileName);
         // pathinfo() omits 'extension' when the file name has none; fall back to an empty string
         // instead of reading an undefined index.
         $extension = isset($fileNameParts['extension']) ? $fileNameParts['extension'] : "";
         $name = $fileNameParts['filename'];
         $partCount = 0;
         $this->chapterCount++;
         // foreach replaces the former each() loop: each() was deprecated in PHP 7.2 and removed in PHP 8.0.
         // The array keys are not needed here, only the part contents.
         foreach ($chapter as $v) {
             if ($this->encodeHTML === true) {
                 $v = StringHelper::encodeHtml($v);
             }
             if ($externalReferences !== EPub::EXTERNAL_REF_IGNORE) {
                 $this->processChapterExternalReferences($v, $externalReferences, $baseDir);
             }
             $partCount++;
             $partName = $name . "_" . $partCount;
             $this->addFile($partName . "." . $extension, $partName, $v, "application/xhtml+xml");
             $this->extractIdAttributes($partName, $v);
             $this->opf->addItemRef($partName);
         }
         // The TOC entry for the whole chapter points at the first part.
         $partName = $name . "_1." . $extension;
         $navPoint = new NavPoint(StringHelper::decodeHtmlEntities($chapterName), $partName, $partName);
         $this->ncx->addNavPoint($navPoint);
         $this->ncx->chapterList[$chapterName] = $navPoint;
     } elseif (!isset($chapterData) && strpos($fileName, "#") > 0) {
         // No content, but the file name carries a fragment ("file#id"): add a nav point only.
         $this->chapterCount++;
         //$this->opf->addItemRef("chapter" . $this->chapterCount);
         $id = preg_split("/[#]/", $fileName);
         // NOTE(review): the lookup below only runs when $this->isLogging is truthy — confirm that
         // gating the href resolution on the logging flag is intended.
         if (sizeof($id) == 2 && $this->isLogging) {
             // NOTE(review): when $id[0] contains no ".", $name stays a one-element array — verify
             // that getItemByHref() accepts that.
             $name = preg_split('/[\\.]/', $id[0]);
             if (sizeof($name) > 1) {
                 $name = $name[0];
             }
             $rv = $this->opf->getItemByHref($name, true);
             if ($rv != false) {
                 /** @var Item $item */
                 foreach ($rv as $item) {
                     // Use the href of the first item that reports having this index point.
                     if ($item->hasIndexPoint($id[1])) {
                         $fileName = $item->getHref() . "#" . $id[1];
                         break;
                     }
                 }
             }
         }
         $navPoint = new NavPoint(StringHelper::decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
         $this->ncx->addNavPoint($navPoint);
         $this->ncx->chapterList[$chapterName] = $navPoint;
     } elseif (!isset($chapterData) && $fileName == "TOC.xhtml") {
         $this->chapterCount++;
         $this->opf->addItemRef("toc");
         $navPoint = new NavPoint(StringHelper::decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
         $this->ncx->addNavPoint($navPoint);
         $this->ncx->chapterList[$chapterName] = $navPoint;
         $this->tocNavAdded = true;
     }
     return $navPoint;
 }

示例#3

0

显示文件

文件： EPub.php 项目： elevenone/PHPePub

 /**
  * Add a chapter to the book.
  *
  * As a chapter file should not exceed 250kB, you can pass an array with multiple parts as $chapterData.
  * These will still only show up as a single chapter in the book TOC.
  *
  * Depending on the arguments, one of the following happens:
  *  - $chapterData is a non-empty string: it is stored as one file under $fileName.
  *  - $chapterData is an array (or a string that was auto-split into more than one part): every part is
  *    stored as "<name>_<n>.<extension>", and the nav point refers to part 1.
  *  - $chapterData is not set and $fileName contains a "#" after its first character: only a nav point is added.
  *  - $chapterData is not set and $fileName is "TOC.xhtml": the "toc" item is referenced and a nav point is added.
  * In every other case nothing is added and FALSE is returned.
  *
  * @param string            $chapterName        Name of the chapter, will be used in the TOC
  * @param string            $fileName           Filename to use for the chapter, must be unique for the book.
  * @param string|array|null $chapterData        Chapter text in XHTML, or an array of valid XHTML parts for the chapter. Each file should NOT exceed 250kB.
  * @param bool              $autoSplit          Should the chapter be split if it exceeds the default split size? Default=FALSE, only used if $chapterData is a string.
  * @param int               $externalReferences How to handle external references, EPub::EXTERNAL_REF_IGNORE, EPub::EXTERNAL_REF_ADD or EPub::EXTERNAL_REF_REMOVE_IMAGES? See documentation for <code>processChapterExternalReferences</code> for explanation. Default is EPub::EXTERNAL_REF_IGNORE.
  * @param string            $baseDir            Default is "", meaning it is pointing to the document root. NOT used if $externalReferences is set to EPub::EXTERNAL_REF_IGNORE.
  *
  * @return mixed $success            FALSE if the book is finalized or nothing could be added, else the new NavPoint.
  */
 function addChapter($chapterName, $fileName, $chapterData = null, $autoSplit = false, $externalReferences = EPub::EXTERNAL_REF_IGNORE, $baseDir = "")
 {
     if ($this->isFinalized) {
         return false;
     }
     $fileName = RelativePath::getRelativePath($fileName);
     // Strip any leading slashes, backslashes and dots from the file name.
     $fileName = preg_replace('#^[/\\.]+#i', "", $fileName);
     $navPoint = false;
     $chapter = $chapterData;
     if ($autoSplit && is_string($chapterData) && mb_strlen($chapterData) > $this->splitDefaultSize) {
         $splitter = new EPubChapterSplitter();
         // Split at the same size that triggered the split; otherwise the splitter falls back to
         // its own default and a smaller configured size would never produce multiple parts.
         $splitter->setSplitSize($this->splitDefaultSize);
         $chapterArray = $splitter->splitChapter($chapterData);
         // Only switch to the multi-part path when the splitter really produced more than one part.
         if (count($chapterArray) > 1) {
             $chapter = $chapterArray;
         }
     }
     if (!empty($chapter) && is_string($chapter)) {
         if ($externalReferences !== EPub::EXTERNAL_REF_IGNORE) {
             $htmlDirInfo = pathinfo($fileName);
             $htmlDir = preg_replace('#^[/\\.]+#i', "", $htmlDirInfo["dirname"] . "/");
             $this->processChapterExternalReferences($chapter, $externalReferences, $baseDir, $htmlDir);
         }
         if ($this->encodeHTML === true) {
             $chapter = $this->encodeHtml($chapter);
         }
         $this->chapterCount++;
         $this->addFile($fileName, "chapter" . $this->chapterCount, $chapter, "application/xhtml+xml");
         $this->opf->addItemRef("chapter" . $this->chapterCount);
         $navPoint = new NavPoint($this->decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
         $this->ncx->addNavPoint($navPoint);
         $this->ncx->chapterList[$chapterName] = $navPoint;
     } elseif (is_array($chapter)) {
         $fileNameParts = pathinfo($fileName);
         // pathinfo() omits 'extension' when the file name has none; fall back to an empty string
         // instead of reading an undefined index.
         $extension = isset($fileNameParts['extension']) ? $fileNameParts['extension'] : "";
         $name = $fileNameParts['filename'];
         $partCount = 0;
         $this->chapterCount++;
         // foreach replaces the former each() loop: each() was deprecated in PHP 7.2 and removed in PHP 8.0.
         // The array keys are not needed here, only the part contents.
         foreach ($chapter as $v) {
             if ($this->encodeHTML === true) {
                 $v = $this->encodeHtml($v);
             }
             if ($externalReferences !== EPub::EXTERNAL_REF_IGNORE) {
                 $this->processChapterExternalReferences($v, $externalReferences, $baseDir);
             }
             $partCount++;
             $partName = $name . "_" . $partCount;
             $this->addFile($partName . "." . $extension, $partName, $v, "application/xhtml+xml");
             $this->opf->addItemRef($partName);
         }
         // The TOC entry for the whole chapter points at the first part.
         $partName = $name . "_1." . $extension;
         $navPoint = new NavPoint($this->decodeHtmlEntities($chapterName), $partName, $partName);
         $this->ncx->addNavPoint($navPoint);
         $this->ncx->chapterList[$chapterName] = $navPoint;
     } elseif (!isset($chapterData) && strpos($fileName, "#") > 0) {
         // No content, but the file name carries a fragment ("file#id"): add a nav point only.
         $this->chapterCount++;
         //$this->opf->addItemRef("chapter" . $this->chapterCount);
         $navPoint = new NavPoint($this->decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
         $this->ncx->addNavPoint($navPoint);
         $this->ncx->chapterList[$chapterName] = $navPoint;
     } elseif (!isset($chapterData) && $fileName == "TOC.xhtml") {
         $this->chapterCount++;
         $this->opf->addItemRef("toc");
         $navPoint = new NavPoint($this->decodeHtmlEntities($chapterName), $fileName, "chapter" . $this->chapterCount);
         $this->ncx->addNavPoint($navPoint);
         $this->ncx->chapterList[$chapterName] = $navPoint;
         $this->tocNavAdded = true;
     }
     return $navPoint;
 }

示例#4

0

显示文件

文件： EPub.Example2.php 项目： AlpinHologramm/PHPePub

$book->backLevel();
// More advanced use of the splitter:
// Still using Chapter 4, but as you can see, "Chapter 4" also contains a header for Chapter 5.
$splitter = new EPubChapterSplitter();
$splitter->setSplitSize(15000);
// For this test, we split at approx 15k. Default is 250000 had we left it alone.
$log->logLine("new EPubChapterSplitter()");
/* Using the # as regexp delimiter here, it makes writing the regexp easier.
 *  in this case we could have just searched for "Chapter ", or if we were using regexp '#^<h1>Chapter #i',
 *  using regular text (no regexp delimiters) will look for the text after the first tag. Meaning had we used
 *  "Chapter ", any paragraph or header starting with "Chapter " would have matched. The regexp equivalent of
 *  "Chapter " is '#^<.+?>Chapter #'
 * Essentially, the search string is looking for lines starting with...
 */
$log->logLine("Add Chapter 5");
$html2 = $splitter->splitChapter($chapter5, true, "Chapter ");
/* '#^<.+?>Chapter \d*#i'); */
$log->logLine("Split chapter 5");
$idx = 0;
while (list($k, $v) = each($html2)) {
    $idx++;
    // Because we used a string search in the splitter, the returned hits are put in the key part of the array.
    // The entire HTML tag of the line matching the chapter search.
    // find the text inside the tags
    preg_match('#^<(\\w+)\\ *.*?>(.+)</\\ *\\1>$#i', $k, $cName);
    // because of the back reference, the tag name is in $cName[1], and the content is in $cName[2]
    // Change any line breaks in the chapter name to " - "
    $cName = preg_replace('#<br.+?>#i', " - ", $cName[2]);
    // Remove any other tags
    $cName = preg_replace('#<.+?>#i', " ", $cName);
    // clean the chapter name by removing any double spaces left behind to single space.

PHP PHPePub\Core EPubChapterSplitter::splitChapter示例