Exemple #1
0
 /**
  * Extract HTML from XHTML container.
  *
  * @param \DOMElement $element
  *
  * @return string
  *
  * @since 1.0
  */
 public static function extract(\DOMElement $element)
 {
     // Copy the container into its own document so that serialisation only
     // covers this subtree.
     $doc = new \DOMDocument('1.0', 'utf-8');
     $doc->appendChild($doc->importNode($element, true));

     // lookupPrefix() yields null (not '') when the XHTML namespace is the
     // default namespace or is not declared; normalise to '' so no bogus ":"
     // prefix ends up in the patterns below.
     $prefix = (string) $doc->lookupPrefix('http://www.w3.org/1999/xhtml');
     $hasPrefix = '' !== $prefix;
     // Namespace prefixes may contain "." or "-", so quote before embedding.
     $tagPrefix = $hasPrefix ? preg_quote($prefix . ':', '/') : '';

     // Remove the XML declaration together with the opening <div ...> tag,
     // then the closing </div> at the very end of the serialised text.
     $patterns = [
         '/<\\?xml[^<]*>[^<]*<' . $tagPrefix . 'div[^<]*/',
         '/<\\/' . $tagPrefix . 'div>\\s*$/',
     ];
     $text = preg_replace($patterns, '', $doc->saveXML());

     if ($hasPrefix) {
         // Strip the namespace prefix from the remaining start and end tags.
         $text = preg_replace('/(<[\\/]?)' . $tagPrefix . '([a-zA-Z]+)/', '$1$2', $text);
     }

     return $text;
 }
Exemple #2
0
 /**
  * Get the entry content
  *
  * @return string
  */
 public function getContent()
 {
     // Return the memoised value if this entry was already resolved.
     if (array_key_exists('content', $this->_data)) {
         return $this->_data['content'];
     }
     $content = null;
     $el = $this->getXpath()->query($this->getXpathPrefix() . '/atom:content');
     if ($el->length > 0) {
         $el = $el->item(0);
         $type = $el->getAttribute('type');
         switch ($type) {
             case '':
             case 'text':
             case 'text/plain':
             case 'html':
             case 'text/html':
                 // Textual types: the node's text value is the content.
                 $content = $el->nodeValue;
                 break;
             case 'xhtml':
                 $this->getXpath()->registerNamespace('xhtml', 'http://www.w3.org/1999/xhtml');
                 $xhtml = $this->getXpath()->query($this->getXpathPrefix() . '/atom:content/xhtml:div')->item(0);
                 if (null === $xhtml) {
                     // No xhtml:div child: nothing to serialise, so leave
                     // $content empty and use the description fallback below.
                     break;
                 }
                 // Serialise the div in a standalone document and hand the
                 // markup plus the XHTML prefix to _collectXhtml().
                 $d = new DOMDocument('1.0', $this->getEncoding());
                 $xhtmls = $d->importNode($xhtml, true);
                 $d->appendChild($xhtmls);
                 $content = $this->_collectXhtml($d->saveXML(), $d->lookupPrefix('http://www.w3.org/1999/xhtml'));
                 break;
             // Any other type leaves $content as null.
         }
     }
     // Fall back to the description when no usable content was found.
     if (!$content) {
         $content = $this->getDescription();
     }
     // Cast first: the description may be null, and trim(null) is deprecated.
     $this->_data['content'] = trim((string) $content);
     return $this->_data['content'];
 }
Exemple #3
0
 /**
  * Process external references from a HTML to the book. The chapter itself is not stored.
  * the HTML is scanned for &lt;link..., &lt;style..., and &lt;img tags.
  * Embedded CSS styles and links will also be processed.
  * Script tags are not processed, as scripting should be avoided in e-books.
  *
  * EPub keeps track of added files, and duplicate files referenced across multiple
  *  chapters, are only added once.
  *
  * If the $doc is a string, it is assumed to be the content of an HTML file,
  *  otherwise it is assumed to be a DOMDocument.
  *
  * Basedir is the root dir the HTML is supposed to "live" in, used to resolve
  *  relative references such as <code>&lt;img src="../images/image.png"/&gt;</code>
  *
  * $externalReferences determines how the function will handle external references.
  *
  * @param mixed  $doc (referenced)
  * @param int    $externalReferences How to handle external references, EPub::EXTERNAL_REF_IGNORE, EPub::EXTERNAL_REF_ADD or EPub::EXTERNAL_REF_REMOVE_IMAGES? Default is EPub::EXTERNAL_REF_ADD.
  * @param String $baseDir Default is "", meaning it is pointing to the document root.
  * @param String $htmlDir The path to the parent HTML file's directory from the root of the archive.
  *
  * @return Bool  FALSE if unsuccessful (book is finalized or $externalReferences == EXTERNAL_REF_IGNORE).
  */
 protected function processChapterExternalReferences(&$doc, $externalReferences = EPub::EXTERNAL_REF_ADD, $baseDir = "", $htmlDir = "")
 {
     if ($this->isFinalized || $externalReferences === EPub::EXTERNAL_REF_IGNORE) {
         return FALSE;
     }
     // Turn every directory segment of $htmlDir into "../".
     $backPath = preg_replace('#[^/]+/#i', "../", $htmlDir);
     $isDocAString = is_string($doc);
     if ($isDocAString) {
         $xmlDoc = new DOMDocument();
         // Parser warnings for sloppy HTML are deliberately suppressed.
         @$xmlDoc->loadHTML($doc);
     } else {
         $xmlDoc = $doc;
     }
     $this->processChapterStyles($xmlDoc, $externalReferences, $baseDir, $htmlDir);
     $this->processChapterLinks($xmlDoc, $externalReferences, $baseDir, $htmlDir, $backPath);
     $this->processChapterImages($xmlDoc, $externalReferences, $baseDir, $htmlDir, $backPath);
     if ($isDocAString) {
         // Rebuild the chapter as an XHTML 1.1 document and write it back
         // through the by-reference $doc argument.
         $xml2Doc = new DOMDocument('1.0', "utf-8");
         $xml2Doc->loadXML("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\n\t\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n</html>\n");
         $html = $xml2Doc->getElementsByTagName("html")->item(0);
         foreach (array("head", "body") as $tagName) {
             $section = $xmlDoc->getElementsByTagName($tagName)->item(0);
             // The HTML parser does not synthesise a <head> for fragments, so
             // only copy the sections that actually exist.
             if ($section !== NULL) {
                 $html->appendChild($xml2Doc->importNode($section, TRUE));
             }
         }
         // force pretty printing and correct formatting, should not be needed, but it is.
         $xml = new DOMDocument('1.0', "utf-8");
         $xml->preserveWhiteSpace = FALSE;
         $xml->formatOutput = TRUE;
         $xml->loadXML($xml2Doc->saveXML());
         $doc = $xml->saveXML();
     }
     return TRUE;
 }
 /**
  * Split $chapter into multiple parts.
  * 
  * The search string can either be a regular string or a PCRE (Perl-compatible) regular expression pattern as defined here: http://www.php.net/manual/en/pcre.pattern.php
  * If the search string is a regular string, the matching will be for lines in the HTML starting with the string given
  * 
  * @param String $chapter XHTML file
  * @param Bool   $splitOnSearchString Split on chapter boundaries, Splitting on search strings disables the split size check.  
  * @param String $searchString Chapter string to search for can be fixed text, or a regular expression pattern.
  * 
  * @return array with 1 or more parts
  */
 function splitChapter($chapter, $splitOnSearchString = false, $searchString = '/^Chapter\\ /i')
 {
     $chapterData = array();
     // A search string that looks like "<delimiter>...<delimiter><modifiers>"
     // is used as a pattern as-is; anything else is quoted and wrapped so it
     // matches markup that starts with a tag followed by the literal text.
     $isSearchRegexp = $splitOnSearchString && preg_match('#^(\\D|\\S|\\W).+\\1[imsxeADSUXJu]*$#m', $searchString) == 1;
     if ($splitOnSearchString && !$isSearchRegexp) {
         $searchString = '#^<.+?>' . preg_quote($searchString, '#') . "#";
     }
     // Size-based splitting is skipped for chapters that already fit.
     if (!$splitOnSearchString && strlen($chapter) <= $this->splitDefaultSize) {
         return array($chapter);
     }
     $xmlDoc = new DOMDocument();
     @$xmlDoc->loadHTML($chapter);
     $headNode = $xmlDoc->getElementsByTagName("head")->item(0);
     $bodyNode = $xmlDoc->getElementsByTagName("body")->item(0);
     if ($bodyNode === null) {
         // No body was parsed, so there is nothing to split.
         return array($chapter);
     }
     // Skeleton for every part: the original text up to and including the
     // opening <html ...> tag, closed immediately.
     $htmlPos = stripos($chapter, "<html");
     $htmlEndPos = stripos($chapter, ">", $htmlPos);
     $newXML = substr($chapter, 0, $htmlEndPos + 1) . "\n</html>";
     $headerLength = strlen($newXML);
     $files = array();        // one DocumentFragment per output part
     $chapterNames = array(); // part index => markup of the node that opened it
     $domDepth = 0;
     $domPath = array();       // original ancestors currently descended into
     $domClonedPath = array(); // shallow clones of those ancestors
     $curFile = $xmlDoc->createDocumentFragment();
     $files[] = $curFile;
     $curParent = $curFile;
     $curSize = 0;
     $bodyLen = strlen($xmlDoc->saveXML($bodyNode));
     // saveXML(null) would serialise the whole document, so a missing head
     // counts as zero bytes.
     $headLen = ($headNode === null ? 0 : strlen($xmlDoc->saveXML($headNode))) + $headerLength;
     $partSize = $this->splitDefaultSize - $headLen;
     if ($bodyLen > $partSize) {
         // Even the parts out rather than leaving a small remainder.
         $parts = ceil($bodyLen / $partSize);
         $partSize = $bodyLen / $parts - $headLen;
     }
     $node = $bodyNode->firstChild;
     while ($node !== null) {
         $nodeData = $xmlDoc->saveXML($node);
         $nodeLen = strlen($nodeData);
         if ($nodeLen > $partSize && $node->hasChildNodes()) {
             // Too large for one part: walk its children instead.
             // NOTE(review): $nodeData/$nodeLen still describe the parent for
             // the rest of this iteration; kept as in the original algorithm.
             $domPath[] = $node;
             $domClonedPath[] = $node->cloneNode(false);
             $domDepth++;
             $node = $node->firstChild;
         }
         $node2 = $node->nextSibling;
         if ($node->nodeName != "#text") {
             $doSplit = $splitOnSearchString && preg_match($searchString, $nodeData) == 1;
             if ($curSize > 0 && ($doSplit || !$splitOnSearchString && $curSize + $nodeLen > $partSize)) {
                 $curFile = $xmlDoc->createDocumentFragment();
                 $files[] = $curFile;
                 $curParent = $curFile;
                 // Recreate the chain of open ancestors inside the new part
                 // (foreach replaces each(), which was removed in PHP 8).
                 foreach ($domClonedPath as $ancestor) {
                     $newParent = $ancestor->cloneNode(false);
                     $curParent->appendChild($newParent);
                     $curParent = $newParent;
                 }
                 $curSize = strlen($xmlDoc->saveXML($curFile));
             }
             if ($doSplit) {
                 // Key the name by the part that receives the matching node, so
                 // content before the first match cannot shift the names.
                 $chapterNames[count($files) - 1] = trim($nodeData);
             }
             $curParent->appendChild($node->cloneNode(true));
             $curSize += $nodeLen;
         }
         $node = $node2;
         while ($node === null && $domDepth > 0) {
             // Children exhausted: continue with the ancestor's next sibling.
             $domDepth--;
             $node = array_pop($domPath)->nextSibling;
             array_pop($domClonedPath);
             // Wrappers only exist once a split happened during the descent;
             // never climb above the fragment itself.
             if ($curParent->parentNode !== null) {
                 $curParent = $curParent->parentNode;
             }
         }
     }
     $encoding = (string) $xmlDoc->xmlEncoding;
     $xml = new DOMDocument('1.0', $encoding);
     $xml->preserveWhiteSpace = false;
     $xml->formatOutput = true;
     foreach ($files as $idx => $part) {
         $xml2Doc = new DOMDocument('1.0', $encoding);
         $xml2Doc->loadXML($newXML);
         $html = $xml2Doc->getElementsByTagName("html")->item(0);
         if ($headNode !== null) {
             $html->appendChild($xml2Doc->importNode($headNode, true));
         }
         $partBody = $xml2Doc->createElement("body");
         $html->appendChild($partBody);
         if ($part->hasChildNodes()) {
             $partBody->appendChild($xml2Doc->importNode($part, true));
         }
         // force pretty printing and correct formatting, should not be needed, but it is.
         $xml->loadXML($xml2Doc->saveXML());
         // Parts without a matching heading fall back to their numeric index.
         $key = $splitOnSearchString && isset($chapterNames[$idx]) ? $chapterNames[$idx] : $idx;
         $chapterData[$key] = $xml->saveXML();
     }
     return $chapterData;
 }
 /**
  * Split $chapter into multiple parts.
  *
  * @param $chapter
  * @return array with 1 or more parts
  */
 function splitChapter($chapter)
 {
     // Chapters that already fit are returned untouched. The limit is the
     // splitDefaultSize property (the original read an undefined local).
     if (strlen($chapter) <= $this->splitDefaultSize) {
         return array($chapter);
     }
     $chapterData = array();
     $xmlDoc = new DOMDocument();
     $xmlDoc->loadHTML($chapter);
     $headNode = $xmlDoc->getElementsByTagName("head")->item(0);
     $bodyNode = $xmlDoc->getElementsByTagName("body")->item(0);
     if ($bodyNode === null) {
         // No body was parsed, so there is nothing to split.
         return array($chapter);
     }
     // Skeleton for every part: the original text up to and including the
     // opening <html ...> tag, closed immediately.
     $htmlPos = stripos($chapter, "<html");
     $htmlEndPos = stripos($chapter, ">", $htmlPos);
     $newXML = substr($chapter, 0, $htmlEndPos + 1) . "\n</html>";
     $headerLength = strlen($newXML);
     $files = array();         // one DocumentFragment per output part
     $domDepth = 0;
     $domPath = array();       // original ancestors currently descended into
     $domClonedPath = array(); // shallow clones of those ancestors
     $curFile = $xmlDoc->createDocumentFragment();
     $files[] = $curFile;
     $curParent = $curFile;
     $curSize = 0;
     $bodyLen = strlen($xmlDoc->saveXML($bodyNode));
     // saveXML(null) would serialise the whole document, so a missing head
     // counts as zero bytes.
     $headLen = ($headNode === null ? 0 : strlen($xmlDoc->saveXML($headNode))) + $headerLength;
     $partSize = $this->splitDefaultSize - $headLen;
     if ($bodyLen > $partSize) {
         // Even the parts out rather than leaving a small remainder.
         $parts = ceil($bodyLen / $partSize);
         $partSize = $bodyLen / $parts - $headLen;
     }
     $node = $bodyNode->firstChild;
     while ($node !== null) {
         $nodeData = $xmlDoc->saveXML($node);
         $nodeLen = strlen($nodeData);
         if ($nodeLen > $partSize && $node->hasChildNodes()) {
             // Too large for one part: walk its children instead.
             // NOTE(review): $nodeLen still describes the parent for the rest
             // of this iteration; kept as in the original algorithm.
             $domPath[] = $node;
             $domClonedPath[] = $node->cloneNode(false);
             $domDepth++;
             $node = $node->firstChild;
         }
         $node2 = $node->nextSibling;
         if ($node->nodeName != "#text") {
             if ($curSize > 0 && $curSize + $nodeLen > $partSize) {
                 $curFile = $xmlDoc->createDocumentFragment();
                 $files[] = $curFile;
                 $curParent = $curFile;
                 // Recreate the chain of open ancestors inside the new part
                 // (foreach replaces each(), which was removed in PHP 8).
                 foreach ($domClonedPath as $ancestor) {
                     $newParent = $ancestor->cloneNode(false);
                     $curParent->appendChild($newParent);
                     $curParent = $newParent;
                 }
                 $curSize = strlen($xmlDoc->saveXML($curFile));
             }
             $curParent->appendChild($node->cloneNode(true));
             $curSize += $nodeLen;
         }
         $node = $node2;
         while ($node === null && $domDepth > 0) {
             // Children exhausted: continue with the ancestor's next sibling.
             $domDepth--;
             $node = array_pop($domPath)->nextSibling;
             array_pop($domClonedPath);
             // Wrappers only exist once a split happened during the descent;
             // never climb above the fragment itself.
             if ($curParent->parentNode !== null) {
                 $curParent = $curParent->parentNode;
             }
         }
     }
     $encoding = (string) $xmlDoc->xmlEncoding;
     $xml = new DOMDocument('1.0', $encoding);
     $xml->preserveWhiteSpace = false;
     $xml->formatOutput = true;
     foreach ($files as $part) {
         $xml2Doc = new DOMDocument('1.0', $encoding);
         $xml2Doc->loadXML($newXML);
         $html = $xml2Doc->getElementsByTagName("html")->item(0);
         if ($headNode !== null) {
             $html->appendChild($xml2Doc->importNode($headNode, true));
         }
         $partBody = $xml2Doc->createElement("body");
         $html->appendChild($partBody);
         if ($part->hasChildNodes()) {
             $partBody->appendChild($xml2Doc->importNode($part, true));
         }
         // force pretty printing and correct formatting, should not be needed, but it is.
         $xml->loadXML($xml2Doc->saveXML());
         $chapterData[] = $xml->saveXML();
     }
     return $chapterData;
 }
Exemple #6
0
<?php

// Checks that namespace lookups give the same answer whether they are made on
// the document node or on its root element.
$doc = new DOMDocument();
$doc->load(dirname(__FILE__) . "/nsdoc.xml");
$root = $doc->documentElement;

// Appending "\n" turns every result (including null and booleans) into a
// string, so the comparisons below are plain string comparisons.
$duri = $doc->lookupNamespaceURI("ns2") . "\n";
$euri = $root->lookupNamespaceURI("ns2") . "\n";
var_dump($duri === $euri);

$dpref = $doc->lookupPrefix("http://ns2") . "\n";
$epref = $root->lookupPrefix("http://ns2") . "\n";
var_dump($dpref === $epref);

$disdef = $doc->isDefaultNamespace("http://ns") . "\n";
$eisdef = $root->isDefaultNamespace("http://ns") . "\n";
// Compare the isDefaultNamespace() results; the original re-dumped the
// prefix comparison here and never checked these two values.
var_dump($disdef === $eisdef);