/**
 * Parses an HTML fragment and splits the children of its <body> into pages.
 *
 * A new page is started at the first non-blank body child, and the current
 * page is closed once it has collected $paragraphsPerPage direct <p> children
 * of <body>. Only top-level <p> elements are counted; paragraphs nested inside
 * other elements do not advance the counter.
 *
 * Parse problems reported by libxml are logged as warnings and do not abort
 * construction. Input that yields no <body> element (for example an empty
 * string) results in zero pages.
 *
 * @param string $html              HTML markup to paginate.
 * @param string $encoding          Character encoding of $html, passed to mb_convert_encoding().
 * @param int    $pageNumber        Zero-based page to select; only stored when it is within
 *                                  [0, pageCount), otherwise $this->pageNumber is left untouched.
 * @param int    $paragraphsPerPage Number of top-level <p> elements that closes a page.
 */
public function __construct($html, $encoding, $pageNumber, $paragraphsPerPage = HTMLPager::PARAGRAPH_LIMIT) {
    $dom = new DOMDocument();

    // Collect libxml errors ourselves, remembering the caller's setting so it
    // can be restored afterwards instead of being forced off.
    $previousErrorMode = libxml_use_internal_errors(true);
    libxml_clear_errors(); // clean up any errors belonging to other operations

    // NOTE(review): the 'HTML-ENTITIES' target encoding is deprecated as of
    // PHP 8.2; kept as-is because replacing it could change parsing results.
    $source = mb_convert_encoding($html, 'HTML-ENTITIES', $encoding);

    // loadHTML() rejects an empty source, so skip parsing when there is nothing to parse.
    if (is_string($source) && $source !== '') {
        $dom->loadHTML($source);
    }

    foreach (libxml_get_errors() as $error) {
        Kurogo::log(LOG_WARNING, "HTMLPager got loadHTML warning (line {$error->line}; column {$error->column}) {$error->message}", 'data');
    }
    libxml_clear_errors(); // free up memory associated with the errors
    libxml_use_internal_errors($previousErrorMode);

    // item(0) is null when the document has no <body>; treat that as "no content".
    $body = $dom->getElementsByTagName("body")->item(0);
    $bodyNodes = ($body !== null) ? $body->childNodes : array();

    $currentPage = null;
    $pages = array();
    $currentParagraphCount = 0;

    foreach ($bodyNodes as $node) {
        if ($currentPage === null) {
            // need to start a new page
            if ($node->nodeName == "#text" && trim($node->nodeValue) == "") {
                continue; // this node is blank so do not start a new page yet
            }
            $currentPage = new HTMLPage();
            $pages[] = $currentPage;
        }

        $currentPage->addNode($node);

        if ($node->nodeName == "p") {
            $currentParagraphCount++;
        }

        // Page is full: the next non-blank node will open a fresh page.
        if ($currentParagraphCount == $paragraphsPerPage) {
            $currentPage = null;
            $currentParagraphCount = 0;
        }
    }

    $this->pages = $pages;
    $this->pageCount = count($pages);

    // Out-of-range page numbers are ignored rather than clamped.
    if ($pageNumber >= 0 && $pageNumber < $this->pageCount) {
        $this->pageNumber = $pageNumber;
    }
}