/**
 * Truncate given HTML string to specified length.
 *
 * If the `length_in_chars` option is false the HTML is trimmed by number
 * of words, otherwise by number of characters. The counting itself is
 * delegated to truncateNode(), which receives the merged options.
 *
 * The fragment is parsed with HTML5Lib when that library is loadable and
 * with DOMDocument otherwise (HTML parser first, XML parser as a retry).
 *
 * @param string       $html   HTML fragment; treated as UTF-8.
 * @param integer      $length Length limit, passed through to truncateNode().
 * @param string|array $opts   Options merged over static::$default_options.
 *                             A string is shorthand for array('ellipsis' => $opts).
 * @return string The truncated HTML with the temporary wrapper removed.
 * @throws \RuntimeException When DOMDocument can load the markup neither
 *                           as HTML nor as XML.
 */
public static function truncate($html, $length, $opts = array())
{
    if (is_string($opts)) {
        $opts = array('ellipsis' => $opts);
    }
    $opts = array_merge(static::$default_options, $opts);

    // Wrap the html in case it consists of adjacent nodes like
    // <p>foo</p><p>bar</p>, and turn every non-ASCII character into a
    // numeric character reference so the parsers are independent of charset
    // detection. Numeric references (unlike named ones such as &eacute;)
    // are also valid in the XML fallback below, and this avoids the
    // 'HTML-ENTITIES' pseudo-encoding that is deprecated since PHP 8.2.
    $html = mb_encode_numericentity(
        '<div>' . $html . '</div>',
        array(0x80, 0x10FFFF, 0, 0x1FFFFF),
        'UTF-8'
    );

    $root_node = null;

    // Parse using HTML5Lib if it's available.
    if (class_exists('HTML5Lib\\Parser')) {
        try {
            $doc = \HTML5Lib\Parser::parse($html);
            // html > body > wrapper div
            $root_node = $doc->documentElement->lastChild->lastChild;
        } catch (\Exception $e) {
            // Deliberately ignored: $root_node stays null, which sends us
            // to the DOMDocument fallback below.
        }
    }

    if ($root_node === null) {
        // HTML5Lib not available (or it failed), so we have to use
        // DOMDocument. We'll only be able to parse HTML5 if it's valid XML.
        $doc = new \DOMDocument('4.01', 'utf-8');
        $doc->formatOutput = false;
        $doc->preserveWhiteSpace = true;

        // loadHTML reports errors for HTML5 tags (article, nav, etc), so
        // errors are buffered instead of emitted, and if HTML parsing fails
        // we retry with the XML parser.
        $prev_use_errors = libxml_use_internal_errors(true);

        $loaded = true;
        if ($doc->loadHTML($html)) {
            // html > body > wrapper div
            $root_node = $doc->documentElement->lastChild->lastChild;
        } elseif ($doc->loadXML($html)) {
            // As XML, the wrapper div itself is the document element.
            $root_node = $doc->documentElement;
        } else {
            $loaded = false;
        }

        if (!$prev_use_errors) {
            // We were the ones who switched buffering on, so the buffered
            // parse errors are ours to discard; otherwise they would pile
            // up across calls. If the caller had buffering enabled already,
            // leave the buffer alone so their own errors are not lost.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($prev_use_errors);

        if (!$loaded) {
            throw new \RuntimeException('Unable to parse the given HTML as either HTML or XML.');
        }
    }

    // Only the first element of truncateNode()'s result is needed here.
    list($text) = static::truncateNode($doc, $root_node, $length, $opts);

    // Strip the temporary wrapper: trailing "</div>" (6 chars) and leading
    // "<div>" (5 chars). Assumes truncateNode() serialises the wrapper
    // element along with its content.
    return mb_substr(mb_substr($text, 0, -6), 5);
}
/**
 * Parsing a minimal HTML document is expected to yield a DOMDocument.
 */
public function testParse()
{
    $document = Parser::parse('<html><body></body></html>');

    $this->assertIsA($document, 'DOMDocument');
}