/**
 * Create DomQuery by given string
 *
 * Input that contains a DOCTYPE, <html>, <head> or <body> tag is parsed as a
 * complete document; anything else is treated as a fragment and wrapped in a
 * minimal HTML document before parsing.
 *
 * @param string      $string  HTML document or fragment
 * @param null|string $baseUrl Base URL used to absolutize src/href/action attributes;
 *                             for document input it is replaced by a <base href> if one is found
 * @param null|string $charset Source charset; for document input it is replaced by a
 *                             <meta ... charset> declaration if one is found. Defaults to UTF-8
 * @return DOMQuery
 */
public static function createFromString($string, $baseUrl = null, $charset = null)
{
    $tag = null;
    if (preg_match('/<(!DOCTYPE|html[ >]|head[ >]|body[ >])/siU', trim($string), $match)) {
        // Document-like input: remember which top-level construct matched first,
        // normalised to a bare lower-case name ("!doctype", "html", "head", "body").
        $tag = strtolower(trim($match[1]));
        $tag = str_replace(array('>', ' '), '', $tag);
        if (preg_match('/\\<meta[^\\>]+charset *= *["\']?([a-zA-Z\\-0-9]+)/i', $string, $matches)) {
            $charset = $matches[1];
        }
        if (preg_match('/\\<base[^\\>]+href *= *["\']?([^"\'>]+)["\']?/i', $string, $matches)) {
            $baseUrl = $matches[1];
        }
    } else {
        // Resolve the default charset BEFORE building the wrapper; otherwise a null
        // charset would produce an empty "charset=" declaration in the <meta> tag.
        if (null === $charset) {
            $charset = 'UTF-8';
        }
        $string = '<!DOCTYPE html> <html> <head><meta http-equiv="content-type" content="text/html; charset=' . $charset . '"></head> <body>' . $string . '</body> </html>';
    }

    if (null === $charset) {
        $charset = 'UTF-8';
    }

    // Convert non-ASCII characters to entities so the parser does not have to guess the encoding.
    // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of PHP 8.2; kept to preserve behaviour.
    if (function_exists('mb_convert_encoding')
        && in_array(strtolower($charset), array_map('strtolower', mb_list_encodings()), true)
    ) {
        $string = mb_convert_encoding($string, 'HTML-ENTITIES', $charset);
    }

    // Replace non-breaking spaces with a textual placeholder before parsing.
    // NOTE(review): the search string is a single non-breaking space (U+00A0), spelled as
    // UTF-8 bytes so it cannot be mistaken for an ASCII space. Confirm whether the "&nbsp;"
    // entity was intended here, since the entity conversion above presumably already turns
    // U+00A0 into an entity.
    $string = str_replace("\xC2\xA0", '[nbsp]', $string);

    // libxml_disable_entity_loader() is deprecated since PHP 8.0, so it is only
    // toggled on older runtimes.
    $toggleEntityLoader = \PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');

    $previousInternalErrors = libxml_use_internal_errors(true);
    $previousEntityLoader = null;
    if ($toggleEntityLoader) {
        $previousEntityLoader = libxml_disable_entity_loader(true);
    }

    $dom = new \DOMDocument('1.0', $charset);
    $dom->validateOnParse = true;
    @$dom->loadHTML($string);

    // Restore the global libxml state exactly as it was found.
    libxml_use_internal_errors($previousInternalErrors);
    if ($toggleEntityLoader) {
        libxml_disable_entity_loader($previousEntityLoader);
    }

    $nodes = array();
    if (null === $tag) {
        // Fragment: expose the children of the synthetic <body>, not the wrapper itself.
        $body = $dom->getElementsByTagName('body')->item(0);
        if ($body) {
            foreach ($body->childNodes as $node) {
                $nodes[] = $node;
            }
        }
    } elseif ($tag === '!doctype') {
        // Full document with a doctype: expose the document node.
        $nodes[] = $dom;
    } else {
        $node = $dom->getElementsByTagName($tag)->item(0);
        if ($node) {
            $nodes[] = $node;
        }
    }

    $domQuery = new self($nodes);
    if (count($domQuery->nodes)) {
        $domQuery->loadDOMDocument(reset($domQuery->nodes))->formatOutput = true;
    }

    if (null !== $baseUrl) {
        // Absolutize URL attributes. Elements lacking the attribute are left untouched,
        // so that e.g. a named anchor without href does not gain a link to the base URL.
        $urlAttributes = array(
            'frame, iframe, img, input, script' => 'src',
            'a, area, link' => 'href',
        );
        foreach ($urlAttributes as $selector => $attribute) {
            foreach ($domQuery->find($selector) as $el) {
                $value = $el->getAttribute($attribute);
                if ($value) {
                    $el->setAttribute($attribute, Url::combine($baseUrl, $value));
                }
            }
        }

        // Form actions are rewritten unconditionally, as in the original implementation.
        foreach ($domQuery->find('form') as $el) {
            $el->setAttribute('action', Url::combine($baseUrl, $el->getAttribute('action')));
        }
    }

    return $domQuery;
}