Example #1
0
 /**
  * Create a DOMQuery from an HTML string.
  *
  * When the string contains a <!DOCTYPE>, <html>, <head> or <body> tag it is parsed
  * as-is, and any <meta ... charset=...> / <base href=...> found in the markup
  * overrides the $charset / $baseUrl arguments. Otherwise the string is treated as
  * a fragment and wrapped in a minimal HTML document before parsing.
  *
  * The resulting node set depends on the first tag that was detected:
  *  - no document tag (fragment): the child nodes of the generated <body>;
  *  - <!DOCTYPE>: the DOMDocument itself;
  *  - <html>, <head> or <body>: the first element with that tag name, if any.
  *
  * When a base URL is known, src / href / action attributes are rewritten through
  * Url::combine().
  *
  * @param string      $string  HTML document or fragment
  * @param null|string $baseUrl base URL used to rewrite src/href/action attributes
  * @param null|string $charset source charset; defaults to UTF-8 when not given or detected
  * @return DOMQuery
  */
 public static function createFromString($string, $baseUrl = null, $charset = null)
 {
     // Apply the default first so the wrapper <meta> built for fragments below
     // never ends up with an empty "charset=" value.
     if (null === $charset) {
         $charset = 'UTF-8';
     }

     $tag = null;
     if (preg_match('/<(!DOCTYPE|html[ >]|head[ >]|body[ >])/siU', trim($string), $match)) {
         // The capture may include the trailing ' ' or '>' delimiter; strip it.
         $tag = strtolower(trim($match[1]));
         $tag = str_replace(array('>', ' '), '', $tag);
         // Values declared in the markup take precedence over the arguments.
         if (preg_match('/\\<meta[^\\>]+charset *= *["\']?([a-zA-Z\\-0-9]+)/i', $string, $matches)) {
             $charset = $matches[1];
         }
         if (preg_match('/\\<base[^\\>]+href *= *["\']?([^"\'>]+)["\']?/i', $string, $matches)) {
             $baseUrl = $matches[1];
         }
     } else {
         $string = '<!DOCTYPE html>
                     <html>
                         <head><meta http-equiv="content-type" content="text/html; charset=' . $charset . '"></head>
                         <body>' . $string . '</body>
                     </html>';
     }

     // Turn non-ASCII characters into HTML entities when mbstring knows the charset.
     // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of PHP 8.2; a replacement
     // must keep producing "&nbsp;" for the substitution below to stay equivalent.
     if (function_exists('mb_convert_encoding')
         && in_array(strtolower($charset), array_map('strtolower', mb_list_encodings()), true)
     ) {
         $string = mb_convert_encoding($string, 'HTML-ENTITIES', $charset);
     }
     $string = str_replace('&nbsp;', '[nbsp]', $string);

     // Collect libxml parse errors internally while loading, then restore the previous setting.
     $current = libxml_use_internal_errors(true);
     // libxml_disable_entity_loader() is deprecated since PHP 8.0 and would emit a
     // deprecation on every call, so it is only toggled on older runtimes.
     $toggleEntityLoader = \PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');
     $disableEntities = null;
     if ($toggleEntityLoader) {
         $disableEntities = libxml_disable_entity_loader(true);
     }
     $dom = new \DOMDocument('1.0', $charset);
     $dom->validateOnParse = true;
     @$dom->loadHTML($string);
     libxml_use_internal_errors($current);
     if ($toggleEntityLoader) {
         libxml_disable_entity_loader($disableEntities);
     }

     $nodes = array();
     if (null === $tag) {
         // Fragment: expose the children of the generated <body>. The guard avoids a
         // fatal error should the parsed document have no <body> element.
         $body = $dom->getElementsByTagName('body')->item(0);
         if ($body) {
             foreach ($body->childNodes as $node) {
                 $nodes[] = $node;
             }
         }
     } elseif ($tag === '!doctype') {
         $nodes[] = $dom;
     } else {
         $node = $dom->getElementsByTagName($tag)->item(0);
         if ($node) {
             $nodes[] = $node;
         }
     }

     $domQuery = new self($nodes);
     if (count($domQuery->nodes)) {
         $domQuery->loadDOMDocument(reset($domQuery->nodes))->formatOutput = true;
     }

     if (null !== $baseUrl) {
         // src is only rewritten when the attribute has a non-empty value.
         foreach ($domQuery->find('frame, iframe, img, input, script') as $el) {
             $src = $el->getAttribute('src');
             if ($src) {
                 $el->setAttribute('src', Url::combine($baseUrl, $src));
             }
         }
         // href and action are rewritten unconditionally: an element without the attribute
         // receives whatever Url::combine() returns for an empty reference.
         foreach ($domQuery->find('a, area, link') as $el) {
             $el->setAttribute('href', Url::combine($baseUrl, $el->getAttribute('href')));
         }
         foreach ($domQuery->find('form') as $el) {
             $el->setAttribute('action', Url::combine($baseUrl, $el->getAttribute('action')));
         }
     }

     return $domQuery;
 }