/**
 * Extract the CSS referenced by, or embedded in, a html code.
 *
 * Every <link> tag is removed from the html code. For the ones declaring
 * type="text/css" together with a href, the target is loaded and appended
 * to the collected CSS. The content of every <style> block is appended as
 * well (html comment markers removed) and the block is removed from the
 * html code. The collected CSS is finally given to analyseStyle().
 *
 * When a stylesheet is loaded from an absolute http:// or https:// url,
 * the url(...) references it contains are prefixed with the remote
 * location of the stylesheet.
 *
 * @param string $html the html code to process
 *
 * @return string the html code without its <link> and <style> tags
 */
public function extractStyle($html)
{
    // the CSS content
    $style = ' ';

    // extract the link tags, and remove them in the html code
    preg_match_all('/<link([^>]*)>/isU', $html, $match);
    $html = preg_replace('/<link[^>]*>/isU', '', $html);
    $html = preg_replace('/<\\/link[^>]*>/isU', '', $html);

    // analyse each link tag
    foreach ($match[1] as $code) {
        $tmp = $this->tagParser->extractTagAttributes($code);

        // only the links of type text/css that have a href are kept
        if (!isset($tmp['type'], $tmp['href']) || strtolower($tmp['type']) !== 'text/css') {
            continue;
        }

        // get the href
        $url = $tmp['href'];

        // get the content of the css file; an unreadable file contributes
        // an empty content instead of propagating the boolean false
        $content = @file_get_contents($url);
        if ($content === false) {
            $content = '';
        }

        // remote stylesheet: the url must START with http:// or https://
        // (a scheme appearing later, e.g. in a query string, does not count)
        if (preg_match('#^(https?://)(.*)$#is', $url, $urlParts)) {
            $scheme   = $urlParts[1];
            $segments = explode('/', $urlParts[2]);

            // the domain, "scheme://xxx/"
            $urlMain = $scheme . $segments[0] . '/';

            // the absolute url of the folder that contains the css file
            array_pop($segments);
            $urlSelf = $scheme . implode('/', $segments) . '/';

            // adapt the url in the css content
            $content = preg_replace('/url\\(([^\\\\][^)]*)\\)/isU', 'url(' . $urlSelf . '$1)', $content);
            $content = preg_replace('/url\\((\\\\[^)]*)\\)/isU', 'url(' . $urlMain . '$1)', $content);
        }
        // @TODO correction on url in absolute on a local css content

        // add to the CSS content
        $style .= $content . "\n";
    }

    // extract the style tags, and remove them in the html code
    preg_match_all('/<style[^>]*>(.*)<\\/style[^>]*>/isU', $html, $match);
    $html = preg_replace('/<style[^>]*>(.*)<\\/style[^>]*>/isU', '', $html);

    // analyse each style tag
    foreach ($match[1] as $code) {
        // remove the html comment markers, then add to the CSS content
        $code = str_replace('<!--', '', $code);
        $code = str_replace('-->', '', $code);

        $style .= $code . "\n";
    }

    // analyse the css content
    $this->analyseStyle($style);

    return $html;
}
/**
 * Convert a tag token into the list of nodes ("actions") to process,
 * validating that the tags are opened and closed in a consistent order.
 *
 * An auto-closed tag produces two actions: the opening node, then a clone
 * of it without params and flagged as a closure. Any other tag produces
 * a single action.
 *
 * TODO remove the reference on the $parents variable
 *
 * @param Token $token   the tag token to analyze
 * @param array $parents names of the currently opened tags, updated in place
 *
 * @return array the nodes to convert
 * @throws HtmlParsingException when a closure has no opened tag, or does not match the last opened tag
 */
protected function getTagAction(Token $token, &$parents)
{
    // tags that are allowed to stay unclosed
    $unclosedAllowed = array('br', 'hr', 'img', 'col', 'input', 'link', 'option', 'circle', 'ellipse', 'path', 'rect', 'line', 'polygon', 'polyline');

    // analyze the HTML code, and remember its position in the HTML code
    $node = $this->tagParser->analyzeTag($token->getData());
    $node->setLine($token->getLine());

    $actions = array();

    // such a tag needs no validation: it is the only action
    if (in_array($node->getName(), $unclosedAllowed)) {
        $actions[] = $node;

        return $actions;
    }

    if ($node->isClose()) {
        // HTML validation: a closure must match the last opened tag
        if (count($parents) < 1) {
            $e = new HtmlParsingException('Too many tag closures found for [' . $node->getName() . ']');
            $e->setInvalidTag($node->getName());
            $e->setHtmlLine($token->getLine());
            throw $e;
        }

        if (end($parents) != $node->getName()) {
            $e = new HtmlParsingException('Tags are closed in a wrong order for [' . $node->getName() . ']');
            $e->setInvalidTag($node->getName());
            $e->setHtmlLine($token->getLine());
            throw $e;
        }

        array_pop($parents);
    } elseif ($node->isAutoClose()) {
        // auto-closed tag: keep the opened tag...
        $actions[] = $node;

        // ...and continue with its closed counterpart
        $node = clone $node;
        $node->setParams(array());
        $node->setClose(true);
    } else {
        // opened tag: register it for the validation of the next closures
        array_push($parents, $node->getName());
    }

    // a <pre> tag (or <code> tag) that is not auto-closed updates the flag
    $isPreformatted = ($node->getName() == 'pre' || $node->getName() == 'code');
    if ($isPreformatted && !$node->isAutoClose()) {
        $this->tagPreIn = !$node->isClose();
    }

    // save the action to convert
    $actions[] = $node;

    return $actions;
}
/**
 * Check that extractTagAttributes() returns the expected value
 * for each dataset of the provider.
 *
 * @param string $code     the code given to extractTagAttributes()
 * @param array  $expected the value it must return
 *
 * @dataProvider tagAttributesProvider
 */
public function testExtractTagAttributes($code, $expected)
{
    $this->assertEquals(
        $expected,
        $this->parser->extractTagAttributes($code)
    );
}