/**
 * Extract URLs from a web page.
 *
 * URLs are extracted from a long list of tags and attributes as defined
 * by the HTML 2.0, HTML 3.2, HTML 4.01, and draft HTML 5.0 specifications.
 * URLs are also extracted from tags and attributes that are common
 * extensions of HTML, from the draft Forms 2.0 specification, from XHTML,
 * and from WML 1.3 and 2.0.
 *
 * The function returns an associative array of associative arrays of
 * arrays of URLs. The outermost array's keys are the tag (element) name,
 * such as "a" for <a> or "img" for <img>. The values for these entries
 * are associative arrays where the keys are attribute names for those
 * tags, such as "href" for <a href="...">. Finally, the values for
 * those arrays are URLs found in those tags and attributes throughout
 * the text.
 *
 * Additional entries:
 *  - $urls['meta']['http-equiv'] holds URLs found in the content of
 *    <meta http-equiv="..."> elements (content-base, content-location,
 *    referer, location, refresh).
 *  - $urls['style'] holds whatever extract_css_urls() returns for style
 *    attributes and <style> bodies; the key is omitted when empty.
 *
 * List-valued attributes are split into individual URLs: the "archive"
 * attribute of <object> is whitespace-separated and the "archive"
 * attribute of <applet> is comma-separated. Every other attribute is
 * reported as a single URL.
 *
 * Values are returned as found in the markup: they are not resolved
 * against a base URL and entities are not decoded. Only double-quoted and
 * unquoted attribute values are recognised.
 *
 * Parameters:
 *  text    the UTF-8 text to scan
 *
 * Return values:
 *  an associative array where keys are tags and values are an
 *  associative array where keys are attributes and values are
 *  an array of URLs. An empty array is returned when the text
 *  contains no elements (or is not valid UTF-8, because every
 *  pattern uses the "u" modifier).
 *
 * See:
 *  http://nadeausoftware.com/articles/2008/01/php_tip_how_extract_urls_web_page
 */
function extract_html_urls($text)
{
    $match_elements = array(
        // HTML
        array('element' => 'a', 'attribute' => 'href'),
        array('element' => 'a', 'attribute' => 'urn'),
        array('element' => 'base', 'attribute' => 'href'),
        array('element' => 'form', 'attribute' => 'action'),
        array('element' => 'img', 'attribute' => 'src'),
        array('element' => 'link', 'attribute' => 'href'),
        array('element' => 'applet', 'attribute' => 'code'),
        array('element' => 'applet', 'attribute' => 'codebase'),
        array('element' => 'area', 'attribute' => 'href'),
        array('element' => 'body', 'attribute' => 'background'),
        array('element' => 'img', 'attribute' => 'usemap'),
        array('element' => 'input', 'attribute' => 'src'),
        array('element' => 'applet', 'attribute' => 'archive'),
        array('element' => 'applet', 'attribute' => 'object'),
        array('element' => 'blockquote', 'attribute' => 'cite'),
        array('element' => 'del', 'attribute' => 'cite'),
        array('element' => 'frame', 'attribute' => 'longdesc'),
        array('element' => 'frame', 'attribute' => 'src'),
        array('element' => 'head', 'attribute' => 'profile'),
        array('element' => 'iframe', 'attribute' => 'longdesc'),
        array('element' => 'iframe', 'attribute' => 'src'),
        array('element' => 'img', 'attribute' => 'longdesc'),
        array('element' => 'input', 'attribute' => 'usemap'),
        array('element' => 'ins', 'attribute' => 'cite'),
        array('element' => 'object', 'attribute' => 'archive'),
        array('element' => 'object', 'attribute' => 'classid'),
        array('element' => 'object', 'attribute' => 'codebase'),
        array('element' => 'object', 'attribute' => 'data'),
        array('element' => 'object', 'attribute' => 'usemap'),
        array('element' => 'q', 'attribute' => 'cite'),
        array('element' => 'script', 'attribute' => 'src'),
        array('element' => 'audio', 'attribute' => 'src'),
        array('element' => 'command', 'attribute' => 'icon'),
        array('element' => 'embed', 'attribute' => 'src'),
        array('element' => 'event-source', 'attribute' => 'src'),
        array('element' => 'html', 'attribute' => 'manifest'),
        array('element' => 'source', 'attribute' => 'src'),
        array('element' => 'video', 'attribute' => 'src'),
        array('element' => 'video', 'attribute' => 'poster'),
        array('element' => 'bgsound', 'attribute' => 'src'),
        array('element' => 'body', 'attribute' => 'credits'),
        array('element' => 'body', 'attribute' => 'instructions'),
        array('element' => 'body', 'attribute' => 'logo'),
        array('element' => 'div', 'attribute' => 'href'),
        array('element' => 'div', 'attribute' => 'src'),
        array('element' => 'embed', 'attribute' => 'code'),
        array('element' => 'embed', 'attribute' => 'pluginspage'),
        array('element' => 'html', 'attribute' => 'background'),
        array('element' => 'ilayer', 'attribute' => 'src'),
        array('element' => 'img', 'attribute' => 'dynsrc'),
        array('element' => 'img', 'attribute' => 'lowsrc'),
        array('element' => 'input', 'attribute' => 'dynsrc'),
        array('element' => 'input', 'attribute' => 'lowsrc'),
        array('element' => 'table', 'attribute' => 'background'),
        array('element' => 'td', 'attribute' => 'background'),
        array('element' => 'th', 'attribute' => 'background'),
        array('element' => 'layer', 'attribute' => 'src'),
        array('element' => 'xml', 'attribute' => 'src'),
        array('element' => 'button', 'attribute' => 'action'),
        array('element' => 'datalist', 'attribute' => 'data'),
        array('element' => 'form', 'attribute' => 'data'),
        array('element' => 'input', 'attribute' => 'action'),
        array('element' => 'select', 'attribute' => 'data'),
        // XHTML
        array('element' => 'html', 'attribute' => 'xmlns'),
        // WML
        array('element' => 'access', 'attribute' => 'path'),
        array('element' => 'card', 'attribute' => 'onenterforward'),
        array('element' => 'card', 'attribute' => 'onenterbackward'),
        array('element' => 'card', 'attribute' => 'ontimer'),
        array('element' => 'go', 'attribute' => 'href'),
        array('element' => 'option', 'attribute' => 'onpick'),
        array('element' => 'template', 'attribute' => 'onenterforward'),
        array('element' => 'template', 'attribute' => 'onenterbackward'),
        array('element' => 'template', 'attribute' => 'ontimer'),
        array('element' => 'wml', 'attribute' => 'xmlns'),
    );

    $match_metas = array('content-base', 'content-location', 'referer', 'location', 'refresh');

    $urls = array();

    // Extract all elements: the text between "<" and ">" of every opening tag.
    if (!preg_match_all('/<([a-z][^>]*)>/iu', $text, $matches)) {
        return array();
    }
    $elements = $matches[1];

    // Group 3 is a double-quoted value, group 4 an unquoted one. PHP drops
    // trailing groups that did not take part in the match, so group 4 is only
    // set when the unquoted alternative matched.
    $value_pattern = '=(("([^"]*)")|([^\\s]*))';

    // Match elements and attributes
    foreach ($match_elements as $match_element) {
        $name = $match_element['element'];
        $attr = $match_element['attribute'];

        // The attribute must be preceded by whitespace so that e.g. "src" is
        // not found inside "dynsrc" or "href" inside "data-href". The "s"
        // modifier lets "." cross line breaks inside a multi-line tag.
        $pattern = '/^' . $name . '\\s(?:.*\\s)?' . $attr . $value_pattern . '/isu';

        if ($attr == 'archive' && $name == 'object') {
            $split_pattern = '/\\s+/u';     // Space-separated URL list
        } elseif ($attr == 'archive') {
            $split_pattern = '/,\\s*/u';    // Comma-separated URL list
        } else {
            $split_pattern = null;          // Single URL
        }

        foreach ($elements as $element) {
            if (!preg_match($pattern, $element, $match)) {
                continue;
            }
            $m = isset($match[4]) ? $match[4] : $match[3];
            if ($split_pattern === null) {
                $urls[$name][$attr][] = $m;
                continue;
            }
            foreach (preg_split($split_pattern, $m, -1, PREG_SPLIT_NO_EMPTY) as $ms) {
                $urls[$name][$attr][] = $ms;
            }
        }
    }

    // Match meta http-equiv elements
    $content_pattern = '/content' . $value_pattern . '/iu';
    $refresh_pattern = '/\\d*;\\s*(url=)?(.*)$/iu';
    foreach ($match_metas as $match_meta) {
        $attr_pattern = '/http-equiv="?' . $match_meta . '"?/iu';
        foreach ($elements as $element) {
            if (!preg_match('/^meta/iu', $element)
                || !preg_match($attr_pattern, $element)
                || !preg_match($content_pattern, $element, $match)) {
                continue;
            }
            $m = isset($match[4]) ? $match[4] : $match[3];
            if ($match_meta != 'refresh') {
                $urls['meta']['http-equiv'][] = $m;
            } elseif (preg_match($refresh_pattern, $m, $refresh_match)) {
                // "5; url=http://..." - keep only the part after the delay.
                $urls['meta']['http-equiv'][] = $refresh_match[2];
            }
        }
    }

    // Match style attributes
    $urls['style'] = array();
    $style_pattern = '/style' . $value_pattern . '/iu';
    foreach ($elements as $element) {
        if (!preg_match($style_pattern, $element, $match)) {
            continue;
        }
        $m = isset($match[4]) ? $match[4] : $match[3];
        $style_urls = extract_css_urls($m);
        if (!empty($style_urls)) {
            $urls['style'] = array_merge_recursive($urls['style'], $style_urls);
        }
    }

    // Match style bodies
    if (preg_match_all('/<style[^>]*>(.*?)<\\/style>/siu', $text, $style_bodies)) {
        foreach ($style_bodies[1] as $style_body) {
            $style_urls = extract_css_urls($style_body);
            if (!empty($style_urls)) {
                $urls['style'] = array_merge_recursive($urls['style'], $style_urls);
            }
        }
    }
    if (empty($urls['style'])) {
        unset($urls['style']);
    }

    return $urls;
}
/**
 * Parses one file (either html or css) and records the images it refers to.
 *
 * The file is probed with a HEAD request (following at most 3 redirects).
 * Depending on the reported content type it is then handled as:
 *  - HTML (main file only, or when no content type is reported for the main
 *    file): <img> sources are passed to add_image_to_list(), text/css
 *    <style> bodies are collected, and text/css <link> targets are parsed
 *    recursively;
 *  - CSS: the body is downloaded and collected;
 *  - image: the file itself is passed to add_image_to_list();
 *  - anything else: $list['error'] is set to the 'validfiletype' string.
 * Collected CSS is scanned with extract_css_urls(); 'property' URLs are
 * passed to add_image_to_list() and 'import' URLs are parsed recursively.
 *
 * Every resolved URL is remembered in $this->processedfiles so that a file
 * is never processed twice.
 *
 * @param string $baseurl (optional) URL of the file where link to this file was found
 * @param string $relativeurl relative or absolute link to the file
 * @param array $list result list, modified in place; receives an 'error' entry on failure
 * @param bool $mainfile true only for the main HTML file, false for all embedded/linked files
 */
protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false) {
    // If the link contains a quoted section, keep only the text between the quotes.
    if (preg_match('/([\'"])(.*)\\1/', $relativeurl, $quoted)) {
        $relativeurl = $quoted[2];
    }

    $url = empty($baseurl)
        ? $relativeurl
        : htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl));

    // Avoid endless recursion: each URL is handled once.
    if (in_array($url, $this->processedfiles)) {
        return;
    }
    $this->processedfiles[] = $url;

    $request = new curl();
    $request->setopt(array('CURLOPT_FOLLOWLOCATION' => true, 'CURLOPT_MAXREDIRS' => 3));
    $headresult = $request->head($url);
    $info = $request->get_info();

    if ($info['http_code'] != 200) {
        // Only a failure of the main file is reported to the caller.
        if ($mainfile) {
            $list['error'] = $headresult;
        }
        return;
    }

    $csstoanalyze = '';
    $treatashtml = $mainfile
        && (strstr($info['content_type'], 'text/html') || empty($info['content_type']));

    if ($treatashtml) {
        // Parse as html.
        $markup = $request->get($info['url']);
        $dom = new DOMDocument();
        @$dom->loadHTML($markup);

        // Extract <img>.
        foreach ($dom->getElementsByTagName('img') as $imgnode) {
            $imgsrc = $imgnode->getAttribute('src');
            $this->add_image_to_list($info['url'], $imgsrc, $list);
        }

        // Analyse embedded css (<style>).
        foreach ($dom->getElementsByTagName('style') as $stylenode) {
            if ($stylenode->getAttribute('type') == 'text/css') {
                $csstoanalyze .= $stylenode->textContent . "\n";
            }
        }

        // Analyse links to css (<link type='text/css' href='...'>).
        foreach ($dom->getElementsByTagName('link') as $linknode) {
            if ($linknode->getAttribute('type') != 'text/css') {
                continue;
            }
            $href = $linknode->getAttribute('href');
            if (strlen($href)) {
                $this->parse_file($info['url'], $href, $list);
            }
        }
    } elseif (strstr($info['content_type'], 'css')) {
        // Parse as css.
        $csstoanalyze .= $request->get($info['url']) . "\n";
    } elseif (strstr($info['content_type'], 'image/')) {
        // Download this file.
        $this->add_image_to_list($info['url'], $info['url'], $list);
    } else {
        $list['error'] = get_string('validfiletype', 'repository_url');
    }

    // Parse all found css styles.
    if (!strlen($csstoanalyze)) {
        return;
    }
    $cssurls = extract_css_urls($csstoanalyze);
    if (!empty($cssurls['property'])) {
        foreach ($cssurls['property'] as $imageurl) {
            $this->add_image_to_list($info['url'], $imageurl, $list);
        }
    }
    if (!empty($cssurls['import'])) {
        foreach ($cssurls['import'] as $importurl) {
            $this->parse_file($info['url'], $importurl, $list);
        }
    }
}
/**
 * Combine the locally hosted stylesheets that are about to be printed into
 * one cached CSS file per media type and enqueue that file instead.
 *
 * Steps:
 *  1. Resolve the dependency order of the queued styles.
 *  2. Move every non-external style (relative src, or src beginning with
 *     home_url()) out of the to-do list and the queue, marking it as done.
 *  3. For each media type, build a cache file name from the style list, the
 *     media type and the summed modification times of the source files.
 *  4. If that cache file is not readable yet, concatenate the sources,
 *     rewrite relative url() references so they still resolve from the cache
 *     location, strip comments and redundant whitespace, and write the file.
 *  5. Enqueue the cache file URL for that media type.
 *
 * Does nothing in the admin area or when $wp_styles is not an object.
 *
 * NOTE(review): when THEME_CACHE_DIR is not writable and no cache file
 * exists, the original styles have already been dequeued and the enqueued
 * URL points at a file that was never written.
 *
 * @param mixed $handles Not used by this function; kept so the signature
 *                       stays compatible with existing callers.
 * @return void
 */
function theme_combine_css($handles){
    if (is_admin()) {
        return;
    }
    global $wp_styles;
    if (!is_object($wp_styles)) {
        return;
    }

    $combine_styles = array();
    $queue_unset = array();

    $wp_styles->all_deps($wp_styles->queue);
    foreach ($wp_styles->to_do as $key => $handle) {
        $media = ($wp_styles->registered[$handle]->args ? $wp_styles->registered[$handle]->args : 'screen');
        $src = $wp_styles->registered[$handle]->src;
        if (substr($src, 0, 4) != 'http') {
            // Relative source: make it absolute against the site URL.
            $src = site_url($src);
            $external = false;
        } else {
            $home = home_url();
            $external = (substr($src, 0, strlen($home)) != $home);
        }
        if (!$external) {
            $combine_styles[$media][$handle] = $src;
            unset($wp_styles->to_do[$key]);
            $queue_unset[$handle] = true;
        }
    }

    // Mark the combined handles as done so they are not printed individually.
    foreach ($wp_styles->queue as $key => $handle) {
        if (isset($queue_unset[$handle])) {
            if (!in_array($handle, $wp_styles->done, true)) {
                $wp_styles->done[] = $handle;
            }
            unset($wp_styles->queue[$key]);
        }
    }

    foreach ($combine_styles as $media => $styles) {
        // Sum of modification times: changes whenever a source file changes.
        $fileId = 0;
        foreach ($styles as $handle => $src) {
            $path = ABSPATH . str_replace(get_option('siteurl').'/', '', $src);
            $mtime = @filemtime($path);
            if ($mtime !== false) {
                $fileId += $mtime;
            }
        }

        // $media is part of the key so two media groups whose mtime sums are
        // equal (e.g. both 0) can never share one cache file.
        $cache_name = md5(serialize($combine_styles).$media.$fileId);
        $cache_file_path = THEME_CACHE_DIR . '/' .$cache_name .'.css';
        $cache_file_url = THEME_CACHE_URL . '/' .$cache_name .'.css';

        if (!is_readable($cache_file_path)) {
            $content = '';
            foreach ($styles as $handle => $src) {
                // Directory URL of the stylesheet, with trailing slash.
                $htppath = str_replace(basename($src), '', $src);
                $content .= "/* $handle: ($src) $htppath*/\n";

                $file_content = @file_get_contents($src);
                if ($file_content === false) {
                    $file_content = '';
                }
                $file_content = str_replace('../fonts/glyphicons', ULTIMATUM_URL.'/assets/css/font/glyphicons', $file_content);

                // Do the url fixes. The 'property' key is only present when
                // url() references were found.
                $urls = extract_css_urls($file_content);
                if (!empty($urls['property'])) {
                    foreach (array_unique($urls['property']) as $url) {
                        // Embedded data, fragment-only and root-relative
                        // references do not depend on the stylesheet's
                        // directory, so they must be left untouched.
                        if ($url === ''
                            || strncasecmp($url, 'data:', 5) === 0
                            || strncmp($url, '#', 1) === 0
                            || (strncmp($url, '/', 1) === 0 && strncmp($url, '//', 2) !== 0)) {
                            continue;
                        }
                        if (!strstr($url, '//')) {
                            // Relative reference: anchor it at the stylesheet's directory.
                            if (strstr($url, '..')) {
                                $urlnew = dirname($htppath).str_replace('..', '', $url);
                            } else {
                                $urlnew = $htppath.$url;
                            }
                            $urlnew = str_replace('/./', '/', $urlnew);
                        } else {
                            $urlnew = $url;
                        }
                        // Make the reference protocol-relative.
                        $urlnew = str_replace('http:', '', $urlnew);
                        $urlnew = str_replace('https:', '', $urlnew);
                        $file_content = str_replace($url, $urlnew, $file_content);
                    }
                }
                $content .= $file_content. "\n\n";
            }

            if (is_writable(THEME_CACHE_DIR)) {
                // Strip comments; keep the content if the regex engine fails.
                $stripped = preg_replace('!/\*[^*]*\*+([^/][^*]*\*+)*/!', '', $content);
                if ($stripped !== null) {
                    $content = $stripped;
                }
                // Drop line breaks and tabs, then collapse runs of spaces to a
                // single space. Single spaces are significant in CSS
                // (descendant selectors, shorthand values) and must survive.
                $content = str_replace(array("\r\n", "\r", "\n", "\t"), '', $content);
                $collapsed = preg_replace('/ {2,}/', ' ', $content);
                if ($collapsed !== null) {
                    $content = $collapsed;
                }

                $fhandle = @fopen($cache_file_path, 'w+');
                if ($fhandle) {
                    fwrite($fhandle, $content);
                    fclose($fhandle);
                }
            }
        }

        wp_enqueue_style(THEME_SLUG.'-styles-'.$media, $cache_file_url, false, false, $media);
    }
}