Esempio n. 1
0
/**
 * Extract URLs from a web page.
 *
 * URLs are extracted from a long list of tags and attributes as defined
 * by the HTML 2.0, HTML 3.2, HTML 4.01, and draft HTML 5.0 specifications.
 * URLs are also extracted from tags and attributes that are common
 * extensions of HTML, from the draft Forms 2.0 specification, from XHTML,
 * and from WML 1.3 and 2.0.
 *
 * The function returns an associative array of associative arrays of
 * arrays of URLs.  The outermost array's keys are the tag (element) name,
 * such as "a" for <a> or "img" for <img>.  The values for these entries
 * are associative arrays where the keys are attribute names for those
 * tags, such as "href" for <a href="...">.  Finally, the values for
 * those arrays are URLs found in those tags and attributes throughout
 * the text.
 *
 * Besides the tag/attribute table, three more sources are scanned:
 * 	- <meta http-equiv="..."> elements, reported under ['meta']['http-equiv'];
 * 	- style="..." attributes and <style> element bodies, whose CSS is
 * 	  handed to extract_css_urls() and merged under ['style'].
 *
 * Attribute values may be double-quoted, single-quoted or unquoted; the
 * surrounding quotes are never part of the returned URL.  The "archive"
 * attribute holds a URL list and is split into individual URLs: on
 * whitespace for <object>, on commas for any other element (<applet>).
 *
 * Parameters:
 * 	text		the UTF-8 text to scan
 *
 * Return values:
 * 	an associative array where keys are tags and values are an
 * 	associative array where keys are attributes and values are
 * 	an array of URLs.  An empty array is returned when the text
 * 	contains no tags.
 *
 * See:
 * 	http://nadeausoftware.com/articles/2008/01/php_tip_how_extract_urls_web_page
 */
function extract_html_urls($text)
{
    // (element, attribute) pairs to scan.  The order is significant: it
    // determines the key order of the returned array.
    $match_elements = array(
        array('a', 'href'), array('a', 'urn'), array('base', 'href'),
        array('form', 'action'), array('img', 'src'), array('link', 'href'),
        array('applet', 'code'), array('applet', 'codebase'), array('area', 'href'),
        array('body', 'background'), array('img', 'usemap'), array('input', 'src'),
        array('applet', 'archive'), array('applet', 'object'), array('blockquote', 'cite'),
        array('del', 'cite'), array('frame', 'longdesc'), array('frame', 'src'),
        array('head', 'profile'), array('iframe', 'longdesc'), array('iframe', 'src'),
        array('img', 'longdesc'), array('input', 'usemap'), array('ins', 'cite'),
        array('object', 'archive'), array('object', 'classid'), array('object', 'codebase'),
        array('object', 'data'), array('object', 'usemap'), array('q', 'cite'),
        array('script', 'src'), array('audio', 'src'), array('command', 'icon'),
        array('embed', 'src'), array('event-source', 'src'), array('html', 'manifest'),
        array('source', 'src'), array('video', 'src'), array('video', 'poster'),
        array('bgsound', 'src'), array('body', 'credits'), array('body', 'instructions'),
        array('body', 'logo'), array('div', 'href'), array('div', 'src'),
        array('embed', 'code'), array('embed', 'pluginspage'), array('html', 'background'),
        array('ilayer', 'src'), array('img', 'dynsrc'), array('img', 'lowsrc'),
        array('input', 'dynsrc'), array('input', 'lowsrc'), array('table', 'background'),
        array('td', 'background'), array('th', 'background'), array('layer', 'src'),
        array('xml', 'src'), array('button', 'action'), array('datalist', 'data'),
        array('form', 'data'), array('input', 'action'), array('select', 'data'),
        array('html', 'xmlns'), array('access', 'path'), array('card', 'onenterforward'),
        array('card', 'onenterbackward'), array('card', 'ontimer'), array('go', 'href'),
        array('option', 'onpick'), array('template', 'onenterforward'),
        array('template', 'onenterbackward'), array('template', 'ontimer'),
        array('wml', 'xmlns'),
    );
    $match_metas = array('content-base', 'content-location', 'referer', 'location', 'refresh');

    $urls = array();

    // Extract all elements (the text between '<' and '>' of every opening tag)
    if (!preg_match_all('/<([a-z][^>]*)>/iu', $text, $matches)) {
        return array();
    }
    $elements = $matches[1];

    // Attribute value: group 1 = double-quoted, 2 = single-quoted, 3 = unquoted.
    $value_pattern = '=(?:"([^"]*)"|\'([^\']*)\'|([^\\s]*))';

    // Return whichever value group actually captured something.  Trailing
    // unmatched groups are absent from $match, hence the isset() check; the
    // strict comparison keeps a literal "0" value, which empty() would drop.
    $pick_value = function (array $match) {
        for ($i = 1; $i <= 3; $i++) {
            if (isset($match[$i]) && $match[$i] !== '') {
                return $match[$i];
            }
        }
        return '';
    };

    // Match elements and attributes
    foreach ($match_elements as $match_element) {
        list($name, $attr) = $match_element;
        // The lookbehind stops "src" from matching inside "dynsrc" or
        // "data-src"; the "s" flag lets tags span several lines.
        $pattern = '/^' . $name . '\\s.*(?<![\\w-])' . $attr . $value_pattern . '/isu';
        if ($attr === 'archive') {
            // <object archive> is a space-separated URL list, every other
            // archive attribute (<applet>) is comma-separated.
            $split_pattern = ($name === 'object') ? '/\\s+/u' : '/,\\s*/u';
        } else {
            $split_pattern = null; // Single URL
        }
        foreach ($elements as $element) {
            if (!preg_match($pattern, $element, $match)) {
                continue;
            }
            $value = $pick_value($match);
            if ($split_pattern === null) {
                $urls[$name][$attr][] = $value;
                continue;
            }
            foreach (preg_split($split_pattern, $value, -1, PREG_SPLIT_NO_EMPTY) as $part) {
                $urls[$name][$attr][] = $part;
            }
        }
    }

    // Match meta http-equiv elements
    $content_pattern = '/(?<![\\w-])content' . $value_pattern . '/iu';
    $refresh_pattern = '/\\d*;\\s*(url=)?(.*)$/iu';
    foreach ($match_metas as $match_meta) {
        $attr_pattern = '/http-equiv=["\']?' . $match_meta . '(?![\\w-])/iu';
        foreach ($elements as $element) {
            if (!preg_match('/^meta/iu', $element)
                || !preg_match($attr_pattern, $element)
                || !preg_match($content_pattern, $element, $match)) {
                continue;
            }
            $value = $pick_value($match);
            if ($match_meta != 'refresh') {
                $urls['meta']['http-equiv'][] = $value;
            } elseif (preg_match($refresh_pattern, $value, $refresh)) {
                // content="<seconds>; url=<target>" - keep only the target
                $urls['meta']['http-equiv'][] = $refresh[2];
            }
        }
    }

    // Match style attributes
    $urls['style'] = array();
    $style_pattern = '/(?<![\\w-])style' . $value_pattern . '/iu';
    foreach ($elements as $element) {
        if (!preg_match($style_pattern, $element, $match)) {
            continue;
        }
        $style_urls = extract_css_urls($pick_value($match));
        if (!empty($style_urls)) {
            $urls['style'] = array_merge_recursive($urls['style'], $style_urls);
        }
    }

    // Match style bodies
    if (preg_match_all('/<style[^>]*>(.*?)<\\/style>/siu', $text, $style_bodies)) {
        foreach ($style_bodies[1] as $style_body) {
            $style_urls = extract_css_urls($style_body);
            if (!empty($style_urls)) {
                $urls['style'] = array_merge_recursive($urls['style'], $style_urls);
            }
        }
    }

    // Do not report an empty 'style' group
    if (empty($urls['style'])) {
        unset($urls['style']);
    }
    return $urls;
}
Esempio n. 2
0
 /**
  * Parses one file (either html or css)
  *
  * The URL is probed with a HEAD request first. Depending on the reported
  * content type the file is then handled as:
  *  - the main HTML page: <img> sources go to add_image_to_list(), the text of
  *    <style type="text/css"> elements is collected as CSS, and every
  *    <link type="text/css" href="..."> is parsed recursively;
  *  - a CSS file: its content is collected as CSS;
  *  - an image: the file itself goes to add_image_to_list();
  *  - anything else: $list['error'] is set.
  * Collected CSS is run through extract_css_urls(); URLs in its 'property'
  * group go to add_image_to_list() and URLs in its 'import' group are parsed
  * recursively.
  *
  * @param string $baseurl (optional) URL of the file where link to this file was found
  * @param string $relativeurl relative or absolute link to the file
  * @param array $list result list, passed by reference
  * @param bool $mainfile true only for the main HTML file, false for all embedded/linked files
  */
 protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false)
 {
     // If the link contains a quoted section, keep only the text between the quotes.
     if (preg_match('/([\'"])(.*)\\1/', $relativeurl, $quoted)) {
         $relativeurl = $quoted[2];
     }

     $url = empty($baseurl)
         ? $relativeurl
         : htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl));

     // Every URL is processed at most once, which also avoids endless recursion.
     if (in_array($url, $this->processedfiles)) {
         return;
     }
     $this->processedfiles[] = $url;

     $curl = new curl();
     $curl->setopt(array('CURLOPT_FOLLOWLOCATION' => true, 'CURLOPT_MAXREDIRS' => 3));
     $headresult = $curl->head($url);
     $info = $curl->get_info();

     // Anything but a plain 200 ends here; only the main file reports the failure.
     if ($info['http_code'] != 200) {
         if ($mainfile) {
             $list['error'] = $headresult;
         }
         return;
     }

     $css = '';
     if ($mainfile && (strstr($info['content_type'], 'text/html') || empty($info['content_type']))) {
         // HTML page: load it into a DOM (parser warnings are suppressed)
         $html = $curl->get($info['url']);
         $dom = new DOMDocument();
         @$dom->loadHTML($html);

         // <img src="...">
         foreach ($dom->getElementsByTagName('img') as $img) {
             $this->add_image_to_list($info['url'], $img->getAttribute('src'), $list);
         }

         // embedded css (<style type="text/css">)
         foreach ($dom->getElementsByTagName('style') as $style) {
             if ($style->getAttribute('type') == 'text/css') {
                 $css .= $style->textContent . "\n";
             }
         }

         // linked css (<link type='text/css' href='...'>)
         foreach ($dom->getElementsByTagName('link') as $link) {
             if ($link->getAttribute('type') != 'text/css') {
                 continue;
             }
             $href = $link->getAttribute('href');
             if (strlen($href)) {
                 $this->parse_file($info['url'], $href, $list);
             }
         }
     } elseif (strstr($info['content_type'], 'css')) {
         // CSS file: analyse its whole content
         $css .= $curl->get($info['url']) . "\n";
     } elseif (strstr($info['content_type'], 'image/')) {
         // Image: the file itself is the result
         $this->add_image_to_list($info['url'], $info['url'], $list);
     } else {
         $list['error'] = get_string('validfiletype', 'repository_url');
     }

     // Nothing more to do when no css was collected
     if (!strlen($css)) {
         return;
     }

     $cssurls = extract_css_urls($css);
     if (!empty($cssurls['property'])) {
         foreach ($cssurls['property'] as $imageurl) {
             $this->add_image_to_list($info['url'], $imageurl, $list);
         }
     }
     if (!empty($cssurls['import'])) {
         foreach ($cssurls['import'] as $importurl) {
             $this->parse_file($info['url'], $importurl, $list);
         }
     }
 }
Esempio n. 3
0
/**
 * Combine the locally hosted stylesheets that are queued in $wp_styles into
 * one cached, minified CSS file per media type and enqueue that file instead.
 *
 * Does nothing in the admin area or when $wp_styles is not an object.
 * Stylesheets whose URL starts with "http" but not with home_url() are
 * considered external and are left in the queue untouched.
 *
 * While combining, url() references reported by extract_css_urls() under the
 * 'property' key are rewritten so they still resolve from the cache location:
 * relative URLs are prefixed with the original stylesheet's directory and the
 * "http:" / "https:" scheme is dropped (protocol-relative URLs).
 *
 * The combined file is only (re)built when no readable cache file exists for
 * the current set of stylesheets and their modification times.
 *
 * @param mixed $handles Not used by this function.
 * @return void
 */
function theme_combine_css($handles){
	if (is_admin()) {
		return;
	}
	global $wp_styles;
	if (!is_object($wp_styles)) {
		return;
	}

	$combine_styles = array();
	$queue_unset = array();
	$home = home_url();

	// Resolve dependencies, then pull every local stylesheet out of the to-do list.
	$wp_styles->all_deps($wp_styles->queue);
	foreach ($wp_styles->to_do as $key => $handle) {
		$media = ($wp_styles->registered[$handle]->args ? $wp_styles->registered[$handle]->args : 'screen');
		$src = $wp_styles->registered[$handle]->src;
		if (substr($src, 0, 4) != 'http') {
			// Not an absolute http(s) URL: treat as a path below the site URL.
			$src = site_url($src);
			$external = false;
		} else {
			$external = (substr($src, 0, strlen($home)) != $home);
		}
		if (!$external) {
			$combine_styles[$media][$handle] = $src;
			unset($wp_styles->to_do[$key]);
			$queue_unset[$handle] = true;
		}
	}

	// Mark the combined handles as done so they are not printed individually.
	foreach ($wp_styles->queue as $key => $handle) {
		if (isset($queue_unset[$handle])) {
			if (!in_array($handle, $wp_styles->done, true)) {
				$wp_styles->done[] = $handle;
			}
			unset($wp_styles->queue[$key]);
		}
	}

	$site_prefix = get_option('siteurl') . '/';
	foreach ($combine_styles as $media => $styles) {
		// Sum of modification times: changes whenever one of the files changes.
		$fileId = 0;
		foreach ($styles as $handle => $src) {
			$path = ABSPATH . str_replace($site_prefix, '', $src);
			$fileId += (int) @filemtime($path);
		}

		// The media type is part of the key so that two media groups can never
		// share a cache file, even when their modification-time sums are equal
		// (e.g. when filemtime() fails for every file).
		$cache_name = md5(serialize($combine_styles) . $media . $fileId);
		$cache_file_path = THEME_CACHE_DIR . '/' . $cache_name . '.css';
		$cache_file_url = THEME_CACHE_URL . '/' . $cache_name . '.css';

		if (!is_readable($cache_file_path)) {
			$content = '';
			foreach ($styles as $handle => $src) {
				// Directory URL of the stylesheet, with trailing slash.
				$htppath = str_replace(basename($src), '', $src);
				$content .= "/* $handle: ($src)  $htppath*/\n";
				$file_content = @file_get_contents($src);
				if ($file_content === false) {
					// Unreadable stylesheet: contribute nothing but keep going.
					$file_content = '';
				}
				$file_content = str_replace('../fonts/glyphicons', ULTIMATUM_URL.'/assets/css/font/glyphicons', $file_content);

				// do the url fixes
				$urls = extract_css_urls($file_content);
				if (!empty($urls['property'])) {
					$replacements = array();
					foreach (array_unique($urls['property']) as $url) {
						// Inline data must never be rewritten; an empty key would break strtr().
						if ($url === '' || strncasecmp($url, 'data:', 5) === 0) {
							continue;
						}
						if (!strstr($url, '//')) {
							if ($url[0] === '/') {
								// Root-relative URLs do not depend on the stylesheet's directory.
								continue;
							}
							if (strstr($url, '..')) {
								$urlnew = dirname($htppath) . str_replace('..', '', $url);
							} else {
								$urlnew = $htppath . $url;
							}
							$urlnew = str_replace('/./', '/', $urlnew);
						} else {
							$urlnew = $url;
						}
						// Drop the scheme so the URL works on both http and https pages.
						$urlnew = str_replace('http:', '', $urlnew);
						$urlnew = str_replace('https:', '', $urlnew);
						$replacements[$url] = $urlnew;
					}
					// One pass for all URLs: strtr() never re-scans replaced text, so a
					// URL that is a substring of another one is not rewritten twice.
					$file_content = strtr($file_content, $replacements);
				}
				$content .= $file_content . "\n\n";
			}
			if (is_writable(THEME_CACHE_DIR)) {
				// Strip comments; keep the unstripped text if the regex engine gives up.
				$stripped = preg_replace('!/\*[^*]*\*+([^/][^*]*\*+)*/!', '', $content);
				if ($stripped !== null) {
					$content = $stripped;
				}
				$content = str_replace(array("\r\n", "\r", "\n", "\t", '  ', '    ', '    '), '', $content);
				$fhandle = @fopen($cache_file_path, 'w+');
				if ($fhandle) {
					fwrite($fhandle, $content, strlen($content));
					fclose($fhandle);
				}
			}
		}
		wp_enqueue_style(THEME_SLUG.'-styles-'.$media, $cache_file_url, false, false, $media);
	}
}