Exemple #1
0
/**
 * Normalise a URL so that its scheme is either "http" or "https".
 *
 * The URL is split with crack_url(); any scheme other than "https"
 * (compared case-insensitively), including a missing one, is replaced by
 * "http"; the parts are then reassembled with glue_url().
 *
 * @param string $url URL to normalise
 * @return mixed whatever glue_url() returns for the adjusted parts
 */
function clean_url($url)
{
    $bits = crack_url($url);
    // A URL without a scheme has no 'scheme' entry; treat that as
    // "not https" instead of reading an undefined index.
    $scheme = isset($bits['scheme']) ? strtolower($bits['scheme']) : '';
    // force http (but allow https too)
    if ($scheme !== 'https') {
        $bits['scheme'] = 'http';
    }
    return glue_url($bits);
}
Exemple #2
0
/**
 * Rebuild a URL with additional query-string parameters merged in.
 *
 * Parameters given in $qs override parameters of the same name already
 * present in the URL; all other existing parameters are kept.
 *
 * @param string|false $url URL to modify; when empty, the current
 *                          $_SERVER['REQUEST_URI'] is used
 * @param string|array $qs  extra parameters, either as a query string
 *                          ("a=1&b=2") or as an associative array
 * @return string the result of glue_url() on the modified parts, or the
 *                input URL when glue_url() returns an empty value
 */
function buildurl($url = false, $qs = '')
{
    if (!$url) {
        // REQUEST_URI is not set outside a web request (e.g. CLI).
        $url = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
    }
    $urlA = parse_url($url);
    if (!is_array($urlA)) {
        // parse_url() returns false for seriously malformed URLs.
        $urlA = array();
    }
    // Parameters already present in the URL (none when it has no query).
    $query1 = array();
    if (isset($urlA['query'])) {
        parse_str($urlA['query'], $query1);
    }
    // Parameters supplied by the caller.
    if (is_array($qs)) {
        $query2 = $qs;
    } else {
        $query2 = array();
        parse_str((string) $qs, $query2);
    }
    // array_replace() rather than array_merge(): array_merge() renumbers
    // integer keys, which would turn "?5=a" into "?0=a".
    $query = array_replace($query1, $query2);
    $urlA['query'] = http_build_query($query);
    $nurl = glue_url($urlA);
    return $nurl ? $nurl : $url;
}
Exemple #3
0
/**
 * Resolve a (possibly relative) URL reference against a base URL.
 *
 * The numbered "Step" comments are kept from the original implementation;
 * they appear to follow the relative-URL resolution algorithm of RFC 1808,
 * section 4. Handling of ".." above the root follows RFC 3986 instead:
 * such segments are dropped rather than eating the leading slash.
 *
 * Only the path of the reference is interpreted; a query string inside
 * $url is carried along as part of its last path segment.
 *
 * @param string $base absolute base URL
 * @param string $url  URL reference to resolve (absolute, "//host/path",
 *                     "/path", "relative/path" or "#fragment")
 * @return mixed $url or $base unchanged for the trivial cases, otherwise
 *               whatever glue_url() returns for the resolved parts
 * @author adrian-php at sixfingeredman dot net
 */
function resolve_url($base, $url)
{
    // Nothing to resolve against.
    if (!strlen($base)) {
        return $url;
    }
    // Step 2
    if (!strlen($url)) {
        return $base;
    }
    // Step 3: the reference carries its own scheme, so it is already absolute
    if (preg_match('!^[a-z]+:!i', $url)) {
        return $url;
    }
    $base = parse_url($base);
    if (!is_array($base)) {
        // parse_url() returns false for seriously malformed URLs.
        $base = array();
    }
    if ($url[0] == "#") {
        // Step 2 (fragment): same document, different fragment
        $base['fragment'] = substr($url, 1);
        return glue_url($base);
    }
    unset($base['fragment'], $base['query']);
    if (substr($url, 0, 2) == "//") {
        // Step 4: network-path reference, only the scheme is inherited
        return glue_url(array(
            'scheme' => isset($base['scheme']) ? $base['scheme'] : null,
            'path' => substr($url, 2),
        ));
    }
    if ($url[0] == "/") {
        // Step 5: absolute path replaces the base path
        $base['path'] = $url;
        return glue_url($base);
    }
    // Step 6: relative path.
    // A base such as "http://example.com" has no path component; treat it
    // as "/" so the result keeps a slash between host and path.
    $base_path = isset($base['path']) ? $base['path'] : '';
    if ($base_path === '' && isset($base['host'])) {
        $base_path = '/';
    }
    $path = explode('/', $base_path);
    $url_path = explode('/', $url);
    // Step 6a: drop file from base
    array_pop($path);
    // array('') is what remains of an absolute path once only the root is left.
    $root = array('');
    // Step 6b, 6c, 6e: append url while removing "." and ".." from
    // the directory portion
    $end = array_pop($url_path);
    foreach ($url_path as $segment) {
        if ($segment === '.') {
            continue;
        }
        if ($segment === '..') {
            if ($path === $root) {
                // Already at the root: there is nothing above it to go to.
                continue;
            }
            if ($path && $path[count($path) - 1] !== '..') {
                array_pop($path);
                continue;
            }
        }
        $path[] = $segment;
    }
    // Step 6d, 6f: remove "." and ".." from file portion
    if ($end === '.') {
        $path[] = '';
    } elseif ($end === '..' && $path === $root) {
        // ".." at the root resolves to the root itself ("/").
        $path[] = '';
    } elseif ($end === '..' && $path && $path[count($path) - 1] !== '..') {
        // Replace the last directory by an empty file name ("dir/" -> "").
        $path[count($path) - 1] = '';
    } else {
        $path[] = $end;
    }
    // Step 6h
    $base['path'] = join('/', $path);
    // Step 7
    return glue_url($base);
}
Exemple #4
0
/**
 * Return a proxy URL with its password replaced by "******".
 *
 * URLs that cannot be parsed, or that carry no password, are returned
 * unchanged. URLs with a password are rebuilt through glue_url().
 *
 * @param string $http_proxy proxy URL, possibly of the form
 *                           scheme://user:password@host:port
 * @return mixed the input, or the result of glue_url() with the password masked
 */
function no_password_proxy_url($http_proxy) {
	$p = @parse_url($http_proxy);
	// Test for presence, not truthiness: a password such as "0" is falsy
	// but must still be masked, and a missing 'pass' key must not be read.
	if (is_array($p) AND isset($p['pass']) AND $p['pass'] !== '') {
		$p['pass'] = '******';
		$http_proxy = glue_url($p);
	}
	return $http_proxy;
}
/**
 * Get PukiWiki Page Source via http using cmd=source
 *
 * The page name is taken from the "page" query parameter of $url or, when
 * there is no such parameter, from the whole (raw-url-decoded) query string.
 * The URL is then rewritten to "cmd=source&page=PAGE", fetched with
 * http_get_contents() using $GLOBALS['USERNAME'] / $GLOBALS['USERPASS'],
 * and the response is fed through XML_HTMLSax with a PKWKSourceHandler.
 *
 * @access public
 * @param string $url Page URL
 * @return mixed PukiWiki Page Source. FALSE if HTTP GET failed. 
 * @uses PKWKSourceHandler
 * @uses PEAR XML/XML_HTMLSax.php
 */
function &pkwk_get_source($url)
{
    // pkwk source (cmd=source&page=PAGE)
    $parsed = parse_url($url);
    if (!is_array($parsed)) {
        // parse_url() returns false for seriously malformed URLs.
        $parsed = array();
    }
    // The URL may have no query string at all; treat that as an empty one.
    $query = isset($parsed['query']) ? $parsed['query'] : '';
    $queries = array();
    parse_str($query, $queries);
    // rawurldecode
    $page = isset($queries['page']) ? $queries['page'] : rawurldecode($query);
    $parsed['query'] = glue_str(array('cmd' => 'source', 'page' => $page));
    $url = glue_url($parsed);
    $html = http_get_contents($url, $GLOBALS['USERNAME'], $GLOBALS['USERPASS']);
    if ($html === FALSE) {
        // This function returns by reference, so it must return a variable
        // rather than a literal.
        $failure = FALSE;
        return $failure;
    }
    require_once 'XML/XML_HTMLSax.php';
    $parser = new XML_HTMLSax();
    $handler = new PKWKSourceHandler();
    $parser->set_object($handler);
    $parser->set_element_handler('openHandler', 'closeHandler');
    $parser->set_data_handler('dataHandler');
    $parser->parse($html);
    // The handler's "source" property is returned once parsing is done.
    return $handler->source;
}
/**
 * Build a cleaned-up article HTML fragment for a WordPress post.
 *
 * The post's single-post RSS 2 feed is requested by adding
 * "feed=rss2&withoutcomments=1" to the post URL. When the feed does not
 * contain a <content:encoded> element, or cannot be parsed, the work is
 * delegated to fallback_parser(). Otherwise the encoded content is
 * stripped of Facebook iframes, flattr / feeds.wordpress.com links and
 * stats.wordpress.com images, reduced to a small set of tags, and wrapped
 * in a <div class="article"> together with the post title and a link back
 * to the post.
 *
 * Side effect: defines the global function do_stuff() on first use.
 *
 * @param string $url         URL of the post
 * @param string $page_source HTML source of the post page, only passed on
 *                            to fallback_parser()
 * @return mixed HTML fragment, or whatever fallback_parser() returns
 */
function parse_wordpress($url, $page_source)
{
    // feed url
    $url_parsed = parse_url($url);
    if (!is_array($url_parsed)) {
        // parse_url() returns false for seriously malformed URLs.
        $url_parsed = array();
    }
    // Always ask for the RSS 2 feed: the code below requires
    // <content:encoded>, and both branches must request the same format.
    $feed_args = 'feed=rss2&withoutcomments=1';
    if (isset($url_parsed['query']) && $url_parsed['query'] !== '') {
        $url_parsed['query'] .= '&' . $feed_args;
    } else {
        $url_parsed['query'] = $feed_args;
    }
    $single_post_feed_url = glue_url($url_parsed);
    // get feed source
    $feed_response = get_source_width_curl($single_post_feed_url);
    $single_post_feed_source = isset($feed_response["content"]) ? (string) $feed_response["content"] : '';
    // fallback parser if not full content in single post feed
    if (stripos($single_post_feed_source, "<content:encoded>") === false) {
        return fallback_parser($url, $page_source);
    }
    // content
    $xml_parsed = simplexml_load_string($single_post_feed_source, 'SimpleXMLElement', LIBXML_NOCDATA);
    if ($xml_parsed === false || !isset($xml_parsed->channel->item)) {
        // Not well-formed XML, or no item in the feed.
        return fallback_parser($url, $page_source);
    }
    $item = $xml_parsed->channel->item;
    $link = $item->link;
    $h1 = $item->title;
    $title = $xml_parsed->channel->title;
    $content_decoded = $item->children('content', true)->encoded;
    // parse content
    $content = str_get_html((string) $content_decoded[0]);
    if (!is_object($content)) {
        // NOTE(review): str_get_html() presumably returns false for empty
        // input - confirm against the HTML parser library in use.
        return fallback_parser($url, $page_source);
    }
    // remove facebook iframes
    foreach ($content->find("iframe") as $iframe) {
        if (strpos((string) $iframe->src, 'facebook.com') !== false) {
            $iframe->outertext = '';
        }
    }
    // remove flattr-links and wordpress.com feeds
    foreach ($content->find("a") as $a) {
        $href = (string) $a->href;
        if (strpos($href, 'flattrss_redirect') !== false || strpos($href, 'feeds.wordpress.com') !== false) {
            $a->outertext = '';
        }
    }
    // loop through elements and do stuff
    // Declared conditionally: an unconditional nested declaration is a
    // fatal "cannot redeclare" error the second time this function runs.
    if (!function_exists('do_stuff')) {
        function do_stuff($element)
        {
            // remove all attributes, except href and src
            foreach ($element->attr as $name => $attr) {
                if ($name != 'src' && $name != 'href') {
                    $element->removeAttribute($name);
                }
            }
            // we keep img, span, a, li, ul, ol, object, embed, h1-h6, i, em,
            // b, strong and blockquote; everything else is converted to p
            $kept_tags = array(
                'img', 'span', 'a', 'li', 'ul', 'ol', 'object', 'embed',
                'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                'i', 'em', 'b', 'strong', 'blockquote',
            );
            if (!in_array($element->tag, $kept_tags, true)) {
                $element->tag = 'p';
            }
            // remove p:s and a:s without innertext
            $is_empty = (string) $element->innertext === '';
            if ($is_empty && ($element->tag == 'p' || $element->tag == 'a')) {
                $element->outertext = '';
            }
            // remove wordpress.com stats
            if (strpos((string) $element->src, 'stats.wordpress.com') !== false) {
                $element->outertext = '';
            }
        }
    }
    $content->set_callback('do_stuff');
    $content = $content->save();
    // no indent before image, if first in p
    $content = str_replace('<p><a', '<p>&nbsp;<a', $content);
    $content = str_replace('<p><img', '<p>&nbsp;<img', $content);
    // Title and link come from the remote feed as plain text; escape them
    // before putting them into markup. $content is HTML by design.
    $h1_html = htmlspecialchars((string) $h1, ENT_QUOTES, 'UTF-8');
    $link_html = htmlspecialchars((string) $link, ENT_QUOTES, 'UTF-8');
    $title_html = htmlspecialchars((string) $title, ENT_QUOTES, 'UTF-8');
    // wrap in article structure
    return '<div class="article"><h1>' . $h1_html . '</h1>' . $content . '<address><a href="' . $link_html . '">' . $title_html . '</a></address></div>';
}
 /**
  * Get absolute URL
  *
  * Resolves a (possibly relative) URL reference against a base URL.
  * Absolute references are returned unchanged; "#fragment", "//host/path",
  * "/path" and "relative/path" references are combined with the parsed
  * base and reassembled with glue_url(). "." and ".." segments are
  * collapsed; ".." segments that would climb above the root are dropped.
  *
  * Only the path of the reference is interpreted; a query string inside
  * $url is carried along as part of its last path segment.
  *
  * @access public
  * @param string $base base url
  * @param string $url relative url
  * @return string absolute url
  * @see parse_url()
  * @see realpath()
  * @uses glue_url()
  */
 function realurl($base, $url)
 {
     if (!strlen($base)) {
         return $url;
     }
     if (!strlen($url)) {
         return $base;
     }
     // The reference carries its own scheme, so it is already absolute.
     if (preg_match('!^[a-z]+:!i', $url)) {
         return $url;
     }
     $base = parse_url($base);
     if (!is_array($base)) {
         // parse_url() returns false for seriously malformed URLs.
         $base = array();
     }
     if ($url[0] == "#") {
         // fragment
         $base['fragment'] = substr($url, 1);
         return glue_url($base);
     }
     unset($base['fragment'], $base['query']);
     if (substr($url, 0, 2) == "//") {
         // FQDN: only the scheme is inherited from the base
         return glue_url(array(
             'scheme' => isset($base['scheme']) ? $base['scheme'] : null,
             'path' => substr($url, 2),
         ));
     }
     if ($url[0] == "/") {
         // absolute path reference
         $base['path'] = $url;
         return glue_url($base);
     }
     // relative path reference
     // A base such as "http://example.com" has no path component; treat it
     // as "/" so the result keeps a slash between host and path.
     $base_path = isset($base['path']) ? $base['path'] : '';
     if ($base_path === '' && isset($base['host'])) {
         $base_path = '/';
     }
     $path = explode('/', $base_path);
     $url_path = explode('/', $url);
     // drop file from base
     array_pop($path);
     // array('') is what remains of an absolute path once only the root is left.
     $root = array('');
     // append url while removing "." and ".." from
     // the directory portion
     $end = array_pop($url_path);
     foreach ($url_path as $segment) {
         if ($segment === '.') {
             continue;
         }
         if ($segment === '..') {
             if ($path === $root) {
                 // Already at the root: there is nothing above it to go to.
                 continue;
             }
             if ($path && $path[count($path) - 1] !== '..') {
                 array_pop($path);
                 continue;
             }
         }
         $path[] = $segment;
     }
     // remove "." and ".." from file portion
     if ($end === '.') {
         $path[] = '';
     } elseif ($end === '..' && $path === $root) {
         // ".." at the root resolves to the root itself ("/").
         $path[] = '';
     } elseif ($end === '..' && $path && $path[count($path) - 1] !== '..') {
         // Replace the last directory by an empty file name ("dir/" -> "").
         $path[count($path) - 1] = '';
     } else {
         $path[] = $end;
     }
     $base['path'] = join('/', $path);
     return glue_url($base);
 }