/**
 * Turn a media link found on a page into an absolute, normalised URL, or reject it.
 *
 * An empty string is returned when the link must not be followed:
 *  - valid_link() reports '0' for the link,
 *  - the link ends with a backslash,
 *  - it points to a host other than the one of the global $mainurl while $ex_media != 1,
 *  - its query string is flagged in the global $apache_indexes,
 *  - it is a mailto:, javascript: or news: link, or uses a scheme other than http/https,
 *  - the finished URL does not contain the host of $mainurl.
 *
 * Side effect: unless $ex_media == 1, the global $mainurl is replaced by
 * remove_file_from_url($mainurl) once the URL has been assembled.
 *
 * @param string $url        Link to process (absolute or relative).
 * @param string $parent_url URL used as the base when $url is relative.
 * @param mixed  $select     Passed through to valid_link().
 * @param mixed  $current    Not used by this function.
 * @param mixed  $handle     Not used by this function.
 * @param mixed  $store_file Not used by this function.
 *
 * @return string Absolute URL, or '' when the link is rejected.
 */
function build_url($url, $parent_url, $select, $current, $handle, $store_file)
{
    global $mainurl, $apache_indexes, $strip_sessids, $ex_media;

    // find only media-files with allowed file suffix  or type-description  or application descriptor
    if (valid_link($url, $select) == '0') {
        return '';
    }
    if (substr($url, -1) == '\\') {
        return '';
    }

    $urlparts = parse_all_url($url);
    $main_url_parts = parse_all_url($mainurl);
    // NOTE(review): despite its name this holds the components of $url, not of
    // $parent_url. It feeds the "/?" rewrite further down; confirm which URL is
    // really intended before changing it, as each choice breaks a different case.
    $original_parent_url_parts = $urlparts;

    // URL components may be missing, so read them defensively.
    $link_host = isset($urlparts['host']) ? $urlparts['host'] : '';
    $main_host = isset($main_url_parts['host']) ? $main_url_parts['host'] : '';
    $main_path = isset($main_url_parts['path']) ? $main_url_parts['path'] : '';
    $main_port = isset($main_url_parts['port']) ? $main_url_parts['port'] : '';

    if ($link_host != "" && $link_host != $main_host && $ex_media != 1) {
        return '';
    }
    if (isset($urlparts['query']) && !empty($apache_indexes[$urlparts['query']])) {
        return '';
    }
    if (preg_match("/[\\/]?mailto:|[\\/]?javascript:|[\\/]?news:/i", $url)) {
        return '';
    }

    //only http and https links are followed
    $scheme = isset($urlparts['scheme']) ? $urlparts['scheme'] : "";
    if (!($scheme == 'http' || $scheme == '' || $scheme == 'https')) {
        return '';
    }

    //parent url might be used to build an url from relative path
    $parent_url = remove_file_from_url($parent_url);
    $parent_url_parts = parse_all_url($parent_url);
    if (substr($url, 0, 1) == '/') {
        // root-relative link: keep only scheme and host of the parent
        $parent_scheme = isset($parent_url_parts['scheme']) ? $parent_url_parts['scheme'] : '';
        $parent_host = isset($parent_url_parts['host']) ? $parent_url_parts['host'] : '';
        $url = $parent_scheme . "://" . $parent_host . $url;
    } elseif (!isset($urlparts['scheme'])) {
        // relative link: append to the parent's directory
        $url = $parent_url . $url;
    }

    $url_parts = parse_all_url($url);
    $url_scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : '';
    $url_host = isset($url_parts['host']) ? $url_parts['host'] : '';
    $url_port = isset($url_parts['port']) ? $url_parts['port'] : '';
    $urlpath = isset($url_parts['path']) ? $url_parts['path'] : '';

    //remove relative path instructions like ../ etc
    $regs = array();
    while (preg_match("/[^\\/]*\\/[.]{2}\\//", $urlpath, $regs)) {
        $urlpath = str_replace($regs[0], "", $urlpath);
    }
    $urlpath = preg_replace("/\\/+/", "/", $urlpath);
    $urlpath = preg_replace("/[^\\/]*\\/[.]{2}/", "", $urlpath);
    $urlpath = str_replace("./", "", $urlpath);

    $query = "";
    if (isset($url_parts['query'])) {
        $query = "?" . $url_parts['query'];
    }

    // The port is dropped when the start URL uses 80 or the link carries none;
    // otherwise the port of the start URL (not of the link) is emitted.
    if ($main_port == 80 || $url_port == "") {
        $portq = "";
    } else {
        $portq = ":" . $main_port;
    }

    //     if not exists, add slash instead of real urlpath
    if (!$urlpath) {
        $urlpath = "/";
    }
    $url = $url_scheme . "://" . $url_host . $portq . $urlpath . $query;

    if (strstr($url, "/?")) {
        //added to address <a href="?id=1"> syntax
        $original_path = isset($original_parent_url_parts['path']) ? $original_parent_url_parts['path'] : '';
        $page = str_replace($main_path, '', $original_path);
        if (substr(trim($mainurl), -1) !== "/" and substr(trim($page), 0, 1) !== "/") {
            $page = "/" . $page;
        }
        $url = $mainurl . $page . $query;
    }

    if ($ex_media == 1) {
        //  if we index sub-domains
        return $url;
    }

    // Writes through to the global $mainurl (see the function header).
    $mainurl = remove_file_from_url($mainurl);

    // convert 'blank' and '&amp;'
    $url = convert_url($url);
    if ($strip_sessids == 1) {
        $url = remove_sessid($url);
    }

    // Only URLs that still contain the host of the start URL are accepted.
    if (strstr($url, $main_host) == false) {
        return '';
    }
    return $url;
}
示例#2
0
/**
 * Normalise a link into an absolute URL, or reject it.
 *
 * An empty string is returned when the link must not be followed:
 *  - it points to a host other than the one of the global $mainurl while
 *    $can_leave_domain != 1,
 *  - it ends with one of the suffixes listed in the global $ext,
 *  - it ends with a backslash,
 *  - its query string is flagged in the global $apache_indexes,
 *  - it is a mailto:, javascript: or news: link, or uses a scheme other than http/https,
 *  - the finished URL does not contain the (file-stripped) start URL.
 *
 * Side effect: when $can_leave_domain != 1 and the link was not rejected before
 * that point, the global $mainurl is replaced by remove_file_from_url($mainurl).
 *
 * @param string $url              Link to process (absolute or relative).
 * @param string $parent_url       URL used as the base when $url is relative.
 * @param mixed  $can_leave_domain 1 to accept foreign hosts and skip the final
 *                                 start-URL check.
 *
 * @return string Absolute URL, or '' when the link is rejected.
 */
function url_purify($url, $parent_url, $can_leave_domain)
{
    global $ext, $mainurl, $apache_indexes, $strip_sessids;

    // parse_url() returns false for seriously malformed input and leaves out
    // components that are absent, so every component is read defensively.
    $urlparts = parse_url($url);
    if (!is_array($urlparts)) {
        $urlparts = array();
    }
    $main_url_parts = parse_url($mainurl);
    if (!is_array($main_url_parts)) {
        $main_url_parts = array();
    }
    $link_host = isset($urlparts['host']) ? $urlparts['host'] : '';
    $main_host = isset($main_url_parts['host']) ? $main_url_parts['host'] : '';
    $main_port = isset($main_url_parts['port']) ? $main_url_parts['port'] : '';

    if ($link_host != "" && $link_host != $main_host && $can_leave_domain != 1) {
        return '';
    }

    // Reject links ending in an excluded suffix. foreach replaces the
    // each()-based loop, which no longer exists in PHP 8.
    // NOTE(review): the suffix is inserted into the pattern unescaped, exactly
    // as before; confirm whether entries in $ext may contain regex syntax.
    if (is_array($ext)) {
        foreach ($ext as $excl) {
            if (preg_match('/\\.' . $excl . '$/i', $url)) {
                return '';
            }
        }
    }

    if (substr($url, -1) == '\\') {
        return '';
    }
    if (isset($urlparts['query']) && !empty($apache_indexes[$urlparts['query']])) {
        return '';
    }
    if (preg_match("/[\\/]?mailto:|[\\/]?javascript:|[\\/]?news:/i", $url)) {
        return '';
    }

    //only http and https links are followed
    $scheme = isset($urlparts['scheme']) ? $urlparts['scheme'] : "";
    if (!($scheme == 'http' || $scheme == '' || $scheme == 'https')) {
        return '';
    }

    //parent url might be used to build an url from relative path
    $parent_url = remove_file_from_url($parent_url);
    $parent_url_parts = parse_url($parent_url);
    if (!is_array($parent_url_parts)) {
        $parent_url_parts = array();
    }
    if (substr($url, 0, 1) == '/') {
        // root-relative link: keep only scheme and host of the parent
        $parent_scheme = isset($parent_url_parts['scheme']) ? $parent_url_parts['scheme'] : '';
        $parent_host = isset($parent_url_parts['host']) ? $parent_url_parts['host'] : '';
        $url = $parent_scheme . "://" . $parent_host . $url;
    } elseif (!isset($urlparts['scheme'])) {
        // relative link: append to the parent's directory
        $url = $parent_url . $url;
    }

    $url_parts = parse_url($url);
    if (!is_array($url_parts)) {
        $url_parts = array();
    }
    $url_scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : '';
    $url_host = isset($url_parts['host']) ? $url_parts['host'] : '';
    $url_port = isset($url_parts['port']) ? $url_parts['port'] : '';
    $urlpath = isset($url_parts['path']) ? $url_parts['path'] : '';

    //remove relative path instructions like ../ etc
    $regs = array();
    while (preg_match("/[^\\/]*\\/[.]{2}\\//", $urlpath, $regs)) {
        $urlpath = str_replace($regs[0], "", $urlpath);
    }
    $urlpath = preg_replace("/\\/+/", "/", $urlpath);
    $urlpath = preg_replace("/[^\\/]*\\/[.]{2}/", "", $urlpath);
    $urlpath = str_replace("./", "", $urlpath);

    $query = "";
    if (isset($url_parts['query'])) {
        $query = "?" . $url_parts['query'];
    }

    // The port is dropped when the start URL uses 80 or the link carries none;
    // otherwise the port of the start URL (not of the link) is emitted.
    if ($main_port == 80 || $url_port == "") {
        $portq = "";
    } else {
        $portq = ":" . $main_port;
    }
    $url = $url_scheme . "://" . $url_host . $portq . $urlpath . $query;

    //if we index sub-domains
    if ($can_leave_domain == 1) {
        return $url;
    }

    // Writes through to the global $mainurl (see the function header).
    $mainurl = remove_file_from_url($mainurl);
    if ($strip_sessids == 1) {
        $url = remove_sessid($url);
    }

    //only urls in staying in the starting domain/directory are followed
    $url = convert_url($url);
    if (strstr($url, $mainurl) == false) {
        return '';
    }
    return $url;
}
/**
 * Update a site record, its category assignments and, if the site's base URL
 * changed, the URLs of all links stored for that site.
 *
 * All string values are expected unescaped; they are quoted here with
 * $db->quote() before being placed into SQL. Numeric values are cast to int
 * because they are interpolated without quotes.
 *
 * Error strings reported by sql_errorstring() are echoed as the statements run.
 *
 * @param int|string $site_id    Id of the site row to update.
 * @param string     $url        New site URL; a trailing "/" is added when it has no path.
 * @param string     $title      New title.
 * @param string     $short_desc New short description.
 * @param int|string $depth      Value stored in spider_depth.
 * @param string     $required   Value stored in the "required" column.
 * @param string     $disallowed Value stored in the "disallowed" column.
 * @param int|string $domaincb   Value stored in can_leave_domain.
 * @param mixed      $cat        Map of category_id => 'on' for every selected category.
 *
 * @return string HTML confirmation, or the error string from sql_errorstring().
 */
function editsite($site_id, $url, $title, $short_desc, $depth, $required, $disallowed, $domaincb, $cat)
{
    global $db;

    // These values go into the SQL unquoted, so make sure they are plain integers.
    $site_id = (int) $site_id;
    $depth = (int) $depth;
    $domaincb = (int) $domaincb;

    // get the current "root path"
    $old_url = "";
    $result = $db->query("select url from " . TABLE_PREFIX . "sites where site_id={$site_id}");
    if ($result) {
        $row = $result->fetch();
        if ($row) {
            $old_url = remove_file_from_url($row[0]);
        }
        // Only a successful query yields a statement that can be closed.
        $result->closeCursor();
    }

    $short_desc = $db->quote($short_desc);
    $title = $db->quote($title);

    $db->exec("DELETE FROM " . TABLE_PREFIX . "site_category where site_id={$site_id}");
    echo sql_errorstring(__FILE__, __LINE__);

    // A bare "scheme://host" URL gets a trailing slash so it always has a path.
    $compurl = parse_url($url);
    if (!is_array($compurl) || !isset($compurl['path']) || $compurl['path'] == '') {
        $url = $url . "/";
    }

    $url_sql = $db->quote($url);
    $required_sql = $db->quote($required);
    $disallowed_sql = $db->quote($disallowed);
    $db->exec("UPDATE " . TABLE_PREFIX . "sites SET url={$url_sql}, title={$title}, short_desc={$short_desc}, spider_depth ={$depth}, required={$required_sql}, disallowed={$disallowed_sql}, can_leave_domain={$domaincb} WHERE site_id={$site_id}");
    echo sql_errorstring(__FILE__, __LINE__);

    // Re-create the category assignments from the submitted selection.
    $result = $db->query("select category_id from " . TABLE_PREFIX . "categories");
    echo sql_errorstring(__FILE__, __LINE__);
    if ($result) {
        while ($row = $result->fetch()) {
            $cat_id = (int) $row[0];
            if (is_array($cat) && isset($cat[$cat_id]) && $cat[$cat_id] == 'on') {
                $db->exec("INSERT INTO " . TABLE_PREFIX . "site_category (site_id, category_id) values ('{$site_id}', '{$cat_id}')");
                echo sql_errorstring(__FILE__, __LINE__);
            }
        }
    }

    /* update all links */
    $new_url = remove_file_from_url($url);
    if (strcasecmp($new_url, $old_url) != 0) {
        $result = $db->query("SELECT link_id, url FROM " . TABLE_PREFIX . "links WHERE site_id={$site_id}");
        if ($result) {
            $old_length = strlen($old_url);
            while ($row = $result->fetch()) {
                $link_id = (int) $row[0];
                // Swap the old base URL prefix for the new one.
                $link = $new_url . substr($row[1], $old_length);
                $link_sql = $db->quote($link);
                $db->exec("UPDATE " . TABLE_PREFIX . "links SET url={$link_sql} WHERE link_id={$link_id}");
            }
        }
    }

    if (!sql_errorstring(__FILE__, __LINE__)) {
        return "<br/><center><b>Site updated.</b></center>";
    } else {
        return sql_errorstring(__FILE__, __LINE__);
    }
}