/**
 * Turn a media link found on a page into a normalised absolute URL, or reject it.
 *
 * The link is rejected (empty string returned) when:
 *  - valid_link() reports '0' for it,
 *  - it ends with a backslash,
 *  - it points to a host other than the main URL's host while $ex_media != 1,
 *  - its query string is a truthy key in $apache_indexes,
 *  - it contains a mailto:, javascript: or news: prefix,
 *  - its scheme is anything other than http, https or empty,
 *  - the final URL does not contain the main URL's host (only checked when $ex_media != 1).
 *
 * Side effect: when $ex_media != 1 and the link survives the early checks, the global
 * $mainurl is overwritten with remove_file_from_url($mainurl).
 *
 * Port handling: the link's own port is emitted (port 80 and an empty port are dropped),
 * and root-relative links inherit the parent URL's port.
 *
 * @param string $url        Link as found in the page (absolute, root-relative or relative).
 * @param string $parent_url URL of the page containing the link; used to resolve relative links.
 * @param mixed  $select     Passed through to valid_link().
 * @param mixed  $current    Not used by this function.
 * @param mixed  $handle     Not used by this function.
 * @param mixed  $store_file Not used by this function.
 *
 * @return string The normalised URL, or '' when the link must not be followed.
 */
function build_url($url, $parent_url, $select, $current, $handle, $store_file)
{
    global $clear, $mainurl, $apache_indexes, $strip_sessids, $ex_media;

    // find only media-files with allowed file suffix or type-description or application descriptor
    $match = valid_link($url, $select);
    if ($match == '0') {
        return '';
    }

    if (substr($url, -1) == '\\') {
        return '';
    }

    // NOTE(review): despite its name this is parsed from $url, not from $parent_url.
    // It is only read in the "/?" branch below; confirm which URL was intended before changing it.
    $original_parent_url_parts = parse_all_url($url);
    $urlparts = parse_all_url($url);
    $main_url_parts = parse_all_url($mainurl);

    $link_host = isset($urlparts['host']) ? $urlparts['host'] : '';
    $main_host = isset($main_url_parts['host']) ? $main_url_parts['host'] : '';
    if ($link_host != "" && $link_host != $main_host && $ex_media != 1) {
        return '';
    }

    if (isset($urlparts['query']) && !empty($apache_indexes[$urlparts['query']])) {
        return '';
    }

    if (preg_match("/[\\/]?mailto:|[\\/]?javascript:|[\\/]?news:/i", $url)) {
        return '';
    }

    //only http and https links are followed
    $scheme = isset($urlparts['scheme']) ? $urlparts['scheme'] : "";
    if (!($scheme == 'http' || $scheme == '' || $scheme == 'https')) {
        return '';
    }

    //parent url might be used to build an url from relative path
    $parent_url = remove_file_from_url($parent_url);
    $parent_url_parts = parse_all_url($parent_url);
    if (substr($url, 0, 1) == '/') {
        $parent_scheme = isset($parent_url_parts['scheme']) ? $parent_url_parts['scheme'] : '';
        $parent_host = isset($parent_url_parts['host']) ? $parent_url_parts['host'] : '';
        // keep the parent's port so that root-relative links on non-default ports stay on that port
        $parent_port = '';
        if (isset($parent_url_parts['port']) && $parent_url_parts['port'] != '') {
            $parent_port = ':' . $parent_url_parts['port'];
        }
        $url = $parent_scheme . "://" . $parent_host . $parent_port . $url;
    } elseif (!isset($urlparts['scheme'])) {
        $url = $parent_url . $url;
    }

    $url_parts = parse_all_url($url);
    $urlpath = isset($url_parts['path']) ? $url_parts['path'] : '';

    //remove relative path instructions like ../ etc
    $regs = array();
    while (preg_match("/[^\\/]*\\/[.]{2}\\//", $urlpath, $regs)) {
        $urlpath = str_replace($regs[0], "", $urlpath);
    }
    $urlpath = preg_replace("/\\/+/", "/", $urlpath);
    $urlpath = preg_replace("/[^\\/]*\\/[.]{2}/", "", $urlpath);
    $urlpath = str_replace("./", "", $urlpath);

    $query = "";
    if (isset($url_parts['query'])) {
        $query = "?" . $url_parts['query'];
    }

    // emit the link's own port; an empty port and the default port 80 are left out
    $link_port = isset($url_parts['port']) ? $url_parts['port'] : '';
    if ($link_port == "" || $link_port == 80) {
        $portq = "";
    } else {
        $portq = ":" . $link_port;
    }

    // if not exists, add slash instead of real urlpath
    if (!$urlpath) {
        $urlpath = "/";
    }

    $final_scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : '';
    $final_host = isset($url_parts['host']) ? $url_parts['host'] : '';
    $url = $final_scheme . "://" . $final_host . $portq . $urlpath . $query;

    if (strstr($url, "/?")) {
        //added to address <a href="?id=1"> syntax
        $main_path = isset($main_url_parts['path']) ? $main_url_parts['path'] : '';
        $orig_path = isset($original_parent_url_parts['path']) ? $original_parent_url_parts['path'] : '';
        // '' instead of null: passing null as the replacement is deprecated since PHP 8.1
        $page = str_replace($main_path, '', $orig_path);
        if (substr(trim($mainurl), -1) !== "/" && substr(trim($page), 0, 1) !== "/") {
            $page = "/" . $page;
        }
        $url = $mainurl . $page . $query;
    }

    // if we index sub-domains
    if ($ex_media == 1) {
        return $url;
    }

    $mainurl = remove_file_from_url($mainurl);
    $url = convert_url($url); // convert 'blank' and '&'

    if ($strip_sessids == 1) {
        $url = remove_sessid($url);
    }

    // $main_url_parts['host'] will support also relative-back-folder like ../../
    $result = (strpos($url, $main_host) === false) ? '' : $url;

    // optional release of the locals when $clear is enabled
    if ($clear == 1) {
        unset($select, $mainurl, $urlpath, $query, $page);
        $original_parent_url_parts = array();
        $main_url_parts = array();
        $url_parts = array();
        $urlparts = array();
    }

    return $result;
}
/**
 * Turn a link found on a page into a normalised absolute URL, or reject it.
 *
 * The link is rejected (empty string returned) when:
 *  - it points to a host other than the main URL's host while $can_leave_domain != 1,
 *  - it ends with one of the suffixes listed in the global $ext,
 *  - it ends with a backslash,
 *  - its query string is a truthy key in $apache_indexes,
 *  - it contains a mailto:, javascript: or news: prefix,
 *  - its scheme is anything other than http, https or empty,
 *  - the resolved URL cannot be parsed,
 *  - the final URL does not contain $mainurl (only checked when $can_leave_domain != 1).
 *
 * Side effect: when $can_leave_domain != 1 and the link survives the early checks, the
 * global $mainurl is overwritten with remove_file_from_url($mainurl).
 *
 * Port handling: the link's own port is emitted (port 80 and an empty port are dropped),
 * and root-relative links inherit the parent URL's port.
 *
 * @param string $url              Link as found in the page (absolute, root-relative or relative).
 * @param string $parent_url       URL of the page containing the link; used to resolve relative links.
 * @param mixed  $can_leave_domain 1 to allow links to other hosts and skip the final $mainurl check.
 *
 * @return string The normalised URL, or '' when the link must not be followed.
 */
function url_purify($url, $parent_url, $can_leave_domain)
{
    global $ext, $mainurl, $apache_indexes, $strip_sessids;

    // parse_url() returns false for seriously malformed input; isset() below copes with that
    $urlparts = parse_url($url);
    $main_url_parts = parse_url($mainurl);

    $link_host = isset($urlparts['host']) ? $urlparts['host'] : '';
    $main_host = isset($main_url_parts['host']) ? $main_url_parts['host'] : '';
    if ($link_host != "" && $link_host != $main_host && $can_leave_domain != 1) {
        return '';
    }

    // each() was removed in PHP 8.0, so the suffix list is walked with foreach
    if (is_array($ext)) {
        foreach ($ext as $excl) {
            if (preg_match("/\\.{$excl}\$/i", $url)) {
                return '';
            }
        }
    }

    if (substr($url, -1) == '\\') {
        return '';
    }

    if (isset($urlparts['query']) && !empty($apache_indexes[$urlparts['query']])) {
        return '';
    }

    if (preg_match("/[\\/]?mailto:|[\\/]?javascript:|[\\/]?news:/i", $url)) {
        return '';
    }

    //only http and https links are followed
    $scheme = isset($urlparts['scheme']) ? $urlparts['scheme'] : "";
    if (!($scheme == 'http' || $scheme == '' || $scheme == 'https')) {
        return '';
    }

    //parent url might be used to build an url from relative path
    $parent_url = remove_file_from_url($parent_url);
    $parent_url_parts = parse_url($parent_url);
    if (substr($url, 0, 1) == '/') {
        $parent_scheme = isset($parent_url_parts['scheme']) ? $parent_url_parts['scheme'] : '';
        $parent_host = isset($parent_url_parts['host']) ? $parent_url_parts['host'] : '';
        // keep the parent's port so that root-relative links on non-default ports stay on that port
        $parent_port = '';
        if (isset($parent_url_parts['port']) && $parent_url_parts['port'] != '') {
            $parent_port = ':' . $parent_url_parts['port'];
        }
        $url = $parent_scheme . "://" . $parent_host . $parent_port . $url;
    } elseif (!isset($urlparts['scheme'])) {
        $url = $parent_url . $url;
    }

    $url_parts = parse_url($url);
    if (!is_array($url_parts)) {
        // the resolved URL is not parseable, so there is nothing sensible to return
        return '';
    }
    $urlpath = isset($url_parts['path']) ? $url_parts['path'] : '';

    //remove relative path instructions like ../ etc
    $regs = array();
    while (preg_match("/[^\\/]*\\/[.]{2}\\//", $urlpath, $regs)) {
        $urlpath = str_replace($regs[0], "", $urlpath);
    }
    $urlpath = preg_replace("/\\/+/", "/", $urlpath);
    $urlpath = preg_replace("/[^\\/]*\\/[.]{2}/", "", $urlpath);
    $urlpath = str_replace("./", "", $urlpath);

    $query = "";
    if (isset($url_parts['query'])) {
        $query = "?" . $url_parts['query'];
    }

    // emit the link's own port; an empty port and the default port 80 are left out
    $link_port = isset($url_parts['port']) ? $url_parts['port'] : '';
    if ($link_port == "" || $link_port == 80) {
        $portq = "";
    } else {
        $portq = ":" . $link_port;
    }

    $final_scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : '';
    $final_host = isset($url_parts['host']) ? $url_parts['host'] : '';
    $url = $final_scheme . "://" . $final_host . $portq . $urlpath . $query;

    //if we index sub-domains
    if ($can_leave_domain == 1) {
        return $url;
    }

    $mainurl = remove_file_from_url($mainurl);

    if ($strip_sessids == 1) {
        $url = remove_sessid($url);
    }

    //only urls staying in the starting domain/directory are followed
    $url = convert_url($url);
    if (strpos($url, $mainurl) === false) {
        return '';
    }

    return $url;
}
/**
 * Update a site record, its category assignments and, if the site's root URL
 * changed, the URLs of the links stored for that site.
 *
 * Steps performed through the global $db handle:
 *  1. read the site's current URL and reduce it with remove_file_from_url(),
 *  2. delete the site's rows from site_category,
 *  3. update the sites row (a trailing "/" is appended to $url when it has no path),
 *  4. insert a site_category row for every category whose entry in $cat is 'on',
 *  5. when the old and new root URLs differ (case-insensitively), replace the old root
 *     prefix with the new one in every link of the site that starts with the old root.
 *     Links that do not start with the old root are left untouched.
 *
 * SQL error strings from sql_errorstring() are echoed after the individual statements.
 *
 * All string values are escaped with $db->quote() and numeric values are cast to int
 * before being placed into SQL.
 * NOTE(review): this assumes callers pass raw (not pre-escaped) values, as is already
 * the case for $title and $short_desc - confirm against the callers.
 *
 * @param int|string $site_id    Id of the site to update.
 * @param string     $url        New site URL.
 * @param string     $title      New site title.
 * @param string     $short_desc New short description.
 * @param int|string $depth      Value stored in spider_depth.
 * @param string     $required   Value stored in required.
 * @param string     $disallowed Value stored in disallowed.
 * @param int|string $domaincb   Value stored in can_leave_domain.
 * @param array      $cat        Map of category id => 'on' for the selected categories.
 *
 * @return string A confirmation HTML snippet, or the SQL error string when one is pending.
 */
function editsite($site_id, $url, $title, $short_desc, $depth, $required, $disallowed, $domaincb, $cat)
{
    global $db;

    // numeric values go into SQL unquoted, so force them to integers
    $site_id = (int) $site_id;
    $depth = (int) $depth;
    $domaincb = (int) $domaincb;

    // get the current "root path"
    $old_url = "";
    $result = $db->query("select url from " . TABLE_PREFIX . "sites where site_id={$site_id}");
    if ($result) {
        $row = $result->fetch();
        if ($row) {
            $old_url = (string) remove_file_from_url($row[0]);
        }
        // only close the cursor when the query actually produced a statement
        $result->closeCursor();
    }

    $short_desc = $db->quote((string) $short_desc);
    $title = $db->quote((string) $title);

    $db->exec("DELETE FROM " . TABLE_PREFIX . "site_category where site_id={$site_id}");
    echo sql_errorstring(__FILE__, __LINE__);

    // a URL without any path gets a trailing slash
    $compurl = parse_url($url);
    if (!is_array($compurl) || !isset($compurl['path']) || $compurl['path'] == '') {
        $url = $url . "/";
    }

    $quoted_url = $db->quote((string) $url);
    $quoted_required = $db->quote((string) $required);
    $quoted_disallowed = $db->quote((string) $disallowed);
    $db->exec(
        "UPDATE " . TABLE_PREFIX . "sites SET url={$quoted_url}, title={$title}, short_desc={$short_desc}, "
        . "spider_depth ={$depth}, required={$quoted_required}, disallowed={$quoted_disallowed}, "
        . "can_leave_domain={$domaincb} WHERE site_id={$site_id}"
    );
    echo sql_errorstring(__FILE__, __LINE__);

    $result = $db->query("select category_id from " . TABLE_PREFIX . "categories");
    echo sql_errorstring(__FILE__, __LINE__);
    if ($result) {
        while ($row = $result->fetch()) {
            $cat_id = $row[0];
            if (isset($cat[$cat_id]) && $cat[$cat_id] == 'on') {
                $db->exec("INSERT INTO " . TABLE_PREFIX . "site_category (site_id, category_id) values ('{$site_id}', '{$cat_id}')");
                echo sql_errorstring(__FILE__, __LINE__);
            }
        }
    }

    /* update all links */
    $new_url = (string) remove_file_from_url($url);
    // without a known old root there is no prefix that could be replaced
    if ($old_url != '' && strcasecmp($new_url, $old_url) != 0) {
        $old_len = strlen($old_url);
        $result = $db->query("SELECT link_id, url FROM " . TABLE_PREFIX . "links WHERE site_id={$site_id}");
        if ($result) {
            while ($row = $result->fetch()) {
                $link_id = (int) $row[0];
                $link = (string) $row[1];
                // links outside the old root would be cut at an arbitrary position
                if (strncasecmp($link, $old_url, $old_len) != 0) {
                    continue;
                }
                $link = $new_url . substr($link, $old_len);
                $db->exec("UPDATE " . TABLE_PREFIX . "links SET url=" . $db->quote($link) . " WHERE link_id={$link_id}");
            }
        }
    }

    if (!sql_errorstring(__FILE__, __LINE__)) {
        return "<br/><center><b>Site updated.</b></center>";
    }

    return sql_errorstring(__FILE__, __LINE__);
}