/**
 * Checks a single link with url_status(), queues the target of a relocation in the
 * temp table and reports the outcome.
 *
 * URLs in which 'localhost' starts at offset 7 (i.e. directly after "http://") are
 * treated as local: they are only reported via printStandardReport('link_local')
 * and are not fetched.
 *
 * For every other URL:
 *  - if the status text contains "Relocation", the redirect target is cleaned with
 *    url_purify(), stripped of blanks, escaped and - unless it is already present
 *    for this session - inserted into the temp table with the given $level;
 *  - a status of 'ok' is reported as 'link_okay'; any other status (including a
 *    relocation) is printed with printUrlStatus() and, when $reindex == 1, the URL
 *    is handed to check_for_removal().
 *
 * With $debug enabled, a failing query prints a diagnostic and terminates the script.
 *
 * NOTE(review): this file declares link_check() twice (this six-parameter variant
 * and a five-parameter one). PHP does not allow a function to be redeclared, so
 * both cannot be loaded together - confirm which one is meant to stay.
 *
 * @param string $url              Link to check.
 * @param int    $level            Level stored with a queued redirect target.
 * @param string $sessid           Session id; passed to url_status() and used as temp.id.
 * @param mixed  $can_leave_domain Passed through to url_purify().
 * @param int    $reindex          1 = pass links whose status is not 'ok' to check_for_removal().
 * @param mixed  $site_id          Passed through to url_status().
 *
 * @return array|null The status array with 'aborted' = 1 and an explanatory 'state'
 *                    when a redirect target is rejected; otherwise nothing is returned.
 */
function link_check($url, $level, $sessid, $can_leave_domain, $reindex, $site_id) {
    global $db_con, $debug, $command_line, $mysql_table_prefix, $user_agent, $no_log, $clear;

    $deletable = 0;

    // The original guard compared strpos() against '/', which never matches on PHP 8,
    // so local URLs were fetched AND reported as local. Offset 7 is the value the
    // 'link_local' report has always been keyed on.
    $is_local = (strpos($url, 'localhost') === 7);

    if ($is_local) {
        printStandardReport('link_local', $command_line, $no_log);
    } else {
        $url_status = url_status($url, $site_id, $sessid);

        if (strstr($url_status['state'], "Relocation")) {
            // Kept as variables (not literals) in case url_purify() takes them by reference.
            $care_excl   = '1'; // care file suffixed to be excluded
            $relocated   = '1'; // URL is relocated
            $local_redir = '';
            $purified = url_purify($url_status['path'], $url, $can_leave_domain, $care_excl, $relocated, $local_redir);
            $url = $db_con->real_escape_string(str_replace(' ', '', $purified));

            // url_purify() signals problems through sentinel values instead of a URL.
            $abort_state = null;
            if (!$url) {
                $abort_state = "Indexation aborted because of undefined redirection error.";
            } elseif ($url === 'self') {
                // the redirected URL is equal to the calling URL
                $abort_state = "Indexation aborted for this page, because the redirection was a link in it selves.<br />Blocked by Sphide-plus, because this could end in an infinite indexation loop.";
            } elseif ($url === 'excl') {
                // the redirected URL carries an excluded file suffix
                $abort_state = "Indexation aborted because the redirected link does not meet the URL suffix conditions.";
            } elseif (!strstr($url, "//")) {
                // purifying failed; $url then holds the reason instead of a URL
                $abort_state = "Indexation aborted because: {$url}";
            }
            if ($abort_state !== null) {
                $url_status['aborted'] = 1;
                $url_status['state'] = $abort_state;
                return $url_status;
            }

            mysqltest();

            // Keep query() exactly two lines above the __LINE__ expression: the debug
            // output reports "__LINE__ - 2" as the row of the failing statement.
            $sql_query = "SELECT link from " . $mysql_table_prefix . "temp where link='{$url}' && id = '{$sessid}'";
            $result = $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $err_row = __LINE__ - 2;
                printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error);
                // echo instead of printf: file path and query may contain a literal '%'.
                echo "<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>";
                echo "<p><span class='red'> Script execution aborted. <br /></span>";
                echo "<p><strong>Invalid query string, which caused the SQL error:</strong></p>";
                echo "<p> {$sql_query} </p>";
                exit;
            }

            // A failed lookup (query() returned false) is handled like "not queued yet".
            $rows = $result ? $result->num_rows : 0;
            if ($rows == 0) {
                $sql_query = "INSERT into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', '{$level}', '{$sessid}')";
                $db_con->query($sql_query);
                if ($debug && $db_con->errno) {
                    $err_row = __LINE__ - 2;
                    printf("<p><span class='red'> MySQL failure: %s \n<br /></span></p>", $db_con->error);
                    echo "<p><span class='red'> Found in script: " . __FILE__ . " row: {$err_row} in function(): " . __FUNCTION__ . " <br /></span></p>";
                    echo "<p><span class='red'> Script execution aborted. \n<br /></span>";
                    echo "<p><strong>Invalid query string, which caused the SQL error:</strong></p>";
                    echo "<p> {$sql_query} </p>";
                    exit;
                }
            }

            // NOTE(review): the original contained the no-op comparison
            // `$url_status['state'] == "redirected";` here. An assignment was presumably
            // intended, but it would change what printUrlStatus() receives below, so the
            // state is left untouched - confirm.

            if ($clear == 1) {
                clean_resource($result, '17');
            }
        }

        ini_set("user_agent", $user_agent);

        if ($url_status['state'] == 'ok') {
            printStandardReport('link_okay', $command_line, $no_log);
        } else {
            $deletable = 1;
            printUrlStatus($url_status['state'], $command_line);
        }
    }

    // NOTE(review): after a relocation $url holds the (escaped) redirect target, so that
    // is what gets passed to check_for_removal(), not the URL originally requested - confirm.
    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }
}
/**
 * Crawls one site level by level and hands every permitted link to index_url().
 *
 * Steps, as implemented below:
 *  1. fills the global $all_keywords map from the keywords table if it is not set yet;
 *  2. appends a trailing slash to $url when the URL has no path;
 *  3. looks up the site in the sites table, creating the row if it is missing and
 *     otherwise refreshing its settings; on a re-index the already known links of
 *     the site are copied into the temp table first;
 *  4. resumes from the site's row in the pending table if there is one, otherwise
 *     seeds the temp table with the start URL;
 *  5. walks the temp table one level at a time; links matching an entry returned by
 *     check_robot_txt() or rejected by check_include() are reported and passed to
 *     check_for_removal(), all others go to index_url();
 *  6. finally deletes this run's rows from temp and pending.
 *
 * Query errors are only echoed via mysql_error(); they are not handled otherwise.
 *
 * NOTE(review): this function depends on the mysql_* extension, which was removed
 * in PHP 7.0; porting needs a connection handle that is not visible from here.
 * NOTE(review): $url, $url_inc and $url_not_inc are interpolated into SQL as
 * received; presumably the caller escapes them - confirm. Links read back from the
 * database are escaped below before being reused in a query.
 *
 * @param string $url              Start URL of the site.
 * @param int    $reindex          1 = re-index known links; any other value also
 *                                 registers the run in the pending table.
 * @param int    $maxlevel         Highest level processed when $soption is 'level';
 *                                 also stored as sites.spider_depth.
 * @param string $soption          'level' stops after $maxlevel, 'full' continues until
 *                                 a level has no queued links; other values skip the crawl.
 * @param string $url_inc          Stored as sites.required and passed to check_include().
 * @param string $url_not_inc      Stored as sites.disallowed and passed to check_include().
 * @param mixed  $can_leave_domain Stored in the sites table and passed to index_url().
 *
 * @return void
 */
function index_site($url, $reindex, $maxlevel, $soption, $url_inc, $url_not_inc, $can_leave_domain) {
    global $mysql_table_prefix, $command_line, $mainurl, $tmp_urls, $domain_arr, $all_keywords;

    if (!isset($all_keywords)) {
        $result = mysql_query("select keyword_ID, keyword from " . $mysql_table_prefix . "keywords");
        echo mysql_error();
        while ($row = mysql_fetch_array($result)) {
            $all_keywords[addslashes($row[1])] = $row[0];
        }
    }

    // parse_url() omits the 'path' key for URLs such as "http://host".
    $compurl = parse_url($url);
    if (!isset($compurl['path']) || $compurl['path'] == '') {
        $url = $url . "/";
    }

    $sessid = md5(microtime() . getenv("REMOTE_ADDR"));
    $urlparts = parse_url($url);
    $domain = isset($urlparts['host']) ? $urlparts['host'] : null;

    $sites_update = "update " . $mysql_table_prefix . "sites set indexdate=now(), spider_depth = {$maxlevel}, required = '{$url_inc}'," . "disallowed = '{$url_not_inc}', can_leave_domain={$can_leave_domain} where site_id={$site_id_placeholder = ''}";
    unset($sites_update, $site_id_placeholder); // built below once $site_id is known

    $result = mysql_query("select site_id from " . $mysql_table_prefix . "sites where url='{$url}'");
    echo mysql_error();
    $row = mysql_fetch_row($result);
    $site_id = $row ? $row[0] : '';

    if ($site_id == '') {
        // Unknown site: create it and read back the generated id.
        mysql_query("insert into " . $mysql_table_prefix . "sites (url, indexdate, spider_depth, required, disallowed, can_leave_domain) " . "values ('{$url}', now(), {$maxlevel}, '{$url_inc}', '{$url_not_inc}', {$can_leave_domain})");
        echo mysql_error();
        $result = mysql_query("select site_ID from " . $mysql_table_prefix . "sites where url='{$url}'");
        $row = mysql_fetch_row($result);
        $site_id = $row ? $row[0] : '';
    } else {
        if ($reindex == 1) {
            // Re-index: queue the start URL plus every link already stored for the site.
            mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', 0, '{$sessid}')");
            echo mysql_error();
            $result = mysql_query("select url, level from " . $mysql_table_prefix . "links where site_id = {$site_id}");
            while ($row = mysql_fetch_array($result)) {
                $site_link = $row['url'];
                $link_level = $row['level'];
                if ($site_link != $url) {
                    // Stored links may contain quotes; escape before reuse in SQL.
                    $sql_site_link = mysql_real_escape_string($site_link);
                    mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$sql_site_link}', {$link_level}, '{$sessid}')");
                }
            }
        }
        mysql_query("update " . $mysql_table_prefix . "sites set indexdate=now(), spider_depth = {$maxlevel}, required = '{$url_inc}'," . "disallowed = '{$url_not_inc}', can_leave_domain={$can_leave_domain} where site_id={$site_id}");
        echo mysql_error();
    }

    $result = mysql_query("select site_id, temp_id, level, count, num from " . $mysql_table_prefix . "pending where site_id='{$site_id}'");
    echo mysql_error();
    $row = mysql_fetch_row($result);
    $resuming = ($row && $row[0] != '');
    $level = 0;
    $pend_count = 0;
    $domain_arr = get_domains();

    if (!$resuming) {
        mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', 0, '{$sessid}')");
        echo mysql_error();
    } else {
        // Continue a suspended run with its stored session id, level and position.
        // NOTE(review): pending.num is selected as well, but the link counter
        // restarts at 0 below - confirm that is intended.
        printStandardReport('continueSuspended', $command_line);
        $sessid = $row[1];
        $level = $row[2];
        $pend_count = $row[3] + 1;
        $tmp_urls = get_temp_urls($sessid);
    }

    if ($reindex != 1) {
        mysql_query("insert into " . $mysql_table_prefix . "pending (site_id, temp_id, level, count) values ('{$site_id}', '{$sessid}', '0', '0')");
        echo mysql_error();
    }

    $omit = check_robot_txt($url);
    printHeader($omit, $url, $command_line);
    $mainurl = $url;
    $num = 0;

    while (($level <= $maxlevel && $soption == 'level') || $soption == 'full') {
        // Only the first level of a resumed run starts in the middle of the list.
        $count = $resuming ? $pend_count : 0;
        $resuming = false;

        $links = array();
        $result = mysql_query("select distinct link from " . $mysql_table_prefix . "temp where level={$level} && id='{$sessid}' order by link");
        echo mysql_error();
        $rows = mysql_num_rows($result);
        if ($rows == 0) {
            break;
        }
        while ($row = mysql_fetch_array($result)) {
            $links[] = $row['link'];
        }
        $link_total = count($links);

        while ($count < $link_total) {
            $num++;
            $thislink = $links[$count];
            $urlparts = parse_url($thislink);
            $link_host = isset($urlparts['host']) ? $urlparts['host'] : '';
            $forbidden = 0;

            foreach ($omit as $omiturl) {
                $omiturl = trim($omiturl);
                if ($omiturl === '') {
                    // An empty entry would otherwise be expanded to the bare host and
                    // block every link of the site.
                    continue;
                }
                $omiturl_parts = parse_url($omiturl);
                // Entries without a scheme are paths; match them together with the host.
                $check_omit = empty($omiturl_parts['scheme']) ? $link_host . $omiturl : $omiturl;

                // strpos() returns 0 for a match at the very start (an entry given as a
                // full URL), which the original truthiness test took for "no match".
                if ($check_omit !== '' && strpos($thislink, $check_omit) !== false) {
                    printRobotsReport($num, $thislink, $command_line);
                    check_for_removal($thislink);
                    $forbidden = 1;
                    break;
                }
            }

            if (!check_include($thislink, $url_inc, $url_not_inc)) {
                printUrlStringReport($num, $thislink, $command_line);
                check_for_removal($thislink);
                $forbidden = 1;
            }

            if ($forbidden == 0) {
                printRetrieving($num, $thislink, $command_line);
                $sql_link = mysql_real_escape_string($thislink);
                $query = "select md5sum, indexdate from " . $mysql_table_prefix . "links where url='{$sql_link}'";
                $result = mysql_query($query);
                echo mysql_error();
                $rows = mysql_num_rows($result);

                if ($rows != 0 && $reindex != 1) {
                    printStandardReport('inDatabase', $command_line);
                } else {
                    // New links are indexed without a previous checksum/date; known
                    // links (re-index only) carry the stored values for comparison.
                    $md5sum = '';
                    $indexdate = '';
                    if ($rows != 0) {
                        $row = mysql_fetch_array($result);
                        $md5sum = $row['md5sum'];
                        $indexdate = $row['indexdate'];
                    }
                    index_url($thislink, $level + 1, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex);
                    // Remember the position so that an interrupted run can be resumed.
                    mysql_query("update " . $mysql_table_prefix . "pending set level = {$level}, count={$count}, num={$num} where site_id={$site_id}");
                    echo mysql_error();
                }
            }
            $count++;
        }
        $level++;
    }

    mysql_query("delete from " . $mysql_table_prefix . "temp where id = '{$sessid}'");
    echo mysql_error();
    mysql_query("delete from " . $mysql_table_prefix . "pending where site_id = '{$site_id}'");
    echo mysql_error();
    printStandardReport('completed', $command_line);
}
/**
 * Checks a single link with url_status(), queues the target of a relocation in the
 * temp table and reports the outcome.
 *
 * URLs in which 'localhost' starts at offset 7 (i.e. directly after "http://") are
 * only reported via printStandardReport('link_local') and are not fetched.
 *
 * For every other URL:
 *  - if the status text contains "Relocation", the redirect target is cleaned with
 *    url_purify(), stripped of blanks and - unless it is already present for this
 *    session - inserted into the temp table with the given $level;
 *  - a status of 'ok' is reported as 'link_okay'; any other status (including a
 *    relocation) is printed with printUrlStatus() and, when $reindex == 1, the URL
 *    is handed to check_for_removal().
 *
 * NOTE(review): this file declares link_check() twice (a six-parameter variant and
 * this five-parameter one). PHP does not allow a function to be redeclared, so both
 * cannot be loaded together - confirm which one is meant to stay.
 * NOTE(review): the mysql_* functions used here were removed in PHP 7.0; porting
 * them needs a connection handle that is not visible from this function.
 *
 * @param string $url              Link to check.
 * @param int    $level            Level stored with a queued redirect target.
 * @param string $sessid           Session id used as temp.id.
 * @param mixed  $can_leave_domain Passed through to url_purify().
 * @param int    $reindex          1 = pass links whose status is not 'ok' to check_for_removal().
 *
 * @return void
 */
function link_check($url, $level, $sessid, $can_leave_domain, $reindex) {
    global $command_line;

    $deletable = 0;
    $is_local = (strpos($url, 'localhost') === 7);

    if ($is_local) {
        printStandardReport('link_local', $command_line);
    } else {
        $url_status = url_status($url);

        if (strstr($url_status['state'], "Relocation")) {
            // str_replace() replaces eregi_replace(), which was removed in PHP 7.0;
            // for a literal blank both produce the same result.
            $url = str_replace(" ", "", url_purify($url_status['path'], $url, $can_leave_domain));

            if ($url != '') {
                // The redirect target is dictated by the remote server: escape it
                // before it is placed into SQL.
                $sql_url = mysql_real_escape_string($url);
                $result = mysql_query("select link from " . TABLE_PREFIX . "temp where link='{$sql_url}' && id = '{$sessid}'");
                if (DEBUG > '0') {
                    echo mysql_error();
                }

                // A failed lookup is handled like "not queued yet".
                $rows = $result ? mysql_num_rows($result) : 0;
                if ($rows == 0) {
                    mysql_query("insert into " . TABLE_PREFIX . "temp (link, level, id) values ('{$sql_url}', '{$level}', '{$sessid}')");
                    if (DEBUG > '0') {
                        echo mysql_error();
                    }
                }

                // Only reached when a query was issued; the original also called this
                // with an undefined $result when the purified URL was empty.
                clean_resource($result);
            }

            // NOTE(review): the original contained the no-op comparison
            // `$url_status['state'] == "redirected";` here. An assignment was presumably
            // intended, but it would change what printUrlStatus() receives below, so the
            // state is left untouched - confirm.
        }

        ini_set("user_agent", Configure::read('user_agent'));

        if ($url_status['state'] == 'ok') {
            printStandardReport('link_okay', $command_line);
        } else {
            $deletable = 1;
            printUrlStatus($url_status['state'], $command_line);
        }
    }

    // NOTE(review): after a relocation $url holds the redirect target, so that is what
    // gets passed to check_for_removal(), not the URL originally requested - confirm.
    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }
}