/**
 * Checks the status of a single link and, when the URL has been relocated,
 * queues the redirect target in the "temp" table of the current session.
 *
 * The status is obtained from url_status(). If its 'state' contains
 * "Relocation", the target path is passed through url_purify(), stripped of
 * spaces and SQL-escaped; that escaped value replaces $url for the rest of the
 * function (including the check_for_removal() call). The target is inserted
 * into the temp table unless a row with the same link and session id exists.
 *
 * Any state other than 'ok' is reported through printUrlStatus() and marks the
 * link as removable; check_for_removal() is then called when $reindex is 1.
 *
 * In debug mode a failing query prints a report and terminates the script.
 *
 * @param string $url              URL to check.
 * @param mixed  $level            Level stored together with a queued redirect target.
 * @param mixed  $sessid           Session id, used as the `id` column of the temp table.
 * @param mixed  $can_leave_domain Passed through to url_purify().
 * @param mixed  $reindex          When 1, a link that is not 'ok' is handed to check_for_removal().
 * @param mixed  $site_id          Passed through to url_status().
 *
 * @return array|null The status array with 'aborted' = 1 and an explanatory
 *                    'state' when a redirect cannot be followed; otherwise
 *                    nothing is returned.
 */
function link_check($url, $level, $sessid, $can_leave_domain, $reindex, $site_id)
{
    global $db_con, $debug, $command_line, $mysql_table_prefix, $user_agent, $no_log, $clear;

    // Shared debug report for a failed query; prints the details and stops the
    // script. File, row and function name are passed as printf() arguments so
    // that a '%' in the script path cannot corrupt the format string.
    $caller = __FUNCTION__;
    $abort_with_sql_report = function ($sql_error, $sql_query, $err_row) use ($caller) {
        printf("<p><span class='red'>&nbsp;MySQL failure: %s&nbsp;\n<br /></span></p>", $sql_error);
        printf(
            "<p><span class='red'>&nbsp;Found in script: %s&nbsp;&nbsp;row: %s&nbsp;&nbsp;in function():&nbsp;%s&nbsp;<br /></span></p>",
            __FILE__,
            $err_row,
            $caller
        );
        echo "<p><span class='red'>&nbsp;Script execution aborted.&nbsp;<br /></span>";
        echo "<p><strong>Invalid query string, which caused the SQL error:</strong></p>";
        echo "<p> {$sql_query} </p>";
        exit;
    };

    $deletable = 0;
    $local_url = strpos($url, 'localhost');

    // NOTE(review): strpos() yields an int offset or false; neither loosely
    // equals '/' on PHP 8 (on PHP 7 only offset 0 does), so this guard is
    // effectively always true. Kept as-is; confirm whether '7' was intended.
    if ($local_url != '/') {
        $url_status = url_status($url, $site_id, $sessid);

        if (strstr($url_status['state'], "Relocation")) {
            $care_excl = '1';   // care about file suffixes to be excluded
            $relocated = '1';   // URL is relocated
            $local_redir = '';
            $url = $db_con->real_escape_string(preg_replace("/ /i", "", url_purify($url_status['path'], $url, $can_leave_domain, $care_excl, $relocated, $local_redir)));

            // url_purify() signals problems through special return values.
            $abort_state = null;
            if (!$url) {
                $abort_state = "Indexation aborted because of undefined redirection error.";
            } elseif ($url == 'self') {
                // the redirected URL is equal to the calling URL
                $abort_state = "Indexation aborted for this page, because the redirection was a link in it selves.<br />Blocked by Sphide-plus, because this could end in an infinite indexation loop.";
            } elseif ($url == 'excl') {
                // the redirected URL carries an excluded file suffix
                $abort_state = "Indexation aborted because the redirected link does not meet the URL suffix conditions.";
            } elseif (!strstr($url, "//")) {
                // purifying failed; the returned text is reported as the reason
                $abort_state = "Indexation aborted because: {$url}";
            }
            if ($abort_state !== null) {
                $url_status['aborted'] = 1;
                $url_status['state'] = $abort_state;
                return $url_status;
            }

            mysqltest();
            $sql_query = "SELECT link from " . $mysql_table_prefix . "temp where link='{$url}' && id = '{$sessid}'";
            $result = $db_con->query($sql_query);
            if ($debug && $db_con->errno) {
                $abort_with_sql_report($db_con->error, $sql_query, __LINE__ - 2);
            }

            // A failed query returns false when not in debug mode; treat that
            // as "not queued yet" instead of reading a property of a boolean.
            $rows = is_object($result) ? $result->num_rows : 0;
            if ($rows == 0) {
                $sql_query = "INSERT into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', '{$level}', '{$sessid}')";
                $db_con->query($sql_query);
                if ($debug && $db_con->errno) {
                    $abort_with_sql_report($db_con->error, $sql_query, __LINE__ - 2);
                }
            }

            // NOTE(review): the original had `$url_status['state'] == "redirected";`
            // here, a comparison without effect (probably a mistyped assignment).
            // Assigning would change the message given to printUrlStatus() below,
            // so the statement was dropped and the state is left untouched.
            if ($clear == 1) {
                clean_resource($result, '17');
            }
        }

        ini_set("user_agent", $user_agent);
        if ($url_status['state'] == 'ok') {
            printStandardReport('link_okay', $command_line, $no_log);
        } else {
            $deletable = 1;
            printUrlStatus($url_status['state'], $command_line);
        }
    }

    // Offset 7 is where the host starts in "http://localhost...".
    if ($local_url === 7) {
        printStandardReport('link_local', $command_line, $no_log);
    }

    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }
}
Example #2
0
/**
 * Spiders one site: registers or updates it in the "sites" table, fills the
 * per-session link queue in the "temp" table and indexes the queued links
 * level by level through index_url().
 *
 * Progress is mirrored in the "pending" table. When a pending row exists for
 * the site, its session id, level, position and link counter are restored and
 * indexing continues from there. On completion the session's temp rows and the
 * site's pending rows are deleted.
 *
 * Writes the globals $all_keywords (keyword => id map, loaded once),
 * $domain_arr, $tmp_urls (only when resuming) and $mainurl.
 *
 * NOTE: relies on the legacy mysql_* extension, which was removed in PHP 7.
 *
 * @param string $url              Start URL of the site; "/" is appended when it has no path.
 * @param mixed  $reindex          When 1, links already known for the site are re-queued and
 *                                 re-indexed; otherwise known pages are skipped and a pending
 *                                 row is inserted.
 * @param mixed  $maxlevel         Stored as spider_depth; upper level bound when $soption is 'level'.
 * @param string $soption          'level' (bounded by $maxlevel) or 'full' (until no links remain).
 * @param string $url_inc          Stored as `required`; passed to check_include().
 * @param string $url_not_inc      Stored as `disallowed`; passed to check_include().
 * @param mixed  $can_leave_domain Stored with the site and passed to index_url().
 *
 * @return void
 */
function index_site($url, $reindex, $maxlevel, $soption, $url_inc, $url_not_inc, $can_leave_domain)
{
    global $mysql_table_prefix, $command_line, $mainurl, $tmp_urls, $domain_arr, $all_keywords;

    // Load the keyword map only once.
    if (!isset($all_keywords)) {
        $result = mysql_query("select keyword_ID, keyword from " . $mysql_table_prefix . "keywords");
        echo mysql_error();
        while ($row = mysql_fetch_array($result)) {
            $all_keywords[addslashes($row[1])] = $row[0];
        }
    }

    // A URL without a path component gets a trailing slash.
    $compurl = parse_url($url);
    if (!isset($compurl['path']) || $compurl['path'] == '') {
        $url = $url . "/";
    }

    $sessid = md5(microtime() . getenv("REMOTE_ADDR"));
    $urlparts = parse_url($url);
    $domain = isset($urlparts['host']) ? $urlparts['host'] : null;

    $update_site_sql = "update " . $mysql_table_prefix . "sites set indexdate=now(), spider_depth = {$maxlevel}, required = '{$url_inc}'," . "disallowed = '{$url_not_inc}', can_leave_domain={$can_leave_domain} where site_id={$site_id_placeholder}";
    unset($update_site_sql); // built below once the site id is known

    $result = mysql_query("select site_id from " . $mysql_table_prefix . "sites where url='{$url}'");
    echo mysql_error();
    $row = mysql_fetch_row($result);
    $site_id = $row ? $row[0] : '';

    if ($site_id == '') {
        // Unknown site: register it and read back its id.
        mysql_query("insert into " . $mysql_table_prefix . "sites (url, indexdate, spider_depth, required, disallowed, can_leave_domain) " . "values ('{$url}', now(), {$maxlevel}, '{$url_inc}', '{$url_not_inc}', {$can_leave_domain})");
        echo mysql_error();
        $result = mysql_query("select site_ID from " . $mysql_table_prefix . "sites where url='{$url}'");
        $row = mysql_fetch_row($result);
        $site_id = $row[0];
    } else {
        if ($reindex == 1) {
            // Re-index: queue the start URL plus every link already stored for the site.
            mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', 0, '{$sessid}')");
            echo mysql_error();
            $result = mysql_query("select url, level from " . $mysql_table_prefix . "links where site_id = {$site_id}");
            while ($row = mysql_fetch_array($result)) {
                $site_link = $row['url'];
                $link_level = $row['level'];
                if ($site_link != $url) {
                    // Values read back from the database are raw and must be escaped again.
                    $safe_link = mysql_real_escape_string($site_link);
                    mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$safe_link}', {$link_level}, '{$sessid}')");
                }
            }
        }
        mysql_query("update " . $mysql_table_prefix . "sites set indexdate=now(), spider_depth = {$maxlevel}, required = '{$url_inc}'," . "disallowed = '{$url_not_inc}', can_leave_domain={$can_leave_domain} where site_id={$site_id}");
        echo mysql_error();
    }

    $result = mysql_query("select site_id, temp_id, level, count, num from " . $mysql_table_prefix . "pending where site_id='{$site_id}'");
    echo mysql_error();
    $row = mysql_fetch_row($result);
    $pending = $row ? $row[0] : '';
    $level = 0;
    $num = 0;
    $domain_arr = get_domains();

    if ($pending == '') {
        mysql_query("insert into " . $mysql_table_prefix . "temp (link, level, id) values ('{$url}', 0, '{$sessid}')");
        echo mysql_error();
    } else {
        // Resume a suspended run from the values stored in the pending row.
        // (The original re-ran a SELECT here whose result was never used.)
        printStandardReport('continueSuspended', $command_line);
        $sessid = $row[1];
        $level = $row[2];
        $pend_count = $row[3] + 1;
        // The original reset this counter to 0 further down, discarding the
        // restored value; it is now kept so link numbering continues.
        $num = (int) $row[4];
        $pending = 1;
        $tmp_urls = get_temp_urls($sessid);
    }

    if ($reindex != 1) {
        mysql_query("insert into " . $mysql_table_prefix . "pending (site_id, temp_id, level, count) values ('{$site_id}', '{$sessid}', '0', '0')");
        echo mysql_error();
    }

    $omit = check_robot_txt($url);
    printHeader($omit, $url, $command_line);
    $mainurl = $url;

    while (($level <= $maxlevel && $soption == 'level') || $soption == 'full') {
        // Only the first pass of a resumed run starts in the middle of a level.
        if ($pending == 1) {
            $count = $pend_count;
            $pending = 0;
        } else {
            $count = 0;
        }

        $result = mysql_query("select distinct link from " . $mysql_table_prefix . "temp where level={$level} && id='{$sessid}' order by link");
        echo mysql_error();
        if (mysql_num_rows($result) == 0) {
            break;
        }
        $links = array();
        while ($row = mysql_fetch_array($result)) {
            $links[] = $row['link'];
        }
        $link_total = count($links);

        while ($count < $link_total) {
            $num++;
            $thislink = $links[$count];
            $link_parts = parse_url($thislink);
            $link_host = isset($link_parts['host']) ? $link_parts['host'] : '';
            $forbidden = 0;

            // Rules from check_robot_txt(): host-relative entries are prefixed
            // with the link's host, entries carrying a scheme are used as-is.
            foreach ($omit as $omiturl) {
                $omiturl = trim($omiturl);
                $omiturl_parts = parse_url($omiturl);
                if (empty($omiturl_parts['scheme'])) {
                    $check_omit = $link_host . $omiturl;
                } else {
                    $check_omit = $omiturl;
                }
                // strpos() returns 0 for a match at the very start, which the
                // original treated as "no match"; compare against false instead.
                // An empty needle is skipped so it cannot match every link.
                if ($check_omit !== '' && strpos($thislink, $check_omit) !== false) {
                    printRobotsReport($num, $thislink, $command_line);
                    check_for_removal($thislink);
                    $forbidden = 1;
                    break;
                }
            }

            if (!check_include($thislink, $url_inc, $url_not_inc)) {
                printUrlStringReport($num, $thislink, $command_line);
                check_for_removal($thislink);
                $forbidden = 1;
            }

            if ($forbidden == 0) {
                printRetrieving($num, $thislink, $command_line);
                $safe_link = mysql_real_escape_string($thislink);
                $result = mysql_query("select md5sum, indexdate from " . $mysql_table_prefix . "links where url='{$safe_link}'");
                echo mysql_error();
                $rows = mysql_num_rows($result);

                if ($rows != 0 && $reindex != 1) {
                    // Already indexed and no re-index requested.
                    printStandardReport('inDatabase', $command_line);
                } else {
                    $md5sum = '';
                    $indexdate = '';
                    if ($rows != 0) {
                        $row = mysql_fetch_array($result);
                        $md5sum = $row['md5sum'];
                        $indexdate = $row['indexdate'];
                    }
                    index_url($thislink, $level + 1, $site_id, $md5sum, $domain, $indexdate, $sessid, $can_leave_domain, $reindex);
                    // Record the position so a suspended run can be resumed.
                    mysql_query("update " . $mysql_table_prefix . "pending set level = {$level}, count={$count}, num={$num} where site_id={$site_id}");
                    echo mysql_error();
                }
            }
            $count++;
        }
        $level++;
    }

    mysql_query("delete from " . $mysql_table_prefix . "temp where id = '{$sessid}'");
    echo mysql_error();
    mysql_query("delete from " . $mysql_table_prefix . "pending where site_id = '{$site_id}'");
    echo mysql_error();
    printStandardReport('completed', $command_line);
}
Example #3
0
/**
 * Checks the status of a single link and, when the URL has been relocated,
 * queues the redirect target in the "temp" table of the current session.
 *
 * URLs with "localhost" at offset 7 (directly after "http://") are only
 * reported as local; no status check is made for them. For all other URLs the
 * status comes from url_status(). If its 'state' contains "Relocation", the
 * target is passed through url_purify(), stripped of spaces and, when not
 * empty, inserted into the temp table unless already queued for this session.
 *
 * Any state other than 'ok' is reported through printUrlStatus() and marks the
 * link as removable; check_for_removal() is then called when $reindex is 1.
 *
 * NOTE: relies on the legacy mysql_* extension, which was removed in PHP 7.
 *
 * @param string $url              URL to check.
 * @param mixed  $level            Level stored together with a queued redirect target.
 * @param mixed  $sessid           Session id, used as the `id` column of the temp table.
 * @param mixed  $can_leave_domain Passed through to url_purify().
 * @param mixed  $reindex          When 1, a link that is not 'ok' is handed to check_for_removal().
 *
 * @return void
 */
function link_check($url, $level, $sessid, $can_leave_domain, $reindex)
{
    global $command_line;

    $deletable = 0;
    $local_url = strpos($url, 'localhost');

    if ($local_url === 7) {
        printStandardReport('link_local', $command_line);
    } else {
        $url_status = url_status($url);

        if (strstr($url_status['state'], "Relocation")) {
            // str_replace() replaces eregi_replace(), which was removed in
            // PHP 7; for a literal single-space pattern the result is the same.
            $url = str_replace(' ', '', url_purify($url_status['path'], $url, $can_leave_domain));

            if ($url != '') {
                // The redirect target originates from a remote server, so it
                // is escaped before being embedded in the queries.
                $safe_url = mysql_real_escape_string($url);
                $result = mysql_query("select link from " . TABLE_PREFIX . "temp where link='{$safe_url}' && id = '{$sessid}'");
                if (DEBUG > '0') {
                    echo mysql_error();
                }
                $rows = mysql_num_rows($result);
                if ($rows == 0) {
                    mysql_query("insert into " . TABLE_PREFIX . "temp (link, level, id) values ('{$safe_url}', '{$level}', '{$sessid}')");
                    if (DEBUG > '0') {
                        echo mysql_error();
                    }
                }
                // Moved next to the lookup: the original called this even when
                // the lookup was skipped and $result was undefined.
                // NOTE(review): assumes clean_resource() only releases the
                // result passed to it - confirm.
                clean_resource($result);
            }
            // NOTE(review): the original had `$url_status['state'] == "redirected";`
            // here, a comparison without effect (probably a mistyped assignment).
            // Assigning would change the message given to printUrlStatus() below,
            // so the statement was dropped and the state is left untouched.
        }

        ini_set("user_agent", Configure::read('user_agent'));
        if ($url_status['state'] == 'ok') {
            printStandardReport('link_okay', $command_line);
        } else {
            $deletable = 1;
            printUrlStatus($url_status['state'], $command_line);
        }
    }

    if ($reindex == 1 && $deletable == 1) {
        check_for_removal($url);
    }
}