Example #1
0
     $andmore = " AND spider_id <> '{$spider_root_id}' ";
 } else {
     $andmore = '';
 }
 //is this link already in spider ?
 $query = "SELECT count(*) as num FROM " . PHPDIG_DB_PREFIX . "spider WHERE path like '" . $lien['path'] . "' AND file like '" . $lien['file'] . "' AND site_id='{$site_id}' {$andmore}";
 $test_id = mysql_query($query, $id_connect);
 if (mysql_num_rows($test_id) > 0) {
     $exist_results = mysql_fetch_array($test_id);
     $exists += $exist_results['num'];
     mysql_free_result($test_id);
 }
 $lien['url'] = $full_url;
 //test validity of the new link
 if ($exists < 1) {
     $cur_link = phpdigDetectDir($lien, $exclude, $cookies, $site_id, $id_connect);
 } else {
     $cur_link['ok'] = 0;
 }
 if ($cur_link['ok'] == 1) {
     $s_error = 0;
     print '+ ';
 } else {
     $s_error = 1;
     // redirection
     if (isset($cur_link['status']) && $cur_link['status'] == 'NEWHOST' && isset($lien['the_http_scheme'])) {
         if (PHPDIG_IN_DOMAIN == true && phpdigCompareDomains($lien['the_http_scheme'] . '://' . $cur_link['host'] . $cur_link['path'], $url)) {
             $added_site = phpdigSpiderAddSite($id_connect, $lien['the_http_scheme'] . '://' . $cur_link['host'] . $cur_link['path'], $linksper, $linksper_flag, $limit, $limit_flag, $usetable);
             // verify the site is not already in the sites list
             $site_exists = false;
             foreach ($list_sites as $verify_site) {
/**
 * Look up (or register) the site a URL belongs to and queue its entry page
 * for spidering.
 *
 * The URL is normalised to "scheme://host/" and matched, together with its
 * port, against the sites table. When no row matches, a new row is inserted
 * using the user, password and port found in the URL. In both cases
 * phpdigReadRobotsTxt() is called for the full URL, the requested path is
 * checked with phpdigDetectDir() and, when that check reports ok == 1, a row
 * is written to tempspider (plus a row in includes when LIMIT_TO_DIRECTORY is
 * set and the detected path is non-empty). Finally the site_page row holding
 * the links/depth settings is inserted if missing, or updated when both flags
 * are 0 and $usetable is "no".
 *
 * Prints a message and terminates the script (die) when the URL has no host.
 *
 * @param resource $id_connect    MySQL link identifier used for every query
 * @param string   $url           URL to resolve; scheme defaults to "http"
 * @param mixed    $linksper      value stored in site_page.links
 * @param mixed    $linksper_flag an existing site_page row is only updated when this is 0
 * @param mixed    $limit         value stored in site_page.depth
 * @param mixed    $limit_flag    an existing site_page row is only updated when this is 0
 * @param mixed    $usetable      an existing site_page row is only updated when this is "no"
 *
 * @return array 'site_id' (id of the matched or inserted site), 'exclude'
 *               (result of phpdigReadRobotsTxt(), extended with the stored
 *               excludes for an existing site) and 'new_site' (1 when the
 *               site row was inserted by this call, 0 otherwise)
 */
function phpdigGetSiteFromUrl($id_connect, $url, $linksper, $linksper_flag, $limit, $limit_flag, $usetable)
{
    //format url
    $pu = parse_url($url);
    if (!isset($pu['scheme'])) {
        $pu['scheme'] = "http";
    }
    if (!isset($pu['host'])) {
        echo 'Specify a valid host ! ';
        die;
    }
    // settype() also creates the missing components, so every key read below exists.
    settype($pu['path'], 'string');
    settype($pu['query'], 'string');
    settype($pu['user'], 'string');
    settype($pu['pass'], 'string');
    settype($pu['port'], 'integer');
    // A missing port and port 80 are both treated as "no explicit port".
    if ($pu['port'] == 0 || $pu['port'] == 80) {
        $pu['port'] = '';
    }

    $url = $pu['scheme'] . "://" . $pu['host'] . "/";

    //build a complete url with user/pass and port
    $full_url = $pu['scheme'] . "://";
    if ($pu['user'] && $pu['pass']) {
        $full_url .= $pu['user'] . ':' . $pu['pass'] . '@';
    }
    $full_url .= $pu['host'];
    if ($pu['port']) {
        $full_url .= ':' . $pu['port'];
    }
    $full_url .= '/';

    $subpu = phpdigRewriteUrl($pu['path'] . "?" . $pu['query']);

    // Values taken straight from the caller-supplied URL are escaped before
    // they are embedded in SQL. The port is an integer (or '') at this point.
    $sql_url = mysql_real_escape_string($url, $id_connect);
    $sql_user = mysql_real_escape_string($pu['user'], $id_connect);
    $sql_pass = mysql_real_escape_string($pu['pass'], $id_connect);

    if (!$pu['port']) {
        $where_port = "and (port IS NULL OR port = 0)";
    } else {
        $where_port = "and port='" . $pu['port'] . "'";
    }

    // Is the site already known?
    $site_id = null;
    $query = "SELECT site_id FROM " . PHPDIG_DB_PREFIX . "sites WHERE site_url = '{$sql_url}' {$where_port}";
    $result = mysql_query($query, $id_connect);
    if ($result) {
        if (mysql_num_rows($result) > 0) {
            list($site_id) = mysql_fetch_row($result);
        }
        mysql_free_result($result);
    }

    // NOTE(review): $subpu['path'] and $subpu['file'] come from phpdigRewriteUrl()
    // and phpdigDetectDir(), which are not visible here. They are embedded in the
    // queries below exactly as before; confirm those helpers return SQL-safe
    // values before adding escaping here (to avoid double-escaping).
    if ($site_id !== null) {
        //existing site
        $new_site = 0;
        $exclude = phpdigReadRobotsTxt($full_url);
        // merge the excludes stored for this site into the robots.txt ones
        $query = "SELECT ex_id, ex_path FROM " . PHPDIG_DB_PREFIX . "excludes WHERE ex_site_id='{$site_id}'";
        if (is_array($list_exclude = phpdigMySelect($id_connect, $query))) {
            foreach ($list_exclude as $add_exclude) {
                $exclude[$add_exclude['ex_path']] = 1;
            }
        }
        $subpu['url'] = $full_url;
        $subpu = phpdigDetectDir($subpu, $exclude);
        if ($subpu['ok'] == 1) {
            set_time_limit(0);
            if (isset($subpu['path']) && strlen($subpu['path']) > 0 && LIMIT_TO_DIRECTORY) {
                $query_tempspider = "INSERT INTO " . PHPDIG_DB_PREFIX . "includes SET in_site_id = " . $site_id . ", in_path = '" . $subpu['path'] . "';";
                mysql_query($query_tempspider, $id_connect);
            }
            $query_tempspider = "INSERT INTO " . PHPDIG_DB_PREFIX . "tempspider (site_id,file,path) VALUES ('{$site_id}','" . $subpu['file'] . "','" . $subpu['path'] . "')";
            mysql_query($query_tempspider, $id_connect);
        }
    } else {
        //new site
        $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "sites SET site_url='{$sql_url}',upddate=NOW(),username='" . $sql_user . "',password='" . $sql_pass . "',port='" . $pu['port'] . "'";
        mysql_query($query, $id_connect);
        $site_id = mysql_insert_id($id_connect);
        $new_site = 1;
        //new spidering = insert first row in tempspider
        $subpu['url'] = $full_url;
        $exclude = phpdigReadRobotsTxt($full_url);
        $subpu = phpdigDetectDir($subpu, $exclude);
        if ($subpu['ok'] == 1) {
            set_time_limit(0);
            if (isset($subpu['path']) && strlen($subpu['path']) > 0 && LIMIT_TO_DIRECTORY) {
                $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "includes SET in_site_id = " . $site_id . ", in_path = '" . $subpu['path'] . "';";
                mysql_query($query, $id_connect);
            }
            $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "tempspider SET file='" . $subpu['file'] . "',path='" . $subpu['path'] . "',level=0,site_id='{$site_id}'";
            mysql_query($query, $id_connect);
        }
    }

    // Per-site links/depth settings: create the row if it is missing, otherwise
    // overwrite it only when both flags are 0 and $usetable is "no".
    $query_num_page = "SELECT links,depth FROM " . PHPDIG_DB_PREFIX . "site_page WHERE site_id = '{$site_id}'";
    $result_num_page = mysql_query($query_num_page, $id_connect);
    $has_page_row = false;
    if ($result_num_page) {
        $has_page_row = mysql_num_rows($result_num_page) > 0;
        mysql_free_result($result_num_page);
    }
    if (!$has_page_row) {
        $sql = "INSERT INTO " . PHPDIG_DB_PREFIX . "site_page (site_id,links,depth) VALUES ('{$site_id}', '{$linksper}', '{$limit}')";
    } elseif ($linksper_flag == 0 && $limit_flag == 0 && $usetable == "no") {
        $sql = "UPDATE " . PHPDIG_DB_PREFIX . "site_page SET links='{$linksper}', depth='{$limit}' WHERE site_id='{$site_id}'";
    }
    if (isset($sql)) {
        mysql_query($sql, $id_connect);
    }

    return array('site_id' => $site_id, 'exclude' => $exclude, 'new_site' => $new_site);
}