示例#1
0
         }
     }
 } elseif ($from_shell_flag == 1 && $mode != 'small') {
     // either all or force_all from shell
     $query_tempspider = "INSERT INTO " . PHPDIG_DB_PREFIX . "tempspider (site_id,file,path) SELECT site_id,file,path FROM " . PHPDIG_DB_PREFIX . "spider WHERE site_id={$site_id} {$andmore_tempspider}";
     mysql_query($query_tempspider, $id_connect);
 } elseif ($mode == 'small') {
     $limit = 0;
     // bypass config limit to index just one page
     $force_first_reindex = 1;
     // set to one to index just one page
 }
 // Spidering starts at the first level.
 $level = 0;
 // Keep whatever phpdigReadRobotsTxt() returns for this site's full URL
 // (used below as a map keyed by excluded path).
 $exclude = phpdigReadRobotsTxt($full_url);
 // Merge in the exclude paths recorded for this site in the excludes table;
 // each stored path becomes a key of $exclude with the value 1.
 $query = "SELECT ex_id, ex_path FROM " . PHPDIG_DB_PREFIX . "excludes WHERE ex_site_id='{$site_id}'";
 if (is_array($list_exclude = phpdigMySelect($id_connect, $query))) {
     foreach ($list_exclude as $add_exclude) {
         $exclude[$add_exclude['ex_path']] = 1;
     }
 }
 // Report which site is being processed, followed by its excluded paths.
 print $hr . 'SITE : ' . $url . $br;
 if (is_array($exclude)) {
     print phpdigMsg('excludes') . ' :' . $br;
     foreach ($exclude as $ex_path => $tmp) {
         // Backslashes are stripped for display only; the keys of $exclude are not modified.
         $ex_path = str_replace("\\", "", $ex_path);
         print '- ' . $ex_path . $br;
     }
 }
/**
 * Look up (or create) the site row for a URL and queue its first page in tempspider.
 *
 * The URL is reduced to "scheme://host/" for the lookup in the sites table.
 * A second "full" URL, which also carries user:pass@ and a non-default port,
 * is what gets passed to phpdigReadRobotsTxt() and phpdigDetectDir().
 * Finally the per-site links/depth settings are stored in site_page.
 *
 * Prints a message and terminates the script when the URL has no host.
 *
 * @param resource $id_connect    MySQL link used for every mysql_* call.
 * @param string   $url           URL to index; the scheme defaults to "http".
 * @param mixed    $linksper      Value written to site_page.links.
 * @param mixed    $linksper_flag An existing site_page row is only updated when this is 0 ...
 * @param mixed    $limit         Value written to site_page.depth.
 * @param mixed    $limit_flag    ... and this is 0 ...
 * @param string   $usetable      ... and this is "no".
 *
 * @return array array('site_id' => id of the site row,
 *                     'exclude' => result of phpdigReadRobotsTxt(), plus stored
 *                                  exclude paths for an already known site,
 *                     'new_site' => 1 if the site row was just created, else 0)
 */
function phpdigGetSiteFromUrl($id_connect, $url, $linksper, $linksper_flag, $limit, $limit_flag, $usetable)
{
    //format url
    $pu = parse_url($url);
    if (!is_array($pu)) {
        // parse_url() returns false on seriously malformed URLs; treat that
        // as "no components" so the host check below rejects it.
        $pu = array();
    }
    if (!isset($pu['scheme'])) {
        $pu['scheme'] = "http";
    }
    if (!isset($pu['host'])) {
        echo 'Specify a valid host ! ';
        die;
    }
    // Make sure every optional component exists with a predictable type.
    settype($pu['path'], 'string');
    settype($pu['query'], 'string');
    settype($pu['user'], 'string');
    settype($pu['pass'], 'string');
    settype($pu['port'], 'integer');
    if ($pu['port'] == 0 || $pu['port'] == 80) {
        // A missing port and port 80 are both stored as "no port".
        $pu['port'] = '';
    }
    $url = $pu['scheme'] . "://" . $pu['host'] . "/";

    //build a complete url with user/pass and port
    $full_url = $pu['scheme'] . "://";
    if ($pu['user'] && $pu['pass']) {
        $full_url .= $pu['user'] . ':' . $pu['pass'] . '@';
    }
    $full_url .= $pu['host'];
    if ($pu['port']) {
        $full_url .= ':' . $pu['port'];
    }
    $full_url .= '/';
    $subpu = phpdigRewriteUrl($pu['path'] . "?" . $pu['query']);

    // Values taken straight from the caller's URL are escaped before they are
    // placed in SQL. $pu['port'] is an integer or '' at this point.
    $sql_url = mysql_real_escape_string($url, $id_connect);
    $sql_user = mysql_real_escape_string($pu['user'], $id_connect);
    $sql_pass = mysql_real_escape_string($pu['pass'], $id_connect);

    if (!$pu['port']) {
        $where_port = "and (port IS NULL OR port = 0)";
    } else {
        $where_port = "and port='" . $pu['port'] . "'";
    }
    $query = "SELECT site_id FROM " . PHPDIG_DB_PREFIX . "sites WHERE site_url = '{$sql_url}' {$where_port}";
    $result = mysql_query($query, $id_connect);
    // A failed query is handled like "no row found" without calling
    // mysql_num_rows() on a non-resource.
    $site_exists = ($result !== false && mysql_num_rows($result) > 0);

    if ($site_exists) {
        //existing site
        $exclude = phpdigReadRobotsTxt($full_url);
        $new_site = 0;
        list($site_id) = mysql_fetch_row($result);
        // Add the exclude paths stored for this site to the robots.txt ones.
        $query = "SELECT ex_id, ex_path FROM " . PHPDIG_DB_PREFIX . "excludes WHERE ex_site_id='{$site_id}'";
        if (is_array($list_exclude = phpdigMySelect($id_connect, $query))) {
            foreach ($list_exclude as $add_exclude) {
                $exclude[$add_exclude['ex_path']] = 1;
            }
        }
    } else {
        //new site
        $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "sites SET site_url='{$sql_url}',upddate=NOW(),username='" . $sql_user . "',password='" . $sql_pass . "',port='" . $pu['port'] . "'";
        mysql_query($query, $id_connect);
        $site_id = mysql_insert_id($id_connect);
        $new_site = 1;
        $exclude = phpdigReadRobotsTxt($full_url);
    }
    if ($result !== false) {
        mysql_free_result($result);
    }

    // Queue the first page to spider (same steps for new and existing sites).
    // NOTE(review): $subpu['file'] / $subpu['path'] come from phpdigRewriteUrl()
    // and phpdigDetectDir(); confirm whether those helpers already escape them
    // before adding escaping here.
    $subpu['url'] = $full_url;
    $subpu = phpdigDetectDir($subpu, $exclude);
    if ($subpu['ok'] == 1) {
        set_time_limit(0);
        if (isset($subpu['path']) && strlen($subpu['path']) > 0 && LIMIT_TO_DIRECTORY) {
            $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "includes SET in_site_id = " . $site_id . ", in_path = '" . $subpu['path'] . "';";
            mysql_query($query, $id_connect);
        }
        if ($new_site) {
            //new spidering = insert first row in tempspider
            $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "tempspider SET file='" . $subpu['file'] . "',path='" . $subpu['path'] . "',level=0,site_id='{$site_id}'";
        } else {
            $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "tempspider (site_id,file,path) VALUES ('{$site_id}','" . $subpu['file'] . "','" . $subpu['path'] . "')";
        }
        mysql_query($query, $id_connect);
    }

    // Store the links/depth settings: always for a site without a site_page
    // row, otherwise only when the flags say the stored values may be replaced.
    $query_num_page = "SELECT links,depth FROM " . PHPDIG_DB_PREFIX . "site_page WHERE site_id = '{$site_id}'";
    $result_num_page = mysql_query($query_num_page, $id_connect);
    $has_site_page = ($result_num_page !== false && mysql_num_rows($result_num_page) > 0);
    if ($result_num_page !== false) {
        mysql_free_result($result_num_page);
    }

    $sql_links = mysql_real_escape_string($linksper, $id_connect);
    $sql_depth = mysql_real_escape_string($limit, $id_connect);
    if (!$has_site_page) {
        $sql = "INSERT INTO " . PHPDIG_DB_PREFIX . "site_page (site_id,links,depth) VALUES ('{$site_id}', '{$sql_links}', '{$sql_depth}')";
    } elseif ($linksper_flag == 0 && $limit_flag == 0 && $usetable == "no") {
        $sql = "UPDATE " . PHPDIG_DB_PREFIX . "site_page SET links='{$sql_links}', depth='{$sql_depth}' WHERE site_id='{$site_id}'";
    }
    if (isset($sql)) {
        mysql_query($sql, $id_connect);
    }
    return array('site_id' => $site_id, 'exclude' => $exclude, 'new_site' => $new_site);
}