} } } elseif ($from_shell_flag == 1 && $mode != 'small') { // either all or force_all from shell $query_tempspider = "INSERT INTO " . PHPDIG_DB_PREFIX . "tempspider (site_id,file,path) SELECT site_id,file,path FROM " . PHPDIG_DB_PREFIX . "spider WHERE site_id={$site_id} {$andmore_tempspider}"; mysql_query($query_tempspider, $id_connect); } elseif ($mode == 'small') { $limit = 0; // bypass config limit to index just one page $force_first_reindex = 1; // set to one to index just one page } //first level $level = 0; //store robots.txt datas $exclude = phpdigReadRobotsTxt($full_url); // parse exclude paths $query = "SELECT ex_id, ex_path FROM " . PHPDIG_DB_PREFIX . "excludes WHERE ex_site_id='{$site_id}'"; if (is_array($list_exclude = phpdigMySelect($id_connect, $query))) { foreach ($list_exclude as $add_exclude) { $exclude[$add_exclude['ex_path']] = 1; } } print $hr . 'SITE : ' . $url . $br; if (is_array($exclude)) { print phpdigMsg('excludes') . ' :' . $br; foreach ($exclude as $ex_path => $tmp) { $ex_path = str_replace("\\", "", $ex_path); print '- ' . $ex_path . $br; } }
/**
 * Resolve the site record for a URL, creating it when it does not exist yet,
 * and queue the URL's entry page for spidering.
 *
 * The URL is reduced to "scheme://host/" and looked up in the sites table
 * together with its port. For a known site, the paths stored in the excludes
 * table are added to the result of phpdigReadRobotsTxt(); for an unknown site
 * a new sites row is inserted. In both cases, when phpdigDetectDir() reports
 * the entry page as ok, a row is inserted into tempspider (and, when
 * LIMIT_TO_DIRECTORY is set and the detected path is non-empty, the path is
 * recorded in the includes table). Finally the site_page row holding the
 * links/depth settings is created, or updated under the flag conditions
 * described below.
 *
 * The script is terminated when the URL has no host component.
 *
 * @param resource $id_connect    MySQL link identifier used for every query.
 * @param string   $url           URL to resolve; the scheme defaults to "http".
 * @param mixed    $linksper      Value stored in site_page.links.
 * @param mixed    $linksper_flag Must equal 0 for an existing site_page row to be updated.
 * @param mixed    $limit         Value stored in site_page.depth.
 * @param mixed    $limit_flag    Must equal 0 for an existing site_page row to be updated.
 * @param string   $usetable      Must equal "no" for an existing site_page row to be updated.
 *
 * @return array 'site_id'  => id of the found or inserted sites row,
 *               'exclude'  => result of phpdigReadRobotsTxt(), plus one
 *                             "path => 1" entry per stored exclude for an
 *                             existing site,
 *               'new_site' => 1 when the sites row was just inserted, else 0.
 */
function phpdigGetSiteFromUrl($id_connect, $url, $linksper, $linksper_flag, $limit, $limit_flag, $usetable)
{
    //format url
    $pu = parse_url($url);
    if (!is_array($pu)) {
        // parse_url() returns false on seriously malformed URLs
        $pu = array();
    }
    if (!isset($pu['scheme'])) {
        $pu['scheme'] = "http";
    }
    if (!isset($pu['host'])) {
        echo 'Specify a valid host ! ';
        die;
    }

    // settype() also creates the missing components with an empty value
    settype($pu['path'], 'string');
    settype($pu['query'], 'string');
    settype($pu['user'], 'string');
    settype($pu['pass'], 'string');
    settype($pu['port'], 'integer');
    if ($pu['port'] == 0 || $pu['port'] == 80) {
        // a missing port and port 80 are both handled as "no explicit port"
        $pu['port'] = '';
    }

    $url = $pu['scheme'] . "://" . $pu['host'] . "/";

    //build a complete url with user/pass and port
    $full_url = $pu['scheme'] . "://";
    if ($pu['user'] && $pu['pass']) {
        $full_url .= $pu['user'] . ':' . $pu['pass'] . '@';
    }
    $full_url .= $pu['host'];
    if ($pu['port']) {
        $full_url .= ':' . $pu['port'];
    }
    $full_url .= '/';

    $subpu = phpdigRewriteUrl($pu['path'] . "?" . $pu['query']);

    // $pu['port'] is either '' or an integer here, so it is safe to embed as is
    if (!$pu['port']) {
        $where_port = "and (port IS NULL OR port = 0)";
    } else {
        $where_port = "and port='" . $pu['port'] . "'";
    }

    // URL-derived strings are escaped where the SQL is built
    $sql_url = mysql_real_escape_string($url, $id_connect);

    $query = "SELECT site_id FROM " . PHPDIG_DB_PREFIX . "sites WHERE site_url = '{$sql_url}' {$where_port}";
    $result = mysql_query($query, $id_connect);
    // mysql_query() returns false on error; treat that like "no row found"
    $site_exists = ($result !== false && mysql_num_rows($result) > 0);

    if ($site_exists) {
        //existing site
        $exclude = phpdigReadRobotsTxt($full_url);
        $new_site = 0;
        list($site_id) = mysql_fetch_row($result);
        $sql_site_id = (int) $site_id;

        // add the excludes stored for this site to the robots.txt ones
        $query = "SELECT ex_id, ex_path FROM " . PHPDIG_DB_PREFIX . "excludes WHERE ex_site_id='{$sql_site_id}'";
        if (is_array($list_exclude = phpdigMySelect($id_connect, $query))) {
            foreach ($list_exclude as $add_exclude) {
                $exclude[$add_exclude['ex_path']] = 1;
            }
        }
    } else {
        //new site
        $sql_user = mysql_real_escape_string($pu['user'], $id_connect);
        $sql_pass = mysql_real_escape_string($pu['pass'], $id_connect);
        $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "sites SET site_url='{$sql_url}',upddate=NOW(),username='" . $sql_user . "',password='" . $sql_pass . "',port='" . $pu['port'] . "'";
        mysql_query($query, $id_connect);
        $site_id = mysql_insert_id($id_connect);
        $sql_site_id = (int) $site_id;
        $new_site = 1;
        $exclude = phpdigReadRobotsTxt($full_url);
    }

    if ($result !== false) {
        mysql_free_result($result);
    }

    //new spidering = insert first row in tempspider
    $subpu['url'] = $full_url;
    $subpu = phpdigDetectDir($subpu, $exclude);

    if ($subpu['ok'] == 1) {
        set_time_limit(0);

        $sql_path = mysql_real_escape_string(isset($subpu['path']) ? (string) $subpu['path'] : '', $id_connect);
        $sql_file = mysql_real_escape_string(isset($subpu['file']) ? (string) $subpu['file'] : '', $id_connect);

        if (isset($subpu['path']) && strlen($subpu['path']) > 0 && LIMIT_TO_DIRECTORY) {
            $query = "INSERT INTO " . PHPDIG_DB_PREFIX . "includes SET in_site_id = " . $sql_site_id . ", in_path = '" . $sql_path . "'";
            mysql_query($query, $id_connect);
        }

        if ($new_site) {
            // a new site sets the level explicitly
            $query_tempspider = "INSERT INTO " . PHPDIG_DB_PREFIX . "tempspider SET file='" . $sql_file . "',path='" . $sql_path . "',level=0,site_id='{$sql_site_id}'";
        } else {
            // an existing site leaves the level column to its default
            $query_tempspider = "INSERT INTO " . PHPDIG_DB_PREFIX . "tempspider (site_id,file,path) VALUES ('{$sql_site_id}','" . $sql_file . "','" . $sql_path . "')";
        }
        mysql_query($query_tempspider, $id_connect);
    }

    // per-site links/depth settings
    $query_num_page = "SELECT links,depth FROM " . PHPDIG_DB_PREFIX . "site_page WHERE site_id = '{$sql_site_id}'";
    $result_num_page = mysql_query($query_num_page, $id_connect);
    $has_site_page = ($result_num_page !== false && mysql_num_rows($result_num_page) > 0);
    if ($result_num_page !== false) {
        mysql_free_result($result_num_page);
    }

    $sql_links = mysql_real_escape_string((string) $linksper, $id_connect);
    $sql_depth = mysql_real_escape_string((string) $limit, $id_connect);

    if (!$has_site_page) {
        $sql = "INSERT INTO " . PHPDIG_DB_PREFIX . "site_page (site_id,links,depth) VALUES ('{$sql_site_id}', '{$sql_links}', '{$sql_depth}')";
    } elseif ($linksper_flag == 0 && $limit_flag == 0 && $usetable == "no") {
        // an existing row is only overwritten under these flag values
        $sql = "UPDATE " . PHPDIG_DB_PREFIX . "site_page SET links='{$sql_links}', depth='{$sql_depth}' WHERE site_id='{$sql_site_id}'";
    }
    if (isset($sql)) {
        mysql_query($sql, $id_connect);
    }

    return array('site_id' => $site_id, 'exclude' => $exclude, 'new_site' => $new_site);
}