/**
 * set_status.php - command-line helper that writes the 'CrawlerStatus'
 * row of tblConfig.
 *
 * Usage (exactly one argument, case-insensitive):
 *   set_status.php STOP   Stop crawler after current page if running
 *   set_status.php OK     Allow crawler to restart when next executed
 *
 * Exit status: 0 when the script runs to completion, 1 when the argument
 * is missing or not recognised, so calling shells/cron jobs can detect
 * misuse.
 */
date_default_timezone_set('Europe/London');

include_once "CONFIG_db.php";
include_once "LIB_db_functions.php";

$usage = "Usage:\nset_status.php STOP\tStop crawler after current page if running\n"
    . "set_status.php OK\tAllow crawler to restart when next executed.\n";

$strStopSQL = "UPDATE tblConfig SET strValue='STOP' WHERE strName='CrawlerStatus'";
$strOKSQL = "UPDATE tblConfig SET strValue='OK' WHERE strName='CrawlerStatus'";

// $argv[0] is the script name, so exactly one user argument means $argc is 2.
if ($argc !== 2) {
    print "Please enter an argument\n{$usage}";
    exit(1); // non-zero: usage error
}

// Accept the command in any letter case.
$command = strtoupper($argv[1]);
if ($command !== "OK" && $command !== "STOP") {
    print "Invalid argument\n{$usage}";
    exit(1); // non-zero: usage error
}

// db_connect(), db_run_query() and db_close() are not defined in this script;
// presumably they come from LIB_db_functions.php - confirm their error
// behaviour there (this script prints "OK" after each call unconditionally).
print "Connecting to database...";
db_connect();
print "OK\n";

if ($command === "STOP") {
    print "Stopping crawler....";
    db_run_query($strStopSQL);
    print "OK\n";
    print "Crawler should stop gracefully after current page\n";
} else {
    print "Updating status to OK....";
    db_run_query($strOKSQL);
    print "OK\n";
}

print "Disconnecting....";
db_close();
print "OK\n";
} } catch (Exception $e) { echo "***ERROR***\n"; echo "Couldn't store: {$resolved_address}\n"; echo "While harvesting: {$SEED_URL}\n"; break; //ignore any further links (to prevent multiple error messages for one page) } } #echo "Harvested: ".$resolved_address." \n"; } //print "3 sqlQuery is $sqlQuery\n"; if ($outputExists) { $sqlQuery = substr($sqlQuery, 0, strlen($sqlQuery) - 1); //trim last char db_run_query($sqlQuery); //already in try-catch in function } db_marked_harvested($seed); /*Safe zone: This is where halting of the crawler should occur if at all Check DB to see if flag has been left to stop*/ $stop = false; $strSQL = "SELECT strValue FROM tblConfig WHERE strName='CrawlerStatus'"; $result = db_run_select($strSQL, null, true); if ($result == "STOP") { echo "***Received command to STOP: Stopping now; crawl is incomplete.\n"; mail($operator_email, "Crawl Stopped", "Bot stopped via DB Stop signal: " . date('Y-m-d H:i:s') . "\n", "FROM: " . $operator_email); $stop = true; break; }
/**
 * Record one more link from $strFromDomain to $strToDomain in tblExternalHosts.
 *
 * Looks up the row for the (from, to) pair. If the lookup yields nothing, a
 * new row is inserted with iCount = 1; otherwise the existing row's iCount
 * is incremented by one and written back, keyed by its iHostID.
 *
 * NOTE(review): the lookup and the write are two separate statements; unless
 * the db_* helpers wrap them in a transaction, concurrent callers could lose
 * an increment - confirm against the helper implementation.
 *
 * @param string $strFromDomain Domain the link was found on.
 * @param string $strToDomain   Domain the link points to.
 * @return void
 */
function db_update_domain_links($strFromDomain, $strToDomain)
{
    $strSQL = "SELECT iHostID,iCount FROM tblExternalHosts WHERE strFromDomain=? AND strToDomain=?";
    $result = db_run_select($strSQL, array($strFromDomain, $strToDomain));

    // Loose comparison kept on purpose: the exact "no row" return value of
    // db_run_select() (null / false / empty array) is not visible here.
    if ($result == null) {
        $strSQL = "INSERT INTO tblExternalHosts SET strFromDomain=?, strToDomain=?, iCount=?";
        $params = array($strFromDomain, $strToDomain, 1);
    } else {
        // Cast, not int(): PHP has no int() function, so the original call
        // raised a fatal "undefined function" error on every existing row.
        $iCount = (int) $result['iCount'] + 1;
        $strSQL = "UPDATE tblExternalHosts SET iCount=? WHERE iHostID=?";
        $params = array($iCount, $result['iHostID']);
    }

    db_run_query($strSQL, $params);
}