/*
 * set_status.php - command-line switch for the crawler's run state.
 *
 * Writes either 'STOP' or 'OK' into the tblConfig row whose strName is
 * 'CrawlerStatus'. Takes exactly one argument (case-insensitive):
 *   STOP  - ask the crawler to stop after the current page
 *   OK    - allow the crawler to restart when next executed
 *
 * Exit status: 1 when the argument is missing or invalid (so shell wrappers
 * and cron can detect misuse); otherwise the script runs to completion.
 * db_connect(), db_run_query() and db_close() come from the included
 * LIB_db_functions.php; their failure behaviour is not visible from here.
 */
date_default_timezone_set('Europe/London');
include_once "CONFIG_db.php";
include_once "LIB_db_functions.php";

$usage = "Usage:\nset_status.php STOP\tStop crawler after current page if running\n" . "set_status.php OK\tAllow crawler to restart when next executed.\n";
$strStopSQL = "UPDATE tblConfig SET strValue='STOP' WHERE strName='CrawlerStatus'";
$strOKSQL = "UPDATE tblConfig SET strValue='OK' WHERE strName='CrawlerStatus'";

// $argv[0] is the script name, so exactly one user argument means $argc is 2.
if ($argc !== 2) {
    print "Please enter an argument\n{$usage}";
    exit(1); // non-zero: the request was not carried out
}

// Accept the command in any letter case.
$command = strtoupper($argv[1]);
if ($command !== "OK" && $command !== "STOP") {
    print "Invalid argument\n{$usage}";
    exit(1); // non-zero: the request was not carried out
}

print "Connecting to database...";
db_connect();
print "OK\n";

if ($command === "STOP") {
    print "Stopping crawler....";
    db_run_query($strStopSQL);
    print "OK\n";
    print "Crawler should stop gracefully after current page\n";
} else {
    // Validation above guarantees the only other command is "OK".
    print "Updating status to OK....";
    db_run_query($strOKSQL);
    print "OK\n";
}

print "Disconnecting....";
db_close();
print "OK\n";
Example #2
0
             }
         } catch (Exception $e) {
             echo "***ERROR***\n";
             echo "Couldn't store: {$resolved_address}\n";
             echo "While harvesting: {$SEED_URL}\n";
             break;
             //ignore any further links (to prevent multiple error messages for one page)
         }
     }
     #echo "Harvested: ".$resolved_address." \n";
 }
 //print "3 sqlQuery is $sqlQuery\n";
 // Run the accumulated SQL only when the code above flagged that there is output to store.
 if ($outputExists) {
     // Drop the final character of the query string before executing it
     // (presumably a trailing separator left by the query-building code above -- TODO confirm).
     $sqlQuery = substr($sqlQuery, 0, strlen($sqlQuery) - 1);
     db_run_query($sqlQuery);
     // Per the original author, db_run_query() already wraps its work in try/catch,
     // so no extra handling is added here (helper is defined outside this view).
 }
 // Presumably flags $seed as harvested in the database; helper is defined elsewhere -- verify.
 db_marked_harvested($seed);
 /* Safe zone:
    All work for the current seed is finished at this point, so this is the
    place where halting the crawler should occur, if at all.
    Check the DB to see whether a stop flag has been left. */
 $stop = false;
 $strSQL = "SELECT strValue FROM tblConfig WHERE strName='CrawlerStatus'";
 // NOTE(review): the third argument (true) presumably makes db_run_select() return
 // the single column value rather than a row array -- confirm against the helper.
 $result = db_run_select($strSQL, null, true);
 if ($result == "STOP") {
     echo "***Received command to STOP: Stopping now; crawl is incomplete.\n";
     // Notify the operator by e-mail; the operator address is used as both recipient and sender.
     mail($operator_email, "Crawl Stopped", "Bot stopped via DB Stop signal: " . date('Y-m-d H:i:s') . "\n", "FROM: " . $operator_email);
     $stop = true;
     // Leaves the enclosing loop (its header is outside this view); $stop records why.
     break;
 }
/**
 * Record one more link from one domain to another in tblExternalHosts.
 *
 * Looks up the (from, to) pair; if no row is found a new one is inserted with
 * iCount = 1, otherwise the existing row's iCount is incremented by one.
 *
 * Note: the lookup and the write are two separate statements, so concurrent
 * callers could race; this function does not guard against that.
 *
 * @param string $strFromDomain Domain the link was found on.
 * @param string $strToDomain   Domain the link points to.
 * @return void
 */
function db_update_domain_links($strFromDomain, $strToDomain)
{
    $strSQL = "SELECT iHostID,iCount FROM tblExternalHosts WHERE strFromDomain=? AND strToDomain=?";
    $result = db_run_select($strSQL, array($strFromDomain, $strToDomain));

    // Loose comparison kept on purpose: db_run_select()'s exact "no row" value
    // (null / false / empty array) is not visible from here.
    if ($result == null) {
        $strSQL = "INSERT INTO tblExternalHosts SET strFromDomain=?, strToDomain=?, iCount=?";
        $params = array($strFromDomain, $strToDomain, 1);
    } else {
        // PHP has no int() function; the original call raised a fatal
        // "Call to undefined function int()". Use an integer cast instead,
        // since the driver may hand the count back as a string.
        $iCount = (int) $result['iCount'] + 1;
        $strSQL = "UPDATE tblExternalHosts SET iCount=? WHERE iHostID=?";
        $params = array($iCount, $result['iHostID']);
    }

    db_run_query($strSQL, $params);
}