<?php
// Configuration and shared constants are loaded first: the access check below reads ADMIN_USER.
include_once __DIR__ . '/../config.php';
include_once __DIR__ . '/../common/constants.php';

// When a non-empty ADMIN_USER is configured, only the HTTP-authenticated user with exactly
// that name may continue. The comparison is strict on purpose: a loose != compares
// numeric-looking strings by value (e.g. "1e3" equals "1000"), which would let a different
// user name pass. A non-scalar ADMIN_USER can never match and is rejected.
if (defined("ADMIN_USER") && ADMIN_USER != "" && (!isset($_SERVER['PHP_AUTH_USER']) || !is_scalar(ADMIN_USER) || $_SERVER['PHP_AUTH_USER'] !== (string) ADMIN_USER)) {
    die("Go away, you evil hacker!");
}

include_once __DIR__ . '/query_manager.php';
include_once __DIR__ . '/../common/functions.php';
include_once __DIR__ . '/../common/upgrade.php';
include_once __DIR__ . '/../capture/common/functions.php';

// Set-up calls made before any data is read for the page.
create_admin();
create_error_logs();

// Values gathered up front; the page markup starts right after the closing tag.
$captureroles = unserialize(CAPTUREROLES);
$querybins = getBins();
$activePhrases = getNrOfActivePhrases();
$activeGeoboxes = getNrOfActiveGeoboxes();
$activeUsers = getNrOfActiveUsers();
$lastRateLimitHit = getLastRateLimitHit();
?> <html> <head> <title>DMI-TCAT query manager</title> <meta charset='<?php echo mb_internal_encoding(); ?> '> <style type="text/css"> body,html { font-family:Arial, Helvetica, sans-serif; font-size:12px; }
/**
 * Prepare the capture environment and run the streaming request for the role in CAPTURE.
 *
 * Steps, in order:
 *  - make the create_error_logs() / create_admin() set-up calls and configure the global tweet queue;
 *  - verify that CAPTURE is defined and listed in CAPTUREROLES;
 *  - log the PHP execution environment and install a SIGTERM handler when pcntl is available;
 *  - check geoPHP availability (exits with status 1 when geobins are active but geoPHP is not usable);
 *  - initialise the rate-limit / timing globals and write "<pid>|<timestamp>" to the procinfo file;
 *  - build the request for the "track", "follow" or "onepercent" role and start streaming with
 *    'tracker_streamCallback' as callback;
 *  - once the stream stops, log the client state and call processtweets().
 *
 * The function returns early (after logging) when the query list is empty or the role is not one
 * of the three handled here, and terminates the script on misconfiguration.
 *
 * @return void
 */
function tracker_run() {
    global $dbuser, $dbpass, $database, $hostname, $tweetQueue;

    // We need the tcat_status table
    create_error_logs();
    // We need the tcat_captured_phrases table
    create_admin();

    $tweetQueue = new TweetQueue();
    $tweetQueue->setoption('replace', false);
    if (defined('USE_INSERT_DELAYED') && USE_INSERT_DELAYED) {
        $tweetQueue->setoption('delayed', true);
    }
    // 'ignore' is on unless DISABLE_INSERT_IGNORE is defined and truthy
    $tweetQueue->setoption('ignore', !(defined('DISABLE_INSERT_IGNORE') && DISABLE_INSERT_IGNORE));

    if (!defined("CAPTURE")) {
        /* logged to no file in particular, because we don't know which one. this should not happen. */
        error_log("tracker_run() called without defining CAPTURE. have you set up config.php ?");
        die;
    }

    // unserialize() yields false on malformed input; treat that the same as "role not configured"
    // instead of handing a non-array to in_array().
    $roles = unserialize(CAPTUREROLES);
    if (!is_array($roles) || !in_array(CAPTURE, $roles, true)) {
        /* incorrect script execution, report back error to user */
        error_log("tracker_run() role " . CAPTURE . " is not configured to run");
        die;
    }

    // every message from here on goes to the log file of this role
    $logfile = CAPTURE . ".error.log";

    // log execution environment
    $phpstring = phpversion() . " in mode " . php_sapi_name() . " with extensions "
        . implode(',', get_loaded_extensions())
        . " (ini file: " . php_ini_loaded_file() . ")";
    logit($logfile, "running php version {$phpstring}");

    // install the signal handler
    if (function_exists('pcntl_signal')) {
        // tick use required as of PHP 4.3.0
        declare (ticks=1);
        // See signal method discussion:
        // http://darrendev.blogspot.nl/2010/11/php-53-ticks-pcntlsignal.html
        logit($logfile, "installing term signal handler for this script");
        // setup signal handlers
        pcntl_signal(SIGTERM, "capture_signal_handler_term");
    } else {
        logit($logfile, "your php installation does not support signal handlers. graceful reload will not work");
    }

    // sanity check for geo bins functions
    if (geophp_sane()) {
        logit($logfile, "geoPHP library is fully functional");
    } elseif (geobinsActive()) {
        logit($logfile, "refusing to track until geobins are stopped or geo is functional");
        exit(1);
    } else {
        logit($logfile, "geoPHP functions are not yet available, see documentation for instructions");
    }

    global $rl_current_record, $rl_registering_minute;
    global $last_insert_id;
    global $tracker_started_at;

    $rl_current_record = 0;                         // how many tweets have been ratelimited this MINUTE?
    $rl_registering_minute = get_current_minute();  // what is the minute we are registering (as soon as the current minute differs from this, we insert our record in the database)
    $last_insert_id = -1;                           // needed to make INSERT DELAYED work, see the function database_activity()
    $tracker_started_at = time();                   // the walltime when this script was started

    global $twitter_consumer_key, $twitter_consumer_secret, $twitter_user_token, $twitter_user_secret, $lastinsert;

    $pid = getmypid();
    logit($logfile, "started script " . CAPTURE . " with pid {$pid}");

    $lastinsert = time();

    // register pid and start time as "<pid>|<unix time>" in the procinfo file of this role
    $procfilename = __DIR__ . "/../../proc/" . CAPTURE . ".procinfo";
    if (file_put_contents($procfilename, $pid . "|" . time()) === FALSE) {
        logit($logfile, "cannot register capture script start time (file \"{$procfilename}\" is not WRITABLE. make sure the proc/ directory exists in your webroot and is writable by the cron user)");
        die;
    }

    // a per-role host override may be present in $GLOBALS["HOSTROLE"]; otherwise use the default endpoint
    $networkpath = isset($GLOBALS["HOSTROLE"][CAPTURE]) ? $GLOBALS["HOSTROLE"][CAPTURE] : 'https://stream.twitter.com/';

    // prepare queries
    switch (CAPTURE) {
        case "track":
            // Read the geobin state once, so the decision to abort, the value of $locations and
            // the presence of the 'locations' parameter are all based on the same answer.
            $geoActive = geobinsActive();
            $locations = $geoActive ? getActiveLocationsImploded() : false;

            // assemble query
            $querylist = getActivePhrases();
            if (empty($querylist) && !$geoActive) {
                logit($logfile, "empty query list, aborting!");
                return;
            }
            $method = $networkpath . '1.1/statuses/filter.json';
            $params = array();
            if ($geoActive) {
                $params['locations'] = $locations;
            }
            if (!empty($querylist)) {
                $params['track'] = implode(",", $querylist);
            }
            $connectmessage = "connecting - query " . var_export($params, 1);
            break;

        case "follow":
            $querylist = getActiveUsers();
            if (empty($querylist)) {
                logit($logfile, "empty query list, aborting!");
                return;
            }
            $method = $networkpath . '1.1/statuses/filter.json';
            $params = array("follow" => implode(",", $querylist));
            $connectmessage = "connecting - query " . var_export($params, 1);
            break;

        case "onepercent":
            $method = $networkpath . '1.1/statuses/sample.json';
            $params = array('stall_warnings' => 'true');
            $connectmessage = "connecting to sample stream";
            break;

        default:
            // A role can be listed in CAPTUREROLES without being handled here; without this
            // branch the request below would be issued with undefined $method and $params.
            logit($logfile, "unsupported capture role " . CAPTURE . ", aborting!");
            return;
    }

    logit($logfile, "connecting to API socket");
    $tmhOAuth = new tmhOAuth(array(
        'consumer_key' => $twitter_consumer_key,
        'consumer_secret' => $twitter_consumer_secret,
        'token' => $twitter_user_token,
        'secret' => $twitter_user_secret,
        'host' => 'stream.twitter.com',
    ));
    $tmhOAuth->request_settings['headers']['Host'] = 'stream.twitter.com';

    logit($logfile, $connectmessage);

    // NOTE(review): $capturebucket is a local variable in this function. If the stream callback
    // collects tweets in a global of the same name, the processtweets() call at the end receives
    // this empty array rather than the callback's buffer -- confirm against tracker_streamCallback.
    $capturebucket = array();
    $tmhOAuth->streaming_request('POST', $method, $params, 'tracker_streamCallback', array('Host' => 'stream.twitter.com'));

    // output any response we get back AFTER the Stream has stopped -- or it errors
    // NOTE(review): this exports the whole client object, which presumably includes the OAuth
    // credentials passed to its constructor above -- consider logging only the response part.
    logit($logfile, "stream stopped - error " . var_export($tmhOAuth, 1));

    logit($logfile, "processing buffer before exit");
    processtweets($capturebucket);
}