/**
 * Process a line-delimited JSON timeline export and queue its tweets for insertion.
 *
 * Each line of $filepath is expected to hold one JSON-encoded tweet. Tweets not
 * already in the current bin are pushed onto a TweetQueue and flushed to the
 * database in batches of 100, with a final flush at the end.
 *
 * Fix over the previous version: blank lines and lines that fail to decode are
 * now skipped (counted in $tweets_failed) instead of passing null into
 * Tweet::fromJSON(). This matches the guard used by the Gnip variant of this
 * function elsewhere in this file.
 *
 * @param string $filepath path to the line-delimited JSON file
 * @param mixed  $dbh      database handle (unused here; inserts go via TweetQueue)
 */
function process_json_file_timeline($filepath, $dbh) {
    global $tweets_processed, $tweets_failed, $tweets_success,
           $valid_timeline, $empty_timeline, $invalid_timeline, $populated_timeline, $total_timeline,
           $all_tweet_ids, $all_users, $bin_name;
    $tweetQueue = new TweetQueue();
    $total_timeline++;
    ini_set('auto_detect_line_endings', true);
    $handle = @fopen($filepath, "r");
    if ($handle) {
        while (($buffer = fgets($handle, 40960)) !== false) {
            $buffer = trim($buffer);
            if ($buffer === '') {
                // skip blank lines (trailing newline, padding between exports)
                continue;
            }
            $tweet = json_decode($buffer, true);
            $buffer = "";
            if ($tweet === null) {
                // malformed JSON on this line; count and move on rather than
                // feeding null into Tweet::fromJSON()
                $tweets_failed++;
                continue;
            }
            $t = new Tweet();
            $t->fromJSON($tweet);
            if (!$t->isInBin($bin_name)) {
                $tweetQueue->push($t, $bin_name);
                // flush in batches to bound memory usage
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
                $all_users[] = $t->from_user_id;
                $all_tweet_ids[] = $t->id;
                $tweets_processed++;
            }
            print ".";
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    }
    // flush any remainder below the batch threshold
    if ($tweetQueue->length() > 0) {
        $tweetQueue->insertDB();
    }
}
/**
 * Process a sylvester-style timeline export file and queue its tweets for insertion.
 *
 * Sylvester stores multiple JSON exports back-to-back in the same file; they are
 * split apart on the "}][{" seam, re-wrapped with the delimiters explode() consumed,
 * and decoded one export at a time. Tweets not already in the current bin are pushed
 * onto a TweetQueue and flushed in batches of 100, with a final flush at the end.
 *
 * Fix over the previous version: an export that fails to decode (json_decode
 * returning null or a non-array) is now counted in $invalid_timeline and skipped.
 * Previously the foreach still ran over the non-array value, which raises an
 * error on modern PHP.
 *
 * @param string $filepath path to the concatenated JSON export file
 * @param mixed  $dbh      database handle (unused here; inserts go via TweetQueue)
 */
function process_json_file_timeline($filepath, $dbh) {
    global $tweets_processed, $tweets_failed, $tweets_success,
           $valid_timeline, $empty_timeline, $invalid_timeline, $populated_timeline, $total_timeline,
           $all_tweet_ids, $all_users, $bin_name;
    $tweetQueue = new TweetQueue();
    $total_timeline++;
    $filestr = file_get_contents($filepath);
    // sylvester stores multiple json exports in the same file,
    // in order to decode it we will need to split it into its respective individual exports
    $jsons = explode("}][{", $filestr);
    print count($jsons) . " jsons found\n";
    foreach ($jsons as $json) {
        // restore the array delimiters that explode() stripped from inner segments
        if (substr($json, 0, 2) != "[{") {
            $json = "[{" . $json;
        }
        if (substr($json, -2) != "}]") {
            $json = $json . "}]";
        }
        $timeline = json_decode($json);
        if (!is_array($timeline)) {
            // undecodable export: record it and skip instead of iterating null
            $invalid_timeline++;
            continue;
        }
        $valid_timeline++;
        if (!empty($timeline)) {
            $populated_timeline++;
        } else {
            $empty_timeline++;
        }
        foreach ($timeline as $tweet) {
            $t = new Tweet();
            $t->fromJSON($tweet);
            if (!$t->isInBin($bin_name)) {
                $tweetQueue->push($t, $bin_name);
                // flush in batches to bound memory usage
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
                $all_users[] = $t->user->id;
                $all_tweet_ids[] = $t->id;
                $tweets_processed++;
            }
        }
    }
    // flush any remainder below the batch threshold
    if ($tweetQueue->length() > 0) {
        $tweetQueue->insertDB();
    }
}
/**
 * Process a line-delimited Gnip export file and queue its tweets for insertion.
 *
 * Each non-empty line of $filepath holds one JSON-encoded Gnip activity. Lines
 * that Tweet::fromGnip() rejects are skipped. Tweets not already in the current
 * bin are pushed onto a TweetQueue and flushed in batches of 100, with a final
 * flush at the end.
 *
 * @param string $filepath path to the line-delimited Gnip JSON file
 * @param mixed  $dbh      database handle (unused here; inserts go via TweetQueue)
 */
function process_json_file_timeline($filepath, $dbh) {
    print $filepath . "\n";
    global $tweets_processed, $tweets_failed, $tweets_success,
           $all_tweet_ids, $all_users, $bin_name;
    $queue = new TweetQueue();
    ini_set('auto_detect_line_endings', true);
    $fh = @fopen($filepath, "r");
    if ($fh) {
        while (($line = fgets($fh, 40960)) !== false) {
            $line = trim($line);
            if (empty($line)) {
                continue;       // nothing on this line
            }
            $decoded = json_decode($line);
            $tweetObj = Tweet::fromGnip($decoded);
            if ($tweetObj === false) {
                continue;       // not a recognizable Gnip activity
            }
            if (!$tweetObj->isInBin($bin_name)) {
                $all_users[] = $tweetObj->from_user_id;
                $all_tweet_ids[] = $tweetObj->id;
                $queue->push($tweetObj, $bin_name);
                // flush in batches to bound memory usage
                if ($queue->length() > 100) {
                    $queue->insertDB();
                }
                $tweets_processed++;
            }
            print ".";
        }
        if (!feof($fh)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($fh);
    }
    // flush any remainder below the batch threshold
    if ($queue->length() > 0) {
        $queue->insertDB();
    }
}
/**
 * Main entry point for a capture role ("track", "follow" or "onepercent").
 *
 * Configures the global TweetQueue insert options, validates that the CAPTURE
 * constant is defined and enabled in CAPTUREROLES, logs the PHP environment,
 * installs a SIGTERM handler (when pcntl is available), sanity-checks geo
 * support, registers the process pid in proc/<role>.procinfo, assembles the
 * streaming-API query for the active role, and finally opens the streaming
 * request. Control returns here only after the stream stops or errors, at
 * which point any buffered tweets are processed before exit.
 *
 * Side effects: writes globals ($tweetQueue, $ratelimit, $exceeding, $ex_start,
 * $last_insert_id, $lastinsert), writes the procinfo file, may die()/exit(1).
 */
function tracker_run() {
    global $tweetQueue;
    $tweetQueue = new TweetQueue();
    $tweetQueue->setoption('replace', false);
    if (defined('USE_INSERT_DELAYED') && USE_INSERT_DELAYED) {
        $tweetQueue->setoption('delayed', true);
    }
    if (defined('DISABLE_INSERT_IGNORE') && DISABLE_INSERT_IGNORE) {
        $tweetQueue->setoption('ignore', false);
    } else {
        $tweetQueue->setoption('ignore', true);
    }
    if (!defined("CAPTURE")) {
        /* logged to no file in particular, because we don't know which one. this should not happen. */
        error_log("tracker_run() called without defining CAPTURE. have you set up config.php ?");
        die;
    }
    $roles = unserialize(CAPTUREROLES);
    if (!in_array(CAPTURE, $roles)) {
        /* incorrect script execution, report back error to user */
        error_log("tracker_run() role " . CAPTURE . " is not configured to run");
        die;
    }
    // log execution environment
    $phpstring = phpversion() . " in mode " . php_sapi_name() . " with extensions ";
    $extensions = get_loaded_extensions();
    $first = true;
    foreach ($extensions as $ext) {
        if ($first) {
            $first = false;
        } else {
            $phpstring .= ',';
        }
        $phpstring .= "{$ext}";
    }
    $phpstring .= " (ini file: " . php_ini_loaded_file() . ")";
    logit(CAPTURE . ".error.log", "running php version {$phpstring}");
    // install the signal handler
    if (function_exists('pcntl_signal')) {
        // tick use required as of PHP 4.3.0
        declare (ticks=1);
        // See signal method discussion:
        // http://darrendev.blogspot.nl/2010/11/php-53-ticks-pcntlsignal.html
        logit(CAPTURE . ".error.log", "installing term signal handler for this script");
        // setup signal handlers
        pcntl_signal(SIGTERM, "capture_signal_handler_term");
    } else {
        logit(CAPTURE . ".error.log", "your php installation does not support signal handlers. graceful reload will not work");
    }
    // sanity check for geo bins functions
    if (geophp_sane()) {
        logit(CAPTURE . ".error.log", "geoPHP library is fully functional");
    } elseif (geobinsActive()) {
        // geo bins are configured but geoPHP is broken: refuse to run rather
        // than silently capture without geolocation support
        logit(CAPTURE . ".error.log", "refusing to track until geobins are stopped or geo is functional");
        exit(1);
    } else {
        logit(CAPTURE . ".error.log", "geoPHP functions are not yet available, see documentation for instructions");
    }
    // rate-limit bookkeeping shared with the stream callback
    global $ratelimit, $exceeding, $ex_start, $last_insert_id;
    $ratelimit = 0; // rate limit counter since start of script
    $exceeding = 0; // are we exceeding the rate limit currently?
    $ex_start = 0; // time at which rate limit started being exceeded
    $last_insert_id = -1;
    global $twitter_consumer_key, $twitter_consumer_secret, $twitter_user_token, $twitter_user_secret, $lastinsert;
    $pid = getmypid();
    logit(CAPTURE . ".error.log", "started script " . CAPTURE . " with pid {$pid}");
    $lastinsert = time();
    // register pid + start time so the controller can monitor/reload this process
    $procfilename = BASE_FILE . "proc/" . CAPTURE . ".procinfo";
    if (file_put_contents($procfilename, $pid . "|" . time()) === FALSE) {
        logit(CAPTURE . ".error.log", "cannot register capture script start time (file \"{$procfilename}\" is not WRITABLE. make sure the proc/ directory exists in your webroot and is writable by the cron user)");
        die;
    }
    // per-role API host override, falling back to the default stream endpoint
    $networkpath = isset($GLOBALS["HOSTROLE"][CAPTURE]) ? $GLOBALS["HOSTROLE"][CAPTURE] : 'https://stream.twitter.com/';
    // prepare queries
    if (CAPTURE == "track") {
        // check for geolocation bins
        $locations = geobinsActive() ? getActiveLocationsImploded() : false;
        // assemble query
        $querylist = getActivePhrases();
        if (empty($querylist) && !geobinsActive()) {
            logit(CAPTURE . ".error.log", "empty query list, aborting!");
            return;
        }
        $method = $networkpath . '1.1/statuses/filter.json';
        $track = implode(",", $querylist);
        $params = array();
        if (geobinsActive()) {
            $params['locations'] = $locations;
        }
        if (!empty($querylist)) {
            $params['track'] = $track;
        }
    } elseif (CAPTURE == "follow") {
        $querylist = getActiveUsers();
        if (empty($querylist)) {
            logit(CAPTURE . ".error.log", "empty query list, aborting!");
            return;
        }
        $method = $networkpath . '1.1/statuses/filter.json';
        $params = array("follow" => implode(",", $querylist));
    } elseif (CAPTURE == "onepercent") {
        $method = $networkpath . '1.1/statuses/sample.json';
        $params = array('stall_warnings' => 'true');
    }
    logit(CAPTURE . ".error.log", "connecting to API socket");
    $tmhOAuth = new tmhOAuth(array('consumer_key' => $twitter_consumer_key, 'consumer_secret' => $twitter_consumer_secret, 'token' => $twitter_user_token, 'secret' => $twitter_user_secret, 'host' => 'stream.twitter.com'));
    $tmhOAuth->request_settings['headers']['Host'] = 'stream.twitter.com';
    if (CAPTURE == "track" || CAPTURE == "follow") {
        logit(CAPTURE . ".error.log", "connecting - query " . var_export($params, 1));
    } elseif (CAPTURE == "onepercent") {
        logit(CAPTURE . ".error.log", "connecting to sample stream");
    }
    $capturebucket = array();
    // blocks here while the stream is alive; tweets are handled by the callback
    $tmhOAuth->streaming_request('POST', $method, $params, 'tracker_streamCallback', array('Host' => 'stream.twitter.com'));
    // output any response we get back AFTER the Stream has stopped -- or it errors
    logit(CAPTURE . ".error.log", "stream stopped - error " . var_export($tmhOAuth, 1));
    logit(CAPTURE . ".error.log", "processing buffer before exit");
    processtweets($capturebucket);
}
// --- standalone search-bin script: validates config, creates the bin, then
// --- runs search(); the search() definition continues beyond this chunk.
$keywords = ''; // separate keywords by 'OR', limit your search to 10 keywords and operators, max 500 characters - https://dev.twitter.com/docs/using-search
$type = 'search'; // specify 'search' if you want this to be a standalone bin, or 'track' if you want to be able to continue tracking these keywords later on via BASE_URL/capture/index.php
// abort early on missing configuration
if (empty($bin_name)) {
    die("bin_name not set\n");
}
if (empty($keywords)) {
    die("keywords not set\n");
}
if (dbserver_has_utf8mb4_support() == false) {
    die("DMI-TCAT requires at least MySQL version 5.5.3 - please upgrade your server");
}
$querybin_id = queryManagerBinExists($bin_name, $cronjob);
$current_key = 0; // index into $twitter_keys; rotated by search() as limits deplete
$tweetQueue = new TweetQueue();
// ----- connection -----
$dbh = pdo_connect();
create_bin($bin_name, $dbh);
$ratefree = 0; // remaining rate-limit allowance; refreshed inside search()
queryManagerCreateBinFromExistingTables($bin_name, $querybin_id, $type, explode("OR", $keywords));
search($keywords);
// final flush of anything search() left queued
if ($tweetQueue->length() > 0) {
    $tweetQueue->insertDB();
}
// NOTE(review): definition truncated in this chunk — body continues past the visible source.
function search($keywords, $max_id = null) {
    global $twitter_keys, $current_key, $ratefree, $bin_name, $dbh, $tweetQueue;
    $ratefree--;
    // re-check key budget when allowance is exhausted or every 10th call
    if ($ratefree < 1 || $ratefree % 10 == 0) {
        $keyinfo = getRESTKey($current_key, 'search', 'tweets');
/**
 * Grab enqueued tweets and ping them across to Facebook.
 *
 * Drains the TweetQueue in batches: each batch returned by getTweets() is
 * handed to processTweets(), and the running total is logged at the end.
 */
public function processQueue() {
    $processedCount = 0;
    $this->log('processing queue', 'queue');
    // long-running drain: raise the MySQL idle timeout to 12 hours
    $this->db->exec('set wait_timeout = 43200');
    $queue = new TweetQueue($this->db);
    for (;;) {
        $batch = $queue->getTweets();
        if (!$batch) {
            break; // queue drained
        }
        $this->log('processing tweets', 'queue');
        $processedCount += $this->processTweets($batch);
    }
    $this->log($processedCount . ' tweets processed', 'queue');
    $this->log('exiting', 'queue');
}