Example #1
function process_json_file_timeline($filepath, $dbh)
{
    global $tweets_processed, $tweets_failed, $tweets_success, $valid_timeline, $empty_timeline, $invalid_timeline, $populated_timeline, $total_timeline, $all_tweet_ids, $all_users, $bin_name;
    $tweetQueue = new TweetQueue();
    $total_timeline++;
    ini_set('auto_detect_line_endings', true);
    $handle = @fopen($filepath, "r");
    if ($handle) {
        while (($buffer = fgets($handle, 40960)) !== false) {
            $tweet = json_decode($buffer, true);
            if ($tweet === null) {
                // skip lines that do not decode as JSON
                $tweets_failed++;
                continue;
            }
            //var_export($tweet); print "\n\n";
            $buffer = "";
            $t = new Tweet();
            $t->fromJSON($tweet);
            if (!$t->isInBin($bin_name)) {
                $tweetQueue->push($t, $bin_name);
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
                $all_users[] = $t->from_user_id;
                $all_tweet_ids[] = $t->id;
                $tweets_processed++;
            }
            print ".";
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    }
    if ($tweetQueue->length() > 0) {
        $tweetQueue->insertDB();
    }
}
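A minimal invocation sketch for the function above, assuming a hypothetical directory of line-delimited JSON dumps and the pdo_connect() helper shown in Example #5 (the path is illustrative, not part of DMI-TCAT):
// Hypothetical usage: run every .json dump in a capture directory through
// process_json_file_timeline(), reusing one PDO connection.
$dbh = pdo_connect();
foreach (glob("/path/to/timeline-dumps/*.json") as $filepath) {
    process_json_file_timeline($filepath, $dbh);
}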
Example #2
function process_json_file_timeline($filepath, $dbh)
{
    global $tweets_processed, $tweets_failed, $tweets_success, $valid_timeline, $empty_timeline, $invalid_timeline, $populated_timeline, $total_timeline, $all_tweet_ids, $all_users, $bin_name;
    $tweetQueue = new TweetQueue();
    $total_timeline++;
    $filestr = file_get_contents($filepath);
    // sylvester stores multiple JSON exports in the same file;
    // to decode them we need to split the file into its individual exports
    $jsons = explode("}][{", $filestr);
    print count($jsons) . " jsons found\n";
    foreach ($jsons as $json) {
        if (substr($json, 0, 2) != "[{") {
            $json = "[{" . $json;
        }
        if (substr($json, -2) != "}]") {
            $json = $json . "}]";
        }
        $timeline = json_decode($json);
        if (is_array($timeline)) {
            $valid_timeline++;
            if (!empty($timeline)) {
                $populated_timeline++;
            } else {
                $empty_timeline++;
            }
        } else {
            $invalid_timeline++;
            continue; // nothing to iterate over when the export did not decode to an array
        }
        foreach ($timeline as $tweet) {
            $t = new Tweet();
            $t->fromJSON($tweet);
            if (!$t->isInBin($bin_name)) {
                $tweetQueue->push($t, $bin_name);
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
                $all_users[] = $t->user->id;
                $all_tweet_ids[] = $t->id;
                $tweets_processed++;
            }
        }
    }
    if ($tweetQueue->length() > 0) {
        $tweetQueue->insertDB();
    }
}
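A standalone sketch of the splitting step above, with an illustrative input string, showing why the brackets consumed by explode() have to be re-added before json_decode():
// Illustrative input: two JSON array exports concatenated in one file.
$filestr = '[{"id":1}][{"id":2},{"id":3}]';
$jsons = explode("}][{", $filestr);       // the "}][{" delimiter is consumed by explode()
foreach ($jsons as $json) {
    if (substr($json, 0, 2) != "[{") {
        $json = "[{" . $json;             // restore the opening brackets
    }
    if (substr($json, -2) != "}]") {
        $json = $json . "}]";             // restore the closing brackets
    }
    var_export(json_decode($json, true)); // each chunk now decodes on its own
}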
Example #3
function process_json_file_timeline($filepath, $dbh)
{
    print $filepath . "\n";
    global $tweets_processed, $tweets_failed, $tweets_success, $all_tweet_ids, $all_users, $bin_name;
    $tweetQueue = new TweetQueue();
    ini_set('auto_detect_line_endings', true);
    $handle = @fopen($filepath, "r");
    if ($handle) {
        while (($buffer = fgets($handle, 40960)) !== false) {
            $buffer = trim($buffer);
            if (empty($buffer)) {
                continue;
            }
            $tweet = json_decode($buffer);
            $buffer = "";
            $t = Tweet::fromGnip($tweet);
            if ($t === false) {
                continue;
            }
            if (!$t->isInBin($bin_name)) {
                $all_users[] = $t->from_user_id;
                $all_tweet_ids[] = $t->id;
                $tweetQueue->push($t, $bin_name);
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
                $tweets_processed++;
            }
            print ".";
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    }
    if ($tweetQueue->length() > 0) {
        $tweetQueue->insertDB();
    }
}
Example #4
function tracker_run()
{
    global $tweetQueue;
    $tweetQueue = new TweetQueue();
    $tweetQueue->setoption('replace', false);
    if (defined('USE_INSERT_DELAYED') && USE_INSERT_DELAYED) {
        $tweetQueue->setoption('delayed', true);
    }
    if (defined('DISABLE_INSERT_IGNORE') && DISABLE_INSERT_IGNORE) {
        $tweetQueue->setoption('ignore', false);
    } else {
        $tweetQueue->setoption('ignore', true);
    }
    if (!defined("CAPTURE")) {
        /* logged to no file in particular, because we don't know which one. this should not happen. */
        error_log("tracker_run() called without defining CAPTURE. have you set up config.php ?");
        die;
    }
    $roles = unserialize(CAPTUREROLES);
    if (!in_array(CAPTURE, $roles)) {
        /* incorrect script execution, report back error to user */
        error_log("tracker_run() role " . CAPTURE . " is not configured to run");
        die;
    }
    // log execution environment
    $phpstring = phpversion() . " in mode " . php_sapi_name() . " with extensions ";
    $extensions = get_loaded_extensions();
    $first = true;
    foreach ($extensions as $ext) {
        if ($first) {
            $first = false;
        } else {
            $phpstring .= ',';
        }
        $phpstring .= "{$ext}";
    }
    $phpstring .= " (ini file: " . php_ini_loaded_file() . ")";
    logit(CAPTURE . ".error.log", "running php version {$phpstring}");
    // install the signal handler
    if (function_exists('pcntl_signal')) {
        // tick use required as of PHP 4.3.0
        declare (ticks=1);
        // See signal method discussion:
        // http://darrendev.blogspot.nl/2010/11/php-53-ticks-pcntlsignal.html
        logit(CAPTURE . ".error.log", "installing term signal handler for this script");
        // setup signal handlers
        pcntl_signal(SIGTERM, "capture_signal_handler_term");
    } else {
        logit(CAPTURE . ".error.log", "your php installation does not support signal handlers. graceful reload will not work");
    }
    // sanity check for geo bins functions
    if (geophp_sane()) {
        logit(CAPTURE . ".error.log", "geoPHP library is fully functional");
    } elseif (geobinsActive()) {
        logit(CAPTURE . ".error.log", "refusing to track until geobins are stopped or geo is functional");
        exit(1);
    } else {
        logit(CAPTURE . ".error.log", "geoPHP functions are not yet available, see documentation for instructions");
    }
    global $ratelimit, $exceeding, $ex_start, $last_insert_id;
    $ratelimit = 0;
    // rate limit counter since start of script
    $exceeding = 0;
    // are we exceeding the rate limit currently?
    $ex_start = 0;
    // time at which rate limit started being exceeded
    $last_insert_id = -1;
    global $twitter_consumer_key, $twitter_consumer_secret, $twitter_user_token, $twitter_user_secret, $lastinsert;
    $pid = getmypid();
    logit(CAPTURE . ".error.log", "started script " . CAPTURE . " with pid {$pid}");
    $lastinsert = time();
    $procfilename = BASE_FILE . "proc/" . CAPTURE . ".procinfo";
    if (file_put_contents($procfilename, $pid . "|" . time()) === FALSE) {
        logit(CAPTURE . ".error.log", "cannot register capture script start time (file \"{$procfilename}\" is not WRITABLE. make sure the proc/ directory exists in your webroot and is writable by the cron user)");
        die;
    }
    $networkpath = isset($GLOBALS["HOSTROLE"][CAPTURE]) ? $GLOBALS["HOSTROLE"][CAPTURE] : 'https://stream.twitter.com/';
    // prepare queries
    if (CAPTURE == "track") {
        // check for geolocation bins
        $locations = geobinsActive() ? getActiveLocationsImploded() : false;
        // assemble query
        $querylist = getActivePhrases();
        if (empty($querylist) && !geobinsActive()) {
            logit(CAPTURE . ".error.log", "empty query list, aborting!");
            return;
        }
        $method = $networkpath . '1.1/statuses/filter.json';
        $track = implode(",", $querylist);
        $params = array();
        if (geobinsActive()) {
            $params['locations'] = $locations;
        }
        if (!empty($querylist)) {
            $params['track'] = $track;
        }
    } elseif (CAPTURE == "follow") {
        $querylist = getActiveUsers();
        if (empty($querylist)) {
            logit(CAPTURE . ".error.log", "empty query list, aborting!");
            return;
        }
        $method = $networkpath . '1.1/statuses/filter.json';
        $params = array("follow" => implode(",", $querylist));
    } elseif (CAPTURE == "onepercent") {
        $method = $networkpath . '1.1/statuses/sample.json';
        $params = array('stall_warnings' => 'true');
    }
    logit(CAPTURE . ".error.log", "connecting to API socket");
    $tmhOAuth = new tmhOAuth(array(
        'consumer_key' => $twitter_consumer_key,
        'consumer_secret' => $twitter_consumer_secret,
        'token' => $twitter_user_token,
        'secret' => $twitter_user_secret,
        'host' => 'stream.twitter.com'
    ));
    $tmhOAuth->request_settings['headers']['Host'] = 'stream.twitter.com';
    if (CAPTURE == "track" || CAPTURE == "follow") {
        logit(CAPTURE . ".error.log", "connecting - query " . var_export($params, 1));
    } elseif (CAPTURE == "onepercent") {
        logit(CAPTURE . ".error.log", "connecting to sample stream");
    }
    $capturebucket = array();
    $tmhOAuth->streaming_request('POST', $method, $params, 'tracker_streamCallback', array('Host' => 'stream.twitter.com'));
    // output any response we get back AFTER the Stream has stopped -- or it errors
    logit(CAPTURE . ".error.log", "stream stopped - error " . var_export($tmhOAuth, 1));
    logit(CAPTURE . ".error.log", "processing buffer before exit");
    processtweets($capturebucket);
}
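For reference, a hedged sketch of the config.php constants tracker_run() expects; the constant names come from the code above, the values are placeholders only:
// Illustrative values only; adjust to your installation.
define('CAPTURE', 'track');                          // role of this worker: track, follow or onepercent
define('CAPTUREROLES', serialize(array('track')));   // roles enabled on this host (read via unserialize)
define('BASE_FILE', '/var/www/dmi-tcat/');           // must contain a writable proc/ directory
define('USE_INSERT_DELAYED', false);                 // optional: use INSERT DELAYED for queue inserts
define('DISABLE_INSERT_IGNORE', false);              // optional: disable INSERT IGNORE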
Example #5
$keywords = '';
// separate keywords by 'OR', limit your search to 10 keywords and operators, max 500 characters - https://dev.twitter.com/docs/using-search
$type = 'search';
// specify 'search' if you want this to be a standalone bin, or 'track' if you want to be able to continue tracking these keywords later on via BASE_URL/capture/index.php
if (empty($bin_name)) {
    die("bin_name not set\n");
}
if (empty($keywords)) {
    die("keywords not set\n");
}
if (dbserver_has_utf8mb4_support() == false) {
    die("DMI-TCAT requires at least MySQL version 5.5.3 - please upgrade your server");
}
$querybin_id = queryManagerBinExists($bin_name, $cronjob);
$current_key = 0;
$tweetQueue = new TweetQueue();
// ----- connection -----
$dbh = pdo_connect();
create_bin($bin_name, $dbh);
$ratefree = 0;
queryManagerCreateBinFromExistingTables($bin_name, $querybin_id, $type, explode("OR", $keywords));
search($keywords);
if ($tweetQueue->length() > 0) {
    $tweetQueue->insertDB();
}
function search($keywords, $max_id = null)
{
    global $twitter_keys, $current_key, $ratefree, $bin_name, $dbh, $tweetQueue;
    $ratefree--;
    if ($ratefree < 1 || $ratefree % 10 == 0) {
        $keyinfo = getRESTKey($current_key, 'search', 'tweets');
Example #6
 /**
  * Grab enqueued tweets and ping them across to Facebook
  */
 public function processQueue()
 {
     $total_updates = 0;
     $this->log('processing queue', 'queue');
     $this->db->exec('set wait_timeout = 43200');
     $tweetQueue = new TweetQueue($this->db);
     while ($tweets = $tweetQueue->getTweets()) {
         $this->log('processing tweets', 'queue');
         $total_updates += $this->processTweets($tweets);
     }
     $this->log($total_updates . ' tweets processed', 'queue');
     $this->log('exiting', 'queue');
 }
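A sketch of the batch-and-flush TweetQueue pattern the examples share, inferred from the calls above rather than from the class source ($tweets and $bin_name stand in for data built as in Examples #1-#3):
// Batch-and-flush pattern seen in Examples #1-#5; Example #6 instead drains
// a queue via getTweets(). Option names are taken from Example #4.
$queue = new TweetQueue();              // Example #6 also passes a DB handle to the constructor
$queue->setoption('ignore', true);      // tune the INSERT strategy
foreach ($tweets as $tweet) {           // $tweets: Tweet objects (assumed here)
    $queue->push($tweet, $bin_name);    // buffer a Tweet for a query bin
    if ($queue->length() > 100) {
        $queue->insertDB();             // flush roughly every 100 tweets
    }
}
if ($queue->length() > 0) {
    $queue->insertDB();                 // final flush so nothing stays buffered
}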