Exemplo n.º 1
0
function process_json_file_timeline($filepath, $dbh)
{
    global $tweets_processed, $tweets_failed, $tweets_success, $valid_timeline, $empty_timeline, $invalid_timeline, $populated_timeline, $total_timeline, $all_tweet_ids, $all_users, $bin_name;
    $tweetQueue = new TweetQueue();
    $total_timeline++;
    ini_set('auto_detect_line_endings', true);
    $handle = @fopen($filepath, "r");
    if ($handle) {
        while (($buffer = fgets($handle, 40960)) !== false) {
            $tweet = json_decode($buffer, true);
            //var_export($tweet); print "\n\n";
            $buffer = "";
            $t = new Tweet();
            $t->fromJSON($tweet);
            if (!$t->isInBin($bin_name)) {
                $tweetQueue->push($t, $bin_name);
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
                $all_users[] = $t->from_user_id;
                $all_tweet_ids[] = $t->id;
                $tweets_processed++;
            }
            print ".";
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    }
    if ($tweetQueue->length() > 0) {
        $tweetQueue->insertDB();
    }
}
Exemplo n.º 2
0
function search($idlist)
{
    global $twitter_keys, $current_key, $all_users, $all_tweet_ids, $bin_name, $dbh, $tweetQueue;
    $keyinfo = getRESTKey(0);
    $current_key = $keyinfo['key'];
    $ratefree = $keyinfo['remaining'];
    print "current key {$current_key} ratefree {$ratefree}\n";
    $tmhOAuth = new tmhOAuth(array('consumer_key' => $twitter_keys[$current_key]['twitter_consumer_key'], 'consumer_secret' => $twitter_keys[$current_key]['twitter_consumer_secret'], 'token' => $twitter_keys[$current_key]['twitter_user_token'], 'secret' => $twitter_keys[$current_key]['twitter_user_secret']));
    // by hundred
    for ($i = 0; $i < sizeof($idlist); $i += 100) {
        if ($ratefree <= 0 || $ratefree % 10 == 0) {
            $keyinfo = getRESTKey($current_key);
            $current_key = $keyinfo['key'];
            $ratefree = $keyinfo['remaining'];
            $tmhOAuth = new tmhOAuth(array('consumer_key' => $twitter_keys[$current_key]['twitter_consumer_key'], 'consumer_secret' => $twitter_keys[$current_key]['twitter_consumer_secret'], 'token' => $twitter_keys[$current_key]['twitter_user_token'], 'secret' => $twitter_keys[$current_key]['twitter_user_secret']));
        }
        $q = $idlist[$i];
        $n = $i + 1;
        while ($n < $i + 100) {
            if (!isset($idlist[$n])) {
                break;
            }
            $q .= "," . $idlist[$n];
            $n++;
        }
        $params = array('id' => $q);
        $code = $tmhOAuth->user_request(array('method' => 'GET', 'url' => $tmhOAuth->url('1.1/statuses/lookup'), 'params' => $params));
        $ratefree--;
        if ($tmhOAuth->response['code'] == 200) {
            $data = json_decode($tmhOAuth->response['response'], true);
            if (is_array($data) && empty($data)) {
                // all tweets in set are deleted
                continue;
            }
            $tweets = $data;
            $tweet_ids = array();
            foreach ($tweets as $tweet) {
                $t = new Tweet();
                $t->fromJSON($tweet);
                if (!$t->isInBin($bin_name)) {
                    $all_users[] = $t->from_user_id;
                    $all_tweet_ids[] = $t->id;
                    $tweet_ids[] = $t->id;
                    $tweetQueue->push($t, $bin_name);
                }
                print ".";
            }
            sleep(1);
        } else {
            echo "Failure with code " . $tmhOAuth->response['response']['code'] . "\n";
            var_dump($tmhOAuth->response['response']['info']);
            var_dump($tmhOAuth->response['response']['error']);
            var_dump($tmhOAuth->response['response']['errno']);
            die;
        }
        $tweetQueue->insertDB();
    }
}
Exemplo n.º 3
0
function search($keywords, $max_id = null)
{
    global $twitter_keys, $current_key, $ratefree, $bin_name, $dbh, $tweetQueue;
    $ratefree--;
    if ($ratefree < 1 || $ratefree % 10 == 0) {
        $keyinfo = getRESTKey($current_key, 'search', 'tweets');
        $current_key = $keyinfo['key'];
        $ratefree = $keyinfo['remaining'];
    }
    $tmhOAuth = new tmhOAuth(array('consumer_key' => $twitter_keys[$current_key]['twitter_consumer_key'], 'consumer_secret' => $twitter_keys[$current_key]['twitter_consumer_secret'], 'token' => $twitter_keys[$current_key]['twitter_user_token'], 'secret' => $twitter_keys[$current_key]['twitter_user_secret']));
    $params = array('q' => $keywords, 'count' => 100);
    if (isset($max_id)) {
        $params['max_id'] = $max_id;
    }
    $code = $tmhOAuth->user_request(array('method' => 'GET', 'url' => $tmhOAuth->url('1.1/search/tweets'), 'params' => $params));
    if ($tmhOAuth->response['code'] == 200) {
        $data = json_decode($tmhOAuth->response['response'], true);
        $tweets = $data['statuses'];
        $tweet_ids = array();
        foreach ($tweets as $tweet) {
            $t = new Tweet();
            $t->fromJSON($tweet);
            $tweet_ids[] = $t->id;
            if (!$t->isInBin($bin_name)) {
                $tweetQueue->push($t, $bin_name);
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
                print ".";
            }
        }
        if (!empty($tweet_ids)) {
            print "\n";
            if (count($tweet_ids) <= 1) {
                print "no more tweets found\n\n";
                return false;
            }
            $max_id = min($tweet_ids);
            print "max id: " . $max_id . "\n";
        } else {
            print "0 tweets found\n\n";
            return false;
        }
        sleep(1);
        search($keywords, $max_id);
    } else {
        echo $tmhOAuth->response['response'] . "\n";
        if ($tmhOAuth->response['response']['errors']['code'] == 130) {
            // over capacity
            sleep(1);
            search($keywords, $max_id);
        }
    }
}
Exemplo n.º 4
0
function process_json_file_timeline($filepath, $dbh)
{
    global $tweets_processed, $tweets_failed, $tweets_success, $valid_timeline, $empty_timeline, $invalid_timeline, $populated_timeline, $total_timeline, $all_tweet_ids, $all_users, $bin_name;
    $tweetQueue = new TweetQueue();
    $total_timeline++;
    $filestr = file_get_contents($filepath);
    // sylvester stores multiple json exports in the same file,
    // in order to decode it we will need to split it into its respective individual exports
    $jsons = explode("}][{", $filestr);
    print count($jsons) . " jsons found\n";
    foreach ($jsons as $json) {
        if (substr($json, 0, 2) != "[{") {
            $json = "[{" . $json;
        }
        if (substr($json, -2) != "}]") {
            $json = $json . "}]";
        }
        $timeline = json_decode($json);
        if (is_array($timeline)) {
            $valid_timeline++;
            if (!empty($timeline)) {
                $populated_timeline++;
            } else {
                $empty_timeline++;
            }
        } else {
            $invalid_timeline++;
        }
        foreach ($timeline as $tweet) {
            $t = new Tweet();
            $t->fromJSON($tweet);
            if (!$t->isInBin($bin_name)) {
                $tweetQueue->push($t, $bin_name);
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
                $all_users[] = $t->user->id;
                $all_tweet_ids[] = $t->id;
                $tweets_processed++;
            }
        }
    }
    if ($tweetQueue->length() > 0) {
        $tweetQueue->insertDB();
    }
}
Exemplo n.º 5
0
function get_timeline($user_id, $type, $max_id = null)
{
    print "doing {$user_id}\n";
    global $twitter_keys, $current_key, $ratefree, $looped, $bin_name, $dbh, $tweetQueue;
    $ratefree--;
    if ($ratefree < 1 || $ratefree % 10 == 0) {
        $keyinfo = getRESTKey($current_key, 'statuses', 'user_timeline');
        $current_key = $keyinfo['key'];
        $ratefree = $keyinfo['remaining'];
    }
    $tmhOAuth = new tmhOAuth(array('consumer_key' => $twitter_keys[$current_key]['twitter_consumer_key'], 'consumer_secret' => $twitter_keys[$current_key]['twitter_consumer_secret'], 'token' => $twitter_keys[$current_key]['twitter_user_token'], 'secret' => $twitter_keys[$current_key]['twitter_user_secret']));
    $params = array('count' => 200, 'trim_user' => false, 'exclude_replies' => false, 'contributor_details' => true, 'include_rts' => 1);
    if ($type == "user_id") {
        $params['user_id'] = $user_id;
    } else {
        $params['screen_name'] = $user_id;
    }
    if (isset($max_id)) {
        $params['max_id'] = $max_id;
    }
    $tmhOAuth->user_request(array('method' => 'GET', 'url' => $tmhOAuth->url('1.1/statuses/user_timeline'), 'params' => $params));
    //var_export($params); print "\n";
    if ($tmhOAuth->response['code'] == 200) {
        $tweets = json_decode($tmhOAuth->response['response'], true);
        // store in db
        $tweet_ids = array();
        foreach ($tweets as $tweet) {
            $t = new Tweet();
            $t->fromJSON($tweet);
            $tweet_ids[] = $t->id;
            if (!$t->isInBin($bin_name)) {
                $tweetQueue->push($t, $bin_name);
                print ".";
                if ($tweetQueue->length() > 100) {
                    $tweetQueue->insertDB();
                }
            }
        }
        if (!empty($tweet_ids)) {
            print "\n";
            if (count($tweet_ids) <= 1) {
                print "no more tweets found\n\n";
                return false;
            }
            $max_id = min($tweet_ids);
            print "max id: " . $max_id . "\n";
        } else {
            print "0 tweets found\n\n";
            return false;
        }
        sleep(1);
        get_timeline($user_id, $type, $max_id);
    } else {
        $error_code = json_decode($tmhOAuth->response['response'])->errors[0]->code;
        if ($error_code == 130) {
            print "Twitter is over capacity, sleeping 5 seconds before retry\n";
            sleep(5);
            get_timeline($user_id, $type, $max_id);
        } elseif ($error_code == 88) {
            print "API key rate limit exceeded, sleeping 60 seconds before retry\n";
            sleep(60);
            get_timeline($user_id, $type, $max_id);
        } else {
            echo "\nAPI error: " . $tmhOAuth->response['response'] . "\n";
        }
    }
}
Exemplo n.º 6
0
function search($idlist)
{
    global $twitter_keys, $current_key, $all_users, $all_tweet_ids, $bin_name, $dbh, $tweetQueue;
    $keyinfo = getRESTKey(0);
    $current_key = $keyinfo['key'];
    $ratefree = $keyinfo['remaining'];
    print "\ncurrent key {$current_key} ratefree {$ratefree}\n";
    $tmhOAuth = new tmhOAuth(array('consumer_key' => $twitter_keys[$current_key]['twitter_consumer_key'], 'consumer_secret' => $twitter_keys[$current_key]['twitter_consumer_secret'], 'token' => $twitter_keys[$current_key]['twitter_user_token'], 'secret' => $twitter_keys[$current_key]['twitter_user_secret']));
    // by hundred
    for ($i = 0; $i < sizeof($idlist); $i += 100) {
        if ($ratefree <= 0 || $ratefree % 10 == 0) {
            print "\n";
            $keyinfo = getRESTKey($current_key);
            $current_key = $keyinfo['key'];
            $ratefree = $keyinfo['remaining'];
            $tmhOAuth = new tmhOAuth(array('consumer_key' => $twitter_keys[$current_key]['twitter_consumer_key'], 'consumer_secret' => $twitter_keys[$current_key]['twitter_consumer_secret'], 'token' => $twitter_keys[$current_key]['twitter_user_token'], 'secret' => $twitter_keys[$current_key]['twitter_user_secret']));
        }
        $q = $idlist[$i];
        $n = $i + 1;
        while ($n < $i + 100) {
            if (!isset($idlist[$n])) {
                break;
            }
            $q .= "," . $idlist[$n];
            $n++;
        }
        $params = array('id' => $q);
        $code = $tmhOAuth->user_request(array('method' => 'GET', 'url' => $tmhOAuth->url('1.1/statuses/lookup'), 'params' => $params));
        $ratefree--;
        $reset_connection = false;
        if ($tmhOAuth->response['code'] == 200) {
            $data = json_decode($tmhOAuth->response['response'], true);
            if (is_array($data) && empty($data)) {
                // all tweets in set are deleted
                continue;
            }
            $tweets = $data;
            $tweet_ids = array();
            foreach ($tweets as $tweet) {
                $t = new Tweet();
                $t->fromJSON($tweet);
                if (!$t->isInBin($bin_name)) {
                    $all_users[] = $t->from_user_id;
                    $all_tweet_ids[] = $t->id;
                    $tweet_ids[] = $t->id;
                    $tweetQueue->push($t, $bin_name);
                }
                print ".";
            }
            sleep(1);
            $retries = 0;
            // reset retry counter on success
        } else {
            if ($retries < 4 && $tmhOAuth->response['code'] == 503) {
                /* this indicates problems on the Twitter side, such as overcapacity. we slow down and retry the connection */
                print "!";
                sleep(7);
                $i--;
                // rewind
                $retries++;
                $reset_connection = true;
            } else {
                if ($retries < 4) {
                    print "\n";
                    print "Failure with code " . $tmhOAuth->response['response']['code'] . "\n";
                    var_dump($tmhOAuth->response['response']['info']);
                    var_dump($tmhOAuth->response['response']['error']);
                    var_dump($tmhOAuth->response['response']['errno']);
                    print "The above error may not be permanent. We will sleep and retry the request.\n";
                    sleep(7);
                    $i--;
                    // rewind
                    $retries++;
                    $reset_connection = true;
                } else {
                    print "\n";
                    print "Permanent error when querying the Twitter API. Please investigate the error output. Now stopping.\n";
                    exit(1);
                }
            }
        }
        if ($reset_connection) {
            $tmhOAuth = new tmhOAuth(array('consumer_key' => $twitter_keys[$current_key]['twitter_consumer_key'], 'consumer_secret' => $twitter_keys[$current_key]['twitter_consumer_secret'], 'token' => $twitter_keys[$current_key]['twitter_user_token'], 'secret' => $twitter_keys[$current_key]['twitter_user_secret']));
            $reset_connection = false;
        } else {
            $tweetQueue->insertDB();
        }
    }
}