Example #1
0
/**
 * Build a frequency table for the requested entity type and write it to a CSV/TSV file.
 *
 * For "hashtag", "urls", "hosts" and "mention" the counts come from frequencyTable().
 * For any other value the tweets selected by sqlSubset() are loaded and grouped per
 * $interval in PHP; "user", "user-mention" and "retweet" are the values collected there.
 *
 * Reads the globals $esc, $titles, $tsv, $interval and $outputformat. Terminates the
 * script via die() when $what is neither "user-mention" nor listed in $tsv.
 *
 * @param string $what     Entity type to count (see above).
 * @param string $filename Handed to the CSV writer as the output file.
 */
function generate($what, $filename)
{
    global $tsv, $network, $esc, $titles, $database, $interval, $outputformat;
    require_once __DIR__ . '/CSV.class.php';

    $tweets = $times = $from_user_names = $results = array();
    $csv = new CSV($filename, $outputformat);
    $collation = current_collation();

    // determine interval
    // NOTE(review): the MIN/MAX row fetched here is not used anywhere below; the query
    // is kept only so the sequence of database calls is unchanged - confirm it can go.
    $sql = "SELECT MIN(t.created_at) AS min, MAX(t.created_at) AS max FROM " . $esc['mysql']['dataset'] . "_tweets t ";
    $sql .= sqlSubset();
    $rec = mysql_query($sql);
    $res = mysql_fetch_assoc($rec);

    // get frequencies
    if ($what == "hashtag") {
        $results = frequencyTable("hashtags", "text");
    } elseif ($what == "urls") {
        $results = frequencyTable("urls", "url_followed");
    } elseif ($what == "hosts") {
        $results = frequencyTable("urls", "domain");
    } elseif ($what == "mention") {
        $results = frequencyTable("mentions", "to_user");
    } else {
        // @todo, this could also use database grouping
        $sql = "SELECT id,text COLLATE {$collation} as text,created_at,from_user_name COLLATE {$collation} as from_user_name FROM " . $esc['mysql']['dataset'] . "_tweets t ";
        $sql .= sqlSubset();
        $rec = mysql_query($sql);
        if ($rec && mysql_num_rows($rec) > 0) {
            while ($res = mysql_fetch_assoc($rec)) {
                $tweets[] = $res['text'];
                $times[] = $res['created_at'];
                $from_user_names[] = strtolower($res['from_user_name']);
            }
        }

        // extract desired things ($what) and group per interval
        foreach ($tweets as $key => $tweet) {
            $timestamp = strtotime($times[$key]);
            switch ($interval) {
                case "hourly":
                    $group = strftime("%Y-%m-%d %Hh", $timestamp);
                    break;
                case "weekly":
                    // %u is the weekday number in strftime(); %G/%V give the ISO-8601
                    // year and week number, which is what a weekly bucket needs.
                    $group = strftime("%G %V", $timestamp);
                    break;
                case "monthly":
                    $group = strftime("%Y-%m", $timestamp);
                    break;
                case "yearly":
                    // Year only; "%Y-%m" here would silently produce monthly buckets.
                    $group = strftime("%Y", $timestamp);
                    break;
                case "overall":
                    $group = "overall";
                    break;
                case "custom":
                    $group = groupByInterval(strftime("%Y-%m-%d", $timestamp));
                    break;
                default:
                    // default daily
                    $group = strftime("%Y-%m-%d", $timestamp);
            }

            switch ($what) {
                case "user":
                    $results[$group][] = $from_user_names[$key];
                    break;
                case "user-mention":
                    foreach (get_replies($tweet) as $thing) {
                        $results[$group]['mentions'][] = $thing;
                    }
                    $results[$group]['users'][] = $from_user_names[$key];
                    break;
                case "retweet":
                    // TODO, write stemming function
                    $results[$group][] = $tweet;
                    break;
                default:
                    break;
            }
        }

        // count frequency of occurence of thing, per interval
        // ("user-mention" keeps its raw lists; they are merged further down)
        if ($what != "user-mention") {
            foreach ($results as $group => $things) {
                $counted_things = array_count_values($things);
                arsort($counted_things);
                $results[$group] = $counted_things;
            }
        }
    }

    // Optional " with search ..." suffix shared by both title lines below.
    $q = !empty($esc['shell']['query']) ? " with search " . $esc['shell']['query'] : "";

    if ($what == "user-mention") {
        // network output for users: one row per user with "mentions,tweets" packed
        // into a single string so it fits the group => thing => value shape.
        foreach ($results as $group => $things) {
            // A group has no 'mentions' key when none of its tweets mentioned anyone.
            $tmp_mentions = isset($things['mentions']) ? array_count_values($things['mentions']) : array();
            $tmp_users = array_count_values($things['users']);
            $counted_things = array();
            // add all from_user_names
            foreach ($tmp_users as $user => $count) {
                if (isset($tmp_mentions["@" . $user])) {
                    $counted_things[$user] = $tmp_mentions["@" . $user] . "," . $count;
                } else {
                    $counted_things[$user] = "0," . $count;
                }
            }
            // add all users which were replied but not in the set
            foreach ($tmp_mentions as $user => $count) {
                $user = str_replace("@", "", $user);
                if (!isset($counted_things[$user])) {
                    $counted_things[$user] = $count . ",0";
                }
            }
            ksort($counted_things);
            $results[$group] = $counted_things;
        }

        if (isset($titles[$what])) {
            $csv->writeheader(array($titles[$what] . $q . " from " . $esc['date']["startdate"] . " to " . $esc['date']["enddate"]));
        }
        $csv->writeheader(array("date", "user", "mentions", "tweets"));
        foreach ($results as $group => $things) {
            foreach ($things as $thing => $count) {
                // unpack what we packed
                $exp = explode(",", $count);
                $csv->newrow();
                $csv->addfield($group);
                $csv->addfield($thing);
                $csv->addfield($exp[0]);
                $csv->addfield($exp[1]);
                $csv->writerow();
            }
        }
    } elseif (in_array($what, $tsv)) {
        // write tsv output
        ksort($results);
        if (isset($titles[$what])) {
            $csv->writeheader(array($titles[$what] . " for " . $esc['shell']['datasetname'] . $q . " from " . $esc['date']["startdate"] . " to " . $esc['date']["enddate"]));
        }
        if ($what == "urls") {
            $csv->writeheader(array("date", "frequency", "tweetedurl"));
        } elseif ($what == "hosts") {
            $csv->writeheader(array("date", "frequency", "domain", "name"));
        } else {
            $csv->writeheader(array("date", "frequency", $what));
        }
        foreach ($results as $group => $things) {
            arsort($things);
            foreach ($things as $thing => $count) {
                // skip empty entities and those below the minimum frequency
                if (empty($thing)) {
                    continue;
                }
                if ($count < $esc['shell']['minf']) {
                    continue;
                }
                $csv->newrow();
                $csv->addfield($group);
                $csv->addfield($count);
                $csv->addfield($thing);
                $csv->writerow();
            }
        }
    } else {
        die('no valid output format found');
    }
    $csv->close();
}
Example #2
0
        <h1>TCAT :: Export URLs</h1>

        <?php 
// Export every URL attached to the tweets selected by sqlSubset(), one CSV row per URL.
validate_all_variables();
$filename = get_filename_for_export('urlsExport');
$csv = new CSV($filename, $outputformat);
// The fields added per row below must follow this column order.
$csv->writeheader(array('tweet_id', 'url', 'url_expanded', 'url_followed'));
$sql = "SELECT t.id as id, u.url as url, u.url_expanded as url_expanded, u.url_followed as url_followed FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_urls u ";
$sql .= sqlSubset();
$sql .= " AND u.tweet_id = t.id ORDER BY id";
// Unbuffered query: rows are fetched from the server as they are consumed.
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
        $csv->addfield($data['id'], 'integer');
        $csv->addfield($data['url'], 'string');
        // url_expanded column: blank when missing or at most one character long
        if (isset($data['url_expanded']) && strlen($data['url_expanded']) > 1) {
            $csv->addfield($data['url_expanded'], 'string');
        } else {
            $csv->addfield('', 'string');
        }
        // url_followed column: write the followed URL itself, not the original url
        if (isset($data['url_followed']) && strlen($data['url_followed']) > 1) {
            $csv->addfield($data['url_followed'], 'string');
        } else {
            $csv->addfield('', 'string');
        }
        $csv->writerow();
    }
    mysql_free_result($sqlresults);
/**
 * Write a CSV describing how each word's associational profile changes between
 * consecutive time slices, then print an HTML fieldset linking to that file.
 *
 * Terminates the script via die() when $series or $keywordToTrack is empty.
 *
 * @param mixed $filename       Ignored: replaced by get_filename_for_export() below
 *                              (parameter kept so existing callers keep working).
 * @param array $series         time slice => object whose getCowords() yields
 *                              word => (coword => frequency).
 * @param mixed $keywordToTrack Only checked for emptiness in this function.
 * @param array $ap             word => (time slice => profile), where a profile is
 *                              iterated as term => value.
 */
function variabilityOfAssociationProfiles($filename, $series, $keywordToTrack, $ap)
{
    global $outputformat;
    if (empty($series) || empty($keywordToTrack)) {
        die('not enough data');
    }
    $filename = get_filename_for_export("variability", "_variabilityOfAssociationProfiles");
    $csv = new CSV($filename, $outputformat);

    // keep track of degree per word per time slice: every (word, coword) pair adds
    // one to both ends
    $degree = array();
    foreach ($series as $time => $cw) {
        foreach ($cw->getCowords() as $word => $cowords) {
            foreach ($cowords as $coword => $frequency) {
                if (!isset($degree[$word][$time])) {
                    $degree[$word][$time] = 0;
                }
                if (!isset($degree[$coword][$time])) {
                    $degree[$coword][$time] = 0;
                }
                $degree[$word][$time]++;
                $degree[$coword][$time]++;
            }
        }
    }

    // calculate similarity and changes between each slice and the one before it;
    // the first slice of a word therefore has no entry in these tables
    $cos_sim = $change_in = $change_out = $stable = array();
    foreach ($ap as $word => $times) {
        $times_keys = array_keys($times);
        $nrOfSlices = count($times_keys);
        for ($i = 1; $i < $nrOfSlices; $i++) {
            $current = $times_keys[$i];
            $v1 = $times[$times_keys[$i - 1]];
            $v2 = $times[$current];
            $cos_sim[$word][$current] = cosineSimilarity($v1, $v2);
            $change_out[$word][$current] = change($v1, $v2);
            $change_in[$word][$current] = change($v2, $v1);
            $stable[$word][$current] = array_intersect(array_keys($v1), array_keys($v2));
        }
    }

    // @todo, frequency
    $csv->writeheader(array("key", "time", "degree", "similarity", "associational profile", "change in", "change out", "stable"));
    foreach ($ap as $word => $times) {
        foreach ($times as $time => $profile) {
            // Lists are collected and joined with implode() so that an empty list
            // yields "" (substr() on an empty string returns false before PHP 8).
            $inc = array();
            if (isset($change_in[$word][$time])) {
                foreach ($change_in[$word][$time] as $w => $c) {
                    $inc[] = "{$w} ({$c})";
                }
            }
            $outc = array();
            if (isset($change_out[$word][$time])) {
                foreach ($change_out[$word][$time] as $w => $c) {
                    $outc[] = "{$w} ({$c})";
                }
            }
            $stablec = isset($stable[$word][$time]) ? implode(", ", $stable[$word][$time]) : "";
            $prof = array();
            foreach ($profile as $w => $c) {
                $prof[] = "{$w} ({$c})";
            }
            $deg = isset($degree[$word][$time]) ? $degree[$word][$time] : "";
            $cs = isset($cos_sim[$word][$time]) ? $cos_sim[$word][$time] : "";

            $csv->newrow();
            $csv->addfield($word);
            $csv->addfield($time);
            $csv->addfield($deg);
            $csv->addfield($cs);
            $csv->addfield(implode(", ", $prof));
            $csv->addfield(implode(", ", $inc));
            $csv->addfield(implode(", ", $outc));
            $csv->addfield($stablec);
            $csv->writerow();
        }
    }
    $csv->close();

    echo '<fieldset class="if_parameters">';
    echo '<legend>Your co-hashtag variability File</legend>';
    echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
    echo '</fieldset>';
}