<h1>TCAT :: Export Tweets language</h1>

        <?php 
validate_all_variables();
/* @todo, use same export possibilities as mod.export_tweets.php */
$header = "id,time,created_at,from_user_name,from_user_lang,text,source,location,lat,lng,from_user_tweetcount,from_user_followercount,from_user_friendcount,from_user_realname,to_user_name,in_reply_to_status_id,quoted_status_id,from_user_listed,from_user_utcoffset,from_user_timezone,from_user_description,from_user_url,from_user_verified,filter_level,cld_name,cld_code,cld_reliable,cld_bytes,cld_percent";
if (isset($_GET['includeUrls']) && $_GET['includeUrls'] == 1) {
    $header .= ",urls,urls_expanded,urls_followed,domains";
}
$header .= "\n";
$langset = $esc['mysql']['dataset'] . '_lang';
$sql = "SELECT * FROM " . $esc['mysql']['dataset'] . "_tweets t inner join {$langset} l on t.id = l.tweet_id ";
$sql .= sqlSubset();
$sqlresults = mysql_query($sql);
$filename = get_filename_for_export("fullExportLang");
$csv = new CSV($filename, $outputformat);
$csv->writeheader(explode(',', trim($header))); // trim the trailing newline so it does not become part of the last column name
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
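        // exports that join the _urls table expose the tweet id as tweet_id rather than id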
        if (preg_match("/_urls/", $sql)) {
            $id = $data['tweet_id'];
        } else {
            $id = $data['id'];
        }
        $csv->addfield($id);
        $csv->addfield(strtotime($data["created_at"]));
        $fields = array('created_at', 'from_user_name', 'from_user_lang', 'text', 'source', 'location', 'geo_lat', 'geo_lng', 'from_user_tweetcount', 'from_user_followercount', 'from_user_friendcount', 'from_user_realname', 'to_user_name', 'in_reply_to_status_id', 'quoted_status_id', 'from_user_listed', 'from_user_utcoffset', 'from_user_timezone', 'from_user_description', 'from_user_url', 'from_user_verified', 'filter_level');
        foreach ($fields as $f) {
            $csv->addfield(isset($data[$f]) ? $data[$f] : '');
    $content .= $value["id"] . "," . $key . "," . $value["notweets"] . "," . $value["nomentions"] . "," . $value["nomentions"] / $value["notweets"] . "," . $value["user_tweetcount"] . "," . $value["user_followercount"] . "," . $value["user_friendcount"] . "," . $value["user_listed"] . "," . $value["user_frienddivfollower"] . "," . $value["user_friendminfollower"] . "," . $value["linkcount"] . "," . $value["domaincount"] . "," . $value["domaincount"] / $value["linkcount"] . "," . $value["user_utcoffset"] . "\n";
}
// add edge attributes: weight, social slope, and directedness
$content .= "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,socialslope DOUBLE,directed BOOLEAN\n";
foreach ($edges as $key => $value) {
    $ids = explode(",", $key);
    $from = $users[$usersinv[$ids[0]]]["user_followercount"];
    $to = $users[$usersinv[$ids[1]]]["user_followercount"];
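    // social slope of the edge: 0 when both users have the same follower count,
    // negative (follower ratio) when the source of the edge has more followers than the target,
    // positive when the target has more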
    if ($from == $to) {
        $slope = 0;
    }
    if ($from > $to) {
        $slope = $from / $to;
        $slope = -$slope;
    }
    if ($from < $to) {
        $slope = $to / $from;
    }
    $content .= $key . "," . $value . "," . $slope . ",true\n";
}
$filename = get_filename_for_export("mention", "", "gdf");
file_put_contents($filename, $content);
echo '<fieldset class="if_parameters">';
echo '<legend>Your File</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?>

    </body>
</html>
Example #3
function get_file($what)
{
    validate_all_variables();
    // get filename (this also validates the data)
    global $database;
    $filename = get_filename_for_export($what);
    generate($what, $filename);
    // redirect to file
    $location = str_replace("index.php", "", ANALYSIS_URL) . filename_to_url($filename);
    if (defined('LOCATION')) {
        $location = LOCATION . $location;
    }
    header("Content-type: text/csv");
    header("Location: {$location}");
}
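For context, an analysis module calls this helper with the name of the export it wants; the browser is then redirected to the generated CSV file. A minimal hypothetical call (the export name here is illustrative, not taken from the source):

get_file("userStats");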
Example #4
arsort($topusers);
if ($esc["shell"]["topu"] > 0) {
    $topusers = array_slice($topusers, 0, $esc["shell"]["topu"], true);
}
//print_r($topusers);
$content = "nodedef>name VARCHAR,label VARCHAR,no_tweets INT,no_mentions INT\n";
foreach ($users as $key => $value) {
    if (isset($topusers[$key])) {
        $content .= $value["id"] . "," . $key . "," . $value["notweets"] . "," . $value["nomentions"] . "\n";
    }
}
$content .= "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE\n";
foreach ($edges as $key => $value) {
    $tmp = explode(",", $key);
    if (isset($topusers[$usersinv[$tmp[0]]]) && isset($topusers[$usersinv[$tmp[1]]])) {
        $content .= $key . "," . $value . "\n";
    }
}
//echo $content;
// add filename for top user filter  "_minDegreeOf".$esc['shell']['minf']
$filename = get_filename_for_export("mention", "_Top" . $esc['shell']['topu'], "gdf");
file_put_contents($filename, $content);
echo '<fieldset class="if_parameters">';
echo '<legend>Your File</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?>

    </body>
</html>
Example #5
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: Sentiment Cloud</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export("sentiment_cloud");
$csv = new CSV($filename, $outputformat);
$sql = "SELECT s.explanation FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_sentiment s ";
$sql .= sqlSubset("s.tweet_id = t.id AND ");
//print $sql . "<br>";die;
$rec = mysql_unbuffered_query($sql);
$negativeSentiments = $positiveSentiments = $wordValues = array();
while ($res = mysql_fetch_assoc($rec)) {
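    // the explanation column lists each scored term with its value in brackets, e.g. "great[3] awful[-4]";
    // capture the word/score pairs so they can be tallied into positive and negative clouds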
    if (preg_match_all("/[\\s|\\B]([\\p{L}\\w\\d_]+)\\[(-?\\d)\\]/u", $res['explanation'], $matches)) {
        foreach ($matches[1] as $k => $word) {
            $word = strtolower(trim($word));
            $sentimentValue = (int) $matches[2][$k];
            if ($sentimentValue < 0) {
                if (array_key_exists($word, $negativeSentiments) === false) {
                    $negativeSentiments[$word] = 0;
                }
Example #6
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: User Stats</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export("userStats");
$csv = new CSV($filename, $outputformat);
// tweets per user
$sql = "SELECT count(distinct(t.id)) AS count, t.from_user_id, ";
$sql .= sqlInterval();
$sql .= " FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sql .= "GROUP BY datepart, from_user_id";
//print $sql . "<br>";
$sqlresults = mysql_unbuffered_query($sql);
$array = array();
while ($res = mysql_fetch_assoc($sqlresults)) {
    $array[$res['datepart']][$res['from_user_id']] = $res['count'];
}
mysql_free_result($sqlresults);
if (!empty($array)) {
Example #7
    <head>
        <title>TCAT :: Hashtag user activity</title>

        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

        <link rel="stylesheet" href="css/main.css" type="text/css" />

    </head>

    <body>

        <h1>TCAT :: Hashtag user activity</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export("hashtagUserActivity", isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "");
$csv = new CSV($filename, $outputformat);
// select nr of tweets in subset
$sql = "SELECT count(id) AS count FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$rec = mysql_query($sql);
if (mysql_num_rows($rec) > 0) {
    $res = mysql_fetch_assoc($rec);
} else {
    die('no data in selection');
}
$nrOfTweets = $res['count'];
$collation = current_collation();
// select nr of users in subset
$sql = "SELECT count(distinct(from_user_name COLLATE {$collation})) AS count FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
Example #8
<?php

require_once __DIR__ . '/common/config.php';
require_once __DIR__ . '/common/functions.php';
validate_all_variables();
dataset_must_exist();
$filename = get_filename_for_export("ids");
$stream_to_open = export_start($filename, $outputformat);
$sql = "SELECT id FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        if (preg_match("/_urls/", $sql)) {
            $id = $data['tweet_id'];
        } else {
            $id = $data['id'];
        }
        $out .= $id . "\n";
    }
    mysql_free_result($sqlresults);
}
$fp = fopen($stream_to_open, 'w');
if ($fp === false) {
    die("Could not open output file.");
}
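// prepend the UTF-8 byte order mark (0xEF 0xBB 0xBF) so spreadsheet applications detect the encoding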
fwrite($fp, chr(239) . chr(187) . chr(191) . $out);
fclose($fp);
if (!$use_cache_file) {
    exit(0);
Example #9
}
// print stats
foreach ($paths_node_counts_distribution as $length => $frequency) {
    print "Networks with {$length} nodes: {$frequency}<br>";
}
print "<br><b>Now analyzing only networks with at least {$min_nr_of_nodes} nodes</b><br><br>";
print "Networks in analysis: " . array_sum($paths_node_counts_distribution_tmp) . "<br>";
print "Networks in analysis without root: " . $rootsnotfound . "<bR>";
print "<bR>";
ksort($network_types_stats);
foreach ($network_types_stats as $type => $count) {
    print "Networks of type {$type}: {$count}<br>";
}
flush();
// @todo percentage of different structures (pure stars, pure chains, mixed structure). Ratio between average degree and number of nodes.
$filename = get_filename_for_export("interactionGraph", "min" . $esc['shell']['minf'] . "nodes", "gexf");
$gexf = new Gexf();
$gexf->setTitle("interaction graph " . $filename);
$gexf->setEdgeType(GEXF_EDGE_DIRECTED);
$gexf->setCreator("tools.digitalmethods.net");
// label: user, attribute: tweet
// min, max, mean of what?? distributions of nr of nodes in the network
foreach ($links as $link) {
    $tweet_id = $link[0];
    $in_reply_to_status_id = $link[1];
    if (array_search($in_reply_to_status_id, $todo) === false && array_search($tweet_id, $todo) === false) {
        continue;
    }
    $node1 = new GexfNode($in_reply_to_status_id);
    // if the node already exists, we want the attributes added to that node instead
    $id = $gexf->nodeExists($node1);
Example #10
    }
}
// TODO: Support these. This shouldn't be difficult, but requires a little different logic.
if ($esc['date']['interval'] == "custom" || $esc['date']['interval'] == "overall") {
    echo '<b>Notice:</b> You have selected an interval type which is not yet supported by this export module.<br/>';
    echo '</body></html>';
    die;
}
// make filename and open file for write
if ($bin_type == "geotrack") {
    $module = "rateLimitDataGeo";
} else {
    $module = "ratelimitData";
}
$module .= "-" . $esc['date']['interval'];
$filename = get_filename_for_export($module);
$csv = new CSV($filename, $outputformat);
// write header
$header = "querybin,datetime,tweets ratelimited (estimate)";
$csv->writeheader(explode(',', $header));
$sqlInterval = sqlInterval();
$sqlSubset = sqlSubset();
$sqlGroup = " GROUP BY datepart ASC";
// Use native MySQL to create a temporary table with all dateparts. They should be identical to the dateparts we will use in the GROUP BY statement.
// Prepare the string mysql needs in date_add()
$mysqlNativeInterval = "day";
// default $interval = daily
switch ($esc['date']['interval']) {
    case "hourly":
        $mysqlNativeInterval = "hour";
        break;
Example #11
        <script type="text/javascript" language="javascript">



        </script>

    </head>

    <body>

        <h1>TCAT :: Source stats</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export("sourceStats");
$csv = new CSV($filename, $outputformat);
// tweets per source
$sql = "SELECT COUNT(*) AS count, source, ";
$sql .= sqlInterval();
$sql .= " FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sql .= "GROUP BY datepart,source ORDER BY count DESC";
//print $sql . "<br>"; exit;
$sqlresults = mysql_unbuffered_query($sql);
$array = array();
while ($res = mysql_fetch_assoc($sqlresults)) {
    $res['source'] = preg_replace("/<.+>/U", "", $res['source']);
    $res['source'] = preg_replace("/,/", "_", $res['source']);
    $res['source'] = preg_replace("/[ \\s\t]+/", " ", $res['source']);
    $res['source'] = trim($res['source']);
Example #12
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: Word list</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export("wordList");
$csv = new CSV($filename, $outputformat);
mysql_query("set names utf8");
$sql = "SELECT id, text FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sqlresults = mysql_unbuffered_query($sql);
$debug = '';
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $text = $data["text"];
        preg_match_all('/(https?:\\/\\/[^\\s]+)|([\\p{L}][\\p{L}]+)/u', $text, $matches, PREG_PATTERN_ORDER);
        foreach ($matches[0] as $word) {
            if (preg_match('/(https?:\\/\\/)/u', $word)) {
                continue;
            }
            $word = strtolower($word);
Example #13
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: Export hashtags</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export('hashtagExport');
$csv = new CSV($filename, $outputformat);
$csv->writeheader(array('tweet_id', 'hashtag'));
$sql = "SELECT t.id as id, h.text as hashtag FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h ";
$sql .= sqlSubset();
$sql .= " AND h.tweet_id = t.id ORDER BY id";
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
        $csv->addfield($data['id'], 'integer');
        $csv->addfield($data['hashtag'], 'string');
        $csv->writerow();
    }
    mysql_free_result($sqlresults);
Example #14
        <script type="text/javascript" language="javascript">



        </script>

    </head>

    <body>

        <h1>TCAT :: Source / hashtag co-occurrence</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export("sourceHashtag", '', 'gexf');
$collation = current_collation();
//print_r($_GET);
$sql = "SELECT LOWER(t.source COLLATE {$collation}) AS source, LOWER(h.text COLLATE {$collation}) AS hashtag FROM ";
$sql .= $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h ";
$where = "t.id = h.tweet_id AND ";
$sql .= sqlSubset($where);
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    $res['source'] = preg_replace("/<.+>/U", "", $res['source']);
    $res['source'] = preg_replace("/[ \\s\t]+/", " ", $res['source']);
    $res['source'] = trim($res['source']);
    if (!isset($sourcesHashtags[$res['source']][$res['hashtag']])) {
        $sourcesHashtags[$res['source']][$res['hashtag']] = 0;
    }
    $sourcesHashtags[$res['source']][$res['hashtag']]++;
Example #15
        <script type="text/javascript" language="javascript">



        </script>

    </head>

    <body>

        <h1>TCAT :: User list</h1>

        <?php 
validate_all_variables();
$collation = current_collation();
$filename = get_filename_for_export("user.list");
$csv = new CSV($filename, $outputformat);
// tweets per user
$sql = "SELECT t.from_user_id,t.from_user_name COLLATE {$collation} as from_user_name,t.from_user_lang,t.from_user_tweetcount,t.from_user_followercount,t.from_user_friendcount,t.from_user_listed,t.from_user_utcoffset,t.from_user_verified,count(distinct(t.id)) as tweetcount, ";
$sql .= sqlInterval();
$sql .= " FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sql .= "GROUP BY datepart, from_user_id";
$sqlresults = mysql_unbuffered_query($sql);
$array = array();
while ($res = mysql_fetch_assoc($sqlresults)) {
    $array[$res['datepart']][$res['from_user_name']] = $res;
}
mysql_free_result($sqlresults);
// retweets per user
$sql = "SELECT count(t.retweet_id) as count, t.from_user_name COLLATE {$collation} as from_user_name, ";
$sql .= "ORDER BY h1,h2";
//print $sql."<br>";
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    $coword->addWord($res['h1']);
    $coword->addWord($res['h2']);
    $coword->addCoword($res['h1'], $res['h2'], 1);
}
mysql_free_result($sqlresults);
unset($coword->words);
// since words are added manually, the automatically tracked word frequency would be unreliable
if ($esc['shell']['minf'] > 0 && !($esc['shell']['topu'] > 0)) {
    $coword->applyMinFreq($esc['shell']['minf']);
    //$coword->applyMinDegree($esc['shell']['minf']);	// Berno: method no longer in use, remains unharmed
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_sentiment_minFreqOf" . $esc['shell']['minf'], "gexf");
} elseif ($esc['shell']['topu'] > 0) {
    $coword->applyTopUnits($esc['shell']['topu']);
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_sentiment_Top" . $esc['shell']['topu'], "gexf");
} else {
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_sentiment", "gexf");
}
file_put_contents($filename, $coword->getCowordsAsGexf($filename));
echo '<fieldset class="if_parameters">';
echo '<legend>Your GEXF File</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?>

    </body>
</html>
Example #17
            }
            fputs($tempfile, "\"{$datepart}\" \"{$word}\"\n");
        }
    }
    mysql_free_result($sqlresults);
}
if (function_exists('eio_fsync')) {
    eio_fsync($tempfile);
} else {
    fflush($tempfile);
}
$tempmeta = stream_get_meta_data($tempfile);
$templocation = $tempmeta["uri"];
// write csv results
// CSV is written by awk here, so we explicitly handle the output format
$filename = get_filename_for_export("wordFrequency");
$csv = fopen($filename, "w");
fputs($csv, chr(239) . chr(187) . chr(191));
if ($outputformat == 'tsv') {
    fputs($csv, "interval\tword\tfrequency\n");
} else {
    fputs($csv, "interval,word,frequency\n");
}
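// the shell pipeline counts identical (interval, word) pairs, orders them by interval and
// descending frequency, keeps only words occurring at least $minf times, and appends them to the export file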
if ($outputformat == 'tsv') {
    system("sort -S 8% {$templocation} | uniq -c | sort -S 8% -b -k 2,2 -k 1,1nr -k 3,3 | awk '{ if (\$1 >= {$minf}) { print \$2 \"\\t\" \$3 \"\\t\" \$1} }' | sed -e 's/_/ /' >> {$filename}");
} else {
    system("sort -S 8% {$templocation} | uniq -c | sort -S 8% -b -k 2,2 -k 1,1nr -k 3,3 | awk '{ if (\$1 >= {$minf}) { print \$2 \",\" \$3 \",\" \$1} }' | sed -e 's/_/ /' >> {$filename}");
}
fclose($csv);
fclose($tempfile);
// this removes the temporary file
Example #18
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $datepart = $data["datepart"];
        $url = $data["url"];
        if (!array_key_exists($datepart, $media_url_count)) {
            $media_url_count[$datepart] = array();
        }
        if (!array_key_exists($url, $media_url_count[$datepart])) {
            $media_url_count[$datepart][$url] = 1;
        } else {
            $media_url_count[$datepart][$url]++;
        }
    }
    mysql_free_result($sqlresults);
}
// write csv results
$filename = get_filename_for_export("mediaFrequency");
$csv = new CSV($filename, $outputformat);
$csv->writeheader(array('interval', 'media url', 'frequency'));
foreach ($media_url_count as $datepart => $url_count) {
    arsort($url_count);
    foreach ($url_count as $url => $count) {
        $csv->newrow();
        $csv->addfield($datepart);
        $csv->addfield($url);
        $csv->addfield($count);
        $csv->writerow();
    }
}
$csv->close();
echo '<fieldset class="if_parameters">';
echo '<legend>Your File</legend>';
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: Export Tweets sentiment</h1>

        <?php 
validate_all_variables();
/* @todo, use same export possibilities as mod.export_tweets.php */
$filename = get_filename_for_export("fullExport-sentiment");
$csv = new CSV($filename, $outputformat);
$header = "id";
$header .= ",sentistrength,negative,positive";
$csv->writeheader($header);
$sql = "SELECT s.positive, s.negative, s.explanation, t.from_user_name as user, t.id as tid FROM " . $esc['mysql']['dataset'] . "_sentiment s, " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset("t.id = s.tweet_id AND ");
$rec = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($rec)) {
    $sentiment[$res['tid']]['pos'] = $res['positive'];
    $sentiment[$res['tid']]['neg'] = $res['negative'];
    $sentiment[$res['tid']]['desc'] = $res['explanation'];
}
mysql_free_result($rec);
$sql = "SELECT * FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
Example #20
validate_all_variables();
// make filename and open file for write
$module = "gapData";
$sql = "SELECT id, `type` FROM tcat_query_bins WHERE querybin = '" . mysql_real_escape_string($esc['mysql']['dataset']) . "'";
$sqlresults = mysql_query($sql);
if ($res = mysql_fetch_assoc($sqlresults)) {
    $bin_id = $res['id'];
    $bin_type = $res['type'];
} else {
    die("Query bin not found!");
}
$exportSettings = array();
if (isset($_GET['exportSettings']) && $_GET['exportSettings'] != "") {
    $exportSettings = explode(",", $_GET['exportSettings']);
}
$filename = get_filename_for_export($module, implode("_", $exportSettings));
$csv = new CSV($filename, $outputformat);
// write header
$header = "start,end";
$csv->writeheader(explode(',', $header));
// make query
$sql = "SELECT * FROM tcat_error_gap WHERE type = '" . mysql_real_escape_string($bin_type) . "' and\n                                                   start >= '" . mysql_real_escape_string($_GET['startdate']) . "' and end <= '" . mysql_real_escape_string($_GET['enddate']) . "'";
// loop over results and write to file
$sqlresults = mysql_query($sql);
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        // the query bin must have been active during the gap period, if we want to report it as a possible gap
        $sql2 = "SELECT count(*) as cnt FROM tcat_query_bins_phrases WHERE querybin_id = {$bin_id} and\n                                                            starttime <= '" . $data["end"] . "' and (endtime >= '" . $data["start"] . "' or endtime is null or endtime = '0000-00-00 00:00:00')";
        $sqlresults2 = mysql_query($sql2);
        if ($sqlresults2) {
            if ($data2 = mysql_fetch_assoc($sqlresults2)) {
Example #21
        <script type="text/javascript" language="javascript">



        </script>

    </head>

    <body>

        <h1>TCAT :: Language / hashtag co-occurrence</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export("languageHashtag", '', 'gexf');
//print_r($_GET);
$collation = current_collation();
$sql = "SELECT LOWER(t.from_user_lang) AS language, LOWER(h.text COLLATE {$collation}) AS hashtag FROM ";
$sql .= $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h ";
$where = "t.id = h.tweet_id AND ";
$sql .= sqlSubset($where);
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    //print_r($res); exit;
    $res['language'] = preg_replace("/<.+>/U", "", $res['language']);
    $res['language'] = preg_replace("/[ \\s\t]+/", " ", $res['language']);
    $res['language'] = trim($res['language']);
    if (!isset($languagesHashtags[$res['language']][$res['hashtag']])) {
        $languagesHashtags[$res['language']][$res['hashtag']] = 0;
    }
Example #22
function variabilityOfAssociationProfiles($filename, $series, $keywordToTrack, $ap)
{
    if (empty($series) || empty($keywordToTrack)) {
        die('not enough data');
    }
    $filename = get_filename_for_export("hashtagVariability", "_variabilityOfAssociationProfiles", "gexf");
    // group per slice
    // per keyword
    // 	get associated words (depth 1) per slice
    // 	get frequency, degree, ap variation (calculated on cooc frequency), words in, words out, ap keywords
    $degree = array();
    foreach ($series as $time => $cw) {
        $cw = $cw->getCowords();
        foreach ($cw as $word => $cowords) {
            foreach ($cowords as $coword => $frequency) {
                // save how many time slices the word appears
                $words[$word][$time] = 1;
                $words[$coword][$time] = 1;
                // keep track of degree per word per time slice
                if (array_key_exists($word, $degree) === false) {
                    $degree[$word] = array();
                }
                if (array_key_exists($coword, $degree) === false) {
                    $degree[$coword] = array();
                }
                if (array_key_exists($time, $degree[$word]) === false) {
                    $degree[$word][$time] = 0;
                }
                if (array_key_exists($time, $degree[$coword]) === false) {
                    $degree[$coword][$time] = 0;
                }
                $degree[$word][$time]++;
                $degree[$coword][$time]++;
            }
        }
    }
    // count the nr of time slices each word appears in
    foreach ($words as $word => $times) {
        $documentsPerWords[$word] = count($times);
    }
    // calculate similarity and changes
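    // for every word, compare its association profile in each time slice with the previous one:
    // cosine similarity between the two profiles, the co-words that differ between slices,
    // and the co-words present in both (stable)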
    foreach ($ap as $word => $times) {
        $times_keys = array_keys($times);
        for ($i = 1; $i < count($times_keys); $i++) {
            $im1 = $i - 1;
            $v1 = $times[$times_keys[$im1]];
            $v2 = $times[$times_keys[$i]];
            $cos_sim[$word][$times_keys[$i]] = cosineSimilarity($v1, $v2);
            $change_out[$word][$times_keys[$i]] = change($v1, $v2);
            $change_in[$word][$times_keys[$i]] = change($v2, $v1);
            $stable[$word][$times_keys[$i]] = array_intersect(array_keys($v1), array_keys($v2));
        }
    }
    // @todo, frequency
    $out = "key\ttime\tdegree\tsimilarity\tassociational profile\tchange in\tchange out\tstable\n";
    foreach ($ap as $word => $times) {
        foreach ($times as $time => $profile) {
            if (isset($change_in[$word][$time])) {
                $inc = "";
                foreach ($change_in[$word][$time] as $w => $c) {
                    $inc .= "{$w} ({$c}), ";
                }
                $inc = substr($inc, 0, -2);
            } else {
                $inc = "";
            }
            if (isset($change_out[$word][$time])) {
                $outc = "";
                foreach ($change_out[$word][$time] as $w => $c) {
                    $outc .= "{$w} ({$c}), ";
                }
                $outc = substr($outc, 0, -2);
            } else {
                $outc = "";
            }
            if (isset($stable[$word][$time])) {
                $stablec = array();
                foreach ($stable[$word][$time] as $w) {
                    $stablec[] = $w;
                }
                $stablec = implode(", ", $stablec);
            } else {
                $stablec = "";
            }
            $prof = "";
            foreach ($profile as $w => $c) {
                $prof .= "{$w} ({$c}), ";
            }
            $prof = substr($prof, 0, -2);
            if (isset($degree[$word][$time])) {
                $deg = $degree[$word][$time];
            } else {
                $deg = "";
            }
            if (isset($cos_sim[$word][$time])) {
                $cs = $cos_sim[$word][$time];
            } else {
                $cs = "";
            }
            $out .= $word . "\t" . $time . "\t" . $deg . "\t" . $cs . "\t" . $prof . "\t" . $inc . "\t" . $outc . "\t" . $stablec . "\n";
        }
    }
    file_put_contents($filename, chr(239) . chr(187) . chr(191) . $out);
    echo '<fieldset class="if_parameters">';
    echo '<legend>Your co-hashtag variability File</legend>';
    echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
    echo '</fieldset>';
}
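The cosineSimilarity() and change() helpers used above are defined elsewhere in TCAT. As a point of reference, here is a minimal sketch of a cosine similarity between two word => frequency profiles; this is an assumption about what the helper computes, not the project's actual implementation.

function cosine_similarity_sketch(array $a, array $b)
{
    // dot product over the words the two profiles share
    $dot = 0;
    foreach ($a as $word => $freq) {
        if (isset($b[$word])) {
            $dot += $freq * $b[$word];
        }
    }
    // Euclidean norms of both profiles
    $normA = sqrt(array_sum(array_map(function ($v) { return $v * $v; }, $a)));
    $normB = sqrt(array_sum(array_map(function ($v) { return $v * $v; }, $b)));
    return ($normA > 0 && $normB > 0) ? $dot / ($normA * $normB) : 0.0;
}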
Example #23
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: Export mentions</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export('mentionExport');
$csv = new CSV($filename, $outputformat);
$csv->writeheader(array('tweet_id', 'user_from_id', 'user_from_name', 'user_to_id', 'user_to_name', 'mention_type'));
$sql = "SELECT t.id as id, t.text as text, m.from_user_id as user_from_id, m.from_user_name as user_from_name, m.to_user_id as user_to_id, m.to_user as user_to_name FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_mentions m ";
$sql .= sqlSubset();
$sql .= " AND m.tweet_id = t.id ORDER BY id";
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
        $csv->addfield($data['id'], 'integer');
        $csv->addfield($data['user_from_id'], 'integer');
        $csv->addfield($data['user_from_name'], 'string');
        $csv->addfield($data['user_to_id'], 'integer');
        $csv->addfield($data['user_to_name'], 'string');
Example #24
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: Tweet stats</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export("tweetStats");
$csv = new CSV($filename, $outputformat);
$numtweets = $numlinktweets = $numTweetsWithHashtag = $numTweetsWithMentions = $numTweetsWithMedia = $numRetweets = $numReplies = array();
// tweets in subset
$sql = "SELECT count(distinct(t.id)) as count, ";
$sql .= sqlInterval();
$sql .= " FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sql .= " GROUP BY datepart ORDER BY datepart ASC";
$sqlresults = mysql_unbuffered_query($sql);
while ($data = mysql_fetch_assoc($sqlresults)) {
    $numtweets[$data['datepart']] = $data["count"];
}
mysql_free_result($sqlresults);
// tweet containing links
$sql = "SELECT count(distinct(t.id)) AS count, ";
Example #25
echo '<fieldset class="if_parameters">';
echo '<legend>Your spreadsheet (CSV) file</legend>';
echo '<p><a href="' . str_replace("#", urlencode("#"), str_replace("\"", "%22", $filename)) . '">' . $filename . '</a></p>';
echo '</fieldset>';
$gexf = new Gexf();
$gexf->setTitle("URL-hashtag " . $filename);
$gexf->setEdgeType(GEXF_EDGE_UNDIRECTED);
$gexf->setCreator("tools.digitalmethods.net");
foreach ($urlHashtags as $url => $hashtags) {
    foreach ($hashtags as $hashtag => $frequency) {
        $node1 = new GexfNode($url);
        $node1->addNodeAttribute("type", 'host', $type = "string");
        $gexf->addNode($node1);
        $node2 = new GexfNode($hashtag);
        $node2->addNodeAttribute("type", 'hashtag', $type = "string");
        $gexf->addNode($node2);
        $edge_id = $gexf->addEdge($node1, $node2, $frequency);
    }
}
$gexf->render();
$filename = get_filename_for_export("hostHashtag", '', 'gexf');
file_put_contents($filename, $gexf->gexfFile);
echo '<fieldset class="if_parameters">';
echo '<legend>Your network (GEXF) file</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?>

    </body>
</html>
Example #26
while ($res = mysql_fetch_assoc($sqlresults)) {
    $coword->addWord($res['h1']);
    $coword->addWord($res['h2']);
    $coword->addCoword($res['h1'], $res['h2'], 1);
}
unset($coword->words);
// since words are added manually, the automatically tracked word frequency would be unreliable
if ($esc['shell']['minf'] > 1 && !($esc['shell']['topu'] > 0)) {
    $coword->applyMinFreq($esc['shell']['minf']);
    //$coword->applyMinDegree($esc['shell']['minf']);	// Berno: method no longer in use, remains unharmed
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_minFreqOf" . $esc['shell']['minf'], "gdf");
} elseif ($esc['shell']['topu'] > 0) {
    $coword->applyTopUnits($esc['shell']['topu']);
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_Top" . $esc['shell']['topu'], "gdf");
} else {
    $filename = get_filename_for_export("hashtagCooc", isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "", "gdf");
}
//print_r($coword);
$lookup = array();
$fp = fopen($filename, 'w');
fwrite($fp, chr(239) . chr(187) . chr(191));
fwrite($fp, "nodedef> name VARCHAR,label VARCHAR,wordFrequency INT,distinctUsersForWord INT,userDiversity FLOAT,wordFrequencyDividedByUniqueUsers FLOAT,wordFrequencyMultipliedByUniqueUsers INT\n");
$counter = 0;
foreach ($coword->wordFrequency as $word => $freq) {
    fwrite($fp, $counter . "," . $word . "," . $freq . "," . $coword->distinctUsersForWord[$word] . "," . $coword->userDiversity[$word] . "," . $coword->wordFrequencyDividedByUniqueUsers[$word] . "," . $coword->wordFrequencyMultipliedByUniqueUsers[$word] . "\n");
    $lookup[$word] = $counter;
    $counter++;
}
unset($coword->wordFrequency);
unset($coword->distinctUsersForWord);
unset($coword->userDiversity);
Example #27
    foreach ($usernames as $username => $frequency) {
        $node1 = new GexfNode($url);
        $node1->addNodeAttribute("type", 'url', $type = "string");
        $node1->addNodeAttribute('shortlabel', $urlDomain[$url], $type = "string");
        $node1->addNodeAttribute('longlabel', $url, $type = "string");
        $node1->addNodeAttribute('status_code', $urlStatusCode[$url], $type = "string");
        $node1->addNodeAttribute('unique_users', $urlUniqueUsers[$url], $type = "integer");
        $node1->addNodeAttribute('total_users', $urlTotalUsers[$url], $type = "integer");
        $gexf->addNode($node1);
        $node2 = new GexfNode($username);
        $node2->addNodeAttribute("type", 'user', $type = "string");
        $node2->addNodeAttribute('shortlabel', $username, $type = "string");
        $node2->addNodeAttribute('longlabel', $username, $type = "string");
        $node2->addNodeAttribute('unique_urls', $userUniqueUrls[$username], $type = "integer");
        $node2->addNodeAttribute('total_urls', $userTotalUrls[$username], $type = "integer");
        $gexf->addNode($node2);
        $edge_id = $gexf->addEdge($node1, $node2, $frequency);
    }
}
$gexf->render();
$filename = get_filename_for_export("urlUser", '', 'gexf');
file_put_contents($filename, $gexf->gexfFile);
echo '<fieldset class="if_parameters">';
echo '<legend>Your network (GEXF) file</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?>

    </body>
</html>
Example #28
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: Export URLs</h1>

        <?php 
validate_all_variables();
$filename = get_filename_for_export('urlsExport');
$csv = new CSV($filename, $outputformat);
$csv->writeheader(array('tweet_id', 'url', 'url_expanded', 'url_followed'));
$sql = "SELECT t.id as id, u.url as url, u.url_expanded as url_expanded, u.url_followed as url_followed FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_urls u ";
$sql .= sqlSubset();
$sql .= " AND u.tweet_id = t.id ORDER BY id";
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
        $csv->addfield($data['id'], 'integer');
        $csv->addfield($data['url'], 'string');
        if (isset($data['url_followed']) && strlen($data['url_followed']) > 1) {
            $csv->addfield($data['url'], 'string');
        } else {
Example #29
        <script type="text/javascript" language="javascript">
	
	
	
        </script>

    </head>

    <body>

        <h1>TCAT :: Mention - Hashtags</h1>

        <?php 
validate_all_variables();
$collation = current_collation();
$filename = get_filename_for_export("mentionHashtags", "", "gexf");
$sql = "SELECT m.to_user COLLATE {$collation} AS user, LOWER(h.text COLLATE {$collation}) AS hashtag FROM ";
$sql .= $esc['mysql']['dataset'] . "_mentions m, " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h ";
$where = "t.id = m.tweet_id AND m.tweet_id = h.tweet_id AND LENGTH(h.text)>1 AND ";
$sql .= sqlSubset($where);
//print $sql."<Br>";
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    if (!isset($userHashtags[$res['user']][$res['hashtag']])) {
        $userHashtags[$res['user']][$res['hashtag']] = 0;
    }
    $userHashtags[$res['user']][$res['hashtag']]++;
    if (!isset($userCount[$res['user']])) {
        $userCount[$res['user']] = 0;
    }
    $userCount[$res['user']]++;