<h1>TCAT :: Export Tweets language</h1> <?php
// Export all tweets of the current subset joined with their detected (CLD)
// language information as CSV.
validate_all_variables();
/* @todo, use same export possibilities as mod.export_tweets.php */
// CSV column names. Fixed typo: "filter_leveli" -> "filter_level" so the
// header matches the tweet field that is actually exported below.
$header = "id,time,created_at,from_user_name,from_user_lang,text,source,location,lat,lng,from_user_tweetcount,from_user_followercount,from_user_friendcount,from_user_realname,to_user_name,in_reply_to_status_id,quoted_status_id,from_user_listed,from_user_utcoffset,from_user_timezone,from_user_description,from_user_url,from_user_verified,filter_level,cld_name,cld_code,cld_reliable,cld_bytes,cld_percent";
if (isset($_GET['includeUrls']) && $_GET['includeUrls'] == 1) {
    $header .= ",urls,urls_expanded,urls_followed,domains";
}
$header .= "\n";
$langset = $esc['mysql']['dataset'] . '_lang';
// Join the dataset's tweets table with its per-tweet language table.
$sql = "SELECT * FROM " . $esc['mysql']['dataset'] . "_tweets t inner join {$langset} l on t.id = l.tweet_id ";
$sql .= sqlSubset();
$sqlresults = mysql_query($sql);
$filename = get_filename_for_export("fullExportLang");
$csv = new CSV($filename, $outputformat);
// Fixed: strip the trailing newline before exploding, so the last column
// name does not carry a "\n" into the written header.
$csv->writeheader(explode(',', rtrim($header, "\n")));
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
        // _urls joins alias the tweet id as tweet_id; plain queries use id.
        if (preg_match("/_urls/", $sql)) {
            $id = $data['tweet_id'];
        } else {
            $id = $data['id'];
        }
        $csv->addfield($id);
        // Second column ("time") is created_at as a unix timestamp.
        $csv->addfield(strtotime($data["created_at"]));
        $fields = array('created_at', 'from_user_name', 'from_user_lang', 'text', 'source', 'location', 'geo_lat', 'geo_lng', 'from_user_tweetcount', 'from_user_followercount', 'from_user_friendcount', 'from_user_realname', 'to_user_name', 'in_reply_to_status_id', 'quoted_status_id', 'from_user_listed', 'from_user_utcoffset', 'from_user_timezone', 'from_user_description', 'from_user_url', 'from_user_verified', 'filter_level');
        foreach ($fields as $f) {
            // Missing columns are emitted as empty fields.
            $csv->addfield(isset($data[$f]) ? $data[$f] : '');
$content .= $value["id"] . "," . $key . "," . $value["notweets"] . "," . $value["nomentions"] . "," . $value["nomentions"] / $value["notweets"] . "," . $value["user_tweetcount"] . "," . $value["user_followercount"] . "," . $value["user_friendcount"] . "," . $value["user_listed"] . "," . $value["user_frienddivfollower"] . "," . $value["user_friendminfollower"] . "," . $value["linkcount"] . "," . $value["domaincount"] . "," . $value["domaincount"] / $value["linkcount"] . "," . $value["user_utcoffset"] . "\n";
}
// let's add more qualifications to the link
// Fixed: the newline belongs after the complete GDF edge header. It used to
// read "...socialslope DOUBLE\n,directed BOOLEAN", which split the edgedef
// line in two and glued ",directed BOOLEAN" onto the first edge row,
// producing a malformed GDF file.
$content .= "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE,socialslope DOUBLE,directed BOOLEAN\n";
foreach ($edges as $key => $value) {
    // $key is "user1,user2"; look up both endpoints' follower counts.
    $ids = explode(",", $key);
    $from = $users[$usersinv[$ids[0]]]["user_followercount"];
    $to = $users[$usersinv[$ids[1]]]["user_followercount"];
    // Social slope: negative when the mentioning side has more followers,
    // positive when the mentioned side has more, zero when equal.
    // NOTE(review): divides without guarding against a zero follower count.
    if ($from == $to) {
        $slope = 0;
    }
    if ($from > $to) {
        $slope = $from / $to;
        $slope = -$slope;
    }
    if ($from < $to) {
        $slope = $to / $from;
    }
    $content .= $key . "," . $value . "," . $slope . ",true\n";
}
$filename = get_filename_for_export("mention", "", "gdf");
file_put_contents($filename, $content);
echo '<fieldset class="if_parameters">';
echo '<legend>Your File</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?> </body> </html>
/**
 * Generate an export file of the requested type and redirect the client
 * to the resulting download URL.
 *
 * @param string $what export module identifier passed through to
 *                     get_filename_for_export() and generate()
 */
function get_file($what)
{
    validate_all_variables();
    global $database;

    // Building the filename also validates the request data.
    $filename = get_filename_for_export($what);
    generate($what, $filename);

    // Compose the public URL of the generated file and redirect to it.
    $base = str_replace("index.php", "", ANALYSIS_URL);
    $location = $base . filename_to_url($filename);
    if (defined('LOCATION')) {
        $location = LOCATION . $location;
    }
    header("Content-type: text/csv");
    header("Location: {$location}");
}
// Rank users by activity and, when a top-N limit was requested, keep only
// the N most active ones (preserving keys).
arsort($topusers);
$limit = $esc["shell"]["topu"];
if ($limit > 0) {
    $topusers = array_slice($topusers, 0, $limit, true);
}
//print_r($topusers);
// GDF node table: one row per retained user.
$content = "nodedef>name VARCHAR,label VARCHAR,no_tweets INT,no_mentions INT\n";
foreach ($users as $name => $info) {
    if (!isset($topusers[$name])) {
        continue;
    }
    $content .= $info["id"] . "," . $name . "," . $info["notweets"] . "," . $info["nomentions"] . "\n";
}
// GDF edge table: only edges whose endpoints both survived the filter.
$content .= "edgedef>node1 VARCHAR,node2 VARCHAR,weight DOUBLE\n";
foreach ($edges as $pair => $weight) {
    $endpoints = explode(",", $pair);
    if (isset($topusers[$usersinv[$endpoints[0]]]) && isset($topusers[$usersinv[$endpoints[1]]])) {
        $content .= $pair . "," . $weight . "\n";
    }
}
//echo $content;
// add filename for top user filter "_minDegreeOf".$esc['shell']['minf']
$filename = get_filename_for_export("mention", "_Top" . $esc['shell']['topu'], "gdf");
file_put_contents($filename, $content);
echo '<fieldset class="if_parameters">';
echo '<legend>Your File</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?> </body> </html>
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Sentiment Cloud</h1> <?php
// Build word clouds from SentiStrength explanation strings, where each
// scored word carries a bracketed value, e.g. "happy[2]" or "sad[-3]".
validate_all_variables();
$filename = get_filename_for_export("sentiment_cloud");
$csv = new CSV($filename, $outputformat);
// Fetch the sentiment explanation for every tweet in the current subset.
$sql = "SELECT s.explanation FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_sentiment s ";
$sql .= sqlSubset("s.tweet_id = t.id AND ");
//print $sql . "<br>";die;
$rec = mysql_unbuffered_query($sql);
$negativeSentiments = $positiveSentiments = $wordValues = array();
while ($res = mysql_fetch_assoc($rec)) {
    // Capture word + signed single-digit score pairs like "word[-1]".
    if (preg_match_all("/[\\s|\\B]([\\p{L}\\w\\d_]+)\\[(-?\\d)\\]/u", $res['explanation'], $matches)) {
        foreach ($matches[1] as $k => $word) {
            $word = strtolower(trim($word));
            $sentimentValue = (int) $matches[2][$k];
            if ($sentimentValue < 0) {
                // First occurrence of a negatively scored word.
                if (array_key_exists($word, $negativeSentiments) === false) {
                    $negativeSentiments[$word] = 0;
                }
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: User Stats</h1> <?php
// Export per-interval, per-user tweet counts for the current subset.
validate_all_variables();
$filename = get_filename_for_export("userStats");
$csv = new CSV($filename, $outputformat);
// tweets per user
$sql = "SELECT count(distinct(t.id)) AS count, t.from_user_id, ";
$sql .= sqlInterval();
$sql .= " FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sql .= "GROUP BY datepart, from_user_id";
//print $sql . "<br>";
$sqlresults = mysql_unbuffered_query($sql);
// $array[datepart][user id] = number of distinct tweets in that interval.
$array = array();
while ($res = mysql_fetch_assoc($sqlresults)) {
    $array[$res['datepart']][$res['from_user_id']] = $res['count'];
}
mysql_free_result($sqlresults);
if (!empty($array)) {
<head> <title>TCAT :: Hashtag user activity</title> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <link rel="stylesheet" href="css/main.css" type="text/css" /> </head> <body> <h1>TCAT :: Hashtag user activity</h1> <?php
validate_all_variables();
// Optional normalization by probability of association (via GET flag).
$filename = get_filename_for_export("hashtagUserActivity", isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "");
$csv = new CSV($filename, $outputformat);
// Count the number of tweets in the subset (the query counts tweet ids).
$sql = "SELECT count(id) AS count FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$rec = mysql_query($sql);
if (mysql_num_rows($rec) > 0) {
    $res = mysql_fetch_assoc($rec);
} else {
    die('no data in selection');
}
$nrOfTweets = $res['count'];
$collation = current_collation();
// select nr of users in subset
$sql = "SELECT count(distinct(from_user_name COLLATE {$collation})) AS count FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
<?php
// Export the bare tweet ids of the current subset, one id per line,
// prefixed with a UTF-8 byte-order mark.
require_once __DIR__ . '/common/config.php';
require_once __DIR__ . '/common/functions.php';
validate_all_variables();
dataset_must_exist();
$filename = get_filename_for_export("ids");
$stream_to_open = export_start($filename, $outputformat);
$sql = "SELECT id FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        // _urls queries alias the id as tweet_id; for this query the branch
        // is never taken, but it is kept for parity with other modules.
        if (preg_match("/_urls/", $sql)) {
            $id = $data['tweet_id'];
        } else {
            $id = $data['id'];
        }
        $out .= $id . "\n";
    }
    mysql_free_result($sqlresults);
}
$fp = fopen($stream_to_open, 'w');
if ($fp === false) {
    die("Could not open output file.");
}
// chr(239).chr(187).chr(191) is the UTF-8 byte-order mark.
fwrite($fp, chr(239) . chr(187) . chr(191) . $out);
fclose($fp);
if (!$use_cache_file) {
    exit(0);
}
// print stats
foreach ($paths_node_counts_distribution as $length => $frequency) {
    print "Networks with {$length} nodes: {$frequency}<br>";
}
print "<br><b>Now analyzing only networks with at least {$min_nr_of_nodes} nodes</b><br><br>";
print "Networks in analysis: " . array_sum($paths_node_counts_distribution_tmp) . "<br>";
print "Networks in analysis without root: " . $rootsnotfound . "<bR>";
print "<bR>";
ksort($network_types_stats);
foreach ($network_types_stats as $type => $count) {
    print "Networks of type {$type}: {$count}<br>";
}
flush();
// @todo percentage of different structures (pure stars, pure chains, mixed structure). Ratio between average degree and number of nodes.
$filename = get_filename_for_export("interactionGraph", "min" . $esc['shell']['minf'] . "nodes", "gexf");
// Build a directed graph of reply links (tweet -> tweet it replies to).
$gexf = new Gexf();
$gexf->setTitle("interaction graph " . $filename);
$gexf->setEdgeType(GEXF_EDGE_DIRECTED);
$gexf->setCreator("tools.digitalmethods.net");
// label: user, attribute: tweet
// min, max, mean of what?? ditributions of nr of nodes in the network
foreach ($links as $link) {
    $tweet_id = $link[0];
    $in_reply_to_status_id = $link[1];
    // Skip links where neither endpoint is in the todo set.
    // NOTE(review): array_search without strict=true — relies on ids being
    // stored with a consistent type; confirm against the full file.
    if (array_search($in_reply_to_status_id, $todo) === false && array_search($tweet_id, $todo) === false) {
        continue;
    }
    $node1 = new GexfNode($in_reply_to_status_id);
    // if node already exists, we want the attributes added that node instead
    $id = $gexf->nodeExists($node1);
}
}
// TODO: Support these. This shouldn't be difficult, but requires a little different logic.
if ($esc['date']['interval'] == "custom" || $esc['date']['interval'] == "overall") {
    echo '<b>Notice:</b> You have selected an interval type which is not yet supported by this export module.<br/>';
    echo '</body></html>';
    die;
}
// make filename and open file for write
// Geotrack bins get their own module name.
if ($bin_type == "geotrack") {
    $module = "rateLimitDataGeo";
} else {
    $module = "ratelimitData";
}
$module .= "-" . $esc['date']['interval'];
$filename = get_filename_for_export($module);
$csv = new CSV($filename, $outputformat);
// write header
$header = "querybin,datetime,tweets ratelimited (estimate)";
$csv->writeheader(explode(',', $header));
$sqlInterval = sqlInterval();
$sqlSubset = sqlSubset();
$sqlGroup = " GROUP BY datepart ASC";
// Use native MySQL to create a temporary table with all dateparts. They should be identical to the dateparts we will use in the GROUP BY statement.
// Prepare the string mysql needs in date_add()
$mysqlNativeInterval = "day"; // default $interval = daily
switch ($esc['date']['interval']) {
    case "hourly":
        $mysqlNativeInterval = "hour";
        break;
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Source stats</h1> <?php
// Export tweet counts per source (client application) per interval.
validate_all_variables();
$filename = get_filename_for_export("sourceStats");
$csv = new CSV($filename, $outputformat);
// tweets per source
$sql = "SELECT COUNT(*) AS count, source, ";
$sql .= sqlInterval();
$sql .= " FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sql .= "GROUP BY datepart,source ORDER BY count DESC";
//print $sql . "<br>"; exit;
$sqlresults = mysql_unbuffered_query($sql);
$array = array();
while ($res = mysql_fetch_assoc($sqlresults)) {
    // Normalize the source string: strip any markup tags, replace commas
    // (CSV safety) and collapse runs of whitespace.
    $res['source'] = preg_replace("/<.+>/U", "", $res['source']);
    $res['source'] = preg_replace("/,/", "_", $res['source']);
    $res['source'] = preg_replace("/[ \\s\t]+/", " ", $res['source']);
    $res['source'] = trim($res['source']);
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Word list</h1> <?php
// Tokenize tweet texts into words (URLs excluded) for a word list export.
validate_all_variables();
$filename = get_filename_for_export("wordList");
$csv = new CSV($filename, $outputformat);
mysql_query("set names utf8");
$sql = "SELECT id, text FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sqlresults = mysql_unbuffered_query($sql);
$debug = '';
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $text = $data["text"];
        // Match either whole URLs or words of two or more letters; URLs are
        // matched first so they can be skipped as a unit below.
        preg_match_all('/(https?:\\/\\/[^\\s]+)|([\\p{L}][\\p{L}]+)/u', $text, $matches, PREG_PATTERN_ORDER);
        foreach ($matches[0] as $word) {
            // Skip tokens that are URLs.
            if (preg_match('/(https?:\\/\\/)/u', $word)) {
                continue;
            }
            $word = strtolower($word);
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Export hashtags</h1> <?php
// Export one row per (tweet, hashtag) pair for the current subset.
validate_all_variables();
$filename = get_filename_for_export('hashtagExport');
$csv = new CSV($filename, $outputformat);
$csv->writeheader(array('tweet_id', 'hashtag'));
$sql = "SELECT t.id as id, h.text as hashtag FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h ";
$sql .= sqlSubset();
$sql .= " AND h.tweet_id = t.id ORDER BY id";
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
        $csv->addfield($data['id'], 'integer');
        $csv->addfield($data['hashtag'], 'string');
        $csv->writerow();
    }
    mysql_free_result($sqlresults);
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Source / hashtag co-occurence</h1> <?php
// Count co-occurrences of tweet sources and hashtags (GEXF export).
validate_all_variables();
$filename = get_filename_for_export("sourceHashtag", '', 'gexf');
$collation = current_collation();
//print_r($_GET);
$sql = "SELECT LOWER(t.source COLLATE {$collation}) AS source, LOWER(h.text COLLATE {$collation}) AS hashtag FROM ";
$sql .= $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h ";
$where = "t.id = h.tweet_id AND ";
$sql .= sqlSubset($where);
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    // Normalize the source string: strip tags and collapse whitespace.
    $res['source'] = preg_replace("/<.+>/U", "", $res['source']);
    $res['source'] = preg_replace("/[ \\s\t]+/", " ", $res['source']);
    $res['source'] = trim($res['source']);
    // $sourcesHashtags[source][hashtag] = co-occurrence count.
    if (!isset($sourcesHashtags[$res['source']][$res['hashtag']])) {
        $sourcesHashtags[$res['source']][$res['hashtag']] = 0;
    }
    $sourcesHashtags[$res['source']][$res['hashtag']]++;
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: User list</h1> <?php
// Export per-interval user statistics (tweet counts plus profile fields).
validate_all_variables();
$collation = current_collation();
$filename = get_filename_for_export("user.list");
$csv = new CSV($filename, $outputformat);
// tweets per user
$sql = "SELECT t.from_user_id,t.from_user_name COLLATE {$collation} as from_user_name,t.from_user_lang,t.from_user_tweetcount,t.from_user_followercount,t.from_user_friendcount,t.from_user_listed,t.from_user_utcoffset,t.from_user_verified,count(distinct(t.id)) as tweetcount, ";
$sql .= sqlInterval();
$sql .= " FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sql .= "GROUP BY datepart, from_user_id";
$sqlresults = mysql_unbuffered_query($sql);
// $array[datepart][user name] = full result row for that user/interval.
$array = array();
while ($res = mysql_fetch_assoc($sqlresults)) {
    $array[$res['datepart']][$res['from_user_name']] = $res;
}
mysql_free_result($sqlresults);
// retweets per user
$sql = "SELECT count(t.retweet_id) as count, t.from_user_name COLLATE {$collation} as from_user_name, ";
$sql .= "ORDER BY h1,h2";
//print $sql."<br>";
$sqlresults = mysql_unbuffered_query($sql);
// Feed every hashtag pair into the co-word network.
while ($res = mysql_fetch_assoc($sqlresults)) {
    $coword->addWord($res['h1']);
    $coword->addWord($res['h2']);
    $coword->addCoword($res['h1'], $res['h2'], 1);
}
mysql_free_result($sqlresults);
unset($coword->words); // as we are adding words manually the frequency would be messed up
// Apply frequency / top-unit filters and name the output file accordingly.
if ($esc['shell']['minf'] > 0 && !($esc['shell']['topu'] > 0)) {
    $coword->applyMinFreq($esc['shell']['minf']);
    //$coword->applyMinDegree($esc['shell']['minf']); // Berno: method no longer in use, remains unharmed
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_sentiment_minFreqOf" . $esc['shell']['minf'], "gexf");
} elseif ($esc['shell']['topu'] > 0) {
    $coword->applyTopUnits($esc['shell']['topu']);
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_sentiment_Top" . $esc['shell']['topu'], "gexf");
} else {
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_sentiment", "gexf");
}
file_put_contents($filename, $coword->getCowordsAsGexf($filename));
echo '<fieldset class="if_parameters">';
echo '<legend>Your GEXF File</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?> </body> </html>
}
fputs($tempfile, "\"{$datepart}\" \"{$word}\"\n");
}
}
mysql_free_result($sqlresults);
}
// Flush the temp file to disk before handing it over to the shell pipeline.
if (function_exists('eio_fsync')) {
    // NOTE(review): eio_fsync normally takes a file descriptor, not a PHP
    // stream resource — confirm this call behaves as intended.
    eio_fsync($tempfile);
} else {
    fflush($tempfile);
}
$tempmeta = stream_get_meta_data($tempfile);
$templocation = $tempmeta["uri"];
// write csv results
// CSV is written by awk here, so we explicitely handle the output format
$filename = get_filename_for_export("wordFrequency");
$csv = fopen($filename, "w");
// UTF-8 byte-order mark.
fputs($csv, chr(239) . chr(187) . chr(191));
if ($outputformat == 'tsv') {
    fputs($csv, "interval\tword\tfrequency\n");
} else {
    fputs($csv, "interval,word,frequency\n");
}
// Count (interval, word) occurrences via sort|uniq and keep rows meeting
// the minimum frequency. NOTE(review): $templocation and $filename are
// interpolated into the shell command unquoted — safe only as long as the
// export paths never contain shell metacharacters.
if ($outputformat == 'tsv') {
    system("sort -S 8% {$templocation} | uniq -c | sort -S 8% -b -k 2,2 -k 1,1nr -k 3,3 | awk '{ if (\$1 >= {$minf}) { print \$2 \"\\t\" \$3 \"\\t\" \$1} }' | sed -e 's/_/ /' >> {$filename}");
} else {
    system("sort -S 8% {$templocation} | uniq -c | sort -S 8% -b -k 2,2 -k 1,1nr -k 3,3 | awk '{ if (\$1 >= {$minf}) { print \$2 \",\" \$3 \",\" \$1} }' | sed -e 's/_/ /' >> {$filename}");
}
fclose($csv);
fclose($tempfile); // this removes the temporary file
while ($data = mysql_fetch_assoc($sqlresults)) {
    $datepart = $data["datepart"];
    $url = $data["url"];
    // $media_url_count[datepart][url] = occurrence count.
    if (!array_key_exists($datepart, $media_url_count)) {
        $media_url_count[$datepart] = array();
    }
    if (!array_key_exists($url, $media_url_count[$datepart])) {
        $media_url_count[$datepart][$url] = 1;
    } else {
        $media_url_count[$datepart][$url]++;
    }
}
mysql_free_result($sqlresults);
}
// write csv results
$filename = get_filename_for_export("mediaFrequency");
$csv = new CSV($filename, $outputformat);
$csv->writeheader(array('interval', 'media url', 'frequency'));
// Within each interval, write the most frequent media URLs first.
foreach ($media_url_count as $datepart => $url_count) {
    arsort($url_count);
    foreach ($url_count as $url => $count) {
        $csv->newrow();
        $csv->addfield($datepart);
        $csv->addfield($url);
        $csv->addfield($count);
        $csv->writerow();
    }
}
$csv->close();
echo '<fieldset class="if_parameters">';
echo '<legend>Your File</legend>';
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Export Tweets sentiment</h1> <?php
// Export tweets of the current subset with their SentiStrength scores.
validate_all_variables();
/* @todo, use same export possibilities as mod.export_tweets.php */
$filename = get_filename_for_export("fullExport-sentiment");
$csv = new CSV($filename, $outputformat);
$header = "id";
$header .= ",sentistrength,negative,positive";
// Fixed: writeheader() expects an array of column names (every other export
// module passes explode(',', $header) or an array literal), not the raw
// comma-separated string.
$csv->writeheader(explode(',', $header));
// First pass: collect sentiment scores keyed by tweet id.
$sql = "SELECT s.positive, s.negative, s.explanation, t.from_user_name as user, t.id as tid FROM " . $esc['mysql']['dataset'] . "_sentiment s, " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset("t.id = s.tweet_id AND ");
$rec = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($rec)) {
    $sentiment[$res['tid']]['pos'] = $res['positive'];
    $sentiment[$res['tid']]['neg'] = $res['negative'];
    $sentiment[$res['tid']]['desc'] = $res['explanation'];
}
mysql_free_result($rec);
// Second pass: fetch the tweets themselves for the current subset.
$sql = "SELECT * FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
validate_all_variables();
// make filename and open file for write
$module = "gapData";
// Look up the id and type of the current query bin.
$sql = "SELECT id, `type` FROM tcat_query_bins WHERE querybin = '" . mysql_real_escape_string($esc['mysql']['dataset']) . "'";
$sqlresults = mysql_query($sql);
if ($res = mysql_fetch_assoc($sqlresults)) {
    $bin_id = $res['id'];
    $bin_type = $res['type'];
} else {
    die("Query bin not found!");
}
// Optional export settings are appended to the filename.
$exportSettings = array();
if (isset($_GET['exportSettings']) && $_GET['exportSettings'] != "") {
    $exportSettings = explode(",", $_GET['exportSettings']);
}
$filename = get_filename_for_export($module, implode("_", $exportSettings));
$csv = new CSV($filename, $outputformat);
// write header
$header = "start,end";
$csv->writeheader(explode(',', $header));
// make query
$sql = "SELECT * FROM tcat_error_gap WHERE type = '" . mysql_real_escape_string($bin_type) . "' and\n start >= '" . mysql_real_escape_string($_GET['startdate']) . "' and end <= '" . mysql_real_escape_string($_GET['enddate']) . "'";
// loop over results and write to file
$sqlresults = mysql_query($sql);
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        // the query bin must have been active during the gap period, if we want to report it as a possible gap
        $sql2 = "SELECT count(*) as cnt FROM tcat_query_bins_phrases WHERE querybin_id = {$bin_id} and\n starttime <= '" . $data["end"] . "' and (endtime >= '" . $data["start"] . "' or endtime is null or endtime = '0000-00-00 00:00:00')";
        $sqlresults2 = mysql_query($sql2);
        if ($sqlresults2) {
            if ($data2 = mysql_fetch_assoc($sqlresults2)) {
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Language / hashtag co-occurence</h1> <?php
// Count co-occurrences of user language and hashtags (GEXF export).
validate_all_variables();
$filename = get_filename_for_export("languageHashtag", '', 'gexf');
//print_r($_GET);
$collation = current_collation();
$sql = "SELECT LOWER(t.from_user_lang) AS language, LOWER(h.text COLLATE {$collation}) AS hashtag FROM ";
$sql .= $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h ";
$where = "t.id = h.tweet_id AND ";
$sql .= sqlSubset($where);
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    //print_r($res); exit;
    // Normalize the language value: strip tags and collapse whitespace.
    $res['language'] = preg_replace("/<.+>/U", "", $res['language']);
    $res['language'] = preg_replace("/[ \\s\t]+/", " ", $res['language']);
    $res['language'] = trim($res['language']);
    // $languagesHashtags[language][hashtag] = co-occurrence count.
    if (!isset($languagesHashtags[$res['language']][$res['hashtag']])) {
        $languagesHashtags[$res['language']][$res['hashtag']] = 0;
    }
/**
 * Export, per keyword and time slice, how its association profile (the
 * co-words it occurs with) varies over time: degree, cosine similarity
 * between consecutive slices, words moving in and out, and stable words.
 *
 * @param string $filename       unused — immediately overwritten below
 * @param array  $series         time slice => coword network object
 * @param array  $keywordToTrack keywords of interest (only checked for emptiness here)
 * @param array  $ap             word => time => association profile (coword => frequency)
 */
function variabilityOfAssociationProfiles($filename, $series, $keywordToTrack, $ap)
{
    if (empty($series) || empty($keywordToTrack)) {
        die('not enough data');
    }
    // NOTE(review): the incoming $filename parameter is ignored, and the
    // output written below is tab-separated text despite the "gexf" extension.
    $filename = get_filename_for_export("hashtagVariability", "_variabilityOfAssociationProfiles", "gexf");
    // group per slice
    // per keyword
    // get associated words (depth 1) per slice
    // get frequency, degree, ap variation (calculated on cooc frequency), words in, words out, ap keywords
    $degree = array();
    foreach ($series as $time => $cw) {
        $cw = $cw->getCowords();
        foreach ($cw as $word => $cowords) {
            foreach ($cowords as $coword => $frequency) {
                // save how many time slices the word appears
                $words[$word][$time] = 1;
                $words[$coword][$time] = 1;
                // keep track of degree per word per time slice
                if (array_key_exists($word, $degree) === false) {
                    $degree[$word] = array();
                }
                if (array_key_exists($coword, $degree) === false) {
                    $degree[$coword] = array();
                }
                if (array_key_exists($time, $degree[$word]) === false) {
                    $degree[$word][$time] = 0;
                }
                if (array_key_exists($time, $degree[$coword]) === false) {
                    $degree[$coword][$time] = 0;
                }
                $degree[$word][$time]++;
                $degree[$coword][$time]++;
            }
        }
    }
    // count nr of time slices the words appears in
    foreach ($words as $word => $times) {
        $documentsPerWords[$word] = count($times);
    }
    // calculate similarity and changes between consecutive time slices
    foreach ($ap as $word => $times) {
        $times_keys = array_keys($times);
        for ($i = 1; $i < count($times_keys); $i++) {
            $im1 = $i - 1;
            $v1 = $times[$times_keys[$im1]];
            $v2 = $times[$times_keys[$i]];
            $cos_sim[$word][$times_keys[$i]] = cosineSimilarity($v1, $v2);
            $change_out[$word][$times_keys[$i]] = change($v1, $v2);
            $change_in[$word][$times_keys[$i]] = change($v2, $v1);
            $stable[$word][$times_keys[$i]] = array_intersect(array_keys($v1), array_keys($v2));
        }
    }
    // @todo, frequency
    $out = "key\ttime\tdegree\tsimilarity\tassociational profile\tchange in\tchange out\tstable\n";
    foreach ($ap as $word => $times) {
        foreach ($times as $time => $profile) {
            // Each list column is rendered as "word (count), ..." with the
            // trailing ", " stripped.
            if (isset($change_in[$word][$time])) {
                $inc = "";
                foreach ($change_in[$word][$time] as $w => $c) {
                    $inc .= "{$w} ({$c}), ";
                }
                $inc = substr($inc, 0, -2);
            } else {
                $inc = "";
            }
            if (isset($change_out[$word][$time])) {
                $outc = "";
                foreach ($change_out[$word][$time] as $w => $c) {
                    $outc .= "{$w} ({$c}), ";
                }
                $outc = substr($outc, 0, -2);
            } else {
                $outc = "";
            }
            if (isset($stable[$word][$time])) {
                $stablec = array();
                foreach ($stable[$word][$time] as $w) {
                    $stablec[] = $w;
                }
                $stablec = implode(", ", $stablec);
            } else {
                $stablec = "";
            }
            $prof = "";
            foreach ($profile as $w => $c) {
                $prof .= "{$w} ({$c}), ";
            }
            $prof = substr($prof, 0, -2);
            if (isset($degree[$word][$time])) {
                $deg = $degree[$word][$time];
            } else {
                $deg = "";
            }
            if (isset($cos_sim[$word][$time])) {
                $cs = $cos_sim[$word][$time];
            } else {
                $cs = "";
            }
            $out .= $word . "\t" . $time . "\t" . $deg . "\t" . $cs . "\t" . $prof . "\t" . $inc . "\t" . $outc . "\t" . $stablec . "\n";
        }
    }
    // UTF-8 byte-order mark followed by the tab-separated body.
    file_put_contents($filename, chr(239) . chr(187) . chr(191) . $out);
    echo '<fieldset class="if_parameters">';
    echo '<legend>Your co-hashtag variability File</legend>';
    echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
    echo '</fieldset>';
}
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Export mentions</h1> <?php
// Export one row per mention: tweet id, mentioning user, mentioned user.
validate_all_variables();
$filename = get_filename_for_export('mentionExport');
$csv = new CSV($filename, $outputformat);
$csv->writeheader(array('tweet_id', 'user_from_id', 'user_from_name', 'user_to_id', 'user_to_name', 'mention_type'));
$sql = "SELECT t.id as id, t.text as text, m.from_user_id as user_from_id, m.from_user_name as user_from_name, m.to_user_id as user_to_id, m.to_user as user_to_name FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_mentions m ";
$sql .= sqlSubset();
$sql .= " AND m.tweet_id = t.id ORDER BY id";
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
        $csv->addfield($data['id'], 'integer');
        $csv->addfield($data['user_from_id'], 'integer');
        $csv->addfield($data['user_from_name'], 'string');
        $csv->addfield($data['user_to_id'], 'integer');
        $csv->addfield($data['user_to_name'], 'string');
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Tweet stats</h1> <?php
// Export per-interval tweet statistics (totals, links, hashtags, ...).
validate_all_variables();
$filename = get_filename_for_export("tweetStats");
$csv = new CSV($filename, $outputformat);
// One counter array per statistic, keyed by interval (datepart).
$numtweets = $numlinktweets = $numTweetsWithHashtag = $numTweetsWithMentions = $numTweetsWithMedia = $numRetweets = $numReplies = array();
// tweets in subset
$sql = "SELECT count(distinct(t.id)) as count, ";
$sql .= sqlInterval();
$sql .= " FROM " . $esc['mysql']['dataset'] . "_tweets t ";
$sql .= sqlSubset();
$sql .= " GROUP BY datepart ORDER BY datepart ASC";
$sqlresults = mysql_unbuffered_query($sql);
while ($data = mysql_fetch_assoc($sqlresults)) {
    $numtweets[$data['datepart']] = $data["count"];
}
mysql_free_result($sqlresults);
// tweet containing links
$sql = "SELECT count(distinct(t.id)) AS count, ";
echo '<fieldset class="if_parameters">';
echo '<legend>Your spreadsheet (CSV) file</legend>';
// NOTE(review): this link escapes '#' and '"' by hand rather than using
// filename_to_url() like the network link below — confirm intentional.
echo '<p><a href="' . str_replace("#", urlencode("#"), str_replace("\"", "%22", $filename)) . '">' . $filename . '</a></p>';
echo '</fieldset>';
// Build the URL <-> hashtag bipartite graph.
$gexf = new Gexf();
$gexf->setTitle("URL-hashtag " . $filename);
$gexf->setEdgeType(GEXF_EDGE_UNDIRECTED);
$gexf->setCreator("tools.digitalmethods.net");
foreach ($urlHashtags as $url => $hashtags) {
    foreach ($hashtags as $hashtag => $frequency) {
        $node1 = new GexfNode($url);
        $node1->addNodeAttribute("type", 'host', $type = "string");
        $gexf->addNode($node1);
        $node2 = new GexfNode($hashtag);
        $node2->addNodeAttribute("type", 'hashtag', $type = "string");
        $gexf->addNode($node2);
        // Edge weight = co-occurrence frequency.
        $edge_id = $gexf->addEdge($node1, $node2, $frequency);
    }
}
$gexf->render();
$filename = get_filename_for_export("hostHashtag", '', 'gexf');
file_put_contents($filename, $gexf->gexfFile);
echo '<fieldset class="if_parameters">';
echo '<legend>Your network (GEXF) file</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?> </body> </html>
while ($res = mysql_fetch_assoc($sqlresults)) {
    // Feed every hashtag pair into the co-word network.
    $coword->addWord($res['h1']);
    $coword->addWord($res['h2']);
    $coword->addCoword($res['h1'], $res['h2'], 1);
}
unset($coword->words); // as we are adding words manually the frequency would be messed up
// Apply frequency / top-unit filters and pick the matching output filename.
if ($esc['shell']['minf'] > 1 && !($esc['shell']['topu'] > 0)) {
    $coword->applyMinFreq($esc['shell']['minf']);
    //$coword->applyMinDegree($esc['shell']['minf']); // Berno: method no longer in use, remains unharmed
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_minFreqOf" . $esc['shell']['minf'], "gdf");
} elseif ($esc['shell']['topu'] > 0) {
    $coword->applyTopUnits($esc['shell']['topu']);
    $filename = get_filename_for_export("hashtagCooc", (isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "") . "_Top" . $esc['shell']['topu'], "gdf");
} else {
    $filename = get_filename_for_export("hashtagCooc", isset($_GET['probabilityOfAssociation']) ? "_normalizedAssociationWeight" : "", "gdf");
}
//print_r($coword);
$lookup = array();
$fp = fopen($filename, 'w');
// UTF-8 byte-order mark.
fwrite($fp, chr(239) . chr(187) . chr(191));
// GDF node table: one row per word with its frequency statistics.
fwrite($fp, "nodedef> name VARCHAR,label VARCHAR,wordFrequency INT,distinctUsersForWord INT,userDiversity FLOAT,wordFrequencyDividedByUniqueUsers FLOAT,wordFrequencyMultipliedByUniqueUsers INT\n");
$counter = 0;
foreach ($coword->wordFrequency as $word => $freq) {
    fwrite($fp, $counter . "," . $word . "," . $freq . "," . $coword->distinctUsersForWord[$word] . "," . $coword->userDiversity[$word] . "," . $coword->wordFrequencyDividedByUniqueUsers[$word] . "," . $coword->wordFrequencyMultipliedByUniqueUsers[$word] . "\n");
    // Remember the numeric node id for the edge table.
    $lookup[$word] = $counter;
    $counter++;
}
unset($coword->wordFrequency);
unset($coword->distinctUsersForWord);
unset($coword->userDiversity);
foreach ($usernames as $username => $frequency) {
    // URL node, labelled with its domain and annotated with the url's
    // status code and user-count attributes collected earlier.
    $node1 = new GexfNode($url);
    $node1->addNodeAttribute("type", 'url', $type = "string");
    $node1->addNodeAttribute('shortlabel', $urlDomain[$url], $type = "string");
    $node1->addNodeAttribute('longlabel', $url, $type = "string");
    $node1->addNodeAttribute('status_code', $urlStatusCode[$url], $type = "string");
    $node1->addNodeAttribute('unique_users', $urlUniqueUsers[$url], $type = "integer");
    $node1->addNodeAttribute('total_users', $urlTotalUsers[$url], $type = "integer");
    $gexf->addNode($node1);
    // User node with its url-count attributes.
    $node2 = new GexfNode($username);
    $node2->addNodeAttribute("type", 'user', $type = "string");
    $node2->addNodeAttribute('shortlabel', $username, $type = "string");
    $node2->addNodeAttribute('longlabel', $username, $type = "string");
    $node2->addNodeAttribute('unique_urls', $userUniqueUrls[$username], $type = "integer");
    $node2->addNodeAttribute('total_urls', $userTotalUrls[$username], $type = "integer");
    $gexf->addNode($node2);
    // Edge weight = frequency of this (url, user) combination.
    $edge_id = $gexf->addEdge($node1, $node2, $frequency);
}
}
$gexf->render();
$filename = get_filename_for_export("urlUser", '', 'gexf');
file_put_contents($filename, $gexf->gexfFile);
echo '<fieldset class="if_parameters">';
echo '<legend>Your network (GEXF) file</legend>';
echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
echo '</fieldset>';
?> </body> </html>
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Export URLs</h1> <?php
// Export one row per (tweet, url) pair with expanded/followed variants.
validate_all_variables();
$filename = get_filename_for_export('urlsExport');
$csv = new CSV($filename, $outputformat);
$csv->writeheader(array('tweet_id', 'url', 'url_expanded', 'url_followed'));
$sql = "SELECT t.id as id, u.url as url, u.url_expanded as url_expanded, u.url_followed as url_followed FROM " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_urls u ";
$sql .= sqlSubset();
$sql .= " AND u.tweet_id = t.id ORDER BY id";
$sqlresults = mysql_unbuffered_query($sql);
$out = "";
if ($sqlresults) {
    while ($data = mysql_fetch_assoc($sqlresults)) {
        $csv->newrow();
        $csv->addfield($data['id'], 'integer');
        $csv->addfield($data['url'], 'string');
        // NOTE(review): this branch writes $data['url'] a second time when
        // a followed URL exists — it looks like another column may have
        // been intended; confirm against the full file.
        if (isset($data['url_followed']) && strlen($data['url_followed']) > 1) {
            $csv->addfield($data['url'], 'string');
        } else {
<script type="text/javascript" language="javascript"> </script> </head> <body> <h1>TCAT :: Mention - Hashtags</h1> <?php
// Relate mentioned users to the hashtags occurring in the mentioning
// tweets (GEXF export).
validate_all_variables();
$collation = current_collation();
$filename = get_filename_for_export("mentionHashtags", "", "gexf");
$sql = "SELECT m.to_user COLLATE {$collation} AS user, LOWER(h.text COLLATE {$collation}) AS hashtag FROM ";
$sql .= $esc['mysql']['dataset'] . "_mentions m, " . $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h ";
$where = "t.id = m.tweet_id AND m.tweet_id = h.tweet_id AND LENGTH(h.text)>1 AND ";
$sql .= sqlSubset($where);
//print $sql."<Br>";
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    // $userHashtags[user][hashtag] = co-occurrence count.
    if (!isset($userHashtags[$res['user']][$res['hashtag']])) {
        $userHashtags[$res['user']][$res['hashtag']] = 0;
    }
    $userHashtags[$res['user']][$res['hashtag']]++;
    // $userCount[user] = total number of (mention, hashtag) rows for the user.
    if (!isset($userCount[$res['user']])) {
        $userCount[$res['user']] = 0;
    }
    $userCount[$res['user']]++;