echo 'checked="checked"'; } ?> /> discard "other" from diagram </div> <div class="form_row"> <input type="submit" /> </div> </form> <?php validate_all_variables(); global $collation; $collation = current_collation(); // make sure that all columns are different if ($_GET["col1_type"] == $_GET["col2_type"] || $_GET["col2_type"] == $_GET["col3_type"] || $_GET["col1_type"] == $_GET["col3_type"]) { echo "all columns must be different"; exit; } // get the full tweet count $sql = "SELECT count(distinct(t.id)) as count FROM " . $esc['mysql']['dataset'] . "_tweets t "; $sql .= sqlSubset(); $sqlresults = mysql_query($sql); $data = mysql_fetch_assoc($sqlresults); $fulltweetcount = $data["count"]; // process colums getFlow($_GET["col1_type"], $_GET["col1_cutoff"], $_GET["col2_type"], $_GET["col2_cutoff"], $_GET["discard_other"], 0); if ($_GET["col3_type"] != "none") { getFlow($_GET["col2_type"], $_GET["col2_cutoff"], $_GET["col3_type"], $_GET["col3_cutoff"], $_GET["discard_other"], 1);
/**
 * Print the ten most frequent hashtags as one comma-separated line.
 *
 * Hashtag texts are lower-cased and counted in SQL over the hashtags and
 * tweets tables of the dataset named in $esc['mysql']['dataset'], restricted
 * by whatever clause sqlSubset() builds. Output has the form
 * "tag (count), tag (count)". Nothing is printed when the query fails or
 * returns no rows.
 *
 * NOTE(review): values are printed unescaped -- if this output ends up in an
 * HTML page, confirm whether it should pass through htmlspecialchars().
 */
function printTopHashtags() {
    global $esc;

    $collation = current_collation();
    $dataset = $esc['mysql']['dataset'];

    $sql = "SELECT COUNT(hashtags.text COLLATE {$collation}) AS count, LOWER(hashtags.text COLLATE {$collation}) AS toget ";
    $sql .= "FROM " . $dataset . "_hashtags hashtags, " . $dataset . "_tweets t ";
    $sql .= sqlSubset("t.id = hashtags.tweet_id AND ");
    $sql .= " GROUP BY toget ORDER BY count DESC limit 10";

    $rec = mysql_query($sql);
    if (!$rec) {
        // mysql_query() returns false on error; treat that like an empty
        // result instead of handing a boolean to mysql_fetch_assoc()
        return;
    }

    $top = array();
    while ($res = mysql_fetch_assoc($rec)) {
        $top[] = $res['toget'] . " (" . $res['count'] . ")";
    }
    print implode(", ", $top);
}
/**
 * Map a tweet timestamp to the label of the interval bucket it falls into.
 *
 * @param string $time     timestamp string that strtotime() can parse
 * @param string $interval "hourly", "weekly", "monthly", "yearly", "overall"
 *                         or "custom"; any other value means daily
 * @return string label used as the grouping key
 */
function generateIntervalLabel($time, $interval) {
    $timestamp = strtotime($time);
    switch ($interval) {
        case "hourly":
            return strftime("%Y-%m-%d %Hh", $timestamp);
        case "weekly":
            // %W = week number of the year (weeks start on Monday).
            // strftime's %u is the day of the week, unlike MySQL's
            // DATE_FORMAT where %u is the week number.
            return strftime("%Y %W", $timestamp);
        case "monthly":
            return strftime("%Y-%m", $timestamp);
        case "yearly":
            return strftime("%Y", $timestamp);
        case "overall":
            return "overall";
        case "custom":
            return groupByInterval(strftime("%Y-%m-%d", $timestamp));
        default:
            // daily
            return strftime("%Y-%m-%d", $timestamp);
    }
}

/**
 * Build a per-interval frequency table for one kind of entity and write it
 * to $filename through the project's CSV class.
 *
 * For "hashtag", "urls", "hosts" and "mention" the table comes from
 * frequencyTable(). For every other value the tweets selected by sqlSubset()
 * are read and grouped in PHP according to the global $interval:
 *   - "user":         tweets per (lower-cased) author
 *   - "user-mention": per user, how often they were mentioned and how many
 *                     tweets they wrote
 *   - "retweet":      occurrences of identical tweet texts
 *
 * "user-mention" has its own four-column layout; any other $what must be
 * listed in the global $tsv array, otherwise the script dies with
 * 'no valid output format found'.
 *
 * @param string $what     kind of entity to export (see above)
 * @param string $filename passed to the CSV constructor together with the
 *                         global $outputformat
 * @return void
 */
function generate($what, $filename) {
    global $tsv, $esc, $titles, $interval, $outputformat;
    require_once __DIR__ . '/CSV.class.php';

    $results = array();
    $csv = new CSV($filename, $outputformat);
    $collation = current_collation();

    // get frequencies
    if ($what == "hashtag") {
        $results = frequencyTable("hashtags", "text");
    } elseif ($what == "urls") {
        $results = frequencyTable("urls", "url_followed");
    } elseif ($what == "hosts") {
        $results = frequencyTable("urls", "domain");
    } elseif ($what == "mention") {
        $results = frequencyTable("mentions", "to_user");
    } else {
        // @todo, this could also use database grouping
        $sql = "SELECT id,text COLLATE {$collation} as text,created_at,from_user_name COLLATE {$collation} as from_user_name FROM " . $esc['mysql']['dataset'] . "_tweets t ";
        $sql .= sqlSubset();

        $rec = mysql_query($sql);
        if ($rec && mysql_num_rows($rec) > 0) {
            // extract desired things ($what) and group per interval
            while ($res = mysql_fetch_assoc($rec)) {
                $group = generateIntervalLabel($res['created_at'], $interval);
                $author = strtolower($res['from_user_name']);

                switch ($what) {
                    case "user":
                        $results[$group][] = $author;
                        break;
                    case "user-mention":
                        foreach (get_replies($res['text']) as $mention) {
                            $results[$group]['mentions'][] = $mention;
                        }
                        $results[$group]['users'][] = $author;
                        break;
                    case "retweet":
                        $results[$group][] = $res['text']; // TODO, write stemming function
                        break;
                    default:
                        break;
                }
            }
        }

        // count frequency of occurrence of thing, per interval
        if ($what != "user-mention") {
            foreach ($results as $group => $things) {
                $counted = array_count_values($things);
                arsort($counted);
                $results[$group] = $counted;
            }
        }
    }

    // optional " with search ..." part of the title line
    $q = !empty($esc['shell']['query']) ? " with search " . $esc['shell']['query'] : "";
    $period = " from " . $esc['date']["startdate"] . " to " . $esc['date']["enddate"];

    if ($what == "user-mention") {
        // network output for users
        foreach ($results as $group => $things) {
            // a group whose tweets mention nobody has no 'mentions' entry
            $mentionCounts = isset($things['mentions']) ? array_count_values($things['mentions']) : array();
            $tweetCounts = isset($things['users']) ? array_count_values($things['users']) : array();

            // user => array(times mentioned, tweets written), kept as strings
            $perUser = array();

            // add all from_user_names
            foreach ($tweetCounts as $user => $count) {
                $mentioned = isset($mentionCounts["@" . $user]) ? $mentionCounts["@" . $user] : 0;
                $perUser[$user] = array((string) $mentioned, (string) $count);
            }

            // add all users which were mentioned but did not tweet in the set
            foreach ($mentionCounts as $user => $count) {
                $user = str_replace("@", "", $user);
                if (!isset($perUser[$user])) {
                    $perUser[$user] = array((string) $count, "0");
                }
            }

            ksort($perUser);
            $results[$group] = $perUser;
        }

        if (isset($titles[$what])) {
            $csv->writeheader(array($titles[$what] . $q . $period));
        }
        $csv->writeheader(array("date", "user", "mentions", "tweets"));

        foreach ($results as $group => $users) {
            foreach ($users as $user => $counts) {
                $csv->newrow();
                $csv->addfield($group);
                $csv->addfield($user);
                $csv->addfield($counts[0]);
                $csv->addfield($counts[1]);
                $csv->writerow();
            }
        }
    } elseif (in_array($what, $tsv)) {
        // write tsv output
        ksort($results);

        if (isset($titles[$what])) {
            $csv->writeheader(array($titles[$what] . " for " . $esc['shell']['datasetname'] . $q . $period));
        }

        if ($what == "urls") {
            $csv->writeheader(array("date", "frequency", "tweetedurl"));
        } elseif ($what == "hosts") {
            $csv->writeheader(array("date", "frequency", "domain", "name"));
        } else {
            $csv->writeheader(array("date", "frequency", $what));
        }

        foreach ($results as $group => $things) {
            arsort($things);
            foreach ($things as $thing => $count) {
                // skip empty labels and anything below the minimum frequency
                if (empty($thing) || $count < $esc['shell']['minf']) {
                    continue;
                }
                $csv->newrow();
                $csv->addfield($group);
                $csv->addfield($count);
                $csv->addfield($thing);
                $csv->writerow();
            }
        }
    } else {
        die('no valid output format found');
    }

    $csv->close();
}
/**
 * Print the ten most frequent hashtags -- or, when the global $method is
 * "word", the ten most frequent entries of the dataset's _pos table -- as one
 * comma-separated line of the form "text (count), text (count)".
 *
 * Hashtags are lower-cased before counting; _pos texts are counted as stored.
 * Rows are restricted by whatever clause sqlSubset() builds. Nothing is
 * printed when the query fails or returns no rows.
 *
 * NOTE(review): values are printed unescaped -- if this output ends up in an
 * HTML page, confirm whether it should pass through htmlspecialchars().
 */
function printTopHashtags() {
    global $esc, $method;

    $collation = current_collation();
    $dataset = $esc['mysql']['dataset'];

    // Only the text and its count are selected: a per-day date column would
    // be a non-aggregated column outside GROUP BY (rejected by MySQL's
    // ONLY_FULL_GROUP_BY mode) and is not needed for the output.
    if ($method == "word") {
        $sql = "SELECT COUNT(h.text COLLATE {$collation}) AS count, h.text COLLATE {$collation} AS toget ";
        $sql .= "FROM " . $dataset . "_pos h, " . $dataset . "_tweets t ";
        $where = "t.id = h.tweet_id AND ";
    } else {
        $sql = "SELECT COUNT(hashtags.text COLLATE {$collation}) AS count, LOWER(hashtags.text COLLATE {$collation}) AS toget ";
        $sql .= "FROM " . $dataset . "_hashtags hashtags, " . $dataset . "_tweets t ";
        $where = "t.id = hashtags.tweet_id AND ";
    }
    $sql .= sqlSubset($where);
    $sql .= " GROUP BY toget ORDER BY count DESC limit 10";

    flush();

    $rec = mysql_query($sql);
    if (!$rec) {
        // mysql_query() returns false on error; treat that like an empty
        // result instead of handing a boolean to mysql_fetch_assoc()
        return;
    }

    $top = array();
    while ($res = mysql_fetch_assoc($rec)) {
        $top[] = $res['toget'] . " (" . $res['count'] . ")";
    }
    print implode(", ", $top);
}