/**
 * Builds a dynamic, undirected co-word GEXF graph from a time series, writes
 * the rendered graph to $filename and echoes an HTML fieldset linking to it.
 *
 * For every entry of $series, each word/co-word pair yields two nodes and one
 * edge weighted with the pair's frequency. Both nodes and the edge receive a
 * spell of ($time, $time), where $time is the key of the series entry.
 *
 * @param string $filename Path the rendered GEXF is written to; also used in
 *                         the graph title and as the link text.
 * @param array  $series   Map of time key => object exposing getWords() and
 *                         getCowords(). getWords() is indexed by word and
 *                         getCowords() is iterated as
 *                         word => (co-word => frequency).
 *                         NOTE(review): time keys presumably need to match
 *                         GEXF_TIMEFORMAT_DATE -- confirm against the caller.
 */
function getGEXFtimeseries($filename, $series) {
    include_once 'common/Gexf.class.php';

    $gexf = new Gexf();
    $gexf->setTitle("Co-word " . $filename);
    $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED);
    $gexf->setMode(GEXF_MODE_DYNAMIC);
    $gexf->setTimeFormat(GEXF_TIMEFORMAT_DATE);
    $gexf->setCreator("tools.digitalmethods.net");

    foreach ($series as $time => $cw) {
        $w = $cw->getWords();
        $cowordMap = $cw->getCowords();

        foreach ($cowordMap as $word => $cowords) {
            foreach ($cowords as $coword => $coword_frequency) {
                // Source node: the word itself.
                $node1 = new GexfNode($word);
                if (isset($w[$word])) {
                    $node1->addNodeAttribute("word_frequency", $w[$word], "int");
                }
                $gexf->addNode($node1);
                // The spell goes on the node object stored in the graph,
                // looked up by id, not on the local $node1 instance.
                $gexf->nodeObjects[$node1->id]->addNodeSpell($time, $time);

                // Target node: the co-word. Its frequency must be looked up
                // under the co-word's own key; the previous code stored the
                // source word's frequency here.
                $node2 = new GexfNode($coword);
                if (isset($w[$coword])) {
                    $node2->addNodeAttribute("word_frequency", $w[$coword], "int");
                }
                $gexf->addNode($node2);
                $gexf->nodeObjects[$node2->id]->addNodeSpell($time, $time);

                $edge_id = $gexf->addEdge($node1, $node2, $coword_frequency);
                $gexf->edgeObjects[$edge_id]->addEdgeSpell($time, $time);
            }
        }
    }

    $gexf->render();
    file_put_contents($filename, $gexf->gexfFile);

    echo '<fieldset class="if_parameters">';
    echo '<legend>Your co-hashtag time-series File</legend>';
    echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
    echo '</fieldset>';
}
/**
 * Renders this object's co-word data as a GEXF graph and returns the result.
 *
 * Every key of $this->cowords becomes a node. A word with an empty co-word
 * list is added as a node with no edges; otherwise each (word, co-word) pair
 * adds both nodes and an edge weighted with the pair's frequency. Nodes get a
 * "word_frequency" attribute when $this->words has an entry for them, plus
 * whatever addNodeExtraNodeAttributes() contributes.
 *
 * @param string $title Suffix appended to "Co-word " for the graph title.
 * @return mixed The Gexf object's gexfFile property after render().
 */
function getCowordsAsGexf($title = "") {
    include_once 'Gexf.class.php';

    $graph = new Gexf();
    $graph->setTitle("Co-word " . $title);
    $graph->setEdgeType(GEXF_EDGE_UNDIRECTED);
    $graph->setMode(GEXF_MODE_DYNAMIC);
    $graph->setTimeFormat(GEXF_TIMEFORMAT_DATE);
    $graph->setCreator("tools.digitalmethods.net");

    foreach ($this->cowords as $term => $neighbours) {
        // A term without co-words still appears in the graph, just unconnected.
        if (empty($neighbours)) {
            $lonely = new GexfNode($term);
            if (isset($this->words[$term])) {
                $lonely->addNodeAttribute("word_frequency", $this->words[$term], "int");
            }
            $this->addNodeExtraNodeAttributes($lonely, $term);
            $graph->addNode($lonely);
            continue;
        }

        foreach ($neighbours as $neighbour => $weight) {
            // The source node is rebuilt and re-added for every pair.
            $source = new GexfNode($term);
            if (isset($this->words[$term])) {
                $source->addNodeAttribute("word_frequency", $this->words[$term], "int");
            }
            $this->addNodeExtraNodeAttributes($source, $term);
            $graph->addNode($source);

            $target = new GexfNode($neighbour);
            if (isset($this->words[$neighbour])) {
                $target->addNodeAttribute("word_frequency", $this->words[$neighbour], "int");
            }
            $this->addNodeExtraNodeAttributes($target, $neighbour);
            $graph->addNode($target);

            $graph->addEdge($source, $target, $weight);
        }
    }

    $graph->render();

    return $graph->gexfFile;
}
*/ // using Erik Borra's very useful Gexf class require_once "Gexf.class.php"; // search query and api key are in: include "conf.php"; // prepare term $term = urlencode($term); $term = preg_replace("/[^a-zA-Z.]/", "", $term); $folder = getcwd() . "/json_" . $term; $list = scandir($folder); // shave off . and .. array_shift($list); array_shift($list); // initialize gexf object $gexf = new Gexf(); $gexf->setTitle("NY Times Categories"); $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED); $gexf->setMode(GEXF_MODE_STATIC); $gexf->setTimeFormat(GEXF_TIMEFORMAT_DATE); $gexf->setCreator("polsys.net"); // iterate over all JSON files foreach ($list as $fn) { $json = file_get_contents($folder . "/" . $fn); $json = json_decode($json); // iterate over keyword matrix (half) for ($i = 0; $i < count($json->keywords); $i++) { $node1 = new GexfNode(strtolower($json->keywords[$i]->value)); $node1->addNodeAttribute("type", $json->keywords[$i]->name, $type = "string"); $gexf->addNode($node1); for ($j = $i; $j < count($json->keywords); $j++) { $node2 = new GexfNode(strtolower($json->keywords[$j]->value));
if (!isset($userCount[$res['user']])) { $userCount[$res['user']] = 0; } $userCount[$res['user']]++; if (!isset($hashtagCount[$res['h1']])) { $hashtagCount[$res['h1']] = 0; } $hashtagCount[$res['h1']]++; $languages[$res['user']] = $res['language']; $locations[$res['user']] = $res['location']; $from_user_timezone[$res['user']] = $res['timezone']; $from_user_utcoffset[$res['user']] = $res['utcoffset']; } mysql_free_result($sqlresults); $gexf = new Gexf(); $gexf->setTitle("Hashtag - user " . $filename); $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED); $gexf->setCreator("tools.digitalmethods.net"); foreach ($userHashtags as $user => $hashtags) { foreach ($hashtags as $hashtag => $frequency) { $node1 = new GexfNode($user); $node1->id = md5('n-user_' . $user); $node1->addNodeAttribute("type", 'user', $type = "string"); $node1->addNodeAttribute("userFrequency", $userCount[$user], $type = "int"); $node1->addNodeAttribute("hashtagFrequency", 0, $type = "int"); $node1->addNodeAttribute("language", $languages[$user], $type = "string"); $node1->addNodeAttribute("location", $locations[$user], $type = "string"); $node1->addNodeAttribute("from_user_utcoffset", $from_user_utcoffset[$user], $type = "string"); $node1->addNodeAttribute("from_user_timezone", $from_user_timezone[$user], $type = "string"); $gexf->addNode($node1); $node2 = new GexfNode($hashtag);
$sql .= $esc['mysql']['dataset'] . "_tweets t, " . $esc['mysql']['dataset'] . "_hashtags h "; $where = "t.id = h.tweet_id AND "; $sql .= sqlSubset($where); $sqlresults = mysql_unbuffered_query($sql); while ($res = mysql_fetch_assoc($sqlresults)) { $res['source'] = preg_replace("/<.+>/U", "", $res['source']); $res['source'] = preg_replace("/[ \\s\t]+/", " ", $res['source']); $res['source'] = trim($res['source']); if (!isset($sourcesHashtags[$res['source']][$res['hashtag']])) { $sourcesHashtags[$res['source']][$res['hashtag']] = 0; } $sourcesHashtags[$res['source']][$res['hashtag']]++; } mysql_free_result($sqlresults); $gexf = new Gexf(); $gexf->setTitle("source-hashtag " . $filename); $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED); $gexf->setCreator("tools.digitalmethods.net"); foreach ($sourcesHashtags as $source => $hashtags) { foreach ($hashtags as $hashtag => $frequency) { $node1 = new GexfNode($source); $node1->addNodeAttribute("type", 'source', $type = "string"); $gexf->addNode($node1); $node2 = new GexfNode($hashtag); $node2->addNodeAttribute("type", 'hashtag', $type = "string"); $gexf->addNode($node2); $edge_id = $gexf->addEdge($node1, $node2, $frequency); } } $gexf->render(); file_put_contents($filename, $gexf->gexfFile);
} foreach ($usernames as $username => $frequency) { if (!isset($userUniqueUrls[$username])) { $userUniqueUrls[$username] = 0; } if (!isset($userTotalUrls[$username])) { $userTotalUrls[$username] = 0; } $urlUniqueUsers[$url]++; $urlTotalUsers[$url] += $frequency; $userUniqueUrls[$username]++; $userTotalUrls[$username] += $frequency; } } $gexf = new Gexf(); $gexf->setTitle("URL-user " . $filename); $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED); $gexf->setCreator("tools.digitalmethods.net"); foreach ($urlUsernames as $url => $usernames) { foreach ($usernames as $username => $frequency) { $node1 = new GexfNode($url); $node1->addNodeAttribute("type", 'url', $type = "string"); $node1->addNodeAttribute('shortlabel', $urlDomain[$url], $type = "string"); $node1->addNodeAttribute('longlabel', $url, $type = "string"); $node1->addNodeAttribute('status_code', $urlStatusCode[$url], $type = "string"); $node1->addNodeAttribute('unique_users', $urlUniqueUsers[$url], $type = "integer"); $node1->addNodeAttribute('total_users', $urlTotalUsers[$url], $type = "integer"); $gexf->addNode($node1); $node2 = new GexfNode($username); $node2->addNodeAttribute("type", 'user', $type = "string"); $node2->addNodeAttribute('shortlabel', $username, $type = "string");
while ($res = mysql_fetch_assoc($sqlresults)) { $csv->newrow(); $csv->addfield($res['frequency']); $csv->addfield($res['hashtag']); $csv->addfield($res['domain']); $csv->writerow(); $urlHashtags[$res['domain']][$res['hashtag']] = $res['frequency']; } mysql_free_result($sqlresults); $csv->close(); echo '<fieldset class="if_parameters">'; echo '<legend>Your spreadsheet (CSV) file</legend>'; echo '<p><a href="' . str_replace("#", urlencode("#"), str_replace("\"", "%22", $filename)) . '">' . $filename . '</a></p>'; echo '</fieldset>'; $gexf = new Gexf(); $gexf->setTitle("URL-hashtag " . $filename); $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED); $gexf->setCreator("tools.digitalmethods.net"); foreach ($urlHashtags as $url => $hashtags) { foreach ($hashtags as $hashtag => $frequency) { $node1 = new GexfNode($url); $node1->addNodeAttribute("type", 'host', $type = "string"); $gexf->addNode($node1); $node2 = new GexfNode($hashtag); $node2->addNodeAttribute("type", 'hashtag', $type = "string"); $gexf->addNode($node2); $edge_id = $gexf->addEdge($node1, $node2, $frequency); } } $gexf->render(); $filename = get_filename_for_export("hostHashtag", '', 'gexf');
$where = "t.id = h.tweet_id AND "; $sql .= sqlSubset($where); $sqlresults = mysql_unbuffered_query($sql); while ($res = mysql_fetch_assoc($sqlresults)) { //print_r($res); exit; $res['language'] = preg_replace("/<.+>/U", "", $res['language']); $res['language'] = preg_replace("/[ \\s\t]+/", " ", $res['language']); $res['language'] = trim($res['language']); if (!isset($languagesHashtags[$res['language']][$res['hashtag']])) { $languagesHashtags[$res['language']][$res['hashtag']] = 0; } $languagesHashtags[$res['language']][$res['hashtag']]++; } mysql_free_result($sqlresults); $gexf = new Gexf(); $gexf->setTitle("from_user_lang-hashtag " . $filename); $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED); $gexf->setCreator("tools.digitalmethods.net"); foreach ($languagesHashtags as $language => $hashtags) { foreach ($hashtags as $hashtag => $frequency) { $node1 = new GexfNode($language); $node1->addNodeAttribute("type", 'from_user_lang', $type = "string"); $gexf->addNode($node1); $node2 = new GexfNode($hashtag); $node2->addNodeAttribute("type", 'hashtag', $type = "string"); $gexf->addNode($node2); $edge_id = $gexf->addEdge($node1, $node2, $frequency); } } $gexf->render(); file_put_contents($filename, $gexf->gexfFile);
foreach ($paths_node_counts_distribution as $length => $frequency) { print "Networks with {$length} nodes: {$frequency}<br>"; } print "<br><b>Now analyzing only networks with at least {$min_nr_of_nodes} nodes</b><br><br>"; print "Networks in analysis: " . array_sum($paths_node_counts_distribution_tmp) . "<br>"; print "Networks in analysis without root: " . $rootsnotfound . "<bR>"; print "<bR>"; ksort($network_types_stats); foreach ($network_types_stats as $type => $count) { print "Networks of type {$type}: {$count}<br>"; } flush(); // @todo percentage of different structures (pure stars, pure chains, mixed structure). Ratio between average degree and number of nodes. $filename = get_filename_for_export("interactionGraph", "min" . $esc['shell']['minf'] . "nodes", "gexf"); $gexf = new Gexf(); $gexf->setTitle("interaction graph " . $filename); $gexf->setEdgeType(GEXF_EDGE_DIRECTED); $gexf->setCreator("tools.digitalmethods.net"); // label: user, attribute: tweet // min, max, mean of what?? ditributions of nr of nodes in the network foreach ($links as $link) { $tweet_id = $link[0]; $in_reply_to_status_id = $link[1]; if (array_search($in_reply_to_status_id, $todo) === false && array_search($tweet_id, $todo) === false) { continue; } $node1 = new GexfNode($in_reply_to_status_id); // if node already exists, we want the attributes added that node instead $id = $gexf->nodeExists($node1); if ($id !== false && isset($gexf->nodeObjects[$id])) { $node1 = $gexf->nodeObjects[$id];
if (!isset($userHashtags[$res['user']][$res['hashtag']])) { $userHashtags[$res['user']][$res['hashtag']] = 0; } $userHashtags[$res['user']][$res['hashtag']]++; if (!isset($userCount[$res['user']])) { $userCount[$res['user']] = 0; } $userCount[$res['user']]++; if (!isset($hashtagCount[$res['hashtag']])) { $hashtagCount[$res['hashtag']] = 0; } $hashtagCount[$res['hashtag']]++; } mysql_free_result($sqlresults); $gexf = new Gexf(); $gexf->setTitle("Hashtag - mentions " . $filename); $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED); $gexf->setCreator("tools.digitalmethods.net"); foreach ($userHashtags as $user => $hashtags) { foreach ($hashtags as $hashtag => $frequency) { $node1 = new GexfNode($user); $node1->id = md5('n-user_' . $user); $node1->addNodeAttribute("type", 'user', $type = "string"); $node1->addNodeAttribute("userFrequency", $userCount[$user], $type = "int"); $node1->addNodeAttribute("hashtagFrequency", 0, $type = "int"); $gexf->addNode($node1); $node2 = new GexfNode($hashtag); $node2->id = md5('n-hashtag_' . $hashtag); $node2->addNodeAttribute("type", 'hashtag', $type = "string"); $node2->addNodeAttribute("userFrequency", 0, $type = "int"); $node2->addNodeAttribute("hashtagFrequency", $hashtagCount[$hashtag], $type = "int");