/**
 * Build a dynamic co-word GEXF graph from a time series, write it to $filename
 * and print an HTML fieldset linking to the written file.
 *
 * For every time slice in $series, each word/co-word pair yields two nodes and
 * one edge weighted with the co-word frequency. Both nodes and the edge receive
 * a spell that starts and ends at that slice's key.
 *
 * @param string $filename Path the rendered GEXF is written to; also used in
 *                         the graph title and in the printed link.
 * @param array  $series   Map of time key => object exposing getWords() and
 *                         getCowords(). getWords() is indexed by word and its
 *                         values are stored as the "word_frequency" attribute.
 *
 * @return void
 */
function getGEXFtimeseries($filename, $series) {
    include_once 'common/Gexf.class.php';

    $gexf = new Gexf();
    $gexf->setTitle("Co-word " . $filename);
    $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED);
    $gexf->setMode(GEXF_MODE_DYNAMIC);
    $gexf->setTimeFormat(GEXF_TIMEFORMAT_DATE);
    $gexf->setCreator("tools.digitalmethods.net");

    foreach ($series as $time => $cw) {
        $w = $cw->getWords();
        $cowordsByWord = $cw->getCowords();

        foreach ($cowordsByWord as $word => $cowords) {
            foreach ($cowords as $coword => $coword_frequency) {
                $node1 = new GexfNode($word);
                if (isset($w[$word])) {
                    $node1->addNodeAttribute("word_frequency", $w[$word], "int");
                }
                $gexf->addNode($node1);
                // Attach the spell to the node stored in the graph, looked up by id.
                $gexf->nodeObjects[$node1->id]->addNodeSpell($time, $time);

                $node2 = new GexfNode($coword);
                if (isset($w[$coword])) {
                    // Use the co-word's own frequency. The previous code read
                    // $w[$word] here, giving every co-word node the frequency
                    // of the word it was paired with.
                    $node2->addNodeAttribute("word_frequency", $w[$coword], "int");
                }
                $gexf->addNode($node2);
                $gexf->nodeObjects[$node2->id]->addNodeSpell($time, $time);

                $edge_id = $gexf->addEdge($node1, $node2, $coword_frequency);
                $gexf->edgeObjects[$edge_id]->addEdgeSpell($time, $time);
            }
        }
    }

    $gexf->render();
    file_put_contents($filename, $gexf->gexfFile);

    echo '<fieldset class="if_parameters">';
    echo '<legend>Your co-hashtag time-series File</legend>';
    echo '<p><a href="' . filename_to_url($filename) . '">' . $filename . '</a></p>';
    echo '</fieldset>';
}
* @license GPLv3 <http://www.gnu.org/licenses/gpl.txt> */ // using Erik Borra's very useful Gexf class require_once "Gexf.class.php"; // search query and api key are in: include "conf.php"; // prepare term $term = urlencode($term); $term = preg_replace("/[^a-zA-Z.]/", "", $term); $folder = getcwd() . "/json_" . $term; $list = scandir($folder); // shave off . and .. array_shift($list); array_shift($list); // initialize gexf object $gexf = new Gexf(); $gexf->setTitle("NY Times Categories"); $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED); $gexf->setMode(GEXF_MODE_STATIC); $gexf->setTimeFormat(GEXF_TIMEFORMAT_DATE); $gexf->setCreator("polsys.net"); // iterate over all JSON files foreach ($list as $fn) { $json = file_get_contents($folder . "/" . $fn); $json = json_decode($json); // iterate over keyword matrix (half) for ($i = 0; $i < count($json->keywords); $i++) { $node1 = new GexfNode(strtolower($json->keywords[$i]->value)); $node1->addNodeAttribute("type", $json->keywords[$i]->name, $type = "string"); $gexf->addNode($node1); for ($j = $i; $j < count($json->keywords); $j++) {
// Pair every tweet's source with each of its hashtags (both lower-cased by the query).
$tablePrefix = $esc['mysql']['dataset'];
$where = "t.id = h.tweet_id AND ";
$sql = "SELECT LOWER(t.source COLLATE {$collation}) AS source, LOWER(h.text COLLATE {$collation}) AS hashtag FROM "
    . $tablePrefix . "_tweets t, " . $tablePrefix . "_hashtags h "
    . sqlSubset($where);

// Tally how often each hashtag occurs per cleaned-up source.
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    // Remove <...> spans from the source, collapse whitespace runs to one space, trim.
    $cleanSource = trim(
        preg_replace("/[ \\s\t]+/", " ", preg_replace("/<.+>/U", "", $res['source']))
    );
    $tag = $res['hashtag'];

    if (isset($sourcesHashtags[$cleanSource][$tag])) {
        $sourcesHashtags[$cleanSource][$tag]++;
    } else {
        $sourcesHashtags[$cleanSource][$tag] = 1;
    }
}
mysql_free_result($sqlresults);

// Build the graph: one node typed 'source', one typed 'hashtag', and an edge
// between them weighted by the tallied frequency.
$gexf = new Gexf();
$gexf->setTitle("source-hashtag " . $filename);
$gexf->setEdgeType(GEXF_EDGE_UNDIRECTED);
$gexf->setCreator("tools.digitalmethods.net");

foreach ($sourcesHashtags as $source => $hashtags) {
    foreach ($hashtags as $hashtag => $frequency) {
        $sourceNode = new GexfNode($source);
        $sourceNode->addNodeAttribute("type", 'source', "string");
        $gexf->addNode($sourceNode);

        $hashtagNode = new GexfNode($hashtag);
        $hashtagNode->addNodeAttribute("type", 'hashtag', "string");
        $gexf->addNode($hashtagNode);

        $gexf->addEdge($sourceNode, $hashtagNode, $frequency);
    }
}

$gexf->render();
/**
 * Render the co-word network held in $this->cowords as GEXF.
 *
 * A word with no co-words is added as a single node. Otherwise each
 * word/co-word pair adds both nodes and an edge weighted by the co-word
 * frequency. A node gets a "word_frequency" attribute when
 * $this->words has an entry for it, and is then passed to
 * addNodeExtraNodeAttributes().
 *
 * @param string $title Text appended to "Co-word " to form the graph title.
 *
 * @return mixed The gexfFile property of the Gexf object after render().
 */
function getCowordsAsGexf($title = "") {
    include_once 'Gexf.class.php';

    $gexf = new Gexf();
    $gexf->setTitle("Co-word " . $title);
    $gexf->setEdgeType(GEXF_EDGE_UNDIRECTED);
    $gexf->setMode(GEXF_MODE_DYNAMIC);
    $gexf->setTimeFormat(GEXF_TIMEFORMAT_DATE);
    $gexf->setCreator("tools.digitalmethods.net");

    foreach ($this->cowords as $term => $neighbours) {
        if (empty($neighbours)) {
            // Word without co-words: add the node alone, no edges.
            $lonely = new GexfNode($term);
            if (isset($this->words[$term])) {
                $lonely->addNodeAttribute("word_frequency", $this->words[$term], "int");
            }
            $this->addNodeExtraNodeAttributes($lonely, $term);
            $gexf->addNode($lonely);
            continue;
        }

        foreach ($neighbours as $neighbour => $weight) {
            // Build and register both endpoints (word first, then co-word),
            // then connect them.
            $endpoints = array();
            foreach (array($term, $neighbour) as $label) {
                $node = new GexfNode($label);
                if (isset($this->words[$label])) {
                    $node->addNodeAttribute("word_frequency", $this->words[$label], "int");
                }
                $this->addNodeExtraNodeAttributes($node, $label);
                $gexf->addNode($node);
                $endpoints[] = $node;
            }
            $gexf->addEdge($endpoints[0], $endpoints[1], $weight);
        }
    }

    $gexf->render();

    return $gexf->gexfFile;
}
// print stats foreach ($paths_node_counts_distribution as $length => $frequency) { print "Networks with {$length} nodes: {$frequency}<br>"; } print "<br><b>Now analyzing only networks with at least {$min_nr_of_nodes} nodes</b><br><br>"; print "Networks in analysis: " . array_sum($paths_node_counts_distribution_tmp) . "<br>"; print "Networks in analysis without root: " . $rootsnotfound . "<bR>"; print "<bR>"; ksort($network_types_stats); foreach ($network_types_stats as $type => $count) { print "Networks of type {$type}: {$count}<br>"; } flush(); // @todo percentage of different structures (pure stars, pure chains, mixed structure). Ratio between average degree and number of nodes. $filename = get_filename_for_export("interactionGraph", "min" . $esc['shell']['minf'] . "nodes", "gexf"); $gexf = new Gexf(); $gexf->setTitle("interaction graph " . $filename); $gexf->setEdgeType(GEXF_EDGE_DIRECTED); $gexf->setCreator("tools.digitalmethods.net"); // label: user, attribute: tweet // min, max, mean of what?? ditributions of nr of nodes in the network foreach ($links as $link) { $tweet_id = $link[0]; $in_reply_to_status_id = $link[1]; if (array_search($in_reply_to_status_id, $todo) === false && array_search($tweet_id, $todo) === false) { continue; } $node1 = new GexfNode($in_reply_to_status_id); // if node already exists, we want the attributes added that node instead $id = $gexf->nodeExists($node1); if ($id !== false && isset($gexf->nodeObjects[$id])) {