<?php
/*
 * Simple coword calculation, all in memory.
 *
 * WARNING: Because of PHP's very, very bad UTF-8 support, splitting tweets
 * into words only works reliably for tweets consisting solely of Latin
 * characters (i.e. English).
 */

// Disabled usage example: the `if (0)` guard means this block never runs.
// It calls setDocuments() with three sample strings, adds a fourth document
// containing punctuation and digits, calls iterate(), and then dumps the
// results of getCowordsAsCsv() and getWordsAsCsv() with var_export().
if (0) {
    $coword = new Coword();
    $coword->setDocuments(array("bla bla bla test test clash", "test test test bla bla", "clash bla test"));
    $coword->addDocument("bla, !test 345 cla5sh");
    $coword->iterate();
    var_export($coword->getCowordsAsCsv());
    print "\n\n";
    var_export($coword->getWordsAsCsv());
    print "\n";
}

// Coword: the in-memory coword calculator described in the file header.
// Only the opening property declarations are visible in this chunk; the
// methods used by the example above are defined further down.
class Coword {
    // Input documents; the disabled example above supplies plain strings.
    public $documents = array();
    // NOTE(review): presumably the characters treated as punctuation when
    // splitting documents into words - confirm against the code that reads it.
    public $punctuation = array();
    // Option fields declared without a default, so they start out as null.
    // NOTE(review): the names suggest tokenisation and filtering switches
    // (hashtag handling, stop words, minimum length/frequency) - confirm
    // against the methods that read them.
    public $hashtags_are_separate_words;
    public $extract_only_hashtags;
    public $remove_stop_words;
    public $min_word_length;
    public $min_word_frequency;
    // NOTE(review): on the next line everything after the first `//` shares one
    // physical line, so PHP treats it as comment text. It reads like two more
    // property declarations ($cowords, $document_word_frequencies) whose line
    // breaks were lost - confirm against the upstream file.
    public $words = array(); // holds word frequencies public $cowords = array(); // holds coword frequencies public $document_word_frequencies = array();
</head> <body> <h1>TCAT :: co-hashtags</h1> <?php
// Page script for the "co-hashtags" view: validates the request variables,
// applies defaults and prepares a Coword instance.
// validate_all_variables(), current_collation(), sqlSubset() and $esc are
// defined outside this chunk.
validate_all_variables();

// Apply defaults. empty() is also true for 0 and "0", so those values are
// replaced by the default as well.
// NOTE(review): 'minf' presumably means minimum frequency - confirm.
if (empty($esc['shell']['minf'])) {
    $esc['shell']['minf'] = 4;
}
if (empty($esc['shell']['topu'])) {
    $esc['shell']['topu'] = 0;
}

include_once 'common/Coword.class.php';
$coword = new Coword();
// Explicitly switch off the Coword option named countWordOncePerDocument.
$coword->countWordOncePerDocument = FALSE;
$collation = current_collation();

// NOTE(review): everything after the `//` on the next line shares one physical
// line, so PHP treats it as comment text. It reads like statements whose line
// breaks were lost: a query selecting, per lower-cased hashtag, COUNT and
// COUNT(DISTINCT) of from_user_id, followed by a fetch loop (cut off in this
// chunk) that fills $coword->distinctUsersForWord, userDiversity and
// wordFrequency. Confirm against the upstream file.
// NOTE(review): mysql_unbuffered_query()/mysql_fetch_assoc() belong to
// ext/mysql, which was removed in PHP 7.0. Also, $res['d'] / $res['c'] would
// divide by zero if a group had no non-NULL from_user_id - confirm whether
// that can happen.
// get user diversity per hashtag $sql = "SELECT LOWER(h.text COLLATE {$collation}) as h1, COUNT(t.from_user_id) as c, COUNT(DISTINCT(t.from_user_id)) AS d "; $sql .= "FROM " . $esc['mysql']['dataset'] . "_hashtags h, " . $esc['mysql']['dataset'] . "_tweets t "; $where = "h.tweet_id = t.id AND "; $sql .= sqlSubset($where); $sql .= "GROUP BY h1"; //print $sql . "<bR>"; $sqlresults = mysql_unbuffered_query($sql); while ($res = mysql_fetch_assoc($sqlresults)) { $word = $res['h1']; $coword->distinctUsersForWord[$word] = $res['d']; $coword->userDiversity[$word] = round($res['d'] / $res['c'] * 100, 2); $coword->wordFrequency[$word] = $res['c'];
<link rel="stylesheet" href="css/main.css" type="text/css" /> </head> <body> <h1>TCAT :: Co-hashtags sentiments</h1> <?php
// Page script for the "Co-hashtags sentiments" view: validates the request
// variables, applies a default and prepares a Coword instance.
// validate_all_variables(), current_collation(), sqlSubset() and $esc are
// defined outside this chunk.
validate_all_variables();
$collation = current_collation();

// Apply the default. empty() is also true for 0 and "0", so those values are
// replaced by 4 as well.
// NOTE(review): 'minf' presumably means minimum frequency - confirm.
if (empty($esc['shell']['minf'])) {
    $esc['shell']['minf'] = 4;
}

include_once 'common/Coword.class.php';
$coword = new Coword();
// Explicitly switch off the Coword option named countWordOncePerDocument.
$coword->countWordOncePerDocument = FALSE;

// NOTE(review): everything after the `//` on the next line shares one physical
// line, so PHP treats it as comment text. It reads like statements whose line
// breaks were lost: a query selecting, per lower-cased hashtag, COUNT and
// COUNT(DISTINCT) of from_user_id, followed by a fetch loop (cut off in this
// chunk) that fills $coword->distinctUsersForWord, userDiversity,
// wordFrequency and wordFrequencyDividedByUniqueUsers. Confirm against the
// upstream file.
// NOTE(review): mysql_unbuffered_query()/mysql_fetch_assoc() belong to
// ext/mysql, which was removed in PHP 7.0. Also, the divisions by $res['c']
// and $res['d'] would divide by zero if a group had no non-NULL from_user_id -
// confirm whether that can happen.
// get user diversity per hashtag $sql = "SELECT LOWER(h.text COLLATE {$collation}) as h1, COUNT(t.from_user_id) as c, COUNT(DISTINCT(t.from_user_id)) AS d "; $sql .= "FROM " . $esc['mysql']['dataset'] . "_hashtags h, " . $esc['mysql']['dataset'] . "_tweets t "; $where = "h.tweet_id = t.id AND "; $sql .= sqlSubset($where); $sql .= "GROUP BY h1"; //print $sql . "<bR>"; $sqlresults = mysql_unbuffered_query($sql); while ($res = mysql_fetch_assoc($sqlresults)) { $word = $res['h1']; $coword->distinctUsersForWord[$word] = $res['d']; $coword->userDiversity[$word] = round($res['d'] / $res['c'] * 100, 2); $coword->wordFrequency[$word] = $res['c']; $coword->wordFrequencyDividedByUniqueUsers[$word] = round($res['c'] / $res['d'], 2);