Example #1
0
<?php

/*
 * Simple coword calculation, all in memory
 *
 * WARNING: Because of PHP's poor native UTF-8 support, splitting tweets into words only works reliably for tweets consisting solely of Latin characters (e.g. English)
 */
// Disabled usage example: the condition is the literal 0, so this block never
// runs. It shows the call sequence for the Coword class declared in this file.
if (0) {
    $coword = new Coword();
    // Load three sample documents in one call ...
    $coword->setDocuments(array("bla bla bla test test clash", "test test test bla bla", "clash bla test"));
    // ... then add a fourth one that contains punctuation and digits.
    $coword->addDocument("bla, !test 345 cla5sh");
    $coword->iterate();
    // Dump the coword CSV and the word CSV, separated by blank lines.
    var_export($coword->getCowordsAsCsv());
    print "\n\n";
    var_export($coword->getWordsAsCsv());
    print "\n";
}
class Coword
{
    public $documents = array();
    public $punctuation = array();
    public $hashtags_are_separate_words;
    public $extract_only_hashtags;
    public $remove_stop_words;
    public $min_word_length;
    public $min_word_frequency;
    public $words = array();
    // holds word frequencies
    public $cowords = array();
    // holds coword frequencies
    public $document_word_frequencies = array();
Example #2
0
    </head>

    <body>

        <h1>TCAT :: co-hashtags</h1>

        <?php 
// Page setup for the co-hashtags view: apply parameter defaults, create the
// Coword instance and run the per-hashtag user statistics query.
// validate_all_variables() is defined elsewhere; presumably it fills $esc,
// which is read below -- TODO confirm.
validate_all_variables();
// Fall back to defaults when a shell parameter is missing or empty.
// Note that empty() is also true for 0 and "0".
if (empty($esc['shell']['minf'])) {
    $esc['shell']['minf'] = 4;
}
if (empty($esc['shell']['topu'])) {
    $esc['shell']['topu'] = 0;
}
include_once 'common/Coword.class.php';
$coword = new Coword();
$coword->countWordOncePerDocument = FALSE;
// Collation name interpolated into the LOWER(... COLLATE ...) expression below.
$collation = current_collation();
// get user diversity per hashtag
// Result columns, one row per lowercased hashtag text:
//   h1 = the lowercased hashtag text
//   c  = COUNT of from_user_id over the joined hashtag/tweet rows
//   d  = COUNT of DISTINCT from_user_id values
$sql = "SELECT LOWER(h.text COLLATE {$collation}) as h1, COUNT(t.from_user_id) as c, COUNT(DISTINCT(t.from_user_id)) AS d ";
// Table names are built from the dataset name held in $esc['mysql']['dataset'].
$sql .= "FROM " . $esc['mysql']['dataset'] . "_hashtags h, " . $esc['mysql']['dataset'] . "_tweets t ";
// Join condition handed to sqlSubset() (defined elsewhere); its return value
// is appended verbatim -- presumably the WHERE clause for the current
// selection; verify against its definition.
$where = "h.tweet_id = t.id AND ";
$sql .= sqlSubset($where);
$sql .= "GROUP BY h1";
//print $sql . "<bR>";
// NOTE(review): the mysql_* extension was removed in PHP 7. No connection
// handle is passed here, so the call relies on the implicit last-opened link.
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    $word = $res['h1'];
    $coword->distinctUsersForWord[$word] = $res['d'];
    $coword->userDiversity[$word] = round($res['d'] / $res['c'] * 100, 2);
    $coword->wordFrequency[$word] = $res['c'];
        <link rel="stylesheet" href="css/main.css" type="text/css" />

    </head>

    <body>

        <h1>TCAT :: Co-hashtags sentiments</h1>

        <?php 
// Page setup for the co-hashtags sentiments view: apply parameter defaults,
// create the Coword instance and run the per-hashtag user statistics query.
// validate_all_variables() is defined elsewhere; presumably it fills $esc,
// which is read below -- TODO confirm.
validate_all_variables();
// Collation name interpolated into the LOWER(... COLLATE ...) expression below.
$collation = current_collation();
// Fall back to the default when the shell parameter is missing or empty.
// Note that empty() is also true for 0 and "0".
if (empty($esc['shell']['minf'])) {
    $esc['shell']['minf'] = 4;
}
include_once 'common/Coword.class.php';
$coword = new Coword();
$coword->countWordOncePerDocument = FALSE;
// get user diversity per hashtag
// Result columns, one row per lowercased hashtag text:
//   h1 = the lowercased hashtag text
//   c  = COUNT of from_user_id over the joined hashtag/tweet rows
//   d  = COUNT of DISTINCT from_user_id values
$sql = "SELECT LOWER(h.text COLLATE {$collation}) as h1, COUNT(t.from_user_id) as c, COUNT(DISTINCT(t.from_user_id)) AS d ";
// Table names are built from the dataset name held in $esc['mysql']['dataset'].
$sql .= "FROM " . $esc['mysql']['dataset'] . "_hashtags h, " . $esc['mysql']['dataset'] . "_tweets t ";
// Join condition handed to sqlSubset() (defined elsewhere); its return value
// is appended verbatim -- presumably the WHERE clause for the current
// selection; verify against its definition.
$where = "h.tweet_id = t.id AND ";
$sql .= sqlSubset($where);
$sql .= "GROUP BY h1";
//print $sql . "<bR>";
// NOTE(review): the mysql_* extension was removed in PHP 7. No connection
// handle is passed here, so the call relies on the implicit last-opened link.
$sqlresults = mysql_unbuffered_query($sql);
while ($res = mysql_fetch_assoc($sqlresults)) {
    $word = $res['h1'];
    $coword->distinctUsersForWord[$word] = $res['d'];
    $coword->userDiversity[$word] = round($res['d'] / $res['c'] * 100, 2);
    $coword->wordFrequency[$word] = $res['c'];
    $coword->wordFrequencyDividedByUniqueUsers[$word] = round($res['c'] / $res['d'], 2);