Exemplo n.º 1
0
<?php

/*
 * Simple coword calculation, done entirely in memory.
 *
 * WARNING: Because of PHP's very poor UTF-8 support, splitting tweets into words only works reliably for tweets consisting solely of Latin characters (i.e. English).
 */
// Inline usage demo for the Coword class. It is switched off by the constant
// false condition; flip the condition to run it when this file is executed.
if (false) {
    $coword = new Coword();

    // Load three sample documents in a single call ...
    $coword->setDocuments(
        array(
            "bla bla bla test test clash",
            "test test test bla bla",
            "clash bla test",
        )
    );
    // ... then append a fourth one that mixes in punctuation and digits.
    $coword->addDocument("bla, !test 345 cla5sh");

    $coword->iterate();

    // Dump the coword CSV result, followed by a blank line.
    var_export($coword->getCowordsAsCsv());
    echo "\n\n";

    // Dump the word CSV result, followed by a newline.
    var_export($coword->getWordsAsCsv());
    echo "\n";
}
class Coword
{
    public $documents = array();
    public $punctuation = array();
    public $hashtags_are_separate_words;
    public $extract_only_hashtags;
    public $remove_stop_words;
    public $min_word_length;
    public $min_word_frequency;
    public $words = array();
    // holds word frequencies
    public $cowords = array();
    // holds coword frequencies
    public $document_word_frequencies = array();