/**
 * Writes a tab-separated report describing, per keyword and time slice, how
 * the keyword's associational profile differs from the previous slice, and
 * prints an HTML fieldset linking to the written file.
 *
 * Columns: key, time, degree, similarity, associational profile, change in,
 * change out, stable. The first slice of every keyword has no predecessor,
 * so its similarity / change in / change out / stable columns stay empty.
 *
 * @param string $filename       Ignored: the export path is always regenerated
 *                               via get_filename_for_export(). The parameter is
 *                               kept so existing callers keep working.
 * @param array  $series         time slice => object exposing getCowords(),
 *                               which yields word => (coword => frequency).
 * @param mixed  $keywordToTrack Only checked for emptiness in this function.
 * @param array  $ap             word => (time slice => (coword => value)).
 *
 * @return void Calls die() when $series or $keywordToTrack is empty.
 */
function variabilityOfAssociationProfiles($filename, $series, $keywordToTrack, $ap) {
    if (empty($series) || empty($keywordToTrack)) {
        die('not enough data');
    }

    // NOTE(review): the content written below is tab-separated text, yet the
    // requested extension is "gexf" - confirm whether that is intentional.
    $filename = get_filename_for_export("hashtagVariability", "_variabilityOfAssociationProfiles", "gexf");

    // Degree per word per time slice: every (word, coword) pair found in a
    // slice adds one to the counter of both of its endpoints.
    $degree = array();
    foreach ($series as $time => $slice) {
        foreach ($slice->getCowords() as $word => $cowords) {
            foreach ($cowords as $coword => $frequency) {
                foreach (array($word, $coword) as $node) {
                    if (!isset($degree[$node][$time])) {
                        $degree[$node][$time] = 0;
                    }
                    $degree[$node][$time]++;
                }
            }
        }
    }

    // Compare each slice of a keyword's profile with the slice before it.
    // Results are keyed by the later of the two slices.
    $cos_sim = array();
    $change_in = array();
    $change_out = array();
    $stable = array();
    foreach ($ap as $word => $times) {
        $sliceKeys = array_keys($times);
        $sliceCount = count($sliceKeys);
        for ($i = 1; $i < $sliceCount; $i++) {
            $time = $sliceKeys[$i];
            $before = $times[$sliceKeys[$i - 1]];
            $after = $times[$time];

            $cos_sim[$word][$time] = cosineSimilarity($before, $after);
            $change_out[$word][$time] = change($before, $after);
            $change_in[$word][$time] = change($after, $before);
            // Co-words present in both the previous and the current slice.
            $stable[$word][$time] = array_intersect(array_keys($before), array_keys($after));
        }
    }

    // Renders term => count pairs as "term (count), term (count)".
    $formatPairs = function ($pairs) {
        $parts = array();
        foreach ($pairs as $term => $count) {
            $parts[] = "{$term} ({$count})";
        }
        return implode(", ", $parts);
    };

    // @todo, frequency
    $out = "key\ttime\tdegree\tsimilarity\tassociational profile\tchange in\tchange out\tstable\n";
    foreach ($ap as $word => $times) {
        foreach ($times as $time => $profile) {
            $inc = isset($change_in[$word][$time]) ? $formatPairs($change_in[$word][$time]) : "";
            $outc = isset($change_out[$word][$time]) ? $formatPairs($change_out[$word][$time]) : "";
            $stablec = isset($stable[$word][$time]) ? implode(", ", $stable[$word][$time]) : "";
            $prof = $formatPairs($profile);
            $deg = isset($degree[$word][$time]) ? $degree[$word][$time] : "";
            $cs = isset($cos_sim[$word][$time]) ? $cos_sim[$word][$time] : "";

            $out .= implode("\t", array($word, $time, $deg, $cs, $prof, $inc, $outc, $stablec)) . "\n";
        }
    }

    // The three leading bytes (EF BB BF) are the UTF-8 byte order mark.
    $written = file_put_contents($filename, chr(239) . chr(187) . chr(191) . $out);

    echo '<fieldset class="if_parameters">';
    echo '<legend>Your co-hashtag variability File</legend>';
    if ($written === false) {
        // Do not offer a download link for a file that was never written.
        echo '<p>Could not write the co-hashtag variability file.</p>';
    } else {
        // Escape both values: they are interpolated into HTML attribute/text context.
        $safeUrl = htmlspecialchars(filename_to_url($filename), ENT_QUOTES, 'UTF-8');
        $safeName = htmlspecialchars($filename, ENT_QUOTES, 'UTF-8');
        echo '<p><a href="' . $safeUrl . '">' . $safeName . '</a></p>';
    }
    echo '</fieldset>';
}
// POST dispatcher: depending on which of the "query" / "new" form fields are
// filled in, store a new document, run a cosine-similarity query, or do both.
if (isset($_SERVER["REQUEST_METHOD"]) && $_SERVER["REQUEST_METHOD"] === "POST") {
    session_start();

    $hasQuery = !empty($_POST["query"]);
    $hasNew = !empty($_POST["new"]);

    if (!$hasQuery && $hasNew) {
        // new document
        setDocuments();
        addNewDocument($_POST["new"]);
    } elseif ($hasQuery && !$hasNew) {
        // calculate cosine sim
        setDocuments();
        cosineSimilarity($_POST["query"]);
    } elseif ($hasQuery && $hasNew) {
        // add new document and then calculate cosine
        // This branch previously repeated the first branch's condition and
        // could never run; it now fires when both fields are supplied.
        // NOTE(review): unlike the other branches this one does not call
        // setDocuments() first - confirm that is intended.
        addNewDocument($_POST["new"]);
        cosineSimilarity($_POST["query"]);
    }
}
function cosineSimilarity($query) { $querySum = 0; $termWeights = explode(" ", $query); for ($i = 0; $i < count($termWeights); $i++) { $querySum += pow($termWeights[$i], 2); } for ($i = 0; $i < count($_SESSION['documents']); $i++) { $numerator = 0; $documentSum = 0; for ($j = 1; $j < 4; $j++) {