/**
 * Exports candidate posts for manual validation of the baseline.
 *
 * For each class in the list below, the keyword file
 * "C:/Users/fabio/Documents/baseline/<class>.txt" is read line by line. The
 * first field of every line (fields are separated by ';' or ' ') is used as a
 * keyword, and up to 15 random posts whose post_type is the string 'null' and
 * whose message contains that keyword are fetched. Each matching post is
 * appended to the output CSV as "<fbpid>;<message>" with newlines and
 * semicolons in the message replaced by spaces.
 *
 * Processing of a class stops at the end of its keyword file or once more
 * than 100 posts have been collected for it. Nothing is saved back to the
 * database; progress is echoed to the response as HTML.
 *
 * Terminates the script with die() when a file cannot be opened.
 */
public function createBaseline()
{
    set_time_limit(0);

    $outputPath = "C:/Users/fabio/Documents/baseline/baseline da validare2.csv";
    $output = fopen($outputPath, 'w') or die("Unable to open file!");

    $classes = array("study_group", "give_exam_info", "spam");
    foreach ($classes as $c) {
        $keywordPath = "C:/Users/fabio/Documents/baseline/" . $c . ".txt";
        $keywords = fopen($keywordPath, 'r') or die("Unable to open file!");

        // The first line of the keyword file is read and discarded
        // (presumably a header row - confirm against the data files).
        fgets($keywords);

        $count_type = 0;
        // Test the result of fgets() itself instead of feof(): feof() only
        // turns true after a read has already failed, so the old loop ran
        // one extra iteration with an empty keyword.
        while ($count_type <= 100 && ($line = fgets($keywords)) !== false) {
            $fields = explode(" ", str_replace(";", " ", $line));
            var_dump($fields);
            echo " <br>";

            // Strip the line terminator that fgets() keeps, and skip blank
            // lines: an empty keyword would turn the filter into LIKE '%%',
            // which matches every unlabelled post.
            $keyword = trim($fields[0]);
            if ($keyword === "") {
                continue;
            }

            // Bind the keyword instead of concatenating it into the SQL so
            // that quotes in the keyword file cannot break the query.
            $criteria = new CDbCriteria();
            $criteria->condition = "post_type = 'null' and message like :keyword";
            $criteria->params = array(':keyword' => '%' . $keyword . '%');
            $criteria->order = 'rand()';
            $criteria->limit = 15;

            $posts = PreProcPost::model()->findAll($criteria);
            $count_type += count($posts);

            foreach ($posts as $p) {
                echo $p->fbpid . " " . $c . " " . $keyword;
                echo " <br>";

                $post = FbPost::model()->findByPk($p->fbpid);
                if ($post === null) {
                    // No original post for this pre-processed row; nothing to export.
                    continue;
                }

                // Keep one record per line and keep ';' usable as the delimiter.
                $message = str_replace("\n", " ", $post->message);
                $message = str_replace(";", " ", $message);
                fwrite($output, $post->fbpid . ";" . $message . "\n");
            }
        }

        fclose($keywords);
    }

    fclose($output);
}
/**
 * Scores every word of every post category and writes one CSV per category.
 *
 * For each category, the messages of all PreProcPost rows with that post_type
 * are split into words and counted. Each word then receives the score
 * p * ln(p / q), where p is its add-one smoothed relative frequency inside
 * the category and q is its add-one smoothed relative frequency in all other
 * categories together. The words of each category are written, sorted by
 * descending score, to "<category> keys 6.csv" in the hard-coded output
 * directory.
 *
 * Terminates the script with die() when an output file cannot be opened.
 */
public function ActionKld()
{
    $categories = array("need_help", "tvl", "spam", "give_exam_info", "need_exam_info", "study_group");

    // $counts[category][word] => number of occurrences of the word in that category.
    $counts = array();
    foreach ($categories as $category) {
        $posts = PreProcPost::model()->findAllByAttributes(array("post_type" => $category));
        $counts[$category] = $this->countKldWords($posts);
    }

    $scores = $this->computeKldScores($counts);

    foreach ($categories as $category) {
        // Highest score first, keeping the word as the array key.
        arsort($scores[$category]);
        $this->writeKldCsv($category, $scores[$category]);
    }
}

/**
 * Counts word occurrences over the messages of the given posts.
 *
 * Question marks are removed and messages are split on single spaces, as
 * before. Empty tokens (from consecutive spaces or empty messages) are
 * ignored so that they do not show up as a "word" in the vocabulary.
 *
 * @param array $posts records exposing a "message" attribute
 * @return array word => number of occurrences
 */
private function countKldWords($posts)
{
    $counts = array();
    foreach ($posts as $post) {
        $message = str_replace("?", "", (string) $post->message);
        foreach (explode(" ", $message) as $token) {
            if ($token === "") {
                continue;
            }
            $counts[$token] = isset($counts[$token]) ? $counts[$token] + 1 : 1;
        }
    }
    return $counts;
}

/**
 * Computes the per-word score p * ln(p / q) for every category.
 *
 * Totals are kept in their own arrays rather than next to the words, so a
 * bookkeeping entry can never be mistaken for a word or end up in the output.
 * The occurrences outside a category are derived from the global count of the
 * word, which is correct for every pair of categories and linear in the
 * number of words.
 *
 * @param array $countsByCategory category => (word => occurrences)
 * @return array category => (word => score rounded to 5 decimals)
 */
private function computeKldScores(array $countsByCategory)
{
    $globalCounts = array();   // word => occurrences over all categories
    $categoryTotals = array(); // category => total number of word occurrences
    foreach ($countsByCategory as $category => $wordCounts) {
        $categoryTotals[$category] = array_sum($wordCounts);
        foreach ($wordCounts as $word => $occurrences) {
            $previous = isset($globalCounts[$word]) ? $globalCounts[$word] : 0;
            $globalCounts[$word] = $previous + $occurrences;
        }
    }

    $vocabularySize = count($globalCounts);
    $grandTotal = array_sum($categoryTotals);

    $scores = array();
    foreach ($countsByCategory as $category => $wordCounts) {
        $scores[$category] = array();
        $inTotal = $categoryTotals[$category];
        $otherTotal = $grandTotal - $inTotal;

        foreach ($wordCounts as $word => $inCount) {
            $otherCount = $globalCounts[$word] - $inCount;

            // Add-one smoothing keeps both probabilities strictly positive.
            // They are deliberately NOT rounded before use: rounding tiny
            // probabilities to 5 decimals can produce 0, which leads to a
            // division by zero or log(0). Only the final score is rounded.
            $prior = ($inCount + 1) / ($inTotal + $vocabularySize);
            $complementary = ($otherCount + 1) / ($otherTotal + $vocabularySize);

            $scores[$category][$word] = round($prior * log($prior / $complementary), 5);
        }
    }

    return $scores;
}

/**
 * Writes the scores of one category to its CSV file ("WORD; KLD - Value").
 *
 * @param string $category category name, used as the file name prefix
 * @param array  $scores   word => score, already in output order
 */
private function writeKldCsv($category, array $scores)
{
    $path = 'C:\\Users\\fabio\\Google Drive\\TESI - Q&A SL\\TESI 2.0\\Classificazione\\KLD - keys\\6 classi\\' . $category . ' keys 6.csv';
    $handle = fopen($path, 'w') or die("Unable to open file!");

    fwrite($handle, "WORD; KLD - Value\n");
    foreach ($scores as $word => $kld) {
        fwrite($handle, $word . ";" . $kld . "\n");
    }

    fclose($handle);
}