Ejemplo n.º 1
0
 // Open the results table; its columns are the keyword set, its occurrence,
 // its Jaccard index and the support sets it was derived from.
 echo '<table border="1px"><tr><th>Keyword Set</th><th>Occurrence</th><th>Jaccard Index</th><th>Support sets</th></tr>';
 // Process one generation of keyword-set groups per iteration, for as long as
 // the current generation is non-empty.
 // NOTE(review): this listing appears truncated. The `if` opened at the bottom
 // has no visible body, and the `while`/`foreach` are never closed here.
 while (count($current_generation) > 0) {
     // Keyword sets accepted in this round, keyed by their string form.
     $candidates = array();
     foreach ($current_generation as $group) {
         // Each group is a pair: [0] the keyword set, [1] its support sets.
         $keyword_set = $group[0];
         // String form of the keyword set, used as an array key below.
         $kwset_string = kwset_to_string($keyword_set);
         // Local alias for the support sets; indexes 0 and 1 are read below.
         $support_sets = $group[1];
         // Skip keyword sets that were already accepted as candidates in
         // this round.
         if (array_key_exists($kwset_string, $candidates)) {
             continue;
         }
         // Compute the occurrence of this keyword set once and cache it.
         if (!array_key_exists($kwset_string, $kwset_occur_mapping)) {
             $occur = occurrence($keyword_set, $all_splitted_keyword_sets);
             $kwset_occur_mapping[$kwset_string] = $occur;
         }
         // Jaccard index = |intersection| / |union|. The occurrence of the
         // keyword set is used as the intersection size, and the union is
         // derived by inclusion-exclusion from the two support sets.
         $intersection_length = $kwset_occur_mapping[$kwset_string];
         // NOTE(review): assumes both support sets are already present in
         // $kwset_occur_mapping (presumably cached by an earlier round) - confirm.
         $suppset_occur1 = $kwset_occur_mapping[kwset_to_string($support_sets[0])];
         $suppset_occur2 = $kwset_occur_mapping[kwset_to_string($support_sets[1])];
         $union_length = $suppset_occur1 + $suppset_occur2 - $intersection_length;
         // NOTE(review): $union_length is not checked for zero before dividing.
         $jaccard_index = floatval($intersection_length) / $union_length;
         if ($verbose) {
             echo 'processing [' . $kwset_string . '] index = ' . $jaccard_index . '<br />';
         }
         // Accept the keyword set only when its index is at least $THRESHOLD.
         if ($jaccard_index >= $THRESHOLD) {
             $candidates[$kwset_string] = $keyword_set;
             // NOTE(review): the body of this branch is missing from the listing.
             if ($round_count > 1) {
 }
 //echo '</table>';
 // Summary line: number of entries that carry keywords versus the total number
 // of entries ($count and $TOTAL are computed outside this section).
 echo "entries with keywords/total entries: {$count} / {$TOTAL}";
 $all = array_unique($all);
 //sort($all);
 // Occurrence of every distinct keyword taken on its own, keyed by keyword.
 $keyword_occr = array();
 foreach ($all as $keyword) {
     $keyword_occr[$keyword] = occurrence(array($keyword), $all_keywords_arr);
 }
 // Order the single-keyword counts from most to least frequent (keys preserved).
 arsort($keyword_occr);
 // Report table: one row per keyword subset, with the subset's occurrence and
 // its ratio to the occurrence of its most frequent member.
 // NOTE(review): keywords are echoed into HTML unescaped; if they can contain
 // markup characters they should go through htmlspecialchars() - confirm
 // whether the data is already escaped upstream.
 echo '<table border="1px"><tr><th>Keywords of size 2</th><th>occurrence</th><th>ratio(=occur/max(sub_occur))</th></tr>';
 foreach ($all_keywords_arr as $keywords_arr) {
     // Presumably the 2-element subsets of this entry's keywords (per the
     // table header); the helpers are defined elsewhere - confirm.
     $subsets_2 = generate_next($keywords_arr, expand_dimension($keywords_arr));
     foreach ($subsets_2 as $subset_2) {
         $occur_2 = occurrence($subset_2, $all_keywords_arr);
         // First cell: each element followed by its own occurrence count.
         echo '<tr><td>';
         $max_elem_occr = 0;
         foreach ($subset_2 as $elem) {
             // A keyword missing from $keyword_occr counts as 0 instead of
             // emitting an undefined-index notice into the HTML output.
             $elem_occr = isset($keyword_occr[$elem]) ? $keyword_occr[$elem] : 0;
             echo $elem . "({$elem_occr}) ";
             $max_elem_occr = max($max_elem_occr, $elem_occr);
         }
         // Guard the division: an empty subset or elements without any
         // recorded occurrence leave the maximum at 0, which would otherwise
         // raise DivisionByZeroError (PHP 8) or a warning (PHP 7).
         $ratio = $max_elem_occr > 0 ? floatval($occur_2) / $max_elem_occr : 0.0;
         // Remaining cells: occurrence of the subset and its ratio.
         echo "</td><td>{$occur_2}</td><td>{$ratio}</td></tr>";
     }
     // Blank row separating the subsets of one entry from the next.
     echo '<tr><td> </td><td> </td></tr>';
 }
 echo '</table>';