echo '<table border="1px"><tr><th>Keyword Set</th><th>Occurrence</th><th>Jaccard Index</th><th>Support sets</th></tr>'; while (count($current_generation) > 0) { $candidates = array(); foreach ($current_generation as $group) { $keyword_set = $group[0]; $kwset_string = kwset_to_string($keyword_set); //avoid unnecessary typing $support_sets = $group[1]; //if keyword set already in the support set //skip it if (array_key_exists($kwset_string, $candidates)) { continue; } //in case the occurrence is not calculated if (!array_key_exists($kwset_string, $kwset_occur_mapping)) { $occur = occurrence($keyword_set, $all_splitted_keyword_sets); $kwset_occur_mapping[$kwset_string] = $occur; } //calculate the index here, finally! $intersection_length = $kwset_occur_mapping[$kwset_string]; $suppset_occur1 = $kwset_occur_mapping[kwset_to_string($support_sets[0])]; $suppset_occur2 = $kwset_occur_mapping[kwset_to_string($support_sets[1])]; $union_length = $suppset_occur1 + $suppset_occur2 - $intersection_length; $jaccard_index = floatval($intersection_length) / $union_length; if ($verbose) { echo 'processing [' . $kwset_string . '] index = ' . $jaccard_index . '<br />'; } //check if index is no less than $THRESHOLD if ($jaccard_index >= $THRESHOLD) { $candidates[$kwset_string] = $keyword_set; if ($round_count > 1) {
}
//echo '</table>';

// Summary line; per its own label, $count is the number of entries that had
// keywords and $TOTAL the number of entries overall (both are set outside
// this section).
echo "entries with keywords/total entries: {$count} / {$TOTAL}";

// Reduce the collected keywords to distinct values before counting them.
$all = array_unique($all);
//sort($all);

// Map each distinct keyword to the value occurrence() reports for the
// single-element set containing it, measured against $all_keywords_arr.
$keyword_occr = array();
foreach ($all as $keyword) {
    $keyword_occr[$keyword] = occurrence(array($keyword), $all_keywords_arr);
}
// Highest occurrence first, keys preserved. The lookups below do not depend
// on this order; it is kept for any code that iterates the map afterwards.
arsort($keyword_occr);

// One table row per keyword subset: its members (each with its own
// occurrence), the subset's occurrence, and
// ratio = subset occurrence / max(member occurrence).
echo '<table border="1px"><tr><th>Keywords of size 2</th><th>occurrence</th><th>ratio(=occur/max(sub_occur))</th></tr>';
foreach ($all_keywords_arr as $keywords_arr) {
    // NOTE(review): presumably yields the 2-element subsets of this entry's
    // keywords (per the table header) -- confirm against generate_next() and
    // expand_dimension().
    $subsets_2 = generate_next($keywords_arr, expand_dimension($keywords_arr));

    foreach ($subsets_2 as $subset_2) {
        $occur_2 = occurrence($subset_2, $all_keywords_arr);

        // First cell: every member of the subset followed by its occurrence.
        echo '<tr><td>';
        $max_elem_occr = 0;
        foreach ($subset_2 as $elem) {
            // A keyword missing from the map counts as 0 instead of raising
            // an undefined-index notice and printing an empty "()".
            $elem_occr = isset($keyword_occr[$elem]) ? $keyword_occr[$elem] : 0;

            // Keywords are free text, so escape them before they reach the
            // HTML output. NOTE(review): assumes the text is UTF-8; invalid
            // sequences are substituted rather than blanking the keyword.
            echo htmlspecialchars((string) $elem, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                . "({$elem_occr}) ";

            $max_elem_occr = max($max_elem_occr, $elem_occr);
        }

        // Guard the division: an empty subset or all-zero member occurrences
        // would otherwise divide by zero (DivisionByZeroError on PHP 8).
        $ratio = $max_elem_occr > 0 ? floatval($occur_2) / $max_elem_occr : 0.0;

        // Remaining cells: subset occurrence and ratio.
        echo "</td><td>{$occur_2}</td><td>{$ratio}</td></tr>";
    }

    // Blank row separating one entry's subsets from the next entry's.
    echo '<tr><td> </td><td> </td></tr>';
}
echo '</table>';