// Tail of a branch opened above this chunk (its condition is not visible here):
// link every keyword set in $previous_sets to the current row's URL, then skip
// to the next iteration of the enclosing loop.
$product = $row['url'];
// NOTE(review): the legacy mysql_* extension was removed in PHP 7; move to
// mysqli/PDO prepared statements when the connection code is migrated. Until
// then, every value interpolated into the SQL text is escaped so that quotes
// in a URL or keyword can neither break the statement nor inject SQL.
$escaped_product = mysql_real_escape_string($product);
foreach ($previous_sets as $keywordset) {
    $keywordset_string = kwset_to_string($keywordset);
    $escaped_keywordset = mysql_real_escape_string($keywordset_string);
    $query = "INSERT INTO keywordset_product(keyword_set, product) VALUES('{$escaped_keywordset}', '{$escaped_product}')";
    $all_insert_attemps++;
    // mysql_query() returns false on failure, so only successful inserts are counted.
    $ret_val = mysql_query($query);
    if ($ret_val) {
        $insert_success++;
    }
}
continue;
}

$current_sets = array();

// Deal with keyword sets of size 1 first.
$current_generation = array();
$size1_set = expand_dimension($splitted_keyword_set);
foreach ($size1_set as $keyword_set) {
    // Build the string key once; it serves both the cache lookup and the store.
    $kwset_key = kwset_to_string($keyword_set);
    if (!array_key_exists($kwset_key, $kwset_occur_mapping)) {
        // Cache the occurrence count so each distinct keyword set is counted only once.
        $occur = occurrence($keyword_set, $all_splitted_keyword_sets);
        $kwset_occur_mapping[$kwset_key] = $occur;
    }
    // To use the Jaccard index, a size-1 set is recorded with itself as both
    // of its support sets (presumably so it can be scored like larger sets -
    // confirm against the aggregation code below this chunk).
    $current_generation[] = array($keyword_set, array($keyword_set, $keyword_set));
    $current_sets[] = $keyword_set; // add size 1 keywordset to results
}
$round_count = 1;

// Now the aggregation process begins: keep going while the current generation
// still holds entries.
echo '<table border="1px"><tr><th>Keyword Set</th><th>Occurrence</th><th>Jaccard Index</th><th>Support sets</th></tr>';
while (count($current_generation) > 0) {
    $candidates = array();
//echo "</pre></td></tr>";
}
}
//echo '</table>';

// Summary line; $count and $TOTAL are both assigned outside this chunk.
echo "entries with keywords/total entries: {$count} / {$TOTAL}";

$all = array_unique($all);
//sort($all);

// Map each distinct keyword to the count occurrence() reports for it over
// $all_keywords_arr, then order the map by count, highest first (keys kept).
$keyword_occr = array();
foreach ($all as $keyword) {
    $occurrence = occurrence(array($keyword), $all_keywords_arr);
    $keyword_occr[$keyword] = $occurrence;
}
arsort($keyword_occr);

// Report table: one row per subset produced by generate_next() for each entry.
echo '<table border="1px"><tr><th>Keywords of size 2</th><th>occurrence</th><th>ratio(=occur/max(sub_occur))</th></tr>';
foreach ($all_keywords_arr as $keywords_arr) {
    // presumably generate_next() yields the size-2 subsets of this entry's
    // keywords - confirm against its definition.
    $subsets_2 = generate_next($keywords_arr, expand_dimension($keywords_arr));
    foreach ($subsets_2 as $subset_2) {
        $occur_2 = occurrence($subset_2, $all_keywords_arr);
        // Print the elements of subset_2, each followed by its own count, and
        // track the largest single-element count for the ratio below.
        // NOTE(review): keywords are echoed without HTML escaping - confirm
        // they cannot contain markup, or escape them once the data's encoding
        // is known.
        echo '<tr><td>';
        $max_elem_occr = 0;
        foreach ($subset_2 as $elem) {
            echo $elem . "({$keyword_occr[$elem]}) ";
            $max_elem_occr = max($max_elem_occr, $keyword_occr[$elem]);
        }
        // Guard the division: when no element has a positive count the ratio
        // is reported as 0 instead of dividing by zero.
        $ratio = $max_elem_occr > 0 ? floatval($occur_2) / $max_elem_occr : 0.0;
        // Print occurrence and ratio.
        echo "</td><td>{$occur_2}</td><td>{$ratio}</td></tr>";
    }
    // Spacer row separating one entry's subsets from the next entry's.
    echo '<tr><td> </td><td> </td></tr>';