} // end of a block opened above this excerpt

// Re-index the surviving candidates: string keys -> numeric keys.
$candidates = array_values($candidates);

// Aggregate the candidates into keyword sets of size N+1.
$current_generation = keyword_aggregate($candidates);
foreach ($current_generation as $group) {
    // Add the result set (element 0 of the group) to the current sets.
    $current_sets[] = $group[0];
}
$round_count++;
} // end of a block opened above this excerpt (presumably the per-round loop)
echo '</table>';

// Insert results into the database: one row per (keyword set, product URL).
$product = $row['url'];
// Escape values before embedding them in SQL. Like the mysql_query() calls in
// this script, mysql_real_escape_string() falls back to the most recently
// opened connection when no link identifier is passed.
$product_sql = mysql_real_escape_string($product);
foreach ($previous_sets as $keywordset) {
    $keywordset_string = kwset_to_string($keywordset);
    $keywordset_sql = mysql_real_escape_string($keywordset_string);
    $query = "INSERT INTO keywordset_product(keyword_set, product) VALUES('{$keywordset_sql}', '{$product_sql}')";
    $all_insert_attemps++;
    $ret_val = mysql_query($query);
    // A failed insert is not fatal; it is only reflected in the success ratio
    // printed after the COMMIT below.
    if ($ret_val) {
        $insert_success++;
    }
}

// Carry this iteration's keywords and sets over to the next one.
$previous_keywords = $current_keywords;
$previous_sets = $current_sets;
} // end of a block opened above this excerpt (presumably the per-row loop that supplies $row)

mysql_query("COMMIT");
echo "Insert success: {$insert_success} / {$all_insert_attemps}<br/>";
} // end of a block opened above this excerpt

$end_time = microtime(true);
// microtime(true) returns seconds as a float; convert to milliseconds so the
// number matches the "ms" label.
// NOTE(review): assumes $start_time was also taken with microtime(true) - confirm.
echo 'processed: ' . (($end_time - $start_time) * 1000) . ' ms';
$occur = occurrence($keyword_set, $all_splitted_keyword_sets);
// Cache the occurrence count under the set's string form.
$kwset_occur_mapping[$kwset_string] = $occur;
} // end of a block opened above this excerpt

// Calculate the Jaccard index: |A ∩ B| / |A ∪ B|, where the intersection is
// the occurrence count of the candidate set and the union is derived from the
// occurrence counts of its two support sets.
$intersection_length = $kwset_occur_mapping[$kwset_string];
// NOTE(review): assumes both support sets already have an entry in
// $kwset_occur_mapping (presumably from an earlier round) - confirm.
$suppset_occur1 = $kwset_occur_mapping[kwset_to_string($support_sets[0])];
$suppset_occur2 = $kwset_occur_mapping[kwset_to_string($support_sets[1])];
$union_length = $suppset_occur1 + $suppset_occur2 - $intersection_length;
// Guard against an empty union: dividing by zero raises a warning (or throws
// on newer PHP versions). An empty union is treated as an index of 0.
if ($union_length > 0) {
    $jaccard_index = floatval($intersection_length) / $union_length;
} else {
    $jaccard_index = 0.0;
}

if ($verbose) {
    echo 'processing [' . $kwset_string . '] index = ' . $jaccard_index . '<br />';
}

// Keep the candidate only if its index is no less than THRESHOLD.
if ($jaccard_index >= THRESHOLD) {
    // TODO: add keyword_set, uri to database
    $candidates[$kwset_string] = $keyword_set;
    if ($round_count > 1) {
        // Table row: set, occurrence count, index, and both support sets with their counts.
        // NOTE(review): keyword strings are echoed into HTML unescaped; if they can
        // contain markup, wrap them in htmlspecialchars().
        echo '<tr><td>' . $kwset_string . '</td><td>' . $intersection_length . '</td><td>' . $jaccard_index . '</td><td>'
            . kwset_to_string($support_sets[0]) . '[' . $suppset_occur1 . ']<br />'
            . kwset_to_string($support_sets[1]) . '[' . $suppset_occur2 . ']</td></tr>';
    }
}
} // end of a block opened above this excerpt (presumably the loop over candidate sets)

// Re-index the surviving candidates: string keys -> numeric keys.
$candidates = array_values($candidates);
// Aggregate the candidates into keyword sets of size N+1.
$current_generation = keyword_aggregate($candidates);
$round_count++;
} // end of a block opened above this excerpt (presumably the per-round loop)
echo '</table>';
} // end of a block opened above this excerpt
} // end of a block opened above this excerpt

mysql_close($con);