Example #1
0
            }
            //string keys to numeric
            $candidates = array_values($candidates);
            //aggregate to size N+1
            $current_generation = keyword_aggregate($candidates);
            foreach ($current_generation as $group) {
                $current_sets[] = $group[0];
                //add result set to current sets
            }
            $round_count++;
        }
        echo '</table>';
        //insert results to database
        $product = $row['url'];
        foreach ($previous_sets as $keywordset) {
            $keywordset_string = kwset_to_string($keywordset);
            $query = "INSERT INTO keywordset_product(keyword_set, product) VALUES('{$keywordset_string}', '{$product}')";
            $all_insert_attemps++;
            $ret_val = mysql_query($query);
            if ($ret_val) {
                $insert_success++;
            }
        }
        $previous_keywords = $current_keywords;
        $previous_sets = $current_sets;
    }
    mysql_query("COMMIT");
    echo "Insert success: {$insert_success} / {$all_insert_attemps}<br/>";
}
// Report the total wall-clock processing time.
// microtime(true) returns a float number of seconds, so the elapsed value is
// multiplied by 1000 to match the "ms" label that is printed.
// NOTE(review): assumes $start_time was also captured with microtime(true) — confirm.
$end_time = microtime(true);
echo 'processed: ' . (($end_time - $start_time) * 1000) . ' ms';
Example #2
0
                    $occur = occurrence($keyword_set, $all_splitted_keyword_sets);
                    $kwset_occur_mapping[$kwset_string] = $occur;
                }
                //calculate the index here, finally!
                $intersection_length = $kwset_occur_mapping[$kwset_string];
                $suppset_occur1 = $kwset_occur_mapping[kwset_to_string($support_sets[0])];
                $suppset_occur2 = $kwset_occur_mapping[kwset_to_string($support_sets[1])];
                $union_length = $suppset_occur1 + $suppset_occur2 - $intersection_length;
                $jaccard_index = floatval($intersection_length) / $union_length;
                if ($verbose) {
                    echo 'processing [' . $kwset_string . '] index = ' . $jaccard_index . '<br />';
                }
                //check if index is no less than THRESHOLD
                if ($jaccard_index >= THRESHOLD) {
                    //TODO: add keyword_set, uri to database
                    $candidates[$kwset_string] = $keyword_set;
                    if ($round_count > 1) {
                        echo '<tr><td>' . $kwset_string . '</td><td>' . $intersection_length . '</td><td>' . $jaccard_index . '</td><td>' . kwset_to_string($support_sets[0]) . '[' . $suppset_occur1 . ']<br />' . kwset_to_string($support_sets[1]) . '[' . $suppset_occur2 . ']</td></tr>';
                    }
                }
            }
            //string keys to numeric
            $candidates = array_values($candidates);
            //aggregate to size N+1
            $current_generation = keyword_aggregate($candidates);
            $round_count++;
        }
        echo '</table>';
    }
}
// Close the MySQL connection referenced by $con now that processing is done.
// NOTE(review): the legacy mysql_* extension was removed in PHP 7.0 — consider
// migrating to mysqli or PDO.
mysql_close($con);