Example #1
0
     // Link the product of the current row to every keyword set in $previous_sets
     // by inserting one keywordset_product record per set.
     $product = $row['url'];
     // Values are interpolated into the SQL text, so they must be escaped first;
     // otherwise a URL or keyword containing a quote breaks (or hijacks) the INSERT.
     // The product is the same for every iteration, so escape it once up front.
     $escaped_product = mysql_real_escape_string($product);
     foreach ($previous_sets as $keywordset) {
         $keywordset_string = kwset_to_string($keywordset);
         // NOTE(review): assumes kwset_to_string() returns a raw, not yet
         // SQL-escaped string - confirm against its definition.
         $escaped_keywordset = mysql_real_escape_string($keywordset_string);
         $query = "INSERT INTO keywordset_product(keyword_set, product) VALUES('{$escaped_keywordset}', '{$escaped_product}')";
         // Count every attempt; only inserts for which mysql_query() reports
         // success are added to $insert_success.
         $all_insert_attemps++;
         $ret_val = mysql_query($query);
         if ($ret_val) {
             $insert_success++;
         }
     }
     continue;
 }
 // Seed the aggregation with the keyword sets of size 1.
 $current_sets = array();
 $current_generation = array();
 $size1_set = expand_dimension($splitted_keyword_set);
 foreach ($size1_set as $keyword_set) {
     // String form of the set, computed once: it is the key of the
     // $kwset_occur_mapping cache, so already-counted sets are not recounted.
     $keyword_set_key = kwset_to_string($keyword_set);
     if (!array_key_exists($keyword_set_key, $kwset_occur_mapping)) {
         $occur = occurrence($keyword_set, $all_splitted_keyword_sets);
         $kwset_occur_mapping[$keyword_set_key] = $occur;
     }
     // Trick so the Jaccard index can be used on size-1 sets: the set is paired
     // with two copies of itself (presumably its "support sets" - confirm
     // against the aggregation loop).
     $current_generation[] = array($keyword_set, array($keyword_set, $keyword_set));
     // Add the size-1 keyword set to the results.
     $current_sets[] = $keyword_set;
 }
 $round_count = 1;
 // Now the aggregation process begins: open the result table.
 echo '<table border="1px"><tr><th>Keyword Set</th><th>Occurrence</th><th>Jaccard Index</th><th>Support sets</th></tr>';
 while (count($current_generation) > 0) {
     $candidates = array();
         //echo "</pre></td></tr>";
     }
 }
 //echo '</table>';
 // Summary line: prints $count over $TOTAL, labelled as entries that have
 // keywords versus total entries.
 echo "entries with keywords/total entries: {$count} / {$TOTAL}";
 // Drop duplicate keywords so each one is processed only once below.
 $all = array_unique($all);
 //sort($all);
 // Map every distinct keyword to the value occurrence() returns for the
 // single-element set array($keyword) checked against $all_keywords_arr.
 $keyword_occr = array();
 foreach ($all as $keyword) {
     $occurrence = occurrence(array($keyword), $all_keywords_arr);
     $keyword_occr[$keyword] = $occurrence;
 }
 // Sort by value in descending order while keeping the keyword => value pairs.
 arsort($keyword_occr);
 // Open the table for keyword sets of size 2; the rows are echoed by the
 // loop over $all_keywords_arr that follows.
 echo '<table border="1px"><tr><th>Keywords of size 2</th><th>occurrence</th><th>ratio(=occur/max(sub_occur))</th></tr>';
 foreach ($all_keywords_arr as $keywords_arr) {
     $subsets_2 = generate_next($keywords_arr, expand_dimension($keywords_arr));
     foreach ($subsets_2 as $subset_2) {
         $occur_2 = occurrence($subset_2, $all_keywords_arr);
         //print elements in subset_2
         echo '<tr><td>';
         $max_elem_occr = 0;
         foreach ($subset_2 as $elem) {
             echo $elem . "({$keyword_occr[$elem]}) ";
             $max_elem_occr = max($max_elem_occr, $keyword_occr[$elem]);
         }
         $ratio = floatval($occur_2) / $max_elem_occr;
         //print occurrence and ratio
         echo "</td><td>{$occur_2}</td><td>{$ratio}</td></tr>";
     }
     //split sets
     echo '<tr><td> </td><td> </td></tr>';