<thead>
    <tr>
        <th>ID</th>
        <th>Keyword string</th>
    </tr>
</thead>
<tbody>
<?php
// Emit one table row per result row whose referer yields a keyword string,
// and tally how often each individual keyword appears in $kw_occr.
while ($row = mysql_fetch_array($result)) {
    $userid = $row['userid'];
    $refer = $row['refer'];
    $real_refer = urldecode($refer);
    $keyword_string = extract_keywords($real_refer);
    if ($keyword_string) {
        $count++;
        $kw_set = keywords_array($keyword_string);
        foreach ($kw_set as $kw) {
            if (!array_key_exists($kw, $kw_occr)) {
                $kw_occr[$kw] = 0;
            }
            $kw_occr[$kw] += 1;
        }
        // The keyword string is derived from the stored referer, which is
        // attacker-controlled input: escape it (and the id) before writing
        // it into the HTML document.
        $safe_userid = htmlspecialchars((string) $userid, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $safe_keywords = htmlspecialchars((string) $keyword_string, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        echo "<tr><td>{$safe_userid}</td><td>{$safe_keywords}</td></tr>";
    }
}
?>
</tbody>
</table>
<?php
mysql_query("COMMIT");
echo "query ratio: $count / $TOTAL<br/>";
} else {
    // STEP1: pre-process
    //   1. get all keyword strings
    //   2. get all split keyword arrays
    mysql_query("BEGIN");
    $all_splitted_keyword_sets = array();
    $all_rows = array(); // store rows for later iteration

    // Sentinel values used as "previous" state for the first row.
    $previous_cookie = 'thequickbrownfoxjumpsoverthelazydog';
    $previous_keywords = 'theluckymankissesthearrogantlady';

    while ($row = mysql_fetch_array($result)) {
        $current_cookie = $row['cookie_id'];
        $keyword_string = extract_keywords($row['keywords']);
        if ($keyword_string) {
            $count++;
            $kw_array = keywords_array($keyword_string);
            // Replace the raw keyword field with its split form before storing the row.
            $row['keywords'] = $kw_array;
            $all_rows[] = $row;

            // See if the keyword is from the same session.
            // Compare as strings with strict operators: a loose != treats
            // numeric-looking strings such as "0e12" / "0e99" or "007" / "7"
            // as equal, which would wrongly merge distinct cookies/searches.
            $cookie_changed = (string) $current_cookie !== (string) $previous_cookie;
            $keywords_changed = (string) $keyword_string !== (string) $previous_keywords;
            if ($cookie_changed && $keywords_changed) {
                $all_splitted_keyword_sets[] = $kw_array;
                $previous_keywords = $keyword_string;
            }
        }
        // Updated for every row, including rows without keywords.
        $previous_cookie = $current_cookie;
    }

    echo "<p>entries with keywords/total entries: {$count} / {$TOTAL}</p>";
    echo "<p>Threshold = {$THRESHOLD}</p>";

    // STEP2: aggregate
    $all_insert_attemps = 0; // for statistics
}

$count = 0;
$TOTAL = 2000;
mysql_select_db('bagsok');
$result = mysql_query("SELECT id, refer FROM userinfo WHERE refer IS NOT NULL LIMIT 2000, {$TOTAL}");

if (!$result) {
    echo 'no result available';
} else {
    // Flat list of every keyword seen, plus the per-row keyword arrays.
    $all = array();
    $all_keywords_arr = array();

    while ($row = mysql_fetch_array($result)) {
        $keywords_str = extract_keywords($row['refer']);
        if (!$keywords_str) {
            // Referer carried no keywords; nothing to record for this row.
            continue;
        }

        ++$count;
        $keywords_arr = keywords_array($keywords_str);
        $all_keywords_arr[] = $keywords_arr;
        $all = array_merge($all, $keywords_arr);
    }

    echo "entries with keywords/total entries: {$count} / {$TOTAL}";

    // Reduce to distinct keywords, then look up the occurrence of each one
    // across all per-row keyword arrays.
    $all = array_unique($all);
    $keyword_occr = array();
    foreach ($all as $keyword) {
        $occurrence = occurrence(array($keyword), $all_keywords_arr);
        $keyword_occr[$keyword] = $occurrence;
}
mysql_select_db('bagsok');
$result = mysql_query("SELECT id, refer FROM userinfo WHERE refer IS NOT NULL LIMIT 2000, {$TOTAL}");
$count = 0;
if (!$result) {
    echo 'no result available';
} else {
    // STEP1: pre-process
    //   1. get all keyword strings
    //   2. get all split keyword arrays
    $all_splitted_keyword_sets = array();
    while ($row = mysql_fetch_array($result)) {
        $keyword_string = extract_keywords($row['refer']);
        if ($keyword_string) {
            $count++;
            $all_splitted_keyword_sets[] = keywords_array($keyword_string);
        }
    }
    echo "<p>entries with keywords/total entries: {$count} / {$TOTAL}</p>";
    echo "<p>Threshold = " . THRESHOLD . "</p>";

    // STEP2: aggregate
    // Maps the string form of a keyword set to its occurrence value.
    $kwset_occur_mapping = array();
    foreach ($all_splitted_keyword_sets as $splitted_keyword_set) {
        // Keywords come from the stored referer (untrusted input), so they
        // must be escaped before being written into the page.
        echo '<h2>'
            . htmlspecialchars(
                (string) kwset_to_string($splitted_keyword_set),
                ENT_QUOTES | ENT_SUBSTITUTE,
                'UTF-8'
            )
            . '</h2>';

        // Deal with keyword sets of size 1 first.
        $current_generation = array();
        $size1_set = expand_dimension($splitted_keyword_set);
        foreach ($size1_set as $keyword_set) {
            // Compute the lookup key once instead of once per use.
            $kwset_key = kwset_to_string($keyword_set);
            if (!array_key_exists($kwset_key, $kwset_occur_mapping)) {
                $occur = occurrence($keyword_set, $all_splitted_keyword_sets);
                $kwset_occur_mapping[$kwset_key] = $occur;
$count = 0;
if (!$result) {
    echo 'no result available';
    die;
} else {
    // Only ask for the row count once the query is known to have succeeded;
    // calling mysql_num_rows() on a failed (false) result is an error.
    $TOTAL = mysql_num_rows($result);

    $stopwords = load_stopwords('stopwords.txt');
    $keyword_occur = array();

    // State carried between rows so that consecutive rows with the same
    // keyword string reuse the already-split keyword list. The initial
    // string is a sentinel for the first row.
    $previous_keyword_string = 'thequickbrownfoxjumpsoverthelazydog';
    $previous_keywords = array();

    while ($row = mysql_fetch_array($result)) {
        $query_string = $row['keywords'];
        $keyword_string = extract_keywords($query_string);
        $product = $row['url'];

        // Strict string comparison: a loose != treats numeric-looking
        // strings such as "007" and "7" as equal, which would reuse the
        // previous row's keywords for a different search.
        if ((string) $keyword_string !== (string) $previous_keyword_string) {
            $keywords = keywords_array($keyword_string);
            $keywords = remove_stopwords($keywords, $stopwords);
            // Make sure every keyword has a counter entry.
            foreach ($keywords as $keyword) {
                if (!isset($keyword_occur[$keyword])) {
                    $keyword_occur[$keyword] = 0;
                }
            }
            $previous_keyword_string = $keyword_string;
            $previous_keywords = $keywords;
        }

        // Avoid unnecessary insertion when there are no keywords left.
        if (count($previous_keywords) === 0) {
            continue;
        }

        // TODO: check whether the (keyword, product) pair already exists in the db.
        $insert_query = 'INSERT INTO m2_keyword_product (keyword, product) VALUES ';