<thead>
<tr>
<th>ID</th>
<th>Keyword string</th>
</tr>
</thead>
<tbody>
<?php
    // Emit one <tr> per result row that yields a keyword string and count
    // every keyword returned by keywords_array() for those rows.
    //
    // Reads:  $result (query result handle).
    // Writes: $count   - incremented once per row that produced keywords;
    //         $kw_occr - keyword => number of times it was seen.
    // Both accumulators are expected to be initialised before this block —
    // TODO confirm, their set-up is not visible here.

    // The keyword text is derived from the stored referrer, i.e. from data an
    // outside party controls, so it must be HTML-escaped before it is echoed.
    // ENT_SUBSTITUTE only exists from PHP 5.4 on, hence the defined() guard.
    // NOTE(review): assumes the page is served as UTF-8 — confirm.
    $escape_flags = ENT_QUOTES | (defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);

    while ($row = mysql_fetch_array($result)) {
        $userid = $row['userid'];
        $refer = $row['refer'];
        // URL-decode the referrer before handing it to the keyword extractor.
        $real_refer = urldecode($refer);
        $keyword_string = extract_keywords($real_refer);
        if (!$keyword_string) {
            // No keywords in this referrer: no table row, no counting.
            continue;
        }

        $count++;
        $kw_set = keywords_array($keyword_string);
        foreach ($kw_set as $kw) {
            // isset() also copes with $kw_occr not existing yet, where
            // array_key_exists() would fail.
            if (!isset($kw_occr[$kw])) {
                $kw_occr[$kw] = 0;
            }
            $kw_occr[$kw] += 1;
        }

        echo '<tr><td>' . htmlspecialchars((string) $userid, $escape_flags, 'UTF-8')
            . '</td><td>' . htmlspecialchars((string) $keyword_string, $escape_flags, 'UTF-8')
            . '</td></tr>';
    }
?>
</tbody>
</table>
<?
    // Send COMMIT over the current MySQL connection, then print the summary
    // line "<rows with keywords> / <total>". $TOTAL is assigned outside the
    // visible code.
    mysql_query('COMMIT');
    echo "query ratio: {$count} / {$TOTAL}<br/>";
Beispiel #2
0
} else {
    //STEP1: pre-process
    //1. get all keyword strings
    //2. get all split keyword arrays
    // Issue BEGIN on the current connection; the matching COMMIT/ROLLBACK is
    // not visible in this fragment.
    mysql_query("BEGIN");
    // Keyword arrays that pass the cookie/keyword filter in the loop below.
    $all_splitted_keyword_sets = array();
    // Every row that yielded keywords, with $row['keywords'] replaced by its
    // split array; stored for later iteration.
    $all_rows = array();
    // Placeholder "previous row" state for the first iteration — presumably
    // chosen so that it never equals a real cookie id or keyword string.
    $previous_cookie = 'thequickbrownfoxjumpsoverthelazydog';
    $previous_keywords = 'theluckymankissesthearrogantlady';
    while ($row = mysql_fetch_array($result)) {
        $current_cookie = $row['cookie_id'];
        $keyword_string = extract_keywords($row['keywords']);
        if ($keyword_string) {
            $count++;
            $kw_array = keywords_array($keyword_string);
            $row['keywords'] = $kw_array;
            $all_rows[] = $row;
            //see if the keyword is from the same session
            // The set is recorded only when BOTH the cookie differs from the
            // previous row's cookie AND the keyword string differs from the
            // last recorded keyword string.
            // NOTE(review): with `&&`, a new keyword string from the same
            // cookie, or a repeated keyword string from a different cookie, is
            // dropped; if the goal is only to skip repeats within one session,
            // `||` may have been intended — confirm before changing.
            if ($current_cookie != $previous_cookie && $keyword_string != $previous_keywords) {
                $all_splitted_keyword_sets[] = $kw_array;
                $previous_keywords = $keyword_string;
            }
        }
        // Updated on every row, including rows that produced no keywords.
        $previous_cookie = $current_cookie;
    }
    // $TOTAL and $THRESHOLD are read here but assigned outside this fragment.
    echo "<p>entries with keywords/total entries: {$count} / {$TOTAL}</p>";
    echo "<p>Threshold = {$THRESHOLD}</p>";
    //STEP2: aggregate
    // Counter kept for statistics; nothing in the visible code increments it.
    $all_insert_attemps = 0;
    //for statistics
}
// Script fragment: collect the keywords found in stored referrers and compute,
// for each distinct keyword, an occurrence figure across all keyword arrays.
// The fragment is truncated inside the final foreach.
mysql_select_db('bagsok');
// Maximum number of rows requested from the query below.
$TOTAL = 2000;
// MySQL "LIMIT offset, count": skip the first 2000 matching rows, then return
// at most $TOTAL rows.
$result = mysql_query("SELECT id, refer FROM userinfo WHERE refer IS NOT NULL LIMIT 2000, {$TOTAL}");
// Number of rows whose referrer yielded a keyword string.
$count = 0;
if (!$result) {
    echo 'no result available';
} else {
    // (disabled debug output) echo '<table border="1px">';
    // Flat list of every keyword seen; de-duplicated after the loop.
    $all = array();
    // One keyword array per row that yielded keywords.
    $all_keywords_arr = array();
    while ($row = mysql_fetch_array($result)) {
        $keywords_str = extract_keywords($row['refer']);
        if ($keywords_str) {
            $count++;
            $keywords_arr = keywords_array($keywords_str);
            $all_keywords_arr[] = $keywords_arr;
            //echo "<tr><td>$keywords_str</td><td><pre>";
            //print_r($keywords_arr);
            // Append this row's keywords to the flat list.
            $all = array_merge($all, $keywords_arr);
            //echo "</pre></td></tr>";
        }
    }
    //echo '</table>';
    // Note: $TOTAL is the requested row limit, not the number of rows the
    // query actually returned.
    echo "entries with keywords/total entries: {$count} / {$TOTAL}";
    // Drop duplicate keywords; array_unique() keeps the key of each first
    // occurrence, so the resulting keys are not necessarily contiguous.
    $all = array_unique($all);
    //sort($all);
    // keyword => value returned by occurrence() for that single keyword.
    $keyword_occr = array();
    foreach ($all as $keyword) {
        // occurrence() is defined elsewhere; it receives a one-element keyword
        // array and the full list of per-row keyword arrays.
        $occurrence = occurrence(array($keyword), $all_keywords_arr);
        $keyword_occr[$keyword] = $occurrence;
Beispiel #4
0
}
// Script fragment: split every referrer's keyword string into a keyword array,
// then look up and cache an occurrence figure for the keyword sets derived
// from each array. The fragment is truncated inside the innermost `if`.
mysql_select_db('bagsok');
// MySQL "LIMIT offset, count": skip the first 2000 matching rows, then return
// at most $TOTAL rows.
// NOTE(review): $TOTAL is not assigned anywhere in this fragment — confirm it
// is defined before this line.
$result = mysql_query("SELECT id, refer FROM userinfo WHERE refer IS NOT NULL LIMIT 2000, {$TOTAL}");
// Number of rows whose referrer yielded a keyword string.
$count = 0;
if (!$result) {
    echo 'no result available';
} else {
    //STEP1: pre-process
    //1. get all keyword strings
    //2. get all split keyword arrays
    $all_splitted_keyword_sets = array();
    while ($row = mysql_fetch_array($result)) {
        $keyword_string = extract_keywords($row['refer']);
        if ($keyword_string) {
            $count++;
            $all_splitted_keyword_sets[] = keywords_array($keyword_string);
        }
    }
    echo "<p>entries with keywords/total entries: {$count} / {$TOTAL}</p>";
    // THRESHOLD is read as a constant here; it is defined outside this fragment.
    echo "<p>Threshold = " . THRESHOLD . "</p>";
    //STEP2: aggregate
    // Cache: kwset_to_string(keyword set) => value returned by occurrence().
    $kwset_occur_mapping = array();
    foreach ($all_splitted_keyword_sets as $splitted_keyword_set) {
        echo '<h2>' . kwset_to_string($splitted_keyword_set) . '</h2>';
        //deal with keyword sets of size 1 first
        $current_generation = array();
        // expand_dimension() is defined elsewhere; per the comment above it is
        // expected to yield the size-1 keyword sets — TODO confirm.
        $size1_set = expand_dimension($splitted_keyword_set);
        foreach ($size1_set as $keyword_set) {
            // Only call occurrence() for keyword sets that are not cached yet.
            if (!array_key_exists(kwset_to_string($keyword_set), $kwset_occur_mapping)) {
                $occur = occurrence($keyword_set, $all_splitted_keyword_sets);
                $kwset_occur_mapping[kwset_to_string($keyword_set)] = $occur;
// Row count of the result set. A failed query leaves $result falsy, and
// calling mysql_num_rows() on it would raise a warning before the
// 'no result available' handling that follows, so fall back to 0 instead.
$TOTAL = $result ? mysql_num_rows($result) : 0;
// Running counter, reset to zero here; nothing in the visible code
// increments it.
$count = 0;
// Walk the result rows, derive a stop-word-filtered keyword list per distinct
// consecutive keyword string, and start building an INSERT into
// m2_keyword_product. The fragment is truncated right after the INSERT prefix.
if (!$result) {
    echo 'no result available';
    die;
} else {
    $stopwords = load_stopwords('stopwords.txt');
    // keyword => counter; the visible code only initialises entries to 0.
    $keyword_occur = array();
    // Placeholder "previous row" state for the first iteration — presumably
    // chosen so that it never equals a real keyword string.
    $previous_keyword_string = 'thequickbrownfoxjumpsoverthelazydog';
    $previous_keywords = array();
    while ($row = mysql_fetch_array($result)) {
        $query_string = $row['keywords'];
        $keyword_string = extract_keywords($query_string);
        $product = $row['url'];
        // Re-split and re-filter only when the keyword string differs from the
        // previous row's; otherwise the previous row's keyword list is reused.
        if ($keyword_string != $previous_keyword_string) {
            $keywords = keywords_array($keyword_string);
            $keywords = remove_stopwords($keywords, $stopwords);
            foreach ($keywords as $keyword) {
                if (!isset($keyword_occur[$keyword])) {
                    $keyword_occur[$keyword] = 0;
                }
            }
            $previous_keyword_string = $keyword_string;
            $previous_keywords = $keywords;
        }
        // Nothing to insert when the current keyword list is empty.
        if (count($previous_keywords) == 0) {
            continue;
        }
        //avoid unnecessary insertion
        //TODO: check whether the (keyword, product) pair already exists in the db
        // NOTE(review): the statement is assembled as a string and the VALUES
        // part is not visible here — confirm the values are escaped.
        $insert_query = 'INSERT INTO m2_keyword_product (keyword, product) VALUES ';