/**
 * Build the keyword/item co-occurrence and rating tables from the query log.
 *
 * Steps, in order:
 *  1. Execute rec_tables.sql and rec_tables_additional.sql (presumably these
 *     create the tables written below -- confirm against the SQL files).
 *  2. For every row of $tables['query'], segment the query text into keywords,
 *     count each keyword occurrence, and count every (keyword, item) pair
 *     using the items listed for that query in $tables['query_item'].
 *  3. Insert the keyword counts into `Keyword` and the pair counts into
 *     `weight_matrix` (truncated first).
 *  4. Fill `rating_matrix` (truncated first) with
 *     rating = (weight / total weight of the item) * (1 / number of items related to the keyword).
 *  5. When $this->name equals KEY_LINK_JACCARD, run
 *     wordAssociationWithJaccardPreprocess($tables).
 *
 * @param array $tables    Table names; the keys 'query' and 'query_item' are read.
 * @param mixed $startTime Not used by this method.
 * @return void
 */
public function preprocess($tables, $startTime = null)
 {
     $word_segmenter = new WordSegmenter();
     $this->dm->executeSqlFile(__DIR__ . "/rec_tables.sql");
     $this->dm->executeSqlFile(__DIR__ . "/rec_tables_additional.sql");
     /*
      * $weight_matrix is an array of arrays: keyword => (item => count)
      * $weight_matrix_example = array(
      *      'keyword1' => array(
      *          'product1' => 1,
      *          'product2' => 2,
      *      ),
      *      'keyword2' => array(
      *          'product1' => 3,
      *          'product3' => 5,
      *      ),
      *      ...
      *  )
      */
     $weight_matrix = array();
     // Keyword occurrence counts are accumulated in the same pass.
     $keyword_count = array();
     $query_results = $this->dm->query("select id, query from " . $tables['query'] . "");
     while ($query_row = mysql_fetch_array($query_results)) {
         /*
          * Get the keyword set K of this query and the item set P attached
          * to it; for each (k, p) in K x P, increment $weight_matrix[k][p].
          */
         $keywords = $word_segmenter->segmentWords($query_row['query']);
         foreach ($keywords as $keyword) {
             if (isset($keyword_count[$keyword])) {
                 $keyword_count[$keyword] += 1;
             } else {
                 $keyword_count[$keyword] = 1;
             }
         }
         $item_results = $this->dm->query('select itemId from ' . $tables['query_item'] . ' where queryId = ' . $query_row['id'] . ';');
         //itemId is actually item name!
         $items = array();
         while ($item_row = mysql_fetch_array($item_results)) {
             $items[] = $item_row['itemId'];
         }
         foreach ($keywords as $keyword) {
             if (!array_key_exists($keyword, $weight_matrix)) {
                 $weight_matrix[$keyword] = array();
             }
             foreach ($items as $item) {
                 if (!array_key_exists($item, $weight_matrix[$keyword])) {
                     $weight_matrix[$keyword][$item] = 0;
                 }
                 $weight_matrix[$keyword][$item] += 1;
             }
         }
     }
     /*
      * put keyword count to database for later use:
      * 1. setting up ratings between keywords and items
      * 2. keyword expansion
      */
     foreach ($keyword_count as $key => $key_count) {
         // Array keys that look numeric come back as ints; cast before escaping.
         $escaped_keyword = addslashes((string) $key);
         $this->dm->query("insert into Keyword (keyword, count) values('" . $escaped_keyword . "', " . $key_count . " )");
     }
     /*
      * put the $weight_matrix to database
      * create table weight_matrix (
      *      id integer primary key,
      *      keyword varchar,
      *      item varchar,
      *      weight integer,
      *      unique (keyword, item)
      *  );
      */
     $this->dm->query('truncate weight_matrix;');
     foreach ($weight_matrix as $keyword => $weight_array) {
         $escaped_keyword = addslashes((string) $keyword);
         foreach ($weight_array as $item => $weight) {
             // Item names are free text too, so escape them exactly like keywords;
             // a quote in an item name would otherwise break the statement.
             $escaped_item = addslashes((string) $item);
             $this->dm->query("insert into weight_matrix (keyword, item, weight) values ('{$escaped_keyword}', '{$escaped_item}', {$weight});");
         }
     }
     /*
      * Construct the rating_matrix table
      * with Keyword Frequency - Inverted Item Frequency
      */
     // Only needed by the log-based iif variant that is commented out below.
     $temp_result = $this->dm->query('select count(distinct item) from weight_matrix;');
     $temp_row = mysql_fetch_array($temp_result);
     $items_count = $temp_row[0];
     $this->dm->query('truncate rating_matrix;');
     // sum(weight) per item does not change during this loop, so query it once per item.
     $item_weight_totals = array();
     $keyword_results = $this->dm->query('select keyword from Keyword;');
     while ($keyword_row = mysql_fetch_array($keyword_results)) {
         $keyword = $keyword_row['keyword'];
         $escaped_keyword = addslashes($keyword);
         $temp_result = $this->dm->query("select count(*) from weight_matrix where keyword = '{$escaped_keyword}';");
         $temp_row = mysql_fetch_array($temp_result);
         $related_items_count = $temp_row[0];
         // $related_items_count is only divided by inside the loop below, which
         // runs only when at least one matching row exists, so it is never zero there.
         $related_results = $this->dm->query("select item, weight from weight_matrix where keyword = '{$escaped_keyword}';");
         while ($item_weight_row = mysql_fetch_array($related_results)) {
             $item = $item_weight_row['item'];
             $escaped_item = addslashes($item);
             $weight = $item_weight_row['weight'];
             if (!isset($item_weight_totals[$item])) {
                 $temp_result = $this->dm->query("select sum(weight) from weight_matrix where item = '{$escaped_item}';");
                 $temp_row = mysql_fetch_array($temp_result);
                 $item_weight_totals[$item] = $temp_row[0];
             }
             $related_weight = $item_weight_totals[$item];
             //$iif = log($items_count / $related_items_count);
             $iif = 1 / $related_items_count;
             $kf = $weight / $related_weight;
             $rating = $kf * $iif;
             // rating_matrix is similar to weight_matrix
             $this->dm->query("insert into rating_matrix (keyword, item, rating) values ('{$escaped_keyword}', '{$escaped_item}', {$rating});");
         }
     }
     if ($this->name == KEY_LINK_JACCARD) {
         // to do this, the keyword table is needed
         $this->wordAssociationWithJaccardPreprocess($tables);
     }
 }
 /**
  * Build the `keyword` and `keyword_item_weight` tables from the query log,
  * then run the algorithm-specific preprocessing selected by $this->name.
  *
  * Steps, in order:
  *  1. Execute rec_tables.sql (presumably creating the tables written below --
  *     confirm against the SQL file).
  *  2. For every row of $tables['query'], collect the items attached to it in
  *     $tables['query_item'], segment the query text into keywords, and count
  *     keyword occurrences plus (keyword, item) co-occurrences in both directions.
  *  3. Inside a BEGIN/COMMIT pair, insert for each keyword one row per related
  *     item with weight = count / (total keyword count of the item) * log(items / related items),
  *     followed by the keyword's own occurrence count.
  *  4. Dispatch to the SlopeOne / Jaccard / Cosine preprocessing step when
  *     $this->name matches the corresponding KEY_* constant.
  *
  * Progress and elapsed time are echoed as HTML.
  *
  * @param array $tables    Table names; the keys 'query' and 'query_item' are read.
  * @param mixed $startTime Not used by this method.
  * @return void
  */
 public function preprocess($tables, $startTime = null)
 {
     echo "KeywordRecommender_{$this->name} preprocess start.....<br/>";
     // Push PHP's own output buffer first, then the system buffer; the reverse
     // order leaves the message sitting in PHP's buffer. ob_flush() raises a
     // notice when no buffer is active, hence the guard.
     if (ob_get_level() > 0) {
         ob_flush();
     }
     flush();
     $time_start = microtime(true);
     $word_segmenter = new WordSegmenter();
     $this->dm->executeSqlFile(__DIR__ . "/rec_tables.sql");
     /* Construct the keyword and keyword_item_weight table */
     // keyword => (item => co-occurrence count)
     $keyword_item_count = array();
     // item => (keyword => co-occurrence count)
     $item_keyword_count = array();
     $query_results = $this->dm->query("select id, query from " . $tables['query'] . "");
     // keyword => number of occurrences over all queries
     $keyword_count = array();
     while ($query_row = mysql_fetch_array($query_results)) {
         $items = array();
         $item_results = $this->dm->query("SELECT itemId FROM {$tables['query_item']} WHERE queryId = {$query_row['id']}");
         while ($item_row = mysql_fetch_array($item_results)) {
             $item = $item_row['itemId'];
             $items[] = $item;
             if (!array_key_exists($item, $item_keyword_count)) {
                 $item_keyword_count[$item] = array();
             }
         }
         $keywords = $word_segmenter->segmentWords($query_row['query']);
         foreach ($keywords as $keyword) {
             if (isset($keyword_count[$keyword])) {
                 $keyword_count[$keyword] += 1;
             } else {
                 $keyword_count[$keyword] = 1;
                 $keyword_item_count[$keyword] = array();
             }
             foreach ($items as $item) {
                 if (!array_key_exists($item, $keyword_item_count[$keyword])) {
                     $keyword_item_count[$keyword][$item] = 0;
                 }
                 $keyword_item_count[$keyword][$item] += 1;
                 if (!array_key_exists($keyword, $item_keyword_count[$item])) {
                     $item_keyword_count[$item][$keyword] = 0;
                 }
                 $item_keyword_count[$item][$keyword] += 1;
             }
         }
     }
     $this->dm->query("BEGIN");
     $temp_result = $this->dm->query("select count(distinct itemId) from {$tables['query_item']};");
     $temp_row = mysql_fetch_array($temp_result);
     $items_count = $temp_row[0];
     foreach ($keyword_count as $key => $key_count) {
         // Keywords are free text: escape them before embedding in SQL. Array keys
         // that look numeric come back as ints, hence the cast.
         $escaped_key = addslashes((string) $key);
         $related_items_count = count($keyword_item_count[$key]);
         // A keyword that only occurred in queries without items has no related
         // items; skip the weight rows instead of dividing by zero.
         if ($related_items_count > 0) {
             $iif = log($items_count / $related_items_count);
             foreach ($keyword_item_count[$key] as $item => $count) {
                 $escaped_item = addslashes((string) $item);
                 // Total keyword occurrences recorded for this item (always >= $count).
                 $all_count = array_sum($item_keyword_count[$item]);
                 $weight = $count / $all_count * $iif;
                 $this->dm->query("INSERT INTO keyword_item_weight(keyword, item, weight) VALUE('{$escaped_key}',\r\n\t\t\t\t\t\t\t\t\t'{$escaped_item}', '{$weight}')");
             }
         }
         $this->dm->query("INSERT INTO keyword(keyword, count) VALUE ('{$escaped_key}', {$key_count})");
     }
     $this->dm->query("COMMIT");
     if ($this->name == KEY_COL_SLOPEONE) {
         $this->collaborativeFilteringWithSlopeOnePreprocess();
     }
     if ($this->name == KEY_LINK_JACCARD) {
         $this->wordAssociationWithJaccardPreprocess($tables);
     }
     if ($this->name == KEY_LINK_COSINE) {
         $this->wordAssociationWithCosinePreprocess($tables);
     }
     $time_end = microtime(true);
     $cost_time = $time_end - $time_start;
     echo "KeywordRecommender_{$this->name} preprocess end.....<br/>";
     echo "cost time: {$cost_time} <br/>";
     if (ob_get_level() > 0) {
         ob_flush();
     }
     flush();
 }