예제 #1
0
 /**
  * Build the pairwise correlation matrix of the row vectors of a matrix.
  *
  * Each pair of rows present in $matrix->row_vectors() is correlated through the
  * row vector's own correlation() method. Pairs whose correlation is NaN, and pairs
  * where either row vector is absent, are left unset in the result.
  *
  * @param $matrix Could be RealMatrix or SparseMatrix (must expose row_vectors() and ->row).
  * @return Matrix a new m by m correlation matrix, where m is $matrix->row.
  *         No need to return by reference; the engine takes care of it.
  *         Whatever the input matrix type, the returned matrix is always created as a 'SparseMatrix'.
  */
 static function correlation($matrix)
 {
     $rows = $matrix->row_vectors();
     $size = $matrix->row;
     // the result is square: one row/column per input row
     $result = Matrix::create('SparseMatrix', $size, $size);
     for ($i = 0; $i < $size; $i++) {
         if (!isset($rows[$i])) {
             // no vector for this row: nothing to correlate it with
             continue;
         }
         // start at $i: the matrix is symmetric, so each unordered pair is computed once
         for ($k = $i; $k < $size; $k++) {
             if (!isset($rows[$k])) {
                 continue;
             }
             // per the original author, values such as std are cached on the vector,
             // so repeated calls are not expected to be a performance problem.
             $value = $rows[$i]->correlation($rows[$k]);
             if (is_nan($value)) {
                 continue;
             }
             // mirror the value across the diagonal
             $result->set($i, $k, $value);
             $result->set($k, $i, $value);
         }
     }
     return $result;
 }
예제 #2
0
 /**
  * Compute predictions for every mouse-cheese pair from the in-memory matrices and
  * batch-insert the resulting rows.
  *
  * For each pair, the prediction is the mouse's mean plus the similarity-weighted
  * average of the other mice's mean-centred ratings for that cheese. When
  * $this->knn > 0, only the $this->knn highest-scoring entries of the mouse's
  * similarity row are considered as neighbours.
  *
  * Reads $this->directMatrix, $this->similarityMatrix, $this->mouseMap, $this->cheeseMap,
  * $this->knn, $this->missing, $this->duplicate, $this->lowerbound, $this->appId and
  * $this->created. Writes $this->mouseVectors and $this->predictionMatrix, then calls
  * batchInsert(), purgeOutdatedRecords('prediction') and cleanupMemory().
  */
 protected function computePredictionMemory()
 {
     // we do the computation based on $this->directMatrix loaded in memory, not on database
     $this->mouseVectors = $this->directMatrix->row_vectors();
     $m = $this->getMouseNum();
     $n = $this->getCheeseNum();
     $nan = $this->missing == 'none';
     $data = array();

     // $aux_matrix[mouse][cheese] holds the normalized data (rating minus the mouse's mean),
     // only for ratings that are not NaN. A mouse without any rating gets no row at all.
     $aux_matrix = array();
     foreach ($this->mouseVectors as $mouse_index => $mouse_vec) {
         $mean = $mouse_vec->mean(TRUE);
         for ($cheese_index = 0; $cheese_index < $n; $cheese_index++) {
             $rating = $mouse_vec->get($cheese_index);
             if (!is_nan($rating)) {
                 $aux_matrix[$mouse_index][$cheese_index] = $rating - $mean;
             }
         }
     }

     $values = $this->similarityMatrix->raw_values();
     // The matrix itself is not filled here because data is saved directly to db.
     $this->predictionMatrix = Matrix::create('SparseMatrix', $m, $n);

     // calculate prediction for each mouse-cheese pair, and collect the rows to save
     foreach ($this->mouseMap as $mouse_id => $mouse_index) {
         // NULL means "no knn restriction"; otherwise a lookup table keyed by neighbour index.
         $neighbor = NULL;
         if ($this->knn > 0) {
             // if there is no similarity row for this mouse there are no neighbours: skip.
             if (empty($values[$mouse_index])) {
                 continue;
             }
             // work on a copy so the raw similarity values stay untouched
             $sim_scores = $values[$mouse_index];
             arsort($sim_scores);
             // preserve_keys must be TRUE: by default array_slice() renumbers integer keys,
             // which would replace the real neighbour indexes with 0..knn-1.
             $sim_scores = array_slice($sim_scores, 0, $this->knn, TRUE);
             // flip so that membership is a constant-time isset() in the inner loop
             $neighbor = array_flip(array_keys($sim_scores));
         }
         foreach ($this->cheeseMap as $cheese_id => $cheese_index) {
             if ($this->duplicate == 'remove' && $this->recordExists($mouse_id, $cheese_id, $nan)) {
                 continue;
             }
             $numerator = 0;
             $denominator = 0;
             for ($j = 0; $j < $m; $j++) {
                 // if not k-nearest-neighbor, skip
                 if ($neighbor !== NULL && !isset($neighbor[$j])) {
                     continue;
                 }
                 // if no rating (or no row at all for mouse $j), skip.
                 if (!isset($aux_matrix[$j][$cheese_index])) {
                     continue;
                 }
                 // skip my own rating
                 if ($j == $mouse_index) {
                     continue;
                 }
                 $sim = $this->similarityMatrix->get($j, $mouse_index);
                 if (is_nan($sim)) {
                     continue;
                 }
                 $numerator += $aux_matrix[$j][$cheese_index] * $sim;
                 $denominator += abs($sim);
             }
             if ($denominator != 0) {
                 $prediction = $this->mouseVectors[$mouse_index]->mean(TRUE, $nan) + $numerator / $denominator;
                 // note: we use the same lowerbound setting for prediction generation.
                 if ($prediction > $this->lowerbound) {
                     // NOTE(review): values are interpolated straight into the SQL tuple; this assumes
                     // appId, the ids and created are numeric - confirm against the callers.
                     $data[] = "({$this->appId}, {$mouse_id}, {$cheese_id}, {$prediction}, {$this->created})";
                 }
             }
         }
     }
     $this->batchInsert($this->savePredictionSql(), $data);
     $this->purgeOutdatedRecords('prediction');
     $this->cleanupMemory();
 }
예제 #3
0
 /**
  * Compute predictions for every user-item pair that has no existing preference and
  * store them in $this->predictionMatrix.
  *
  * The prediction is the user's mean plus the similarity-weighted average of the other
  * users' mean-centred scores for the item. Pairs with no usable neighbour (denominator
  * of zero) get no prediction.
  *
  * Reads $this->preferenceMatrix, $this->similarityMatrix, $this->userMap, $this->itemMap,
  * $this->userNum, $this->itemNum and $this->isBooleanRecommender. Writes $this->userVectors
  * and $this->predictionMatrix.
  */
 protected function computePrediction()
 {
     // we do the computation based on $this->preferenceMatrix loaded in memory
     $this->userVectors = $this->preferenceMatrix->row_vectors();
     // regardless of whether preferenceMatrix is a sparse matrix or not, predictionMatrix is always a sparseMatrix.
     $this->predictionMatrix = Matrix::create('SparseMatrix', $this->userNum, $this->itemNum);
     // calculate prediction for each user-item pair
     foreach ($this->userMap as $user_real_id => $user_matrix_index) {
         // A user without a row vector has no mean to build a prediction on; skip instead of
         // dereferencing a missing entry below.
         if (!isset($this->userVectors[$user_matrix_index])) {
             continue;
         }
         $user_vector = $this->userVectors[$user_matrix_index];
         foreach ($this->itemMap as $item_real_id => $item_matrix_index) {
             // skip predictions on already existed preference ratings:
             // boolean recommenders treat any non-zero value as existing, others any non-NaN value.
             $existing = $this->preferenceMatrix->get($user_matrix_index, $item_matrix_index);
             $already_rated = $this->isBooleanRecommender ? $existing != 0 : !is_nan($existing);
             if ($already_rated) {
                 continue;
             }
             // $user_matrix_index is the user being computed. $j iterates the "similar users".
             $numerator = 0;
             $denominator = 0;
             for ($j = 0; $j < $this->userNum; $j++) {
                 // skip myself.
                 if ($j == $user_matrix_index) {
                     continue;
                 }
                 // if there is no vector for j, or no rating from j, skip.
                 if (!isset($this->userVectors[$j])) {
                     continue;
                 }
                 $neighbor_vector = $this->userVectors[$j];
                 if (is_nan($neighbor_vector->get($item_matrix_index))) {
                     continue;
                 }
                 // skip if there is no similarity between $user_matrix_index and $j.
                 $similarity_value = $this->similarityMatrix->get($j, $user_matrix_index);
                 if (is_nan($similarity_value)) {
                     continue;
                 }
                 // boolean recommenders centre on j's overall mean, others on the mean over
                 // the intersection with the current user's vector.
                 $mean_j = $this->isBooleanRecommender ? $neighbor_vector->mean(TRUE) : $neighbor_vector->intersect_mean($user_vector);
                 $normalized_j_score = $this->preferenceMatrix->get($j, $item_matrix_index) - $mean_j;
                 $numerator += $normalized_j_score * $similarity_value;
                 $denominator += abs($similarity_value);
             }
             if ($denominator != 0) {
                 $prediction = $user_vector->mean(TRUE) + $numerator / $denominator;
                 $this->predictionMatrix->set($user_matrix_index, $item_matrix_index, $prediction);
             }
         }
     }
 }