/**
 * Build the pairwise correlation matrix of a matrix's row vectors.
 *
 * Every pair of rows (including a row with itself) is correlated once and the
 * value is written to both symmetric cells of the result. Rows that are absent
 * from row_vectors(), and pairs whose correlation is NaN, leave their cells
 * unset.
 *
 * @param $matrix
 *   The input matrix; could be a RealMatrix or a SparseMatrix.
 *
 * @return Matrix
 *   A new m-by-m matrix, where m is $matrix->row. It is always created as a
 *   'SparseMatrix' regardless of the input matrix type.
 */
static function correlation($matrix) {
  $rows = $matrix->row_vectors();
  $size = $matrix->row;
  $result = Matrix::create('SparseMatrix', $size, $size);

  for ($i = 0; $i < $size; $i++) {
    // A missing row cannot be correlated with anything.
    if (!isset($rows[$i])) {
      continue;
    }
    $left = $rows[$i];

    // Start at $i: the matrix is symmetric, so only the upper triangle
    // (plus the diagonal) has to be computed.
    for ($j = $i; $j < $size; $j++) {
      if (!isset($rows[$j])) {
        continue;
      }

      $value = $left->correlation($rows[$j]);
      if (is_nan($value)) {
        continue;
      }

      $result->set($i, $j, $value);
      $result->set($j, $i, $value);
    }
  }

  return $result;
}
/**
 * Compute predictions from the in-memory matrices and save them.
 *
 * Works on $this->directMatrix and $this->similarityMatrix rather than on the
 * database. For every mouse/cheese pair the prediction is the mouse's mean
 * plus the similarity-weighted average of the other mice's mean-centered
 * ratings for that cheese. When $this->knn > 0, only the $this->knn highest
 * entries of the mouse's similarity row are considered as neighbors.
 *
 * Predictions greater than $this->lowerbound are collected and handed to
 * batchInsert() with savePredictionSql(); afterwards
 * purgeOutdatedRecords('prediction') and cleanupMemory() are called.
 */
protected function computePredictionMemory() {
  $this->mouseVectors = $this->directMatrix->row_vectors();
  $m = $this->getMouseNum();
  $n = $this->getCheeseNum();
  $nan = $this->missing == 'none' ? TRUE : FALSE;
  $data = array();

  // Mean-centered ratings: $aux_matrix[mouse][cheese] = rating - mouse mean.
  // Only non-NaN ratings get an entry, so a mouse without any rating has no
  // row here at all.
  $aux_matrix = array();
  foreach ($this->mouseVectors as $mouse_index => $mouse_vec) {
    $mean = $mouse_vec->mean(TRUE);
    for ($cheese_index = 0; $cheese_index < $n; $cheese_index++) {
      $rating = $mouse_vec->get($cheese_index);
      if (!is_nan($rating)) {
        $aux_matrix[$mouse_index][$cheese_index] = $rating - $mean;
      }
    }
  }

  $values = $this->similarityMatrix->raw_values();
  // The predictions themselves are saved directly to the database below.
  $this->predictionMatrix = Matrix::create('SparseMatrix', $m, $n);

  foreach ($this->mouseMap as $mouse_id => $mouse_index) {
    // NULL means "no kNN restriction"; otherwise a set keyed by the matrix
    // indices of the nearest neighbors.
    $neighbor_set = NULL;
    if ($this->knn > 0) {
      // A mouse may have no similarity row at all; treat that as empty.
      $sim_scores = isset($values[$mouse_index]) ? $values[$mouse_index] : array();
      if (empty($sim_scores)) {
        // No neighbors available, so nothing can be predicted.
        continue;
      }
      arsort($sim_scores);
      // preserve_keys must be TRUE: the keys are mouse indices, and
      // array_slice() would otherwise renumber them to 0..k-1.
      $nearest = array_slice($sim_scores, 0, $this->knn, TRUE);
      // NOTE(review): the mouse's own entry, if present in its similarity
      // row, still counts toward the k slots and is skipped below.
      $neighbor_set = array_fill_keys(array_keys($nearest), TRUE);
    }

    foreach ($this->cheeseMap as $cheese_id => $cheese_index) {
      if ($this->duplicate == 'remove' && $this->recordExists($mouse_id, $cheese_id, $nan)) {
        continue;
      }

      $numerator = 0;
      $denominator = 0;
      for ($j = 0; $j < $m; $j++) {
        // Not one of the k nearest neighbors.
        if ($neighbor_set !== NULL && !isset($neighbor_set[$j])) {
          continue;
        }
        // Mouse $j has no rating for this cheese (or no ratings at all).
        if (!isset($aux_matrix[$j][$cheese_index])) {
          continue;
        }
        // Skip my own rating.
        if ($j == $mouse_index) {
          continue;
        }
        $sim = $this->similarityMatrix->get($j, $mouse_index);
        if (is_nan($sim)) {
          continue;
        }
        $numerator += $aux_matrix[$j][$cheese_index] * $sim;
        $denominator += abs($sim);
      }

      if ($denominator != 0) {
        $prediction = $this->mouseVectors[$mouse_index]->mean(TRUE, $nan) + $numerator / $denominator;
        // The same lowerbound setting is used for prediction generation.
        if ($prediction > $this->lowerbound) {
          $data[] = "({$this->appId}, {$mouse_id}, {$cheese_id}, {$prediction}, {$this->created})";
        }
      }
    }
  }

  $this->batchInsert($this->savePredictionSql(), $data);
  $this->purgeOutdatedRecords('prediction');
  $this->cleanupMemory();
}
/**
 * Compute predictions into $this->predictionMatrix from in-memory matrices.
 *
 * Works on $this->preferenceMatrix and $this->similarityMatrix. For each
 * user/item pair that has no existing preference, the prediction is the
 * user's mean plus the similarity-weighted average of the other users'
 * mean-centered scores for that item. The prediction matrix is always created
 * as a 'SparseMatrix', whatever the type of the preference matrix.
 */
protected function computePrediction() {
  $this->userVectors = $this->preferenceMatrix->row_vectors();
  $this->predictionMatrix = Matrix::create('SparseMatrix', $this->userNum, $this->itemNum);

  foreach ($this->userMap as $user_real_id => $user_matrix_index) {
    // Without a row vector there is no mean to base a prediction on.
    if (!isset($this->userVectors[$user_matrix_index])) {
      continue;
    }
    $user_vector = $this->userVectors[$user_matrix_index];

    foreach ($this->itemMap as $item_real_id => $item_matrix_index) {
      // Skip predictions on already existing preferences: in boolean mode a
      // non-zero value counts as existing, otherwise any non-NaN value does.
      $existing = $this->preferenceMatrix->get($user_matrix_index, $item_matrix_index);
      $already_rated = $this->isBooleanRecommender ? ($existing != 0) : !is_nan($existing);
      if ($already_rated) {
        continue;
      }

      // $user_matrix_index is the user being predicted for; $j iterates over
      // the candidate "similar users".
      $numerator = 0;
      $denominator = 0;
      for ($j = 0; $j < $this->userNum; $j++) {
        // Skip myself, and users that have no row vector.
        if ($j == $user_matrix_index || !isset($this->userVectors[$j])) {
          continue;
        }
        $neighbor_vector = $this->userVectors[$j];

        // No rating from $j for this item.
        if (is_nan($neighbor_vector->get($item_matrix_index))) {
          continue;
        }
        // No similarity between $user_matrix_index and $j.
        $similarity_value = $this->similarityMatrix->get($j, $user_matrix_index);
        if (is_nan($similarity_value)) {
          continue;
        }

        $mean_j = $this->isBooleanRecommender
          ? $neighbor_vector->mean(TRUE)
          : $neighbor_vector->intersect_mean($user_vector);
        $normalized_j_score = $this->preferenceMatrix->get($j, $item_matrix_index) - $mean_j;
        $numerator += $normalized_j_score * $similarity_value;
        $denominator += abs($similarity_value);
      }

      if ($denominator != 0) {
        $prediction = $user_vector->mean(TRUE) + $numerator / $denominator;
        $this->predictionMatrix->set($user_matrix_index, $item_matrix_index, $prediction);
      }
    }
  }
}