/**
 * Recompute each centroid as the per-dimension mean of the documents mapped to it.
 *
 * Dimensions that a document does not list contribute nothing to the sum, but
 * the divisor is still the full number of documents in the cluster, so sparse
 * vectors are averaged as if missing dimensions were 0.
 *
 * @param array $mapping document ID => centroid ID
 * @param array $data    document ID => (dimension => numeric value)
 * @param int   $k       number of centroids the caller expects back
 *
 * @return array centroid ID => (dimension => mean value). When fewer than $k
 *               centroids received documents, the result is padded with the
 *               output of initialiseCentroids() (defined elsewhere; presumably
 *               returns that many fresh centroids - confirm against its definition).
 */
function updateCentroids($mapping, $data, $k)
{
    $centroids = array();
    // Number of documents assigned to each centroid, used as the mean's divisor.
    $counts = array_count_values($mapping);

    foreach ($mapping as $documentID => $centroidID) {
        $clusterSize = $counts[$centroidID];

        foreach ($data[$documentID] as $dim => $value) {
            // Initialise the accumulator only the first time a dimension is seen;
            // resetting it on every document would discard the running sum.
            if (!isset($centroids[$centroidID][$dim])) {
                $centroids[$centroidID][$dim] = 0;
            }

            // Adding value / size for every member yields the mean directly.
            $centroids[$centroidID][$dim] += $value / $clusterSize;
        }
    }

    // Clusters that attracted no documents are absent here; top the set back up to $k.
    // Note: array_merge() renumbers integer keys, so centroid IDs may shift when padding.
    if (count($centroids) < $k) {
        $centroids = array_merge($centroids, initialiseCentroids($data, $k - count($centroids)));
    }

    return $centroids;
}
/**
 * Recompute each centroid as the per-term mean of the documents mapped to it.
 *
 * Terms that a document does not list contribute nothing to the sum, but the
 * divisor is still the full number of documents in the cluster, so sparse
 * vectors are averaged as if missing terms were 0.
 *
 * @param array $mapDocToCentroid document ID => centroid ID
 * @param array $normalisedTfidf  document ID => (term => numeric weight)
 * @param int   $numClusters      number of centroids the caller expects back
 *
 * @return array centroid ID => (term => mean weight). When fewer than
 *               $numClusters centroids received documents, the result is padded
 *               with the output of initialiseCentroids() (defined elsewhere;
 *               presumably returns that many fresh centroids - confirm against
 *               its definition).
 */
function updateCentroids($mapDocToCentroid, $normalisedTfidf, $numClusters)
{
    $centroids = array();
    // Number of documents assigned to each centroid, used as the mean's divisor.
    $counts = array_count_values($mapDocToCentroid);

    // For each doc in the mapping
    foreach ($mapDocToCentroid as $docID => $centroidID) {
        $clusterSize = $counts[$centroidID];

        // For the corresponding doc in the data array, loop over each term
        foreach ($normalisedTfidf[$docID] as $term => $tfidfValue) {
            // Create the accumulator the first time a term is seen for this centroid;
            // without this, no entry would ever exist and nothing would be summed.
            if (!isset($centroids[$centroidID][$term])) {
                $centroids[$centroidID][$term] = 0;
            }

            // Adding weight / size for every member yields the mean directly.
            $centroids[$centroidID][$term] += $tfidfValue / $clusterSize;
        }
    }

    // Clusters that attracted no documents are absent here; top the set back up.
    // Note: array_merge() renumbers integer keys, so centroid IDs may shift when padding.
    if (count($centroids) < $numClusters) {
        $centroids = array_merge(
            $centroids,
            initialiseCentroids($normalisedTfidf, $numClusters - count($centroids))
        );
    }

    return $centroids;
}