Пример #1
0
/**
 * Cluster $data into $k groups by repeating "assign, then update" until the
 * document-to-centroid assignment stops changing.
 *
 * The helper functions (initialiseCentroids, assignCentroids, updateCentroids,
 * formatResults) are defined outside this block; their exact input and output
 * shapes are assumed from how they are called here -- TODO confirm.
 *
 * @param mixed $data      Documents to cluster, passed unchanged to the helpers.
 * @param mixed $k         Number of clusters requested.
 * @param bool  $normalize Forwarded to initialiseCentroids().
 *
 * @return mixed Whatever formatResults() produces for the final assignment.
 */
function kMeans($data, $k, $normalize = false)
{
    // Forward the caller's flag. The previous code wrote `$normalize = false`
    // in the argument list, which silently discarded the caller's choice.
    $centroids = initialiseCentroids($data, $k, $normalize);
    $mapping = array();
    while (true) {
        $new_mapping = assignCentroids($data, $centroids);

        // Converged only if *every* document kept its centroid, so the whole
        // mapping is scanned and the scan stops at the first difference.
        // Loose comparison is kept because the ID types returned by
        // assignCentroids() are not visible from here.
        $changed = false;
        foreach ($new_mapping as $documentID => $centroidID) {
            if (!isset($mapping[$documentID]) || $centroidID != $mapping[$documentID]) {
                $changed = true;
                break;
            }
        }

        // Also reached when $new_mapping is empty, which previously made the
        // loop spin forever because no iteration could hit the return.
        if (!$changed) {
            return formatResults($new_mapping, $data, $centroids);
        }

        $mapping = $new_mapping;
        $centroids = updateCentroids($mapping, $data, $k);
    }
}
/**
 * Cluster documents by repeating "assign, then update" until the assignment
 * is stable or the iteration budget is used up.
 *
 * The helper functions (initialiseCentroids, assignCentroids, updateCentroids)
 * are defined outside this block; their exact input and output shapes are
 * assumed from how they are called here -- TODO confirm.
 *
 * @param mixed $normalisedTfidf Document vectors, passed unchanged to the helpers.
 * @param mixed $numClusters     Number of clusters requested.
 * @param int   $maxIterations   Upper bound on assignment passes (default 10, the
 *                               value that used to be hard-coded). Values below 1
 *                               still perform one pass.
 *
 * @return array Map of docID => centroidID from the last accepted assignment.
 */
function kMeans($normalisedTfidf, $numClusters, $maxIterations = 10)
{
    // Initialise centroids ( with key = term and value = tf-idf )
    $centroids = initialiseCentroids($normalisedTfidf, $numClusters);
    $mapDocToCentroid = array();
    while (true) {
        // Assign documents to centroids
        $newMapDocToCentroid = assignCentroids($normalisedTfidf, $centroids);

        // Stopping condition: no document moved to a different centroid.
        // The resulting array has docID for keys and centroidID for values.
        $changed = false;
        foreach ($newMapDocToCentroid as $docID => $centroidID) {
            // The first differing document is enough to adopt the new mapping.
            if (!isset($mapDocToCentroid[$docID]) || $centroidID != $mapDocToCentroid[$docID]) {
                $mapDocToCentroid = $newMapDocToCentroid;
                $changed = true;
                break;
            }
        }

        // Spend one unit of the iteration budget (the alternative stop condition).
        $maxIterations--;

        // `<= 0` rather than `== 0` so a non-positive budget passed by a caller
        // cannot skip past zero and loop without a bound.
        if (!$changed || $maxIterations <= 0) {
            return $mapDocToCentroid;
        }

        // Update the centroids
        $centroids = updateCentroids($mapDocToCentroid, $normalisedTfidf, $numClusters);
    }
}