/**
 * Checks that _ll_init_centroids() picks its centroids from the given points.
 *
 * The initialisation is sampled repeatedly; every returned centroid must be
 * equal to one of the input points, exactly two centroids must come back
 * when two are requested, and over all samples every input point must have
 * been chosen at least once. Asking for more centroids than there are
 * points must yield false.
 */
public function testInitCentroids()
{
    $xs = array(
        array(1, 2, 3, 4),
        array(4, 2, 5, 1),
        array(9, 1, 5, 4),
        array(2, 5, 2, 1),
        array(9, 9, 11, 12),
        array(99, 1, 1, 2),
        array(44, 21, 41, 2),
    );
    $samples = 300;

    // The arguments never change, so this is checked once rather than on
    // every sampling pass.
    $this->assertFalse(_ll_init_centroids($xs, 100));

    // Keys are the positions in $xs that were picked at least once.
    $shown_up = array();
    for ($i = 0; $i < $samples; $i++) {
        $centroids = _ll_init_centroids($xs, 2);

        // Expected value goes first so a failure reports the right way round.
        $this->assertEquals(2, count($centroids));

        foreach ($centroids as $centroid) {
            // Make sure they were assigned to points that exist.
            $position = array_search($centroid, $xs);
            $this->assertTrue($position !== false);
            $shown_up[$position] = true;
        }
    }

    $this->assertEquals(
        count($xs),
        count($shown_up),
        "Some \$xs did not show up in " . $samples . " samples (shown up = "
            . count($shown_up) . ", xs = " . count($xs) . ")."
    );
}
/**
 * Groups the points in $xs into $k clusters by iterative centroid refinement.
 *
 * Each pass assigns every point to the centroid chosen by
 * _ll_closest_centroid() and then asks _ll_reposition_centroids() for updated
 * centroids. Iteration stops as soon as a pass leaves the centroids unchanged.
 *
 * @param array $xs The points to cluster.
 * @param int   $k  Number of clusters requested.
 *
 * @return array|false Map of centroid index => list of keys into $xs that
 *                     belong to that centroid, taken from the final pass.
 *                     A centroid that received no point has no entry.
 *                     False when $k exceeds the number of points.
 */
function ll_kmeans($xs, $k)
{
    if ($k > count($xs)) {
        return false;
    }

    $centroids = _ll_init_centroids($xs, $k);

    do {
        // Start every pass from an empty assignment; otherwise indexes from
        // earlier passes would pile up next to the current ones.
        $belongs_to = array();

        foreach ($xs as $i => $x) {
            // The mapping is stored centroid => points (rather than
            // point => centroid) for complexity reasons.
            $belongs_to[_ll_closest_centroid($x, $centroids)][] = $i;
        }

        $old_centroids = $centroids;
        $centroids = _ll_reposition_centroids($centroids, $belongs_to, $xs);

        // Keep going only while the centroids are still moving.
        $continue = $old_centroids != $centroids;
    } while ($continue);

    return $belongs_to;
}