/**
 * Clusters two artificial groups of random points with KMeans (k = 2) and
 * verifies that no resulting cluster mixes points of both groups.
 *
 * If drawClusters() yields an image, it is written below TEST_DATA_DIR so
 * the result can be inspected visually.
 */
public function testEuclideanClustering()
{
    $kmeans = new KMeans(2, new Euclidean(), new EuclidCF(), 0.001);

    // 500 documents of class 'A' generated around (100, 100) followed by
    // 500 documents of class 'B' generated around (200, 100); every 'A'
    // is inserted before the first 'B'
    $points = new TrainingSet();
    foreach (array('A' => 100, 'B' => 200) as $label => $centerX) {
        for ($n = 0; $n < 500; $n++) {
            $points->addDocument(
                $label,
                EuclideanPoint::getRandomPointAround($centerX, 100, 45)
            );
        }
    }

    list($clusters, $centroids, $distances) = $kmeans->cluster(
        $points,
        new DataAsFeatures()
    );

    $image = $this->drawClusters($points, $clusters, $centroids, false);
    if ($image) {
        imagepng($image, TEST_DATA_DIR . "/Clustering/KmeansTest/clusters.png");
    }

    // the data set is artificial and clearly separated, so kmeans is
    // expected to always cluster it correctly
    foreach ($clusters as $members) {
        // collect the distinct classes of this cluster's points as keys
        $seenClasses = array();
        foreach ($members as $pointIndex) {
            $seenClasses[$points[$pointIndex]->getClass()] = true;
        }

        // every document (point) of a cluster must share the same class
        $this->assertCount(1, $seenClasses);
    }
}
/**
 * Runs single-link hierarchical clustering on two artificial groups of
 * random points, cuts the dendrogram into two clusters and saves the
 * dendrogram and cluster drawings (when available) below TEST_DATA_DIR.
 *
 * The test asserts that cutting the dendrogram yields exactly the number
 * of clusters that was requested; without an assertion the test could
 * never fail on a wrong result and PHPUnit would report it as risky.
 */
public function testClustering2()
{
    $N = 50;
    $requestedClusters = 2;

    // $N unlabelled points around (100, 100) followed by $N unlabelled
    // points around (200, 100)
    $tset = new TrainingSet();
    foreach (array(100, 200) as $centerX) {
        for ($i = 0; $i < $N; $i++) {
            $tset->addDocument(
                '',
                EuclideanPoint::getRandomPointAround($centerX, 100, 45)
            );
        }
    }

    $hc = new Hierarchical(new SingleLink(), new Euclidean());
    list($dendrogram) = $hc->cluster($tset, new DataAsFeatures());

    $dg = $this->drawDendrogram($tset, $dendrogram, 600);

    $clusters = Hierarchical::dendrogramToClusters($dendrogram, $requestedClusters);
    $im = $this->drawClusters($tset, $clusters, null, false, 10);

    if ($dg) {
        imagepng($dg, TEST_DATA_DIR . "/Clustering/HierarchicalTest/dendrogram.png");
    }
    if ($im) {
        imagepng($im, TEST_DATA_DIR . "/Clustering/HierarchicalTest/clusters.png");
    }

    // Asserted last so the images above are still written for inspection
    // when the check fails. Only the cluster count is checked: this test
    // has no class labels to verify cluster purity against.
    $this->assertCount($requestedClusters, $clusters);
}