/**
 * Merges horizontal and vertical coordinates into a list of 2D points.
 *
 * Stops (hesitations) are removed: whenever consecutive samples are separated
 * by a null distance, only the last sample of that run is kept. The final
 * sample has no successor to be compared with, so it is always kept.
 *
 * When $getDistances is TRUE the function returns, instead of the points, the
 * distance between every pair of consecutive samples (null distances included),
 * i.e. one value less than the number of samples.
 *
 * Only the positions available in both input arrays are used, and the arrays
 * are read in iteration order regardless of their keys.
 *
 * @param array   $xcoords      horizontal coordinates
 * @param array   $ycoords      vertical coordinates
 * @param boolean $getDistances if TRUE, the result array contains distances instead of points
 * @return array  2D points as array(x, y) pairs, or the distances computed by
 *                KMeans::getDistance() (presumably euclidean -- confirm against KMeans)
 */
function convert_points($xcoords, $ycoords, $getDistances = false)
{
    // Re-index both inputs so that position $i + 1 always addresses the next sample.
    $xs = array_values($xcoords);
    $ys = array_values($ycoords);

    // Never read past the end of the shorter array (avoids illegal offsets).
    $total = min(count($xs), count($ys));

    $pointArray = array();
    if ($total === 0) {
        return $pointArray;
    }

    $last = $total - 1;
    for ($i = 0; $i < $last; ++$i) {
        $p = array($xs[$i], $ys[$i]);
        $q = array($xs[$i + 1], $ys[$i + 1]);
        $distance = KMeans::getDistance($p, $q);

        if ($getDistances) {
            $pointArray[] = $distance;
        } elseif ($distance > 0) {
            // append the point only if the cursor actually moved afterwards
            $pointArray[] = $p;
        }
    }

    // The loop only emits points that have a successor; add the final sample
    // explicitly so that it is not lost (e.g. single-point input or a trailing stop).
    if (!$getDistances) {
        $pointArray[] = array($xs[$last], $ys[$last]);
    }

    return $pointArray;
}
// check whether URLs should be merged (just remove query string) if (db_option(TBL_PREFIX . TBL_CMS, "mergeCacheUrl")) { $urlparts = explode("?", $url); $url = $urlparts[0]; } if (isset($pages[$url])) { $pages[$url] += 1; } else { $pages[$url] = 1; } $dict_user[] = $row['id']; $dict_page[] = $url; } $n = count($pages); $k = ceil(sqrt($n / 2)); $km = new KMeans(whiten($users), $k); //$km->initRandom(); $km->initKatsavounidis(); $km->doCluster(); $groups = array(); foreach ($km->clusters as $j => $cluster) { //echo '['.$j.']<br>'; $groups[$j] = array(); foreach ($cluster as $id => $feats) { //echo $id.': '.$dict_page[$id].', '.$dict_user[$id].'<br>'; $url = $dict_page[$id]; if (!isset($groups[$j][$url])) { $groups[$j][$url] = 1; } else { $groups[$j][$url] += 1; }
$xcoords = explode(",", $coordsX); $ycoords = explode(",", $coordsY); //echo count($xcoords)." points (duplicated ones will be removed)\n"; // transform arrays in a single points array $pointArray = convert_points($xcoords, $ycoords); /* We can do as many clusters as (to the extreme) the sample points size, * but use the rule of thumb: k ~ sqrt(n/2) */ $n = count($pointArray); $k = ceil(sqrt($n / 2)); echo $n . " points and " . $k . " clusters.\n"; $prevVar = 0; // compute K-means for ($i = 1; $i <= $n; ++$i) { ini_set('max_execution_time', 30); $km = new KMeans($pointArray, $k); $km->initKatsavounidis(); $km->maxIterations = 5; $c = $km->distributeOverClusters(); $clusterVarX = array(); $clusterVarY = array(); // store points foreach ($c as $cluster) { $clusterVarX[] = $cluster->variance->x; $clusterVarY[] = $cluster->variance->y; } //var_dump($clusterVarX, $clusterVarY); $jointVar = array_sum($clusterVarX) * array_sum($clusterVarY); //$jointVar = (array_sum($clusterVarX)/count($clusterVarX) + array_sum($clusterVarY)/count($clusterVarY) ) / 2; if ($prevVar == 0) { $prevVar = $jointVar;