示例#1
0
#!/usr/bin/php 
<?php 
// Clustering driver script: configures a Kmeans instance, then walks the
// serialized feed entries found under data-abc/, skipping incomplete or
// duplicate ones and deriving a printable-article URL for each remaining entry.
// Read the README before running.
require "phpcluster/k-means.php";
$c = new Kmeans();
// NOTE(review): presumably 1000 is the number of centroids (clusters) and 50
// the convergence/similarity threshold -- confirm against phpcluster/k-means.php.
$c->setCentroids(1000);
$c->setThreshold(50);
echo "Loading entries\n";
// Not referenced anywhere in the visible part of this script.
$foo = 0;
// Titles accepted so far, stored as array keys so duplicates are detected with isset().
$title = array();
foreach (glob("data-abc/*") as $file) {
    // Each file is expected to hold one serialized entry.
    // NOTE(review): unserialize() is only safe if everything under data-abc/ is trusted;
    // the return value of file_get_contents() is not checked here.
    $rss = unserialize(file_get_contents($file));
    /* fall back to the guid when the entry carries no link */
    if (!isset($rss['link']) && isset($rss['guid'])) {
        $rss['link'] = $rss['guid'];
    }
    /* entries missing a title, link or description are ignored */
    if (!isset($rss['title']) || !isset($rss['link']) || !isset($rss['description'])) {
        continue;
    }
    /* so are entries whose title is blank */
    if (trim($rss['title']) == "") {
        continue;
    }
    /* only the first entry seen for a given title is kept */
    if (isset($title[$rss['title']])) {
        continue;
    }
    $title[$rss['title']] = true;
    /* transform the link into an id: drop the first 5 and the last 5 characters */
    /* of the trimmed link (presumably a fixed prefix and a file-extension suffix -- */
    /* TODO confirm against the feed data) */
    $id = substr(trim($rss['link']), 5);
    $id = substr($id, 0, strlen($id) - 5);
    /* URL of the print view for that id */
    $link = "http://www.abc.com.py/imprimir.php?pid={$id}";
    /* splitting the calculation into chunks of 20,000 news items is */
    /* a good idea, since large files will take a lot */
示例#2
0
<?php

include '../../../autoloader.php';
include '../../testing.php';
include '../cluster_testing.php';
use NlpTools\Clustering\KMeans;
use NlpTools\Similarity\Euclidean;
use NlpTools\Similarity\CosineSimilarity;
use NlpTools\Clustering\CentroidFactories\MeanAngle;
use NlpTools\Clustering\CentroidFactories\Euclidean as EuclidCF;
use NlpTools\Documents\TrainingSet;
use NlpTools\FeatureFactories\DataAsFeatures;
use NlpTools\Documents\Document;
// Demo script: feed two batches of random points to k-means, then either
// write the rendered result to clusters.png or, when no image is produced,
// dump the raw cluster assignment.

// Number of clusters requested from the algorithm.
$numClusters = 2;
$kmeans = new KMeans(
    $numClusters,
    new Euclidean(),
    new EuclidCF(),
    0.001
);

// Build the training set: 500 points for each centre, the whole first batch
// being added before the second one starts.
// NOTE(review): the arguments of getRandomPointAround() are presumably
// (x, y, spread) -- confirm against EuclideanPoint.
$trainingSet = new TrainingSet();
foreach (array(100, 200) as $centreX) {
    $remaining = 500;
    while ($remaining-- > 0) {
        $trainingSet->addDocument(
            '',
            EuclideanPoint::getRandomPointAround($centreX, 100, 45)
        );
    }
}

// cluster() hands back clusters, centroids and distances, in that order.
$result = $kmeans->cluster($trainingSet, new DataAsFeatures());
list($clusters, $centroids, $distances) = $result;

// draw_clusters() yields a falsy value when it cannot produce an image;
// fall back to a plain dump in that case.
$image = draw_clusters($trainingSet, $clusters, $centroids, false);
if (!$image) {
    var_dump($clusters);
} else {
    imagepng($image, 'clusters.png');
}