Example #1
0
 /**
  * Runs randomized trials comparing an exact intersection size with the MinHash estimate.
  *
  * Every trial draws values from $this->random() in pairs: the first value of a pair goes
  * into set 1, the second into set 2. Each set is tracked twice, as an exact key-indexed
  * array and as a HyperLogLog\MinHash sketch. Drawing stops once set 2 holds at least
  * $this->i distinct values. Trials whose exact or estimated intersection is zero are
  * skipped. For the remaining trials the exact intersection, the estimated intersection
  * and the count reported by the merged sketch are added to $this->average[0], [1] and [2]
  * and appended as one row to $this->results.
  *
  * @param int $repeat Number of trials to attempt. Values <= 0 run no trials (a negative
  *                    value previously looped without terminating).
  */
 public function test($repeat = 100)
 {
     while ($repeat-- > 0) {
         $exact1 = array();
         $exact2 = array();
         $sketch1 = new HyperLogLog\MinHash();
         $sketch2 = new HyperLogLog\MinHash();

         // At least one pair is always drawn; only the size of set 2 decides when to stop.
         do {
             $value = $this->random();
             $exact1[$value] = 1;
             $sketch1->add($value);

             $value = $this->random();
             $exact2[$value] = 1;
             $sketch2->add($value);
         } while (count($exact2) < $this->i);

         $intersection = \HyperLogLog\Utils\MinHashIntersector::count(array($sketch1, $sketch2));
         $actual = count(array_intersect_key($exact1, $exact2));
         if ($actual == 0 || $intersection == 0) {
             // Trials without a measurable overlap on either side are not recorded.
             continue;
         }

         // Merge the second sketch into the first and read the count of the merged sketch.
         $sketch1->union($sketch2);
         $total = $sketch1->count();

         $this->average[0] += $actual;
         $this->average[1] += $intersection;
         $this->average[2] += $total;
         $this->results[] = array($actual, $intersection, $total);
     }
 }
 /**
  * Runs trials comparing a known intersection size with the MinHash estimate.
  *
  * Every trial walks an arithmetic sequence that starts at 100000000 + $this->random(),
  * advances by a random step between 1 and 4, and ends $this->i steps later (inclusive).
  * Each value is added to the first sketch; every second value is also added to the
  * second sketch, so the number of values added to both sketches is known exactly.
  * The exact overlap, the estimated intersection and the count reported by the merged
  * sketch are added to $this->average[0], [1] and [2] and appended as one row to
  * $this->results.
  *
  * @param int $repeat Number of trials to run. Values <= 0 run no trials (a negative
  *                    value previously looped without terminating).
  */
 public function test($repeat = 100)
 {
     while ($repeat-- > 0) {
         $full = new HyperLogLog\MinHash();
         $half = new HyperLogLog\MinHash();

         $start = 100000000 + $this->random();
         $step = mt_rand(1, 4);
         $end = $start + $this->i * $step;

         // Number of values added to both sketches.
         $actual = 0;
         // False on the 1st, 3rd, ... value; true on the 2nd, 4th, ... value.
         $alsoInHalf = false;
         for ($value = $start; $value <= $end; $value += $step) {
             $full->add($value);
             if ($alsoInHalf) {
                 $half->add($value);
                 $actual++;
             }
             $alsoInHalf = !$alsoInHalf;
         }

         $intersection = \HyperLogLog\Utils\MinHashIntersector::count(array($full, $half));

         // Merge the second sketch into the first and read the count of the merged sketch.
         $full->union($half);
         $total = $full->count();

         $this->average[0] += $actual;
         $this->average[1] += $intersection;
         $this->average[2] += $total;
         $this->results[] = array($actual, $intersection, $total);
     }
 }
// Demo: intersect three sets where the middle one is requested with size 0, then check
// that the MinHash-based intersection count comes out as zero.
// randomSet() and cardinality() are helpers defined outside this fragment; presumably
// randomSet($n) yields $n random words and cardinality() counts distinct values -- confirm.
$set1 = randomSet(110000);
$set2 = randomSet(0);
$set3 = randomSet(120000);
echo "Number of words in set 1: " . count($set1) . "\n";
echo "Number of words in set 2: " . count($set2) . "\n";
echo "Number of words in set 3: " . count($set3) . "\n";
echo "------\n";
echo "Cardinailiy of set 1: " . cardinality($set1) . "\n";
echo "Cardinailiy of set 2: " . cardinality($set2) . "\n";
echo "Cardinailiy of set 3: " . cardinality($set3) . "\n";

// Exact reference figures computed with plain array functions.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
echo "Cardinailiy of union: " . cardinality($union) . "\n";
echo "Cardinailiy of intersection: " . $intersectionCount . "\n";
echo "------\nLogLog\n";

// Build one MinHash sketch per set.
$log_logs = array();
foreach (array($set1, $set2, $set3) as $i => $set) {
    $log_log = new HyperLogLog\MinHash();
    foreach ($set as $word) {
        $log_log->add($word);
    }
    $log_logs[] = $log_log;
    echo "Added set " . ($i + 1) . "\n";
}

$count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs);
echo "intersection complete: count: {$count}\n";

// Report exactly one verdict. Previously the success line was printed unconditionally,
// so a non-zero count produced both the failure and the success message.
if ($count) {
    echo "Error: 100% - count should be zero\n";
} else {
    echo "Error: 0% - count is zero\n";
}
// Demo: build MinHash sketches for three sets, round-trip the last sketch through
// export()/import(), and compare the estimated intersection with the exact one.
// $set1, $set2 and $set3 are expected to be defined before this fragment runs;
// cardinality() is a helper defined elsewhere (presumably a distinct-value count -- confirm).
echo "Number of words in set 1: " . count($set1) . "\n";
echo "Number of words in set 2: " . count($set2) . "\n";
echo "Number of words in set 3: " . count($set3) . "\n";

// Exact reference figures computed with plain array functions.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
echo "Cardinailiy of union: " . cardinality($union) . "\n";
echo "Number of words in intersection: " . $intersectionCount . "\n";
echo "------\nLogLog\n";

// Build one MinHash sketch per set.
$log_logs = array();
foreach (array($set1, $set2, $set3) as $i => $set) {
    $log_log = new HyperLogLog\MinHash();
    foreach ($set as $word) {
        $log_log->add($word);
    }
    $log_logs[] = $log_log;
    echo "Added set " . ($i + 1) . "\n";
}

// Replace the last sketch with a copy rebuilt from its exported state, printing the
// count of the original and of the copy so the two can be compared by eye.
$merge_log = array_pop($log_logs);
$new_log_log = new HyperLogLog\MinHash();
$new_log_log->import($merge_log->export());
echo $merge_log->count() . "\n";
echo $new_log_log->count() . "\n";
$log_logs[] = $new_log_log;

$count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs);
echo "Intersection complete\n";
if ($intersectionCount == 0) {
    // A relative error is undefined against an empty exact intersection; dividing here
    // would raise DivisionByZeroError on PHP 8 (a warning on PHP 7).
    echo $count . "\n" . 'error: n/a (exact intersection is empty)' . PHP_EOL;
} else {
    echo $count . "\n" . 'error: ' . number_format(($count - $intersectionCount) / ($intersectionCount / 100.0), 3) . '%' . PHP_EOL;
}

// export() is indexed as a two-element array of strings here; print both byte lengths.
foreach ($log_logs as $log) {
    $export = $log->export();
    echo "Size of export: " . strlen($export[0]) . ', ' . strlen($export[1]) . "\n";
}
include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';
// Demo: estimate the intersection of three sets of very different sizes using MinHash
// sketches created through MinHash::make(), and report the error against the exact count.
// randomSet() and cardinality() come from the included helper file; presumably
// randomSet($n) yields $n random words and cardinality() counts distinct values -- confirm.
$set1 = randomSet(100000);
$set2 = randomSet(1000);
$set3 = randomSet(100);
echo "Number of words in set 1: " . count($set1) . "\n";
echo "Number of words in set 2: " . count($set2) . "\n";
echo "Number of words in set 3: " . count($set3) . "\n";
echo "------\n";
echo "Cardinailiy of set 1: " . cardinality($set1) . "\n";
echo "Cardinailiy of set 2: " . cardinality($set2) . "\n";
echo "Cardinailiy of set 3: " . cardinality($set3) . "\n";

// Exact reference figures computed with plain array functions.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
echo "Cardinailiy of union: " . cardinality($union) . "\n";
echo "Cardinailiy of intersection: " . $intersectionCount . "\n";
echo "------\nLogLog\n";

// Build one sketch per set.
// NOTE(review): the second argument (10000) looks like a sketch-size setting -- confirm
// against MinHash::make().
$log_logs = array();
foreach (array($set1, $set2, $set3) as $i => $set) {
    $log_log = HyperLogLog\MinHash::make(HyperLogLog\MinHash::DEFAULT_HLL, 10000);
    foreach ($set as $word) {
        $log_log->add($word);
    }
    $log_logs[] = $log_log;
    echo "Added set " . ($i + 1) . "\n";
}

$count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs);
echo "intersection complete\n";
if ($intersectionCount == 0) {
    // A relative error is undefined against an empty exact intersection; dividing here
    // would raise DivisionByZeroError on PHP 8 (a warning on PHP 7).
    echo $count . "\n" . 'error: n/a (exact intersection is empty)' . PHP_EOL;
} else {
    echo $count . "\n" . 'error: ' . number_format(($count - $intersectionCount) / ($intersectionCount / 100.0), 3) . '%' . PHP_EOL;
}