<?php

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';
// Ground truth: three generated word lists of different requested sizes.
$set1 = randomSet(90000);
$set2 = randomSet(70000);
$set3 = randomSet(80000);

// Element counts of each list as returned by randomSet().
echo "Number of words in set 1: ", count($set1), "\n";
echo "Number of words in set 2: ", count($set2), "\n";
echo "Number of words in set 3: ", count($set3), "\n";

// Reference values computed directly from the arrays.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
$unionCardinality = cardinality($union);
echo "Cardinailiy of union: ", $unionCardinality, "\n";
echo "Number of words in intersection: ", $intersectionCount, "\n";

echo "------\nLogLog\n";

// Feed every element of each list into its own MinHash instance.
$log_logs = [];
foreach ([$set1, $set2, $set3] as $index => $words) {
    $minHash = new HyperLogLog\MinHash();
    foreach ($words as $entry) {
        $minHash->add($entry);
    }
    $log_logs[] = $minHash;
    echo "Added set ", $index + 1, "\n";
}

// Remove the last instance from the list and copy its state into a fresh
// instance through export()/import(); both counts are printed so they can
// be compared by eye.
$merge_log = array_pop($log_logs);
$new_log_log = new HyperLogLog\MinHash();
$exportedState = $merge_log->export();
$new_log_log->import($exportedState);
echo $merge_log->count(), "\n";
echo $new_log_log->count(), "\n";

// The re-imported copy takes the place of the popped instance in the list.
$log_logs[] = $new_log_log;
<?php

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';
// adjusted so the union is almost the size of the hash k
// (the second list is requested with size 0)
$set1 = randomSet(110000);
$set2 = randomSet(0);
$set3 = randomSet(120000);

// Element counts of each list as returned by randomSet().
echo "Number of words in set 1: ", count($set1), "\n";
echo "Number of words in set 2: ", count($set2), "\n";
echo "Number of words in set 3: ", count($set3), "\n";
echo "------\n";

// Reference cardinality of each individual list.
$cardinality1 = cardinality($set1);
echo "Cardinailiy of set 1: ", $cardinality1, "\n";
$cardinality2 = cardinality($set2);
echo "Cardinailiy of set 2: ", $cardinality2, "\n";
$cardinality3 = cardinality($set3);
echo "Cardinailiy of set 3: ", $cardinality3, "\n";

// Reference values for the combined lists, computed directly from the arrays.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
$unionCardinality = cardinality($union);
echo "Cardinailiy of union: ", $unionCardinality, "\n";
echo "Cardinailiy of intersection: ", $intersectionCount, "\n";

echo "------\nLogLog\n";

// Feed every element of each list into its own MinHash instance.
$log_logs = [];
foreach ([$set1, $set2, $set3] as $index => $words) {
    $minHash = new HyperLogLog\MinHash();
    foreach ($words as $entry) {
        $minHash->add($entry);
    }
    $log_logs[] = $minHash;
    echo "Added set ", $index + 1, "\n";
}

// Hand all three instances to the intersector; the result is kept in $count.
$count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs);
<?php

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';
// Ground truth: three large generated word lists.
$set1 = randomSet(800000);
$set2 = randomSet(600000);
$set3 = randomSet(900000);

// Element counts of each list as returned by randomSet().
echo "Number of words in set 1: ", count($set1), "\n";
echo "Number of words in set 2: ", count($set2), "\n";
echo "Number of words in set 3: ", count($set3), "\n";
echo "------\n";

// Reference cardinality of each individual list.
$cardinality1 = cardinality($set1);
echo "Cardinailiy of set 1: ", $cardinality1, "\n";
$cardinality2 = cardinality($set2);
echo "Cardinailiy of set 2: ", $cardinality2, "\n";
$cardinality3 = cardinality($set3);
echo "Cardinailiy of set 3: ", $cardinality3, "\n";

// Reference values for the combined lists, computed directly from the arrays.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
$unionCardinality = cardinality($union);
echo "Cardinailiy of union: ", $unionCardinality, "\n";
echo "Cardinailiy of intersection: ", $intersectionCount, "\n";

echo "------\nLogLog\n";

// Feed every element of each list into its own MinHash instance.
$log_logs = [];
foreach ([$set1, $set2, $set3] as $index => $words) {
    $minHash = new HyperLogLog\MinHash();
    foreach ($words as $entry) {
        $minHash->add($entry);
    }
    $log_logs[] = $minHash;
    echo "Added set ", $index + 1, "\n";
}

// Hand all three instances to the intersector; the result is kept in $count
// and only a completion marker is printed here.
$count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs);
echo "intersection complete\n";
<?php

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';
// Ground truth: three generated word lists whose requested sizes differ by
// orders of magnitude.
$set1 = randomSet(100000);
$set2 = randomSet(1000);
$set3 = randomSet(100);

// Element counts of each list as returned by randomSet().
echo "Number of words in set 1: ", count($set1), "\n";
echo "Number of words in set 2: ", count($set2), "\n";
echo "Number of words in set 3: ", count($set3), "\n";
echo "------\n";

// Reference cardinality of each individual list.
$cardinality1 = cardinality($set1);
echo "Cardinailiy of set 1: ", $cardinality1, "\n";
$cardinality2 = cardinality($set2);
echo "Cardinailiy of set 2: ", $cardinality2, "\n";
$cardinality3 = cardinality($set3);
echo "Cardinailiy of set 3: ", $cardinality3, "\n";

// Reference values for the combined lists, computed directly from the arrays.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
$unionCardinality = cardinality($union);
echo "Cardinailiy of union: ", $unionCardinality, "\n";
echo "Cardinailiy of intersection: ", $intersectionCount, "\n";

echo "------\nLogLog\n";

// Feed every element of each list into its own MinHash instance. Instances
// come from the make() factory with DEFAULT_HLL and 10000 as arguments
// (presumably the HLL variant and the hash size k -- confirm against MinHash).
$log_logs = [];
foreach ([$set1, $set2, $set3] as $index => $words) {
    $minHash = HyperLogLog\MinHash::make(
        HyperLogLog\MinHash::DEFAULT_HLL,
        10000
    );
    foreach ($words as $entry) {
        $minHash->add($entry);
    }
    $log_logs[] = $minHash;
    echo "Added set ", $index + 1, "\n";
}

// Hand all three instances to the intersector; the result is kept in $count
// and only a completion marker is printed here.
$count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs);
echo "intersection complete\n";
Example #5
0
<?php

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';
// Two generated word lists; randomSet() is called with its default arguments.
$set1 = randomSet();
$set2 = randomSet();
echo "Number of words in set 1: " . count($set1) . "\n";
echo "Number of words in set 2: " . count($set2) . "\n";

$union = array_merge($set1, $set2);
echo "Number of words in union: " . count($union) . "\n";

// Reference cardinality of the union. The value is stored on its own and the
// newline is appended only when printing: "." binds tighter than "=", so
// `echo $card = cardinality(...) . "\n"` would store the newline-suffixed
// string and force the error formula below to do arithmetic on it.
echo "------\nCardinality of union\n";
$card = cardinality($union);
echo $card . "\n";

echo "------\nLogLog\n";

// One Basic estimator per list, both constructed with the same argument (14)
// -- presumably the precision; confirm against HyperLogLog\Basic.
$log_log1 = new HyperLogLog\Basic(14);
foreach ($set1 as $word) {
    $log_log1->add($word);
}
echo "Added set 1\n";

$log_log2 = new HyperLogLog\Basic(14);
foreach ($set2 as $word) {
    $log_log2->add($word);
}
echo "Added set 2\n";

// Combine the second estimator into the first. The return value is not used
// and the estimate is read from $log_log1 afterwards, so union() is expected
// to update $log_log1 in place.
$log_log1->union($log_log2);
echo "Union complete\n";

// Estimated cardinality, kept numeric for the error computation.
$count = $log_log1->count();
echo $count . "\n";

// Relative error of the estimate in percent of the reference cardinality.
// A zero reference would divide by zero, so no percentage is reported then.
if ($card > 0) {
    $error = number_format(($count - $card) / ($card / 100.0), 3) . '%';
} else {
    $error = 'n/a';
}
echo 'error: ' . $error . PHP_EOL;
Example #6
0
<?php

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';
// Generated word list; randomSet() is called with its default arguments.
$words = randomSet();
echo "Number of words\n" . count($words) . "\n";

// Reference cardinality. The value is stored on its own and the newline is
// appended only when printing: "." binds tighter than "=", so
// `echo $card = cardinality(...) . "\n"` would store the newline-suffixed
// string and force the error formula below to do arithmetic on it.
echo "------\nCardinality\n";
$card = cardinality($words);
echo $card . "\n";

echo "------\nLogLog\n";

// Feed every word into a Basic estimator built with its default settings.
$log_log = new HyperLogLog\Basic();
foreach ($words as $word) {
    $log_log->add($word);
}

// Estimated cardinality, kept numeric for the error computation.
$count = $log_log->count();
echo $count . "\n";

// Relative error of the estimate in percent of the reference cardinality.
// A zero reference would divide by zero, so no percentage is reported then.
if ($card > 0) {
    $error = number_format(($count - $card) / ($card / 100.0), 3) . '%';
} else {
    $error = 'n/a';
}
echo 'error: ' . $error . PHP_EOL;
<?php

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';
// Ground truth: three small generated word lists.
$set1 = randomSet(100);
$set2 = randomSet(500);
$set3 = randomSet(780);

// Element counts of each list as returned by randomSet().
echo "Number of words in set 1: ", count($set1), "\n";
echo "Number of words in set 2: ", count($set2), "\n";
echo "Number of words in set 3: ", count($set3), "\n";
echo "------\n";

// Reference cardinality of each individual list.
$cardinality1 = cardinality($set1);
echo "Cardinailiy of set 1: ", $cardinality1, "\n";
$cardinality2 = cardinality($set2);
echo "Cardinailiy of set 2: ", $cardinality2, "\n";
$cardinality3 = cardinality($set3);
echo "Cardinailiy of set 3: ", $cardinality3, "\n";

// Reference values for the combined lists, computed directly from the arrays.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
$unionCardinality = cardinality($union);
echo "Cardinailiy of union: ", $unionCardinality, "\n";
echo "Cardinailiy of intersection: ", $intersectionCount, "\n";

echo "------\nLogLog\n";

// Feed every element of each list into its own MinHash instance.
$log_logs = [];
foreach ([$set1, $set2, $set3] as $index => $words) {
    $minHash = new HyperLogLog\MinHash();
    foreach ($words as $entry) {
        $minHash->add($entry);
    }
    $log_logs[] = $minHash;
    echo "Added set ", $index + 1, "\n";
}

// Hand all three instances to the intersector; the result is kept in $count
// and only a completion marker is printed here.
$count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs);
echo "intersection complete\n";