<?php
// Example script: estimate the cardinality of the union of two random word
// sets with HyperLogLog\Basic and report the error against the exact count.
// randomSet() and cardinality() are expected to come from randomGenerator.php.

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';

$set1 = randomSet();
$set2 = randomSet();

echo "Number of words in set 1: " . count($set1) . "\n";
echo "Number of words in set 2: " . count($set2) . "\n";

$union = array_merge($set1, $set2);
echo "Number of words in union: " . count($union) . "\n";

echo "------\nCardinality of union\n";
// Keep the exact cardinality as a plain number. Appending "\n" inside the
// assignment would store a string such as "1234\n", which is later used in
// arithmetic (notice on PHP 7.x, only tolerated on PHP 8).
$card = cardinality($union);
echo $card . "\n";

echo "------\nLogLog\n";

$log_log1 = new HyperLogLog\Basic(14);
foreach ($set1 as $word) {
    $log_log1->add($word);
}
echo "Added set 1\n";

$log_log2 = new HyperLogLog\Basic(14);
foreach ($set2 as $word) {
    $log_log2->add($word);
}
echo "Added set 2\n";

// Merge the second sketch into the first; $log_log1 then represents the union.
$log_log1->union($log_log2);
echo "Union complete\n";

// Estimated cardinality, kept numeric for the error computation below.
$count = $log_log1->count();

// Signed relative error in percent. Guard against an empty union so the
// division cannot fail (DivisionByZeroError on PHP 8).
if ($card > 0) {
    $error = number_format(($count - $card) / ($card / 100.0), 3) . '%';
} else {
    $error = 'n/a';
}

echo $count . "\n" . 'error: ' . $error . PHP_EOL;
// NOTE(review): this line looks like a second example script (a MinHash-based intersection
// count over three random sets, compared against array_intersect) that was collapsed onto a
// single physical line. As written, everything after the "//" marker below belongs to that
// end-of-line comment, so only the include statement is live code. The fragment also has no
// opening PHP tag of its own and ends inside an unclosed `if ($count) {` block, so its tail is
// missing from this view. Restore the original line breaks and the missing closing part before
// relying on it. Presumably randomSet(0) yields an empty set, which is why the script expects
// an intersection count of zero -- TODO confirm against randomGenerator.php.
include __DIR__ . '/randomGenerator.php'; // adjusted so the union is almost the size of the hash k $set1 = randomSet(110000); $set2 = randomSet(0); $set3 = randomSet(120000); echo "Number of words in set 1: " . count($set1) . "\n"; echo "Number of words in set 2: " . count($set2) . "\n"; echo "Number of words in set 3: " . count($set3) . "\n"; echo "------\n"; echo "Cardinailiy of set 1: " . cardinality($set1) . "\n"; echo "Cardinailiy of set 2: " . cardinality($set2) . "\n"; echo "Cardinailiy of set 3: " . cardinality($set3) . "\n"; $intersection = array_intersect($set1, $set2, $set3); $union = array_merge($set1, $set2, $set3); $intersectionCount = cardinality($intersection); echo "Cardinailiy of union: " . cardinality($union) . "\n"; echo "Cardinailiy of intersection: " . $intersectionCount . "\n"; echo "------\nLogLog\n"; $log_logs = array(); foreach (array($set1, $set2, $set3) as $i => $set) { $log_log = new HyperLogLog\MinHash(); foreach ($set as $word) { $log_log->add($word); } $log_logs[] = $log_log; echo "Added set " . ($i + 1) . "\n"; } $count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs); echo "intersection complete: count: {$count}\n"; if ($count) { echo "Error: 100% - count should be zero\n";
<?php
// Example script: estimate the cardinality of one random word set with
// HyperLogLog\Basic and report the error against the exact count.
// randomSet() and cardinality() are expected to come from randomGenerator.php.

include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';

$words = randomSet();
echo "Number of words\n" . count($words) . "\n";

echo "------\nCardinality\n";
// Keep the exact cardinality as a plain number. Appending "\n" inside the
// assignment would store a string such as "1234\n", which is later used in
// arithmetic (notice on PHP 7.x, only tolerated on PHP 8).
$card = cardinality($words);
echo $card . "\n";

echo "------\nLogLog\n";

$log_log = new HyperLogLog\Basic();
foreach ($words as $word) {
    $log_log->add($word);
}

// Estimated cardinality, kept numeric for the error computation below.
$count = $log_log->count();

// Signed relative error in percent. Guard against an empty set so the
// division cannot fail (DivisionByZeroError on PHP 8).
if ($card > 0) {
    $error = number_format(($count - $card) / ($card / 100.0), 3) . '%';
} else {
    $error = 'n/a';
}

echo $count . "\n" . 'error: ' . $error . PHP_EOL;