// Demo: estimate the intersection of three word sets with MinHash sketches and
// check the estimate against the expectation that it is zero.
// NOTE(review): randomSet() and cardinality() are defined outside this snippet;
// randomSet(0) presumably yields an empty set, which is why the intersection is
// expected to be zero - confirm against randomGenerator.
$set1 = randomSet(110000);
$set2 = randomSet(0);
$set3 = randomSet(120000);

echo "Number of words in set 1: " . count($set1) . "\n";
echo "Number of words in set 2: " . count($set2) . "\n";
echo "Number of words in set 3: " . count($set3) . "\n";
echo "------\n";
echo "Cardinailiy of set 1: " . cardinality($set1) . "\n";
echo "Cardinailiy of set 2: " . cardinality($set2) . "\n";
echo "Cardinailiy of set 3: " . cardinality($set3) . "\n";

// Exact reference values computed directly on the raw arrays.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
echo "Cardinailiy of union: " . cardinality($union) . "\n";
echo "Cardinailiy of intersection: " . $intersectionCount . "\n";
echo "------\nLogLog\n";

// Build one MinHash sketch per set.
$log_logs = array();
foreach (array($set1, $set2, $set3) as $i => $set) {
    $log_log = new HyperLogLog\MinHash();
    foreach ($set as $word) {
        $log_log->add($word);
    }
    $log_logs[] = $log_log;
    echo "Added set " . ($i + 1) . "\n";
}

$count = \HyperLogLog\Utils\MinHashIntersector::count($log_logs);
echo "intersection complete: count: {$count}\n";

// Report exactly one verdict. Previously the "0%" line was printed
// unconditionally, even right after the "100%" line.
if ($count) {
    echo "Error: 100% - count should be zero\n";
} else {
    echo "Error: 0% - count is zero\n";
}
/**
 * Runs the benchmark $repeat times with a deterministic overlap.
 *
 * Each round feeds an arithmetic sequence of numbers into a first sketch and
 * every second element of that sequence into a second sketch, so the exact
 * intersection size is the number of elements given to the second sketch.
 * The exact size, the MinHash intersection estimate and the union count are
 * added to $this->average and appended to $this->results.
 *
 * @param int $repeat Number of rounds to run.
 */
public function test($repeat = 100)
{
    while ($repeat--) {
        $sketchAll = new HyperLogLog\MinHash();
        $sketchHalf = new HyperLogLog\MinHash();

        // Sequence parameters: start offset, step width, inclusive upper bound.
        $start = 100000000 + $this->random();
        $step = mt_rand(1, 4);
        $last = $start + $this->i * $step;

        $shared = 0;
        $position = 0;
        for ($value = $start; $value <= $last; $value += $step) {
            $sketchAll->add($value);
            $position++;
            if ($position % 2 !== 0) {
                continue;
            }
            // Every second element also goes into the second sketch.
            $sketchHalf->add($value);
            $shared++;
        }

        $estimate = \HyperLogLog\Utils\MinHashIntersector::count(array($sketchAll, $sketchHalf));

        // The union is taken after the intersection estimate because it
        // mutates the first sketch.
        $sketchAll->union($sketchHalf);
        $unionCount = $sketchAll->count();

        $this->average[0] += $shared;
        $this->average[1] += $estimate;
        $this->average[2] += $unionCount;
        $this->results[] = array($shared, $estimate, $unionCount);
    }
}
/**
 * Runs the benchmark $repeat times with randomly overlapping sets.
 *
 * Each round draws pairs of values from $this->random(), one per set, until
 * the second set holds at least $this->i distinct values. The exact
 * intersection size comes from the recorded keys and the estimate from the
 * MinHash sketches. Rounds where either figure is zero are skipped and leave
 * no trace in $this->average or $this->results.
 *
 * @param int $repeat Number of rounds to run (skipped rounds count too).
 */
public function test($repeat = 100)
{
    while ($repeat--) {
        // Exact membership is tracked by array key, next to the sketches.
        $keep1 = array();
        $keep2 = array();
        $ll1 = new HyperLogLog\MinHash();
        $ll2 = new HyperLogLog\MinHash();

        do {
            $rand = $this->random();
            $keep1[$rand] = 1;
            $ll1->add($rand);

            $rand = $this->random();
            $keep2[$rand] = 1;
            $ll2->add($rand);
        } while (count($keep2) < $this->i);

        $intersection = \HyperLogLog\Utils\MinHashIntersector::count(array($ll1, $ll2));
        $actual = count(array_intersect_key($keep1, $keep2));

        // Loose comparison on purpose: the estimate may be a float 0.0.
        if ($actual == 0 || $intersection == 0) {
            continue;
        }

        // The union is taken after the intersection estimate because it
        // mutates $ll1.
        $ll1->union($ll2);
        $total = $ll1->count();

        $this->average[0] += $actual;
        $this->average[1] += $intersection;
        $this->average[2] += $total;
        $this->results[] = array($actual, $intersection, $total);
    }
}
include __DIR__ . '/../vendor/autoload.php';
include __DIR__ . '/randomGenerator.php';

// Demo: estimate the intersection of three random word sets from the values
// returned by MinHashIntersector::jaccard() and compare the estimate with the
// exact intersection computed on the raw arrays.
$set1 = randomSet(1000);
$set2 = randomSet(1000);
$set3 = randomSet(1000);

echo "Number of words in set 1: " . count($set1) . "\n";
echo "Number of words in set 2: " . count($set2) . "\n";
echo "Number of words in set 3: " . count($set3) . "\n";

// Exact reference values.
$intersection = array_intersect($set1, $set2, $set3);
$union = array_merge($set1, $set2, $set3);
$intersectionCount = cardinality($intersection);
echo "Cardinailiy of union: " . cardinality($union) . "\n";
echo "Number of words in intersection: " . $intersectionCount . "\n";
echo "------\nLogLog\n";

// Build one MinHash sketch per set.
$log_logs = array();
foreach (array($set1, $set2, $set3) as $i => $set) {
    $log_log = new HyperLogLog\MinHash();
    foreach ($set as $word) {
        $log_log->add($word);
    }
    $log_logs[] = $log_log;
    echo "Added set " . ($i + 1) . "\n";
}

list($minHashIntersection, $minHashK, $hllUnion) = \HyperLogLog\Utils\MinHashIntersector::jaccard($log_logs);
$hllUnionCount = $hllUnion->count();
echo "Hll union: " . $hllUnionCount . "\n";
echo "Min hash intersection: " . $minHashIntersection . "\n";
echo "Min hash k: " . $minHashK . "\n";

// Scale the ratio intersection / k by the estimated union size.
$count = $minHashIntersection / $minHashK * $hllUnionCount;
echo "intersection complete\n";

// A relative error is undefined when the exact intersection is empty, which
// can happen with random sets. Report that case instead of dividing by zero.
if ($intersectionCount) {
    $error = number_format(($count - $intersectionCount) / ($intersectionCount / 100.0), 3) . '%';
} else {
    $error = 'n/a (exact intersection is empty)';
}
echo $count . "\n" . 'error: ' . $error . PHP_EOL;