/** * @dataProvider datasets */ public function testQuartiles($dataset, $min, $max, $mean, $median, $first, $third) { $quantile = new Quantile($dataset); $this->assertSame($min, $quantile->min()->value()); $this->assertSame($max, $quantile->max()->value()); $this->assertSame($mean, $quantile->mean()); $this->assertSame($median, $quantile->median()->value()); $this->assertSame($first, $quantile->quartile(1)->value()); $this->assertSame($third, $quantile->quartile(3)->value()); }
/** * @param MapInterface<int, NodeInterface> $nodes */ public function __invoke(MapInterface $nodes) : NodeInterface { if ((string) $nodes->keyType() !== 'int' || (string) $nodes->valueType() !== NodeInterface::class) { throw new InvalidArgumentException(); } if ($nodes->size() === 1) { if (!$nodes->current()->hasChildren()) { return $nodes->current(); } try { return $this($nodes->current()->children()); } catch (ContentTooDispersedException $e) { return $nodes->current(); } } $dispersion = $nodes->reduce([], function (array $dispersion, int $position, NodeInterface $node) : array { $text = (new Text())($node); $text = new Str($text); $dispersion[$position] = $text->wordCount(); return $dispersion; }); $quantile = new Quantile($dispersion); $lookup = []; //select qartiles that have more words than the average nodes for ($i = 1; $i < 5; $i++) { $diff = $quantile->quartile($i)->value() - $quantile->quartile($i - 1)->value(); if ($diff >= $quantile->mean()) { $lookup[] = $i; } } if (empty($lookup)) { throw new ContentTooDispersedException(); } //select the minimum amount of words that needs to be in nodes $min = $quantile->quartile(min($lookup))->value(); $nodes = $nodes->filter(function (int $position) use($min, $dispersion) : bool { return $dispersion[$position] >= $min; }); return $this($nodes); }