/**
 * Averages the per-observation gradients over a randomly drawn mini-batch.
 *
 * At most `$this->batchSize` distinct observations are picked via array_rand().
 * When the configured batch size is larger than the number of observations the
 * batch is capped at the whole data set, because array_rand() refuses to hand
 * out more keys than the array contains.
 *
 * @param Observations $observations
 * @param array $coefficients
 * @return array
 */
protected function calculateGradient(Observations $observations, array $coefficients) : array
{
    $gradient = array_fill(0, count($observations->getObservation(0)->getFeatures()), 0.0);

    $observationCount = count($observations);
    $batchSize = min($this->batchSize, $observationCount);

    // array_rand() returns a bare key instead of an array when a single key is requested, hence the cast.
    $batchElementIndices = (array) array_rand(range(0, $observationCount - 1), $batchSize);

    foreach ($batchElementIndices as $index) {
        $observation = $observations->getObservation($index);
        $observationGradient = $this->gradient->gradient(
            $coefficients,
            $observation->getFeatures(),
            $observation->getOutcome()
        );

        foreach ($observationGradient as $i => $observationSlope) {
            // Divide by the size of the batch actually drawn so the result stays a true mean.
            $gradient[$i] += $observationSlope / $batchSize;
        }
    }

    return $gradient;
}
/**
 * Checks that getObservation() returns the features and outcome stored at the requested index.
 */
public function testGetObservation()
{
    $collection = Observations::fromArray(static::$features, static::$outcomes);
    $first = $collection->getObservation(0);

    $expectedFeatures = static::$features[0];
    $expectedOutcome = static::$outcomes[0];

    static::assertEquals($expectedFeatures, $first->getFeatures());
    static::assertEquals($expectedOutcome, $first->getOutcome());
}
/**
 * Loads the logistic regression fixture into an Observations collection.
 *
 * Every data row becomes the feature vector
 * [1, GRE / 100, GPA, Rank2, Rank3, Rank4], with the first CSV column as the outcome.
 *
 * @return Observations
 * @throws \RuntimeException When the fixture file cannot be opened.
 */
private function getLogisticObservations()
{
    // Data from http://statistics.ats.ucla.edu/stat/r/dae/logit.htm
    $path = __DIR__ . '/../../fixtures/logistic.csv';
    $csv = fopen($path, 'r');

    if ($csv === false) {
        throw new \RuntimeException('Unable to open fixture file: ' . $path);
    }

    $observations = new Observations();

    try {
        fgetcsv($csv); // Throw away headers.

        while (($line = fgetcsv($csv)) !== false) {
            // fgetcsv() reports a blank line as an array holding a single null; there is nothing to load from it.
            if ($line === [null]) {
                continue;
            }

            // Split composite feature, since the school rank isn't actually an interval value.
            $rank2 = $line[3] == 2 ? 1 : 0;
            $rank3 = $line[3] == 3 ? 1 : 0;
            $rank4 = $line[3] == 4 ? 1 : 0;

            // Normalize the GRE score. This is critical to get convergence.
            $gre = $line[1] / 100;

            // [1, GRE, GPA, Rank2, Rank3, Rank4], Admitted
            $observations->add([1, $gre, $line[2], $rank2, $rank3, $rank4], (float) $line[0]);
        }
    } finally {
        // Release the handle even if a row fails to load.
        fclose($csv);
    }

    return $observations;
}
/**
 * Calculates the standard error of each of the regression coefficients.
 *
 * Each entry is the square root of the mean squared error multiplied by the
 * matching diagonal element of the inverse of (design' * design). The result
 * is cached in $this->SCoefficients and reused on later calls.
 *
 * @return array
 */
public function getStandardErrorCoefficients() : array
{
    if (!is_null($this->SCoefficients)) {
        return $this->SCoefficients;
    }

    $designMatrix = new Matrix($this->observations->getFeatures());
    $gramInverse = $designMatrix->transpose()->multiplyMatrix($designMatrix)->inverse();

    // Pull out the main diagonal of the inverted matrix.
    $diagonal = [];
    $dimension = $gramInverse->getRowCount();

    for ($position = 0; $position < $dimension; ++$position) {
        $diagonal[] = $gramInverse->get($position, $position);
    }

    // Scale by the mean squared error, then take the element-wise square root.
    $scaled = (new Matrix([$diagonal]))->multiplyScalar($this->getMeanSquaredError());
    $rooted = $scaled->map(function ($value) {
        return sqrt($value);
    });

    $this->SCoefficients = $rooted->toArray()[0];

    return $this->SCoefficients;
}
/**
 * Exercises LinearStatisticsGatherer against a fixed coefficient pair and
 * compares each statistic, rounded to two decimals, with its expected value.
 */
public function testStatistics()
{
    $fitted = [1.0954970633022, 0.92451598868827];
    $data = Observations::fromArray($this->getFeatures(), $this->getOutcomes());
    $gatherer = new LinearStatisticsGatherer($data, $fitted, new LinearPredictor($fitted));

    // Degrees of freedom.
    static::assertEquals(4, $gatherer->getDegreesOfFreedomTotal());
    static::assertEquals(3, $gatherer->getDegreesOfFreedomError());
    static::assertEquals(1, $gatherer->getDegreesOfFreedomModel());

    // Overall fit.
    static::assertEquals(1.94, round($gatherer->getFStatistic(), 2));
    static::assertEquals(0.39, round($gatherer->getRSquared(), 2));

    // Per-coefficient standard errors.
    $coefficientErrors = $gatherer->getStandardErrorCoefficients();
    static::assertEquals(1.51, round($coefficientErrors[0], 2));
    static::assertEquals(0.66, round($coefficientErrors[1], 2));

    static::assertEquals(1.42, round($gatherer->getStandardError(), 2));

    // Per-coefficient t statistics.
    $tValues = $gatherer->getTStatistics();
    static::assertEquals(0.73, round($tValues[0], 2));
    static::assertEquals(1.39, round($tValues[1], 2));
}
/**
 * Computes the gradient from a single observation chosen at random with mt_rand().
 *
 * @param Observations $observations
 * @param array $coefficients
 * @return array
 */
protected function calculateGradient(Observations $observations, array $coefficients) : array
{
    $lastIndex = count($observations) - 1;
    $sample = $observations->getObservation(mt_rand(0, $lastIndex));

    $features = $sample->getFeatures();
    $outcome = $sample->getOutcome();

    return $this->gradient->gradient($coefficients, $features, $outcome);
}
/**
 * Expects LeastSquares::regress() to throw when given a single observation with two features.
 */
public function testTooFewObservations()
{
    $this->setExpectedException('InvalidArgumentException');

    $leastSquares = new LeastSquares();
    $tooFew = Observations::fromArray([[1, 1]], [1]);

    $leastSquares->regress($tooFew);
}
/**
 * Runs gradient descent from an all-zero coefficient vector until the stopping criteria report convergence.
 *
 * Each pass computes a gradient, feeds it to the schedule, updates the
 * coefficients and then consults the stopping criteria. At least one pass
 * is always performed.
 *
 * @param Observations $observations
 * @return array
 */
final public function regress(Observations $observations) : array
{
    $weights = array_fill(0, $observations->getFeatureCount(), 0.0);

    while (true) {
        $slope = $this->calculateGradient($observations, $weights);
        $this->schedule->update($slope);
        $weights = $this->updateCoefficients($weights, $slope);

        if ($this->stoppingCriteria->converged($slope, $weights)) {
            return $weights;
        }
    }
}
/**
 * Fits coefficients by solving the normal equations: inverse(X' * X) * (X' * y),
 * where X is built from the observation features and y from the outcomes.
 *
 * @param Observations $observations
 * @return array
 * @throws InvalidArgumentException When the design matrix has fewer rows than columns.
 */
public function regress(Observations $observations) : array
{
    $design = new Matrix($observations->getFeatures());
    $outcomes = (new Matrix([$observations->getOutcomes()]))->transpose();

    $rowCount = $design->getRowCount();
    $columnCount = $design->getColumnCount();

    if ($rowCount < $columnCount) {
        throw new InvalidArgumentException('Not enough observations to perform regression. You need to have more observations than explanatory variables.');
    }

    $transposed = $design->transpose();
    $gramInverse = $transposed->multiplyMatrix($design)->inverse();
    $projected = $transposed->multiplyMatrix($outcomes);
    $solution = $gramInverse->multiplyMatrix($projected);

    // The solution is a column matrix; flip it so the coefficients come back as one flat row.
    return $solution->transpose()->toArray()[0];
}
/**
 * Averages the per-observation gradients across every observation in the set.
 *
 * @param Observations $observations
 * @param array $coefficients
 * @return array
 */
protected function calculateGradient(Observations $observations, array $coefficients) : array
{
    $featureCount = count($observations->getObservation(0)->getFeatures());
    $mean = array_fill(0, $featureCount, 0.0);
    $total = count($observations);

    /** @var Observation $sample */
    foreach ($observations as $sample) {
        $slopes = $this->gradient->gradient(
            $coefficients,
            $sample->getFeatures(),
            $sample->getOutcome()
        );

        foreach ($slopes as $component => $slope) {
            $mean[$component] += $slope / $total;
        }
    }

    return $mean;
}