Example #1
0
 /**
  * Calculate the regression parameters using the Theil-Sen method
  *
  * Procedure:
  *  1) Calculate the slope between every pair of points whose x values differ.
  *     A pair that shares the same x value has no finite slope, so it is skipped
  *     instead of dividing by zero.
  *  2) m = median of those slopes
  *  3) b = Ymedian - (m * Xmedian)
  *
  * The results are stored in $this->m, $this->b and $this->parameters ([b, m]).
  *
  * NOTE(review): if no pair has distinct x values the slope list is empty and the
  * outcome depends on how Average::median treats an empty array - confirm.
  */
 public function calculate()
 {
     // Slopes between all pairs of points that have distinct x values
     $slopes = [];
     $n = count($this->points);
     for ($i = 0; $i < $n; $i++) {
         $pointi = $this->points[$i];
         for ($j = $i + 1; $j < $n; $j++) {
             $pointj = $this->points[$j];
             $run = $pointj[0] - $pointi[0];
             // Loose comparison on purpose: the difference may be int 0 or float 0.0
             if ($run == 0) {
                 continue;
             }
             $slopes[] = ($pointj[1] - $pointi[1]) / $run;
         }
     }
     $this->m = Average::median($slopes);
     $this->b = Average::median($this->ys) - $this->m * Average::median($this->xs);
     $this->parameters = [$this->b, $this->m];
 }
Example #2
0
 /**
  * Evaluate for x
  * Weight every data point by its distance from x with a tricube kernel, perform
  * a weighted least squares regression of order λ on those weights, and evaluate
  * the fitted polynomial at x.
  *
  * The kernel bandwidth is the distance returned by Average::kthSmallest for
  * k = number_of_points - 1, multiplied by max(α, 1).
  * NOTE(review): presumably k is zero-based, making this the distance to the
  * number_of_points-th nearest data point - confirm against Average::kthSmallest.
  *
  * @param  number $x
  *
  * @return number
  */
 public function evaluate($x)
 {
     $α = $this->α;
     $λ = $this->λ;

     // Absolute distance from x to each observed x value
     // (presumably element-wise over $this->xs - confirm against Single)
     $distances = Single::abs(Single::subtract($this->xs, $x));

     // Reference distance that sets the width of the local neighbourhood
     $bandwidth = Average::kthSmallest($distances, $this->number_of_points - 1);

     // Scaled distances, capped at 1 so that far-away points get zero weight below
     $arg = Single::min(Single::divide($distances, $bandwidth * max($α, 1)), 1);

     // Kernel function: tricube = (1-arg³)³, computed as ((arg³ - 1) * -1)³
     $weights = Single::cube(Single::multiply(Single::subtract(Single::cube($arg), 1), -1));

     // Local regression parameters from the weighted least squares fit
     $parameters = $this->leastSquares($this->ys, $this->xs, $weights, $λ);

     // Evaluate the fitted polynomial at x via a single-row Vandermonde matrix
     $X = new VandermondeMatrix([$x], $λ + 1);
     return $X->multiply($parameters)[0][0];
 }
Example #3
0
 /**
  * Sum of squares deviations
  *
  * Adds up the squared distance of every number from the mean of the list:
  *
  * ∑⟮xᵢ - μ⟯²
  *
  * @param  array  $numbers
  *
  * @return number|null null if the array is empty
  */
 public static function sumOfSquaresDeviations(array $numbers)
 {
     if (empty($numbers)) {
         return null;
     }

     $mean = Average::mean($numbers);

     // Accumulate the squared deviations in input order
     $sum_of_squares = 0;
     foreach ($numbers as $number) {
         $sum_of_squares += ($number - $mean) ** 2;
     }

     return $sum_of_squares;
 }
Example #4
0
 /**
  * Two-way ANOVA
  * Examines the influence of two different categorical independent variables on
  * one continuous dependent variable. The two-way ANOVA not only aims at assessing
  * the main effect of each independent variable but also if there is any interaction
  * between them (using the F distribution).
  * https://en.wikipedia.org/wiki/Two-way_analysis_of_variance
  *
  * Produces the following analysis of the data:
  *
  * ANOVA hypothesis test summary data
  *
  *             | SS | df | MS | F | P |
  * Factor A    |    |    |    |   |   |
  * Factor B    |    |    |    |   |   |
  * Interaction |    |    |    |   |   |
  * Error       |    |    |    |
  * Total       |    |    |
  *
  *  where:
  *   Interaction = Factor A X Factor B working together
  *   Error is within groups
  *   SS = Sum of squares
  *   df = Degrees of freedom
  *   MS = Mean squares
  *   F  = F statistic
  *   P  = P value
  *
  * Data summary tables for:
  *   Factor A
  *   Factor B
  *   Interaction (each Factor A / Factor B cell)
  *   Total
  *
  *       | N | Sum | Mean | SS | Variance | SD | SEM |
  * 0     |   |     |      |    |          |    |     |
  * 1     |   |     |      |    |          |    |     |
  * ...   |   |     |      |    |          |    |     |
  * Total |   |     |      |    |          |    |     |
  *
  *  where:
  *   Each row is the summary for a sample, numbered from 0 to m - 1
  *   m   = Number of samples
  *   N   = Sample size
  *   SS  = Sum of squares
  *   SD  = Standard deviation
  *   SEM = Standard error of the mean
  *
  * Calculations
  *
  * Sum of Squares
  * SST (sum of squares total)
  * ∑⟮xᵢ − μ⟯²
  *  where:
  *   xᵢ = each element of all samples
  *   μ  = mean total of all elements of all samples
  *
  * SSA, SSB (sum of squares for each factor A and B)
  * ∑n(x - μ)²
  *  where:
  *   n = sample size
  *   x = sample mean
  *   μ  = mean total of all elements of all samples
  *
  * SSW (sum of squares within - error)
  * ∑∑⟮x − μ⟯²  Sum of sum of squared deviations of each sample
  *  where:
  *   x = each value of an AB cell
  *   μ = mean of that AB cell
  *
  * SSAB (sum of squares AB - interaction)
  * SSAB = SST - SSA - SSB - SSW;
  *
  * Degrees of Freedom
  * dfT (degrees of freedom for the total)
  * n - 1
  *
  * dfA (degrees of freedom factor A)
  * r - 1
  *
  * dfB (degrees of freedom factor B)
  * c - 1
  *
  * dfAB (degrees of freedom factor AB - interaction)
  * (r - 1)(c - 1)
  *
  * dfW (degrees of freedom within - error)
  * n - rc
  *
  *  where:
  *   n = total number of values across all AB cells
  *   r = number of rows (number of factor As)
  *   c = number of columns (number of factor Bs)
  *
  * Mean Squares
  * MSA (Mean squares factor A)
  * SSA / dfA
  *
  * MSB (Mean squares factor B)
  * SSB / dfB
  *
  * MSAB (Mean squares factor AB - interaction)
  * SSAB / dfAB
  *
  * MSW (Mean squares within - error)
  * SSW / dfW
  *
  * F Test Statistics
  * FA  = MSA / MSW
  * FB  = MSB / MSW
  * FAB = MSAB / MSW
  *
  * P values
  * PA  = F distribution CDF above FA with degrees of freedom dfA and dfW
  * PB  = F distribution CDF above FB with degrees of freedom dfB and dfW
  * PAB = F distribution CDF above FAB with degrees of freedom dfAB and dfW
  *
  * Example input data for ...$data parameter:
  *             | Factor B₁ | Factor B₂ | ⋯
  *   Factor A₁ |  4, 6, 8  |  6, 6, 9  | ⋯
  *   Factor A₂ |  4, 8, 9  | 7, 10, 13 | ⋯
  *      ⋮           ⋮           ⋮         ⋮
  * @param  array ...$data Samples to analyze [
  *               // Factor A₁
  *               [
  *                   [4, 6, 8] // Factor B₁
  *                   [6, 6, 9] // Factor B₂
  *                       ⋮
  *               ],
  *               // Factor A₂
  *               [
  *                   [4, 8, 9]   // Factor B₁
  *                   [7, 10, 13] // Factor B₂
  *                       ⋮
  *               ],
  *               ...
  *         ]
  *
  * @return array [
  *                 ANOVA => [
  *                   factorA     => [SS, df, MS, F, P],
  *                   factorB     => [SS, df, MS, F, P],
  *                   interaction => [SS, df, MS, F, P],
  *                   error       => [SS, df, MS],
  *                   total       => [SS, df],
  *                 ],
  *                 total_summary => [n, sum, mean, SS, variance, sd, sem],
  *                 summary_factorA  => [
  *                   0     => [n, sum, mean, SS, variance, sd, sem],
  *                   1     => [n, sum, mean, SS, variance, sd, sem],
  *                   ...
  *                 ],
  *                 summary_factorB  => [
  *                   0     => [n, sum, mean, SS, variance, sd, sem],
  *                   1     => [n, sum, mean, SS, variance, sd, sem],
  *                   ...
  *                 ],
  *                 summary_interaction  => [
  *                   // indexed by factor A, then by factor B
  *                   0     => [
  *                     0     => [n, sum, mean, SS, variance, sd, sem],
  *                     1     => [n, sum, mean, SS, variance, sd, sem],
  *                     ...
  *                   ],
  *                   ...
  *                 ]
  *               ]
  * @throws BadDataException if less than two A factors or less than two B factors,
  *                          if B factors or values have different number elements,
  *                          or if an AB cell has less than two values
  */
 public static function twoWay(array ...$data)
 {
     // Must have at least two rows (two types of factor A)
     $r = count($data);
     if ($r < 2) {
         throw new Exception\BadDataException('Must have at least two rows (two types of factor A)');
     }
     // Must have at least two columns; otherwise dfB = c - 1 = 0 and MSB is undefined
     $c = count($data[0]);
     if ($c < 2) {
         throw new Exception\BadDataException('Must have at least two columns (two types of factor B)');
     }
     // All samples must have the same number the second factor B
     for ($i = 1; $i < $r; $i++) {
         if (count($data[$i]) !== $c) {
             throw new Exception\BadDataException('All samples must have the same number of the second factor B');
         }
     }
     // Each AB cell needs replication; otherwise dfW = n - rc = 0 and MSW is undefined
     $v = count($data[0][0]);
     if ($v < 2) {
         throw new Exception\BadDataException('Each AB factor interaction must have at least two values');
     }
     // Each AB factor interaction must have the same number of values
     for ($i = 0; $i < $r; $i++) {
         for ($j = 0; $j < $c; $j++) {
             if (count($data[$i][$j]) !== $v) {
                 throw new Exception\BadDataException('Each AB factor interaction must have the same number of values');
             }
         }
     }

     // Builds one row of a summary table; shared by the A, B, AB and total summaries
     $summarize = function (array $elements) {
         return [
             'n'        => count($elements),
             'sum'      => array_sum($elements),
             'mean'     => Average::mean($elements),
             'SS'       => RandomVariable::sumOfSquares($elements),
             'variance' => Descriptive::sampleVariance($elements),
             'sd'       => Descriptive::sd($elements),
             'sem'      => RandomVariable::standardErrorOfTheMean($elements),
         ];
     };

     // Aggregates for all elements, rows (factor A), and columns (factor B)
     $all_elements = [];
     $A_elements = [];
     $B_elements = [];

     // Summary data for each AB cell
     // And aggregate all elements and elements for factor A
     $summary_AB = [];
     foreach ($data as $A => $Bs) {
         $A_elements[$A] = [];
         foreach ($Bs as $B => $values) {
             $all_elements = array_merge($all_elements, $values);
             $A_elements[$A] = array_merge($A_elements[$A], $values);
             // 'n' is the number of values in this cell (not the number of B factors)
             $summary_AB[$A][$B] = $summarize($values);
         }
     }

     // Aggregate elements for factor B
     for ($B = 0; $B < $c; $B++) {
         $B_elements[$B] = [];
         foreach ($data as $factor1s) {
             $B_elements[$B] = array_merge($B_elements[$B], $factor1s[$B]);
         }
     }

     // Factor A, factor B and total summaries (array_map keeps the factor keys)
     $summary_A = array_map($summarize, $A_elements);
     $summary_B = array_map($summarize, $B_elements);
     $summary_total = $summarize($all_elements);
     $μ = $summary_total['mean'];

     // Sum of squares factor A
     $SSA = array_sum(array_map(function ($f1) use ($μ) {
         return $f1['n'] * ($f1['mean'] - $μ) ** 2;
     }, $summary_A));

     // Sum of squares factor B
     $SSB = array_sum(array_map(function ($f2) use ($μ) {
         return $f2['n'] * ($f2['mean'] - $μ) ** 2;
     }, $summary_B));

     // Sum of squares within (error) and sum of squares total, in a single pass:
     //  SSW uses the deviation from the mean of the value's own AB cell
     //  SST uses the deviation from the grand mean
     $SSW = 0;
     $SST = 0;
     foreach ($data as $A => $Bs) {
         foreach ($Bs as $B => $values) {
             $cell_mean = $summary_AB[$A][$B]['mean'];
             foreach ($values as $value) {
                 $SSW += ($value - $cell_mean) ** 2;
                 $SST += ($value - $μ) ** 2;
             }
         }
     }

     // Sum of squares AB interaction
     $SSAB = $SST - $SSA - $SSB - $SSW;

     // Degrees of freedom
     $dfA = $r - 1;
     $dfB = $c - 1;
     $dfAB = ($r - 1) * ($c - 1);
     $dfW = $summary_total['n'] - $r * $c;
     $dfT = $summary_total['n'] - 1;

     // Mean squares
     $MSA = $SSA / $dfA;
     $MSB = $SSB / $dfB;
     $MSAB = $SSAB / $dfAB;
     $MSW = $SSW / $dfW;

     // F test statistics
     $FA = $MSA / $MSW;
     $FB = $MSB / $MSW;
     $FAB = $MSAB / $MSW;

     // P values
     $PA = F::above($FA, $dfA, $dfW);
     $PB = F::above($FB, $dfB, $dfW);
     $PAB = F::above($FAB, $dfAB, $dfW);

     // Return ANOVA report
     return [
         'ANOVA' => [
             'factorA'     => ['SS' => $SSA, 'df' => $dfA, 'MS' => $MSA, 'F' => $FA, 'P' => $PA],
             'factorB'     => ['SS' => $SSB, 'df' => $dfB, 'MS' => $MSB, 'F' => $FB, 'P' => $PB],
             'interaction' => ['SS' => $SSAB, 'df' => $dfAB, 'MS' => $MSAB, 'F' => $FAB, 'P' => $PAB],
             'error'       => ['SS' => $SSW, 'df' => $dfW, 'MS' => $MSW],
             'total'       => ['SS' => $SST, 'df' => $dfT],
         ],
         'total_summary'       => $summary_total,
         'summary_factorA'     => $summary_A,
         'summary_factorB'     => $summary_B,
         'summary_interaction' => $summary_AB,
     ];
 }
Example #5
0
 /**
  * Sample covariance
  * A measure of how much two random variables change together.
  * Average product of their deviations from their respective means.
  * The sample covariance is defined in terms of the sample means x, y
  * https://en.wikipedia.org/wiki/Covariance
  *
  * cov(X, Y) = Sxy = E[⟮X - x⟯⟮Y - y⟯]
  *
  *                   ∑⟮xᵢ - x⟯⟮yᵢ - y⟯
  * cov(X, Y) = Sxy = ---------------
  *                         n - 1
  *
  * At least two observations are required because the denominator is n - 1.
  *
  * @param array $X values for random variable X
  * @param array $Y values for random variable Y
  *
  * @return number
  *
  * @throws BadDataException if X and Y do not have the same number of elements
  * @throws BadDataException if X and Y have fewer than two elements
  */
 public static function sampleCovariance(array $X, array $Y)
 {
     $n = count($X);
     if ($n !== count($Y)) {
         throw new Exception\BadDataException('X and Y must have the same number of elements.');
     }
     // n - 1 would be zero (or negative) below, so reject the input up front
     if ($n < 2) {
         throw new Exception\BadDataException('X and Y must have at least two elements.');
     }

     $x = Average::mean($X);
     $y = Average::mean($Y);

     // ∑⟮xᵢ - x⟯⟮yᵢ - y⟯: products of the paired deviations from each mean
     $sum_of_products = array_sum(array_map(function ($xᵢ, $yᵢ) use ($x, $y) {
         return ($xᵢ - $x) * ($yᵢ - $y);
     }, $X, $Y));

     return $sum_of_products / ($n - 1);
 }
Example #6
0
 /**
  * Five number summary
  * Describes a set of observations with five sample percentiles:
  *  1) min    - the sample minimum (smallest observation)
  *  2) Q1     - the lower (first) quartile
  *  3) median - the middle value
  *  4) Q3     - the upper (third) quartile
  *  5) max    - the sample maximum (largest observation)
  *
  * Q1 and Q3 are taken from self::quartiles; the median comes from Average::median.
  *
  * https://en.wikipedia.org/wiki/Five-number_summary
  *
  * @param  array  $numbers
  *
  * @return array [min, Q1, median, Q3, max]
  */
 public static function fiveNumberSummary(array $numbers)
 {
     $quartiles = self::quartiles($numbers);

     $summary = [];
     $summary['min'] = min($numbers);
     $summary['Q1'] = $quartiles['Q1'];
     $summary['median'] = Average::median($numbers);
     $summary['Q3'] = $quartiles['Q3'];
     $summary['max'] = max($numbers);

     return $summary;
 }