sumOfSquaresDeviations() public static method

∑⟮xᵢ - μ⟯²
public static sumOfSquaresDeviations ( array $numbers ) : number
$numbers array
return number
示例#1
0
 /**
  * SSreg - Regression sum of squares (also called the explained sum of squares)
  *
  * Measures how far the predicted values ŷᵢ lie from the mean of the response
  * variable in a standard regression model.
  * https://en.wikipedia.org/wiki/Explained_sum_of_squares
  *
  * With a fitted constant (intercept):
  *   SSreg = ∑(ŷᵢ - ȳ)²
  *   (when a constant is fit, the average of y equals the average of ŷ, so the
  *    deviations can be taken around the mean of the predictions)
  *
  * Without a fitted constant:
  *   SSreg = ∑ŷᵢ²
  *
  * @return number
  */
 public function sumOfSquaresRegression()
 {
     // No constant term: sum the squared predictions directly.
     if ($this->fit_constant != 1) {
         $squared_predictions = Single::square($this->reg_Yhat);
         return array_sum($squared_predictions);
     }

     // Constant term fitted: sum of squared deviations of the predictions.
     $predictions = $this->Yhat();
     return RandomVariable::sumOfSquaresDeviations($predictions);
 }
示例#2
0
 /**
  * One-way ANOVA
  * Technique used to compare means of three or more samples
  * (using the F distribution).
  * https://en.wikipedia.org/wiki/One-way_analysis_of_variance
  *
  * Produces the following analysis of the data:
  *
  * ANOVA hypothesis test summary data
  *
  *           | SS | df | MS | F | P |
  * Treatment |    |    |    |   |   |
  * Error     |    |    |    |
  * Total     |    |    |
  *
  *  where:
  *   Treatment is between groups
  *   Error is within groups
  *   SS = Sum of squares
  *   df = Degrees of freedom
  *   MS = Mean squares
  *   F  = F statistic
  *   P  = P value
  *
  * Data summary table
  *
  *       | N | Sum | Mean | SS | Variance | SD | SEM |
  * 0     |   |     |      |    |          |    |     |
  * 1     |   |     |      |    |          |    |     |
  * ...   |   |     |      |    |          |    |     |
  * Total |   |     |      |    |          |    |     |
  *
  *  where:
  *   Each row is the summary for a sample, numbered from 0 to m - 1
  *   m   = Number of samples
  *   N   = Sample size
  *   SS  = Sum of squares
  *   SD  = Standard deviation
  *   SEM = Standard error of the mean
  *
  * Calculations
  *
  * Sum of Squares
  * SST (sum of squares total)
  * ∑⟮xᵢ − μ⟯²
  *  where:
  *   xᵢ = each element of all samples
  *   μ  = mean total of all elements of all samples
  *
  * SSB (sum of squares between - treatment)
  * ∑n(x - μ)²
  *  where:
  *   n = sample size
  *   x = sample mean
  *   μ = mean total of all elements of all samples
  *
  * SSW (sum of squares within - error)
  * ∑∑⟮xᵢ − μ⟯²  Sum of sum of squared deviations of each sample
  *  where:
  *   xᵢ = each element of the sample
  *   μ  = mean of the sample
  *
  * Degrees of Freedom
  * dfT (degrees of freedom for the total)
  * mn - 1
  *
  * dfB (degrees of freedom between - treatment)
  * m - 1
  *
  * dfW (degrees of freedom within - error)
  * m(n - 1)
  *
  *  where:
  *   m = number of samples
  *   n = number of elements in each sample
  *
  * Mean Squares
  * MSB (Mean squares between - treatment)
  * SSB / dfB
  *
  * MSW (Mean squares within - error)
  * SSW / dfW
  *
  * Test Statistics
  * F = MSB / MSW
  * P = F distribution CDF above F with degrees of freedom dfB and dfW
  *
  * @param  array ...$samples Samples to analyze (at least three samples, each
  *                           with the same number of values and at least two values)
  *
  * @return array [
  *                 ANOVA => [
  *                   treatment => [SS, df, MS, F, P],
  *                   error     => [SS, df, MS],
  *                   total     => [SS, df],
  *                 ],
  *                 total_summary => [n, sum, mean, SS, variance, sd, sem],
  *                 data_summary  => [
  *                   0     => [n, sum, mean, SS, variance, sd, sem],
  *                   1     => [n, sum, mean, SS, variance, sd, sem],
  *                   ...
  *                 ]
  *               ]
  *
  * @throws BadDataException if less than three samples, if all samples don't have the same number of values,
  *                          or if each sample has fewer than two values
  */
 public static function oneWay(array ...$samples)
 {
     // Must have at least three samples
     $m = count($samples);
     if ($m < 3) {
         throw new Exception\BadDataException('Must have at least three samples');
     }

     // All samples must have the same number of items
     $n = count($samples[0]);
     for ($i = 1; $i < $m; $i++) {
         if (count($samples[$i]) !== $n) {
             throw new Exception\BadDataException('All samples must have the same number of values');
         }
     }

     // dfW = m(n - 1) must be positive; otherwise MSW = SSW / dfW divides by zero
     // (n = 1) or uses a negative number of degrees of freedom (n = 0).
     if ($n < 2) {
         throw new Exception\BadDataException('Each sample must have at least two values');
     }

     // Summary data for each sample
     $summary_data = [];
     foreach ($samples as $i => $sample) {
         $summary_data[$i] = [
             'n'        => $n,
             'sum'      => array_sum($sample),
             'mean'     => Average::mean($sample),
             'SS'       => RandomVariable::sumOfSquares($sample),
             'variance' => Descriptive::sampleVariance($sample),
             'sd'       => Descriptive::sd($sample),
             'sem'      => RandomVariable::standardErrorOfTheMean($sample),
         ];
     }

     // Totals summary: flatten all samples in a single merge instead of
     // re-copying the accumulated array once per sample.
     $all_elements = array_merge(...$samples);
     $μ            = Average::mean($all_elements);
     $total        = [
         'n'        => count($all_elements),
         'sum'      => array_sum($all_elements),
         'mean'     => $μ,
         'SS'       => RandomVariable::sumOfSquares($all_elements),
         'variance' => Descriptive::sampleVariance($all_elements),
         'sd'       => Descriptive::sd($all_elements),
         'sem'      => RandomVariable::standardErrorOfTheMean($all_elements),
     ];

     // ANOVA sum of squares
     $SST = RandomVariable::sumOfSquaresDeviations($all_elements);
     $SSB = array_sum(array_map(function ($sample) use ($n, $μ) {
         return $n * (Average::mean($sample) - $μ) ** 2;
     }, $samples));
     // Callable built from the class reference so it resolves the same way as
     // the direct RandomVariable:: calls above.
     $SSW = array_sum(array_map([RandomVariable::class, 'sumOfSquaresDeviations'], $samples));

     // ANOVA degrees of freedom
     $dfT = $m * $n - 1;
     $dfB = $m - 1;
     $dfW = $m * ($n - 1);

     // ANOVA mean squares
     $MSB = $SSB / $dfB;
     $MSW = $SSW / $dfW;

     // Test statistics
     $F = $MSB / $MSW;
     $P = F::above($F, $dfB, $dfW);

     // Return ANOVA report
     return [
         'ANOVA' => [
             'treatment' => ['SS' => $SSB, 'df' => $dfB, 'MS' => $MSB, 'F' => $F, 'P' => $P],
             'error'     => ['SS' => $SSW, 'df' => $dfW, 'MS' => $MSW],
             'total'     => ['SS' => $SST, 'df' => $dfT],
         ],
         'total_summary' => $total,
         'data_summary'  => $summary_data,
     ];
 }
示例#3
0
 /**
  * Variance
  *
  * Variance measures how far a set of numbers are spread out.
  * A variance of zero indicates that all the values are identical.
  * Variance is always non-negative: a small variance indicates that the data points
  * tend to be very close to the mean (expected value) and hence to each other.
  * A high variance indicates that the data points are very spread out around the mean
  * and from each other.
  * (https://en.wikipedia.org/wiki/Variance)
  *
  *      ∑⟮xᵢ - μ⟯²
  * σ² = ----------
  *          ν
  *
  * Generalized method that allows setting the degrees of freedom.
  * For population variance, set d.f. (ν) to n
  * For sample variance, set d.f. (ν) to n - 1
  * Or use the populationVariance or sampleVariance convenience methods.
  *
  * μ is the population mean
  * ν is the degrees of freedom, which usually is
  *   the number of numbers in the population set or n - 1 for sample set.
  *
  * @param array $numbers
  * @param int   $ν degrees of freedom
  * @return number|null the variance, or null if $numbers is empty
  *                     (the empty check happens before ν is validated)
  *
  * @throws OutOfBoundsException if degrees of freedom is ≤ 0
  */
 public static function variance(array $numbers, int $ν)
 {
     if (empty($numbers)) {
         return null;
     }
     if ($ν <= 0) {
         throw new Exception\OutOfBoundsException('Degrees of freedom must be > 0');
     }

     // Numerator of the variance formula: ∑⟮xᵢ - μ⟯².
     // Kept under an ASCII name: whitespace is not valid inside a PHP variable
     // name, so a formula-style identifier containing spaces cannot be parsed.
     $sum_of_squared_deviations = RandomVariable::sumOfSquaresDeviations($numbers);

     return $sum_of_squared_deviations / $ν;
 }