/**
 * Sum of squares
 *
 * ∑⟮xᵢ⟯²
 *
 * Squares every element via Map\Single::square and adds the results together.
 *
 * @param array $numbers values to square and sum
 *
 * @return number|null sum of the squared values; null when $numbers is empty
 */
public static function sumOfSquares(array $numbers)
{
    // An empty list has no sum of squares to report.
    if (count($numbers) === 0) {
        return null;
    }

    $squared = Map\Single::square($numbers);

    return array_sum($squared);
}
/**
 * ρ - Spearman's rank correlation coefficient (Spearman's rho)
 *
 * https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
 *
 *          6 ∑ dᵢ²
 * ρ = 1 - ---------
 *         n(n² − 1)
 *
 * Where
 *  dᵢ: the difference between the two ranks of each observation
 *
 * Ranks are the 0-based positions in a descending sort; values that occur
 * more than once share the average of the positions they occupy. Only rank
 * differences enter the formula, so the 0-based offset cancels out.
 *
 * @param array $X values for random variable X
 * @param array $Y values for random variable Y
 *
 * @return number
 *
 * @throws BadDataException if both random variables do not have the same number of elements
 * @throws BadDataException if there are fewer than two observations (the denominator n(n² − 1) would be zero)
 */
public static function spearmansRho(array $X, array $Y)
{
    if (count($X) !== count($Y)) {
        throw new Exception\BadDataException('Both random variables must have the same number of elements');
    }

    $n = count($X);

    // n(n² − 1) is 0 for n = 0 and n = 1; fail explicitly instead of dividing by zero.
    if ($n < 2) {
        throw new Exception\BadDataException('Spearman\'s rho requires at least two observations');
    }

    // Sorted Xs and Ys (descending)
    $Xs = $X;
    $Ys = $Y;
    rsort($Xs);
    rsort($Ys);

    // Determine ranks of each X and Y.
    // Some items might show up multiple times, so record each successive rank.
    // Keys are the string form of the value: PHP truncates float array keys to
    // integers, which would merge distinct values such as 1.2 and 1.7.
    $ranksX = [];
    $ranksY = [];
    foreach ($Xs as $rank => $x) {
        $ranksX[strval($x)][] = $rank;
    }
    foreach ($Ys as $rank => $y) {
        $ranksY[strval($y)][] = $rank;
    }

    // Determine average rank of each X and Y.
    // Rank will not change if a value only shows up once.
    // Average is for when values show up multiple times.
    $averageRank = function (array $ranks) {
        return array_sum($ranks) / count($ranks);
    };
    $ranksX = array_map($averageRank, $ranksX);
    $ranksY = array_map($averageRank, $ranksY);

    // Difference between the two ranks of each observation
    $d = array_map(
        function ($x, $y) use ($ranksX, $ranksY) {
            return abs($ranksX[strval($x)] - $ranksY[strval($y)]);
        },
        $X,
        $Y
    );

    // Numerator: 6 ∑ dᵢ²
    $sumOfSquaredDifferences = array_sum(Map\Single::square($d));

    // Denominator: n(n² − 1)
    $denominator = $n * ($n ** 2 - 1);

    /*
     *          6 ∑ dᵢ²
     * ρ = 1 - ---------
     *         n(n² − 1)
     */
    return 1 - 6 * $sumOfSquaredDifferences / $denominator;
}
/**
 * SSreg - The Sum Squares of the regression (Explained sum of squares)
 *
 * The sum of the squares of the deviations of the predicted values from
 * the mean value of a response variable, in a standard regression model.
 * https://en.wikipedia.org/wiki/Explained_sum_of_squares
 *
 * With a fitted constant (the average of y equals the average of ŷ):
 *   SSreg = ∑(ŷᵢ - ȳ)²
 *
 * Without a fitted constant, the sum of squares of the predicted values is used:
 *   SSreg = ∑ŷᵢ²
 *
 * @return number
 */
public function sumOfSquaresRegression()
{
    // No constant fit: sum the squared predicted values directly.
    if ($this->fit_constant != 1) {
        $squaredPredictions = Single::square($this->reg_Yhat);

        return array_sum($squaredPredictions);
    }

    // Constant fit: sum of squared deviations of the predictions.
    $predictions = $this->Yhat();

    return RandomVariable::sumOfSquaresDeviations($predictions);
}
/**
 * l²-norm (|x|₂)
 * Also known as Euclidean norm, Euclidean length, L² distance, ℓ² distance
 * Used to normalize a vector.
 *
 * http://mathworld.wolfram.com/L2-Norm.html
 * https://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm
 *         ______
 * |x|₂ = √∑|xᵢ|²
 *
 * @return number square root of the sum of the squared entries of $this->A
 */
public function l2Norm()
{
    $squaredEntries = Map\Single::square($this->A);
    $sumOfSquares   = array_sum($squaredEntries);

    return sqrt($sumOfSquares);
}