/**
 * Find the outliers of a data set using the
 * 'Standard Deviation around the mean' rule.
 *
 * A value counts as an outlier when it lies strictly above
 * mean + $mean_thres * std or strictly below mean - $mean_thres * std.
 * Values that sit exactly on a threshold are not outliers.
 *
 * Steps:
 * 1. Obtain the mean (Basic::mean) and the std (Error::std) of the data.
 * 2. Derive the upper and lower thresholds from them.
 * 3. Keep every value that falls outside the thresholds.
 *
 * @param array $data Data Set
 * @param number $mean_thres How many stds away from the mean a value may lie
 * @return array The outlying values, in their original order, re-indexed from 0
 */
public static function outlierAroundMean(array $data, $mean_thres = 2) {
    // Step 1.
    $center = Basic::mean($data);
    $spread = Error::std($data);

    // Step 2.
    $offset = $mean_thres * $spread;
    $upper = $center + $offset;
    $lower = $center - $offset;

    // Step 3. array_filter keeps the source keys, so re-index afterwards
    // to hand back a plain list.
    $outside = array_filter($data, function ($value) use ($upper, $lower) {
        return $value > $upper || $value < $lower;
    });

    return array_values($outside);
}
/**
 * Calculates the 'Simple Moving Average' for a data set.
 *
 * The returned array has the same number of elements as $data. Positions
 * that do not yet have a full window behind them stay null; every other
 * position holds the mean of the window that ends at that position.
 *
 * Steps:
 * 1. Return an empty array for an empty data set.
 * 2. Make sure elements are numeric.
 * 3. 'Moving Average Value' can't be greater than the size of the elements.
 *    In such case set the value to the size of the elements.
 * 4. Calculate the number of the Moving Average calculated elements.
 * 5. Initiate the array to be returned with null values.
 * 6. Calculate the Moving Average values:
 *    1. Calculate new array with the needed elements, based on the given 'Moving Average Value'
 *    2. Calculate the mean
 *    3. Add it to the array
 *
 * @param number $ma_value Moving Average Value (window size)
 * @param array $data Data Set
 * @return array Moving averages aligned to the end of each window, null-padded at the front
 */
public static function simpleMovingAverage($ma_value, $data = []) {
    $size = count($data);

    // Step 1. Without this guard an empty data set would still enter the
    // loop once (0 - 0 + 1 iterations), average an empty slice and write
    // the result to index -1.
    if ($size === 0) {
        return [];
    }

    // Step 2. The array form is used because "self::method" string callables
    // are deprecated since PHP 8.2. The mapped result is discarded, so
    // isNumeric is presumably expected to reject bad elements itself
    // (e.g. by throwing) -- TODO confirm against its definition.
    array_map([self::class, 'isNumeric'], $data);

    // Step 3. fixMaValue is expected to clamp the window to the data size.
    $ma_value = self::fixMaValue($size, $ma_value);

    // Step 4.
    $ma_calc_items = $size - $ma_value + 1;

    // Step 5.
    $ma_arr = array_pad([], $size, null);

    // Step 6.
    for ($i = 0; $i < $ma_calc_items; $i++) {
        // Step 6.1. Window of $ma_value elements starting at offset $i.
        $window = array_slice($data, $i, $ma_value);

        // Step 6.2. / 6.3. Store the mean at the window's last position.
        $ma_arr[$i + $ma_value - 1] = Basic::mean($window);
    }

    return $ma_arr;
}
/**
 * Calculates the variance for a data set for a population or a sample set.
 * Variance is defined as 'The average of the squared differences from the Mean.'.
 *
 * References:
 * http://www.mathsisfun.com/data/standard-deviation.html
 *
 * Steps:
 * 1. Make sure that elements are numeric
 * 2. Calculate the mean
 * 3. Sum the squared differences between the elements and the mean
 * 4. Divide by n (population) or n - 1 (sample) and round to 3 decimals
 *
 * @param array $data Data Set
 * @param bool $population Truthy for the population variance (divide by n),
 *                         falsy for the sample variance (divide by n - 1)
 * @return float The variance, rounded to 3 decimal places
 * @throws \DivisionByZeroError When the data set is too small for the
 *                              requested variance (no elements, or fewer
 *                              than two elements for a sample)
 */
public static function variance($data = [], $population = true) {
    // Step 1. The array form is used because "self::method" string callables
    // are deprecated since PHP 8.2. The mapped result is discarded, so
    // isNumeric is presumably expected to reject bad elements itself
    // (e.g. by throwing) -- TODO confirm against its definition.
    array_map([self::class, 'isNumeric'], $data);

    // Step 2.
    $mean = Basic::mean($data);

    // Step 3. Iterate over the values rather than positions 0..n-1 so that
    // associative or sparse arrays do not hit undefined offsets.
    $sum = 0;
    foreach ($data as $value) {
        $sum += pow(self::absoluteError($value, $mean), 2);
    }

    // Step 4.
    $n = count($data);
    $divisor = $population ? $n : $n - 1;

    // The variance is undefined for a non-positive divisor. Raise the same
    // error type PHP 8 produces for a division by zero, with a message that
    // names the actual cause.
    if ($divisor <= 0) {
        throw new \DivisionByZeroError(
            'Variance is undefined for a data set of ' . $n . ' element(s).'
        );
    }

    return round($sum / $divisor, 3);
}