/**
 * Returns a distribution formed by grouping the distributions of each
 * predicted node.
 *
 * Every entry of $this->predictions is folded into a running joined
 * distribution, which is re-sorted with sort_joined_distribution_items and
 * reduced with merge_bins() to at most MultiVote::BINS_LIMIT entries.
 *
 * @return array Associative array with two keys:
 *               "distribution"      => the grouped distribution (an empty
 *                                      array when there are no predictions),
 *               "distribution_unit" => 'counts', or 'bins' once the number of
 *                                      entries has exceeded BINS_LIMIT.
 */
public function grouped_distribution() {
    $joined_distribution = array();
    // Defined up front so that an empty prediction list returns an empty
    // distribution instead of reading an undefined variable at the return.
    $distribution = array();
    $distribution_unit = 'counts';

    foreach ($this->predictions as $prediction) {
        // NOTE(review): only entries 0 and 1 of the node distribution are
        // handed to merge_distributions() -- confirm this is the shape that
        // helper expects.
        $joined_distribution = merge_distributions(
            $joined_distribution,
            array($prediction['distribution'][0], $prediction['distribution'][1])
        );
        uasort($joined_distribution, array($this, "sort_joined_distribution_items"));

        // Rebuild the list form from the freshly sorted joined distribution.
        // NOTE(review): each item is wrapped in a one-element array; verify
        // that merge_bins() expects this layout.
        $distribution = array();
        foreach ($joined_distribution as $dis) {
            $distribution[] = array($dis);
        }

        // The unit only ever moves from 'counts' to 'bins', never back.
        if ($distribution_unit == 'counts' && count($distribution) > MultiVote::BINS_LIMIT) {
            $distribution_unit = 'bins';
        }

        $distribution = merge_bins($distribution, MultiVote::BINS_LIMIT);
    }

    return array("distribution" => $distribution, "distribution_unit" => $distribution_unit);
}
/**
 * Makes a prediction based on a number of field values.
 *
 * The input fields must be keyed by Id. There are two possible strategies
 * to predict when the value for the splitting field is missing:
 *
 *   0 - LAST_PREDICTION: the last issued prediction is returned.
 *   1 - PROPORTIONAL: as we cannot choose between the two branches in the
 *       tree that stem from this split, we consider both. The algorithm
 *       goes on until the final leaves are reached and all their
 *       predictions are used to decide the final prediction.
 *
 * @param array      $input_data       Input field values, keyed by field Id.
 * @param array|null $path             Rules collected so far on the way down
 *                                     the tree; null starts an empty path.
 * @param mixed      $missing_strategy Tree::LAST_PREDICTION (default) or
 *                                     Tree::PROPORTIONAL.
 *
 * @return Prediction The prediction together with its path, confidence,
 *                    distribution and related node information.
 */
public function predict($input_data, $path = null, $missing_strategy = Tree::LAST_PREDICTION) {
    if ($path == null) {
        $path = array();
    }

    if ($missing_strategy == Tree::PROPORTIONAL) {
        $predict_pro = $this->predict_proportional($input_data, $path);
        $final_distribution = $predict_pro[0];
        $d_min = $predict_pro[1];
        $d_max = $predict_pro[2];
        $last_node = $predict_pro[3];

        if ($this->regression) {
            // Singular case: the prediction is the one given in a
            // 1-instance node, so that node's own values are returned as-is.
            if (count($final_distribution) == 1) {
                $instances = reset($final_distribution);
                if ($instances == 1) {
                    return new Prediction(
                        $last_node->output,
                        $path,
                        $last_node->confidence,
                        $last_node->distribution,
                        $instances,
                        $last_node->distribution_unit,
                        $last_node->median,
                        $last_node->children,
                        $last_node->min,
                        $last_node->max
                    );
                }
            }

            // Turn the value => instances map into [value, instances] pairs
            // ordered by value.
            ksort($final_distribution);
            $distribution = array();
            foreach ($final_distribution as $key => $val) {
                $distribution[] = array(floatval($key), $val);
            }

            // The unit is decided on the size before the bins are merged.
            $distribution_unit = count($distribution) > Tree::BINS_LIMIT ? 'bins' : 'counts';
            $distribution = merge_bins($distribution, Tree::BINS_LIMIT);

            $prediction = mean($distribution);
            $total_instances = 0;
            foreach ($distribution as $bin) {
                $total_instances += $bin[1];
            }
            $confidence = regression_error(
                unbiased_sample_variance($distribution, $prediction),
                $total_instances
            );

            return new Prediction(
                $prediction,
                $path,
                $confidence,
                $distribution,
                $total_instances,
                $distribution_unit,
                dist_median($distribution, $total_instances),
                $last_node->children,
                $d_min,
                $d_max
            );
        }

        // Categorical case: build [category, instances] pairs.
        ksort($final_distribution);
        $distribution = array();
        foreach ($final_distribution as $key => $val) {
            $distribution[] = array($key, $val);
        }

        // The predicted category is the first pair, so the pairs must be
        // ordered by instance count (highest first). Sorting by key alone
        // would pick the smallest category name regardless of its count.
        // Ties are broken by category name to keep the result deterministic.
        usort($distribution, function ($a, $b) {
            if ($a[1] != $b[1]) {
                return $a[1] > $b[1] ? -1 : 1;
            }
            return strcmp((string) $a[0], (string) $b[0]);
        });

        // NOTE(review): 'categorial' looks like a misspelling of
        // 'categorical'; it is left unchanged because the consumers of this
        // distribution unit are not visible from here.
        return new Prediction(
            $distribution[0][0],
            $path,
            ws_confidence($distribution[0][0], $final_distribution),
            $distribution,
            get_instances($distribution),
            'categorial',
            null,
            $last_node->children,
            null,
            null
        );
    }

    // LAST_PREDICTION strategy: descend into the first child whose predicate
    // accepts the input, recording the rule that was followed.
    if ($this->children != null) {
        foreach ($this->children as $child) {
            if ($child->predicate->apply($input_data, $this->fields)) {
                $new_rule = $child->predicate->to_rule($this->fields);
                array_push($path, $new_rule);
                return $child->predict($input_data, $path);
            }
        }
    }

    // No child matched (or this node has none): answer with this node's own
    // values. Median, min and max are passed as null unless the regression
    // flag is set.
    $not_regression = ($this->regression == null);

    return new Prediction(
        $this->output,
        $path,
        $this->confidence,
        $this->distribution,
        get_instances($this->distribution),
        $this->distribution_unit,
        $not_regression ? null : $this->median,
        $this->children,
        $not_regression ? null : $this->min,
        $not_regression ? null : $this->max
    );
}