Exemplo n.º 1
0
 public function grouped_distribution()
 {
     /*
        Returns a distribution formed by grouping the distributions of
        each predicted node.

        The result is an array with two keys:
          "distribution"      - the joined distribution after merge_bins() has
                                been applied with MultiVote::BINS_LIMIT; an
                                empty array when there are no predictions.
          "distribution_unit" - 'counts', switching permanently to 'bins' as
                                soon as the number of joined entries exceeds
                                MultiVote::BINS_LIMIT in any iteration.
     */
     $joined_distribution = array();
     $distribution_unit = 'counts';
     // Initialised up front so the return statement never reads an undefined
     // variable when $this->predictions is empty.
     $distribution = array();
     foreach ($this->predictions as $prediction) {
         // NOTE(review): only entries 0 and 1 of the prediction's distribution
         // are handed to merge_distributions() -- confirm this is intended.
         $joined_distribution = merge_distributions($joined_distribution, array($prediction['distribution'][0], $prediction['distribution'][1]));
         uasort($joined_distribution, array($this, "sort_joined_distribution_items"));

         // Rebuild the list from the sorted joined distribution; keys are dropped.
         // NOTE(review): each value is wrapped in a one-element array -- verify
         // this is the shape merge_bins() expects.
         $distribution = array();
         foreach ($joined_distribution as $dis) {
             array_push($distribution, array($dis));
         }

         // Once the unit has become 'bins' it is never reset to 'counts'.
         if ($distribution_unit == 'counts' && count($distribution) > MultiVote::BINS_LIMIT) {
             $distribution_unit = 'bins';
         }
         $distribution = merge_bins($distribution, MultiVote::BINS_LIMIT);
     }
     return array("distribution" => $distribution, "distribution_unit" => $distribution_unit);
 }
Exemplo n.º 2
0
 public function predict($input_data, $path = null, $missing_strategy = Tree::LAST_PREDICTION)
 {
     /*
        Makes a prediction based on a number of field values.
        The input fields must be keyed by Id. There are two possible
          strategies to predict when the value for the splitting field
          is missing:
        0 - LAST_PREDICTION: the last issued prediction is returned.
        1 - PROPORTIONAL: as we cannot choose between the two branches
           in the tree that stem from this split, we consider both. The
           algorithm goes on until the final leaves are reached and
           all their predictions are used to decide the final prediction.

        $input_data       - field values used to evaluate each child's predicate.
        $path             - list of rules followed so far; a null (or empty)
                            value starts a fresh path.
        $missing_strategy - Tree::LAST_PREDICTION (default) or Tree::PROPORTIONAL.

        Returns a Prediction object. For PROPORTIONAL classification the
        predicted category is the one with the most instances in the combined
        distribution (ties go to the smallest category key).
     */
     if ($path == null) {
         $path = array();
     }

     if ($missing_strategy == Tree::PROPORTIONAL) {
         // predict_proportional() yields: combined distribution, min, max, last node.
         $predict_pro = $this->predict_proportional($input_data, $path);
         $final_distribution = $predict_pro[0];
         $d_min = $predict_pro[1];
         $d_max = $predict_pro[2];
         $last_node = $predict_pro[3];

         if ($this->regression) {
             // singular case
             // when the prediction is the one given in a 1-instance node
             if (count($final_distribution) == 1) {
                 foreach ($final_distribution as $prediction => $instances) {
                     if ($instances == 1) {
                         return new Prediction($last_node->output, $path, $last_node->confidence, $last_node->distribution, $instances, $last_node->distribution_unit, $last_node->median, $last_node->children, $last_node->min, $last_node->max);
                     }
                     break;
                 }
             }

             // Turn the value => instances map into a list of
             // [value, instances] pairs ordered by value.
             ksort($final_distribution);
             $distribution = array();
             foreach ($final_distribution as $key => $val) {
                 array_push($distribution, array(floatval($key), $val));
             }

             // The unit is decided on the size before the bins are merged.
             $distribution_unit = 'counts';
             if (count($distribution) > Tree::BINS_LIMIT) {
                 $distribution_unit = 'bins';
             }
             $distribution = merge_bins($distribution, Tree::BINS_LIMIT);

             $prediction = mean($distribution);
             $total_instances = 0;
             foreach ($distribution as $bin) {
                 $total_instances += $bin[1];
             }
             $confidence = regression_error(unbiased_sample_variance($distribution, $prediction), $total_instances);
             return new Prediction($prediction, $path, $confidence, $distribution, $total_instances, $distribution_unit, dist_median($distribution, $total_instances), $last_node->children, $d_min, $d_max);
         }

         // Classification: build the [category, instances] list ordered by
         // category and, while doing so, pick the category with the most
         // instances. Sorting by key alone and taking the first entry would
         // return the smallest category key regardless of its count.
         ksort($final_distribution);
         $distribution = array();
         $best_category = null;
         $best_count = null;
         foreach ($final_distribution as $key => $val) {
             array_push($distribution, array($key, $val));
             // Strict '>' keeps the earliest (smallest) key on ties.
             if ($best_count === null || $val > $best_count) {
                 $best_category = $key;
                 $best_count = $val;
             }
         }
         return new Prediction($best_category, $path, ws_confidence($best_category, $final_distribution), $distribution, get_instances($distribution), 'categorial', null, $last_node->children, null, null);
     }

     // LAST_PREDICTION: descend into the first child whose predicate matches
     // the input data, recording the rule that was followed.
     if ($this->children != null) {
         foreach ($this->children as $child) {
             if ($child->predicate->apply($input_data, $this->fields)) {
                 $new_rule = $child->predicate->to_rule($this->fields);
                 array_push($path, $new_rule);
                 return $child->predict($input_data, $path);
             }
         }
     }

     // No child matched (or this is a leaf): answer with this node's own data.
     // median/min/max are only reported when the regression flag is set.
     return new Prediction($this->output, $path, $this->confidence, $this->distribution, get_instances($this->distribution), $this->distribution_unit, $this->regression == null ? null : $this->median, $this->children, $this->regression == null ? null : $this->min, $this->regression == null ? null : $this->max);
 }