public function test_scenario1()
 {
     /*
        Scenario: successfully computing predictions combinations.

        Every fixture row names a JSON file of predictions, a combination
        method code and the expected combined prediction and confidence.
        The predictions are wrapped in a MultiVote and combined twice (with
        and without confidence). Numeric results are compared after rounding
        to 6 decimal places. Each step is printed as it runs.
     */
     $data = array(
         array("predictions" => "./data/predictions_c.json", "method" => 0, "prediction" => "a", "confidence" => 0.450471270879),
         array("predictions" => "./data/predictions_c.json", "method" => 1, "prediction" => "a", "confidence" => 0.552021302649),
         array("predictions" => "./data/predictions_c.json", "method" => 2, "prediction" => "a", "confidence" => 0.403632421178),
         array("predictions" => "./data/predictions_r.json", "method" => 0, "prediction" => 1.55555556667, "confidence" => 0.400079152063),
         array("predictions" => "./data/predictions_r.json", "method" => 1, "prediction" => 1.59376845074, "confidence" => 0.248366474212),
         array("predictions" => "./data/predictions_r.json", "method" => 2, "prediction" => 1.55555556667, "confidence" => 0.400079152063),
     );
     foreach ($data as $item) {
         print "\nSuccessfully computing predictions combinations\n";
         $contents = file_get_contents($item["predictions"]);
         # Fail here, with the file name, rather than later inside combine().
         $this->assertTrue($contents !== false, "Could not read predictions file " . $item["predictions"]);
         $predictions = json_decode($contents);
         $this->assertNotNull($predictions, "Invalid JSON in predictions file " . $item["predictions"]);
         print "Given I create a MultiVote for the set of predictions in file " . $item["predictions"] . "\n";
         $multivote = new MultiVote($predictions);
         print "When I compute the prediction with confidence using method " . $item["method"] . "\n";
         $combined_results = $multivote->combine($item["method"], true);
         print "And I compute the prediction without confidence using method " . $item["method"] . "\n";
         $combined_results_no_confidence = $multivote->combine($item["method"]);
         # PHPUnit's assertEquals takes (expected, actual); keeping that order
         # makes failure messages label the two values correctly.
         if ($multivote->is_regression()) {
             print "Then the combined prediction is " . $item["prediction"] . "\n";
             $this->assertEquals(round($item["prediction"], 6), round($combined_results[0], 6));
             print "And the combined prediction without confidence is " . $item["prediction"] . "\n";
             $this->assertEquals(round($item["prediction"], 6), round($combined_results_no_confidence, 6));
         } else {
             print "Then the combined prediction is " . $item["prediction"] . "\n";
             $this->assertEquals($item["prediction"], $combined_results[0]);
             print "And the combined prediction without confidence is " . $item["prediction"] . "\n";
             $this->assertEquals($item["prediction"], $combined_results_no_confidence);
         }
         print "And the confidence for the combined prediction is " . $item["confidence"] . "\n";
         $this->assertEquals(round($item["confidence"], 6), round($combined_results[1], 6));
     }
 }
 public function test_scenario1()
 {
     /*
        Checks MultiVote::combine() against known results.

        Each fixture row gives a predictions JSON file, a combination method
        code, and the prediction and confidence expected from combining the
        file's predictions with that method. Numeric values are compared
        after rounding to 6 decimal places.
     */
     $data = array(
         array("predictions" => "./data/predictions_c.json", "method" => 0, "prediction" => "a", "confidence" => 0.450471270879),
         array("predictions" => "./data/predictions_c.json", "method" => 1, "prediction" => "a", "confidence" => 0.552021302649),
         array("predictions" => "./data/predictions_c.json", "method" => 2, "prediction" => "a", "confidence" => 0.403632421178),
         array("predictions" => "./data/predictions_r.json", "method" => 0, "prediction" => 1.55555556667, "confidence" => 0.400079152063),
         array("predictions" => "./data/predictions_r.json", "method" => 1, "prediction" => 1.59376845074, "confidence" => 0.248366474212),
         array("predictions" => "./data/predictions_r.json", "method" => 2, "prediction" => 1.55555556667, "confidence" => 0.400079152063),
     );
     foreach ($data as $item) {
         $raw_json = file_get_contents($item["predictions"]);
         # Report a broken fixture directly instead of an unrelated
         # failure further down in combine().
         $this->assertTrue($raw_json !== false, "Could not read predictions file " . $item["predictions"]);
         $predictions = json_decode($raw_json);
         $this->assertNotNull($predictions, "Invalid JSON in predictions file " . $item["predictions"]);

         $multivote = new MultiVote($predictions);
         $combined_results = $multivote->combine($item["method"], true);
         $combined_results_no_confidence = $multivote->combine($item["method"]);

         # assertEquals expects (expected, actual) in that order.
         if ($multivote->is_regression()) {
             $expected_prediction = round($item["prediction"], 6);
             $this->assertEquals($expected_prediction, round($combined_results[0], 6));
             $this->assertEquals($expected_prediction, round($combined_results_no_confidence, 6));
         } else {
             $this->assertEquals($item["prediction"], $combined_results[0]);
             $this->assertEquals($item["prediction"], $combined_results_no_confidence);
         }
         $this->assertEquals(round($item["confidence"], 6), round($combined_results[1], 6));
     }
 }
Пример #3
0
 public function combine($method = MultiVote::DEFAULT_METHOD, $with_confidence = false, $add_confidence = false, $add_distribution = false, $add_count = false, $add_median = false, $add_min = false, $add_max = false, $options = null)
 {
     /*
        Reduces a number of predictions voting for classification and
        averaging predictions for regression.

        method will determine the voting method (plurality, confidence
        weighted, probability weighted or threshold). A code that is not a
        key of COMBINER_MAP falls back to the DEFAULT_METHOD entry.
        If with_confidence is true, the combined confidence (as a weighted
        average of the confidences of votes for the combined prediction)
        will also be given.

        The add_* flags are forwarded unchanged to the regression combiners
        (error_weighted / avg); combine_categorical only receives
        add_confidence, add_distribution and add_count. options is only
        used by the threshold method, where it is handed to
        single_out_category().

        Throws an Exception when there are no predictions, or when some
        prediction lacks a key that WEIGHT_KEYS requires for the method.
     */
     # there must be at least one prediction to be combined
     if ($this->predictions == null) {
         throw new Exception('No predictions to be combined.');
     }
     $method_code = strval($method);
     $method = array_key_exists($method_code, $this->COMBINER_MAP) ? $this->COMBINER_MAP[$method_code] : $this->COMBINER_MAP[MultiVote::DEFAULT_METHOD];
     $keys = array_key_exists($method, $this->WEIGHT_KEYS) ? $this->WEIGHT_KEYS[$method] : null;
     if ($keys != null) {
         foreach ($keys as $key) {
             foreach ($this->predictions as $prediction) {
                 # Predictions are read as objects further down, and
                 # array_key_exists() on an object is a TypeError in PHP 8,
                 # so objects are checked with property_exists() instead.
                 if (is_array($prediction)) {
                     $has_key = array_key_exists($key, $prediction);
                 } else {
                     $has_key = is_object($prediction) && property_exists($prediction, $key);
                 }
                 if (!$has_key) {
                     throw new Exception('Not enough data to use the selected prediction method. Try creating your model anew.');
                 }
             }
         }
     }
     if ($this->is_regression()) {
         $new_predictions = array();
         foreach ($this->predictions as $prediction) {
             # isset() first so a vote without the property does not raise
             # an undefined-property notice; the loose null comparison is
             # kept so the same values as before are normalised to 0.
             if (!isset($prediction->confidence) || $prediction->confidence == null) {
                 $prediction->confidence = 0;
             }
             $new_predictions[] = $prediction;
         }
         $this->predictions = $new_predictions;
         if ($method == MultiVote::CONFIDENCE) {
             return $this->error_weighted($with_confidence, $add_confidence, $add_distribution, $add_count, $add_median, $add_min, $add_max);
         } else {
             return $this->avg($this, $with_confidence, $add_confidence, $add_distribution, $add_count, $add_median, $add_min, $add_max);
         }
     } else {
         $predictions = $this;
         if ($method == MultiVote::THRESHOLD) {
             if ($options == null) {
                 $options = array();
             }
             $predictions = $this->single_out_category($options);
         } elseif ($method == MultiVote::PROBABILITY) {
             # Vote on a fresh MultiVote holding the probability-weighted
             # predictions, leaving this object's predictions untouched.
             $predictions = new MultiVote(array());
             $predictions->predictions = $this->probability_weight();
         }
         return $predictions->combine_categorical(array_key_exists($method, $this->COMBINATION_WEIGHTS) ? $this->COMBINATION_WEIGHTS[$method] : null, $with_confidence, $add_confidence, $add_distribution, $add_count);
     }
 }
Пример #4
0
 function predict($input_data, $by_name = true, $method = MultiVote::PLURALITY_CODE, $with_confidence = false, $add_confidence = false, $add_distribution = false, $add_count = false, $add_median = false, $add_min = false, $add_max = false, $options = null, $missing_strategy = Tree::LAST_PREDICTION, $median = false)
 {
     /*
        Makes a prediction based on the prediction made by every model.
        The method parameter is a numeric key to the following combination
        methods in classifications/regressions:
           0 - majority vote (plurality)/ average: PLURALITY_CODE
           1 - confidence weighted majority vote / error weighted:
              CONFIDENCE_CODE
           2 - probability weighted majority vote / average:
              PROBABILITY_CODE
           3 - threshold filtered vote / doesn't apply:
              THRESHOLD_CODE

        input_data, by_name, missing_strategy, add_min and add_max are
        passed to generate_votes() of each multi-model; the median is
        requested from the models when either add_median or median is set.
        When median is true each vote's prediction is replaced by its
        median before combining. The votes are then reduced with
        MultiVote::combine(), whose result is returned.
     */
     if (count($this->models_splits) > 1) {
         # Several groups of models: build a multi-model per group and
         # accumulate the votes of all of them.
         $votes = new MultiVote(array());
         $api = $this->api;
         foreach ($this->models_splits as $model_split) {
             $models = array();
             foreach ($model_split as $model_id) {
                 array_push($models, $api::retrieve_resource($model_id, $api::ONLY_MODEL));
             }
             $multi_model = new MultiModel($models, $this->api);
             $votes_split = $multi_model->generate_votes($input_data, $by_name, $missing_strategy, $add_median || $median, $add_min, $add_max);
             if ($median) {
                 # The loop variable is a copy when a vote is an array, so
                 # the updated vote must be stored back; the previous code
                 # dropped the change (and failed outright on object votes
                 # by using array syntax on them).
                 $new_predictions = array();
                 foreach ($votes_split->predictions as $index => $prediction) {
                     if (is_array($prediction)) {
                         $prediction['prediction'] = $prediction['median'];
                     } else {
                         $prediction->prediction = $prediction->median;
                     }
                     $new_predictions[$index] = $prediction;
                 }
                 $votes_split->predictions = $new_predictions;
             }
             $votes->extend($votes_split->predictions);
         }
     } else {
         # When only one group of models is found you use the
         # corresponding multimodel to predict
         $votes_split = $this->multi_model->generate_votes($input_data, $by_name, $missing_strategy, $add_median || $median, $add_min, $add_max);
         $votes = new MultiVote($votes_split->predictions);
         if ($median) {
             $new_predictions = array();
             foreach ($votes->predictions as $prediction) {
                 if (is_array($prediction)) {
                     $prediction['prediction'] = $prediction['median'];
                 } else {
                     $prediction->prediction = $prediction->median;
                 }
                 array_push($new_predictions, $prediction);
             }
             $votes->predictions = $new_predictions;
         }
     }
     return $votes->combine($method, $with_confidence, $add_confidence, $add_distribution, $add_count, $add_median, $add_min, $add_max, $options);
 }
Пример #5
0
 function _predict($input_data, $by_name = true, $method = MultiVote::PLURALITY_CODE, $with_confidence = false, $add_confidence = false, $add_distribution = false, $add_count = false, $add_median = false, $add_unused_fields = false, $add_min = false, $add_max = false, $options = null, $missing_strategy = Tree::LAST_PREDICTION, $median = false)
 {
     /*
        Makes a prediction based on the prediction made by every model.

        :param input_data: Test data to be used as input
        :param by_name: Boolean that is set to true if field_names (as
                        alternative to field ids) are used in the
                        input_data dict
        :param method: numeric key code for the following combination
                       methods in classifications/regressions:

           0 - majority vote (plurality)/ average: PLURALITY_CODE
           1 - confidence weighted majority vote / error weighted:
              CONFIDENCE_CODE
           2 - probability weighted majority vote / average:
              PROBABILITY_CODE
           3 - threshold filtered vote / doesn't apply:
              THRESHOLD_CODE

        The following parameters cause the result to be returned as a list
        :param add_confidence: Adds confidence to the prediction
        :param add_distribution: Adds the predicted node's distribution to
                                 the prediction
        :param add_count: Adds the predicted nodes' instances to the
                          prediction
        :param add_median: Adds the median of the predicted nodes'
                           distribution to the prediction
        :param add_min: Boolean, if true adds the minimum value in the
                        prediction's distribution (for regressions only)
        :param add_max: Boolean, if true adds the maximum value in the
                        prediction's distribution (for regressions only)
        :param add_unused_fields: Boolean, if true adds the information about
                                  the fields in the input_data that are not
                                  being used in the model as predictors.
        :param options: Options to be used in threshold filtered votes.
        :param missing_strategy: numeric key for the individual model's
                                 prediction method. See the model predict
                                 method.
        :param median: Uses the median of each individual model's predicted
                       node as individual prediction for the specified
                       combination method.

        Returns the result of MultiVote::combine(). When add_unused_fields
        is true the result is an array (a scalar result is wrapped under
        the 'prediction' key) with an extra 'unused_fields' entry holding
        the input fields reported as unused by every vote.
     */
     if (count($this->models_splits) > 1) {
         # Several groups of models: build a multi-model per group and
         # accumulate the votes of all of them.
         $votes = new MultiVote(array());
         $api = $this->api;
         foreach ($this->models_splits as $model_split) {
             $models = array();
             foreach ($model_split as $model_id) {
                 array_push($models, $api::retrieve_resource($model_id, $api::ONLY_MODEL));
             }
             $multi_model = new MultiModel($models, $this->api);
             $votes_split = $multi_model->generate_votes($input_data, $by_name, $missing_strategy, $add_median || $median, $add_min, $add_max, $add_unused_fields);
             if ($median) {
                 # The loop variable is a copy when a vote is an array, so
                 # the updated vote must be stored back; the previous code
                 # dropped the change (and failed outright on object votes
                 # by using array syntax on them).
                 $new_predictions = array();
                 foreach ($votes_split->predictions as $index => $prediction) {
                     if (is_array($prediction)) {
                         $prediction['prediction'] = $prediction['median'];
                     } else {
                         $prediction->prediction = $prediction->median;
                     }
                     $new_predictions[$index] = $prediction;
                 }
                 $votes_split->predictions = $new_predictions;
             }
             $votes->extend($votes_split->predictions);
         }
     } else {
         # When only one group of models is found you use the
         # corresponding multimodel to predict
         $votes_split = $this->multi_model->generate_votes($input_data, $by_name, $missing_strategy, $add_median || $median, $add_min, $add_max, $add_unused_fields);
         $votes = new MultiVote($votes_split->predictions);
         if ($median) {
             $new_predictions = array();
             foreach ($votes->predictions as $prediction) {
                 if (is_array($prediction)) {
                     $prediction['prediction'] = $prediction['median'];
                 } else {
                     $prediction->prediction = $prediction->median;
                 }
                 array_push($new_predictions, $prediction);
             }
             $votes->predictions = $new_predictions;
         }
     }
     $result = $votes->combine($method, $with_confidence, $add_confidence, $add_distribution, $add_count, $add_median, $add_min, $add_max, $options);
     if ($add_unused_fields) {
         # Start from every input field and keep only those that each
         # individual vote also lists as unused.
         $unused_fields = array_unique(array_keys($input_data));
         foreach ($votes->predictions as $prediction) {
             $vote_unused = is_array($prediction) ? $prediction['unused_fields'] : $prediction->unused_fields;
             $unused_fields = array_intersect($unused_fields, array_unique($vote_unused));
         }
         if (!is_array($result)) {
             $result = array("prediction" => $result);
         }
         $result['unused_fields'] = $unused_fields;
     }
     return $result;
 }
Пример #6
0
 function generate_votes($input_data, $by_name = true, $missing_strategy = Tree::LAST_PREDICTION, $add_median = false, $add_min = false, $add_max = false)
 {
     /*
        Generates a MultiVote object that contains the predictions
        made by each of the models.

        Every model in $this->models is asked to predict for input_data;
        by_name, missing_strategy, add_median, add_min and add_max are
        forwarded to the model's predict() call, whose remaining
        positional arguments are fixed here. Each returned prediction is
        appended to the MultiVote, which is then returned.

        NOTE(review): a caller visible alongside this method passes a
        seventh argument ($add_unused_fields), which this signature
        silently ignores - confirm whether it should be forwarded to the
        model's predict() call.
     */
     $votes = new MultiVote(array());
     foreach ($this->models as $model) {
         $prediction_info = $model->predict($input_data, $by_name, false, STDOUT, false, $missing_strategy, true, false, true, true, $add_median, false, $add_min, $add_max, null);
         $votes->append($prediction_info);
     }
     return $votes;
 }