Example #1
 /**
  * Train on the given set and fill the models variables
  *
  * priors[c] = NDocs[c] / NDocs
  * condprob[t][c] = ( count( t in c ) + 1 ) / sum( count( t' in c ) + 1 , for every t' )
  * unknown[c] = condprob['word that doesn't exist in c'][c] ( so that count( t in c ) == 0 )
  *
  * More information on the algorithm can be found at
  * http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html
  *
  * @param FeatureFactoryInterface $ff A feature factory to compute features from a training document
  * @param TrainingSet $tset The training set
  * @param integer $a_smoothing The parameter for additive smoothing. Defaults to add-one smoothing.
  * @return array A training context to be used for incremental training
  */
 public function train(FeatureFactoryInterface $ff, TrainingSet $tset, $a_smoothing = 1)
 {
     $class_set = $tset->getClassSet();
     $ctx = array(
         'termcount_per_class' => array_fill_keys($class_set, 0),
         'termcount' => array_fill_keys($class_set, array()),
         'ndocs_per_class' => array_fill_keys($class_set, 0),
         'voc' => array(),
         'ndocs' => 0
     );
     return $this->train_with_context($ctx, $ff, $tset, $a_smoothing);
 }
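
To make the docblock formulas concrete, here is a minimal, self-contained sketch of the same counting and additive smoothing, written against plain arrays rather than the library's TrainingSet and feature-factory machinery. The $docs input shape and the helper name are illustrative assumptions, not part of the library's API.

 // Standalone sketch of the docblock formulas with additive smoothing.
 // $docs: array of array('class' => string, 'tokens' => string[]) - assumed shape.
 function naive_bayes_counts(array $docs, $a_smoothing = 1)
 {
     $ndocs = count($docs);
     $ndocs_per_class = array();
     $termcount = array();           // termcount[c][t] = count( t in c )
     $termcount_per_class = array(); // total count of all term occurrences in c
     $voc = array();
     foreach ($docs as $d) {
         $c = $d['class'];
         $ndocs_per_class[$c] = ($ndocs_per_class[$c] ?? 0) + 1;
         foreach ($d['tokens'] as $t) {
             $termcount[$c][$t] = ($termcount[$c][$t] ?? 0) + 1;
             $termcount_per_class[$c] = ($termcount_per_class[$c] ?? 0) + 1;
             $voc[$t] = true;
         }
     }
     $V = count($voc); // vocabulary size, so the smoothed denominator sums over every t'
     $priors = array();
     $condprob = array();
     $unknown = array();
     foreach ($ndocs_per_class as $c => $n) {
         $priors[$c] = $n / $ndocs; // priors[c] = NDocs[c] / NDocs
         $denom = ($termcount_per_class[$c] ?? 0) + $a_smoothing * $V;
         foreach (($termcount[$c] ?? array()) as $t => $cnt) {
             $condprob[$t][$c] = ($cnt + $a_smoothing) / $denom;
         }
         $unknown[$c] = $a_smoothing / $denom; // the case count( t in c ) == 0
     }
     return array($priors, $condprob, $unknown);
 }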
Example #2
 /**
  * Calculate all the features for every possible class of each training
  * document, then pass them to the optimizer to find the weights that
  * satisfy the constraints and maximize the entropy.
  *
  * @param FeatureFactoryInterface $ff The feature factory
  * @param TrainingSet $tset A collection of training documents
  * @param MaxentOptimizerInterface $opt The optimizer; a maxent-specific optimizer is required
  * @return void
  */
 public function train(FeatureFactoryInterface $ff, TrainingSet $tset, MaxentOptimizerInterface $opt)
 {
     $classSet = $tset->getClassSet();
     $features = $this->calculateFeatureArray($classSet, $tset, $ff);
     $this->l = $opt->optimize($features);
 }
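
The weights stored in $this->l define a log-linear model: p(c|d) is proportional to exp of the summed weights of the features that fire for class c on document d. Below is a minimal sketch of that scoring step, assuming a per-class list of fired feature indices; this shape is an illustrative assumption, not the library's internal feature format.

 // Sketch: conditional maxent scoring, p(c|d) proportional to exp(sum of fired weights).
 // $l: weight per feature index; $fired[$class]: indices of features firing for that class.
 function maxent_distribution(array $l, array $fired)
 {
     $scores = array();
     foreach ($fired as $class => $indices) {
         $s = 0.0;
         foreach ($indices as $i) {
             $s += $l[$i]; // accumulate the weights of the fired features
         }
         $scores[$class] = exp($s);
     }
     $z = array_sum($scores); // partition function, normalizes scores to a distribution
     foreach ($scores as $class => $s) {
         $scores[$class] = $s / $z;
     }
     return $scores; // $scores[$class] = p(class | document)
 }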