/**
 * Train the model on the given training set.
 *
 * Builds a fresh, empty training context (per-class document counts,
 * per-class term counts, vocabulary) and delegates to train_with_context()
 * so that batch and incremental training share the same code path.
 *
 * The fitted model follows the multinomial Naive Bayes formulation:
 *   priors[c]      = NDocs[c] / NDocs
 *   condprob[t][c] = (count(t in c) + 1) / sum(count(t' in c) + 1, for every t')
 *   unknown[c]     = condprob of a term never seen in c (so that count(t in c) == 0)
 *
 * More information on the algorithm can be found at
 * http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html
 *
 * @param FeatureFactoryInterface $ff          A feature factory to compute features from a training document
 * @param TrainingSet             $tset        The training set
 * @param integer                 $a_smoothing The parameter for additive smoothing; defaults to add-one smoothing
 * @return array A training context to be used for incremental training
 */
public function train(FeatureFactoryInterface $ff, TrainingSet $tset, $a_smoothing = 1)
{
    $classes = $tset->getClassSet();

    // Empty context: nothing counted yet, no vocabulary, zero documents seen.
    $context = array(
        'termcount_per_class' => array_fill_keys($classes, 0),
        'termcount'           => array_fill_keys($classes, array()),
        'ndocs_per_class'     => array_fill_keys($classes, 0),
        'voc'                 => array(),
        'ndocs'               => 0,
    );

    return $this->train_with_context($context, $ff, $tset, $a_smoothing);
}
/**
 * Compute every feature for every possible class, then hand the result
 * to the optimizer, which finds the weights that satisfy the constraints
 * while maximizing the entropy. The resulting weights are stored on the
 * model (in $this->l).
 *
 * @param FeatureFactoryInterface  $ff   The feature factory
 * @param TrainingSet              $tset A collection of training documents
 * @param MaxentOptimizerInterface $opt  An optimizer; a maxent optimizer is required
 * @return void
 */
public function train(FeatureFactoryInterface $ff, TrainingSet $tset, MaxentOptimizerInterface $opt)
{
    $classes = $tset->getClassSet();

    // Materialize the full (document x class) feature array before optimizing.
    $featureArray = $this->calculateFeatureArray($classes, $tset, $ff);

    $this->l = $opt->optimize($featureArray);
}