Example #1
0
 /**
  * Calculate P(category|feature) using Bayes' theorem.
  *
  * P(category|feature) =
  *     P(feature|category) * P(category)
  *     / (P(feature|category) * P(category)
  *        + P(feature|otherCategories) * P(otherCategories))
  *
  * Any likelihood that evaluates to `0` is replaced by `0.01`, and `0.01`
  * is returned outright when the category has no features at all.
  *
  * @param string $category category
  * @param string $feature  feature
  *
  * @return float
  */
 protected function featureProbability($category, $feature)
 {
     $categoryFeaturesCount = $this->storage->getCategoryFeaturesCount($category);
     if ($categoryFeaturesCount == 0) {
         // avoid `0` probability
         return 0.01;
     }

     $categoryDocumentsCount = $this->storage->getCategoryDocumentsCount($category);

     // P(feature|category)
     // = documents in category containing the feature / documents in category
     $featureCategoryProbability = $this->storage->getFeatureDocumentsCountInCategory($category, $feature) / $categoryDocumentsCount;
     if ($featureCategoryProbability == 0) {
         $featureCategoryProbability = 0.01;
     }

     // P(category)
     $categoryProbability = $categoryDocumentsCount / $this->storage->getDocumentsCount();

     // P(feature|otherCategories)
     // = feature occurrences outside the category / feature total outside the category
     $featureCountInCategory = $this->storage->getFeatureCountInCategory($category, $feature);
     $otherFeaturesCount = $this->storage->getFeaturesCount() - $categoryFeaturesCount;
     if ($otherFeaturesCount == 0) {
         // The overall feature count equals this category's feature count, so the
         // ratio below would divide by zero (DivisionByZeroError on PHP 8, a warning
         // on PHP 7). Treat the likelihood as 0 and let the fallback below apply.
         $otherCategoriesFeatureProbability = 0;
     } else {
         $otherCategoriesFeatureProbability = ($this->storage->getFeatureCount($feature) - $featureCountInCategory) / $otherFeaturesCount;
     }
     if ($otherCategoriesFeatureProbability == 0) {
         $otherCategoriesFeatureProbability = 0.01;
     }

     // P(category|feature); P(otherCategories) is taken as 1 - P(category)
     $numerator = $featureCategoryProbability * $categoryProbability;

     return $numerator / ($numerator + $otherCategoriesFeatureProbability * (1 - $categoryProbability));
 }
Example #2
0
 /**
  * Checks that getDocumentsCount() reports 1, 2 and 3 after the first,
  * second and third call to increaseDocumentsCount() respectively.
  */
 public function testDocumentsCount()
 {
     foreach ([1, 2, 3] as $expectedCount) {
         // Bump the counter first, then verify it matches the number of bumps so far.
         $this->storage->increaseDocumentsCount();

         $actualCount = $this->storage->getDocumentsCount();
         $this->assertEquals($expectedCount, $actualCount);
     }
 }