/**
 * Calculate the probability of a category given a feature, P(category|feature).
 *
 * Bayes' rule is applied in the two-class form "category" vs. "all other
 * categories":
 *
 *   P(category|feature) =
 *       P(feature|category) * P(category)
 *       / ( P(feature|category) * P(category)
 *           + P(feature|otherCategories) * P(otherCategories) )
 *
 * Any likelihood that evaluates to zero is replaced by the floor value 0.01
 * so that a single unseen feature cannot zero out the result. The same floor
 * is returned directly when the storage holds no features or no documents
 * for the category, and it is used for P(feature|otherCategories) when no
 * features exist outside the category; in those cases the ratios below
 * would otherwise divide by zero.
 *
 * @param string $category category
 * @param string $feature  feature
 *
 * @return float
 */
protected function featureProbability($category, $feature)
{
    $categoryFeaturesCount = $this->storage->getCategoryFeaturesCount($category);
    if ($categoryFeaturesCount == 0) {
        // avoid `0` probability
        return 0.01;
    }

    $categoryDocumentsCount = $this->storage->getCategoryDocumentsCount($category);
    $documentsCount = $this->storage->getDocumentsCount();
    if ($categoryDocumentsCount == 0 || $documentsCount == 0) {
        // Both counts are used as divisors below; without documents there is
        // nothing to estimate from, so fall back to the same floor value.
        return 0.01;
    }

    $featureCountInCategory = $this->storage->getFeatureCountInCategory($category, $feature);

    // P(feature|category)
    $featureCategoryProbability =
        $this->storage->getFeatureDocumentsCountInCategory($category, $feature)
        / $categoryDocumentsCount;
    if ($featureCategoryProbability == 0) {
        $featureCategoryProbability = 0.01;
    }

    // P(category)
    $categoryProbability = $categoryDocumentsCount / $documentsCount;

    // P(feature|otherCategories)
    // = occurrences of the feature outside the category
    //   / all feature occurrences outside the category
    $otherFeatureCount = $this->storage->getFeatureCount($feature) - $featureCountInCategory;
    $otherFeaturesCount = $this->storage->getFeaturesCount() - $categoryFeaturesCount;
    // When the category holds every known feature the divisor is zero;
    // treat the likelihood as zero so it is floored to 0.01 below.
    $categoryImprobability = $otherFeaturesCount == 0
        ? 0
        : $otherFeatureCount / $otherFeaturesCount;
    if ($categoryImprobability == 0) {
        $categoryImprobability = 0.01;
    }

    // P(category|feature); P(otherCategories) is taken as 1 - P(category).
    $numerator = $featureCategoryProbability * $categoryProbability;
    $denominator = $numerator + $categoryImprobability * (1 - $categoryProbability);

    return $numerator / $denominator;
}
/**
 * Calls increaseFeaturesCount() three times in a row and checks after each
 * call that getFeaturesCount() equals the number of calls made so far.
 */
public function testFeaturesCount()
{
    foreach (range(1, 3) as $expectedCount) {
        $this->storage->increaseFeaturesCount();

        $this->assertEquals($expectedCount, $this->storage->getFeaturesCount());
    }
}