/**
 * Builds the classifier, filling in a default for each optional collaborator
 * that is not supplied.
 *
 * @param DataSourceInterface               $dataSource         Data source stored on the classifier
 * @param SVMModel|null                     $model              Model to use; a new SVMModel when omitted
 * @param Document\NormalizerInterface|null $documentNormalizer Document normalizer; Document\Lowercase when omitted
 * @param TokenizerInterface|null           $tokenizer          Tokenizer; Word when omitted
 * @param Token\NormalizerInterface|null    $tokenNormalizer    Token normalizer; stored as given (no default is created)
 * @param \SVM|null                         $svm                SVM instance; when omitted a new one is created with a linear kernel
 * @param float|null                        $threshold          Value forwarded to setThreshold(); null skips the call
 */
public function __construct(
    DataSourceInterface $dataSource,
    SVMModel $model = null,
    Document\NormalizerInterface $documentNormalizer = null,
    TokenizerInterface $tokenizer = null,
    Token\NormalizerInterface $tokenNormalizer = null,
    \SVM $svm = null,
    $threshold = null
) {
    $this->dataSource = $dataSource;
    $this->model = $model ?: new SVMModel();
    $this->documentNormalizer = $documentNormalizer ?: new Document\Lowercase();
    $this->tokenizer = $tokenizer ?: new Word();
    // Deliberately no fallback: a null token normalizer is kept as null.
    $this->tokenNormalizer = $tokenNormalizer;

    if ($svm === null) {
        // Default SVM uses a linear kernel.
        $svm = new \SVM();
        $svm->setOptions(array(\SVM::OPT_KERNEL_TYPE => \SVM::KERNEL_LINEAR));
    }
    $this->svm = $svm;

    // Compare against null rather than relying on truthiness, so an explicit
    // threshold of 0 / 0.0 is passed on instead of being silently dropped.
    if ($threshold !== null) {
        $this->setThreshold($threshold);
    }
}
<?php

// Benchmark script: builds an SVM classifier over the 20news training
// directory, then reports, per category, the fraction of test documents for
// which the classifier confirms the document's own category.

// Raise the memory limit for this run (presumably because the corpus is large).
ini_set('memory_limit', '512M');

require_once __DIR__ . '/../vendor/autoload.php';

use Camspiers\StatisticalClassifier\DataSource\Directory;
use Camspiers\StatisticalClassifier\Classifier\SVM;

// Categories used for both the training and the test source.
$cats = array(
    'alt.atheism',
    'comp.graphics',
    'rec.motorcycles',
    'sci.crypt',
    'soc.religion.christian',
    'talk.religion.misc'
);

$source = new Directory(
    array(
        'directory' => __DIR__ . '/../resources/20news-bydate/20news-bydate-train',
        'include' => $cats
    )
);

$classifier = new SVM($source);

$testSource = new Directory(
    array(
        'directory' => __DIR__ . '/../resources/20news-bydate/20news-bydate-test',
        'include' => $cats
    )
);

$data = $testSource->getData();

// $stats[category] = array(number of confirmed documents, total documents)
$stats = array();

foreach ($data as $category => $documents) {
    $stats[$category] = array(0, count($documents));
    foreach ($documents as $document) {
        if ($classifier->is($category, $document)) {
            $stats[$category][0]++;
        }
    }
}

// A separate variable name is used here so the test data in $data is not
// overwritten by the loop.
foreach ($stats as $category => $counts) {
    list($correct, $total) = $counts;
    // Guard the ratio: a category with no test documents would otherwise
    // divide by zero (a warning on older PHP, DivisionByZeroError on PHP 8).
    echo $category, ': ', $total > 0 ? $correct / $total : 'n/a', PHP_EOL;
}
<?php

require_once __DIR__ . '/../vendor/autoload.php';

// An in-memory array data source keeps this example self-contained.
use Camspiers\StatisticalClassifier\DataSource\DataArray;
use Camspiers\StatisticalClassifier\Classifier\SVM;

// Training documents, keyed by category; added in this order.
$trainingDocuments = array(
    'pig' => 'Pigs are great. Pink and cute!',
    'wolf' => 'Wolves have teeth. They are gray.'
);

$dataSource = new DataArray();
foreach ($trainingDocuments as $category => $text) {
    $dataSource->addDocument($category, $text);
}

$classifier = new SVM($dataSource);
$classifier->setThreshold(0.6);

// Classify a string that resembles neither training document and dump the result.
$result = $classifier->classify('0943jf904jf09j34fpj');
var_dump($result);