Author: Cam Spiers (camspiers@gmail.com)
Inheritance: extends Classifier
Example #1
0
 /**
  * Stores the classifier's collaborators, substituting a default for most
  * of the optional ones when they are omitted.
  *
  * @param DataSourceInterface               $dataSource         Stored as the classifier's data source
  * @param SVMModel|null                     $model              Defaults to a new SVMModel
  * @param Document\NormalizerInterface|null $documentNormalizer Defaults to Document\Lowercase
  * @param TokenizerInterface|null           $tokenizer          Defaults to Word
  * @param Token\NormalizerInterface|null    $tokenNormalizer    Stored as given; no default is created
  * @param \SVM|null                         $svm                Defaults to a new \SVM using a linear kernel
  * @param float|null                        $threshold          Forwarded to setThreshold() unless null
  */
 public function __construct(
     DataSourceInterface $dataSource,
     SVMModel $model = null,
     Document\NormalizerInterface $documentNormalizer = null,
     TokenizerInterface $tokenizer = null,
     Token\NormalizerInterface $tokenNormalizer = null,
     \SVM $svm = null,
     $threshold = null
 ) {
     $this->dataSource = $dataSource;
     $this->model = $model ?: new SVMModel();
     $this->documentNormalizer = $documentNormalizer ?: new Document\Lowercase();
     $this->tokenizer = $tokenizer ?: new Word();
     // Deliberately left as null when not supplied.
     $this->tokenNormalizer = $tokenNormalizer;

     if ($svm === null) {
         // No SVM instance injected: build one configured with a linear kernel.
         $svm = new \SVM();
         $svm->setOptions(array(\SVM::OPT_KERNEL_TYPE => \SVM::KERNEL_LINEAR));
     }
     $this->svm = $svm;

     // Compare against null instead of relying on truthiness, so that an
     // explicitly supplied 0 / 0.0 threshold is applied rather than dropped.
     if ($threshold !== null) {
         $this->setThreshold($threshold);
     }
 }
Example #2
0
<?php

ini_set('memory_limit', '512M');
require_once __DIR__ . '/../vendor/autoload.php';
use Camspiers\StatisticalClassifier\DataSource\Directory;
use Camspiers\StatisticalClassifier\Classifier\SVM;
// Categories passed as the 'include' option to both the training and the test data source.
$cats = array('alt.atheism', 'comp.graphics', 'rec.motorcycles', 'sci.crypt', 'soc.religion.christian', 'talk.religion.misc');

// Data source the classifier is built from (the 20news-bydate-train directory).
$source = new Directory(array('directory' => __DIR__ . '/../resources/20news-bydate/20news-bydate-train', 'include' => $cats));
$classifier = new SVM($source);

// Separate data source used only for evaluation (the 20news-bydate-test directory).
$testSource = new Directory(array('directory' => __DIR__ . '/../resources/20news-bydate/20news-bydate-test', 'include' => $cats));
$data = $testSource->getData();

// $stats[$category] = array(documents for which is() returned true, total documents).
$stats = array();
foreach ($data as $category => $documents) {
    $stats[$category] = array(0, count($documents));
    foreach ($documents as $document) {
        if ($classifier->is($category, $document)) {
            $stats[$category][0]++;
        }
    }
}

// Print the fraction of matching documents per category.
foreach ($stats as $category => $counts) {
    list($matched, $total) = $counts;
    // A category with no test documents has no meaningful ratio; dividing by
    // its zero count would raise a warning (or DivisionByZeroError on PHP 8).
    echo $category, ': ', $total > 0 ? $matched / $total : 'n/a', PHP_EOL;
}
<?php

require_once __DIR__ . '/../vendor/autoload.php';
// Using a plain data array source for simplicity
use Camspiers\StatisticalClassifier\DataSource\DataArray;
use Camspiers\StatisticalClassifier\Classifier\SVM;
// Assemble a two-document, two-category data source in memory.
$trainingData = new DataArray();
$trainingData->addDocument('pig', 'Pigs are great. Pink and cute!');
$trainingData->addDocument('wolf', 'Wolves have teeth. They are gray.');

// Build the classifier on that data and set its threshold to 0.6.
$classifier = new SVM($trainingData);
$classifier->setThreshold(0.6);

// Classify a string that appears in neither training document and dump
// whatever classify() returns for it.
$result = $classifier->classify('0943jf904jf09j34fpj');
var_dump($result);