/**
  * Replaces every document in the data set with its list of tokens.
  *
  * Each document goes through the document normalizer (when one is set),
  * then the tokenizer, then the token normalizer (when one is set). The
  * result is written back under the same category and index.
  *
  * @param  array $data documents grouped by category, then by index
  * @return array the same structure with tokens in place of each document
  */
 public function __invoke($data)
 {
     foreach ($data as $category => $documents) {
         foreach ($documents as $index => $document) {
             // Normalizers are optional collaborators; skip them when unset.
             $text = $this->documentNormalizer
                 ? $this->documentNormalizer->normalize($document)
                 : $document;
             $tokens = $this->tokenizer->tokenize($text);
             $data[$category][$index] = $this->tokenNormalizer
                 ? $this->tokenNormalizer->normalize($tokens)
                 : $tokens;
         }
     }
     return $data;
 }
 /**
  * Returns the category with the lowest summed token weight for a document.
  *
  * The document is normalized (when normalizers are set) and tokenized, then
  * every category in the prepared model is scored by adding count * weight for
  * each token that has a weight in that category. Scores are sorted ascending;
  * if the two lowest scores are identical, false is returned instead of a
  * category.
  *
  * @inheritdoc
  */
 public function classify($document)
 {
     $text = $this->documentNormalizer
         ? $this->documentNormalizer->normalize($document)
         : $document;
     $tokens = $this->tokenizer->tokenize($text);
     if ($this->tokenNormalizer) {
         $tokens = $this->tokenNormalizer->normalize($tokens);
     }
     // Map of token => number of occurrences in the document.
     $occurrences = array_count_values($tokens);

     $scores = array();
     foreach ($this->preparedModel()->getModel() as $category => $categoryWeights) {
         $score = 0;
         foreach ($occurrences as $token => $count) {
             // Tokens without a weight in this category contribute nothing.
             if (!array_key_exists($token, $categoryWeights)) {
                 continue;
             }
             $score += $count * $categoryWeights[$token];
         }
         $scores[$category] = $score;
     }

     // Ascending sort puts the lowest score first.
     asort($scores, SORT_NUMERIC);
     $lowestCategory = key($scores);
     $lowestScore = array_shift($scores);
     $nextScore = array_shift($scores);

     // An exact tie between the two lowest scores yields no decision.
     return $lowestScore === $nextScore ? false : $lowestCategory;
 }
Example #3
0
 /**
  * Formats the document for use in \SVMModel.
  *
  * The document is normalized (when normalizers are set) and tokenized, its
  * tokens are counted, and each count is stored under the index that the
  * model's token map assigns to that token. Tokens missing from the token map
  * are dropped. The result is sorted by key in numeric order.
  *
  * @param  string                                          $document
  * @param  \Camspiers\StatisticalClassifier\Model\SVMModel $model
  * @return array token counts keyed by token-map index, sorted by key
  */
 protected function prepareDocument($document, SVMModel $model)
 {
     $indexByToken = $model->getTokenMap();

     $text = $this->documentNormalizer
         ? $this->documentNormalizer->normalize($document)
         : $document;
     $tokens = $this->tokenizer->tokenize($text);
     if ($this->tokenNormalizer) {
         $tokens = $this->tokenNormalizer->normalize($tokens);
     }

     $features = array();
     foreach (array_count_values($tokens) as $token => $occurrences) {
         // Only tokens known to the model's token map are kept.
         if (!isset($indexByToken[$token])) {
             continue;
         }
         $features[$indexByToken[$token]] = $occurrences;
     }
     ksort($features, SORT_NUMERIC);

     return $features;
 }