/**
 * Replaces every document in the given data set with its list of tokens.
 *
 * The input is walked as category => documents, and each document as
 * index => document. A document is passed through the document normalizer
 * (when one is set), split by the tokenizer, and the resulting tokens are
 * passed through the token normalizer (when one is set).
 *
 * @param  array $data Documents grouped by category
 * @return array The same structure with each document replaced by its tokens
 */
public function __invoke($data)
{
    foreach ($data as $category => $documents) {
        foreach ($documents as $index => $document) {
            // Optional pre-processing of the raw document
            $text = $this->documentNormalizer
                ? $this->documentNormalizer->normalize($document)
                : $document;

            $tokens = $this->tokenizer->tokenize($text);

            // Optional post-processing of the token list
            $data[$category][$index] = $this->tokenNormalizer
                ? $this->tokenNormalizer->normalize($tokens)
                : $tokens;
        }
    }

    return $data;
}
/**
 * @inheritdoc
 *
 * The document is normalized (when a document normalizer is set), tokenized,
 * and its tokens normalized (when a token normalizer is set). Each category
 * in the prepared model is then scored as the sum of
 * (token occurrences * category weight for that token) over the tokens the
 * category has a weight for. The category with the LOWEST score is returned.
 *
 * Returns false when the two lowest scores are equal (including the case
 * where the model has no categories), because no single category wins.
 */
public function classify($document)
{
    if ($this->documentNormalizer) {
        $document = $this->documentNormalizer->normalize($document);
    }

    $tokens = $this->tokenizer->tokenize($document);

    if ($this->tokenNormalizer) {
        $tokens = $this->tokenNormalizer->normalize($tokens);
    }

    $tokenCounts = array_count_values($tokens);
    $weights = $this->preparedModel()->getModel();

    $results = array();
    foreach ($weights as $category => $categoryWeights) {
        // Accumulate as float from the start so every score has the same
        // type; otherwise an untouched int 0 and a computed float 0.0 would
        // not be recognised as a tie by the strict comparison below.
        $score = 0.0;
        foreach ($tokenCounts as $token => $count) {
            if (array_key_exists($token, $categoryWeights)) {
                $score += $count * $categoryWeights[$token];
            }
        }
        $results[$category] = $score;
    }

    // Ascending sort: the first entry is the lowest-scoring category
    asort($results, SORT_NUMERIC);
    $category = key($results);

    $lowest = array_shift($results);
    // array_shift() yields null once the array is exhausted, so a model with
    // a single category never counts as a tie, while an empty model does
    $runnerUp = array_shift($results);

    if ($lowest === $runnerUp) {
        return false;
    }

    return $category;
}
/**
 * Converts a document into the array form used with \SVMModel.
 *
 * The document is normalized (when a document normalizer is set), tokenized,
 * and its tokens normalized (when a token normalizer is set). Each distinct
 * token that has an entry in the model's token map contributes one element:
 * mapped token value => number of occurrences in the document. Tokens
 * without an entry in the map are dropped. The result is sorted by key.
 *
 * @param  string                                              $document
 * @param  \Camspiers\StatisticalClassifier\Model\SVMModel     $model
 * @return array
 */
protected function prepareDocument($document, SVMModel $model)
{
    $tokenMap = $model->getTokenMap();

    $text = $this->documentNormalizer
        ? $this->documentNormalizer->normalize($document)
        : $document;

    $tokens = $this->tokenizer->tokenize($text);

    if ($this->tokenNormalizer) {
        $tokens = $this->tokenNormalizer->normalize($tokens);
    }

    $features = array();
    foreach (array_count_values($tokens) as $token => $occurrences) {
        // Skip tokens the model has no mapping for
        if (!isset($tokenMap[$token])) {
            continue;
        }
        $features[$tokenMap[$token]] = $occurrences;
    }

    ksort($features, SORT_NUMERIC);

    return $features;
}