/**
 * Builds one TokenProbability per token, in input order.
 *
 * First, every PotentialAbbreviationToken whose value is known to
 * $this->abbreviations is replaced by an AbbreviationToken carrying the
 * same value. Then each token starts with a probability of 0; if a rule is
 * registered under the token's name, each of its patterns is checked
 * against the neighbouring tokens and a fully matching pattern sets the
 * probability to that pattern's value.
 *
 * Tokens are read by the indices 0 .. count($tokens) - 1, so the array is
 * expected to be sequentially indexed from zero.
 *
 * @param Token[] $tokens
 *
 * @return TokenProbability[]
 */
public function calculate(array $tokens)
{
    // Promote potential abbreviations that are actually known.
    foreach ($tokens as $position => $candidate) {
        if ($candidate instanceof PotentialAbbreviationToken
            && $this->abbreviations->hasAbbreviation($candidate->getValue())
        ) {
            $tokens[$position] = new AbbreviationToken($candidate->getValue());
        }
    }

    $result = [];
    $total = count($tokens);

    for ($index = 0; $index < $total; $index++) {
        $current = $tokens[$index];
        $entry = new TokenProbability($current, 0);

        // Tokens without a rule keep the initial probability of 0.
        if (!$this->rules->hasRule($current->getName())) {
            $result[] = $entry;
            continue;
        }

        $rule = $this->rules->getRule($current->getName());

        foreach ($rule->getPatterns() as $pattern) {
            $expectedByOffset = $pattern->getTokensOffsetRelativeToStartToken($current->getName());
            $matches = true;

            foreach ($expectedByOffset as $shift => $expected) {
                $neighbourIndex = $index + $shift;

                // A pattern fails as soon as a neighbour is missing or has
                // a different name than the pattern expects at that offset.
                if (!array_key_exists($neighbourIndex, $tokens)
                    || $tokens[$neighbourIndex]->getName() !== $expected->getTokenName()
                ) {
                    $matches = false;
                    break;
                }
            }

            // Later matching patterns overwrite earlier ones.
            if ($matches) {
                $entry->setProbability($pattern->getProbability());
            }
        }

        $result[] = $entry;
    }

    return $result;
}
/**
 * Creates a fresh Abbreviations collection and feeds it the values of
 * every provider in $this->abbreviationProviders, in iteration order.
 *
 * @return Abbreviations
 */
private function getAbbreviations()
{
    $collected = new Abbreviations();

    foreach ($this->abbreviationProviders as $source) {
        $values = $source->getValues();
        $collected->addAbbreviations($values);
    }

    return $collected;
}