/**
 * {@inheritDoc}
 *
 * Hands $path to Yaml::parse() and turns every top-level entry of the result
 * into an Alphabet that is added to a fresh Storage. The entry key is used as
 * the language; the entry value is read for the keys "chars" (always),
 * "common_chars" and "multiple_chars" (both only when non-empty).
 *
 * @param mixed $path Value passed straight to Yaml::parse().
 *
 * @return Storage The populated storage; empty when the parsed data is falsy.
 */
public function load($path)
{
    $parsed = Yaml::parse($path);
    $storage = new Storage();

    if ($parsed) {
        foreach ($parsed as $language => $definition) {
            $charCodes = Unicode::ordStr($definition['chars']);

            // Optional section: fall back to an empty list when absent or empty.
            $commonCodes = !empty($definition['common_chars'])
                ? Unicode::ordStr($definition['common_chars'])
                : array();

            // "multiple_chars" is a space-separated list; each item is converted separately.
            $sequenceCodes = array();
            if (!empty($definition['multiple_chars'])) {
                $sequenceCodes = array_map(
                    function ($sequence) {
                        return Unicode::ordStr($sequence);
                    },
                    explode(' ', $definition['multiple_chars'])
                );
            }

            $storage->add(new Alphabet($language, $charCodes, $commonCodes, $sequenceCodes));
        }
    }

    return $storage;
}
/**
 * {@inheritDoc}
 *
 * Scores the already-detected languages against the given character codes:
 *  1. Collects the alphabet of every detected language that has one registered.
 *  2. For each code, votes 1 for an alphabet that has the char and 0.5 for an
 *     alphabet that has it as a common char (both votes may apply to one code).
 *  3. When more than one language holds votes, devotes each voted language by
 *     0.02 per alphabet char that does not occur in $codes, provided more than
 *     two of its chars are unused.
 *
 * @param string    $string    Not read by this visitor.
 * @param array     $codes     Character codes to check; taken by reference but not modified here.
 * @param Languages $languages Vote collector; receives vote() / devote() calls.
 *
 * @return void
 */
public function visit($string, array &$codes, Languages $languages)
{
    $detectedLanguages = $languages->getDetectedLanguages();

    if (!count($detectedLanguages)) {
        // No detected languages, nothing to score.
        return;
    }

    /** @var \Ideea\LanguageDetector\Alphabet\Alphabet[] $checkAlphabets */
    $checkAlphabets = array();

    foreach ($detectedLanguages as $detectedLanguage) {
        $alphabet = $this->alphabets->get($detectedLanguage);

        if (null === $alphabet) {
            // No alphabet registered for this language.
            continue;
        }

        $checkAlphabets[$alphabet->getLanguage()] = $alphabet;
    }

    foreach ($codes as $code) {
        foreach ($checkAlphabets as $alphabet) {
            if ($alphabet->hasChar($code)) {
                $languages->vote($alphabet->getLanguage(), 1);
            }

            if ($alphabet->hasCommonChar($code)) {
                $languages->vote($alphabet->getLanguage(), 0.5);
            }
        }
    }

    // Devote languages whose alphabet chars are largely unused in the input.
    // TODO: consider whether this penalty step is still needed.
    $votedLanguages = $languages->getVoteLanguages();

    if (count($votedLanguages) > 1) {
        // Loop-invariant: the distinct codes are the same for every language.
        $uniqueCodes = array_unique($codes);

        foreach ($votedLanguages as $votedLanguage => $votes) {
            $votedLanguageAlphabet = $this->alphabets->get($votedLanguage);

            // Check the looked-up alphabet, not the array key: the key is never
            // null, so the old check let a missing alphabet through to a call
            // on null below.
            if (null === $votedLanguageAlphabet) {
                // No alphabet exists for the voted language.
                continue;
            }

            $languageAlphabetChars = $votedLanguageAlphabet->getCharCodes();
            $languageUsedChars = array_intersect($uniqueCodes, $languageAlphabetChars);
            $countUnusedChars = count($languageAlphabetChars) - count($languageUsedChars);

            if ($countUnusedChars > 2) {
                $languages->devote($votedLanguage, $countUnusedChars * 0.02);
            }
        }
    }
}