/**
 * Classifies a string with a one-letter code based on the kind of
 * characters the Analyzer detects in it.
 *
 * The checks run in a fixed order and the first one that matches wins:
 *   'M' Japanese numerals, 'H' kanji, 'I' hiragana, 'K' katakana,
 *   'A' Latin letters, 'N' Western numerals, 'O' when nothing matches.
 *
 * @param string $str String to classify.
 *
 * @return string One of 'M', 'H', 'I', 'K', 'A', 'N' or 'O'.
 */
private function getType($str)
{
    if (Analyzer::hasJapaneseNumerals($str)) {
        return 'M';
    }

    // NOTE(review): the meaning of the second argument is defined by
    // Analyzer::hasKanji(), which is not visible here.
    if (Analyzer::hasKanji($str, true)) {
        return 'H';
    }

    if (Analyzer::hasHiragana($str)) {
        return 'I';
    }

    if (Analyzer::hasKatakana($str)) {
        return 'K';
    }

    if (Analyzer::hasLatinLetters($str)) {
        return 'A';
    }

    if (Analyzer::hasWesternNumerals($str)) {
        return 'N';
    }

    return 'O';
}
/**
 * Converts a year in Japanese format into Western format.
 *
 * @param string $year Kanji or hiragana era name followed by digits, or era
 *                     name in romaji, a space and digits.
 *                     E.g.: 明治33, めいじ33, Meiji 33
 *
 * @return int|array The Western year when exactly one era matches, or the
 *                   list of candidate years when several eras match.
 *
 * @throws Exception When the era name or the year number is missing or zero,
 *                   or when no matching era yields a year that stays within
 *                   the era (i.e. not past the start of the following one).
 */
public static function toWesternYear($year)
{
    if (Analyzer::hasKanji($year)) {
        $key = 'kanji';
        $eraName = Helper::extractKanji($year);
        $eraName = $eraName[0];
        // Everything after the era name is taken as the year number.
        $eraValue = (int) Helper::subString($year, Analyzer::length($eraName), Analyzer::length($year));
    } elseif (Analyzer::hasHiragana($year)) {
        $key = 'kana';
        $eraName = Helper::extractHiragana($year);
        $eraName = $eraName[0];
        $eraValue = (int) Helper::subString($year, Analyzer::length($eraName), Analyzer::length($year));
    } else {
        $key = 'romaji';
        $year = strtolower($year);
        $parts = explode(' ', $year);
        $eraName = $parts[0];
        // Without a space there is no year part: fall back to 0 so the
        // validation below rejects the input with an Exception instead of
        // PHP raising an undefined offset notice/warning first.
        $eraValue = isset($parts[1]) ? (int) $parts[1] : 0;
    }

    if (empty($eraName) || empty($eraValue)) {
        throw new Exception('Invalid year ' . $year);
    }

    $max = count(self::$mapEras);
    $results = array();

    for ($i = 0; $i < $max; $i++) {
        $era = self::$mapEras[$i];

        if (strtolower($era[$key]) == $eraName) {
            $westernYear = $era['year'] + $eraValue - 1;

            // A year that runs strictly past the start of the following era
            // is discarded; the last listed era has no upper bound.
            $overflown = false;
            if ($i < $max - 1) {
                $nextEra = self::$mapEras[$i + 1];
                $overflown = $westernYear > $nextEra['year'];
            }

            if (!$overflown) {
                $results[] = $westernYear;
            }
        }
    }

    if (empty($results)) {
        throw new Exception('Year ' . $year . ' is invalid');
    }

    if (count($results) == 1) {
        return $results[0];
    }

    return $results;
}
/**
 * Checks that Analyzer::segment() splits a full sentence into the expected
 * ordered list of tokens.
 */
public function testSegmenterSentence3()
{
    $expected = array(
        '彼',
        'は',
        '自分',
        'の',
        '考え',
        'を',
        '言葉',
        'に',
        'する',
        'の',
        'が',
        '得意',
        'で',
        'ない',
    );

    $actual = Analyzer::segment('彼は自分の考えを言葉にするのが得意でない');

    $this->assertSame($expected, $actual);
}
/**
 * Looks a verb up in the bundled SQLite database (verbs.db, located next to
 * this file), matching the given text against the kanji and kana columns.
 *
 * Input for which Analyzer::hasJapaneseLetters() is false is first passed
 * through Kana::transliterate() before the lookup.
 *
 * @param string $verb Verb written in kanji, kana or romaji.
 *
 * @return array Matching rows as associative arrays with lower-cased column
 *               names; empty when nothing matches.
 */
public static function getVerb($verb)
{
    $needle = $verb;
    if (!Analyzer::hasJapaneseLetters($needle)) {
        $transliterator = new Kana();
        $needle = $transliterator->transliterate($needle);
    }

    $dsn = 'sqlite:' . __DIR__ . DIRECTORY_SEPARATOR . 'verbs.db';
    $pdo = new PDO($dsn);
    $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
    $pdo->setAttribute(PDO::ATTR_CASE, PDO::CASE_LOWER);

    // The same value is bound to both placeholders: the caller does not say
    // which script the verb is written in.
    $query = $pdo->prepare('SELECT * FROM verbs WHERE kanji = :kanji OR kana = :kana');
    $query->execute(array(':kanji' => $needle, ':kana' => $needle));

    return $query->fetchAll(PDO::FETCH_ASSOC);
}
/**
 * Transliterates proper combinations of Latin alphabet characters into
 * Sokuon (http://en.wikipedia.org/wiki/Sokuon) characters.
 *
 * The first character of every doubled consonant (and the "t" of the Hepburn
 * "tch" cluster) is replaced by the sokuon character. Vowels and "n" are
 * never replaced. Every occurrence in the string is converted.
 *
 * @param string $str        String to be transliterated.
 * @param array  $parameters Must hold the sokuon character under the
 *                           'sokuon' key.
 *
 * @return string Transliterated string.
 */
protected function transliterateSokuon($str, $parameters)
{
    $source = (string) $str;

    // The scan below works on byte offsets, so the bound is the byte length.
    $length = strlen($source);

    // No need to go further.
    if ($length < 2) {
        return $str;
    }

    $skip = array('a', 'i', 'u', 'e', 'o', 'n');
    $last = $length - 1;
    $result = '';

    // The output is built in a single pass so that every match is kept and
    // the (multi-byte) sokuon character never shifts the offsets being read.
    for ($i = 0; $i < $last; $i++) {
        $current = $source[$i];

        // Bytes >= 0x80 belong to multi-byte characters: they are never
        // Latin consonants and must be copied through untouched.
        $isCandidate = ord($current) < 0x80 && !in_array($current, $skip, true);

        // Don't forget Hepburn special case: ch > tch
        if ($isCandidate
            && ($current === $source[$i + 1]
                || ($current === 't' && substr($source, $i + 1, 2) === 'ch'))
        ) {
            $result .= $parameters['sokuon'];
        } else {
            $result .= $current;
        }
    }

    // The final byte has no successor and is therefore always kept as is.
    return $result . $source[$last];
}
/**
 * Checks the counts reported by Analyzer::inspect() for the mixed-script
 * fixture string held in $this->mixCharacters.
 */
public function testInspectMixedCharacters()
{
    $expected = array('length' => 19, 'kanji' => 4, 'hiragana' => 5, 'katakana' => 3);

    $result = Analyzer::inspect($this->mixCharacters);

    // Expected value goes first so PHPUnit labels a failure diff correctly.
    $this->assertSame($expected, $result);
}