저자: Matthieu Bilbille
예제 #1
0
 /**
  * Classifies a string with a one-letter character-type code.
  *
  * The checks run in a fixed order and the first one that succeeds decides
  * the result: Japanese numerals => 'M', kanji => 'H', hiragana => 'I',
  * katakana => 'K', latin letters => 'A', western numerals => 'N'.
  * When none of them matches, 'O' is returned.
  *
  * @param string $str Text to classify.
  *
  * @return string One of 'M', 'H', 'I', 'K', 'A', 'N' or 'O'.
  */
 private function getType($str)
 {
     if (Analyzer::hasJapaneseNumerals($str)) {
         return 'M';
     }
     // NOTE(review): the meaning of the second argument is defined by
     // Analyzer::hasKanji() and is not visible here - confirm before changing.
     if (Analyzer::hasKanji($str, true)) {
         return 'H';
     }
     if (Analyzer::hasHiragana($str)) {
         return 'I';
     }
     if (Analyzer::hasKatakana($str)) {
         return 'K';
     }
     if (Analyzer::hasLatinLetters($str)) {
         return 'A';
     }
     if (Analyzer::hasWesternNumerals($str)) {
         return 'N';
     }
     // Fallback code when no check above matched.
     return 'O';
 }
예제 #2
0
 /**
  * Converts a year in Japanese era format into Western format.
  *
  * The era name is looked up in self::$mapEras under the 'kanji', 'kana' or
  * 'romaji' key, depending on the script detected in the input. The Western
  * year is the era's start year plus the era year minus one. A candidate is
  * discarded when it is greater than the start year of the next entry in
  * self::$mapEras (the era had already ended by then).
  *
  * @param string $year Kanji or hiragana era name followed by digits, or era name
  *                     in romaji, a single space and digits.
  *                     I.e. : 明治33, めいじ33, Meiji 33
  *
  * @return int|array The Western year when exactly one era matches, otherwise
  *                   an array with every matching Western year.
  *
  * @throws Exception When the era name or the era year is missing, when the era
  *                   year is lower than 1, or when no era yields a valid year.
  */
 public static function toWesternYear($year)
 {
     if (Analyzer::hasKanji($year)) {
         $key = 'kanji';
         $eraName = Helper::extractKanji($year);
         $eraName = $eraName[0];
         // Everything after the era name is taken as the era year.
         $eraValue = (int) Helper::subString($year, Analyzer::length($eraName), Analyzer::length($year));
     } elseif (Analyzer::hasHiragana($year)) {
         $key = 'kana';
         $eraName = Helper::extractHiragana($year);
         $eraName = $eraName[0];
         $eraValue = (int) Helper::subString($year, Analyzer::length($eraName), Analyzer::length($year));
     } else {
         $key = 'romaji';
         $year = strtolower($year);
         $parts = explode(' ', $year);
         $eraName = $parts[0];
         // Input without a space has no year part: fall back to 0 so that the
         // validation below throws instead of reading an undefined offset.
         $eraValue = isset($parts[1]) ? (int) $parts[1] : 0;
     }
     // Era years start at 1; zero and negative values are never valid.
     if (empty($eraName) || $eraValue < 1) {
         throw new Exception('Invalid year ' . $year);
     }
     $max = count(self::$mapEras);
     $results = array();
     for ($i = 0; $i < $max; $i++) {
         $era = self::$mapEras[$i];
         if (strtolower($era[$key]) !== $eraName) {
             continue;
         }
         $westernYear = $era['year'] + $eraValue - 1;
         // The last entry has no successor, so it can never overflow.
         if ($i < $max - 1) {
             $nextEra = self::$mapEras[$i + 1];
             // Equality is allowed: the computed year may coincide with the
             // start year of the next entry.
             if ($westernYear > $nextEra['year']) {
                 continue;
             }
         }
         $results[] = $westernYear;
     }
     if (empty($results)) {
         throw new Exception('Year ' . $year . ' is invalid');
     }
     if (count($results) == 1) {
         return $results[0];
     }
     return $results;
 }
예제 #3
0
 /**
  * Checks that Analyzer::segment() splits the sample sentence into the
  * expected ordered list of tokens.
  */
 public function testSegmenterSentence3()
 {
     $expectedTokens = array(
         '彼',
         'は',
         '自分',
         'の',
         '考え',
         'を',
         '言葉',
         'に',
         'する',
         'の',
         'が',
         '得意',
         'で',
         'ない',
     );
     $actualTokens = Analyzer::segment('彼は自分の考えを言葉にするのが得意でない');
     $this->assertSame($expectedTokens, $actualTokens);
 }
예제 #4
0
 /**
  * Looks up a verb in the bundled SQLite database.
  *
  * When the input contains no Japanese letters it is first transliterated
  * with a Kana transliterator. The resulting text is then matched against
  * both the `kanji` and the `kana` columns of the `verbs` table stored in
  * `verbs.db`, located in the same directory as this source file.
  *
  * @param string $verb Verb written in kanji, hiragana or romaji.
  *
  * @return array Every matching row as an associative array; column names
  *               are forced to lower case.
  */
 public static function getVerb($verb)
 {
     $isJapanese = Analyzer::hasJapaneseLetters($verb);
     if (!$isJapanese) {
         // No Japanese letters in the input: transliterate it before searching.
         $transliterator = new Kana();
         $verb = $transliterator->transliterate($verb);
     }

     $dsn = 'sqlite:' . __DIR__ . DIRECTORY_SEPARATOR . 'verbs.db';
     $pdo = new PDO($dsn);
     // Raise exceptions on database errors and normalise column-name case.
     $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
     $pdo->setAttribute(PDO::ATTR_CASE, PDO::CASE_LOWER);

     // The same value is bound to both placeholders.
     $query = $pdo->prepare('SELECT * FROM verbs WHERE kanji = :kanji OR kana = :kana');
     $bindings = array(':kanji' => $verb, ':kana' => $verb);
     $query->execute($bindings);

     return $query->fetchAll(PDO::FETCH_ASSOC);
 }
예제 #5
0
 /**
  * Transliterate proper combinations of latin alphabet characters into
  * Sokuon (http://en.wikipedia.org/wiki/Sokuon) characters.
  *
  * A latin consonant immediately followed by the same character, or a "t"
  * immediately followed by "ch" (Hepburn special case), is replaced by the
  * sokuon character. Vowels and "n" are never replaced. Every occurrence in
  * the string is handled, and non-latin (multi-byte) characters are left
  * untouched.
  *
  * @param string $str        String to be transliterated.
  * @param array  $parameters Must contain the key 'sokuon' holding the
  *                           replacement character.
  *
  * @return string Transliterated string.
  */
 protected function transliterateSokuon($str, $parameters)
 {
     // Work on whole UTF-8 characters so multi-byte text is never cut in
     // the middle of a character.
     $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
     // preg_split() returns false on malformed UTF-8: leave such input as is.
     if ($chars === false) {
         return $str;
     }
     $count = count($chars);
     //No need to go further.
     if ($count < 2) {
         return $str;
     }
     $skip = array('a', 'i', 'u', 'e', 'o', 'n');
     for ($i = 0; $i < $count - 1; $i++) {
         $char = $chars[$i];
         // Only latin consonants can be geminated; digits, spaces,
         // punctuation and kana are ignored.
         if (preg_match('/^[A-Za-z]$/', $char) !== 1 || in_array(strtolower($char), $skip, true)) {
             continue;
         }
         $isDoubled = $char === $chars[$i + 1];
         // Don't forget Hepburn special case: ch > tch
         $isTch = $char === 't'
             && $chars[$i + 1] === 'c'
             && isset($chars[$i + 2])
             && $chars[$i + 2] === 'h';
         if ($isDoubled || $isTch) {
             // Only look-ahead positions are read afterwards, so replacing
             // in place does not disturb the remaining comparisons.
             $chars[$i] = $parameters['sokuon'];
         }
     }
     return implode('', $chars);
 }
예제 #6
0
 /**
  * Checks the counters reported by Analyzer::inspect() for the mixed-script
  * fixture string held in $this->mixCharacters.
  */
 public function testInspectMixedCharacters()
 {
     $expected = array('length' => 19, 'kanji' => 4, 'hiragana' => 5, 'katakana' => 3);
     $result = Analyzer::inspect($this->mixCharacters);
     // Expected value goes first so PHPUnit labels a failure diff correctly.
     $this->assertSame($expected, $result);
 }