/**
 * Checks that syllabic text, supplied as Unicode code points, is converted
 * to the expected Latin-alphabet spelling by Syllabics::unicodeToLatinAlphabet().
 */
public function testUnicode_a_alphabet_latin()
{
    // Expected Latin spelling => code points of the syllabic input.
    $cases = array(
        'inuit' => array(0x1403, 0x14c4, 0x1403, 0x1466),
        'taima' => array(0x144c, 0x14aa),
    );

    foreach ($cases as $expected => $codePoints) {
        $syllabicText = utf8::numeric_to_utf8($codePoints);
        $converted = Syllabics::unicodeToLatinAlphabet($syllabicText);

        $this->assertEquals($expected, $converted, "Erreur");
    }
}
/**
 * Runs the Uqailaut.jar program located next to this file on a single word
 * and returns whatever it prints.
 *
 * A word recognised by Syllabics::is_syllabic_word() is first converted with
 * Syllabics::unicodeToLatinAlphabet(); any other word is passed as given.
 *
 * @param string $word Word to analyse.
 *
 * @return array Lines written to standard output by the Java process
 *               (empty when the process prints nothing).
 *
 * @throws NotSupportedOnServerException When is_supported_on_server() reports false.
 */
public function run($word)
{
    if (!$this->is_supported_on_server()) {
        throw new NotSupportedOnServerException();
    }

    $latin_word = Syllabics::is_syllabic_word($word)
        ? Syllabics::unicodeToLatinAlphabet($word)
        : $word;

    $jar_path = realpath(dirname(__FILE__) . "/Uqailaut.jar");

    // Both arguments are shell-escaped: the word comes from the caller and was
    // previously interpolated verbatim, so characters such as `;`, `|`, `$(...)`
    // or a plain space could inject commands or split the argument.
    // NOTE(review): escapeshellarg() may drop bytes that are invalid in the
    // current LC_CTYPE locale; non-syllabic input with non-ASCII characters
    // should be checked against the server's locale.
    $command = 'java -jar '
        . escapeshellarg((string) $jar_path)
        . ' '
        . escapeshellarg((string) $latin_word);

    // exec() appends one entry per output line to this array.
    $analyses = array();
    exec($command, $analyses);

    return $analyses;
}
/**
 * Processes one query: stores the request parameters on the instance, turns
 * the query into regular expression(s), creates the Grepper, computes the
 * term distribution and, when it has indices, collects the matching alignments.
 *
 * The time spent (whole seconds, measured with time()) is stored in
 * $this->elapsed_time.
 *
 * @param string $query_orig               Query as typed; `*` stands for "0 or more characters".
 * @param string $query_language           Language of the query; 'iu' enables the syllabic handling.
 * @param string $output_inuktitut_script  'syl' requests a syllabic form of a Latin query.
 * @param mixed  $max_variants             Stored as-is on the instance.
 * @param mixed  $max_parts                Stored as-is on the instance.
 * @param mixed  $show_list_flag           Stored as-is on the instance.
 * @param mixed  $list_order               Stored as-is on the instance.
 * @param mixed  $lang                     Stored as-is on the instance.
 */
function run($query_orig, $query_language, $output_inuktitut_script, $max_variants, $max_parts, $show_list_flag, $list_order, $lang)
{
    $logger = Logger::getLogger('ProcessQuery.run');
    $logger->debug("\$query_orig= {$query_orig}");

    // Keep the request parameters on the instance.
    $this->query_orig = $query_orig;
    $this->query_language = $query_language;
    $this->output_inuktitut_script = $output_inuktitut_script;
    $this->max_variants = $max_variants;
    $this->max_parts = $max_parts;
    $this->show_list_flag = $show_list_flag;
    $this->list_order = $list_order;
    $this->lang = $lang;

    // Make the query a regular expression: each `*` becomes a lazy run of
    // non-whitespace characters.
    $pattern = preg_replace('/\\*/', '\\S*?', $this->query_orig);
    $syllabicPattern = '';

    $isInuktitut = $this->query_language == 'iu';
    if ($isInuktitut && preg_match("/\\p{Canadian_Aboriginal}/u", $pattern)) {
        // The query contains syllabic characters: keep it as the syllabic
        // pattern and use its Latin-alphabet conversion as the main pattern.
        $syllabicPattern = $pattern;
        $pattern = Syllabics::unicodeToLatinAlphabet($pattern);
    } elseif ($isInuktitut && $this->output_inuktitut_script == 'syl') {
        // Latin query, syllabic output requested: derive the syllabic pattern.
        $syllabicPattern = Syllabics::latinAlphabetToUnicode($pattern, '0');
    }

    $this->query_regexp = $pattern;
    $this->query_regexpSyllabic = $syllabicPattern;

    $logger->debug("\$this->query_regexp= " . $this->query_regexp);
    $logger->debug("\$this->query_regexpSyllabic= " . $this->query_regexpSyllabic);

    // Starting time.
    $startTime = time();

    // The Grepper receives both patterns plus the query language and the
    // requested output script.
    $this->grepper = new Grepper(
        $this->query_regexp,
        $this->query_regexpSyllabic,
        $this->query_language,
        $this->output_inuktitut_script
    );

    // Term distribution. According to the original notes it is looked up in
    // InuktitutWordsIndex.txt, InuktitutWordsSyllabicIndex.txt and
    // EnglisWordsIndex.txt (names as recorded there), and it holds:
    //   words           : the words of the query
    //   total_frequency : number of sentences containing the query
    //   indices         : positions of those sentences in SingleLineAlignment*.txt
    //   plus the distribution for each word of the query.
    $this->getDistribution();

    if (count($this->term_distribution->indices) > 0) {
        $this->get_all_matching_alignments_from_td();
        // The total frequency is replaced by the number of alignments collected.
        $this->term_distribution->total_frequency = count($this->alignments);
    }

    // Elapsed time.
    $this->elapsed_time = time() - $startTime;
}
/**
 * Converts Unicode text into the requested output format.
 *
 * Supported values of $toFormat:
 *  - 'roman alphabet'           : Syllabics::unicodeToLatinAlphabet()
 *  - 'unicode'                  : text after the $aipaitai option is applied
 *  - 'unicode &#xxxx;'          : same, then $this->unicodeToHTMLEntity()
 *  - 'unicode \uxxxx'           : same, then $this->unicodeToSlashUUnicodeString()
 *  - 'unicode url encoding %xx' : same, then $this->unicodeToURLEncoding()
 *  - 'nunacom' / 'prosyl' / 'aipainunavik' : the matching class's unicodeToLegacy()
 *
 * The format is matched with switch's loose comparison.
 *
 * @param string $toFormat Target format (see list above).
 * @param string $aipaitai '1' or '0' selects a Syllabics conversion for the
 *                         'unicode…' formats; any other value leaves the text as is.
 * @param string $text     Text to convert.
 *
 * @return string The converted text, or "" when $toFormat is not one of the
 *                supported formats.
 */
function transcodeFromUnicode($toFormat, $aipaitai, $text)
{
    $logger = Logger::getLogger('transcodeFromUnicode');
    $logger->debug("\$toFormat= '{$toFormat}'");
    $logger->debug("\$text= '{$text}'");

    switch ($toFormat) {
        case 'roman alphabet':
            return Syllabics::unicodeToLatinAlphabet($text);

        case 'unicode':
            return self::applyAipaitaiOption($aipaitai, $text);

        case 'unicode &#xxxx;':
            return $this->unicodeToHTMLEntity(self::applyAipaitaiOption($aipaitai, $text));

        case 'unicode \\uxxxx':
            return $this->unicodeToSlashUUnicodeString(self::applyAipaitaiOption($aipaitai, $text));

        case 'unicode url encoding %xx':
            return $this->unicodeToURLEncoding(self::applyAipaitaiOption($aipaitai, $text));

        case 'nunacom':
            return Nunacom::unicodeToLegacy($text);

        case 'prosyl':
            return Prosyl::unicodeToLegacy($text);

        case 'aipainunavik':
            return Aipainunavik::unicodeToLegacy($text);

        default:
            // Unknown formats yield an empty string, as before.
            return "";
    }
}

/**
 * Applies the $aipaitai option shared by all 'unicode…' output formats.
 *
 * The option is matched with switch's loose comparison, so e.g. integer 1
 * is treated like '1'.
 *
 * @param string $aipaitai '1', '0', or anything else for "leave unchanged".
 * @param string $text     Unicode text.
 *
 * @return string The text after the selected conversion.
 */
private static function applyAipaitaiOption($aipaitai, $text)
{
    switch ($aipaitai) {
        case '1':
            // NOTE(review): a "...ToLatinAlphabet" call inside the Unicode
            // output formats looks surprising; kept exactly as it was —
            // confirm this is the intended conversion.
            return Syllabics::iciUnicodeToLatinAlphabet($text);

        case '0':
            return Syllabics::iciUnicodeToNoAipaitai($text);

        default:
            return $text;
    }
}