Ejemplo n.º 1
0
 /**
  * Checks Syllabics::unicodeToLatinAlphabet() against known spellings.
  *
  * Each input string is built with utf8::numeric_to_utf8() from a list of
  * numeric codes (all within U+1400..U+167F, the Unified Canadian Aboriginal
  * Syllabics block) and must come back as the expected Latin-alphabet word.
  */
 public function testUnicode_a_alphabet_latin()
 {
     // Expected Latin spelling => numeric codes of the input characters.
     $cases = array(
         'inuit' => array(0x1403, 0x14c4, 0x1403, 0x1466),
         'taima' => array(0x144c, 0x14aa),
     );

     foreach ($cases as $expected => $codes) {
         $input = utf8::numeric_to_utf8($codes);
         $actual = Syllabics::unicodeToLatinAlphabet($input);
         $this->assertEquals($expected, $actual, "Erreur");
     }
 }
 /**
  * Runs the Uqailaut.jar command-line tool on a word and returns its output.
  *
  * A word for which Syllabics::is_syllabic_word() is true is first converted
  * with Syllabics::unicodeToLatinAlphabet(); any other word is passed as given.
  * The jar is looked up next to this source file.
  *
  * @param string $word Word to hand to the jar as its single argument.
  * @return array Lines the java process wrote to standard output, as
  *               collected by exec().
  * @throws NotSupportedOnServerException When is_supported_on_server() is false.
  */
 public function run($word)
 {
     if (!$this->is_supported_on_server()) {
         throw new NotSupportedOnServerException();
     }

     $latin_word = Syllabics::is_syllabic_word($word) ? Syllabics::unicodeToLatinAlphabet($word) : $word;
     $jar_path = realpath(dirname(__FILE__) . "/Uqailaut.jar");

     // The word comes from the caller, so it must never reach the shell
     // unquoted: escapeshellarg() makes it exactly one argument and disables
     // metacharacters such as ';', '|', '`' and '$('. The jar path is quoted
     // the same way so directories containing spaces or quotes keep working.
     // realpath() returns false when the jar is missing; the cast keeps that
     // case a plain (empty) string argument, as it was before.
     $command = 'java -jar ' . escapeshellarg((string) $jar_path) . ' ' . escapeshellarg((string) $latin_word);

     $analyses = array();
     exec($command, $analyses);

     return $analyses;
 }
Ejemplo n.º 3
0
 /**
  * Executes a query end to end and stores the results on this object.
  *
  * Steps, in order:
  *  1. copy the request parameters onto the instance;
  *  2. turn the query into a regular expression ('*' becomes '\S*?');
  *  3. for Inuktitut ('iu') queries, derive the Latin and/or syllabic form of
  *     that expression through the Syllabics converters;
  *  4. create the Grepper for the query, then call getDistribution() and,
  *     when it produced at least one index, load the matching alignments and
  *     set total_frequency to their count;
  *  5. record the elapsed wall-clock time in whole seconds.
  *
  * Nothing is returned; callers read the populated properties.
  */
 function run($query_orig, $query_language, $output_inuktitut_script, $max_variants, $max_parts, $show_list_flag, $list_order, $lang)
 {
     $log = Logger::getLogger('ProcessQuery.run');
     $log->debug('$query_orig= ' . "{$query_orig}");

     // Keep the request parameters on the instance.
     $this->query_orig = $query_orig;
     $this->query_language = $query_language;
     $this->output_inuktitut_script = $output_inuktitut_script;
     $this->max_variants = $max_variants;
     $this->max_parts = $max_parts;
     $this->show_list_flag = $show_list_flag;
     $this->list_order = $list_order;
     $this->lang = $lang;

     // In the query, '*' stands for "zero or more characters".
     $this->query_regexp = preg_replace('/\\*/', '\\S*?', $this->query_orig);
     $this->query_regexpSyllabic = '';

     $isInuktitut = $this->query_language == 'iu';
     if ($isInuktitut && preg_match("/\\p{Canadian_Aboriginal}/u", $this->query_regexp)) {
         // Query contains syllabic characters: keep it as the syllabic
         // expression and search the Latin side with its converted form.
         $this->query_regexpSyllabic = $this->query_regexp;
         $this->query_regexp = Syllabics::unicodeToLatinAlphabet($this->query_regexp);
     } elseif ($isInuktitut && $this->output_inuktitut_script == 'syl') {
         // Latin query, syllabic output requested: derive the syllabic form.
         $this->query_regexpSyllabic = Syllabics::latinAlphabetToUnicode($this->query_regexp, '0');
     }

     $log->debug('$this->query_regexp= ' . $this->query_regexp);
     $log->debug('$this->query_regexpSyllabic= ' . $this->query_regexpSyllabic);

     // Timing starts here, so it covers the search but not the setup above.
     $startedAt = time();

     // The Grepper receives both forms of the expression plus the language
     // and output-script settings of the query.
     $this->grepper = new Grepper(
         $this->query_regexp,
         $this->query_regexpSyllabic,
         $this->query_language,
         $this->output_inuktitut_script
     );

     // getDistribution() is expected to fill $this->term_distribution; the
     // fields read below are `indices` and `total_frequency`.
     // NOTE(review): earlier comments described the distribution as holding
     // the query words, the number of sentences containing the query, the
     // positions of those sentences in SingleLineAlignment*.txt and a
     // per-word distribution, read from InuktitutWordsIndex.txt,
     // InuktitutWordsSyllabicIndex.txt and "EnglisWordsIndex.txt" (sic) -
     // confirm against getDistribution().
     $this->getDistribution();

     if (count($this->term_distribution->indices) > 0) {
         $this->get_all_matching_alignments_from_td();
         // The frequency reported is the number of alignments actually loaded.
         $this->term_distribution->total_frequency = count($this->alignments);
     }

     $this->elapsed_time = time() - $startedAt;
 }
Ejemplo n.º 4
0
 /**
  * Converts Unicode text to the requested target format.
  *
  * Supported values of $toFormat:
  *  - 'roman alphabet'            : Syllabics::unicodeToLatinAlphabet()
  *  - 'unicode'                   : text adjusted for the $aipaitai option
  *  - 'unicode &#xxxx;'           : as 'unicode', then unicodeToHTMLEntity()
  *  - 'unicode \uxxxx'            : as 'unicode', then unicodeToSlashUUnicodeString()
  *  - 'unicode url encoding %xx'  : as 'unicode', then unicodeToURLEncoding()
  *  - 'nunacom', 'prosyl', 'aipainunavik' : the matching legacy-font
  *    converter's unicodeToLegacy()
  *
  * @param string $toFormat Target format, one of the values above.
  * @param string $aipaitai Option used only by the 'unicode*' formats; see
  *                         applyAipaitaiOption().
  * @param string $text     Text to convert.
  * @return string The converted text, or an empty string when $toFormat is
  *                not one of the supported values.
  */
 function transcodeFromUnicode($toFormat, $aipaitai, $text)
 {
     $logger = Logger::getLogger('transcodeFromUnicode');
     $logger->debug("\$toFormat= '{$toFormat}'");
     $logger->debug("\$text= '{$text}'");
     $transcodedText = "";
     switch ($toFormat) {
         case 'roman alphabet':
             $transcodedText = Syllabics::unicodeToLatinAlphabet($text);
             break;
         case 'unicode':
             $transcodedText = $this->applyAipaitaiOption($aipaitai, $text);
             break;
         case 'unicode &#xxxx;':
             $transcodedText = $this->unicodeToHTMLEntity($this->applyAipaitaiOption($aipaitai, $text));
             break;
         case 'unicode \\uxxxx':
             $transcodedText = $this->unicodeToSlashUUnicodeString($this->applyAipaitaiOption($aipaitai, $text));
             break;
         case 'unicode url encoding %xx':
             $transcodedText = $this->unicodeToURLEncoding($this->applyAipaitaiOption($aipaitai, $text));
             break;
         case 'nunacom':
             $transcodedText = Nunacom::unicodeToLegacy($text);
             break;
         case 'prosyl':
             $transcodedText = Prosyl::unicodeToLegacy($text);
             break;
         case 'aipainunavik':
             $transcodedText = Aipainunavik::unicodeToLegacy($text);
             break;
     }
     return $transcodedText;
 }

 /**
  * Applies the $aipaitai option shared by every 'unicode*' target format.
  *
  * '1' runs Syllabics::iciUnicodeToLatinAlphabet(), '0' runs
  * Syllabics::iciUnicodeToNoAipaitai(), and any other value leaves the text
  * unchanged. The comparison is the loose one `switch` performs, so e.g. the
  * integers 1 and 0 select the same branches as the strings.
  *
  * NOTE(review): the '1' branch calls iciUnicodeToLatinAlphabet() although
  * the result is then treated as Unicode text - this may have been meant to
  * be an "aipaitai" conversion; confirm against the Syllabics class.
  *
  * @param string $aipaitai Option value, normally '1' or '0'.
  * @param string $text     Unicode text to adjust.
  * @return string The adjusted text.
  */
 private function applyAipaitaiOption($aipaitai, $text)
 {
     switch ($aipaitai) {
         case '1':
             return Syllabics::iciUnicodeToLatinAlphabet($text);
         case '0':
             return Syllabics::iciUnicodeToNoAipaitai($text);
         default:
             return $text;
     }
 }