Esempio n. 1
0
 /**
  * Checks Grepper::escapeChars() on a syllabic query that has a '\S*?'
  * wildcard between two copies of the same word.
  *
  * The expected value spells the word as a sequence of \x{..} byte escapes
  * and keeps the wildcard itself unchanged.
  */
 public function test_escapeChars__Case_wildchar_inside()
 {
     // Syllabic rendering of 'inuit', used on both sides of the wildcard.
     $syllabicWord = Syllabics::latinAlphabetToUnicode('inuit', '0');
     $escapedQuery = Grepper::escapeChars($syllabicWord . '\\S*?' . $syllabicWord);

     // The same escaped word is expected before and after the wildcard.
     $escapedWord = '\\x{e1}\\x{90}\\x{83}\\x{e1}\\x{93}\\x{84}\\x{e1}\\x{90}\\x{83}\\x{e1}\\x{91}\\x{a6}';
     $wanted = $escapedWord . '\\S*?' . $escapedWord;

     $this->assertEquals($wanted, $escapedQuery, "The escaping has failed.");
 }
 /**
  * Checks _grep() for an Inuktitut query that starts with a '\S*' wildcard.
  *
  * The Grepper receives the roman-alphabet query together with the result of
  * Syllabics::latinAlphabetToUnicode() on that same query, and the grep
  * result is compared against a single fixed record.
  */
 public function test__grep__Case_syllabic_inuktitut_query_with_wild_char()
 {
     $romanQuery = '\\S*valliasimaliratta';
     $syllabicQuery = Syllabics::latinAlphabetToUnicode($romanQuery, '0');

     // Install the grepper on the object under test, then run the search.
     $this->object->set_grepper(new Grepper($romanQuery, $syllabicQuery, 'iu', 'syll'));
     $actual = $this->object->_grep();

     $wanted = 'qimirruvalliasimaliratta:2:25673075:59233783';
     $this->assertEquals($wanted, $actual, "The grepping returned a wrong value");
 }
 /**
  * Checks Syllabics::latinAlphabetToUnicode() against strings built from
  * explicit code points with utf8::numeric_to_utf8().
  *
  * Two cases are covered: 'inuit' with the second argument set to false and
  * 'taima' with the second argument set to true.
  */
 public function testAlphabet_latin_a_unicode()
 {
     // Case 1: 'inuit', second argument false.
     $wantedInuit = utf8::numeric_to_utf8(array(0x1403, 0x14c4, 0x1403, 0x1466));
     $actualInuit = Syllabics::latinAlphabetToUnicode('inuit', false);
     $this->assertEquals($wantedInuit, $actualInuit, "Erreur");

     // Case 2: 'taima', second argument true.
     $wantedTaima = utf8::numeric_to_utf8(array(0x144c, 0x14aa));
     $actualTaima = Syllabics::latinAlphabetToUnicode('taima', true);
     $this->assertEquals($wantedTaima, $actualTaima, "Erreur");
 }
Esempio n. 4
0
 /**
  * Executes one query: records the request settings on the object, derives
  * the roman and syllabic search patterns, builds the Grepper, gathers the
  * term distribution and alignments, and records the elapsed time.
  *
  * @param mixed $query_orig               Query as entered; '*' acts as a wildcard.
  * @param mixed $query_language           Query language code; 'iu' enables syllabic handling.
  * @param mixed $output_inuktitut_script  Output script; 'syl' requests a syllabic pattern.
  * @param mixed $max_variants             Stored as-is in $this->max_variants.
  * @param mixed $max_parts                Stored as-is in $this->max_parts.
  * @param mixed $show_list_flag           Stored as-is in $this->show_list_flag.
  * @param mixed $list_order               Stored as-is in $this->list_order.
  * @param mixed $lang                     Stored as-is in $this->lang.
  */
 function run($query_orig, $query_language, $output_inuktitut_script, $max_variants, $max_parts, $show_list_flag, $list_order, $lang)
 {
     $log = Logger::getLogger('ProcessQuery.run');
     $log->debug('$query_orig= ' . $query_orig);

     // Keep every request setting on the instance for the later steps.
     $this->query_orig = $query_orig;
     $this->query_language = $query_language;
     $this->output_inuktitut_script = $output_inuktitut_script;
     $this->max_variants = $max_variants;
     $this->max_parts = $max_parts;
     $this->show_list_flag = $show_list_flag;
     $this->list_order = $list_order;
     $this->lang = $lang;

     // Turn the query into a regular expression: each '*' becomes a lazy
     // run of non-whitespace characters.
     $this->query_regexp = preg_replace('/\\*/', '\\S*?', $this->query_orig);
     $this->query_regexpSyllabic = '';

     $isInuktitut = $this->query_language == 'iu';
     if ($isInuktitut && preg_match("/\\p{Canadian_Aboriginal}/u", $this->query_regexp)) {
         // The query already contains syllabics: keep it as the syllabic
         // pattern and derive the roman pattern from it.
         $syllabicPattern = $this->query_regexp;
         $this->query_regexpSyllabic = $syllabicPattern;
         $this->query_regexp = Syllabics::unicodeToLatinAlphabet($syllabicPattern);
     } elseif ($isInuktitut && $this->output_inuktitut_script == 'syl') {
         // Roman query with syllabic output requested: derive the syllabic
         // pattern from the roman one.
         $this->query_regexpSyllabic = Syllabics::latinAlphabetToUnicode($this->query_regexp, '0');
     }

     $log->debug('$this->query_regexp= ' . $this->query_regexp);
     $log->debug('$this->query_regexpSyllabic= ' . $this->query_regexpSyllabic);

     // Wall-clock start, in whole seconds.
     $startedAt = time();

     // The Grepper carries both patterns plus the language and script choice.
     $this->grepper = new Grepper(
         $this->query_regexp,
         $this->query_regexpSyllabic,
         $this->query_language,
         $this->output_inuktitut_script
     );

     // Fill $this->term_distribution. According to the original notes it
     // reports the matching words, their total frequency, the positions
     // ("indices") of the matching sentences in the alignment file, and a
     // per-word distribution, read from the word index files.
     $this->getDistribution();

     $matchCount = count($this->term_distribution->indices);
     if ($matchCount > 0) {
         $this->get_all_matching_alignments_from_td();
         // The total frequency is replaced by the number of alignments found.
         $this->term_distribution->total_frequency = count($this->alignments);
     }

     // Elapsed time, in whole seconds.
     $this->elapsed_time = time() - $startedAt;
 }
Esempio n. 5
0
 /**
  * Transcodes roman-alphabet text to the requested format in two steps:
  * first to Unicode via Syllabics::latinAlphabetToUnicode(), then from
  * Unicode to $toFormat via transcodeFromUnicode().
  *
  * @param mixed $toFormat  Target format; 'aipainunavik' forces the aipaitai option to '1'.
  * @param mixed $aipaitai  Aipaitai option passed to both steps unless overridden.
  * @param mixed $text      Roman-alphabet text to transcode.
  * @return mixed Whatever transcodeFromUnicode() returns for the intermediate Unicode text.
  */
 function transcodeFromLatinAlphabet($toFormat, $aipaitai, $text)
 {
     // The 'aipainunavik' target always uses the '1' setting.
     $aipaitaiOption = ($toFormat === 'aipainunavik') ? '1' : $aipaitai;

     $unicodeText = Syllabics::latinAlphabetToUnicode($text, $aipaitaiOption);

     return $this->transcodeFromUnicode($toFormat, $aipaitaiOption, $unicodeText);
 }