/**
 * Checks Grepper::escapeChars() on a syllabic query with a wildcard in the middle.
 *
 * The query is the syllabic rendering of 'inuit', a '\S*?' wildcard, and the
 * same syllabic word again. The expected value writes every byte of the
 * syllabic text as a \x{..} escape and leaves the wildcard as it is.
 */
public function test_escapeChars__Case_wildchar_inside()
{
    $wildcard = '\\S*?';
    $syllabicWord = Syllabics::latinAlphabetToUnicode('inuit', '0');

    // Both sides of the wildcard are the same word, so the escaped form repeats too.
    $escapedWord = '\\x{e1}\\x{90}\\x{83}\\x{e1}\\x{93}\\x{84}\\x{e1}\\x{90}\\x{83}\\x{e1}\\x{91}\\x{a6}';
    $expected = $escapedWord . $wildcard . $escapedWord;

    $actual = Grepper::escapeChars($syllabicWord . $wildcard . $syllabicWord);

    $this->assertEquals($expected, $actual, "The escaping has failed.");
}
/**
 * Checks _grep() for an Inuktitut query that starts with a wildcard and is
 * searched in its syllabic form.
 *
 * The object under test receives a Grepper built from the roman pattern and
 * its syllabic transcoding; the grep result is compared with a fixed line.
 */
public function test__grep__Case_syllabic_inuktitut_query_with_wild_char()
{
    $romanPattern = '\\S*valliasimaliratta';
    $syllabicPattern = Syllabics::latinAlphabetToUnicode($romanPattern, '0');

    $this->object->set_grepper(new Grepper($romanPattern, $syllabicPattern, 'iu', 'syll'));

    $this->assertEquals(
        'qimirruvalliasimaliratta:2:25673075:59233783',
        $this->object->_grep(),
        "The grepping returned a wrong value"
    );
}
/**
 * Checks Syllabics::latinAlphabetToUnicode() against hand-written code points.
 *
 * Two cases are covered: 'inuit' with the second argument set to false, and
 * 'taima' with the second argument set to true.
 */
public function testAlphabet_latin_a_unicode()
{
    // Case 1: 'inuit', second argument false -> four syllabic characters.
    $expectedInuit = utf8::numeric_to_utf8(array(0x1403, 0x14c4, 0x1403, 0x1466));
    $actualInuit = Syllabics::latinAlphabetToUnicode('inuit', false);
    $this->assertEquals($expectedInuit, $actualInuit, "Erreur");

    // Case 2: 'taima', second argument true -> two syllabic characters.
    $expectedTaima = utf8::numeric_to_utf8(array(0x144c, 0x14aa));
    $actualTaima = Syllabics::latinAlphabetToUnicode('taima', true);
    $this->assertEquals($expectedTaima, $actualTaima, "Erreur");
}
/**
 * Processes one query: stores the request parameters, derives the search
 * patterns, builds the Grepper, collects the term distribution and the
 * matching alignments, and records how long the whole thing took.
 *
 * @param mixed $query_orig               Query as typed; '*' stands for "0 or more characters".
 * @param mixed $query_language           Language of the query; 'iu' enables the roman/syllabic handling.
 * @param mixed $output_inuktitut_script  Requested Inuktitut script; 'syl' triggers a syllabic pattern.
 * @param mixed $max_variants             Stored on the instance as max_variants.
 * @param mixed $max_parts                Stored on the instance as max_parts.
 * @param mixed $show_list_flag           Stored on the instance as show_list_flag.
 * @param mixed $list_order               Stored on the instance as list_order.
 * @param mixed $lang                     Stored on the instance as lang.
 */
function run($query_orig, $query_language, $output_inuktitut_script, $max_variants, $max_parts, $show_list_flag, $list_order, $lang)
{
    $logger = Logger::getLogger('ProcessQuery.run');
    $logger->debug('$query_orig= ' . $query_orig);

    // Keep every request parameter on the instance.
    $this->query_orig = $query_orig;
    $this->query_language = $query_language;
    $this->output_inuktitut_script = $output_inuktitut_script;
    $this->max_variants = $max_variants;
    $this->max_parts = $max_parts;
    $this->show_list_flag = $show_list_flag;
    $this->list_order = $list_order;
    $this->lang = $lang;

    // Turn the query into a regular expression: each '*' becomes a lazy run of non-space characters.
    $this->query_regexp = preg_replace('/\\*/', '\\S*?', $this->query_orig);
    $this->query_regexpSyllabic = '';

    $isInuktitut = ($this->query_language == 'iu');
    if ($isInuktitut && preg_match("/\\p{Canadian_Aboriginal}/u", $this->query_regexp)) {
        // The query was typed in syllabics: keep it as the syllabic pattern
        // and derive the roman pattern from it.
        $this->query_regexpSyllabic = $this->query_regexp;
        $this->query_regexp = Syllabics::unicodeToLatinAlphabet($this->query_regexpSyllabic);
    } elseif ($isInuktitut && $this->output_inuktitut_script == 'syl') {
        // Roman query, syllabic output requested: derive the syllabic pattern.
        $this->query_regexpSyllabic = Syllabics::latinAlphabetToUnicode($this->query_regexp, '0');
    }

    $logger->debug("\$this->query_regexp= {$this->query_regexp}");
    $logger->debug("\$this->query_regexpSyllabic= {$this->query_regexpSyllabic}");

    // Timing starts once the patterns are ready.
    $startedAt = time();

    // The Grepper tells the format of the query and which files to look into for that query.
    $this->grepper = new Grepper(
        $this->query_regexp,
        $this->query_regexpSyllabic,
        $this->query_language,
        $this->output_inuktitut_script
    );

    // Report the number of matching terms, total frequency, and distribution.
    // Per the original notes, this is looked for in the files
    // InuktitutWordsIndex.txt, InuktitutWordsSyllabicIndex.txt and
    // EnglisWordsIndex.txt, and the term distribution holds:
    //   words           : the words of the query
    //   total_frequency : number of sentences containing the query
    //   indices         : positions of those sentences in the file SingleLineAlignment*.txt
    //   (one entry per word of the query) : the distribution for that word
    $this->getDistribution();

    if (count($this->term_distribution->indices) > 0) {
        $this->get_all_matching_alignments_from_td();
        // The total frequency is overwritten with the number of alignments actually collected.
        $this->term_distribution->total_frequency = count($this->alignments);
    }

    // Elapsed time, in whole seconds.
    $this->elapsed_time = time() - $startedAt;
}
/**
 * Transcodes roman-alphabet text to the requested format by way of Unicode.
 *
 * The text is first converted with Syllabics::latinAlphabetToUnicode() and the
 * result is then passed to transcodeFromUnicode() for the target format.
 *
 * @param mixed $toFormat  Target format; 'aipainunavik' forces the aipaitai setting to '1'.
 * @param mixed $aipaitai  Aipaitai setting passed to both conversion steps (unless overridden).
 * @param mixed $text      Text in the roman alphabet.
 *
 * @return mixed Whatever transcodeFromUnicode() returns for the converted text.
 */
function transcodeFromLatinAlphabet($toFormat, $aipaitai, $text)
{
    // The 'aipainunavik' target always uses '1', whatever the caller passed.
    $effectiveAipaitai = ($toFormat === 'aipainunavik') ? '1' : $aipaitai;

    $unicodeText = Syllabics::latinAlphabetToUnicode($text, $effectiveAipaitai);

    return $this->transcodeFromUnicode($toFormat, $effectiveAipaitai, $unicodeText);
}