Example #1
0
/**
 * Converts a string escaped with JavaScript's escape() function back into a
 * plain string in the requested character set (UTF-8 by default).
 * Modified function from http://pure-essence.net/stuff/code/utf8RawUrlDecode.phps
 *
 * Two escape forms are recognised, exactly as JavaScript's unescape() does:
 *  - "%uXXXX" (lowercase "u", four hex digits) is passed to code2utf();
 *  - "%XX" (two hex digits) is decoded to the single byte with that value.
 * Any "%" that is not followed by a complete, valid hex sequence (for example
 * a trailing "%", "%zz" or a truncated "%u12") is copied through unchanged
 * rather than being decoded to a NUL byte or a wrong character.
 *
 * @param string $source   String escaped with JavaScript's escape() function
 * @param string $iconv_to Destination character set, used as the second
 *                         parameter of iconv(). Default is UTF-8, in which
 *                         case iconv() is not called at all.
 * @return string The decoded string; when a conversion is requested this is
 *                whatever iconv() returns (false if the conversion fails).
 */
function js_unescape($source, $iconv_to = 'UTF-8')
{
    $hexDigits = '0123456789abcdefABCDEF';
    $source = (string) $source;
    $decodedStr = '';
    $pos = 0;
    $len = strlen($source);
    while ($pos < $len) {
        $charAt = $source[$pos];
        if ($charAt === '%') {
            if ($pos + 1 < $len && $source[$pos + 1] === 'u') {
                // Candidate unicode escape: needs exactly four hex digits.
                $unicodeHexVal = (string) substr($source, $pos + 2, 4);
                if (strspn($unicodeHexVal, $hexDigits) === 4) {
                    $decodedStr .= code2utf(hexdec($unicodeHexVal));
                    $pos += 6;
                    continue;
                }
            } else {
                // Candidate single-byte escape: needs exactly two hex digits.
                $hexVal = (string) substr($source, $pos + 1, 2);
                if (strspn($hexVal, $hexDigits) === 2) {
                    $decodedStr .= chr(hexdec($hexVal));
                    $pos += 3;
                    continue;
                }
            }
            // Malformed or truncated escape: fall through and keep the "%"
            // literally; the following characters are handled on later turns.
        }
        $decodedStr .= $charAt;
        $pos++;
    }
    if ($iconv_to !== 'UTF-8') {
        $decodedStr = iconv('UTF-8', $iconv_to, $decodedStr);
    }
    return $decodedStr;
}
Example #2
0
/**
 * Replaces decimal numeric character references ("&#NNN;") in a string.
 *
 * Each reference's number is passed to code2utf() and the result is run
 * through iconv("UNICODE", "GB2312", ...), exactly as before. The replacement
 * is done in one pass with a callback, so the matched text is never spliced
 * into a new regular expression and the converted text is never interpreted
 * as a back-reference.
 *
 * Only references made of decimal digits are converted; anything else between
 * "&#" and ";" is left untouched.
 *
 * NOTE(review): despite the function name, the converted text is emitted as
 * GB2312, and the "UNICODE" source charset depends on what code2utf() returns
 * in this project - confirm both against the callers before changing them.
 *
 * @param string $string Text that may contain "&#NNN;" references
 * @return string|null The text with references replaced (null if PCRE fails)
 */
function unicode2UTF8($string)
{
    return preg_replace_callback(
        '/&#([0-9]+);/',
        function ($match) {
            // iconv() returns false on failure; cast so the reference is then
            // replaced by an empty string, as the array-based version did.
            return (string) iconv("UNICODE", "GB2312", code2utf($match[1]));
        },
        $string
    );
}
Example #3
0
 /**
  * Converts a hexadecimal code point string to its output representation.
  *
  * When $lo is false and the code point is below 128, the value is kept as a
  * hexadecimal character reference built from the original $hex text. In
  * every other case the numeric value is handed to code2utf() together
  * with $lo.
  *
  * @param string $hex Code point written in hexadecimal (no prefix)
  * @param bool   $lo  Forwarded to code2utf(); false keeps values < 128 as references
  * @return string
  */
 function codeHex2utf($hex, $lo = true)
 {
     $codepoint = hexdec($hex);
     $keepAsReference = $codepoint < 128 && !$lo;

     return $keepAsReference
         ? '&#x' . $hex . ';'
         : code2utf($codepoint, $lo);
 }
Example #4
0
 /**
  * Reorders the characters of a line for display according to their bidi
  * embedding levels, then rebuilds the chunk arrays in the new visual order.
  *
  * Steps performed here:
  *  1. Flatten the per-chunk character data into one list ($bidiData), tagging
  *     every character with the id of the chunk it came from.
  *  2. Return immediately, leaving all arguments untouched, if no character
  *     has a level above 0.
  *  3. Reset trailing whitespace to the paragraph embedding level (rule L1).
  *  4. For each level from the highest down to 1, reverse every contiguous run
  *     of characters at that level or higher (rule L2), swapping mirrored
  *     code points on the way (rule L4).
  *  5. Rebuild $content, $cOTLdata and $chunkorder from the reordered list.
  *
  * @param array  $chunkorder By reference. Read as chunk index => chunk id;
  *                           rewritten with the chunk id of each output chunk.
  * @param array  $content    By reference. Only its count is read; rewritten
  *                           as one UTF-8 string per output chunk.
  * @param array  $cOTLdata   By reference. Per chunk: 'char_data' (entries with
  *                           'uni' and optional 'type', 'level', 'orig_type'),
  *                           'group' (indexed per character) and optional
  *                           'GPOSinfo'. Rewritten with only 'group' and,
  *                           where present, 'GPOSinfo' for each output chunk.
  * @param string $blockdir   'rtl' selects paragraph embedding level 1; any
  *                           other value selects 0.
  * @return void
  */
 function _bidiReorder(&$chunkorder, &$content, &$cOTLdata, $blockdir)
 {
     $bidiData = array();
     // First combine into one array (and get the highest level in use)
     $numchunks = count($content);
     $maxlevel = 0;
     for ($nc = 0; $nc < $numchunks; $nc++) {
         $numchars = count($cOTLdata[$nc]['char_data']);
         for ($i = 0; $i < $numchars; ++$i) {
             // Build one flat record per character; optional keys are copied
             // only when present in the source data.
             $carac = array();
             if (isset($cOTLdata[$nc]['GPOSinfo'][$i])) {
                 $carac['GPOSinfo'] = $cOTLdata[$nc]['GPOSinfo'][$i];
             }
             $carac['uni'] = $cOTLdata[$nc]['char_data'][$i]['uni'];
             if (isset($cOTLdata[$nc]['char_data'][$i]['type'])) {
                 $carac['type'] = $cOTLdata[$nc]['char_data'][$i]['type'];
             }
             if (isset($cOTLdata[$nc]['char_data'][$i]['level'])) {
                 $carac['level'] = $cOTLdata[$nc]['char_data'][$i]['level'];
             }
             if (isset($cOTLdata[$nc]['char_data'][$i]['orig_type'])) {
                 $carac['orig_type'] = $cOTLdata[$nc]['char_data'][$i]['orig_type'];
             }
             $carac['group'] = $cOTLdata[$nc]['group'][$i];
             $carac['chunkid'] = $chunkorder[$nc];
             // gives font id and/or object ID
             // A character without a 'level' counts as level 0 here.
             $maxlevel = max(isset($carac['level']) ? $carac['level'] : 0, $maxlevel);
             $bidiData[] = $carac;
         }
     }
     // Nothing is above level 0, so no reordering is needed; the by-reference
     // arguments are left exactly as they were passed in.
     if ($maxlevel == 0) {
         return;
     }
     $numchars = count($bidiData);
     // L1. On each line, reset the embedding level of the following characters to the paragraph embedding level:
     //   1. Segment separators (Tab) 'S',
     //   2. Paragraph separators 'B',
     //   3. Any sequence of whitespace characters 'WS' preceding a segment separator or paragraph separator, and
     //   4. Any sequence of whitespace characters 'WS' at the end of the line.
     // The types of characters used here are the original types, not those modified by the previous phase (cf. N1 and N2).
     // Because a Paragraph Separator breaks lines, there will be at most one per line, at the end of that line.
     // Only case 4 (trailing whitespace) is handled by the loop below.
     // Set the initial paragraph embedding level
     if ($blockdir == 'rtl') {
         $pel = 1;
     } else {
         $pel = 0;
     }
     // Walk backwards from the last character and stop at the first one that
     // is not whitespace. The loop condition is $i > 0, so index 0 is never
     // examined or reset.
     for ($i = $numchars - 1; $i > 0; $i--) {
         if ($bidiData[$i]['type'] == UCDN::BIDI_CLASS_WS || isset($bidiData[$i]['orig_type']) && $bidiData[$i]['orig_type'] == UCDN::BIDI_CLASS_WS) {
             $bidiData[$i]['level'] = $pel;
         } else {
             break;
         }
     }
     // L2. From the highest level found in the text to the lowest odd level on each line, including intermediate levels not actually present in the text, reverse any contiguous sequence of characters that are at that level or higher.
     // This loop runs for every level down to 1, one full pass over the text per level.
     for ($j = $maxlevel; $j > 0; $j--) {
         $ordarray = array();
         // $revarr collects the current run of characters at level >= $j.
         $revarr = array();
         $onlevel = false;
         for ($i = 0; $i < $numchars; ++$i) {
             if ($bidiData[$i]['level'] >= $j) {
                 $onlevel = true;
                 // L4. A character is depicted by a mirrored glyph if and only if (a) the resolved directionality of that character is R, and (b) the Bidi_Mirrored property value of that character is true.
                 // NOTE(review): this test runs once per level pass, so a
                 // character at level k is tested k times - confirm that the
                 // repeated swap through UCDN::$mirror_pairs is intended.
                 if (isset(UCDN::$mirror_pairs[$bidiData[$i]['uni']]) && $bidiData[$i]['type'] == UCDN::BIDI_CLASS_R) {
                     $bidiData[$i]['uni'] = UCDN::$mirror_pairs[$bidiData[$i]['uni']];
                 }
                 $revarr[] = $bidiData[$i];
             } else {
                 // The run has ended: append it reversed, then the current
                 // lower-level character in its original position.
                 if ($onlevel) {
                     $revarr = array_reverse($revarr);
                     $ordarray = array_merge($ordarray, $revarr);
                     $revarr = array();
                     $onlevel = false;
                 }
                 $ordarray[] = $bidiData[$i];
             }
         }
         // Flush a run that extends to the end of the text.
         if ($onlevel) {
             $revarr = array_reverse($revarr);
             $ordarray = array_merge($ordarray, $revarr);
         }
         $bidiData = $ordarray;
     }
     // Rebuild the three by-reference outputs from the reordered characters.
     $content = array();
     $cOTLdata = array();
     $chunkorder = array();
     $nc = -1;
     // New chunk order ID
     $chunkid = -1;
     foreach ($bidiData as $carac) {
         // A new output chunk starts whenever the chunk id differs from the
         // previous character's, so one input chunk can yield several output
         // chunks after reordering.
         if ($carac['chunkid'] != $chunkid) {
             $nc++;
             $chunkorder[$nc] = $carac['chunkid'];
             $cctr = 0;
             $content[$nc] = '';
             $cOTLdata[$nc]['group'] = '';
         }
         if ($carac['uni'] != 0xfffc) {
             // Object replacement character (65532)
             // Every other character is appended as UTF-8 text with its group value.
             $content[$nc] .= code2utf($carac['uni']);
             $cOTLdata[$nc]['group'] .= $carac['group'];
             if (!empty($carac['GPOSinfo'])) {
                 // The isset() below is always true here, because !empty()
                 // already implies the key is set.
                 if (isset($carac['GPOSinfo'])) {
                     $cOTLdata[$nc]['GPOSinfo'][$cctr] = $carac['GPOSinfo'];
                 }
                 // Odd levels are recorded as RTL, even levels as LTR.
                 $cOTLdata[$nc]['GPOSinfo'][$cctr]['wDir'] = $carac['level'] % 2 ? 'RTL' : 'LTR';
             }
         }
         $chunkid = $carac['chunkid'];
         // The per-chunk counter advances for every character, including a
         // skipped U+FFFC.
         $cctr++;
     }
 }
Example #5
0
/**
 * Decodes HTML character references in a string to UTF-8 text.
 *
 * Handles hexadecimal references ("&#x41;"), decimal references ("&#65;")
 * and the named entities from PHP's HTML_ENTITIES translation table.
 * Numeric references are converted through code2utf(); leading zeros are
 * stripped first because code2utf() has issues with them.
 *
 * All three forms are decoded in a single pass, so text produced by one
 * replacement is never decoded a second time ("&#38;lt;" yields "&lt;").
 * Unknown named entities are left unchanged.
 *
 * The entity table is requested directly in UTF-8. Passing the table through
 * utf8_encode() double-encodes every non-ASCII character on PHP >= 5.4, where
 * the table is already UTF-8 by default.
 *
 * @param string $string Text containing HTML character references
 * @return string|null Decoded UTF-8 text (null if PCRE reports an error)
 */
function html_entity_decode_utf8($string)
{
    // Built once per request: "&name;" => UTF-8 character.
    static $trans_tbl;
    if (!isset($trans_tbl)) {
        $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8'));
    }
    $table = $trans_tbl;

    return preg_replace_callback(
        '~&(?:#[xX]0*([0-9a-fA-F]+)|#0*([0-9]+)|([A-Za-z][A-Za-z0-9]*));~',
        function ($matches) use ($table) {
            // Group 1: hexadecimal reference.
            if (isset($matches[1]) && $matches[1] !== '') {
                return code2utf(hexdec($matches[1]));
            }
            // Group 2: decimal reference.
            if (isset($matches[2]) && $matches[2] !== '') {
                return code2utf($matches[2]);
            }
            // Group 3: named entity; names are case-sensitive, so look up the
            // exact matched text and keep it as-is when it is not in the table.
            return isset($table[$matches[0]]) ? $table[$matches[0]] : $matches[0];
        },
        $string
    );
}
Example #6
0
 function substituteIndic($earr, $lang, $font)
 {
     global $voltdata;
     if (!isset($voltdata[$font])) {
         include_once _MPDF_PATH . 'includes/' . $font . '.volt.php';
         $voltdata[$font] = $volt;
     }
     foreach ($earr as $eid => $char) {
         $earr[$eid] = sprintf("%04s", strtoupper(dechex($char)));
     }
     $vstr = "0020 " . implode(" ", $earr) . " 0020";
     //============================
     // Common Indic Punctuation marks
     // If NOT devanagari
     if ($lang != 'hi') {
         $vstr = str_replace('0964', '007C', $vstr);
         // U+0964 replace with "|"
         $vstr = str_replace('0965', '007C 007C', $vstr);
         // U+0964 replace with "|"
     }
     //============================
     // Tamil numeral for Zero missing Added mPDF 4.2
     if ($lang == 'ta') {
         $vstr = str_replace('0BE6', '0030', $vstr);
         // U+0BEB replace with "0"
     }
     //============================
     // Re-order vowels
     // DEVANAGARI vowel sign matraI[093F] before consonant
     if ($lang == 'hi') {
         $prebasedvowels = "(093F)";
         $nukta = "093C";
         $halant = "094D";
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
         // vowel sign pre-based shift left
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . ' ' . $nukta . '/', '\\2 \\1 ' . $nukta, $vstr);
         // before NUKTA
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
         // before CHAR HALANT  ==  VIRAMA
     } else {
         if ($lang == 'bn') {
             // Khanda Ta 09CE not in font -> replace with 09A4|09CD
             $vstr = preg_replace('/09CE/', '09A4 09CD 200D', $vstr);
             // mPDF 5.3.09
             // BENGALI double-part vowels [09CB 09C7 09BE][09CC 09C7 09D7]
             $vstr = str_replace('09CB', '09C7 09BE', $vstr);
             // convert to 2 parts
             $vstr = str_replace('09CC', '09C7 09D7', $vstr);
             // 09C7 pre-based is then shifted below
             $prebasedvowels = "(09BF|09C7|09C8)";
             $nukta = "09BC";
             $halant = "09CD";
             // mPDF 5.0.044
             $bnfullcons = "0995|0996|0997|0998|0999|099A|099B|099C|099D|099F|09A0|09A1|09A2|09A3|09A4|09A5|09A6|09A7|09A8|09AA|09AB|09AC|09AD|09AE|09AF|09B0|09B2|09B6|09B7|09B8|09B9|09DC|09DD|09DF";
             $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
             // vowel sign pre-based shift left
             $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . ' ' . $nukta . '/', '\\2 \\1 ' . $nukta, $vstr);
             // before NUKTA
             $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
             // before CHAR HALANT
             // mPDF 5.0.044
             // .. and shifting left-based vowel further to the left in case 3 consonants together.
             $vstr = preg_replace('/(' . $bnfullcons . ') ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
             // mPDF 5.0.044
             // If left-based vowel has now been shifted to left of RA/Halant (09B0/09CD)
             // Convert here to above-line form (E068) as it would get missed later
             // e.g. 09B0 09CD 09AD 09C7 would be changed above =>
             // e.g. 09C7 09B0 09CD 09AD. The 09B0 09CD should => E068
             // ??? need to add 09BF as well (09BF|09C7|09C8)
             $vstr = preg_replace('/(09C7|09C8) 09B0 09CD/', '\\1 E068', $vstr);
         } else {
             if ($lang == 'gu') {
                 $prebasedvowels = "(0ABF)";
                 $nukta = "0ABC";
                 $halant = "0ACD";
                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
                 // vowel sign pre-based shift left
                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . ' ' . $nukta . '/', '\\2 \\1 ' . $nukta, $vstr);
                 // before NUKTA
                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                 // before CHAR HALANT
             } else {
                 if ($lang == 'pa') {
                     $prebasedvowels = "(0A3F)";
                     $nukta = "0A3C";
                     $halant = "0A4D";
                     $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
                     // vowel sign pre-based shift left
                     $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . ' ' . $nukta . '/', '\\2 \\1 ' . $nukta, $vstr);
                     // before NUKTA
                     $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                     // before CHAR HALANT
                 } else {
                     if ($lang == 'ta') {
                         // Shrii (Shree)
                         $vstr = preg_replace('/0BB6 0BCD 0BB0 0BC0/', 'E04B', $vstr);
                         // TAMIL double-part vowels [0BCA 0BC6 0BBE][0BCB 0BC7 0BBE][0BCC 0BC6 0BD7]
                         $vstr = preg_replace('/0BCA/', '0BC6 0BBE', $vstr);
                         // convert to 2 parts
                         $vstr = preg_replace('/0BCB/', '0BC7 0BBE', $vstr);
                         // pre-based is then shifted below
                         $vstr = preg_replace('/0BCC/', '0BC6 0BD7', $vstr);
                         $prebasedvowels = "(0BC6|0BC7|0BC8)";
                         // No nukta
                         $halant = "0BCD";
                         // Doesn't seem to move most in front of halanted consonants
                         $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
                         // vowel sign pre-based shift left
                         // ? Only for special case KSS (already moved to left of 0BB7)
                         $vstr = preg_replace('/0B95 ' . $halant . ' ' . $prebasedvowels . ' 0BB7/', '\\1 0B95 ' . $halant . ' 0BB7', $vstr);
                     } else {
                         if ($lang == 'or') {
                             // ORIYA double-part vowels []
                             $vstr = str_replace('0B48', '0B47 0B56', $vstr);
                             // 2-part Vowel
                             $vstr = str_replace('0B4B', '0B47 0B3E', $vstr);
                             // 2-part Vowel
                             $vstr = str_replace('0B4C', '0B47 0B57', $vstr);
                             // 2-part Vowel
                             $orprebasedvowels = "(0B47)";
                             // No nukta
                             $halant = "0B4D";
                             $vstr = preg_replace('/([A-F0-9]{4}) ' . $orprebasedvowels . '/', '\\2 \\1', $vstr);
                             // vowel sign pre-based shift left
                             $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $orprebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                             // before CHAR HALANT
                             $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $orprebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                             // before CHAR HALANT
                         } else {
                             if ($lang == 'ml') {
                                 // Chillus - old forms - remove ZWNJ after
                                 // This font Volt rules recognises e.g. "Na Halant(Virama)" as ChilluN
                                 $vstr = preg_replace('/(0D23 0D4D|0D28 0D4D|0D30 0D4D|0D32 0D4D|0D33 0D4D) 200D/', '\\1', $vstr);
                                 // See Chillus in Unicode [http://en.wikipedia.org/wiki/Malayalam_script]
                                 $vstr = str_replace('0D7A', '0D23 0D4D', $vstr);
                                 // [mlymChilluNn]
                                 $vstr = str_replace('0D7B', '0D28 0D4D', $vstr);
                                 // [mlymChilluN]
                                 $vstr = str_replace('0D7C', '0D30 0D4D', $vstr);
                                 // [mlymChilluR]
                                 $vstr = str_replace('0D7D', '0D32 0D4D', $vstr);
                                 // [mlymChilluL]
                                 $vstr = str_replace('0D7E', '0D33 0D4D', $vstr);
                                 // [mlymChilluLl]
                                 /*
                                 		// Chillus - 0D7A-0D7E not in font directly, but as E005-E009
                                 		$vstr = preg_replace('/0D23 0D4D 200D/','0D7A', $vstr);
                                 		$vstr = preg_replace('/0D28 0D4D 200D/','0D7B', $vstr);
                                 		$vstr = preg_replace('/0D30 0D4D 200D/','0D7C', $vstr);
                                 		$vstr = preg_replace('/0D32 0D4D 200D/','0D7D', $vstr);
                                 		$vstr = preg_replace('/0D33 0D4D 200D/','0D7E', $vstr);
                                 
                                 		$vstr = preg_replace('/0D7F/','E004', $vstr);	// [mlymChilluK] 
                                 		$vstr = preg_replace('/0D7A/','E005', $vstr);	// [mlymChilluNn] 
                                 		$vstr = preg_replace('/0D7B/','E006', $vstr);	// [mlymChilluN] 
                                 		$vstr = preg_replace('/0D7C/','E007', $vstr);	// [mlymChilluR] 
                                 		$vstr = preg_replace('/0D7D/','E008', $vstr);	// [mlymChilluL] 
                                 		$vstr = preg_replace('/0D7E/','E009', $vstr);	// [mlymChilluLl] 
                                 */
                                 // MALAYALAM double-part vowels []
                                 $vstr = str_replace('0D4A', '0D46 0D3E', $vstr);
                                 // 2-part Vowel
                                 $vstr = str_replace('0D4B', '0D47 0D3E', $vstr);
                                 // 2-part Vowel
                                 $vstr = str_replace('0D4C', '0D46 0D57', $vstr);
                                 // 2-part Vowel
                                 $mlprebasedvowels = "(0D46|0D47|0D48)";
                                 // No nukta
                                 $halant = "0D4D";
                                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $mlprebasedvowels . '/', '\\2 \\1', $vstr);
                                 // vowel sign pre-based shift left
                                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $mlprebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                                 // before CHAR HALANT
                             } else {
                                 if ($lang == 'te') {
                                     // TELUGU double-part vowels [0C48 -> 0C46 0C56]
                                     $vstr = str_replace('0C48', '0C46 0C56', $vstr);
                                     // 2-part Vowel
                                     $prebasedvowels = "(0C46)";
                                     $abvvowels = "(0C3E|0C3F|0C40|0C46|0C47|0C4A|0C4B|0C4C|0C55)";
                                     // No nukta
                                     $halant = "0C4D";
                                     $tefullforms = "0C15|0C17|0C18|0C1A|0C1B|0C1C|0C1D|0C20|0C21|0C22|0C24|0C25|0C26|0C27|0C28|0C2A|0C2B|0C2D|0C2E|0C2F|0C30|0C33|0C35|0C36|0C37|0C38|0C39|E028|E029|E02A|E02B|E078|E07A|E07B";
                                     $vstr = preg_replace('/(' . $tefullforms . ') ' . $halant . ' (' . $tefullforms . ') ' . $abvvowels . '/', '\\1 \\3 ' . $halant . ' \\2', $vstr);
                                     // before HALANT
                                 } else {
                                     if ($lang == 'kn') {
                                         // KANNADA double-part vowels [0CC8 -> 0CC6 0CD6]
                                         $vstr = str_replace('0CC0', '0CBF 0CD5', $vstr);
                                         // 2-part Vowel
                                         $vstr = str_replace('0CC7', '0CC6 0CD5', $vstr);
                                         // 2-part Vowel
                                         $vstr = str_replace('0CC8', '0CC6 0CD6', $vstr);
                                         // 2-part Vowel AI - no glyph for single
                                         $vstr = str_replace('0CCA', '0CC6 0CC2', $vstr);
                                         // 2-part Vowel
                                         $vstr = str_replace('0CCB', '0CC6 0CC2 0CD5', $vstr);
                                         // 2-part Vowel
                                         $prebasedvowels = "(0CBF|0CC6)";
                                         $halant = "0CCD";
                                     }
                                 }
                             }
                         }
                     }
                 }
             }
         }
     }
     //============================
     // SPECIALS
     // DEVANAGARI Ra Halant Ra
     if ($lang == 'hi') {
         $vstr = str_replace('0930 094D 0930', 'E05D 0930', $vstr);
         // Ra Halant Ra => halfRa FullRa
     }
     // GUJARATI
     if ($lang == 'gu') {
         $vstr = str_replace('0AB0 0AC2', 'E02E', $vstr);
         // Ra VowelUu => SpecialForm RaUu
     }
     // TELUGU Ra Halant <Consonant> Halant => halfRa Halant<Consonant> Halant
     if ($lang == 'te') {
         $vstr = preg_replace('/0C30 0C4D ([A-F0-9]{4}) 0C4D/', 'E021 0C4D \\1 0C4D', $vstr);
     }
     // KANNADA
     // Reph at end of word becomes E0CC instead of E00B
     if ($lang == 'kn') {
         $vstr = str_replace('0CB0 0CCD 0020', 'E0CC 0020', $vstr);
         // mPDF 5.3.87
     }
     //============================
     // MAIN BIT FROM VOLT RULES
     foreach ($voltdata[$font] as $rid => $reps) {
         //echo $rid . ':  ' . $vstr.'<br />';
         $vstr = preg_replace('/' . $reps['match'] . '/', $reps['replace'], $vstr);
     }
     //echo $vstr.'<br />'; exit;
     //============================
     // SPECIALS
     // KANNADA
     // <Base> <BelowBase1> [<BelowBase2> ] MatraI -> <Base/MatraI ligature> <Belowbase1> etc
     if ($lang == 'kn') {
         $matraI = "0CBF";
         $knbase = preg_split('/\\|/', "0C95|0C96|0C97|0C98|0C9A|0C9B|0C9C|0C9D|0CA0|0CA1|0CA2|0CA3|0CA4|0CA5|0CA6|0CA7|0CA8|0CAA|0CAB|0CAC|0CAD|0CAE|0CAF|0CB0|0CB2|0CB3|0CB5|0CB6|0CB7|0CB8|0CB9|E0A3|E07D|E07E");
         $knmatraIligature = preg_split('/\\|/', "E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A4|E0A1|E0A2");
         $belowbase1 = "E02E|E02F|E030|E031|E032|E033|E034|E035|E036|E037|E038|E039|E03A|E03B|E03C|E03D|E03E|E03F|E040|E041|E042|E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E|E04F|E050|E081";
         $belowbase2 = "E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E081";
         for ($i = 0; $i < count($knbase); $i++) {
             $vstr = preg_replace('/' . $knbase[$i] . ' (' . $belowbase1 . ') (' . $belowbase2 . ') ' . $matraI . '/', $knmatraIligature[$i] . ' \\1 \\2', $vstr);
             $vstr = preg_replace('/' . $knbase[$i] . ' (' . $belowbase1 . ') ' . $matraI . '/', $knmatraIligature[$i] . ' \\1', $vstr);
         }
     }
     // KANNADA
     // [KanTtaFull] [matraI] => [KanTtaPartial] [matraI]
     if ($lang == 'kn') {
         $vstr = preg_replace('/0C9F ' . $matraI . '/', 'E015 ' . $matraI, $vstr);
     }
     // ORIYA
     if ($lang == 'or') {
         // SpecialCase Ra[0B30] Halant still left before [oryaFullNnNna] => E00F
         $vstr = preg_replace('/0B30 ' . $halant . ' E00F/', 'E00F E069', $vstr);
         // convert to Reph
     }
     //============================
     // SHIFT REPH
     // DEVANAGARI Shift Reph [E015]
     if ($lang == 'hi') {
         // FIRSTLY - halfRa = E05D - Change this to Reph [E015]
         $himatchhalfforms = "E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E|E04F|E050|E051|E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E075|E076|E077|E078|E079|E07A|E07B|E07C|E07D|E07E|E07F|E080|E081|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E0D3|E0D4|E0D5|E0D6|E0D7|E0D8|E0D9|E0DA|E0DB|E0DC|E0DD|E0DE|E0DF|E0E0|E0E1|E0E2|E0E3|E0E4|E0E5|E0E6|E0E7|E0E8|E0E9|E0EA|E0EB|E0EC|E0ED|E0EE|E0EF|E0F0|E0F1|E0F2|E0F3|E0F4|E0F5|E0F6|E0F7|E0F8|E0F9|E0FA|E0FB|E0FC|E0FD|E0FE|E0FF|E100|E101|E102|E103|E104|E105|E106|E107|E108|E109|E10A|E10B|E10C|E10D|E10E|E10F|E110|E111|E112|E113|E114|E115|E116|E117|E118|E119|E11A|E13D|E13E|E13F|E140|E141|E142|E143|E144|E145";
         $himatchfullforms = "0915|0916|0917|0918|0919|091A|091B|091C|091D|091E|091F|0920|0921|0922|0923|0924|0925|0926|0927|0928|092A|092B|092C|092D|092E|092F|0930|0932|0933|0935|0936|0937|0938|0939|E028|E029|0958|0959|095A|E02A|E02B|E02C|E02D|095B|E02E|E02F|E030|E031|095C|095D|E032|E033|E034|E035|E036|0929|E037|095E|E038|E039|E03A|095F|0931|E03B|0934|E03C|E03D|E03E|E03F|E040|E041|E042|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5|E0A6|E0A7|E0A8|E0A9|E0AA|E0AB|E0AC|E0AD|E0AE|E0AF|E0B0|E0B1|E0B2|E0B3|E0B4|E0B5|E0B6|E0B7|E0B8|E0B9|E0BA|E0BB|E0BC|E0BD|E0BE|E0BF|E0C0|E0C1|E0C2|E0C3|E0C4|E0C5|E0C6|E0C7|E0C8|E0C9|E0CA|E0CB|E0CC|E0CD|E0CE|E0CF|E0D0|E0D1|E0D2|E11E|E11F|E120|E121|E122|E123|E124|E125|E126|E127|E128|E129|E12A|E12B|E12C|E12D|E12E|E12F|E130|E131|E132|E133";
         $vstr = preg_replace('/E05D (' . $himatchhalfforms . '|' . $himatchfullforms . ')/', 'E015 \\1', $vstr);
         // Reph = E015 - Shift Right to just after end of syllable
         // FullAllForms + HalfAllForms + 093E matraA
         while (preg_match('/E015 (' . $himatchhalfforms . ')/', $vstr)) {
             $vstr = preg_replace('/E015 (' . $himatchhalfforms . ')/', '\\1 E015', $vstr);
         }
         $vstr = preg_replace('/E015 (' . $himatchfullforms . ')/', '\\1 E015', $vstr);
         // Now shift it beyond post-based vowels  // ??? Need to add others e.g. 0949,094A,094B,094C + presentation forms like E198
         $vstr = str_replace('E015 093E', '093E E015', $vstr);
         $vstr = preg_replace('/E015 (0940|E194|E195|E196|E197|E198)/', '\\1 E014', $vstr);
         // (Small) reph [E014] to Right of matraI
         $vstr = str_replace('E015 0947', '0947 E014', $vstr);
         // (Small) reph [E014] to Right of matraI
     } else {
         if ($lang == 'bn') {
             $bnfullconjuncts = "E002|E003|E004|E041|E042|E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E|E04F|E050|E051|E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E075|E076|E077|E078|E079|E07A|E07B|E07C|E07D|E07E|E07F|E080|E081|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5|E0A6|E0A7|E0A8|E0A9|E0AA|E0AB|E0AC|E0AD|E0AE|E0AF|E0B0|E0B1|E0B2|E0B3|E0B4|E0B5|E0B6|E0B7|E0B8|E0B9|E0BA|E0BB|E0BC|E0BD|E0BE|E0BF|E0C0|E0C1|E0C2|E0C3|E0C4|E0C5|E0C6|E0C7|E0C8|E0C9|E0CA|E0CB|E0CC|E0CD|E0CE|E0CF|E0D0|E0D1|E0D2|E0D3|E0D4|E0D5|E0D6|E0D7|E0D8|E0D9|E0DA|E0DB|E0DC|E0DD|E0DE|E0DF|E0E0|E0E1|E0E2|E0E3|E0E4|E0E5|E0E6|E0E7|E0E8|E0E9|E0EA|E0EB|E0EC|E0ED|E0EE|E0EF|E0F0|E0F1|E0F2|E0F3|E0F4|E0F5|E0F6|E0F7|E0F8|E0F9|E0FA|E0FB|E0FC|E0FD|E0FE|E0FF|E100|E101|E102|E103|E104|E105|E106|E107|E108|E109|E10A|E10B|E10C|E10D|E10E|E10F|E110|E111|E112|E113|E114|E115|E116|E117|E118|E119|E11A|E11B|E11C|E11D|E11E|E11F|E120|E121|E122|E123|E124|E125|E126|E127|E128|E129|E12A|E12B|E12C|E12D|E12E|E12F|E130|E131|E132|E133|E134|E135|E136|E137|E138|E139|E13A|E13B|E13C|E13D|E13E|E13F|E140|E141|E142|E143|E144|E145|E146|E147|E148|E149|E14A|E14B|E14C|E14D|E14E|E14F|E150|E151|E152|E153|E154|E155|E156|E157|E158|E159|E15A|E15B|E15C|E15D|E15E|E15F|E160|E161|E162|E163|E164|E165|E166|E167|E168|E169|E16A|E16B|E16C|E16D|E16E|E16F|E170|E171|E172|E173|E174|E175|E176|E177|E178|E179|E17A|E17B|E17C|E17D|E17E|E17F|E180|E181|E182|E183|E184|E185|E186|E187|E188|E189|E18A|E18B|E18C|E18D|E18E|E18F|E190|E191|E192|E193|E194|E195|E196|E197|E198|E199|E19A";
             // $bnfullcons - set above;
             $vstr = preg_replace('/E068 (' . $bnfullconjuncts . '|' . $bnfullcons . ')/', '\\1 E068', $vstr);
             // ? Need to shift it beyond post-base vowels 09BE, 09C0, 09D7  haven't found so can't test??
             $vstr = preg_replace('/E068 (09BE|09C0|09D7)/', '\\1 E068', $vstr);
         } else {
             if ($lang == 'gu') {
                 $gufullforms = "0A95|0A96|0A97|0A98|0A99|0A9A|0A9B|0A9C|0A9D|0A9E|0A9F|0AA0|0AA1|0AA2|0AA3|0AA4|0AA5|0AA6|0AA7|0AA8|0AAA|0AAB|0AAC|0AAD|0AAE|0AAF|0AB0|0AB2|0AB3|0AB5|0AB6|0AB7|0AB8|0AB9|E002|E003|E004|E005|E006|E007|E008|E009|E00A|E00B|E00C|E00D|E00E|E00F|E010|E011|E012|E013|E014|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E075|E076|E077|E078|E079|E07A|E07B|E07C|E07D|E07E|E07F|E080|E081|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5";
                 $vstr = preg_replace('/E032 (' . $gufullforms . ')/', '\\1 E032', $vstr);
                 // Now shift it beyond post-based vowels  // ??? Need to add others e.g. 0949,094A,094B,094C + presentation forms like E198
                 // ? Need to shift it beyond post-base vowels 0ABE, 0AC0 haven't found so can't test??
                 $vstr = preg_replace('/E032 (0ABE|0AC0)/', '\\1 E032', $vstr);
             } else {
                 if ($lang == 'te') {
                     // tefullforms defined earlier
                     $tepartialforms = "E00D|E00E|E00F|E010|E011|E012|E013|E014|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E07C|E07D|E07E";
                     $matraligs = "E07F|E080|E081|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5|E0A6|E0A7|E0A8|E0A9|E0AA|E0AB|E0AC|E0AD|E0AE|E0AF";
                     $tevowels = "0C3E|0C3F|0C40|0C46|0C47|0C56|0C4A|0C4B|0C4C" . "|0C41|0C42|0C43|0C44";
                     // post matras
                     $vstr = preg_replace('/(' . $tevowels . ') (E046|E069|E077)/', '\\2 \\1', $vstr);
                     while (preg_match('/(' . $tepartialforms . ') (E046|E069|E077)/', $vstr)) {
                         $vstr = preg_replace('/(' . $tepartialforms . ') (E046|E069|E077)/', '\\2 \\1', $vstr);
                     }
                     $vstr = preg_replace('/(' . $tefullforms . '|' . $matraligs . ') (E046|E069|E077)/', '\\2 \\1', $vstr);
                 } else {
                     if ($lang == 'kn') {
                         $knfullforms = "0C95|0C96|0C97|0C98|0C99|0C9A|0C9B|0C9C|0C9D|0C9E|0C9F|0CA0|0CA1|0CA2|0CA3|0CA4|0CA5|0CA6|0CA7|0CA8|0CAA|0CAB|0CAC|0CAD|0CAE|0CAF|0CB0|0CB1|0CB2|0CB3|0CB5|0CB6|0CB7|0CB8|0CB9|E07D|E07E|E0A3";
                         $knpartialforms = "E00C|E00D|E00E|E00F|E010|E011|E012|E013|E014|0C9E|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E028|E029|E02A|E02B|E02C|E02D|E07F";
                         while (preg_match('/E00B (' . $knpartialforms . ')/', $vstr)) {
                             $vstr = preg_replace('/E00B (' . $knpartialforms . ')/', '\\1 E00B', $vstr);
                         }
                         // mPDF 5.3.47  Also move Reph to right of matraIligatures
                         $knfullforms .= "|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A4|E0A1|E0A2";
                         $vstr = preg_replace('/E00B (' . $knfullforms . ')/', '\\1 E00B', $vstr);
                         // ? Need to shift it beyond base or below-base forms - haven't found so can't test??
                         // mPDF 5.3.87
                         // E004 added to list (which is a transformed version of 0CBE)
                         $knvowels = "0CBE|0CC0|0CC1|0CC2|0CC3|0CC4|0CC7|0CC8|0CCA|0CCB|0CD5|0CD6|E004";
                         $vstr = preg_replace('/E00B (' . $knvowels . ')/', '\\1 E00B', $vstr);
                     } else {
                         if ($lang == 'or') {
                             $orrephs = "E069|E06A|E06B|E06C";
                             $orfullforms = "0B15|0B16|0B17|0B18|0B19|0B1A|0B1B|0B1C|0B1D|0B1E|0B1F|0B20|0B21|0B22|0B23|0B24|0B25|0B26|0B27|0B28|0B29|0B2A|0B2B|0B2C|0B2D|0B2E|0B2F|0B30|0B31|0B32|0B33|0B34|0B35|0B36|0B37|0B38|E003|E004|E005|E006|E007|E008|E009|E00A|E00B|E00C|E00D|E00E|E00F|E010|E011|E012|E013|E014|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E028|E029|E02A|E02B|E02C|E02D|E02E|E02F|E030|E031|E032|E033|E034|E035|E036|E037";
                             // E123 - E147  FullHalant forms ? add to FullForms
                             $orpartialforms = "E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5|E0A6|E0A7|E0A8|E0A9|E0AA|E0AB|E0AC|E0AD|E0AE|E0AF|E0B0|E0B1|E0B2|E0B3|E0B4|E0B5|E0B6|E0B7|E0B8|E0B9|E0BA|E0BB|E0BC|E0BD|E0BE|E0BF|E0C0|E0C1|E0C2|E0C3|E0C4|E0C5|E0C6|E0C7|E0C8|E0C9|E0CA|E0CB|E0CC|E0CD|E0CE|E0CF|E0D0|E0D1|E0D2|E0D3|E0D4|E0D5|E0D6|E0D7|E0D8|E0D9|E0DA|E0DB|E0DC|E0DD|E0DE|E0DF|E0E0|E0E1|E0E2|E0E3|E0E4|E0E5|E0E6|E0E7|E0E8|E0E9|E0EA|E0EB|E0EC|E0ED|E0EE|E0EF|E0F0|E0F1|E0F2|E0F3|E0F4|E0F5";
                             // Combined MatraIReph[E06D] split [0B3F & E069] to allow reph to be shifted forwards
                             $vstr = preg_replace('/(' . $orfullforms . ') E06D (' . $orfullforms . ') 0B3E/', '\\1 0B3F E069 \\2 0B3E', $vstr);
                             while (preg_match('/(' . $orrephs . ') (' . $orpartialforms . ')/', $vstr)) {
                                 $vstr = preg_replace('/(' . $orrephs . ') (' . $orpartialforms . ')/', '\\2 \\1', $vstr);
                             }
                             $vstr = preg_replace('/(' . $orrephs . ') (' . $orfullforms . ')/', '\\2 \\1', $vstr);
                             // Combine Reph and MatraI
                             $vstr = str_replace('E069 0B3F', 'E06D', $vstr);
                             // Reph and MatraI -> MatraIReph
                             $vstr = str_replace('E06A 0B3F', 'E06E', $vstr);
                             // Reph and MatraI -> MatraIReph
                             $vstr = str_replace('E06B 0B3F', 'E06F', $vstr);
                             // Reph and MatraI -> MatraIReph
                         } else {
                             if ($lang == 'ml') {
                                 $halant = "0D4D";
                                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' 0D30/', 'E00E \\1', $vstr);
                                 // 0D30 = Ra
                                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $mlprebasedvowels . ' 0D30/', '\\2 E00E \\1', $vstr);
                                 // 0D30 = Ra
                                 $mlfullforms = "0D15|0D16|0D17|0D18|0D19|0D1A|0D1B|0D1C|0D1D|0D1E|0D1F|0D20|0D21|0D22|0D23|0D24|0D25|0D26|0D27|0D28|0D2A|0D2B|0D2C|0D2D|0D2E|0D2F|0D30|0D31|0D32|0D33|0D34|0D35|0D36|0D37|0D38|0D39" . "|E010|E011|E012|E013|E014|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E028|E029|E02A|E02B|E02C|E02D|E02E|E02F|E030|E031|E032|E033|E034|E035|E036|E037|E038|E039|E03A|E03B|E03C|E03D|E03E|E03F|E040|E041|E042|E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E|E04F|E050|E051|E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E075|E076|E077|E078|E079|E07A|E07B|E07C|E07D";
                                 // = FullConsonants + FullConjuncts
                                 // = Add Chillu characters	// mPDF 5.0.024
                                 $mlfullforms .= "|E004|E005|E006|E007|E008|E009";
                                 while (preg_match('/(' . $mlfullforms . ') E00E/', $vstr)) {
                                     $vstr = preg_replace('/(' . $mlfullforms . ') E00E/', 'E00E \\1', $vstr);
                                 }
                             }
                         }
                     }
                 }
             }
         }
     }
     //============================
     // SHIFT post-based vowels to Left of SmallForms (NOT to left of full forms)
     // TELUGU Shift
     if ($lang == 'te') {
         // NB $tevowels defined above
         // NB $tefullforms defined above
         $tebelowbase1 = "E02C|E02D|E02E|E02F|E030|E031|E032|E033|E034|E035|E036|E037|E038|E039|E03A|E03B|E03C|E03D|E03E|E03F|E040|E041|E042|E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E";
         //'Small1KaToHa'
         $tebelowbase2 = "E04F|E050|E051|E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071";
         // 'Small2KaToHa'
         $vstr = preg_replace('/(' . $tebelowbase2 . ') (' . $tevowels . ')/', '\\2 \\1', $vstr);
         $vstr = preg_replace('/(' . $tebelowbase1 . ') (' . $tevowels . ')/', '\\2 \\1', $vstr);
     } else {
         if ($lang == 'kn') {
             $knvowels = "0CBE|0CC0|0CC1|0CC2|0CC3|0CC4|0CC7|0CC8|0CCA|0CCB|0CD5|0CD6" . "|E004|E007|E008|E009|E00A";
             // NB $knvowels defined above
             // NB $fullforms defined above
             // $belowbase1/2 defined above
             $vstr = preg_replace('/(' . $belowbase2 . ') (' . $knvowels . ')/', '\\2 \\1', $vstr);
             // mPDF 5.3.87
             $vstr = preg_replace('/(' . $belowbase1 . ') (' . $knvowels . ')/', '\\2 \\1', $vstr);
             //$vstr = preg_replace('/('.$fullforms.') ('.$knvowels.')/', '\\2 \\1', $vstr);
         }
     }
     //============================
     // Clear unwanted ZWJ, ZWNJ
     // MALAYALAM
     if ($lang == 'ml') {
         $vstr = preg_replace('/(200C|200D) /', '', $vstr);
     }
     //============================
     // END & PUT IT BACK TOGETHER
     $vstr = preg_replace('/^0020 (.*) 0020$/', '\\1', $vstr);
     $varr = explode(" ", $vstr);
     $e = '';
     foreach ($varr as $v) {
         $e .= code2utf(hexdec($v));
     }
     //============================
     return $e;
 }
Example #7
0
/**
 * Rewrite the decimal digits of a number using another script's digit glyphs.
 *
 * Each ASCII digit d is replaced by the character at code point ($cp + d),
 * but only when the current font defines that character; otherwise the
 * original digit is kept.
 *
 * Characters that are not ASCII digits (a sign, a decimal point, an exponent
 * marker) are always copied through unchanged. Previously they were passed to
 * intval(), which yields 0, so e.g. "-5" was rendered as "<zero><five>" in
 * the target script.
 *
 * @param mixed $num Number (or numeric string) to convert.
 * @param int   $cp  Code point of the digit zero in the target script.
 * @return string The converted number.
 */
function dec2other($num, $cp) {
	$nstr = (string) $num;
	$len = strlen($nstr);
	$rnum = '';
	for ($i = 0; $i < $len; $i++) {
		$ch = $nstr[$i];
		// Offset from '0' (0x30); anything outside 0..9 is not a decimal digit.
		$digit = ord($ch) - 48;
		if ($digit >= 0 && $digit <= 9 && $this->_charDefined($this->CurrentFont['cw'], $cp + $digit)) {
			// The font contains the target-script digit (e.g. arabic-indic numbers).
			$rnum .= code2utf($cp + $digit);
		} else {
			$rnum .= $ch;
		}
	}
	return $rnum;
}
Example #8
0
/**
 * Decode a string that was encoded with JavaScript's escape() function.
 *
 * Recognised sequences:
 *   - %uXXXX (four hex digits): decoded to the UTF-8 form of that code point
 *     via code2utf().
 *   - %XX (two hex digits): values up to 127 become the single byte, values
 *     above 127 are treated as a code point and converted via code2utf().
 *
 * A '%' that is not followed by a complete, valid hex sequence (for example a
 * trailing "%", "%zz" or "%u12") is copied through literally, which is what
 * JavaScript's unescape() does. Previously such input was fed to hexdec() and
 * produced NUL/garbage bytes.
 *
 * @param string $strIn    Escaped input string.
 * @param string $iconv_to Target character set passed to iconv(); no
 *                         conversion is done when it is "UTF-8".
 * @return string|false Decoded string (whatever iconv() returns when a
 *                      conversion is requested).
 */
function unescape($strIn, $iconv_to = 'UTF-8')
{
    $hexDigits = '0123456789abcdefABCDEF';
    $strOut = '';
    $iPos = 0;
    $len = strlen($strIn);
    while ($iPos < $len) {
        $charAt = substr($strIn, $iPos, 1);
        if ($charAt != '%') {
            // Ordinary character: copy as-is.
            $strOut .= $charAt;
            $iPos++;
            continue;
        }
        if ((string) substr($strIn, $iPos + 1, 1) === 'u') {
            // Unicode character: %uXXXX
            $unicodeHexVal = (string) substr($strIn, $iPos + 2, 4);
            if (strspn($unicodeHexVal, $hexDigits) == 4) {
                $strOut .= code2utf(hexdec($unicodeHexVal));
                $iPos += 6;
                continue;
            }
        } else {
            // Escaped single-byte character: %XX
            $hexVal = (string) substr($strIn, $iPos + 1, 2);
            if (strspn($hexVal, $hexDigits) == 2) {
                $code = hexdec($hexVal);
                if ($code > 127) {
                    // Convert to Unicode
                    $strOut .= code2utf($code);
                } else {
                    $strOut .= chr($code);
                }
                $iPos += 3;
                continue;
            }
        }
        // Malformed or truncated escape: keep the '%' and carry on with the
        // following characters as plain text.
        $strOut .= '%';
        $iPos++;
    }
    if ($iconv_to != "UTF-8") {
        $strOut = iconv("UTF-8", $iconv_to, $strOut);
    }
    return $strOut;
}
Example #9
0
File: svg.php Project: hipogea/zega
 /**
  * Tag runs of text in SVG markup with the language implied by their script.
  *
  * The markup is split on tags. Each text segment is decoded, broken into
  * sub-chunks wherever the Unicode script (as reported by
  * UCDN::get_ucd_record()) changes, and every sub-chunk that maps to a
  * language is wrapped in <tspan lang="...">. Only text that directly follows
  * a tag starting with "<text" or "<tspa" is kept; any other text segment is
  * replaced by an empty string.
  * NOTE(review): dropping text outside <text>/<tspan> is existing behaviour -
  * presumably intentional for SVG; confirm.
  *
  * Returns the input untouched when the owning mPDF object is restricted to
  * core fonts.
  *
  * @param string $html SVG markup.
  * @return string Markup with <tspan lang="..."> wrappers inserted.
  */
 function markScriptToLang($html)
 {
     if ($this->mpdf_ref->onlyCoreFonts) {
         return $html;
     }
     // Lazily obtain the script->language table and the per-language
     // character classes, copying them from the owning mPDF object if present.
     if (empty($this->script2lang)) {
         if (!empty($this->mpdf_ref->script2lang)) {
             $this->script2lang = $this->mpdf_ref->script2lang;
             $this->viet = $this->mpdf_ref->viet;
             $this->pashto = $this->mpdf_ref->pashto;
             $this->urdu = $this->mpdf_ref->urdu;
             $this->persian = $this->mpdf_ref->persian;
             $this->sindhi = $this->mpdf_ref->sindhi;
         } else {
             // Presumably defines the same tables - verify against the config file.
             include _MPDF_PATH . 'config_script2lang.php';
         }
     }
     // With PREG_SPLIT_DELIM_CAPTURE even indexes hold text, odd indexes hold
     // the tag contents (without the angle brackets).
     $a = preg_split('/<(.*?)>/ms', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
     foreach ($a as $i => $e) {
         if ($i % 2 != 0) {
             // Tag: restore the angle brackets removed by the split.
             $a[$i] = '<' . $e . '>';
             continue;
         }
         $e = strcode2utf($e);
         $e = $this->mpdf_ref->lesser_entity_decode($e);
         $earr = $this->mpdf_ref->UTF8StringToArray($e, false);
         $scriptblock = 0;
         $scriptblocks = array();
         $scriptblocks[0] = 0;
         $chardata = array();
         $subchunk = 0;
         $charctr = 0;
         foreach ($earr as $char) {
             $ucd_record = UCDN::get_ucd_record($char);
             $sbl = $ucd_record[6];
             // Script codes 0, 40 and 102 never start a new sub-chunk.
             // NOTE(review): 40/102 are presumably "inherited"/"unknown" - confirm against UCDN.
             if ($sbl && $sbl != 40 && $sbl != 102) {
                 if ($scriptblock == 0) {
                     $scriptblock = $sbl;
                     $scriptblocks[$subchunk] = $scriptblock;
                 } elseif ($scriptblock > 0 && $scriptblock != $sbl) {
                     // NEW (non-common) Script encountered in this chunk.
                     // Start a new subchunk
                     $subchunk++;
                     $scriptblock = $sbl;
                     $charctr = 0;
                     $scriptblocks[$subchunk] = $scriptblock;
                 }
             }
             $chardata[$subchunk][$charctr]['script'] = $sbl;
             $chardata[$subchunk][$charctr]['uni'] = $char;
             $charctr++;
         }
         // If scriptblock[x] = common & non-baseScript
         // and scriptblock[x+1] = baseScript
         // Move common script from end of x to start of x+1
         for ($sch = 0; $sch < $subchunk; $sch++) {
             if ($scriptblocks[$sch] > 0 && $scriptblocks[$sch] != $this->mpdf_ref->baseScript && $scriptblocks[$sch + 1] == $this->mpdf_ref->baseScript) {
                 $end = count($chardata[$sch]) - 1;
                 while ($chardata[$sch][$end]['script'] == 0 && $end > 1) {
                     // common script
                     $tmp = array_pop($chardata[$sch]);
                     array_unshift($chardata[$sch + 1], $tmp);
                     $end--;
                 }
             }
         }
         // The preceding tag was already rewritten with its angle brackets by an
         // earlier iteration. The first text segment has no preceding tag, so
         // guard the lookup instead of reading an undefined offset.
         $prevTag = isset($a[$i - 1]) ? $a[$i - 1] : '';
         $prevTagStart = substr($prevTag, 0, 5);
         $inTextElement = $prevTagStart == '<text' || $prevTagStart == '<tspa';
         $o = '';
         for ($sch = 0; $sch <= $subchunk; $sch++) {
             if (!isset($chardata[$sch])) {
                 continue;
             }
             $s = '';
             for ($j = 0; $j < count($chardata[$sch]); $j++) {
                 $s .= code2utf($chardata[$sch][$j]['uni']);
             }
             // ZZZ99 Undo lesser_entity_decode as above - but only for <>&
             $s = str_replace("&", "&amp;", $s);
             $s = str_replace("<", "&lt;", $s);
             $s = str_replace(">", "&gt;", $s);
             if (!$inTextElement) {
                 // <tspan> or <text> only
                 continue;
             }
             $lang = '';
             // Check Vietnamese if Latin script - even if Basescript
             if ($scriptblocks[$sch] == UCDN::SCRIPT_LATIN && $this->mpdf_ref->autoVietnamese && preg_match("/([" . $this->viet . "])/u", $s)) {
                 $lang = "vi";
             } elseif ($scriptblocks[$sch] == UCDN::SCRIPT_ARABIC && $this->mpdf_ref->autoArabic) {
                 // Arabic script: test the language-specific character classes
                 // in this order, then fall back to the generic table.
                 if (preg_match("/[" . $this->sindhi . "]/u", $s)) {
                     $lang = "sd";
                 } elseif (preg_match("/[" . $this->urdu . "]/u", $s)) {
                     $lang = "ur";
                 } elseif (preg_match("/[" . $this->pashto . "]/u", $s)) {
                     $lang = "ps";
                 } elseif (preg_match("/[" . $this->persian . "]/u", $s)) {
                     $lang = "fa";
                 } elseif ($this->mpdf_ref->baseScript != UCDN::SCRIPT_ARABIC && isset($this->script2lang[$scriptblocks[$sch]])) {
                     // Plain table lookup. The previous interpolated-string form
                     // stringified the whole array and never yielded a language code.
                     $lang = $this->script2lang[$scriptblocks[$sch]];
                 }
             } elseif ($scriptblocks[$sch] > 0 && $scriptblocks[$sch] != $this->mpdf_ref->baseScript && isset($this->script2lang[$scriptblocks[$sch]])) {
                 $lang = $this->script2lang[$scriptblocks[$sch]];
             }
             if ($lang) {
                 $o .= '<tspan lang="' . $lang . '">' . $s . '</tspan>';
             } else {
                 $o .= $s;
             }
         }
         $a[$i] = $o;
     }
     return implode('', $a);
 }
Example #10
0
 function _bidiSort($ta, $str = '', $dir, &$chunkOTLdata, $useGPOS, $lastBidiText = '', $nextBidiText = '')
 {
     $bidi_code = array('L' => 'L', 'E' => 'LRE', 'O' => 'LRO', 'R' => 'R', 'A' => 'AL', 'G' => 'RLE', 'V' => 'RLO', 'p' => 'PDF', 'n' => 'EN', 's' => 'ES', 't' => 'ET', 'a' => 'AN', 'c' => 'CS', 'm' => 'NSM', 'b' => 'BN', '&' => 'B', '%' => 'S', '$' => 'WS', '#' => 'ON');
     $pel = 0;
     // paragraph embedding level
     $maxlevel = 0;
     $numchars = count($ta);
     // CHECK WHETHER STRING CONTAINS ALL RTL OR SOME RTL
     // Find the first strong type character of nextBidiText - type L, AL, or R.
     $nextBidiStrongType = '';
     if ($nextBidiText) {
         $L = strpos(' ' . $nextBidiText . 'L', 'L');
         $R = strpos(' ' . $nextBidiText . 'R', 'R');
         $AL = strpos(' ' . $nextBidiText . 'A', 'A');
         $MR = min($R, $AL);
         if ($L < $MR) {
             $nextBidiStrongType = 'L';
         } else {
             if ($MR < $L) {
                 if ($R < $AL) {
                     $nextBidiStrongType = 'R';
                 } else {
                     $nextBidiStrongType = 'AL';
                 }
             }
         }
     }
     // Find the last strong type character of lastBidiText - type L, AL, or R.
     $lastBidiStrongType = '';
     if ($lastBidiText) {
         $L = strrpos(' ' . $lastBidiText, 'L');
         $R = strrpos(' ' . $lastBidiText, 'R');
         $AL = strrpos(' ' . $lastBidiText, 'A');
         $MR = max($R, $AL);
         if ($L > $MR) {
             $lastBidiStrongType = 'L';
         } else {
             if ($MR > $L) {
                 if ($R > $AL) {
                     $lastBidiStrongType = 'R';
                 } else {
                     $lastBidiStrongType = 'AL';
                 }
             }
         }
     }
     // Set the initial paragraph embedding level
     if ($dir == 'rtl') {
         $pel = 1;
     } else {
         $pel = 0;
     }
     // ???? Always ltr if not rtl
     /*
     	else if ($dir == 'ltr') { $pel = 0; }
     	else {
     		// P2. In each paragraph, find the first character of type L, AL, or R.
     		// P3. If a character is found in P2 and it is of type AL or R, then set the paragraph embedding level to one; otherwise, set it to zero.
     		$L = strpos(' '.$chunkOTLdata['bidi_type'].'L', 'L');
     		$R = strpos(' '.$chunkOTLdata['bidi_type'].'R', 'R');
     		$AL = strpos(' '.$chunkOTLdata['bidi_type'].'A', 'A');
     		$R = min($R,$AL);
     		if ($R<$L) { $pel = 1; }
     		else { $pel = 0; }
     	}
     */
     // X1. Begin by setting the current embedding level to the paragraph embedding level. Set the directional override status to neutral.
     // Current Embedding Level
     $cel = $pel;
     // directional override status
     $dos = 'N';
     $remember = array();
     // start-of-level-run
     $sor = $pel % 2 ? 'R' : 'L';
     $eor = $sor;
     // Array of characters data
     $chardata = array();
     // Process each character iteratively, applying rules X2 through X9. Only embedding levels from 0 to 61 are valid in this phase.
     // In the resolution of levels in rules I1 and I2, the maximum embedding level of 62 can be reached.
     for ($i = 0; $i < $numchars; ++$i) {
         if ($ta[$i] == 8235) {
             // RLE
             // X2. With each RLE, compute the least greater odd embedding level.
             //	a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to neutral.
             //	b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
             $next_level = $cel + $cel % 2 + 1;
             if ($next_level < 62) {
                 $remember[] = array('num' => 8235, 'cel' => $cel, 'dos' => $dos);
                 $cel = $next_level;
                 $dos = 'N';
                 $sor = $eor;
                 $eor = $cel % 2 ? 'R' : 'L';
             }
         } else {
             if ($ta[$i] == 8234) {
                 // LRE
                 // X3. With each LRE, compute the least greater even embedding level.
                 //	a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to neutral.
                 //	b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
                 $next_level = $cel + 2 - $cel % 2;
                 if ($next_level < 62) {
                     $remember[] = array('num' => 8234, 'cel' => $cel, 'dos' => $dos);
                     $cel = $next_level;
                     $dos = 'N';
                     $sor = $eor;
                     $eor = $cel % 2 ? 'R' : 'L';
                 }
             } else {
                 if ($ta[$i] == 8238) {
                     // RLO
                     // X4. With each RLO, compute the least greater odd embedding level.
                     //	a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to right-to-left.
                     //	b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
                     $next_level = $cel + $cel % 2 + 1;
                     if ($next_level < 62) {
                         $remember[] = array('num' => 8238, 'cel' => $cel, 'dos' => $dos);
                         $cel = $next_level;
                         $dos = 'R';
                         $sor = $eor;
                         $eor = $cel % 2 ? 'R' : 'L';
                     }
                 } else {
                     if ($ta[$i] == 8237) {
                         // LRO
                         // X5. With each LRO, compute the least greater even embedding level.
                         //	a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to left-to-right.
                         //	b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
                         $next_level = $cel + 2 - $cel % 2;
                         if ($next_level < 62) {
                             $remember[] = array('num' => 8237, 'cel' => $cel, 'dos' => $dos);
                             $cel = $next_level;
                             $dos = 'L';
                             $sor = $eor;
                             $eor = $cel % 2 ? 'R' : 'L';
                         }
                     } else {
                         if ($ta[$i] == 8236) {
                             // PDF
                             // X7. With each PDF, determine the matching embedding or override code. If there was a valid matching code, restore (pop) the last remembered (pushed) embedding level and directional override.
                             if (count($remember)) {
                                 $last = count($remember) - 1;
                                 if ($remember[$last]['num'] == 8235 || $remember[$last]['num'] == 8234 || $remember[$last]['num'] == 8238 || $remember[$last]['num'] == 8237) {
                                     $match = array_pop($remember);
                                     $cel = $match['cel'];
                                     $dos = $match['dos'];
                                     $sor = $eor;
                                     $eor = ($cel > $match['cel'] ? $cel : $match['cel']) % 2 ? 'R' : 'L';
                                 }
                             }
                         } else {
                             // X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
                             //	a. Set the level of the current character to the current embedding level.
                             //	b. When the directional override status is not neutral, reset the current character type to directional override status.
                             if ($dos != 'N') {
                                 $chardir = $dos;
                             } else {
                                 $chardir = $bidi_code[$chunkOTLdata['bidi_type'][$i]];
                             }
                             // stores string characters and other information
                             if (isset($chunkOTLdata['GPOSinfo'][$i])) {
                                 $gpos = $chunkOTLdata['GPOSinfo'][$i];
                             } else {
                                 $gpos = '';
                             }
                             $chardata[] = array('char' => $ta[$i], 'level' => $cel, 'type' => $chardir, 'sor' => $sor, 'eor' => $eor, 'group' => $chunkOTLdata['group'][$i], 'GPOSinfo' => $gpos);
                         }
                     }
                 }
             }
         }
     }
     // X8. All explicit directional embeddings and overrides are completely terminated at the end of each paragraph. Paragraph separators are not included in the embedding.
     // X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
     // This is effectively done by only saving other codes to chardata
     // X10. The remaining rules are applied to each run of characters at the same level. For each run, determine the start-of-level-run (sor) and end-of-level-run (eor) type, either L or R. This depends on the higher of the two levels on either side of the boundary (at the start or end of the paragraph, the level of the 'other' run is the base embedding level). If the higher level is odd, the type is R; otherwise, it is L.
     // 3.3.3 Resolving Weak Types
     // Weak types are now resolved one level run at a time. At level run boundaries where the type of the character on the other side of the boundary is required, the type assigned to sor or eor is used.
     // Nonspacing marks are now resolved based on the previous characters.
     $numchars = count($chardata);
     // W1. Examine each nonspacing mark (NSM) in the level run, and change the type of the NSM to the type of the previous character. If the NSM is at the start of the level run, it will get the type of sor.
     for ($i = 0; $i < $numchars; ++$i) {
         if ($chardata[$i]['type'] == 'NSM') {
             if ($i == 0 || $chardata[$i]['level'] != $chardata[$i - 1]['level']) {
                 $chardata[$i]['type'] = $chardata[$i]['sor'];
             } else {
                 $chardata[$i]['type'] = $chardata[$i - 1]['type'];
             }
         }
     }
     // W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sor) is found. If an AL is found, change the type of the European number to Arabic number.
     $prevlevel = -1;
     $levcount = 0;
     for ($i = 0; $i < $numchars; ++$i) {
         if ($chardata[$i]['type'] == 'EN') {
             $found = false;
             for ($j = $levcount; $j >= 0; $j--) {
                 if ($chardata[$j]['type'] == 'AL') {
                     $chardata[$i]['type'] = 'AN';
                     $found = true;
                     break;
                 } else {
                     if ($chardata[$j]['type'] == 'L' || $chardata[$j]['type'] == 'R') {
                         $found = true;
                         break;
                     }
                 }
             }
             if (!$found && $lastBidiStrongType == 'AL') {
                 $chardata[$i]['type'] = 'AN';
             }
         }
         if ($chardata[$i]['level'] != $prevlevel) {
             $levcount = 0;
         } else {
             ++$levcount;
         }
         $prevlevel = $chardata[$i]['level'];
     }
     // W3. Change all ALs to R.
     for ($i = 0; $i < $numchars; ++$i) {
         if ($chardata[$i]['type'] == 'AL') {
             $chardata[$i]['type'] = 'R';
         }
     }
     // W4. A single European separator between two European numbers changes to a European number. A single common separator between two numbers of the same type changes to that type.
     $prevlevel = -1;
     $levcount = 0;
     for ($i = 0; $i < $numchars; ++$i) {
         if ($levcount > 0 and $i + 1 < $numchars and $chardata[$i + 1]['level'] == $prevlevel) {
             if ($chardata[$i]['type'] == 'ES' && $chardata[$i - 1]['type'] == 'EN' && $chardata[$i + 1]['type'] == 'EN') {
                 $chardata[$i]['type'] = 'EN';
             } else {
                 if ($chardata[$i]['type'] == 'CS' && $chardata[$i - 1]['type'] == 'EN' && $chardata[$i + 1]['type'] == 'EN') {
                     $chardata[$i]['type'] = 'EN';
                 } else {
                     if ($chardata[$i]['type'] == 'CS' && $chardata[$i - 1]['type'] == 'AN' && $chardata[$i + 1]['type'] == 'AN') {
                         $chardata[$i]['type'] = 'AN';
                     }
                 }
             }
         }
         if ($chardata[$i]['level'] != $prevlevel) {
             $levcount = 0;
         } else {
             ++$levcount;
         }
         $prevlevel = $chardata[$i]['level'];
     }
     // W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
     $prevlevel = -1;
     $levcount = 0;
     for ($i = 0; $i < $numchars; ++$i) {
         if ($chardata[$i]['type'] == 'ET') {
             if ($levcount > 0 && $chardata[$i - 1]['type'] == 'EN') {
                 $chardata[$i]['type'] = 'EN';
             } else {
                 $j = $i + 1;
                 while ($j < $numchars && $chardata[$j]['level'] == $prevlevel) {
                     if ($chardata[$j]['type'] == 'EN') {
                         $chardata[$i]['type'] = 'EN';
                         break;
                     } else {
                         if ($chardata[$j]['type'] != 'ET') {
                             break;
                         }
                     }
                     ++$j;
                 }
             }
         }
         if ($chardata[$i]['level'] != $prevlevel) {
             $levcount = 0;
         } else {
             ++$levcount;
         }
         $prevlevel = $chardata[$i]['level'];
     }
     // W6. Otherwise, separators and terminators change to Other Neutral.
     for ($i = 0; $i < $numchars; ++$i) {
         if ($chardata[$i]['type'] == 'ET' || $chardata[$i]['type'] == 'ES' || $chardata[$i]['type'] == 'CS') {
             $chardata[$i]['type'] = 'ON';
         }
     }
     //W7. Search backward from each instance of a European number until the first strong type (R, L, or sor) is found. If an L is found, then change the type of the European number to L.
     for ($i = 0; $i < $numchars; ++$i) {
         if ($chardata[$i]['type'] == 'EN') {
             for ($j = $i - 1; $j >= 0; $j--) {
                 if ($chardata[$j]['level'] != $chardata[$i]['level']) {
                     // Level run boundary
                     break;
                 } else {
                     if ($chardata[$j]['type'] == 'L') {
                         $chardata[$i]['type'] = 'L';
                         break;
                     } else {
                         if ($chardata[$j]['type'] == 'R') {
                             break;
                         }
                     }
                 }
             }
         }
     }
     // N1. A sequence of neutrals takes the direction of the surrounding strong text if the text on both sides has the same direction. European and Arabic numbers act as if they were R in terms of their influence on neutrals. Start-of-level-run (sor) and end-of-level-run (eor) are used at level run boundaries.
     for ($i = 0; $i < $numchars; ++$i) {
         if ($chardata[$i]['type'] == 'ON' || $chardata[$i]['type'] == 'WS') {
             $left = '';
             // LEFT
             if ($i == 0) {
                 // first char
                 if (isset($lastBidiStrongType) && $lastBidiStrongType) {
                     if ($lastBidiStrongType == 'AL') {
                         $left = 'R';
                     } else {
                         $left = $lastBidiStrongType;
                     }
                 } else {
                     $left = $chardata[$i]['sor'];
                 }
             } else {
                 if ($chardata[$i - 1]['level'] != $chardata[$i]['level']) {
                     // run boundary
                     $left = $chardata[$i]['sor'];
                 } else {
                     if ($chardata[$i - 1]['type'] == 'L') {
                         $left = 'L';
                     } else {
                         if ($chardata[$i - 1]['type'] == 'R' || $chardata[$i - 1]['type'] == 'EN' || $chardata[$i - 1]['type'] == 'AN') {
                             $left = 'R';
                         }
                     }
                 }
             }
             // RIGHT
             $right = '';
             $j = $i;
             // move to the right of any following neutrals OR hit a run boundary
             while (($chardata[$j]['type'] == 'ON' || $chardata[$j]['type'] == 'WS') && $j <= $numchars - 1) {
                 if ($j == $numchars - 1) {
                     // last char
                     if (isset($nextBidiStrongType) && $nextBidiStrongType) {
                         if ($nextBidiStrongType == 'AL') {
                             $right = 'R';
                         } else {
                             $right = $nextBidiStrongType;
                         }
                     } else {
                         $right = $chardata[$j]['eor'];
                     }
                     break;
                 } else {
                     if ($chardata[$j + 1]['level'] != $chardata[$j]['level']) {
                         // run boundary
                         $right = $chardata[$j]['eor'];
                         break;
                     } else {
                         if ($chardata[$j + 1]['type'] == 'L') {
                             $right = 'L';
                             break;
                         } else {
                             if ($chardata[$j + 1]['type'] == 'R' || $chardata[$j + 1]['type'] == 'EN' || $chardata[$j + 1]['type'] == 'AN') {
                                 $right = 'R';
                                 break;
                             }
                         }
                     }
                 }
                 $j++;
             }
             if ($left && $left == $right) {
                 $chardata[$i]['type'] = $left;
             }
         }
     }
     // N2. Any remaining neutrals take the embedding direction
     for ($i = 0; $i < $numchars; ++$i) {
         if ($chardata[$i]['type'] == 'ON' || $chardata[$i]['type'] == 'WS') {
             $chardata[$i]['type'] = $chardata[$i]['level'] % 2 ? 'R' : 'L';
         }
     }
     // I1. For all characters with an even (left-to-right) embedding direction, those of type R go up one level and those of type AN or EN go up two levels.
     // I2. For all characters with an odd (right-to-left) embedding direction, those of type L, EN or AN go up one level.
     for ($i = 0; $i < $numchars; ++$i) {
         $odd = $chardata[$i]['level'] % 2;
         if ($odd) {
             if ($chardata[$i]['type'] == 'L' || $chardata[$i]['type'] == 'AN' || $chardata[$i]['type'] == 'EN') {
                 $chardata[$i]['level'] += 1;
             }
         } else {
             if ($chardata[$i]['type'] == 'R') {
                 $chardata[$i]['level'] += 1;
             } else {
                 if ($chardata[$i]['type'] == 'AN' || $chardata[$i]['type'] == 'EN') {
                     $chardata[$i]['level'] += 2;
                 }
             }
         }
         $maxlevel = max($chardata[$i]['level'], $maxlevel);
     }
     // L1. On each line, reset the embedding level of the following characters to the paragraph embedding level:
     //	1. Segment separators,
     //	2. Paragraph separators,
     //	3. Any sequence of whitespace characters preceding a segment separator or paragraph separator, and
     //	4. Any sequence of whitespace characters at the end of the line.
     for ($i = $numchars - 1; $i > 0; $i--) {
         if ($chardata[$i]['type'] == 'WS') {
             $chardata[$i]['level'] = $pel;
         } else {
             break;
         }
     }
     // L2. From the highest level found in the text to the lowest odd level on each line, including intermediate levels not actually present in the text, reverse any contiguous sequence of characters that are at that level or higher.
     for ($j = $maxlevel; $j > 0; $j--) {
         $ordarray = array();
         $revarr = array();
         $onlevel = false;
         for ($i = 0; $i < $numchars; ++$i) {
             if ($chardata[$i]['level'] >= $j) {
                 $onlevel = true;
                 // L4. A character is depicted by a mirrored glyph if and only if (a) the resolved directionality of that character is R, and (b) the Bidi_Mirrored property value of that character is true.
                 if (isset(UCDN::$mirror_pairs[$chardata[$i]['char']]) && $chardata[$i]['type'] == 'R') {
                     $chardata[$i]['char'] = UCDN::$mirror_pairs[$chardata[$i]['char']];
                 }
                 $revarr[] = $chardata[$i];
             } else {
                 if ($onlevel) {
                     $revarr = array_reverse($revarr);
                     $ordarray = array_merge($ordarray, $revarr);
                     $revarr = array();
                     $onlevel = false;
                 }
                 $ordarray[] = $chardata[$i];
             }
         }
         if ($onlevel) {
             $revarr = array_reverse($revarr);
             $ordarray = array_merge($ordarray, $revarr);
         }
         $chardata = $ordarray;
     }
     $group = '';
     $e = '';
     $GPOS = array();
     $cctr = 0;
     $rtl_content = 0x0;
     foreach ($chardata as $cd) {
         $e .= code2utf($cd['char']);
         $group .= $cd['group'];
         if ($useGPOS && is_array($cd['GPOSinfo'])) {
             $GPOS[$cctr] = $cd['GPOSinfo'];
             $GPOS[$cctr]['wDir'] = $cd['level'] % 2 ? 'RTL' : 'LTR';
         }
         if ($cd['type'] == 'L') {
             $rtl_content |= 1;
         } else {
             if ($cd['type'] == 'R') {
                 $rtl_content |= 2;
             }
         }
         $cctr++;
     }
     $chunkOTLdata['group'] = $group;
     if ($useGPOS) {
         $chunkOTLdata['GPOSinfo'] = $GPOS;
     }
     // NB Don't reverse chunkOTLdata['bidi_type'] - req'd in WriteFlowing Block for $lastBidiType in original logical order
     // NB Does reverse chunkOTLdata['group']
     return array($e, $rtl_content);
 }
Example #11
0
 /**
  * Converts a hexadecimal code point string via code2utf(); any other input
  * is handed back untouched.
  *
  * @param string $chr a literal character, or a code point written as 4 to 6
  *                    hexadecimal digits (matched case-insensitively)
  * @return string the result of code2utf() for hex input, otherwise $chr as given
  */
 function __chr2utf($chr)
 {
     // Only a string made up entirely of 4-6 hex digits is treated as a code point.
     $looksLikeHex = preg_match("/^[\\da-f]{4,6}\$/i", $chr);
     if (!$looksLikeHex) {
         return $chr;
     }
     return code2utf(hexdec($chr));
 }
Example #12
0
 function substituteIndic($earr, $lang, $font)
 {
     global $voltdata;
     if (!isset($voltdata[$font])) {
         include_once _MPDF_PATH . 'includes/' . $font . '.volt.php';
         $voltdata[$font] = $volt;
     }
     foreach ($earr as $eid => $char) {
         $earr[$eid] = sprintf("%04s", strtoupper(dechex($char)));
     }
     $vstr = "0020 " . implode(" ", $earr) . " 0020";
     //============================
     // Common Indic Punctuation marks
     // If NOT devanagari
     if ($lang != 'hi') {
         $vstr = str_replace('0964', '007C', $vstr);
         // U+0964 replace with "|"
         $vstr = str_replace('0965', '007C 007C', $vstr);
         // U+0964 replace with "|"
     }
     //============================
     // Tamil numeral for Zero missing Added mPDF 4.2
     if ($lang == 'ta') {
         $vstr = str_replace('0BE6', '0030', $vstr);
         // U+0BEB replace with "0"
     }
     //============================
     // Re-order vowels
     // DEVANAGARI vowel sign matraI[093F] before consonant
     if ($lang == 'hi') {
         $prebasedvowels = "(093F)";
         $nukta = "093C";
         $halant = "094D";
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
         // vowel sign pre-based shift left
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . ' ' . $nukta . '/', '\\2 \\1 ' . $nukta, $vstr);
         // before NUKTA
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
         // before CHAR HALANT  ==  VIRAMA
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
         // before CHAR HALANT  ==  VIRAMA
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
         // before CHAR HALANT  ==  VIRAMA
         $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
         // before CHAR HALANT  ==  VIRAMA
     } else {
         if ($lang == 'bn') {
             // Khanda Ta 09CE not in font -> replace with 09A4|09CD
             $vstr = preg_replace('/09CE/', '09A4 09CD 200D', $vstr);
             // mPDF 5.3.09
             // BENGALI double-part vowels [09CB 09C7 09BE][09CC 09C7 09D7]
             $vstr = str_replace('09CB', '09C7 09BE', $vstr);
             // convert to 2 parts
             $vstr = str_replace('09CC', '09C7 09D7', $vstr);
             // 09C7 pre-based is then shifted below
             $prebasedvowels = "(09BF|09C7|09C8)";
             $nukta = "09BC";
             $halant = "09CD";
             // mPDF 5.0.044
             $bnfullcons = "0995|0996|0997|0998|0999|099A|099B|099C|099D|099F|09A0|09A1|09A2|09A3|09A4|09A5|09A6|09A7|09A8|09AA|09AB|09AC|09AD|09AE|09AF|09B0|09B2|09B6|09B7|09B8|09B9|09DC|09DD|09DF";
             $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
             // vowel sign pre-based shift left
             $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . ' ' . $nukta . '/', '\\2 \\1 ' . $nukta, $vstr);
             // before NUKTA
             $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
             // before CHAR HALANT
             // mPDF 5.0.044
             // .. and shifting left-based vowel further to the left in case 3 consonants together.
             $vstr = preg_replace('/(' . $bnfullcons . ') ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
             // mPDF 5.0.044
             // If left-based vowel has now been shifted to left of RA/Halant (09B0/09CD)
             // Convert here to above-line form (E068) as it would get missed later
             // e.g. 09B0 09CD 09AD 09C7 would be changed above =>
             // e.g. 09C7 09B0 09CD 09AD. The 09B0 09CD should => E068
             // ??? need to add 09BF as well (09BF|09C7|09C8)
             $vstr = preg_replace('/(09C7|09C8) 09B0 09CD/', '\\1 E068', $vstr);
         } else {
             if ($lang == 'gu') {
                 $prebasedvowels = "(0ABF)";
                 $nukta = "0ABC";
                 $halant = "0ACD";
                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
                 // vowel sign pre-based shift left
                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . ' ' . $nukta . '/', '\\2 \\1 ' . $nukta, $vstr);
                 // before NUKTA
                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                 // before CHAR HALANT
             } else {
                 if ($lang == 'pa') {
                     $prebasedvowels = "(0A3F)";
                     $nukta = "0A3C";
                     $halant = "0A4D";
                     $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
                     // vowel sign pre-based shift left
                     $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . ' ' . $nukta . '/', '\\2 \\1 ' . $nukta, $vstr);
                     // before NUKTA
                     $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $prebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                     // before CHAR HALANT
                 } else {
                     if ($lang == 'ta') {
                         // Shrii (Shree)
                         $vstr = preg_replace('/0BB6 0BCD 0BB0 0BC0/', 'E04B', $vstr);
                         // TAMIL double-part vowels [0BCA 0BC6 0BBE][0BCB 0BC7 0BBE][0BCC 0BC6 0BD7]
                         $vstr = preg_replace('/0BCA/', '0BC6 0BBE', $vstr);
                         // convert to 2 parts
                         $vstr = preg_replace('/0BCB/', '0BC7 0BBE', $vstr);
                         // pre-based is then shifted below
                         $vstr = preg_replace('/0BCC/', '0BC6 0BD7', $vstr);
                         $prebasedvowels = "(0BC6|0BC7|0BC8)";
                         // No nukta
                         $halant = "0BCD";
                         // Doesn't seem to move most in front of halanted consonants
                         $vstr = preg_replace('/([A-F0-9]{4}) ' . $prebasedvowels . '/', '\\2 \\1', $vstr);
                         // vowel sign pre-based shift left
                         // ? Only for special case KSS (already moved to left of 0BB7)
                         $vstr = preg_replace('/0B95 ' . $halant . ' ' . $prebasedvowels . ' 0BB7/', '\\1 0B95 ' . $halant . ' 0BB7', $vstr);
                     } else {
                         if ($lang == 'or') {
                             // ORIYA double-part vowels []
                             $vstr = str_replace('0B48', '0B47 0B56', $vstr);
                             // 2-part Vowel
                             $vstr = str_replace('0B4B', '0B47 0B3E', $vstr);
                             // 2-part Vowel
                             $vstr = str_replace('0B4C', '0B47 0B57', $vstr);
                             // 2-part Vowel
                             $orprebasedvowels = "(0B47)";
                             // No nukta
                             $halant = "0B4D";
                             $vstr = preg_replace('/([A-F0-9]{4}) ' . $orprebasedvowels . '/', '\\2 \\1', $vstr);
                             // vowel sign pre-based shift left
                             $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $orprebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                             // before CHAR HALANT
                             $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $orprebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                             // before CHAR HALANT
                         } else {
                             if ($lang == 'ml') {
                                 // Chillus - old forms - remove ZWNJ after
                                 // This font Volt rules recognises e.g. "Na Halant(Virama)" as ChilluN
                                 $vstr = preg_replace('/(0D23 0D4D|0D28 0D4D|0D30 0D4D|0D32 0D4D|0D33 0D4D) 200D/', '\\1', $vstr);
                                 // See Chillus in Unicode [http://en.wikipedia.org/wiki/Malayalam_script]
                                 $vstr = str_replace('0D7A', '0D23 0D4D', $vstr);
                                 // [mlymChilluNn]
                                 $vstr = str_replace('0D7B', '0D28 0D4D', $vstr);
                                 // [mlymChilluN]
                                 $vstr = str_replace('0D7C', '0D30 0D4D', $vstr);
                                 // [mlymChilluR]
                                 $vstr = str_replace('0D7D', '0D32 0D4D', $vstr);
                                 // [mlymChilluL]
                                 $vstr = str_replace('0D7E', '0D33 0D4D', $vstr);
                                 // [mlymChilluLl]
                                 /*
                                 		// Chillus - 0D7A-0D7E not in font directly, but as E005-E009
                                 		$vstr = preg_replace('/0D23 0D4D 200D/','0D7A', $vstr);
                                 		$vstr = preg_replace('/0D28 0D4D 200D/','0D7B', $vstr);
                                 		$vstr = preg_replace('/0D30 0D4D 200D/','0D7C', $vstr);
                                 		$vstr = preg_replace('/0D32 0D4D 200D/','0D7D', $vstr);
                                 		$vstr = preg_replace('/0D33 0D4D 200D/','0D7E', $vstr);
                                 
                                 		$vstr = preg_replace('/0D7F/','E004', $vstr);	// [mlymChilluK] 
                                 		$vstr = preg_replace('/0D7A/','E005', $vstr);	// [mlymChilluNn] 
                                 		$vstr = preg_replace('/0D7B/','E006', $vstr);	// [mlymChilluN] 
                                 		$vstr = preg_replace('/0D7C/','E007', $vstr);	// [mlymChilluR] 
                                 		$vstr = preg_replace('/0D7D/','E008', $vstr);	// [mlymChilluL] 
                                 		$vstr = preg_replace('/0D7E/','E009', $vstr);	// [mlymChilluLl] 
                                 */
                                 // MALAYALAM double-part vowels []
                                 $vstr = str_replace('0D4A', '0D46 0D3E', $vstr);
                                 // 2-part Vowel
                                 $vstr = str_replace('0D4B', '0D47 0D3E', $vstr);
                                 // 2-part Vowel
                                 $vstr = str_replace('0D4C', '0D46 0D57', $vstr);
                                 // 2-part Vowel
                                 $mlprebasedvowels = "(0D46|0D47|0D48)";
                                 // No nukta
                                 $halant = "0D4D";
                                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $mlprebasedvowels . '/', '\\2 \\1', $vstr);
                                 // vowel sign pre-based shift left
                                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $mlprebasedvowels . '/', '\\2 \\1 ' . $halant, $vstr);
                                 // before CHAR HALANT
                             } else {
                                 if ($lang == 'te') {
                                     // TELUGU double-part vowels [0C48 -> 0C46 0C56]
                                     $vstr = str_replace('0C48', '0C46 0C56', $vstr);
                                     // 2-part Vowel
                                     $prebasedvowels = "(0C46)";
                                     $abvvowels = "(0C3E|0C3F|0C40|0C46|0C47|0C4A|0C4B|0C4C|0C55)";
                                     // No nukta
                                     $halant = "0C4D";
                                     $tefullforms = "0C15|0C17|0C18|0C1A|0C1B|0C1C|0C1D|0C20|0C21|0C22|0C24|0C25|0C26|0C27|0C28|0C2A|0C2B|0C2D|0C2E|0C2F|0C30|0C33|0C35|0C36|0C37|0C38|0C39|E028|E029|E02A|E02B|E078|E07A|E07B";
                                     $vstr = preg_replace('/(' . $tefullforms . ') ' . $halant . ' (' . $tefullforms . ') ' . $abvvowels . '/', '\\1 \\3 ' . $halant . ' \\2', $vstr);
                                     // before HALANT
                                 } else {
                                     if ($lang == 'kn') {
                                         // KANNADA double-part vowels [0CC8 -> 0CC6 0CD6]
                                         $vstr = str_replace('0CC0', '0CBF 0CD5', $vstr);
                                         // 2-part Vowel
                                         $vstr = str_replace('0CC7', '0CC6 0CD5', $vstr);
                                         // 2-part Vowel
                                         $vstr = str_replace('0CC8', '0CC6 0CD6', $vstr);
                                         // 2-part Vowel AI - no glyph for single
                                         $vstr = str_replace('0CCA', '0CC6 0CC2', $vstr);
                                         // 2-part Vowel
                                         $vstr = str_replace('0CCB', '0CC6 0CC2 0CD5', $vstr);
                                         // 2-part Vowel
                                         $prebasedvowels = "(0CBF|0CC6)";
                                         $halant = "0CCD";
                                     }
                                 }
                             }
                         }
                     }
                 }
             }
         }
     }
     //============================
     // SPECIALS
     // DEVANAGARI Ra Halant Ra
     if ($lang == 'hi') {
         $vstr = str_replace('0930 094D 0930', 'E05D 0930', $vstr);
         // Ra Halant Ra => halfRa FullRa
         $vstr = str_replace('E047 0915', 'F59F F5BA', $vstr);
     }
     // GUJARATI
     if ($lang == 'gu') {
         $vstr = str_replace('0AB0 0AC2', 'E02E', $vstr);
         // Ra VowelUu => SpecialForm RaUu
     }
     // TELUGU Ra Halant <Consonant> Halant => halfRa Halant<Consonant> Halant
     if ($lang == 'te') {
         $vstr = preg_replace('/0C30 0C4D ([A-F0-9]{4}) 0C4D/', 'E021 0C4D \\1 0C4D', $vstr);
     }
     // KANNADA
     // Reph at end of word becomes E0CC instead of E00B
     if ($lang == 'kn') {
         $vstr = str_replace('0CB0 0CCD 0020', 'E0CC 0020', $vstr);
         // mPDF 5.3.87
     }
     //============================
     // MAIN BIT FROM VOLT RULES
     foreach ($voltdata[$font] as $rid => $reps) {
         //echo $rid . ':  ' . $vstr.'<br />';
         $vstr = preg_replace('/' . $reps['match'] . '/', $reps['replace'], $vstr);
     }
     //echo $vstr.'<br />'; exit;
     //============================
     // SPECIALS
     // KANNADA
     // <Base> <BelowBase1> [<BelowBase2> ] MatraI -> <Base/MatraI ligature> <Belowbase1> etc
     if ($lang == 'kn') {
         $matraI = "0CBF";
         $knbase = preg_split('/\\|/', "0C95|0C96|0C97|0C98|0C9A|0C9B|0C9C|0C9D|0CA0|0CA1|0CA2|0CA3|0CA4|0CA5|0CA6|0CA7|0CA8|0CAA|0CAB|0CAC|0CAD|0CAE|0CAF|0CB0|0CB2|0CB3|0CB5|0CB6|0CB7|0CB8|0CB9|E0A3|E07D|E07E");
         $knmatraIligature = preg_split('/\\|/', "E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A4|E0A1|E0A2");
         $belowbase1 = "E02E|E02F|E030|E031|E032|E033|E034|E035|E036|E037|E038|E039|E03A|E03B|E03C|E03D|E03E|E03F|E040|E041|E042|E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E|E04F|E050|E081";
         $belowbase2 = "E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E081";
         for ($i = 0; $i < count($knbase); $i++) {
             $vstr = preg_replace('/' . $knbase[$i] . ' (' . $belowbase1 . ') (' . $belowbase2 . ') ' . $matraI . '/', $knmatraIligature[$i] . ' \\1 \\2', $vstr);
             $vstr = preg_replace('/' . $knbase[$i] . ' (' . $belowbase1 . ') ' . $matraI . '/', $knmatraIligature[$i] . ' \\1', $vstr);
         }
     }
     // KANNADA
     // [KanTtaFull] [matraI] => [KanTtaPartial] [matraI]
     if ($lang == 'kn') {
         $vstr = preg_replace('/0C9F ' . $matraI . '/', 'E015 ' . $matraI, $vstr);
     }
     // ORIYA
     if ($lang == 'or') {
         // SpecialCase Ra[0B30] Halant still left before [oryaFullNnNna] => E00F
         $vstr = preg_replace('/0B30 ' . $halant . ' E00F/', 'E00F E069', $vstr);
         // convert to Reph
     }
     //============================
     // SHIFT REPH
     // DEVANAGARI Shift Reph [E015]
     if ($lang == 'hi') {
         // FIRSTLY - halfRa = E05D - Change this to Reph [E015]
         $himatchhalfforms = "E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E|E04F|E050|E051|E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E075|E076|E077|E078|E079|E07A|E07B|E07C|E07D|E07E|E07F|E080|E081|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E0D3|E0D4|E0D5|E0D6|E0D7|E0D8|E0D9|E0DA|E0DB|E0DC|E0DD|E0DE|E0DF|E0E0|E0E1|E0E2|E0E3|E0E4|E0E5|E0E6|E0E7|E0E8|E0E9|E0EA|E0EB|E0EC|E0ED|E0EE|E0EF|E0F0|E0F1|E0F2|E0F3|E0F4|E0F5|E0F6|E0F7|E0F8|E0F9|E0FA|E0FB|E0FC|E0FD|E0FE|E0FF|E100|E101|E102|E103|E104|E105|E106|E107|E108|E109|E10A|E10B|E10C|E10D|E10E|E10F|E110|E111|E112|E113|E114|E115|E116|E117|E118|E119|E11A|E13D|E13E|E13F|E140|E141|E142|E143|E144|E145";
         $himatchfullforms = "0915|0916|0917|0918|0919|091A|091B|091C|091D|091E|091F|0920|0921|0922|0923|0924|0925|0926|0927|0928|092A|092B|092C|092D|092E|092F|0930|0932|0933|0935|0936|0937|0938|0939|E028|E029|0958|0959|095A|E02A|E02B|E02C|E02D|095B|E02E|E02F|E030|E031|095C|095D|E032|E033|E034|E035|E036|0929|E037|095E|E038|E039|E03A|095F|0931|E03B|0934|E03C|E03D|E03E|E03F|E040|E041|E042|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5|E0A6|E0A7|E0A8|E0A9|E0AA|E0AB|E0AC|E0AD|E0AE|E0AF|E0B0|E0B1|E0B2|E0B3|E0B4|E0B5|E0B6|E0B7|E0B8|E0B9|E0BA|E0BB|E0BC|E0BD|E0BE|E0BF|E0C0|E0C1|E0C2|E0C3|E0C4|E0C5|E0C6|E0C7|E0C8|E0C9|E0CA|E0CB|E0CC|E0CD|E0CE|E0CF|E0D0|E0D1|E0D2|E11E|E11F|E120|E121|E122|E123|E124|E125|E126|E127|E128|E129|E12A|E12B|E12C|E12D|E12E|E12F|E130|E131|E132|E133|";
         $siddhantaconj1 = "F363|F364|F365|F366|F367|F368|F369|F36A|F36B|F36C|F36D|F36E|F36F|F370|F371|F372|F373|F374|F375|F376|F377|F378|F379|F37A|F37B|F37C|F37D|F37E|F37F|F380|F381|F382|F383|F384|F385|F3BF|F3C0|F3C1|F3C2|F3C3|F3C4|F3C5|F3C6|F3C7|F3C8|F3C9|F3CA|F3CB|F3CC|F3CD|F3CE|F3CF|F3D0|F3D1|F3D2|F3D3|F3D4|F3D5|F3D6|F3D7|F3D8|F3D9|F3DA|F3DB|F3DC|F3DD|F3DE|F3DF|F3E0|F3E1|F3E2|F3E3|F3E4|F3E5|F3E6|F3E7|F3E8|F3F9|F3FA|F3FB|F3FC|F3FD|F3FE|F3FF|F400|F401|F402|F403|F404|F405|F406|F407|F408|F409|F40A|F40B|F40C|F40D|F40E|F40F|F410|F411|F412|F413|F414|F415|F416|F417|F418|F419|F41A|F41B|F41C|F41D|F41E|F41F|F420|F421|F422|F423|F424|F425|F426|F427|F428|F429|F42A|F42B|F42C|F42D|F42E|F42F|F430|F431|F432|F433|F434|F435|F436|F437|F438|F439|F43A|F43B|F43C|F43D|F43E|F43F|F440|F441|F442|F443|F444|F445|F446|F447|F448|F449|F44A|F44B|F44C|F44D|F44E|F44F|F450|F451|F452|F453|F454|F455|F456|F457|F458|F459|F45A|F45B|F45C|F45D|F45E|F45F|F460|F461|F462|F463|F464|F465|F466|F467|F468|F469|F46A|F46B|F46C|F46D|F46E|F46F|F470|F471|F472|F473|F474|F475|F476|F477|F478|F479|F47A|F47B|F47C|F47D|F47E|F47F|F480|F481|F482|F483|F484|F485|F486|F487|F488|F489|F48A|F48B|F48C|F48D|F48E|F48F|F490|F491|F492|F493|F494|F495|F496|F497|F498|F499|F49A|F49B|F49C|F49D|F49E|F49F|F4A0|F4A1|F4A2|F4A3|F4A4|F4A5|F4A6|F4A7|F4A8|F4A9|F4AA|F4AB|F4AC|F4AD|F4AE|F4AF|F4B0|F4B1|F4B2|F4B3|F4B4|F4B5|F4B6|F4B7|F4B8|F4B9|F4BA|F4BB|F4BC|F4BD|F4BE|F4BF|F4C0|F4C1|F4C2|F4C3|F4C4|F4C5|F4C6|F4C7|F4C8|F4C9|F4CA|F4CB|F4CC|F4CD|F4CE|F4CF|F4D0|F4D1|F4D2|F4D3|F4D4|F4D5|F4D6|F4D7|F4D8|F4D9|F4DA|F4DB|F4DC|F4DD|F4DE|F4DF|F4E0|F4E1|F4E2|F4E3|F4E4|F4E5|F4E6|F4E7|F4E8|F4E9|F4EA|F4EB|F4EC|F4ED|F4EE|F4EF|F4F0|F4F1|F4F2|F4F3|F4F4|F4F5|F4F6|F4F7|F4F8|F4F9|F4FA|F4FB|F4FC|F4FD|F4FE|F4FF|F500|F501|F502|F503|F504|F505|F506|F507|F508|F509|F50A|";
         $vstr = preg_replace('/E05D (' . $himatchhalfforms . '|' . $himatchfullforms . ')/', 'E015 \\1', $vstr);
         // Reph = E015 - Shift Right to just after end of syllable
         // FullAllForms + HalfAllForms + 093E matraA
         while (preg_match('/E015 (' . $himatchhalfforms . ')/', $vstr)) {
             $vstr = preg_replace('/E015 (' . $himatchhalfforms . ')/', '\\1 E015', $vstr);
         }
         $vstr = preg_replace('/E015 (' . $himatchfullforms . ')/', '\\1 E015', $vstr);
         $vstr = preg_replace('/E015(' . $siddhantaconj1 . ')/', '\\1 E015', $vstr);
         // Now shift it beyond post-based vowels  // ??? Need to add others e.g. 0949,094A,094B,094C + presentation forms like E198
         $vstr = str_replace('E015 093E', '093E E015', $vstr);
         $vstr = preg_replace('/E015 (0940|E194|E195|E196|E197|E198|094B|094C)/', '\\1 E014', $vstr);
         // (Small) reph [E014] to Right of matraI
         $vstr = str_replace('E015 0947', '0947 E014', $vstr);
         // (Small) reph [E014] to Right of matraI
         // replacing top-below conjoints like TTa, Thva etc.
         $toporiginal = array('E047', 'E049', 'E057 E04D', 'E057 E04E', 'E062 E04D', 'E062 E04E', 'E04D', 'E04E', 'E04F', 'E050');
         $bottomoriginal = array('0915', 'F3F9', 'F3FB', 'E043 0916', 'F3FC', 'E043 091B', 'E11B', 'E043 E049 0928', 'E043 E049 091A', 'E043 E049 0932', 'E043 E049 0935', 'F403', 'F405', 'E043 0925', 'F407', 'E08B', 'F40D', 'F411', 'E043 0936', 'E043 F4DD', 'E043 F4E0', 'E043 E0A9', 'E043 F4E1', 'E043 F4E5', 'E043 0938', '0916', 'E08C', 'E044 0932', 'E044 0935', '0917', 'E045 0918', 'E045 0927', 'E045 E055 0935', 'F414', 'E08D', 'F415', 'E045 0935', '0918', 'F417', 'E08E', 'E046 0935', '0919', 'E047 0928', 'E047 091A', 'E047 0932', 'E047 0935', '091A', 'E048 092B', 'E090', 'F423', 'F41A', 'F425', '091B', 'E049 0928', 'E049 0932', 'E049 0935', '091C', 'E092', 'E04A 0935', '091D', 'E093', 'E04B 0935', '091E', 'F43E', 'E094', '091F', 'E04D 0928', 'E04D 0932', 'E04D 0935', '0920', 'E04E 0928', 'E04E 0932', 'E04E 0935', 'E04F 0921', '0921', 'E04F 0928', 'E04F 0932', 'E04F 0935', '0922', 'E050 0928', 'E050 0932', 'E050 0935', '0923', 'F43F', 'E099', 'E051 0935', '0924', 'F444', 'E09A', 'F391', 'F44A', 'E052 0938', '0925', 'E09B', 'E053 0935', '0926', 'E09C', 'F492', 'F493', '0927', 'F497', 'E09D', 'E055 0935', '0928', 'E09E', 'E056 0935', '092A', 'F4B1', 'F4B4', 'E09F', 'F4B5', 'F4B7', 'E057 0938', '092B', 'E0A0', 'F4BD', '092C', 'F4C1', 'E0A1', 'F4C2', 'F4C4', '092D', 'E0A2', 'F4C7', 'F4CC', '092E', 'F4CB', 'E0A3', 'F4CF', 'E05B 0935', '0932', 'E0A6', 'F4D4', 'F4D6', '0933', 'E05F 0935', '0935', 'F4D9', 'E0A8', 'F4DA', 'F4DC', '0936', 'F4DD', 'F4E0', 'E0A9', 'F4E1', 'F4E5', '0937', 'E062 0915', 'F4E7', 'F4E8', 'E062 0923', 'E062 092B', 'F4E9', 'E062 0925', 'F4EA', 'E0AA', 'F4EB', 'F4ED', '0938', 'E063 0915', 'E063 0916', 'E063 0924', 'F4F4', 'E063 0925', 'F4F5', 'E063 092A', 'E063 E09F', 'E063 092B', 'E0A3', 'E063 0932', 'E063 0935', '0939', 'F4F8', 'E0AC', 'F500', 'F504', 'E028', 'E065 0923', 'E0AD', 'E065 0932', 'E065 0935', 'E029', 'E0AE');
         $topreplacement = array('F59F', 'F5A0', 'F5A7', 'F5A8', 'F5A9', 'F5AA', 'F5A2', 'F5A3', 'F5A4', 'F5A5');
         $bottomreplacement = array('F5BA', 'F5BB', 'F5BC', 'F5BD', 'F5BE', 'F5BF', 'F5C5', 'F5C0', 'F5C1', 'F5C2', 'F5C4', 'F5C6', 'F5C7', 'F5C8', 'F5C9', 'F5CA', 'F5CC', 'F5CE', 'F5CF', 'F5D2', 'F5D3', 'F5D4', 'F5D5', 'F5D7', 'F5D8', 'F5E6', 'F5E7', 'F5E8', 'F5EA', 'F5EB', 'F5EC', 'F5EE', 'F5EF', 'F5F1', 'F5F2', 'F5F4', 'F5F6', 'F5F7', 'F5F8', 'F5F9', 'F5FB', 'F5FC', 'F5FE', 'F5FF', 'F600', 'F602', 'F603', 'F604', 'F605', 'F606', 'F608', 'F609', 'F60A', 'F60C', 'F60E', 'F610', 'F611', 'F612', 'F613', 'F614', 'F615', 'F616', 'F61C', 'F61D', 'F61E', 'F61F', 'F620', 'F622', 'F624', 'F625', 'F626', 'F628', 'F62A', 'F62D', 'F62B', 'F62E', 'F630', 'F632', 'F633', 'F634', 'F636', 'F638', 'F639', 'F63A', 'F63B', 'F63C', 'F641', 'F642', 'F643', 'F644', 'F645', 'F646', 'F654', 'F655', 'F656', 'F657', 'F658', 'F65A', 'F65C', 'F65D', 'F65E', 'F65F', 'F661', 'F669', 'F66A', 'F66B', 'F66C', 'F66D', 'F66E', 'F670', 'F671', 'F673', 'F674', 'F675', 'F676', 'F677', 'F678', 'F679', 'F67A', 'F67B', 'F67D', 'F67E', 'F67F', 'F681', 'F683', 'F68A', 'F68B', 'F68C', 'F68E', 'F690', 'F693', 'F695', 'F696', 'F698', 'F699', 'F69A', 'F69B', 'F69C', 'F69D', 'F69E', 'F6A0', 'F6A1', 'F6A4', 'F6A5', 'F6A6', 'F6A8', 'F6AA', 'F6AB', 'F6AC', 'F6AD', 'F6AE', 'F6AF', 'F6B1', 'F6B2', 'F6B3', 'F6B4', 'F6B5', 'F6B7', 'F6B9', 'F6BA', 'F6BB', 'F6BC', 'F6BD', 'F6BE', 'F6BF', 'F6C0', 'F6C1', 'F6C2', 'F6C3', 'F6C4', 'F6C6', 'F6C8', 'F6CA', 'F6CB', 'F6CD', 'F6CF', 'F6D1', 'F6D3', 'F6D4', 'F6D5', 'F6D6', 'F6D8', 'F6E1', 'F6E2');
         $j = 0;
         while ($j < 184) {
             $vstr = str_replace('E047' . ' ' . $bottomoriginal[$j], 'F59F' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E049' . ' ' . $bottomoriginal[$j], 'F5A0' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E057 E04D' . ' ' . $bottomoriginal[$j], 'F5A7' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E057 E04E' . ' ' . $bottomoriginal[$j], 'F5A8' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E062 E04D' . ' ' . $bottomoriginal[$j], 'F5A9' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E062 E04E' . ' ' . $bottomoriginal[$j], 'F5AA' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E04D' . ' ' . $bottomoriginal[$j], 'F5A2' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E04E' . ' ' . $bottomoriginal[$j], 'F5A3' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E04F' . ' ' . $bottomoriginal[$j], 'F5A4' . ' ' . $bottomreplacement[$j], $vstr);
             $vstr = str_replace('E050' . ' ' . $bottomoriginal[$j], 'F5A5' . ' ' . $bottomreplacement[$j], $vstr);
             $j++;
         }
         // trying for Tya, Dhya etc.
         $vstr = str_replace('E04D 092F', '091F F56F', $vstr);
         $vstr = str_replace('E04E 092F', '0920 F586', $vstr);
         $vstr = str_replace('E04F 092F', '0921 F586', $vstr);
         $vstr = str_replace('E050 092F', '0922 F586', $vstr);
     } else {
         if ($lang == 'bn') {
             $bnfullconjuncts = "E002|E003|E004|E041|E042|E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E|E04F|E050|E051|E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E075|E076|E077|E078|E079|E07A|E07B|E07C|E07D|E07E|E07F|E080|E081|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5|E0A6|E0A7|E0A8|E0A9|E0AA|E0AB|E0AC|E0AD|E0AE|E0AF|E0B0|E0B1|E0B2|E0B3|E0B4|E0B5|E0B6|E0B7|E0B8|E0B9|E0BA|E0BB|E0BC|E0BD|E0BE|E0BF|E0C0|E0C1|E0C2|E0C3|E0C4|E0C5|E0C6|E0C7|E0C8|E0C9|E0CA|E0CB|E0CC|E0CD|E0CE|E0CF|E0D0|E0D1|E0D2|E0D3|E0D4|E0D5|E0D6|E0D7|E0D8|E0D9|E0DA|E0DB|E0DC|E0DD|E0DE|E0DF|E0E0|E0E1|E0E2|E0E3|E0E4|E0E5|E0E6|E0E7|E0E8|E0E9|E0EA|E0EB|E0EC|E0ED|E0EE|E0EF|E0F0|E0F1|E0F2|E0F3|E0F4|E0F5|E0F6|E0F7|E0F8|E0F9|E0FA|E0FB|E0FC|E0FD|E0FE|E0FF|E100|E101|E102|E103|E104|E105|E106|E107|E108|E109|E10A|E10B|E10C|E10D|E10E|E10F|E110|E111|E112|E113|E114|E115|E116|E117|E118|E119|E11A|E11B|E11C|E11D|E11E|E11F|E120|E121|E122|E123|E124|E125|E126|E127|E128|E129|E12A|E12B|E12C|E12D|E12E|E12F|E130|E131|E132|E133|E134|E135|E136|E137|E138|E139|E13A|E13B|E13C|E13D|E13E|E13F|E140|E141|E142|E143|E144|E145|E146|E147|E148|E149|E14A|E14B|E14C|E14D|E14E|E14F|E150|E151|E152|E153|E154|E155|E156|E157|E158|E159|E15A|E15B|E15C|E15D|E15E|E15F|E160|E161|E162|E163|E164|E165|E166|E167|E168|E169|E16A|E16B|E16C|E16D|E16E|E16F|E170|E171|E172|E173|E174|E175|E176|E177|E178|E179|E17A|E17B|E17C|E17D|E17E|E17F|E180|E181|E182|E183|E184|E185|E186|E187|E188|E189|E18A|E18B|E18C|E18D|E18E|E18F|E190|E191|E192|E193|E194|E195|E196|E197|E198|E199|E19A";
             // $bnfullcons - set above;
             $vstr = preg_replace('/E068 (' . $bnfullconjuncts . '|' . $bnfullcons . ')/', '\\1 E068', $vstr);
             // ? Need to shift it beyond post-base vowels 09BE, 09C0, 09D7  haven't found so can't test??
             $vstr = preg_replace('/E068 (09BE|09C0|09D7)/', '\\1 E068', $vstr);
         } else {
             if ($lang == 'gu') {
                 $gufullforms = "0A95|0A96|0A97|0A98|0A99|0A9A|0A9B|0A9C|0A9D|0A9E|0A9F|0AA0|0AA1|0AA2|0AA3|0AA4|0AA5|0AA6|0AA7|0AA8|0AAA|0AAB|0AAC|0AAD|0AAE|0AAF|0AB0|0AB2|0AB3|0AB5|0AB6|0AB7|0AB8|0AB9|E002|E003|E004|E005|E006|E007|E008|E009|E00A|E00B|E00C|E00D|E00E|E00F|E010|E011|E012|E013|E014|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E075|E076|E077|E078|E079|E07A|E07B|E07C|E07D|E07E|E07F|E080|E081|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5";
                 $vstr = preg_replace('/E032 (' . $gufullforms . ')/', '\\1 E032', $vstr);
                 // Now shift it beyond post-based vowels  // ??? Need to add others e.g. 0949,094A,094B,094C + presentation forms like E198
                 // ? Need to shift it beyond post-base vowels 0ABE, 0AC0 haven't found so can't test??
                 $vstr = preg_replace('/E032 (0ABE|0AC0)/', '\\1 E032', $vstr);
             } else {
                 if ($lang == 'te') {
                     // tefullforms defined earlier
                     $tepartialforms = "E00D|E00E|E00F|E010|E011|E012|E013|E014|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E07C|E07D|E07E";
                     $matraligs = "E07F|E080|E081|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5|E0A6|E0A7|E0A8|E0A9|E0AA|E0AB|E0AC|E0AD|E0AE|E0AF";
                     $tevowels = "0C3E|0C3F|0C40|0C46|0C47|0C56|0C4A|0C4B|0C4C" . "|0C41|0C42|0C43|0C44";
                     // post matras
                     $vstr = preg_replace('/(' . $tevowels . ') (E046|E069|E077)/', '\\2 \\1', $vstr);
                     while (preg_match('/(' . $tepartialforms . ') (E046|E069|E077)/', $vstr)) {
                         $vstr = preg_replace('/(' . $tepartialforms . ') (E046|E069|E077)/', '\\2 \\1', $vstr);
                     }
                     $vstr = preg_replace('/(' . $tefullforms . '|' . $matraligs . ') (E046|E069|E077)/', '\\2 \\1', $vstr);
                 } else {
                     if ($lang == 'kn') {
                         $knfullforms = "0C95|0C96|0C97|0C98|0C99|0C9A|0C9B|0C9C|0C9D|0C9E|0C9F|0CA0|0CA1|0CA2|0CA3|0CA4|0CA5|0CA6|0CA7|0CA8|0CAA|0CAB|0CAC|0CAD|0CAE|0CAF|0CB0|0CB1|0CB2|0CB3|0CB5|0CB6|0CB7|0CB8|0CB9|E07D|E07E|E0A3";
                         $knpartialforms = "E00C|E00D|E00E|E00F|E010|E011|E012|E013|E014|0C9E|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E028|E029|E02A|E02B|E02C|E02D|E07F";
                         while (preg_match('/E00B (' . $knpartialforms . ')/', $vstr)) {
                             $vstr = preg_replace('/E00B (' . $knpartialforms . ')/', '\\1 E00B', $vstr);
                         }
                         // mPDF 5.3.47  Also move Reph to right of matraIligatures
                         $knfullforms .= "|E082|E083|E084|E085|E086|E087|E088|E089|E08A|E08B|E08C|E08D|E08E|E08F|E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A4|E0A1|E0A2";
                         $vstr = preg_replace('/E00B (' . $knfullforms . ')/', '\\1 E00B', $vstr);
                         // ? Need to shift it beyond base or below-base forms - haven't found so can't test??
                         // mPDF 5.3.87
                         // E004 added to list (which is a transformed version of 0CBE)
                         $knvowels = "0CBE|0CC0|0CC1|0CC2|0CC3|0CC4|0CC7|0CC8|0CCA|0CCB|0CD5|0CD6|E004";
                         $vstr = preg_replace('/E00B (' . $knvowels . ')/', '\\1 E00B', $vstr);
                     } else {
                         if ($lang == 'or') {
                             $orrephs = "E069|E06A|E06B|E06C";
                             $orfullforms = "0B15|0B16|0B17|0B18|0B19|0B1A|0B1B|0B1C|0B1D|0B1E|0B1F|0B20|0B21|0B22|0B23|0B24|0B25|0B26|0B27|0B28|0B29|0B2A|0B2B|0B2C|0B2D|0B2E|0B2F|0B30|0B31|0B32|0B33|0B34|0B35|0B36|0B37|0B38|E003|E004|E005|E006|E007|E008|E009|E00A|E00B|E00C|E00D|E00E|E00F|E010|E011|E012|E013|E014|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E028|E029|E02A|E02B|E02C|E02D|E02E|E02F|E030|E031|E032|E033|E034|E035|E036|E037";
                             // E123 - E147  FullHalant forms ? add to FullForms
                             $orpartialforms = "E090|E091|E092|E093|E094|E095|E096|E097|E098|E099|E09A|E09B|E09C|E09D|E09E|E09F|E0A0|E0A1|E0A2|E0A3|E0A4|E0A5|E0A6|E0A7|E0A8|E0A9|E0AA|E0AB|E0AC|E0AD|E0AE|E0AF|E0B0|E0B1|E0B2|E0B3|E0B4|E0B5|E0B6|E0B7|E0B8|E0B9|E0BA|E0BB|E0BC|E0BD|E0BE|E0BF|E0C0|E0C1|E0C2|E0C3|E0C4|E0C5|E0C6|E0C7|E0C8|E0C9|E0CA|E0CB|E0CC|E0CD|E0CE|E0CF|E0D0|E0D1|E0D2|E0D3|E0D4|E0D5|E0D6|E0D7|E0D8|E0D9|E0DA|E0DB|E0DC|E0DD|E0DE|E0DF|E0E0|E0E1|E0E2|E0E3|E0E4|E0E5|E0E6|E0E7|E0E8|E0E9|E0EA|E0EB|E0EC|E0ED|E0EE|E0EF|E0F0|E0F1|E0F2|E0F3|E0F4|E0F5";
                             // Combined MatraIReph[E06D] split [0B3F & E069] to allow reph to be shifted forwards
                             $vstr = preg_replace('/(' . $orfullforms . ') E06D (' . $orfullforms . ') 0B3E/', '\\1 0B3F E069 \\2 0B3E', $vstr);
                             while (preg_match('/(' . $orrephs . ') (' . $orpartialforms . ')/', $vstr)) {
                                 $vstr = preg_replace('/(' . $orrephs . ') (' . $orpartialforms . ')/', '\\2 \\1', $vstr);
                             }
                             $vstr = preg_replace('/(' . $orrephs . ') (' . $orfullforms . ')/', '\\2 \\1', $vstr);
                             // Combine Reph and MatraI
                             $vstr = str_replace('E069 0B3F', 'E06D', $vstr);
                             // Reph and MatraI -> MatraIReph
                             $vstr = str_replace('E06A 0B3F', 'E06E', $vstr);
                             // Reph and MatraI -> MatraIReph
                             $vstr = str_replace('E06B 0B3F', 'E06F', $vstr);
                             // Reph and MatraI -> MatraIReph
                         } else {
                             if ($lang == 'ml') {
                                 $halant = "0D4D";
                                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' 0D30/', 'E00E \\1', $vstr);
                                 // 0D30 = Ra
                                 $vstr = preg_replace('/([A-F0-9]{4}) ' . $halant . ' ' . $mlprebasedvowels . ' 0D30/', '\\2 E00E \\1', $vstr);
                                 // 0D30 = Ra
                                 $mlfullforms = "0D15|0D16|0D17|0D18|0D19|0D1A|0D1B|0D1C|0D1D|0D1E|0D1F|0D20|0D21|0D22|0D23|0D24|0D25|0D26|0D27|0D28|0D2A|0D2B|0D2C|0D2D|0D2E|0D2F|0D30|0D31|0D32|0D33|0D34|0D35|0D36|0D37|0D38|0D39" . "|E010|E011|E012|E013|E014|E015|E016|E017|E018|E019|E01A|E01B|E01C|E01D|E01E|E01F|E020|E021|E022|E023|E024|E025|E026|E027|E028|E029|E02A|E02B|E02C|E02D|E02E|E02F|E030|E031|E032|E033|E034|E035|E036|E037|E038|E039|E03A|E03B|E03C|E03D|E03E|E03F|E040|E041|E042|E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E|E04F|E050|E051|E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071|E072|E073|E074|E075|E076|E077|E078|E079|E07A|E07B|E07C|E07D";
                                 // = FullConsonants + FullConjuncts
                                 // = Add Chillu characters	// mPDF 5.0.024
                                 $mlfullforms .= "|E004|E005|E006|E007|E008|E009";
                                 while (preg_match('/(' . $mlfullforms . ') E00E/', $vstr)) {
                                     $vstr = preg_replace('/(' . $mlfullforms . ') E00E/', 'E00E \\1', $vstr);
                                 }
                             }
                         }
                     }
                 }
             }
         }
     }
     //============================
     // SHIFT post-based vowels to Left of SmallForms (NOT to left of full forms)
     // TELUGU Shift
     if ($lang == 'te') {
         // NB $tevowels defined above
         // NB $tefullforms defined above
         $tebelowbase1 = "E02C|E02D|E02E|E02F|E030|E031|E032|E033|E034|E035|E036|E037|E038|E039|E03A|E03B|E03C|E03D|E03E|E03F|E040|E041|E042|E043|E044|E045|E046|E047|E048|E049|E04A|E04B|E04C|E04D|E04E";
         //'Small1KaToHa'
         $tebelowbase2 = "E04F|E050|E051|E052|E053|E054|E055|E056|E057|E058|E059|E05A|E05B|E05C|E05D|E05E|E05F|E060|E061|E062|E063|E064|E065|E066|E067|E068|E069|E06A|E06B|E06C|E06D|E06E|E06F|E070|E071";
         // 'Small2KaToHa'
         $vstr = preg_replace('/(' . $tebelowbase2 . ') (' . $tevowels . ')/', '\\2 \\1', $vstr);
         $vstr = preg_replace('/(' . $tebelowbase1 . ') (' . $tevowels . ')/', '\\2 \\1', $vstr);
     } else {
         if ($lang == 'kn') {
             $knvowels = "0CBE|0CC0|0CC1|0CC2|0CC3|0CC4|0CC7|0CC8|0CCA|0CCB|0CD5|0CD6" . "|E004|E007|E008|E009|E00A";
             // NB $knvowels defined above
             // NB $fullforms defined above
             // $belowbase1/2 defined above
             $vstr = preg_replace('/(' . $belowbase2 . ') (' . $knvowels . ')/', '\\2 \\1', $vstr);
             // mPDF 5.3.87
             $vstr = preg_replace('/(' . $belowbase1 . ') (' . $knvowels . ')/', '\\2 \\1', $vstr);
             //$vstr = preg_replace('/('.$fullforms.') ('.$knvowels.')/', '\\2 \\1', $vstr);
         }
     }
     //============================
     // Clear unwanted ZWJ, ZWNJ
     // MALAYALAM
     if ($lang == 'ml') {
         $vstr = preg_replace('/(200C|200D) /', '', $vstr);
     }
     //============================
     // END & PUT IT BACK TOGETHER
     $vstr = preg_replace('/^0020 (.*) 0020$/', '\\1', $vstr);
     $varr = explode(" ", $vstr);
     $e = '';
     //	foreach($varr AS $v) {
     //		$e.=code2utf(hexdec($v));
     //	}
     // E00 SERIES REPLACED BY 040 AND E01 SERIES REPLACED BY 041
     $array = array('E001' => 'F5B5', 'E002' => 'F5B6', 'E003' => 'F5B7', 'E004' => 'F5AD', 'E005' => 'F5AE', 'E006' => 'F5AF', 'E007' => 'F5B0', 'E008' => 'F5B1', 'E009' => '0915', 'E00A' => '092A', 'E00B' => '040B', 'E00C' => '040C', 'E00D' => '040D', 'E00E' => '040E', 'E00F' => '040F', 'E010' => 'F307', 'E011' => '0411', 'E012' => '0412', 'E013' => 'F1B0', 'E014' => 'F306', 'E015' => 'F306', 'E016' => '0416', 'E017' => '0417', 'E026' => 'F121', 'E027' => 'F123', 'E028' => 'F337', 'E029' => 'F338', 'E043' => 'F33A', 'E044' => 'F33B', 'E045' => 'F33C', 'E046' => 'F33D', 'E048' => 'F33E', 'E049' => '091B 094D', 'E04A' => 'F33F', 'E04B' => 'F340', 'E04C' => 'F343', 'E051' => 'F344', 'E052' => 'F346', 'E053' => 'F347', 'E054' => '0926 O04D', 'E055' => 'F348', 'E056' => 'F34A', 'E057' => 'F34B', 'E058' => 'F34C', 'E059' => 'F34D', 'E05A' => 'F34E', 'E05B' => 'F350', 'E05C' => 'F351', 'E05D' => 'F352', 'E05E' => 'F353', 'E05F' => 'F355', 'E060' => 'F356', 'E061' => 'F357', 'E062' => 'F35A', 'E063' => 'F35B', 'E064' => 'F35C', 'E065' => 'F35D', 'E066' => 'F35F', 'E08B' => 'F363', 'E08C' => 'F364', 'E08D' => 'F365', 'E08E' => 'F366', 'E08F' => '0919 094D 0930', 'E090' => 'F367', 'E091' => '091B 094D 0930', 'E092' => 'F368', 'E093' => 'F369', 'E094' => 'F36B', 'E095' => '091F 094D 0930', 'E096' => '0920 094D 0930', 'E097' => '0921 094D 0930', 'E098' => '0922 094D 0930', 'E099' => 'F36C', 'E09A' => 'F36E', 'E09B' => 'F36F', 'E09C' => 'F370', 'E09D' => 'F371', 'E09E' => 'F373', 'E09F' => 'F374', 'E0A0' => 'F375', 'E0A1' => 'F376', 'E0A2' => 'F377', 'E0A3' => 'F379', 'E0A4' => 'F37A', 'E0A5' => '', 'E0A6' => 'F37B', 'E0A7' => '', 'E0A8' => 'F37C', 'E0A9' => 'F37D', 'E0AA' => 'F37E', 'E0AB' => 'F37F', 'E0AC' => 'F380', 'E0AD' => 'F382', 'E0AE' => 'F383', 'E0D3' => 'F386', 'E0D4' => 'F387', 'E0D5' => 'F388', 'E0D6' => 'F389', 'E0D8' => 'F38A', 'E0DA' => 'F38B', 'E0DB' => 'F38C', 'EODC' => 'F38E', 'EOE1' => 'F38F', 'E0E2' => 'F391', 'EOE3' => 'F392', 'E0E5' => 'F393', 'E0E6' => 
'F395', 'E0E7' => 'F396', 'E0E8' => 'F397', 'E0E9' => 'F398', 'E0EA' => 'F399', 'EOEB' => 'F39B', 'EOEC' => 'F39C', 'E0EE' => 'F39D', 'E0F0' => 'F39E', 'E0F1' => 'F39F', 'E0F2' => 'F3A0', 'EOF3' => 'F3A1', 'E0F4' => 'F3A2', 'E0F5' => 'F3A4', 'E0F6' => 'F3A7', 'E11B' => 'F3FF', 'E11C' => 'F413', 'E11D' => 'F59F F68A', 'E11E' => 'F444', 'E11F' => 'F449', 'E120' => 'F44F', 'E121' => 'F469', 'E122' => 'F45A', 'E123' => 'F46A', 'E124' => 'F477', 'E125' => 'F47C', 'E126' => 'F480', 'E127' => 'F482', 'E128' => 'F48E', 'E129' => 'F493', 'E12A' => 'F4B1', 'E12B' => 'F4E0', 'E12C' => 'F4DD', 'E12D' => 'F4E2', 'E12E' => 'F4E5', 'E12F' => 'F4E7', 'E130' => 'F4E8', 'E16D' => 'F55E', '091C 094D 091C' => 'F459', 'E14B' => 'F30D', 'E14C' => 'F311', 'E14D' => 'F314', 'E14E' => 'F318', 'E131' => 'F4E7', 'E194' => 'F325', 'E195' => 'F326', 'E196' => 'F327', 'E198' => 'F328', 'E133' => 'F4F4', 'E134' => 'F4F8', 'E135' => 'F4FB', 'E136' => 'F4FD', 'E137' => 'F4F6', 'E138' => 'F500', 'E139' => 'F504', 'E13B' => 'F413', 'E13D' => 'F3F3', 'E140' => 'F533', 'E141' => 'F544', 'E142' => 'F543', 'E143' => 'F545', 'E145' => 'F548', 'E153' => 'F556', 'E154' => 'F557', 'E11D' => 'F59F F68A');
     foreach ($varr as $v) {
         if ($font == 'ind_hi_2_001' && isset($array[$v])) {
             $v = $array[$v];
         }
         $e .= code2utf(hexdec($v));
     }
     //============================
     return $e;
 }
Example #13
0
  /**
   * Turn a hexadecimal code string into output text.
   *
   * The hex string is converted to a number with hexdec(). When $lo is false
   * and that number is below 128, the input is returned untouched as the
   * hexadecimal character reference "&#x<hex>;". In every other case the
   * number and the $lo flag are handed to code2utf() (defined elsewhere) and
   * its result is returned.
   *
   * @param string $hex Hexadecimal digits of the code, e.g. "41".
   * @param bool   $lo  Forwarded to code2utf(); when false, codes below 128
   *                    are kept as entities instead of being converted.
   * @return string
   */
  function codeHex2utf($hex, $lo = true)
  {
      $codePoint = hexdec($hex);
      $keepAsEntity = !$lo && $codePoint < 128;
      if ($keepAsEntity) {
          // i.e. no change: low codes stay as a character reference
          return '&#x' . $hex . ';';
      }

      return code2utf($codePoint, $lo);
  }
Example #14
0
/**
 * Decode numeric character references (and optionally named HTML entities)
 * into UTF-8 text.
 *
 * Three escape forms are always handled, each by passing the code to
 * code2utf() (defined elsewhere):
 *   - hexadecimal references   &#xHHHH;  (case-insensitive)
 *   - JavaScript-style escapes %uHHHH    (case-insensitive)
 *   - decimal references       &#NNNN;
 *
 * Named entities such as &eacute; or &amp; are translated only when
 * $htmltrans is true.
 *
 * @param string $string    Text that may contain escapes/entities.
 * @param bool   $htmltrans When true, also translate named HTML entities.
 * @return string The decoded text.
 */
function html_entity_decode_utf8($string, $htmltrans = false)
{
    // Entity => UTF-8 character map, built once on first use and cached.
    static $trans_tbl;

    $string = preg_replace_callback(array('~&#x([0-9a-f]+);~i', '~%u([0-9a-f]{4})~i'), function ($matches) {
        return code2utf(hexdec($matches[1]));
    }, $string);
    $string = preg_replace_callback('~&#([0-9]+);~', function ($matches) {
        return code2utf($matches[1]);
    }, $string);

    if ($htmltrans) {
        if (!isset($trans_tbl)) {
            // Ask for the table explicitly in UTF-8. Since PHP 5.4 the default
            // encoding of get_html_translation_table() is already UTF-8, so
            // running utf8_encode() over its characters (as was done before)
            // double-encoded every non-ASCII character. The table maps
            // character => entity; flip it to get entity => character.
            $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8'));
        }
        $string = strtr($string, $trans_tbl);
    }

    return $string;
}
Example #15
0
/**
 * Decode JavaScript escape()-style %uHHHH sequences in a string.
 *
 * Every "%u" followed by exactly four uppercase hexadecimal digits is
 * replaced by the result of code2utf() (defined elsewhere) for that code.
 * All other text, including lowercase-hex sequences, is left untouched.
 *
 * The replacement is done in a single pass so that text produced by one
 * decoded sequence is never decoded again. For example "%u0025u0041" yields
 * the literal text "%u0041" and is not decoded a second time.
 *
 * @param string $string Text containing %uHHHH escapes.
 * @return string The text with the escapes decoded.
 */
function decode_jsescape($string)
{
    $decoded = preg_replace_callback('/%u([0-9A-F]{4})/', function ($matches) {
        return code2utf(hexdec($matches[1]));
    }, $string);

    // preg_replace_callback() returns null on a PCRE error; fall back to the
    // unmodified input rather than losing the text.
    return $decoded === null ? $string : $decoded;
}
Example #16
0
/**
 * Percent-encode every byte of a string that is not in a small safe set.
 *
 * Bytes matching the set [ A-Za-z0-9|/:.^] (space, ASCII letters and digits,
 * and the characters | / : . ^) are copied unchanged. Any other byte is
 * passed through code2utf(ord($byte)) (defined elsewhere) and the result is
 * rawurlencode()d.
 *
 * The string is processed byte by byte, not by multibyte character.
 *
 * @param string $theString Input text.
 * @return string The encoded text.
 */
function asc2hex($theString)
{
    $theString = (string) $theString;
    $data = '';
    $length = strlen($theString);
    for ($i = 0; $i < $length; $i++) {
        $char = $theString[$i];
        // ereg() was removed in PHP 7.0; preg_match() with the same character
        // class is the equivalent test. "~" is the delimiter so "/" needs no escape.
        if (preg_match('~[ A-Za-z0-9|/:.^]~', $char) === 1) {
            $data .= $char;
        } else {
            $data .= rawurlencode(code2utf(ord($char)));
        }
    }

    return $data;
}