function applyOTL($str, $useOTL) { $this->OTLdata = array(); if (trim($str) == '') { return $str; } if (!$useOTL) { return $str; } // 1. Load GDEF data //============================== $this->fontkey = $this->mpdf->CurrentFont['fontkey']; $this->glyphIDtoUni = $this->mpdf->CurrentFont['glyphIDtoUni']; if (!isset($this->GDEFdata[$this->fontkey])) { include _MPDF_TTFONTDATAPATH . $this->fontkey . '.GDEFdata.php'; $this->GSUB_offset = $this->GDEFdata[$this->fontkey]['GSUB_offset'] = $GSUB_offset; $this->GPOS_offset = $this->GDEFdata[$this->fontkey]['GPOS_offset'] = $GPOS_offset; $this->GSUB_length = $this->GDEFdata[$this->fontkey]['GSUB_length'] = $GSUB_length; $this->MarkAttachmentType = $this->GDEFdata[$this->fontkey]['MarkAttachmentType'] = $MarkAttachmentType; $this->MarkGlyphSets = $this->GDEFdata[$this->fontkey]['MarkGlyphSets'] = $MarkGlyphSets; $this->GlyphClassMarks = $this->GDEFdata[$this->fontkey]['GlyphClassMarks'] = $GlyphClassMarks; $this->GlyphClassLigatures = $this->GDEFdata[$this->fontkey]['GlyphClassLigatures'] = $GlyphClassLigatures; $this->GlyphClassComponents = $this->GDEFdata[$this->fontkey]['GlyphClassComponents'] = $GlyphClassComponents; $this->GlyphClassBases = $this->GDEFdata[$this->fontkey]['GlyphClassBases'] = $GlyphClassBases; } else { $this->GSUB_offset = $this->GDEFdata[$this->fontkey]['GSUB_offset']; $this->GPOS_offset = $this->GDEFdata[$this->fontkey]['GPOS_offset']; $this->GSUB_length = $this->GDEFdata[$this->fontkey]['GSUB_length']; $this->MarkAttachmentType = $this->GDEFdata[$this->fontkey]['MarkAttachmentType']; $this->MarkGlyphSets = $this->GDEFdata[$this->fontkey]['MarkGlyphSets']; $this->GlyphClassMarks = $this->GDEFdata[$this->fontkey]['GlyphClassMarks']; $this->GlyphClassLigatures = $this->GDEFdata[$this->fontkey]['GlyphClassLigatures']; $this->GlyphClassComponents = $this->GDEFdata[$this->fontkey]['GlyphClassComponents']; $this->GlyphClassBases = $this->GDEFdata[$this->fontkey]['GlyphClassBases']; } // 2. 
Prepare string as HEX string and Analyse character properties //================================================================= $earr = $this->mpdf->UTF8StringToArray($str, false); $scriptblock = 0; $scriptblocks = array(); $scriptblocks[0] = 0; $vstr = ''; $OTLdata = array(); $subchunk = 0; $charctr = 0; foreach ($earr as $char) { $ucd_record = UCDN::get_ucd_record($char); $sbl = $ucd_record[6]; // Special case - Arabic End of Ayah if ($char == 1757) { $sbl = UCDN::SCRIPT_ARABIC; } if ($sbl && $sbl != 40 && $sbl != 102) { if ($scriptblock == 0) { $scriptblock = $sbl; $scriptblocks[$subchunk] = $scriptblock; } else { if ($scriptblock > 0 && $scriptblock != $sbl) { // ************************************************* // NEW (non-common) Script encountered in this chunk. Start a new subchunk $subchunk++; $scriptblock = $sbl; $charctr = 0; $scriptblocks[$subchunk] = $scriptblock; } } } $OTLdata[$subchunk][$charctr]['general_category'] = $ucd_record[0]; $OTLdata[$subchunk][$charctr]['bidi_type'] = $ucd_record[2]; //$OTLdata[$subchunk][$charctr]['combining_class'] = $ucd_record[1]; //$OTLdata[$subchunk][$charctr]['bidi_type'] = $ucd_record[2]; //$OTLdata[$subchunk][$charctr]['mirrored'] = $ucd_record[3]; //$OTLdata[$subchunk][$charctr]['east_asian_width'] = $ucd_record[4]; //$OTLdata[$subchunk][$charctr]['normalization_check'] = $ucd_record[5]; //$OTLdata[$subchunk][$charctr]['script'] = $ucd_record[6]; $charasstr = $this->unicode_hex($char); if (strpos($this->GlyphClassMarks, $charasstr) !== false) { $OTLdata[$subchunk][$charctr]['group'] = 'M'; } else { if ($char == 32 || $char == 12288) { $OTLdata[$subchunk][$charctr]['group'] = 'S'; } else { $OTLdata[$subchunk][$charctr]['group'] = 'C'; } } $OTLdata[$subchunk][$charctr]['uni'] = $char; $OTLdata[$subchunk][$charctr]['hex'] = $charasstr; $charctr++; } /* PROCESS EACH SUBCHUNK WITH DIFFERENT SCRIPTS */ for ($sch = 0; $sch <= $subchunk; $sch++) { $this->OTLdata = $OTLdata[$sch]; $scriptblock = $scriptblocks[$sch]; // 
3. Get Appropriate Scripts, and Shaper engine from analysing text and list of available scripts/langsys in font //============================== // Based on actual script block of text, select shaper (and line-breaking dictionaries) if (UCDN::SCRIPT_DEVANAGARI <= $scriptblock && $scriptblock <= UCDN::SCRIPT_MALAYALAM) { $this->shaper = "I"; } else { if ($scriptblock == UCDN::SCRIPT_ARABIC || $scriptblock == UCDN::SCRIPT_SYRIAC) { $this->shaper = "A"; } else { if ($scriptblock == UCDN::SCRIPT_NKO || $scriptblock == UCDN::SCRIPT_MANDAIC) { $this->shaper = "A"; } else { if ($scriptblock == UCDN::SCRIPT_KHMER) { $this->shaper = "K"; } else { if ($scriptblock == UCDN::SCRIPT_THAI) { $this->shaper = "T"; } else { if ($scriptblock == UCDN::SCRIPT_LAO) { $this->shaper = "L"; } else { if ($scriptblock == UCDN::SCRIPT_SINHALA) { $this->shaper = "S"; } else { if ($scriptblock == UCDN::SCRIPT_MYANMAR) { $this->shaper = "M"; } else { if ($scriptblock == UCDN::SCRIPT_NEW_TAI_LUE) { $this->shaper = "E"; } else { if ($scriptblock == UCDN::SCRIPT_CHAM) { $this->shaper = "E"; } else { if ($scriptblock == UCDN::SCRIPT_TAI_THAM) { $this->shaper = "E"; } else { $this->shaper = ""; } } } } } } } } } } } // Get scripttag based on actual text script $scripttag = UCDN::$uni_scriptblock[$scriptblock]; $GSUBscriptTag = ''; $GSUBlangsys = ''; $GPOSscriptTag = ''; $GPOSlangsys = ''; $is_old_spec = false; $ScriptLang = $this->mpdf->CurrentFont['GSUBScriptLang']; if (count($ScriptLang)) { list($GSUBscriptTag, $is_old_spec) = $this->_getOTLscriptTag($ScriptLang, $scripttag, $scriptblock, $this->shaper, $useOTL, 'GSUB'); if ($this->mpdf->fontLanguageOverride && strpos($ScriptLang[$GSUBscriptTag], $this->mpdf->fontLanguageOverride) !== false) { $GSUBlangsys = str_pad($this->mpdf->fontLanguageOverride, 4); } else { if ($GSUBscriptTag && isset($ScriptLang[$GSUBscriptTag]) && $ScriptLang[$GSUBscriptTag] != '') { $GSUBlangsys = $this->_getOTLLangTag($this->mpdf->currentLang, 
$ScriptLang[$GSUBscriptTag]); } } } $ScriptLang = $this->mpdf->CurrentFont['GPOSScriptLang']; // NB If after GSUB, the same script/lang exist for GPOS, just use these... if ($GSUBscriptTag && $GSUBlangsys && isset($ScriptLang[$GSUBscriptTag]) && strpos($ScriptLang[$GSUBscriptTag], $GSUBlangsys) !== false) { $GPOSlangsys = $GSUBlangsys; $GPOSscriptTag = $GSUBscriptTag; } else { if (count($ScriptLang)) { list($GPOSscriptTag, $dummy) = $this->_getOTLscriptTag($ScriptLang, $scripttag, $scriptblock, $this->shaper, $useOTL, 'GPOS'); if ($GPOSscriptTag && $this->mpdf->fontLanguageOverride && strpos($ScriptLang[$GPOSscriptTag], $this->mpdf->fontLanguageOverride) !== false) { $GPOSlangsys = str_pad($this->mpdf->fontLanguageOverride, 4); } else { if ($GPOSscriptTag && isset($ScriptLang[$GPOSscriptTag]) && $ScriptLang[$GPOSscriptTag] != '') { $GPOSlangsys = $this->_getOTLLangTag($this->mpdf->currentLang, $ScriptLang[$GPOSscriptTag]); } } } } //////////////////////////////////////////////////////////////// // This is just for the font_dump_OTL utility to set script and langsys override if (isset($this->mpdf->overrideOTLsettings) && isset($this->mpdf->overrideOTLsettings[$this->fontkey])) { $GSUBscriptTag = $GPOSscriptTag = $this->mpdf->overrideOTLsettings[$this->fontkey]['script']; $GSUBlangsys = $GPOSlangsys = $this->mpdf->overrideOTLsettings[$this->fontkey]['lang']; } //////////////////////////////////////////////////////////////// if (!$GSUBscriptTag && !$GSUBlangsys && !$GPOSscriptTag && !$GPOSlangsys) { // Remove ZWJ and ZWNJ for ($i = 0; $i < count($this->OTLdata); $i++) { if ($this->OTLdata[$i]['uni'] == 8204 || $this->OTLdata[$i]['uni'] == 8205) { array_splice($this->OTLdata, $i, 1); } } $this->schOTLdata[$sch] = $this->OTLdata; $this->OTLdata = array(); continue; } // Don't use MYANMAR shaper unless using v2 scripttag if ($this->shaper == 'M' && $GSUBscriptTag != 'mym2') { $this->shaper = ''; } $GSUBFeatures = 
isset($this->mpdf->CurrentFont['GSUBFeatures'][$GSUBscriptTag][$GSUBlangsys]) ? $this->mpdf->CurrentFont['GSUBFeatures'][$GSUBscriptTag][$GSUBlangsys] : false; $GPOSFeatures = isset($this->mpdf->CurrentFont['GPOSFeatures'][$GPOSscriptTag][$GPOSlangsys]) ? $this->mpdf->CurrentFont['GPOSFeatures'][$GPOSscriptTag][$GPOSlangsys] : false; $this->assocLigs = array(); // Ligatures[$posarr lpos] => nc $this->assocMarks = array(); // assocMarks[$posarr mpos] => array(compID, ligPos) if (!isset($this->GDEFdata[$this->fontkey]['GSUBGPOStables'])) { $this->ttfOTLdata = $this->GDEFdata[$this->fontkey]['GSUBGPOStables'] = file_get_contents(_MPDF_TTFONTDATAPATH . $this->fontkey . '.GSUBGPOStables.dat', 'rb') or die('Can\'t open file ' . _MPDF_TTFONTDATAPATH . $this->fontkey . '.GSUBGPOStables.dat'); } else { $this->ttfOTLdata = $this->GDEFdata[$this->fontkey]['GSUBGPOStables']; } if ($this->debugOTL) { $this->_dumpproc('BEGIN', '-', '-', '-', '-', -1, '-', 0); } //////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////// ///////// LINE BREAKING FOR KHMER, THAI + LAO ///////////////// //////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////// // Insert U+200B at word boundaries using dictionaries if ($this->mpdf->useDictionaryLBR && ($this->shaper == "K" || $this->shaper == "T" || $this->shaper == "L")) { // Sets $this->OTLdata[$i]['wordend']=true at possible end of word boundaries $this->SEAlineBreaking(); } else { if ($this->mpdf->useTibetanLBR && $scriptblock == UCDN::SCRIPT_TIBETAN) { // Sets $this->OTLdata[$i]['wordend']=true at possible end of word boundaries $this->TibetanlineBreaking(); } } //////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////// ////////// GSUB ///////////////////////////////// //////////////////////////////////////////////////////////////// 
//////////////////////////////////////////////////////////////// if ($useOTL & 0xff && $GSUBscriptTag && $GSUBlangsys && $GSUBFeatures) { // 4. Load GSUB data, Coverage & Lookups //================================================================= $this->GSUBfont = $this->fontkey . '.GSUB.' . $GSUBscriptTag . '.' . $GSUBlangsys; if (!isset($this->GSUBdata[$this->GSUBfont])) { if (file_exists(_MPDF_TTFONTDATAPATH . $this->mpdf->CurrentFont['fontkey'] . '.GSUB.' . $GSUBscriptTag . '.' . $GSUBlangsys . '.php')) { include_once _MPDF_TTFONTDATAPATH . $this->mpdf->CurrentFont['fontkey'] . '.GSUB.' . $GSUBscriptTag . '.' . $GSUBlangsys . '.php'; $this->GSUBdata[$this->GSUBfont]['rtlSUB'] = $rtlSUB; $this->GSUBdata[$this->GSUBfont]['finals'] = $finals; if ($this->shaper == 'I') { $this->GSUBdata[$this->GSUBfont]['rphf'] = $rphf; $this->GSUBdata[$this->GSUBfont]['half'] = $half; $this->GSUBdata[$this->GSUBfont]['pref'] = $pref; $this->GSUBdata[$this->GSUBfont]['blwf'] = $blwf; $this->GSUBdata[$this->GSUBfont]['pstf'] = $pstf; } } else { $this->GSUBdata[$this->GSUBfont] = array('rtlSUB' => array(), 'rphf' => array(), 'rphf' => array(), 'pref' => array(), 'blwf' => array(), 'pstf' => array(), 'finals' => ''); } } if (!isset($this->GSUBdata[$this->fontkey])) { include _MPDF_TTFONTDATAPATH . $this->fontkey . '.GSUBdata.php'; $this->GSLuCoverage = $this->GSUBdata[$this->fontkey]['GSLuCoverage'] = $GSLuCoverage; } else { $this->GSLuCoverage = $this->GSUBdata[$this->fontkey]['GSLuCoverage']; } $this->GSUBLookups = $this->mpdf->CurrentFont['GSUBLookups']; // 5(A). GSUB - Shaper - ARABIC //============================== if ($this->shaper == 'A') { //----------------------------------------------------------------------------------- // a. 
Apply initial GSUB Lookups (in order specified in lookup list but only selecting from certain tags) //----------------------------------------------------------------------------------- $tags = 'locl ccmp'; $omittags = ''; $usetags = $tags; if (!empty($this->mpdf->OTLtags)) { $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, true); } $this->_applyGSUBrules($usetags, $GSUBscriptTag, $GSUBlangsys); //----------------------------------------------------------------------------------- // b. Apply context-specific forms GSUB Lookups (initial, isolated, medial, final) //----------------------------------------------------------------------------------- // Arab and Syriac are the only scripts requiring the special joining - which takes the place of // isol fina medi init rules in GSUB (+ fin2 fin3 med2 in Syriac syrc) $tags = 'isol fina fin2 fin3 medi med2 init'; $omittags = ''; $usetags = $tags; if (!empty($this->mpdf->OTLtags)) { $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, true); } $this->arabGlyphs = $this->GSUBdata[$this->GSUBfont]['rtlSUB']; $gcms = explode("| ", $this->GlyphClassMarks); $gcm = array(); foreach ($gcms as $g) { $gcm[hexdec($g)] = 1; } $this->arabTransparentJoin = $this->arabTransparent + $gcm; $this->arabic_shaper($usetags, $GSUBscriptTag); //----------------------------------------------------------------------------------- // c. Set Kashida points (after joining occurred - medi, fina, init) but before other substitutions //----------------------------------------------------------------------------------- //if ($scriptblock == UCDN::SCRIPT_ARABIC ) { for ($i = 0; $i < count($this->OTLdata); $i++) { // Put the kashida marker on the character BEFORE which is inserted the kashida // Kashida marker is inverse of priority i.e. Priority 1 => 7, Priority 7 => 1. 
// Priority 1 User-inserted Kashida 0640 = Tatweel // The user entered a Kashida in a position // Position: Before the user-inserted kashida if ($this->OTLdata[$i]['uni'] == 0x640) { $this->OTLdata[$i]['GPOSinfo']['kashida'] = 8; // Put before the next character } else { if ($this->OTLdata[$i]['uni'] == 0xfeb3 || $this->OTLdata[$i]['uni'] == 0xfeb4 || $this->OTLdata[$i]['uni'] == 0xfebb || $this->OTLdata[$i]['uni'] == 0xfebc) { $checkpos = $i + 1; while (isset($this->OTLdata[$checkpos]) && strpos($this->GlyphClassMarks, $this->OTLdata[$checkpos]['hex']) !== false) { $checkpos++; } if (isset($this->OTLdata[$checkpos])) { $this->OTLdata[$checkpos]['GPOSinfo']['kashida'] = 7; // Put after marks on next character } } else { if ($this->OTLdata[$i]['uni'] == 0xfe94 || $this->OTLdata[$i]['uni'] == 0xfea2 || $this->OTLdata[$i]['uni'] == 0xfeaa) { $this->OTLdata[$i]['GPOSinfo']['kashida'] = 6; } else { if ($this->OTLdata[$i]['uni'] == 0xfe8e || $this->OTLdata[$i]['uni'] == 0xfec2 || $this->OTLdata[$i]['uni'] == 0xfede || $this->OTLdata[$i]['uni'] == 0xfeda || $this->OTLdata[$i]['uni'] == 0xfb93) { $this->OTLdata[$i]['GPOSinfo']['kashida'] = 5; } else { if ($this->OTLdata[$i]['uni'] == 0xfeae || $this->OTLdata[$i]['uni'] == 0xfef2 || $this->OTLdata[$i]['uni'] == 0xfef0 || $this->OTLdata[$i]['uni'] == 0xfef4 || $this->OTLdata[$i]['uni'] == 0xfbe9 || $this->OTLdata[$i]['uni'] == 0xfbfd || $this->OTLdata[$i]['uni'] == 0xfbff) { $checkpos = $i - 1; while (isset($this->OTLdata[$checkpos]) && strpos($this->GlyphClassMarks, $this->OTLdata[$checkpos]['hex']) !== false) { $checkpos--; } if (isset($this->OTLdata[$checkpos]) && $this->OTLdata[$checkpos]['uni'] == 0xfe92) { $this->OTLdata[$checkpos]['GPOSinfo']['kashida'] = 4; // ******* Before preceding BAA } } else { if ($this->OTLdata[$i]['uni'] == 0xfeee || $this->OTLdata[$i]['uni'] == 0xfeca || $this->OTLdata[$i]['uni'] == 0xfed6 || $this->OTLdata[$i]['uni'] == 0xfed2) { $this->OTLdata[$i]['GPOSinfo']['kashida'] = 3; } } } } } } // 
Priority 7 Other connecting characters // Final form // Connecting to previous character // Position: Before the character /* This isn't in the spec, but using MS WORD as a basis, give a lower priority to the 3 characters already checked in (5) above. Test case: خْرَىٰ فَتُذَكِّر */ if (!isset($this->OTLdata[$i]['GPOSinfo']['kashida'])) { if (strpos($this->GSUBdata[$this->GSUBfont]['finals'], $this->OTLdata[$i]['hex']) !== false) { // ANY OTHER FINAL FORM $this->OTLdata[$i]['GPOSinfo']['kashida'] = 2; } else { if (strpos('0FEAE 0FEF0 0FEF2', $this->OTLdata[$i]['hex']) !== false) { // not already included in 5 above $this->OTLdata[$i]['GPOSinfo']['kashida'] = 1; } } } } //----------------------------------------------------------------------------------- // d. Apply Presentation Forms GSUB Lookups (+ any discretionary) - Apply one at a time in Feature order //----------------------------------------------------------------------------------- $tags = 'rlig calt liga clig mset'; $omittags = 'locl ccmp nukt akhn rphf rkrf pref blwf abvf half pstf cfar vatu cjct init medi fina isol med2 fin2 fin3 ljmo vjmo tjmo'; $usetags = $tags; if (!empty($this->mpdf->OTLtags)) { $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false); } $ts = explode(' ', $usetags); foreach ($ts as $ut) { // - Apply one at a time in Feature order $this->_applyGSUBrules($ut, $GSUBscriptTag, $GSUBlangsys); } //----------------------------------------------------------------------------------- // e. 
NOT IN SPEC // If space precedes a mark -> substitute a before the Mark, to prevent line breaking Test: //----------------------------------------------------------------------------------- for ($ptr = 1; $ptr < count($this->OTLdata); $ptr++) { if ($this->OTLdata[$ptr]['general_category'] == UCDN::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK && $this->OTLdata[$ptr - 1]['uni'] == 32) { $this->OTLdata[$ptr - 1]['uni'] = 0xa0; $this->OTLdata[$ptr - 1]['hex'] = '000A0'; } } } else { if ($this->shaper == 'I' || $this->shaper == 'K' || $this->shaper == 'S') { $this->restrictToSyllable = true; //----------------------------------------------------------------------------------- // a. First decompose/compose split mattras // (normalize) ??????? Nukta/Halant order etc ?????????????????????????????????????????????????????????????????????????? //----------------------------------------------------------------------------------- for ($ptr = 0; $ptr < count($this->OTLdata); $ptr++) { $char = $this->OTLdata[$ptr]['uni']; $sub = INDIC::decompose_indic($char); if ($sub) { $newinfo = array(); for ($i = 0; $i < count($sub); $i++) { $newinfo[$i] = array(); $ucd_record = UCDN::get_ucd_record($sub[$i]); $newinfo[$i]['general_category'] = $ucd_record[0]; $newinfo[$i]['bidi_type'] = $ucd_record[2]; $charasstr = $this->unicode_hex($sub[$i]); if (strpos($this->GlyphClassMarks, $charasstr) !== false) { $newinfo[$i]['group'] = 'M'; } else { $newinfo[$i]['group'] = 'C'; } $newinfo[$i]['uni'] = $sub[$i]; $newinfo[$i]['hex'] = $charasstr; } array_splice($this->OTLdata, $ptr, 1, $newinfo); $ptr += count($sub) - 1; } /* Only Composition-exclusion exceptions that we want to recompose. 
*/ if ($this->shaper == 'I') { if ($char == 0x9af && isset($this->OTLdata[$ptr + 1]) && $this->OTLdata[$ptr + 1]['uni'] == 0x9bc) { $sub = 0x9df; $newinfo = array(); $newinfo[0] = array(); $ucd_record = UCDN::get_ucd_record($sub); $newinfo[0]['general_category'] = $ucd_record[0]; $newinfo[0]['bidi_type'] = $ucd_record[2]; $newinfo[0]['group'] = 'C'; $newinfo[0]['uni'] = $sub; $newinfo[0]['hex'] = $this->unicode_hex($sub); array_splice($this->OTLdata, $ptr, 2, $newinfo); } } } //----------------------------------------------------------------------------------- // b. Analyse characters - group as syllables/clusters (Indic); invalid diacritics; add dotted circle //----------------------------------------------------------------------------------- $indic_category_string = ''; foreach ($this->OTLdata as $eid => $c) { INDIC::set_indic_properties($this->OTLdata[$eid], $scriptblock); // sets ['indic_category'] and ['indic_position'] //$c['general_category'] //$c['combining_class'] //$c['uni'] = $char; $indic_category_string .= INDIC::$indic_category_char[$this->OTLdata[$eid]['indic_category']]; } $broken_syllables = false; if ($this->shaper == 'I') { INDIC::set_syllables($this->OTLdata, $indic_category_string, $broken_syllables); } else { if ($this->shaper == 'S') { INDIC::set_syllables_sinhala($this->OTLdata, $indic_category_string, $broken_syllables); } else { if ($this->shaper == 'K') { INDIC::set_syllables_khmer($this->OTLdata, $indic_category_string, $broken_syllables); } } } $indic_category_string = ''; //----------------------------------------------------------------------------------- // c. 
Initial Re-ordering (Indic / Khmer / Sinhala) //----------------------------------------------------------------------------------- // Find base consonant // Decompose/compose and reorder Matras // Reorder marks to canonical order $indic_config = INDIC::$indic_configs[$scriptblock]; $dottedcircle = false; if ($broken_syllables) { if ($this->mpdf->_charDefined($this->mpdf->fonts[$this->fontkey]['cw'], 0x25cc)) { $dottedcircle = array(); $ucd_record = UCDN::get_ucd_record(0x25cc); $dottedcircle[0]['general_category'] = $ucd_record[0]; $dottedcircle[0]['bidi_type'] = $ucd_record[2]; $dottedcircle[0]['group'] = 'C'; $dottedcircle[0]['uni'] = 0x25cc; $dottedcircle[0]['indic_category'] = INDIC::OT_DOTTEDCIRCLE; $dottedcircle[0]['indic_position'] = INDIC::POS_BASE_C; $dottedcircle[0]['hex'] = '025CC'; // TEMPORARY ***** } } INDIC::initial_reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $broken_syllables, $indic_config, $scriptblock, $is_old_spec, $dottedcircle); //----------------------------------------------------------------------------------- // d. Apply initial and basic shaping forms GSUB Lookups (one at a time) //----------------------------------------------------------------------------------- if ($this->shaper == 'I' || $this->shaper == 'S') { $tags = 'locl ccmp nukt akhn rphf rkrf pref blwf half pstf vatu cjct'; } else { if ($this->shaper == 'K') { $tags = 'locl ccmp pref blwf abvf pstf cfar'; } } $this->_applyGSUBrulesIndic($tags, $GSUBscriptTag, $GSUBlangsys, $is_old_spec); //----------------------------------------------------------------------------------- // e. 
Final Re-ordering (Indic / Khmer / Sinhala) //----------------------------------------------------------------------------------- // Reorder matras // Reorder reph // Reorder pre-base reordering consonants: INDIC::final_reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $indic_config, $scriptblock, $is_old_spec); //----------------------------------------------------------------------------------- // f. Apply 'init' feature to first syllable in word (indicated by ['mask']) INDIC::FLAG(INDIC::INIT); //----------------------------------------------------------------------------------- if ($this->shaper == 'I' || $this->shaper == 'S') { $tags = 'init'; $this->_applyGSUBrulesIndic($tags, $GSUBscriptTag, $GSUBlangsys, $is_old_spec); } //----------------------------------------------------------------------------------- // g. Apply Presentation Forms GSUB Lookups (+ any discretionary) //----------------------------------------------------------------------------------- $tags = 'pres abvs blws psts haln rlig calt liga clig mset'; $omittags = 'locl ccmp nukt akhn rphf rkrf pref blwf abvf half pstf cfar vatu cjct init medi fina isol med2 fin2 fin3 ljmo vjmo tjmo'; $usetags = $tags; if (!empty($this->mpdf->OTLtags)) { $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false); } if ($this->shaper == 'K') { // Features are applied one at a time, working through each codepoint $this->_applyGSUBrulesSingly($usetags, $GSUBscriptTag, $GSUBlangsys); } else { $this->_applyGSUBrules($usetags, $GSUBscriptTag, $GSUBlangsys); } $this->restrictToSyllable = false; } else { if ($this->shaper == 'M') { $this->restrictToSyllable = true; //----------------------------------------------------------------------------------- // a. 
Analyse characters - group as syllables/clusters (Myanmar); invalid diacritics; add dotted circle //----------------------------------------------------------------------------------- $myanmar_category_string = ''; foreach ($this->OTLdata as $eid => $c) { MYANMAR::set_myanmar_properties($this->OTLdata[$eid]); // sets ['myanmar_category'] and ['myanmar_position'] $myanmar_category_string .= MYANMAR::$myanmar_category_char[$this->OTLdata[$eid]['myanmar_category']]; } $broken_syllables = false; MYANMAR::set_syllables($this->OTLdata, $myanmar_category_string, $broken_syllables); $myanmar_category_string = ''; //----------------------------------------------------------------------------------- // b. Re-ordering (Myanmar mym2) //----------------------------------------------------------------------------------- $dottedcircle = false; if ($broken_syllables) { if ($this->mpdf->_charDefined($this->mpdf->fonts[$this->fontkey]['cw'], 0x25cc)) { $dottedcircle = array(); $ucd_record = UCDN::get_ucd_record(0x25cc); $dottedcircle[0]['general_category'] = $ucd_record[0]; $dottedcircle[0]['bidi_type'] = $ucd_record[2]; $dottedcircle[0]['group'] = 'C'; $dottedcircle[0]['uni'] = 0x25cc; $dottedcircle[0]['myanmar_category'] = MYANMAR::OT_DOTTEDCIRCLE; $dottedcircle[0]['myanmar_position'] = MYANMAR::POS_BASE_C; $dottedcircle[0]['hex'] = '025CC'; } } MYANMAR::reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $broken_syllables, $dottedcircle); //----------------------------------------------------------------------------------- // c. Apply initial and basic shaping forms GSUB Lookups (one at a time) //----------------------------------------------------------------------------------- $tags = 'locl ccmp rphf pref blwf pstf'; $this->_applyGSUBrulesMyanmar($tags, $GSUBscriptTag, $GSUBlangsys); //----------------------------------------------------------------------------------- // d. 
Apply Presentation Forms GSUB Lookups (+ any discretionary) //----------------------------------------------------------------------------------- $tags = 'pres abvs blws psts haln rlig calt liga clig mset'; $omittags = 'locl ccmp nukt akhn rphf rkrf pref blwf abvf half pstf cfar vatu cjct init medi fina isol med2 fin2 fin3 ljmo vjmo tjmo'; $usetags = $tags; if (!empty($this->mpdf->OTLtags)) { $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false); } $this->_applyGSUBrules($usetags, $GSUBscriptTag, $GSUBlangsys); $this->restrictToSyllable = false; } else { if ($this->shaper == 'E') { /* HarfBuzz says: If the designer designed the font for the 'DFLT' script, * use the default shaper. Otherwise, use the SEA shaper. * Note that for some simple scripts, there may not be *any* * GSUB/GPOS needed, so there may be no scripts found! */ $this->restrictToSyllable = true; //----------------------------------------------------------------------------------- // a. Analyse characters - group as syllables/clusters (Indic); invalid diacritics; add dotted circle //----------------------------------------------------------------------------------- $sea_category_string = ''; foreach ($this->OTLdata as $eid => $c) { SEA::set_sea_properties($this->OTLdata[$eid], $scriptblock); // sets ['sea_category'] and ['sea_position'] //$c['general_category'] //$c['combining_class'] //$c['uni'] = $char; $sea_category_string .= SEA::$sea_category_char[$this->OTLdata[$eid]['sea_category']]; } $broken_syllables = false; SEA::set_syllables($this->OTLdata, $sea_category_string, $broken_syllables); $sea_category_string = ''; //----------------------------------------------------------------------------------- // b. 
Apply locl and ccmp shaping forms - before initial re-ordering; GSUB Lookups (one at a time) //----------------------------------------------------------------------------------- $tags = 'locl ccmp'; $this->_applyGSUBrulesSingly($tags, $GSUBscriptTag, $GSUBlangsys); //----------------------------------------------------------------------------------- // c. Initial Re-ordering //----------------------------------------------------------------------------------- // Find base consonant // Decompose/compose and reorder Matras // Reorder marks to canonical order $dottedcircle = false; if ($broken_syllables) { if ($this->mpdf->_charDefined($this->mpdf->fonts[$this->fontkey]['cw'], 0x25cc)) { $dottedcircle = array(); $ucd_record = UCDN::get_ucd_record(0x25cc); $dottedcircle[0]['general_category'] = $ucd_record[0]; $dottedcircle[0]['bidi_type'] = $ucd_record[2]; $dottedcircle[0]['group'] = 'C'; $dottedcircle[0]['uni'] = 0x25cc; $dottedcircle[0]['sea_category'] = SEA::OT_GB; $dottedcircle[0]['sea_position'] = SEA::POS_BASE_C; $dottedcircle[0]['hex'] = '025CC'; // TEMPORARY ***** } } SEA::initial_reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $broken_syllables, $scriptblock, $dottedcircle); //----------------------------------------------------------------------------------- // d. Apply basic shaping forms GSUB Lookups (one at a time) //----------------------------------------------------------------------------------- $tags = 'pref abvf blwf pstf'; $this->_applyGSUBrulesSingly($tags, $GSUBscriptTag, $GSUBlangsys); //----------------------------------------------------------------------------------- // e. Final Re-ordering //----------------------------------------------------------------------------------- SEA::final_reordering($this->OTLdata, $this->GSUBdata[$this->GSUBfont], $scriptblock); //----------------------------------------------------------------------------------- // f. 
Apply Presentation Forms GSUB Lookups (+ any discretionary) //----------------------------------------------------------------------------------- $tags = 'pres abvs blws psts'; $omittags = 'locl ccmp nukt akhn rphf rkrf pref blwf abvf half pstf cfar vatu cjct init medi fina isol med2 fin2 fin3 ljmo vjmo tjmo'; $usetags = $tags; if (!empty($this->mpdf->OTLtags)) { $usetags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false); } $this->_applyGSUBrules($usetags, $GSUBscriptTag, $GSUBlangsys); $this->restrictToSyllable = false; } else { // DEFAULT //----------------------------------------------------------------------------------- // a. First decompose/compose in Thai / Lao - Tibetan //----------------------------------------------------------------------------------- // Decomposition for THAI or LAO /* This function implements the shaping logic documented here: * * http://linux.thai.net/~thep/th-otf/shaping.html * * The first shaping rule listed there is needed even if the font has Thai * OpenType tables. * * * The following is NOT specified in the MS OT Thai spec, however, it seems * to be what Uniscribe and other engines implement. According to Eric Muller: * * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the * NIKHAHIT backwards over any tone mark (0E48-0E4B). * * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32> * * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably * not what a user wanted, but the rendering is nevertheless nikhahit above * chattawa. * * Same for Lao. * * Thai Lao * SARA AM: U+0E33 U+0EB3 * SARA AA: U+0E32 U+0EB2 * Nikhahit: U+0E4D U+0ECD * * Testing shows that Uniscribe reorder the following marks: * Thai: <0E31,0E34..0E37,0E47..0E4E> * Lao: <0EB1,0EB4..0EB7,0EC7..0ECE> * * Lao versions are the same as Thai + 0x80. 
*/ if ($this->shaper == 'T' || $this->shaper == 'L') { for ($ptr = 0; $ptr < count($this->OTLdata); $ptr++) { $char = $this->OTLdata[$ptr]['uni']; if (($char & ~0x80) == 0xe33) { // if SARA_AM (U+0E33 or U+0EB3) $NIKHAHIT = $char + 0x1a; $SARA_AA = $char - 1; $sub = array($SARA_AA, $NIKHAHIT); $newinfo = array(); $ucd_record = UCDN::get_ucd_record($sub[0]); $newinfo[0]['general_category'] = $ucd_record[0]; $newinfo[0]['bidi_type'] = $ucd_record[2]; $charasstr = $this->unicode_hex($sub[0]); if (strpos($this->GlyphClassMarks, $charasstr) !== false) { $newinfo[0]['group'] = 'M'; } else { $newinfo[0]['group'] = 'C'; } $newinfo[0]['uni'] = $sub[0]; $newinfo[0]['hex'] = $charasstr; $this->OTLdata[$ptr] = $newinfo[0]; // Substitute SARA_AM => SARA_AA $ntones = 0; // number of (preceding) tone marks // IS_TONE_MARK ((x) & ~0x0080, 0x0E34 - 0x0E37, 0x0E47 - 0x0E4E, 0x0E31) while (isset($this->OTLdata[$ptr - 1 - $ntones]) && (($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x80) == 0xe31 || ($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x80) >= 0xe34 && ($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x80) <= 0xe37 || ($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x80) >= 0xe47 && ($this->OTLdata[$ptr - 1 - $ntones]['uni'] & ~0x80) <= 0xe4e)) { $ntones++; } $newinfo = array(); $ucd_record = UCDN::get_ucd_record($sub[1]); $newinfo[0]['general_category'] = $ucd_record[0]; $newinfo[0]['bidi_type'] = $ucd_record[2]; $charasstr = $this->unicode_hex($sub[1]); if (strpos($this->GlyphClassMarks, $charasstr) !== false) { $newinfo[0]['group'] = 'M'; } else { $newinfo[0]['group'] = 'C'; } $newinfo[0]['uni'] = $sub[1]; $newinfo[0]['hex'] = $charasstr; // Insert NIKAHIT array_splice($this->OTLdata, $ptr - $ntones, 0, $newinfo); $ptr++; } } } if ($scriptblock == UCDN::SCRIPT_TIBETAN) { // ========================= // Reordering TIBETAN // ========================= // Tibetan does not need to need a shaper generally, as long as characters are presented in the correct order // so we will do one 
minor change here: // From ICU: If the present character is a number, and the next character is a pre-number combining mark // then the two characters are reordered // From MS OTL spec the following are Digit modifiers (Md): 0F18�0F19, 0F3E�0F3F // Digits: 0F20�0F33 // On testing only 0x0F3F (pre-based mark) seems to need re-ordering for ($ptr = 0; $ptr < count($this->OTLdata) - 1; $ptr++) { if (INDIC::in_range($this->OTLdata[$ptr]['uni'], 0xf20, 0xf33) && $this->OTLdata[$ptr + 1]['uni'] == 0xf3f) { $tmp = $this->OTLdata[$ptr + 1]; $this->OTLdata[$ptr + 1] = $this->OTLdata[$ptr]; $this->OTLdata[$ptr] = $tmp; } } // ========================= // Decomposition for TIBETAN // ========================= /* Recommended, but does not seem to change anything... for($ptr=0; $ptr<count($this->OTLdata); $ptr++) { $char = $this->OTLdata[$ptr]['uni']; $sub = INDIC::decompose_indic($char); if ($sub) { $newinfo = array(); for($i=0;$i<count($sub);$i++) { $newinfo[$i] = array(); $ucd_record = UCDN::get_ucd_record($sub[$i]); $newinfo[$i]['general_category'] = $ucd_record[0]; $newinfo[$i]['bidi_type'] = $ucd_record[2]; $charasstr = $this->unicode_hex($sub[$i]); if (strpos($this->GlyphClassMarks, $charasstr)!==false) { $newinfo[$i]['group'] = 'M'; } else { $newinfo[$i]['group'] = 'C'; } $newinfo[$i]['uni'] = $sub[$i]; $newinfo[$i]['hex'] = $charasstr; } array_splice($this->OTLdata, $ptr, 1, $newinfo); $ptr += count($sub)-1; } } */ } //----------------------------------------------------------------------------------- // b. Apply all GSUB Lookups (in order specified in lookup list) //----------------------------------------------------------------------------------- $tags = 'locl ccmp pref blwf abvf pstf pres abvs blws psts haln rlig calt liga clig mset RQD'; // pref blwf abvf pstf required for Tibetan // " RQD" is a non-standard tag in Garuda font - presumably intended to be used by default ? 
"ReQuireD" // Being a 3 letter tag is non-standard, and does not allow it to be set by font-feature-settings /* ?Add these until shapers witten? Hangul: ljmo vjmo tjmo */ $omittags = ''; $useGSUBtags = $tags; if (!empty($this->mpdf->OTLtags)) { $useGSUBtags = $this->_applyTagSettings($tags, $GSUBFeatures, $omittags, false); } // APPLY GSUB rules (as long as not Latin + SmallCaps - but not OTL smcp) if (!($this->mpdf->textvar & FC_SMALLCAPS && $scriptblock == UCDN::SCRIPT_LATIN && strpos($useGSUBtags, 'smcp') === false)) { $this->_applyGSUBrules($useGSUBtags, $GSUBscriptTag, $GSUBlangsys); } } } } } } // Shapers - KHMER & THAI & LAO - Replace Word boundary marker with U+200B // Also TIBETAN (no shaper) //======================================================= if ($this->shaper == "K" || $this->shaper == "T" || $this->shaper == "L" || $scriptblock == UCDN::SCRIPT_TIBETAN) { // Set up properties to insert a U+200B character $newinfo = array(); //$newinfo[0] = array('general_category' => 1, 'bidi_type' => 14, 'group' => 'S', 'uni' => 0x200B, 'hex' => '0200B'); $newinfo[0] = array('general_category' => UCDN::UNICODE_GENERAL_CATEGORY_FORMAT, 'bidi_type' => UCDN::BIDI_CLASS_BN, 'group' => 'S', 'uni' => 0x200b, 'hex' => '0200B'); // Then insert U+200B at (after) all word end boundaries for ($i = count($this->OTLdata) - 1; $i > 0; $i--) { // Make sure after GSUB that wordend has not been moved - check next char is not in the same syllable if (isset($this->OTLdata[$i]['wordend']) && $this->OTLdata[$i]['wordend'] && isset($this->OTLdata[$i + 1]['uni']) && (!isset($this->OTLdata[$i + 1]['syllable']) || !isset($this->OTLdata[$i + 1]['syllable']) || $this->OTLdata[$i + 1]['syllable'] != $this->OTLdata[$i]['syllable'])) { array_splice($this->OTLdata, $i + 1, 0, $newinfo); $this->_updateLigatureMarks($i, 1); } else { if ($this->OTLdata[$i]['uni'] == 0x2e) { // Word end if Full-stop. 
array_splice($this->OTLdata, $i + 1, 0, $newinfo); $this->_updateLigatureMarks($i, 1); } } } } // Shapers - INDIC & ARABIC & KHMER & SINHALA & MYANMAR - Remove ZWJ and ZWNJ //======================================================= if ($this->shaper == 'I' || $this->shaper == 'S' || $this->shaper == 'A' || $this->shaper == 'K' || $this->shaper == 'M') { // Remove ZWJ and ZWNJ for ($i = 0; $i < count($this->OTLdata); $i++) { if ($this->OTLdata[$i]['uni'] == 8204 || $this->OTLdata[$i]['uni'] == 8205) { array_splice($this->OTLdata, $i, 1); $this->_updateLigatureMarks($i, -1); } } } //print_r($this->OTLdata); echo '<br />'; //print_r($this->assocMarks); echo '<br />'; //print_r($this->assocLigs); exit; //////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////// ////////// GPOS ///////////////////////////////// //////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////// if ($useOTL & 0xff && $GPOSscriptTag && $GPOSlangsys && $GPOSFeatures) { $this->Entry = array(); $this->Exit = array(); // 6. Load GPOS data, Coverage & Lookups //================================================================= if (!isset($this->GPOSdata[$this->fontkey])) { include _MPDF_TTFONTDATAPATH . $this->mpdf->CurrentFont['fontkey'] . '.GPOSdata.php'; $this->LuCoverage = $this->GPOSdata[$this->fontkey]['LuCoverage'] = $LuCoverage; } else { $this->LuCoverage = $this->GPOSdata[$this->fontkey]['LuCoverage']; } $this->GPOSLookups = $this->mpdf->CurrentFont['GPOSLookups']; // 7. Select Feature tags to use (incl optional) //============================== $tags = 'abvm blwm mark mkmk curs cpsp dist requ'; // Default set /* 'requ' is not listed in the Microsoft registry of Feature tags Found in Arial Unicode MS, it repositions the baseline for punctuation in Kannada script */ // ZZZ96 // Set kern to be included by default in non-Latin script (? 
just when shapers used) // Kern is used in some fonts to reposition marks etc. and is essential for correct display //if ($this->shaper) {$tags .= ' kern'; } if ($scriptblock != UCDN::SCRIPT_LATIN) { $tags .= ' kern'; } $omittags = ''; $usetags = $tags; if (!empty($this->mpdf->OTLtags)) { $usetags = $this->_applyTagSettings($tags, $GPOSFeatures, $omittags, false); } // 8. Get GPOS LookupList from Feature tags //============================== $LookupList = array(); foreach ($GPOSFeatures as $tag => $arr) { if (strpos($usetags, $tag) !== false) { foreach ($arr as $lu) { $LookupList[$lu] = $tag; } } } ksort($LookupList); // 9. Apply GPOS Lookups (in order specified in lookup list but selecting from specified tags) //============================== // APPLY THE GPOS RULES (as long as not Latin + SmallCaps - but not OTL smcp) if (!($this->mpdf->textvar & FC_SMALLCAPS && $scriptblock == UCDN::SCRIPT_LATIN && strpos($useGSUBtags, 'smcp') === false)) { $this->_applyGPOSrules($LookupList, $is_old_spec); // (sets: $this->OTLdata[n]['GPOSinfo'] XPlacement YPlacement XAdvance Entry Exit ) } // 10. 
Process cursive text //============================== if (count($this->Entry) || count($this->Exit)) { // RTL $incurs = false; for ($i = count($this->OTLdata) - 1; $i >= 0; $i--) { if (isset($this->Entry[$i]) && isset($this->Entry[$i]['Y']) && $this->Entry[$i]['output'] == 'RTL') { $nextbase = $i - 1; // Set as next base ignoring marks (next base reading RTL in logical oder while (isset($this->OTLdata[$nextbase]['hex']) && strpos($this->GlyphClassMarks, $this->OTLdata[$nextbase]['hex']) !== false) { $nextbase--; } if (isset($this->Exit[$nextbase]) && isset($this->Exit[$nextbase]['Y'])) { $diff = $this->Entry[$i]['Y'] - $this->Exit[$nextbase]['Y']; if ($incurs === false) { $incurs = $diff; } else { $incurs += $diff; } for ($j = $i - 1; $j >= $nextbase; $j--) { if (isset($this->OTLdata[$j]['GPOSinfo']['YPlacement'])) { $this->OTLdata[$j]['GPOSinfo']['YPlacement'] += $incurs; } else { $this->OTLdata[$j]['GPOSinfo']['YPlacement'] = $incurs; } } if (isset($this->Exit[$i]['X']) && isset($this->Entry[$nextbase]['X'])) { $adj = -($this->Entry[$i]['X'] - $this->Exit[$nextbase]['X']); // If XAdvance is aplied - in order for PDF to position the Advance correctly need to place it on: // in RTL - the current glyph or the last of any associated marks if (isset($this->OTLdata[$nextbase + 1]['GPOSinfo']['XAdvance'])) { $this->OTLdata[$nextbase + 1]['GPOSinfo']['XAdvance'] += $adj; } else { $this->OTLdata[$nextbase + 1]['GPOSinfo']['XAdvance'] = $adj; } } } else { $incurs = false; } } else { if (strpos($this->GlyphClassMarks, $this->OTLdata[$i]['hex']) !== false) { continue; } else { $incurs = false; } } } // LTR $incurs = false; for ($i = 0; $i < count($this->OTLdata); $i++) { if (isset($this->Exit[$i]) && isset($this->Exit[$i]['Y']) && $this->Exit[$i]['output'] == 'LTR') { $nextbase = $i + 1; // Set as next base ignoring marks while (strpos($this->GlyphClassMarks, $this->OTLdata[$nextbase]['hex']) !== false) { $nextbase++; } if (isset($this->Entry[$nextbase]) && 
isset($this->Entry[$nextbase]['Y'])) { $diff = $this->Exit[$i]['Y'] - $this->Entry[$nextbase]['Y']; if ($incurs === false) { $incurs = $diff; } else { $incurs += $diff; } for ($j = $i + 1; $j <= $nextbase; $j++) { if (isset($this->OTLdata[$j]['GPOSinfo']['YPlacement'])) { $this->OTLdata[$j]['GPOSinfo']['YPlacement'] += $incurs; } else { $this->OTLdata[$j]['GPOSinfo']['YPlacement'] = $incurs; } } if (isset($this->Exit[$i]['X']) && isset($this->Entry[$nextbase]['X'])) { $adj = -($this->Exit[$i]['X'] - $this->Entry[$nextbase]['X']); // If XAdvance is aplied - in order for PDF to position the Advance correctly need to place it on: // in LTR - the next glyph, ignoring marks if (isset($this->OTLdata[$nextbase]['GPOSinfo']['XAdvance'])) { $this->OTLdata[$nextbase]['GPOSinfo']['XAdvance'] += $adj; } else { $this->OTLdata[$nextbase]['GPOSinfo']['XAdvance'] = $adj; } } } else { $incurs = false; } } else { if (strpos($this->GlyphClassMarks, $this->OTLdata[$i]['hex']) !== false) { continue; } else { $incurs = false; } } } } } // end GPOS if ($this->debugOTL) { $this->_dumpproc('END', '-', '-', '-', '-', 0, '-', 0); exit; } $this->schOTLdata[$sch] = $this->OTLdata; $this->OTLdata = array(); } // END foreach subchunk // 11. 
Re-assemble and return text string //============================== $newGPOSinfo = array(); $newOTLdata = array(); $newchar_data = array(); $newgroup = ''; $e = ''; $ectr = 0; for ($sch = 0; $sch <= $subchunk; $sch++) { for ($i = 0; $i < count($this->schOTLdata[$sch]); $i++) { if (isset($this->schOTLdata[$sch][$i]['GPOSinfo'])) { $newGPOSinfo[$ectr] = $this->schOTLdata[$sch][$i]['GPOSinfo']; } $newchar_data[$ectr] = array('bidi_class' => $this->schOTLdata[$sch][$i]['bidi_type'], 'uni' => $this->schOTLdata[$sch][$i]['uni']); $newgroup .= $this->schOTLdata[$sch][$i]['group']; $e .= code2utf($this->schOTLdata[$sch][$i]['uni']); if (isset($this->mpdf->CurrentFont['subset'])) { $this->mpdf->CurrentFont['subset'][$this->schOTLdata[$sch][$i]['uni']] = $this->schOTLdata[$sch][$i]['uni']; } $ectr++; } } $this->OTLdata['GPOSinfo'] = $newGPOSinfo; $this->OTLdata['char_data'] = $newchar_data; $this->OTLdata['group'] = $newgroup; // This leaves OTLdata::GPOSinfo, ::bidi_type, & ::group return $e; }
/**
 * Tag runs of SVG text with a language, based on the Unicode script of their characters.
 *
 * The markup is split on tags. Each text node is decoded (strcode2utf + lesser_entity_decode),
 * divided into sub-chunks wherever the script changes, and every sub-chunk for which a language
 * can be determined is wrapped in <tspan lang="..."> ... </tspan>. The characters &, < and > are
 * re-escaped as entities before the text is written back into the markup.
 *
 * Only text nodes whose immediately preceding tag begins with "<text" or "<tspa" are emitted;
 * every other text node is replaced by an empty string (behaviour inherited from the original).
 * NOTE(review): this also drops text that follows a closing tag inside <text> (e.g. after
 * </tspan>) and the content of elements such as <style> - confirm this is intended.
 *
 * @param string $html SVG markup to process
 * @return string the markup with language-tagged text runs; $html unchanged when only core fonts are used
 */
function markScriptToLang($html)
{
    if ($this->mpdf_ref->onlyCoreFonts) {
        return $html;
    }

    // Lazily initialise the script => language map and the per-language character classes,
    // either from the parent mPDF object or from the config file.
    if (empty($this->script2lang)) {
        if (!empty($this->mpdf_ref->script2lang)) {
            $this->script2lang = $this->mpdf_ref->script2lang;
            $this->viet = $this->mpdf_ref->viet;
            $this->pashto = $this->mpdf_ref->pashto;
            $this->urdu = $this->mpdf_ref->urdu;
            $this->persian = $this->mpdf_ref->persian;
            $this->sindhi = $this->mpdf_ref->sindhi;
        } else {
            // presumably populates the same properties as the branch above - verify against the config file
            include _MPDF_PATH . 'config_script2lang.php';
        }
    }

    // With PREG_SPLIT_DELIM_CAPTURE: even indexes are text nodes, odd indexes are tag contents
    // (captured without the surrounding angle brackets).
    $a = preg_split('/<(.*?)>/ms', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
    foreach ($a as $i => $e) {
        if ($i % 2 != 0) {
            // Tag: restore the angle brackets removed by the split
            $a[$i] = '<' . $e . '>';
            continue;
        }

        // The preceding tag has already been rewritten above, so it starts with '<'.
        // Index 0 has no preceding tag; treat it as "not inside a text element".
        $precedingTag = ($i > 0) ? substr($a[$i - 1], 0, 5) : '';
        if ($precedingTag != '<text' && $precedingTag != '<tspa') {
            // <tspan> or <text> only - any other text node is discarded
            $a[$i] = '';
            continue;
        }

        $e = strcode2utf($e);
        $e = $this->mpdf_ref->lesser_entity_decode($e);
        $earr = $this->mpdf_ref->UTF8StringToArray($e, false);

        // Split the code points into sub-chunks; a new sub-chunk starts whenever a different
        // (non-common) script is met.
        $scriptblock = 0;
        $scriptblocks = array(0 => 0);
        $chardata = array();
        $subchunk = 0;
        $charctr = 0;
        foreach ($earr as $char) {
            $ucd_record = UCDN::get_ucd_record($char);
            $sbl = $ucd_record[6];
            // 0, 40 and 102 never start or switch a script block
            // (presumably Common / Inherited / Unknown - confirm against the UCDN constants)
            if ($sbl && $sbl != 40 && $sbl != 102) {
                if ($scriptblock == 0) {
                    $scriptblock = $sbl;
                    $scriptblocks[$subchunk] = $scriptblock;
                } elseif ($scriptblock > 0 && $scriptblock != $sbl) {
                    // NEW (non-common) script encountered in this chunk: start a new subchunk
                    $subchunk++;
                    $scriptblock = $sbl;
                    $charctr = 0;
                    $scriptblocks[$subchunk] = $scriptblock;
                }
            }
            $chardata[$subchunk][$charctr]['script'] = $sbl;
            $chardata[$subchunk][$charctr]['uni'] = $char;
            $charctr++;
        }

        // If scriptblock[x] is a non-base script and scriptblock[x+1] is the base script,
        // move trailing common-script characters from the end of x to the start of x+1.
        for ($sch = 0; $sch < $subchunk; $sch++) {
            if ($scriptblocks[$sch] > 0 && $scriptblocks[$sch] != $this->mpdf_ref->baseScript && $scriptblocks[$sch + 1] == $this->mpdf_ref->baseScript) {
                $end = count($chardata[$sch]) - 1;
                while ($chardata[$sch][$end]['script'] == 0 && $end > 1) { // common script
                    $tmp = array_pop($chardata[$sch]);
                    array_unshift($chardata[$sch + 1], $tmp);
                    $end--;
                }
            }
        }

        $o = '';
        for ($sch = 0; $sch <= $subchunk; $sch++) {
            if (!isset($chardata[$sch])) {
                continue;
            }

            $raw = '';
            foreach ($chardata[$sch] as $info) {
                $raw .= code2utf($info['uni']);
            }
            // Undo lesser_entity_decode as above - but only for <>& ('&' must be replaced first)
            $s = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $raw);

            $script = $scriptblocks[$sch];
            $lang = '';
            if ($script == UCDN::SCRIPT_LATIN && $this->mpdf_ref->autoVietnamese && preg_match("/([" . $this->viet . "])/u", $raw)) {
                // Check Vietnamese if Latin script - even if it is the base script
                $lang = "vi";
            } elseif ($script == UCDN::SCRIPT_ARABIC && $this->mpdf_ref->autoArabic) {
                // Arabic script: look for language-specific characters first
                if (preg_match("/[" . $this->sindhi . "]/u", $raw)) {
                    $lang = "sd";
                } elseif (preg_match("/[" . $this->urdu . "]/u", $raw)) {
                    $lang = "ur";
                } elseif (preg_match("/[" . $this->pashto . "]/u", $raw)) {
                    $lang = "ps";
                } elseif (preg_match("/[" . $this->persian . "]/u", $raw)) {
                    $lang = "fa";
                } elseif ($this->mpdf_ref->baseScript != UCDN::SCRIPT_ARABIC && isset($this->script2lang[$script])) {
                    $lang = $this->script2lang[$script];
                }
            } elseif ($script > 0 && $script != $this->mpdf_ref->baseScript && isset($this->script2lang[$script])) {
                // Any other non-base script with a known language
                $lang = $this->script2lang[$script];
            }

            if ($lang) {
                $o .= '<tspan lang="' . $lang . '">' . $s . '</tspan>';
            } else {
                $o .= $s;
            }
        }
        $a[$i] = $o;
    }

    return implode('', $a);
}