/**
 * Encapsulate every run of text in the input with <span dir="xxx"> and </span>
 * according to the directionality of that run.
 *
 * The text is scanned one character at a time. While scanning, runs are wrapped in the
 * placeholder tags <LTR>...</LTR> and <RTL>...</RTL>; after a series of textual clean-ups
 * the placeholders are replaced by real <span> tags (or by nothing, depending on $direction).
 *
 * HTML elements ("<...>"), HTML entities ("&...;") and "{{...}}" directives are copied
 * through without being analysed for directionality.
 *
 * @param string $inputText Raw input
 * @param string $direction Which runs get a <span>: 'BOTH' (default), 'LTR' or 'RTL'
 *                          (also accepted in lower case). Any other value is treated like 'RTL'.
 * @param string $class     Additional text to insert into the output <span dir="xxx">
 *                          (such as 'class="yyy"')
 *
 * @return string The string with all texts encapsulated as required
 */
public static function spanLtrRtl($inputText, $direction = 'BOTH', $class = '')
{
    if ($inputText == '') {
        // Nothing to do
        return '';
    }

    $workingText = str_replace("\n", '<br>', $inputText);
    // Reposition some incorrectly placed line breaks
    $workingText = str_replace(
        array('<span class="starredname"><br>', '<span<br>class="starredname">'),
        '<br><span class="starredname">',
        $workingText
    );
    $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes

    // Historically this was a Zero Width Non-Joiner entity ('&zwnj;'), used to work around a
    // TCPDF bug. It is not known whether that is still needed, so it is currently empty.
    $nothing = '';

    // Placeholder tags. The LTR and RTL versions MUST have the same length, because the
    // clean-up code below uses a single length for both.
    self::$startLTR = '<LTR>'; // This will become '<span dir="ltr">' at the end
    self::$endLTR   = '</LTR>'; // This will become '</span>' at the end
    self::$startRTL = '<RTL>'; // This will become '<span dir="rtl">' at the end
    self::$endRTL   = '</RTL>'; // This will become '</span>' at the end
    self::$lenStart = strlen(self::$startLTR);
    self::$lenEnd   = strlen(self::$endLTR);

    self::$previousState = '';
    self::$currentState  = strtoupper(I18N::direction());
    self::$waitingText   = '';
    $numberState         = false; // Set when we're inside a numeric string
    $result              = '';
    $openParDirection    = array(); // Direction ('LTR', 'RTL', '?' = not yet known, '' = closed) per parenthesis type

    self::beginCurrentSpan($result);

    while ($workingText != '') {
        $charArray     = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character
        $currentLetter = $charArray['letter'];
        $currentLen    = $charArray['length'];

        $openParIndex  = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this?
        $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this?

        switch ($currentLetter) {
            case '<':
                // Assume this '<' starts an HTML element
                $endPos = strpos($workingText, '>'); // look for the terminating '>'
                if ($endPos === false) {
                    $endPos = 0;
                }
                $currentLen += $endPos;
                $element = substr($workingText, 0, $currentLen);
                $temp    = strtolower(substr($element, 0, 3));
                if (strlen($element) < 7 && $temp == '<br') {
                    // A line break ends any numeric string and breaks the current span
                    if ($numberState) {
                        $numberState = false;
                        if (self::$currentState == 'RTL') {
                            self::$waitingText .= WT_UTF8_PDF;
                        }
                    }
                    self::breakCurrentSpan($result);
                } elseif (self::$waitingText == '') {
                    $result .= $element;
                } else {
                    self::$waitingText .= $element;
                }
                $workingText = substr($workingText, $currentLen);
                break;

            case '&':
                // Assume this '&' starts an HTML entity
                $endPos = strpos($workingText, ';'); // look for the terminating ';'
                if ($endPos === false) {
                    $endPos = 0;
                }
                $currentLen += $endPos;
                $entity = substr($workingText, 0, $currentLen);
                if (strtolower($entity) == '&nbsp;') {
                    // Ensure consistent case for this entity. This must REPLACE the entity:
                    // appending would emit it twice.
                    $entity = '&nbsp;';
                }
                if (self::$waitingText == '') {
                    $result .= $entity;
                } else {
                    self::$waitingText .= $entity;
                }
                $workingText = substr($workingText, $currentLen);
                break;

            case '{':
                if (substr($workingText, 1, 1) == '{') {
                    // Assume this '{{' starts a TCPDF directive
                    $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
                    if ($endPos === false) {
                        $endPos = 0;
                    }
                    $currentLen        = $endPos + 2;
                    $directive         = substr($workingText, 0, $currentLen);
                    $workingText       = substr($workingText, $currentLen);
                    $result            = $result . self::$waitingText . $directive;
                    self::$waitingText = '';
                    break;
                }
                // A single '{' is an ordinary character: deliberately fall through.
                // no break
            default:
                // Look for strings of numbers with optional leading or trailing + or -
                // and with optional embedded numeric punctuation
                if ($numberState) {
                    // If we're inside a numeric string, look for reasons to end it
                    $offset    = 0; // Be sure to look at the current character first
                    $charArray = self::getChar($workingText . "\n", $offset);
                    if (strpos(self::NUMBERS, $charArray['letter']) === false) {
                        // This is not a digit. Is it numeric punctuation?
                        if (substr($workingText . "\n", $offset, 6) == '&nbsp;') {
                            $offset += 6; // This could be numeric punctuation
                        } elseif (strpos(self::NUMBER_PUNCTUATION, $charArray['letter']) !== false) {
                            $offset += $charArray['length']; // This could be numeric punctuation
                        }
                        // If the next character is a digit, the current character is numeric punctuation
                        $charArray = self::getChar($workingText . "\n", $offset);
                        if (strpos(self::NUMBERS, $charArray['letter']) === false) {
                            // This is not a digit. End the run of digits and punctuation.
                            $numberState = false;
                            if (self::$currentState == 'RTL') {
                                if (strpos(self::NUMBER_PREFIX, $currentLetter) === false) {
                                    $currentLetter = WT_UTF8_PDF . $currentLetter;
                                } else {
                                    $currentLetter = $currentLetter . WT_UTF8_PDF; // Include a trailing + or - in the run
                                }
                            }
                        }
                    }
                } else {
                    // If we're outside a numeric string, look for reasons to start it
                    if (strpos(self::NUMBER_PREFIX, $currentLetter) !== false) {
                        // This might be a number lead-in
                        $offset   = $currentLen;
                        $nextChar = substr($workingText . "\n", $offset, 1);
                        if (strpos(self::NUMBERS, $nextChar) !== false) {
                            $numberState = true; // We found a digit: the lead-in is therefore numeric
                            if (self::$currentState == 'RTL') {
                                $currentLetter = WT_UTF8_LRE . $currentLetter;
                            }
                        }
                    } elseif (strpos(self::NUMBERS, $currentLetter) !== false) {
                        $numberState = true; // The current letter is a digit
                        if (self::$currentState == 'RTL') {
                            $currentLetter = WT_UTF8_LRE . $currentLetter;
                        }
                    }
                }

                // Determine the directionality of the current UTF-8 character.
                // This "loop" never iterates: it only exists so that "break" can be used to
                // skip the remaining tests, and "break 2" (from its top level) to skip the
                // direction-change handling that follows it as well.
                $newState = self::$currentState;
                while (true) {
                    if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') {
                        if (self::$currentState == '') {
                            $newState = 'RTL';
                            break;
                        }

                        if (self::$currentState == 'RTL') {
                            break;
                        }

                        // Switch to RTL only if this isn't a solitary RTL letter
                        $tempText = substr($workingText, $currentLen);
                        while ($tempText != '') {
                            $nextCharArray = self::getChar($tempText, 0);
                            $nextLetter    = $nextCharArray['letter'];
                            $nextLen       = $nextCharArray['length'];
                            $tempText      = substr($tempText, $nextLen);

                            if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') {
                                $newState = 'RTL';
                                break 2;
                            }

                            if (
                                strpos(self::PUNCTUATION, $nextLetter) !== false
                                || strpos(self::OPEN_PARENTHESES, $nextLetter) !== false
                            ) {
                                $newState = 'RTL';
                                break 2;
                            }

                            if ($nextLetter === ' ') {
                                break;
                            }
                            // A non-breaking space entity ends the look-ahead just like a blank
                            $nextLetter .= substr($tempText . "\n", 0, 5);
                            if ($nextLetter === '&nbsp;') {
                                break;
                            }
                        }
                        // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
                        $currentLetter = WT_UTF8_LRO . $currentLetter . WT_UTF8_PDF;
                        $newState      = 'LTR';
                        break;
                    }

                    if (
                        $currentLen != 1
                        || ($currentLetter >= 'A' && $currentLetter <= 'Z')
                        || ($currentLetter >= 'a' && $currentLetter <= 'z')
                    ) {
                        // Since it's neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
                        $newState = 'LTR';
                        break;
                    }

                    if ($closeParIndex !== false) {
                        // This closing parenthesis has to inherit the matching opening parenthesis' directionality
                        if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] != '?') {
                            $newState = $openParDirection[$closeParIndex];
                        }
                        $openParDirection[$closeParIndex] = '';
                        break;
                    }

                    if ($openParIndex !== false) {
                        // Opening parentheses always inherit the following directionality
                        self::$waitingText .= $currentLetter;
                        $workingText = substr($workingText, $currentLen);
                        while (true) {
                            if ($workingText === '') {
                                break;
                            }
                            if (substr($workingText, 0, 1) === ' ') {
                                // Spaces following this left parenthesis inherit the following directionality too
                                self::$waitingText .= ' ';
                                $workingText = substr($workingText, 1);
                                continue;
                            }
                            if (substr($workingText, 0, 6) === '&nbsp;') {
                                // Non-breaking spaces following this left parenthesis inherit the following directionality too
                                self::$waitingText .= '&nbsp;';
                                $workingText = substr($workingText, 6);
                                continue;
                            }
                            break;
                        }
                        $openParDirection[$openParIndex] = '?';
                        break 2; // double break because we're waiting for more information
                    }

                    // We have a digit or a "normal" special character.
                    //
                    // When this character is not at the start of the input string, it inherits the preceding directionality;
                    // at the start of the input string, it assumes the following directionality.
                    //
                    // Exceptions to this rule will be handled later during final clean-up.
                    self::$waitingText .= $currentLetter;
                    $workingText = substr($workingText, $currentLen);
                    if (self::$currentState != '') {
                        $result .= self::$waitingText;
                        self::$waitingText = '';
                    }
                    break 2; // double break because we're waiting for more information
                }

                if ($newState != self::$currentState) {
                    // A direction change has occurred
                    self::finishCurrentSpan($result, false);
                    self::$previousState = self::$currentState;
                    self::$currentState  = $newState;
                    self::beginCurrentSpan($result);
                }

                self::$waitingText .= $currentLetter;
                $workingText = substr($workingText, $currentLen);
                $result .= self::$waitingText;
                self::$waitingText = '';

                foreach ($openParDirection as $index => $value) {
                    // Since we now know the proper direction, remember it for all waiting opening parentheses
                    if ($value === '?') {
                        $openParDirection[$index] = self::$currentState;
                    }
                }

                break;
        }
    }

    // We're done. Finish last <span> if necessary.
    // A numeric string that is still open inside RTL text must be terminated first.
    if ($numberState && self::$currentState === 'RTL') {
        if (self::$waitingText === '') {
            $result .= WT_UTF8_PDF;
        } else {
            self::$waitingText .= WT_UTF8_PDF;
        }
    }
    self::finishCurrentSpan($result, true);

    // Get rid of any waiting text
    if (self::$waitingText != '') {
        if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') {
            $result .= self::$startRTL;
            $result .= self::$waitingText;
            $result .= self::$endRTL;
        } else {
            $result .= self::$startLTR;
            $result .= self::$waitingText;
            $result .= self::$endLTR;
        }
        self::$waitingText = '';
    }

    // Lastly, do some more cleanups

    // Move leading RTL numeric strings to following LTR text
    // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
    // The "+ 3" skips the WT_UTF8_LRE control code that follows the opening placeholder.
    if (substr($result, 0, self::$lenStart + 3) === self::$startRTL . WT_UTF8_LRE) {
        $spanEnd = strpos($result, self::$endRTL . self::$startLTR);
        if ($spanEnd !== false) {
            $textSpan = self::stripLrmRlm(substr($result, self::$lenStart + 3, $spanEnd - self::$lenStart - 3));
            if (I18N::scriptDirection(I18N::textScript($textSpan)) !== 'rtl') {
                // Merge the leading span into the LTR span that follows it
                $result = self::$startLTR
                    . substr($result, self::$lenStart, $spanEnd - self::$lenStart)
                    . substr($result, $spanEnd + self::$lenStart + self::$lenEnd);
            }
        }
    }

    // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
    if (I18N::direction() === 'rtl') {
        $result = str_replace(
            WT_UTF8_PDF . '.' . self::$endRTL,
            WT_UTF8_PDF . self::$endRTL . self::$startRTL . '.' . self::$endRTL,
            $result
        );
    }

    // Trim trailing blanks preceding <br> in LTR text.
    // Each str_replace() removes one blank per line break, so repeat until none are left.
    while (self::$previousState != 'RTL') {
        if (strpos($result, ' <LTRbr>') !== false) {
            $result = str_replace(' <LTRbr>', '<LTRbr>', $result);
            continue;
        }
        if (strpos($result, '&nbsp;<LTRbr>') !== false) {
            $result = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $result);
            continue;
        }
        if (strpos($result, ' <br>') !== false) {
            $result = str_replace(' <br>', '<br>', $result);
            continue;
        }
        if (strpos($result, '&nbsp;<br>') !== false) {
            $result = str_replace('&nbsp;<br>', '<br>', $result);
            continue;
        }
        break; // Neither space nor &nbsp; : we're done
    }

    // Trim trailing blanks preceding <br> in RTL text
    while (true) {
        if (strpos($result, ' <RTLbr>') !== false) {
            $result = str_replace(' <RTLbr>', '<RTLbr>', $result);
            continue;
        }
        if (strpos($result, '&nbsp;<RTLbr>') !== false) {
            $result = str_replace('&nbsp;<RTLbr>', '<RTLbr>', $result);
            continue;
        }
        break; // Neither space nor &nbsp; : we're done
    }

    // Convert '<LTRbr>' and '<RTLbr>' into a line break that closes and re-opens the span
    $result = str_replace(
        array('<LTRbr>', '<RTLbr>'),
        array(
            self::$endLTR . '<br>' . self::$startLTR,
            self::$endRTL . '<br>' . self::$startRTL,
        ),
        $result
    );

    // Include leading indeterminate directional text in whatever follows
    // (the "\n" sentinel keeps substr() from failing on a very short $result)
    if (
        substr($result . "\n", 0, self::$lenStart) != self::$startLTR
        && substr($result . "\n", 0, self::$lenStart) != self::$startRTL
        && substr($result . "\n", 0, 4) != '<br>'
    ) {
        $leadingText = '';
        while (true) {
            if ($result == '') {
                // No span was found at all: give back the text unchanged
                $result = $leadingText;
                break;
            }
            if (
                substr($result . "\n", 0, self::$lenStart) != self::$startLTR
                && substr($result . "\n", 0, self::$lenStart) != self::$startRTL
            ) {
                $leadingText .= substr($result, 0, 1);
                $result = substr($result, 1);
                continue;
            }
            // Re-insert the leading text just after the first opening placeholder
            $result = substr($result, 0, self::$lenStart) . $leadingText . substr($result, self::$lenStart);
            break;
        }
    }

    // Include solitary "-" and "+" in surrounding RTL text
    $result = str_replace(
        array(
            self::$endRTL . self::$startLTR . '-' . self::$endLTR . self::$startRTL,
            self::$endRTL . self::$startLTR . '+' . self::$endLTR . self::$startRTL,
        ),
        array('-', '+'),
        $result
    );

    // Remove empty spans
    $result = str_replace(
        array(self::$startLTR . self::$endLTR, self::$startRTL . self::$endRTL),
        '',
        $result
    );

    // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
    switch ($direction) {
        case 'BOTH':
        case 'both':
            // LTR text: <span dir="ltr"> text </span>
            // RTL text: <span dir="rtl"> text </span>
            $sLTR = '<span dir="ltr" ' . $class . '>' . $nothing;
            $eLTR = $nothing . '</span>';
            $sRTL = '<span dir="rtl" ' . $class . '>' . $nothing;
            $eRTL = $nothing . '</span>';
            break;
        case 'LTR':
        case 'ltr':
            // LTR text: <span dir="ltr"> text </span>
            // RTL text: text
            $sLTR = '<span dir="ltr" ' . $class . '>' . $nothing;
            $eLTR = $nothing . '</span>';
            $sRTL = '';
            $eRTL = '';
            break;
        case 'RTL':
        case 'rtl':
        default:
            // LTR text: text
            // RTL text: <span dir="rtl"> text </span>
            $sLTR = '';
            $eLTR = '';
            $sRTL = '<span dir="rtl" ' . $class . '>' . $nothing;
            $eRTL = $nothing . '</span>';
            break;
    }

    $result = str_replace(
        array(self::$startLTR, self::$endLTR, self::$startRTL, self::$endRTL),
        array($sLTR, $eLTR, $sRTL, $eRTL),
        $result
    );

    return $result;
}