Example #1
0
/**
 * Encapsulate all text in the input in <span dir="ltr"> / <span dir="rtl"> elements
 * according to the directionality of its characters.
 *
 * The input is consumed one character (or HTML element / HTML entity / '{{...}}' directive)
 * at a time.  While scanning, spans are delimited with the placeholder tags '<LTR>', '</LTR>',
 * '<RTL>' and '</RTL>'; after several clean-up passes these placeholders are replaced by
 * the real <span> markup selected by $direction.
 *
 * Reads the globals $TEXT_DIRECTION, $openPar, $closePar, $punctuation, $numbers,
 * $numberPrefix and $numberPunctuation, and (re)initialises the globals $previousState,
 * $currentState, $waitingText, $startLTR, $endLTR, $startRTL, $endRTL, $lenStart and $lenEnd,
 * which are presumably shared with beginCurrentSpan(), breakCurrentSpan() and
 * finishCurrentSpan() (defined elsewhere).
 *
 * @param string $inputText Raw input
 * @param string $direction Which spans to emit: 'BOTH'/'both' wraps LTR and RTL text,
 *                          'LTR'/'ltr' wraps only LTR text, any other value (including
 *                          'RTL'/'rtl') wraps only RTL text.  Default 'BOTH'.
 * @param string $class     Additional text to insert into each opening <span dir="xxx">
 *                          (such as 'class="yyy"')
 *
 * @return string The string with all texts encapsulated as required
 */
function spanLTRRTL($inputText, $direction = 'BOTH', $class = '')
{
    global $TEXT_DIRECTION;
    global $openPar, $closePar, $punctuation;
    global $numbers, $numberPrefix, $numberPunctuation;
    global $previousState, $currentState, $waitingText;
    global $startLTR, $endLTR, $startRTL, $endRTL, $lenStart, $lenEnd;

    // Nothing to do
    if ($inputText == '') {
        return '';
    }

    $workingText = str_replace("\n", '<br>', $inputText);
    // Reposition some incorrectly placed line breaks
    $workingText = str_replace(array('<span class="starredname"><br>', '<span<br>class="starredname">'), '<br><span class="starredname">', $workingText);
    // Get rid of any existing UTF8 control codes
    $workingText = stripLRMRLM($workingText);

    //	$nothing  = '&zwnj;'; // Zero Width Non-Joiner  (not sure whether this is still needed to work around a TCPDF bug)
    $nothing = '';

    // Placeholder tags; they become real <span dir="..."> / </span> markup at the very end
    $startLTR = '<LTR>';
    $endLTR = '</LTR>';
    $startRTL = '<RTL>';
    $endRTL = '</RTL>';
    // The RTL versions MUST have the same lengths as the LTR versions
    $lenStart = strlen($startLTR);
    $lenEnd = strlen($endLTR);

    $previousState = '';
    $currentState = strtoupper($TEXT_DIRECTION);
    // Set when we're inside a numeric string
    $numberState = false;
    $result = '';
    $waitingText = '';
    // Direction remembered for each kind of opening parenthesis ('?' = not yet known)
    $openParDirection = array();

    beginCurrentSpan($result);

    while ($workingText != '') {
        // Get the next ASCII or UTF-8 character
        $charArray = getChar($workingText, 0);
        $currentLetter = $charArray['letter'];
        $currentLen = $charArray['length'];

        // Which opening / closing parenthesis is this (false when it is neither)?
        $openParIndex = strpos($openPar, $currentLetter);
        $closeParIndex = strpos($closePar, $currentLetter);

        switch ($currentLetter) {
            case '<':
                // Assume this '<' starts an HTML element: look for the terminating '>'
                $endPos = strpos($workingText, '>');
                if ($endPos === false) {
                    $endPos = 0;
                }
                $currentLen += $endPos;
                $element = substr($workingText, 0, $currentLen);
                $temp = strtolower(substr($element, 0, 3));
                if (strlen($element) < 7 && $temp == '<br') {
                    // assume we have '<br>' or a variant thereof
                    if ($numberState) {
                        $numberState = false;
                        if ($currentState == 'RTL') {
                            $waitingText .= WT_UTF8_PDF;
                        }
                    }
                    breakCurrentSpan($result);
                } else {
                    // Any other element is copied through untouched
                    if ($waitingText == '') {
                        $result .= $element;
                    } else {
                        $waitingText .= $element;
                    }
                }
                $workingText = substr($workingText, $currentLen);
                break;

            case '&':
                // Assume this '&' starts an HTML entity: look for the terminating ';'
                $endPos = strpos($workingText, ';');
                if ($endPos === false) {
                    $endPos = 0;
                }
                $currentLen += $endPos;
                $entity = substr($workingText, 0, $currentLen);

                // Look for possible New Line codes (&#10; or &#xA;)
                $isNewLine = false;
                if (substr($entity, 0, 2) == '&#') {
                    if (substr($entity, 2, 1) == 'x' || substr($entity, 2, 1) == 'X') {
                        // the entity is a hexadecimal number
                        $ordinal = hexdec(substr($entity, 3, -1));
                    } else {
                        // the entity is a decimal number
                        $ordinal = intval(substr($entity, 2, -1));
                    }
                    $isNewLine = $ordinal == 10;
                }

                if ($isNewLine) {
                    // we have a New-Line code: treat it like '<br>'
                    if ($numberState) {
                        $numberState = false;
                        if ($currentState == 'RTL') {
                            $waitingText .= WT_UTF8_PDF;
                        }
                    }
                    breakCurrentSpan($result);
                } else {
                    if (strtolower($entity) == '&nbsp;') {
                        // Ensure consistent case for this entity
                        $entity = '&nbsp;';
                    }
                    if ($waitingText == '') {
                        $result .= $entity;
                    } else {
                        $waitingText .= $entity;
                    }
                }
                // Always consume the entity, otherwise the outer loop would never terminate
                $workingText = substr($workingText, $currentLen);
                break;

            case '{':
                if (substr($workingText, 1, 1) == '{') {
                    // Assume this '{{' starts a TCPDF directive: look for the terminating '}}'
                    $endPos = strpos($workingText, '}}');
                    if ($endPos === false) {
                        $endPos = 0;
                    }
                    $currentLen = $endPos + 2;
                    $directive = substr($workingText, 0, $currentLen);
                    $workingText = substr($workingText, $currentLen);
                    $result = $result . $waitingText . $directive;
                    $waitingText = '';
                    break;
                }
                // no break - a single '{' is handled like any other character

            default:
                // Look for strings of numbers with optional leading or trailing + or -
                // and with optional embedded numeric punctuation
                if ($numberState) {
                    // If we're inside a numeric string, look for reasons to end it
                    // Be sure to look at the current character first
                    $offset = 0;
                    $charArray = getChar($workingText . "\n", $offset);
                    if (strpos($numbers, $charArray['letter']) === false) {
                        // This is not a digit.  Is it numeric punctuation?
                        if (substr($workingText . "\n", $offset, 6) == '&nbsp;') {
                            // This could be numeric punctuation
                            $offset += 6;
                        } else {
                            if (strpos($numberPunctuation, $charArray['letter']) !== false) {
                                // This could be numeric punctuation
                                $offset += $charArray['length'];
                            }
                        }
                        // If the next character is a digit, the current character is numeric punctuation
                        $charArray = getChar($workingText . "\n", $offset);
                        if (strpos($numbers, $charArray['letter']) === false) {
                            // This is not a digit.  End the run of digits and punctuation.
                            $numberState = false;
                            if ($currentState == 'RTL') {
                                if (strpos($numberPrefix, $currentLetter) === false) {
                                    $currentLetter = WT_UTF8_PDF . $currentLetter;
                                } else {
                                    // Include a trailing + or - in the run
                                    $currentLetter = $currentLetter . WT_UTF8_PDF;
                                }
                            }
                        }
                    }
                } else {
                    // If we're outside a numeric string, look for reasons to start it
                    if (strpos($numberPrefix, $currentLetter) !== false) {
                        // This might be a number lead-in
                        $offset = $currentLen;
                        $nextChar = substr($workingText . "\n", $offset, 1);
                        if (strpos($numbers, $nextChar) !== false) {
                            // We found a digit: the lead-in is therefore numeric
                            $numberState = true;
                            if ($currentState == 'RTL') {
                                $currentLetter = WT_UTF8_LRE . $currentLetter;
                            }
                        }
                    } else {
                        if (strpos($numbers, $currentLetter) !== false) {
                            // The current letter is a digit
                            $numberState = true;
                            if ($currentState == 'RTL') {
                                $currentLetter = WT_UTF8_LRE . $currentLetter;
                            }
                        }
                    }
                }

                // Determine the directionality of the current UTF-8 character.
                // The while (true) is only a breakable block: 'break' continues after it,
                // 'break 2' also leaves the enclosing switch.
                $newState = $currentState;
                while (true) {
                    if (WT_I18N::scriptDirection(WT_I18N::languageScript($currentLetter)) == 'rtl') {
                        if ($currentState == '') {
                            $newState = 'RTL';
                            break;
                        }
                        if ($currentState == 'RTL') {
                            break;
                        }
                        // Switch to RTL only if this isn't a solitary RTL letter
                        $tempText = substr($workingText, $currentLen);
                        while ($tempText != '') {
                            $nextCharArray = getChar($tempText, 0);
                            $nextLetter = $nextCharArray['letter'];
                            $nextLen = $nextCharArray['length'];
                            $tempText = substr($tempText, $nextLen);
                            if (WT_I18N::scriptDirection(WT_I18N::languageScript($nextLetter)) == 'rtl') {
                                $newState = 'RTL';
                                break 2;
                            }
                            if (strpos($punctuation, $nextLetter) !== false || strpos($openPar, $nextLetter) !== false) {
                                $newState = 'RTL';
                                break 2;
                            }
                            if ($nextLetter == ' ') {
                                break;
                            }
                            // Is the next "letter" really the entity '&nbsp;'?
                            $nextLetter .= substr($tempText . "\n", 0, 5);
                            if ($nextLetter == '&nbsp;') {
                                $tempText = substr($tempText, 5);
                                break;
                            }
                        }
                        // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
                        $currentLetter = WT_UTF8_LRO . $currentLetter . WT_UTF8_PDF;
                        $newState = 'LTR';
                        break;
                    }

                    if ($currentLen != 1 || ($currentLetter >= 'A' && $currentLetter <= 'Z') || ($currentLetter >= 'a' && $currentLetter <= 'z')) {
                        // Since it's neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
                        $newState = 'LTR';
                        break;
                    }

                    if ($closeParIndex !== false) {
                        // This closing parenthesis has to inherit the matching opening parenthesis' directionality
                        if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] != '?') {
                            $newState = $openParDirection[$closeParIndex];
                        }
                        $openParDirection[$closeParIndex] = '';
                        break;
                    }

                    if ($openParIndex !== false) {
                        // Opening parentheses always inherit the following directionality
                        $waitingText .= $currentLetter;
                        $workingText = substr($workingText, $currentLen);
                        while (true) {
                            if ($workingText == '') {
                                break;
                            }
                            if (substr($workingText, 0, 1) == ' ') {
                                // Spaces following this left parenthesis inherit the following directionality too
                                $waitingText .= ' ';
                                $workingText = substr($workingText, 1);
                                continue;
                            }
                            if (substr($workingText, 0, 6) == '&nbsp;') {
                                // Spaces following this left parenthesis inherit the following directionality too
                                $waitingText .= '&nbsp;';
                                $workingText = substr($workingText, 6);
                                continue;
                            }
                            break;
                        }
                        $openParDirection[$openParIndex] = '?';
                        // double break because we're waiting for more information
                        break 2;
                    }

                    // We have a digit or a "normal" special character.
                    //
                    // When this character is not at the start of the input string, it inherits the preceding directionality;
                    // at the start of the input string, it assumes the following directionality.
                    //
                    // Exceptions to this rule will be handled later during final clean-up.
                    //
                    $waitingText .= $currentLetter;
                    $workingText = substr($workingText, $currentLen);
                    if ($currentState != '') {
                        $result .= $waitingText;
                        $waitingText = '';
                    }
                    // double break because we're waiting for more information
                    break 2;
                }

                if ($newState != $currentState) {
                    // A direction change has occurred
                    finishCurrentSpan($result, false);
                    $previousState = $currentState;
                    $currentState = $newState;
                    beginCurrentSpan($result);
                }
                $waitingText .= $currentLetter;
                $workingText = substr($workingText, $currentLen);
                $result .= $waitingText;
                $waitingText = '';

                foreach ($openParDirection as $index => $value) {
                    // Since we now know the proper direction, remember it for all waiting opening parentheses
                    if ($value == '?') {
                        $openParDirection[$index] = $currentState;
                    }
                }
                break;
        }
    }

    // We're done.  Finish last <span> if necessary
    if ($numberState && $currentState == 'RTL') {
        // Close the numeric run that is still open at the end of the input
        if ($waitingText == '') {
            $result .= WT_UTF8_PDF;
        } else {
            $waitingText .= WT_UTF8_PDF;
        }
    }
    finishCurrentSpan($result, true);

    // Get rid of any waiting text
    if ($waitingText != '') {
        if ($TEXT_DIRECTION == 'rtl' && $currentState == 'LTR') {
            $result .= $startRTL;
            $result .= $waitingText;
            $result .= $endRTL;
        } else {
            $result .= $startLTR;
            $result .= $waitingText;
            $result .= $endLTR;
        }
        $waitingText = '';
    }

    // Lastly, do some more cleanups

    // Move leading RTL numeric strings to following LTR text
    // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
    // The literal 3 is taken to be the byte length of WT_UTF8_LRE (defined elsewhere).
    if (substr($result, 0, $lenStart + 3) == $startRTL . WT_UTF8_LRE) {
        $spanEnd = strpos($result, $endRTL . $startLTR);
        if ($spanEnd !== false) {
            $textSpan = stripLRMRLM(substr($result, $lenStart + 3, $spanEnd - $lenStart - 3));
            $langSpan = WT_I18N::textScript($textSpan);
            if ($langSpan != 'Hebr' && $langSpan != 'Arab') {
                // Re-open the first span as LTR and drop the '</RTL><LTR>' boundary
                $result = $startLTR . substr($result, $lenStart, $spanEnd - $lenStart) . substr($result, $spanEnd + $lenStart + $lenEnd);
            }
        }
    }

    // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
    if ($TEXT_DIRECTION == 'rtl') {
        $result = str_replace(WT_UTF8_PDF . '.' . $endRTL, WT_UTF8_PDF . $endRTL . $startRTL . '.' . $endRTL, $result);
    }

    // Trim trailing blanks preceding <br> in LTR text
    while ($previousState != 'RTL') {
        if (strpos($result, ' <LTRbr>') !== false) {
            $result = str_replace(' <LTRbr>', '<LTRbr>', $result);
            continue;
        }
        if (strpos($result, '&nbsp;<LTRbr>') !== false) {
            $result = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $result);
            continue;
        }
        if (strpos($result, ' <br>') !== false) {
            $result = str_replace(' <br>', '<br>', $result);
            continue;
        }
        if (strpos($result, '&nbsp;<br>') !== false) {
            $result = str_replace('&nbsp;<br>', '<br>', $result);
            continue;
        }
        // Neither space nor &nbsp; : we're done
        break;
    }

    // Trim trailing blanks preceding <br> in RTL text
    while (true) {
        if (strpos($result, ' <RTLbr>') !== false) {
            $result = str_replace(' <RTLbr>', '<RTLbr>', $result);
            continue;
        }
        if (strpos($result, '&nbsp;<RTLbr>') !== false) {
            $result = str_replace('&nbsp;<RTLbr>', '<RTLbr>', $result);
            continue;
        }
        // Neither space nor &nbsp; : we're done
        break;
    }

    // Convert '<LTRbr>' and '<RTLbr>' into a line break between two spans
    $result = str_replace(array('<LTRbr>', '<RTLbr>'), array($endLTR . '<br>' . $startLTR, $endRTL . '<br>' . $startRTL), $result);

    // Include leading indeterminate directional text in whatever follows
    if (substr($result . "\n", 0, $lenStart) != $startLTR && substr($result . "\n", 0, $lenStart) != $startRTL && substr($result . "\n", 0, 4) != '<br>') {
        $leadingText = '';
        while (true) {
            if ($result == '') {
                // No span was found at all: give the text back unchanged
                $result = $leadingText;
                break;
            }
            if (substr($result . "\n", 0, $lenStart) != $startLTR && substr($result . "\n", 0, $lenStart) != $startRTL) {
                $leadingText .= substr($result, 0, 1);
                $result = substr($result, 1);
                continue;
            }
            // Re-insert the collected text just inside the first span
            $result = substr($result, 0, $lenStart) . $leadingText . substr($result, $lenStart);
            break;
        }
    }

    // Include solitary "-" and "+" in surrounding RTL text
    $result = str_replace(array($endRTL . $startLTR . '-' . $endLTR . $startRTL, $endRTL . $startLTR . '+' . $endLTR . $startRTL), array('-', '+'), $result);

    // Remove empty spans
    $result = str_replace(array($startLTR . $endLTR, $startRTL . $endRTL), '', $result);

    // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
    switch ($direction) {
        case 'BOTH':
        case 'both':
            // LTR text: <span dir="ltr"> text </span>
            // RTL text: <span dir="rtl"> text </span>
            $sLTR = '<span dir="ltr" ' . $class . '>' . $nothing;
            $eLTR = $nothing . '</span>';
            $sRTL = '<span dir="rtl" ' . $class . '>' . $nothing;
            $eRTL = $nothing . '</span>';
            break;
        case 'LTR':
        case 'ltr':
            // LTR text: <span dir="ltr"> text </span>
            // RTL text: text
            $sLTR = '<span dir="ltr" ' . $class . '>' . $nothing;
            $eLTR = $nothing . '</span>';
            $sRTL = '';
            $eRTL = '';
            break;
        case 'RTL':
        case 'rtl':
        default:
            // LTR text: text
            // RTL text: <span dir="rtl"> text </span>
            $sLTR = '';
            $eLTR = '';
            $sRTL = '<span dir="rtl" ' . $class . '>' . $nothing;
            $eRTL = $nothing . '</span>';
            break;
    }
    $result = str_replace(array($startLTR, $endLTR, $startRTL, $endRTL), array($sLTR, $eLTR, $sRTL, $eRTL), $result);

    return $result;
}