/**
 * Calculate the D-M soundex codes for a name.
 *
 * The name is upper-cased, rewritten using the rules in self::$transformNameTable
 * and then consumed chunk by chunk. Each chunk is the longest prefix (at most
 * self::MAXCHAR bytes) found in self::$dmsounds. A table entry may supply several
 * alternative codes for one chunk, so a set of partial results is carried along
 * and may branch.
 *
 * @param string $name The name to encode
 *
 * @return string[] Zero or more 6-character codes, zero-padded on the right
 */
private static function DMSoundex($name) {
    // Apply special transformation rules to the input string
    $name = WT_I18N::strtoupper($name);
    foreach (self::$transformNameTable as $transformRule) {
        $name = str_replace($transformRule[0], $transformRule[1], $name);
    }

    // Hebrew and Arabic names keep duplicated sounds (see the duplicate handling below)
    $name_script = WT_I18N::textScript($name);
    $noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');

    $lastPos       = strlen($name) - 1;
    $currPos       = 0;
    $state         = 1; // 1: start of input string, 2: before vowel, 3: other
    $result        = array(); // complete 6-digit D-M codes
    $partialResult = array(array('!')); // incomplete D-M codes ('!' stops the "duplicate sound" check)

    // Loop through the input string.
    // Stop when the string is exhausted or when no more partial results remain
    while (count($partialResult) != 0 && $currPos <= $lastPos) {
        // Find the longest chunk at the current position that is present in the coding table.
        // Loose comparison is deliberate: older PHP versions return false, not '', from substr()
        $thisEntry = substr($name, $currPos, self::MAXCHAR);
        while ($thisEntry != '' && !isset(self::$dmsounds[$thisEntry])) {
            $thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
        }
        if ($thisEntry == '') {
            $currPos++; // Nothing matched: skip one byte and try again
            continue;
        }

        $soundTableEntry = self::$dmsounds[$thisEntry];
        $workingResult   = $partialResult;
        $partialResult   = array();
        $currPos        += strlen($thisEntry);

        if ($state != 1) {
            // Not at beginning of input string: the state depends on whether the next chunk is a vowel
            $nextEntry = '';
            if ($currPos <= $lastPos) {
                $nextEntry = substr($name, $currPos, self::MAXCHAR);
                while ($nextEntry != '' && !isset(self::$dmsounds[$nextEntry])) {
                    $nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
                }
            }
            // Element 0 of a table entry is the vowel flag ('0' means "not a vowel")
            if ($nextEntry != '' && self::$dmsounds[$nextEntry][0] != '0') {
                $state = 2;
            } else {
                $state = 3;
            }
        }

        // The table entry holds one or more triplets of codes; $state selects the column
        // inside each triplet. Every triplet contributes one alternative encoding.
        while ($state < count($soundTableEntry)) {
            $sound = $soundTableEntry[$state];
            if ($sound == '') {
                // Empty means 'ignore this sound in this state'
                foreach ($workingResult as $workingEntry) {
                    $workingEntry[count($workingEntry) - 1] .= '!'; // Prevent false 'doubles'
                    $partialResult[] = $workingEntry;
                }
            } else {
                foreach ($workingResult as $workingEntry) {
                    $isDuplicate = ($sound === $workingEntry[count($workingEntry) - 1]);
                    // A repeated sound is normally collapsed into one. For Hebrew and Arabic
                    // the repeated sound is kept; the single-occurrence variant mentioned in
                    // the original comments is not generated by this code.
                    if (!$isDuplicate || $noVowels) {
                        $workingEntry[] = $sound;
                    }
                    if (count($workingEntry) < 7) {
                        $partialResult[] = $workingEntry;
                    } else {
                        // This is the 6th code in the sequence
                        // We're looking for 7 entries because the first is '!' and doesn't count
                        $tempResult = str_replace('!', '', implode('', $workingEntry));
                        // Only return codes from recognisable sounds.
                        // Compare with '' explicitly: the string '0' is falsy in PHP.
                        if ($tempResult !== '') {
                            $result[] = substr($tempResult . '000000', 0, 6);
                        }
                    }
                }
            }
            $state = $state + 3; // Advance to next triplet while keeping the same basic state
        }
    }

    // Zero-fill and copy all remaining partial results
    foreach ($partialResult as $workingEntry) {
        $tempResult = str_replace('!', '', implode('', $workingEntry));
        // Only return codes from recognisable sounds.
        // A bare truthiness test would silently drop a result whose only code is '0'.
        if ($tempResult !== '') {
            $result[] = substr($tempResult . '000000', 0, 6);
        }
    }

    return $result;
}
/**
 * Get the key (into the array returned by getAllNames()) of the secondary name.
 *
 * The secondary name is normally the primary name. When several names exist,
 * it is the first name that is not the primary one, is not of type '_MARNM',
 * and whose 'sort' text is written in a different script from the primary name.
 * The answer is cached in $this->_getSecondaryName.
 *
 * @return mixed Key of the secondary name
 */
public function getSecondaryName() {
    // Answer already known from an earlier call
    if (!is_null($this->_getSecondaryName)) {
        return $this->_getSecondaryName;
    }

    // Generally, the primary and secondary names are the same
    $this->_getSecondaryName = $this->getPrimaryName();

    // ...except when there are names with different character sets
    $candidates = $this->getAllNames();
    if (count($candidates) <= 1) {
        return $this->_getSecondaryName;
    }

    $primaryScript = WT_I18N::textScript($candidates[$this->getPrimaryName()]['sort']);
    foreach ($candidates as $key => $candidate) {
        if ($key == $this->getPrimaryName()) {
            continue; // The primary name itself
        }
        if ($candidate['type'] == '_MARNM') {
            continue; // Names of type '_MARNM' are never chosen
        }
        if (WT_I18N::textScript($candidate['sort']) == $primaryScript) {
            continue; // Same script as the primary name
        }
        $this->_getSecondaryName = $key;
        break;
    }

    return $this->_getSecondaryName;
}
/**
 * Build (and cache) every "husband + wife" name combination for this record.
 *
 * Each entry has the keys 'type' (taken from the husband's name), 'sort' and
 * 'full'; the last two are the two partners' values joined with ' + '.
 * Names of type '_MARNM' are never combined. Combinations whose two 'full'
 * names are written in the same script come first, followed by the
 * mixed-script combinations. A missing partner is represented by a
 * placeholder name built from the $UNKNOWN_PN and $UNKNOWN_NN globals.
 *
 * @return array[] List of combined names (empty when no combination qualifies)
 */
public function getAllNames() {
    global $UNKNOWN_NN, $UNKNOWN_PN;

    if (is_null($this->_getAllNames)) {
        // Placeholder for a missing partner. The full name is the two unknown-name
        // texts joined by a space (they must be concatenated, not listed as
        // separate array elements).
        $unknown_names = array(
            0 => array(
                'type' => 'BIRT',
                'sort' => '@N.N.',
                'full' => $UNKNOWN_PN . ' ' . $UNKNOWN_NN,
            ),
        );

        // Check the script used by each name, so we can match cyrillic with cyrillic, greek with greek, etc.
        $husb_names = $this->husb ? $this->husb->getAllNames() : $unknown_names;
        foreach ($husb_names as $n => $husb_name) {
            $husb_names[$n]['script'] = WT_I18N::textScript($husb_name['full']);
        }

        $wife_names = $this->wife ? $this->wife->getAllNames() : $unknown_names;
        foreach ($wife_names as $n => $wife_name) {
            $wife_names[$n]['script'] = WT_I18N::textScript($wife_name['full']);
        }

        // Collect both groups in one pass; each group keeps the husband-major order
        $matched   = array();
        $unmatched = array();
        foreach ($husb_names as $husb_name) {
            foreach ($wife_names as $wife_name) {
                if ($husb_name['type'] == '_MARNM' || $wife_name['type'] == '_MARNM') {
                    continue;
                }
                $combined = array(
                    'type' => $husb_name['type'],
                    'sort' => $husb_name['sort'] . ' + ' . $wife_name['sort'],
                    'full' => $husb_name['full'] . ' + ' . $wife_name['full'],
                );
                if ($husb_name['script'] == $wife_name['script']) {
                    $matched[] = $combined;
                } else {
                    $unmatched[] = $combined;
                }
            }
        }

        // Matched names first, unmatched names second (there may be no matched names).
        // Always store an array so that callers can count() or iterate the result.
        $this->_getAllNames = array_merge($matched, $unmatched);
    }

    return $this->_getAllNames;
}
/**
 * This function encapsulates all texts in the input with <span dir='xxx'> and </span>
 * according to the directionality specified.
 *
 * The text is scanned one character at a time. HTML elements, HTML entities and
 * TCPDF '{{...}}' directives are copied through; runs of digits, parentheses and
 * punctuation are attached to the neighbouring directional text. While scanning,
 * the placeholder tags <LTR>, </LTR>, <RTL> and </RTL> are used; they are turned
 * into real <span> tags (or removed) at the very end.
 *
 * Reads the globals $TEXT_DIRECTION, $openPar, $closePar, $punctuation, $numbers,
 * $numberPrefix and $numberPunctuation, and writes the scanning state
 * ($previousState, $currentState, $waitingText, the placeholder tags and their
 * lengths) to globals - presumably so that beginCurrentSpan(), breakCurrentSpan()
 * and finishCurrentSpan() can share it; confirm against those helpers.
 *
 * @param string $inputText Raw input
 * @param string $direction Directionality (LTR, BOTH, RTL) default BOTH
 * @param string $class     Additional text to insert into output <span dir="xxx"> (such as 'class="yyy"')
 *
 * @return string The string with all texts encapsulated as required
 */
function spanLTRRTL($inputText, $direction = 'BOTH', $class = '') {
    global $TEXT_DIRECTION;
    global $openPar, $closePar, $punctuation;
    global $numbers, $numberPrefix, $numberPunctuation;
    global $previousState, $currentState, $waitingText;
    global $startLTR, $endLTR, $startRTL, $endRTL, $lenStart, $lenEnd;

    if ($inputText == '') {
        return ''; // Nothing to do
    }

    $workingText = str_replace("\n", '<br>', $inputText);
    // Reposition some incorrectly placed line breaks
    $workingText = str_replace(array('<span class="starredname"><br>', '<span<br>class="starredname">'), '<br><span class="starredname">', $workingText);
    $workingText = stripLRMRLM($workingText); // Get rid of any existing UTF8 control codes

    // Filler placed just inside every generated <span>. It used to be a Zero Width
    // Non-Joiner ('&zwnj;'), apparently to work around a TCPDF bug; it is empty now.
    $nothing = '';

    $startLTR = '<LTR>'; // This will become '<span dir="ltr">' at the end
    $endLTR   = '</LTR>'; // This will become '</span>' at the end
    $startRTL = '<RTL>'; // This will become '<span dir="rtl">' at the end
    $endRTL   = '</RTL>'; // This will become '</span>' at the end
    $lenStart = strlen($startLTR); // RTL version MUST have same length
    $lenEnd   = strlen($endLTR); // RTL version MUST have same length

    $previousState    = '';
    $currentState     = strtoupper($TEXT_DIRECTION);
    $numberState      = false; // Set when we're inside a numeric string
    $result           = '';
    $waitingText      = '';
    $openParDirection = array();

    beginCurrentSpan($result);

    // Loose comparison is deliberate: older PHP versions return false, not '', from substr()
    while ($workingText != '') {
        $charArray     = getChar($workingText, 0); // Get the next ASCII or UTF-8 character
        $currentLetter = $charArray['letter'];
        $currentLen    = $charArray['length'];

        $openParIndex  = strpos($openPar, $currentLetter); // Which opening parenthesis is this?
        $closeParIndex = strpos($closePar, $currentLetter); // Which closing parenthesis is this?

        switch ($currentLetter) {
        case '<':
            // Assume this '<' starts an HTML element
            $endPos = strpos($workingText, '>'); // look for the terminating '>'
            if ($endPos === false) {
                $endPos = 0;
            }
            $currentLen += $endPos;
            $element = substr($workingText, 0, $currentLen);
            $temp    = strtolower(substr($element, 0, 3));
            // The length test must be applied to the element itself: short elements
            // that start with '<br' are '<br>', '<br/>' or '<br />'
            if (strlen($element) < 7 && $temp == '<br') {
                if ($numberState) {
                    $numberState = false;
                    if ($currentState == 'RTL') {
                        $waitingText .= WT_UTF8_PDF;
                    }
                }
                breakCurrentSpan($result);
            } else {
                if ($waitingText == '') {
                    $result .= $element;
                } else {
                    $waitingText .= $element;
                }
            }
            $workingText = substr($workingText, $currentLen);
            break;

        case '&':
            // Assume this '&' starts an HTML entity
            $endPos = strpos($workingText, ';'); // look for the terminating ';'
            if ($endPos === false) {
                $endPos = 0;
            }
            $currentLen += $endPos;
            $entity = substr($workingText, 0, $currentLen);

            // Look for a numeric entity (decimal or hexadecimal) that encodes a New Line
            $isNewLine = false;
            if (preg_match('/^&#(?:[xX]([0-9A-Fa-f]+)|([0-9]+));$/D', $entity, $match)) {
                if ($match[1] !== '') {
                    $ordinal = hexdec($match[1]);
                } else {
                    $ordinal = intval($match[2]);
                }
                $isNewLine = ($ordinal == 10);
            }

            if ($isNewLine) {
                // Treat the New Line code exactly like '<br>'
                if ($numberState) {
                    $numberState = false;
                    if ($currentState == 'RTL') {
                        $waitingText .= WT_UTF8_PDF;
                    }
                }
                breakCurrentSpan($result);
            } else {
                if (strtolower($entity) == '&nbsp;') {
                    $entity = '&nbsp;'; // Ensure consistent case for this entity
                }
                if ($waitingText == '') {
                    $result .= $entity;
                } else {
                    $waitingText .= $entity;
                }
            }
            // Always consume the entity, otherwise the scan would never advance
            $workingText = substr($workingText, $currentLen);
            break;

        case '{':
            if (substr($workingText, 1, 1) == '{') {
                // Assume this '{{' starts a TCPDF directive
                $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
                if ($endPos === false) {
                    $endPos = 0;
                }
                $currentLen  = $endPos + 2;
                $directive   = substr($workingText, 0, $currentLen);
                $workingText = substr($workingText, $currentLen);
                $result      = $result . $waitingText . $directive;
                $waitingText = '';
                break;
            }
            // A single '{' is an ordinary character: fall through on purpose

        default:
            // Look for strings of numbers with optional leading or trailing + or -
            // and with optional embedded numeric punctuation
            if ($numberState) {
                // If we're inside a numeric string, look for reasons to end it
                $offset    = 0; // Be sure to look at the current character first
                $charArray = getChar($workingText . "\n", $offset);
                if (strpos($numbers, $charArray['letter']) === false) {
                    // This is not a digit. Is it numeric punctuation?
                    if (substr($workingText . "\n", $offset, 6) == '&nbsp;') {
                        $offset += 6; // This could be numeric punctuation
                    } else {
                        if (strpos($numberPunctuation, $charArray['letter']) !== false) {
                            $offset += $charArray['length']; // This could be numeric punctuation
                        }
                    }
                    // If the next character is a digit, the current character is numeric punctuation
                    $charArray = getChar($workingText . "\n", $offset);
                    if (strpos($numbers, $charArray['letter']) === false) {
                        // This is not a digit. End the run of digits and punctuation.
                        $numberState = false;
                        if ($currentState == 'RTL') {
                            if (strpos($numberPrefix, $currentLetter) === false) {
                                $currentLetter = WT_UTF8_PDF . $currentLetter;
                            } else {
                                $currentLetter = $currentLetter . WT_UTF8_PDF; // Include a trailing + or - in the run
                            }
                        }
                    }
                }
            } else {
                // If we're outside a numeric string, look for reasons to start it
                if (strpos($numberPrefix, $currentLetter) !== false) {
                    // This might be a number lead-in
                    $offset   = $currentLen;
                    $nextChar = substr($workingText . "\n", $offset, 1);
                    if (strpos($numbers, $nextChar) !== false) {
                        $numberState = true; // We found a digit: the lead-in is therefore numeric
                        if ($currentState == 'RTL') {
                            $currentLetter = WT_UTF8_LRE . $currentLetter;
                        }
                    }
                } else {
                    if (strpos($numbers, $currentLetter) !== false) {
                        $numberState = true; // The current letter is a digit
                        if ($currentState == 'RTL') {
                            $currentLetter = WT_UTF8_LRE . $currentLetter;
                        }
                    }
                }
            }

            // Determine the directionality of the current UTF-8 character.
            // This loop never iterates: it only exists so that 'break' can be used as a structured exit.
            $newState = $currentState;
            while (true) {
                if (WT_I18N::scriptDirection(WT_I18N::languageScript($currentLetter)) == 'rtl') {
                    if ($currentState == '') {
                        $newState = 'RTL';
                        break;
                    }

                    if ($currentState == 'RTL') {
                        break;
                    }
                    // Switch to RTL only if this isn't a solitary RTL letter
                    $tempText = substr($workingText, $currentLen);
                    while ($tempText != '') {
                        $nextCharArray = getChar($tempText, 0);
                        $nextLetter    = $nextCharArray['letter'];
                        $nextLen       = $nextCharArray['length'];
                        $tempText      = substr($tempText, $nextLen);

                        if (WT_I18N::scriptDirection(WT_I18N::languageScript($nextLetter)) == 'rtl') {
                            $newState = 'RTL';
                            break 2;
                        }

                        if (strpos($punctuation, $nextLetter) !== false || strpos($openPar, $nextLetter) !== false) {
                            $newState = 'RTL';
                            break 2;
                        }

                        if ($nextLetter == ' ') {
                            break;
                        }
                        // A non-breaking space entity ends the look-ahead just like a plain space
                        $nextLetter .= substr($tempText . "\n", 0, 5);
                        if ($nextLetter == '&nbsp;') {
                            $tempText = substr($tempText, 5);
                            break;
                        }
                    }
                    // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
                    $currentLetter = WT_UTF8_LRO . $currentLetter . WT_UTF8_PDF;
                    $newState      = 'LTR';
                    break;
                }
                if ($currentLen != 1 || $currentLetter >= 'A' && $currentLetter <= 'Z' || $currentLetter >= 'a' && $currentLetter <= 'z') {
                    // Since it's neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
                    $newState = 'LTR';
                    break;
                }
                if ($closeParIndex !== false) {
                    // This closing parenthesis has to inherit the matching opening parenthesis' directionality
                    if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] != '?') {
                        $newState = $openParDirection[$closeParIndex];
                    }
                    $openParDirection[$closeParIndex] = '';
                    break;
                }
                if ($openParIndex !== false) {
                    // Opening parentheses always inherit the following directionality
                    $waitingText .= $currentLetter;
                    $workingText = substr($workingText, $currentLen);
                    while (true) {
                        if ($workingText == '') {
                            break;
                        }
                        if (substr($workingText, 0, 1) == ' ') {
                            // Spaces following this left parenthesis inherit the following directionality too
                            $waitingText .= ' ';
                            $workingText = substr($workingText, 1);
                            continue;
                        }
                        if (substr($workingText, 0, 6) == '&nbsp;') {
                            // Spaces following this left parenthesis inherit the following directionality too
                            $waitingText .= '&nbsp;';
                            $workingText = substr($workingText, 6);
                            continue;
                        }
                        break;
                    }
                    $openParDirection[$openParIndex] = '?';
                    break 2; // double break because we're waiting for more information
                }

                // We have a digit or a "normal" special character.
                //
                // When this character is not at the start of the input string, it inherits the preceding directionality;
                // at the start of the input string, it assumes the following directionality.
                //
                // Exceptions to this rule will be handled later during final clean-up.
                //
                $waitingText .= $currentLetter;
                $workingText = substr($workingText, $currentLen);
                if ($currentState != '') {
                    $result .= $waitingText;
                    $waitingText = '';
                }
                break 2; // double break because we're waiting for more information
            }
            if ($newState != $currentState) {
                // A direction change has occurred
                finishCurrentSpan($result, false);
                $previousState = $currentState;
                $currentState  = $newState;
                beginCurrentSpan($result);
            }
            $waitingText .= $currentLetter;
            $workingText = substr($workingText, $currentLen);
            $result .= $waitingText;
            $waitingText = '';

            foreach ($openParDirection as $index => $value) {
                // Since we now know the proper direction, remember it for all waiting opening parentheses
                if ($value == '?') {
                    $openParDirection[$index] = $currentState;
                }
            }

            break;
        }
    }

    // We're done. Finish last <span> if necessary
    if ($numberState && $currentState == 'RTL') {
        // Close the numeric run that is still open
        if ($waitingText == '') {
            $result .= WT_UTF8_PDF;
        } else {
            $waitingText .= WT_UTF8_PDF;
        }
    }
    finishCurrentSpan($result, true);

    // Get rid of any waiting text
    if ($waitingText != '') {
        if ($TEXT_DIRECTION == 'rtl' && $currentState == 'LTR') {
            $result .= $startRTL . $waitingText . $endRTL;
        } else {
            $result .= $startLTR . $waitingText . $endLTR;
        }
        $waitingText = '';
    }

    // Lastly, do some more cleanups

    // Move leading RTL numeric strings to following LTR text
    // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
    // NOTE(review): the literal 3 is presumably the byte length of WT_UTF8_LRE - confirm.
    if (substr($result, 0, $lenStart + 3) == $startRTL . WT_UTF8_LRE) {
        $spanEnd = strpos($result, $endRTL . $startLTR);
        if ($spanEnd !== false) {
            $textSpan = stripLRMRLM(substr($result, $lenStart + 3, $spanEnd - $lenStart - 3));
            $langSpan = WT_I18N::textScript($textSpan);
            if ($langSpan != 'Hebr' && $langSpan != 'Arab') {
                $result = $startLTR . substr($result, $lenStart, $spanEnd - $lenStart) . substr($result, $spanEnd + $lenStart + $lenEnd);
            }
        }
    }

    // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
    if ($TEXT_DIRECTION == 'rtl') {
        $result = str_replace(WT_UTF8_PDF . '.' . $endRTL, WT_UTF8_PDF . $endRTL . $startRTL . '.' . $endRTL, $result);
    }

    // Trim trailing blanks (space or &nbsp;) preceding <br> in LTR text
    if ($previousState != 'RTL') {
        $ltrBlanks = array(
            ' <LTRbr>'      => '<LTRbr>',
            '&nbsp;<LTRbr>' => '<LTRbr>',
            ' <br>'         => '<br>',
            '&nbsp;<br>'    => '<br>',
        );
        do {
            $changed = false;
            foreach ($ltrBlanks as $search => $replace) {
                if (strpos($result, $search) !== false) {
                    $result  = str_replace($search, $replace, $result);
                    $changed = true;
                    break; // Start again from the first pattern
                }
            }
        } while ($changed); // Neither space nor &nbsp; left: we're done
    }

    // Trim trailing blanks (space or &nbsp;) preceding <br> in RTL text
    $rtlBlanks = array(' <RTLbr>', '&nbsp;<RTLbr>');
    do {
        $changed = false;
        foreach ($rtlBlanks as $search) {
            if (strpos($result, $search) !== false) {
                $result  = str_replace($search, '<RTLbr>', $result);
                $changed = true;
                break; // Start again from the first pattern
            }
        }
    } while ($changed); // Neither space nor &nbsp; left: we're done

    // Convert '<LTRbr>' and '<RTLbr>' into a line break between two spans
    $result = str_replace(array('<LTRbr>', '<RTLbr>'), array($endLTR . '<br>' . $startLTR, $endRTL . '<br>' . $startRTL), $result);

    // Include leading indeterminate directional text in whatever follows
    // (the '<br>' test compares exactly as many bytes as the tag is long)
    if (substr($result . "\n", 0, $lenStart) != $startLTR && substr($result . "\n", 0, $lenStart) != $startRTL && substr($result . "\n", 0, 4) != '<br>') {
        $leadingText = '';
        while (true) {
            if ($result == '') {
                // No span was found at all: keep the text as it is
                $result = $leadingText;
                break;
            }
            if (substr($result . "\n", 0, $lenStart) != $startLTR && substr($result . "\n", 0, $lenStart) != $startRTL) {
                $leadingText .= substr($result, 0, 1);
                $result = substr($result, 1);
                continue;
            }
            // Re-insert the collected text just after the opening placeholder tag
            $result = substr($result, 0, $lenStart) . $leadingText . substr($result, $lenStart);
            break;
        }
    }

    // Include solitary "-" and "+" in surrounding RTL text
    $result = str_replace(
        array(
            $endRTL . $startLTR . '-' . $endLTR . $startRTL,
            $endRTL . $startLTR . '+' . $endLTR . $startRTL,
        ),
        array('-', '+'),
        $result
    );

    // Remove empty spans
    $result = str_replace(array($startLTR . $endLTR, $startRTL . $endRTL), '', $result);

    // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
    switch ($direction) {
    case 'BOTH':
    case 'both':
        // LTR text: <span dir="ltr"> text </span>
        // RTL text: <span dir="rtl"> text </span>
        $sLTR = '<span dir="ltr" ' . $class . '>' . $nothing;
        $eLTR = $nothing . '</span>';
        $sRTL = '<span dir="rtl" ' . $class . '>' . $nothing;
        $eRTL = $nothing . '</span>';
        break;
    case 'LTR':
    case 'ltr':
        // LTR text: <span dir="ltr"> text </span>
        // RTL text: text
        $sLTR = '<span dir="ltr" ' . $class . '>' . $nothing;
        $eLTR = $nothing . '</span>';
        $sRTL = '';
        $eRTL = '';
        break;
    case 'RTL':
    case 'rtl':
    default:
        // LTR text: text
        // RTL text: <span dir="rtl"> text </span>
        $sLTR = '';
        $eLTR = '';
        $sRTL = '<span dir="rtl" ' . $class . '>' . $nothing;
        $eRTL = $nothing . '</span>';
        break;
    }
    $result = str_replace(array($startLTR, $endLTR, $startRTL, $endRTL), array($sLTR, $eLTR, $sRTL, $eRTL), $result);

    return $result;
}