/**
 * Calculate the Daitch-Mokotoff soundex for a word.
 *
 * The word is upper-cased, rewritten using self::$transformNameTable, and then
 * scanned left to right. At each position the longest chunk (at most
 * self::MAXCHAR bytes) that has an entry in self::$dmsounds is consumed.
 *
 * Each sound table entry is read as:
 *   - index 0:       flag; any value other than '0' marks the chunk as a vowel
 *   - index 1, 2, 3: code to use at start of word / before a vowel / otherwise
 *   - index 4, 5, 6 (and so on): alternative codes for the same three states
 * An empty code means "ignore this sound in this state".
 *
 * Because a chunk may carry alternative codes, a word can yield several
 * results. Every result is zero-padded or truncated to exactly six digits.
 *
 * @param string $name
 *
 * @return string[] List of possible DM codes for the word.
 */
private static function daitchMokotoffWord($name) {
    // Apply special transformation rules to the input string
    $name = I18N::strtoupper($name);
    foreach (self::$transformNameTable as $transformRule) {
        $name = str_replace($transformRule[0], $transformRule[1], $name);
    }

    // Initialize
    $name_script = I18N::textScript($name);
    $noVowels    = $name_script == 'Hebr' || $name_script == 'Arab';

    $lastPos         = strlen($name) - 1;
    $currPos         = 0;
    $state           = 1; // 1: start of input string, 2: before vowel, 3: other
    $result          = array(); // accumulate complete 6-digit D-M codes here
    $partialResult   = array(); // accumulate incomplete D-M codes here
    $partialResult[] = array('!'); // initialize 1st partial result ('!' stops "duplicate sound" check)

    // Loop through the input string.
    // Stop when the string is exhausted or when no more partial results remain
    while (count($partialResult) !== 0 && $currPos <= $lastPos) {
        // Find the DM coding table entry for the chunk at the current position
        $thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
        while ($thisEntry != '') {
            if (isset(self::$dmsounds[$thisEntry])) {
                break;
            }
            $thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
        }
        if ($thisEntry === '') {
            $currPos++; // Not in table: advance pointer to next byte
            continue; // and try again
        }

        $soundTableEntry = self::$dmsounds[$thisEntry];
        $workingResult   = $partialResult;
        $partialResult   = array();
        $currPos += strlen($thisEntry);

        // Not at beginning of input string
        if ($state != 1) {
            if ($currPos <= $lastPos) {
                // Look ahead: find the table entry for the chunk that follows this one
                $nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
                while ($nextEntry != '') {
                    if (isset(self::$dmsounds[$nextEntry])) {
                        break;
                    }
                    $nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
                }
            } else {
                $nextEntry = '';
            }
            if ($nextEntry != '' && self::$dmsounds[$nextEntry][0] != '0') {
                // Next chunk is a vowel
                $state = 2;
            } else {
                // End of input, unknown chunk, or next chunk is not a vowel
                $state = 3;
            }
        }

        // Visit every alternative code this chunk offers for the current state
        while ($state < count($soundTableEntry)) {
            // empty means 'ignore this sound in this state'
            if ($soundTableEntry[$state] == '') {
                foreach ($workingResult as $workingEntry) {
                    $tempEntry = $workingEntry;
                    $tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
                    $partialResult[] = $tempEntry;
                }
            } else {
                foreach ($workingResult as $workingEntry) {
                    if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) {
                        // Incoming sound isn't a duplicate of the previous sound
                        $workingEntry[] = $soundTableEntry[$state];
                    } else {
                        // Incoming sound is a duplicate of the previous sound.
                        // It is normally dropped; for Hebrew and Arabic input
                        // the repeated sound is kept.
                        if ($noVowels) {
                            $workingEntry[] = $soundTableEntry[$state];
                        }
                    }
                    if (count($workingEntry) < 7) {
                        $partialResult[] = $workingEntry;
                    } else {
                        // This is the 6th code in the sequence
                        // We're looking for 7 entries because the first is '!' and doesn't count
                        $tempResult = str_replace('!', '', implode('', $workingEntry));
                        // Only return codes from recognisable sounds.
                        // Compare with '' explicitly: PHP treats the string '0' as false.
                        if ($tempResult !== '') {
                            $result[] = substr($tempResult . '000000', 0, 6);
                        }
                    }
                }
            }
            $state = $state + 3; // Advance to next triplet while keeping the same basic state
        }
    }

    // Zero-fill and copy all remaining partial results
    foreach ($partialResult as $workingEntry) {
        $tempResult = str_replace('!', '', implode('', $workingEntry));
        // Only return codes from recognisable sounds.
        // A lone '0' code is a real code, but PHP treats the string '0' as
        // false, so test for the empty string explicitly rather than truthiness.
        if ($tempResult !== '') {
            $result[] = substr($tempResult . '000000', 0, 6);
        }
    }

    return $result;
}
/**
 * Get all the names of this record, built by pairing each of the husband's
 * names with each of the wife's names ("husband + wife").
 *
 * A missing spouse is represented by a single placeholder name with the
 * sort key '@N.N.'. Names of type '_MARNM' are never used in a pairing.
 * Pairs whose two names are written in the same script are listed first,
 * followed by the mixed-script pairs. Each resulting name takes its 'type'
 * from the husband's name.
 *
 * The list is computed once and cached in $this->_getAllNames. An empty
 * array (never null) is returned when no pairing qualifies.
 *
 * @return string[][]
 */
public function getAllNames() {
    if (is_null($this->_getAllNames)) {
        // Check the script used by each name, so we can match cyrillic with cyrillic, greek with greek, etc.
        if ($this->husb) {
            $husb_names = $this->husb->getAllNames();
        } else {
            $husb_names = array(
                0 => array(
                    'type' => 'BIRT',
                    'sort' => '@N.N.',
                    'full' => I18N::translateContext('Unknown given name', '…') . ' ' . I18N::translateContext('Unknown surname', '…'),
                ),
            );
        }
        foreach ($husb_names as $n => $husb_name) {
            $husb_names[$n]['script'] = I18N::textScript($husb_name['full']);
        }

        if ($this->wife) {
            $wife_names = $this->wife->getAllNames();
        } else {
            $wife_names = array(
                0 => array(
                    'type' => 'BIRT',
                    'sort' => '@N.N.',
                    'full' => I18N::translateContext('Unknown given name', '…') . ' ' . I18N::translateContext('Unknown surname', '…'),
                ),
            );
        }
        foreach ($wife_names as $n => $wife_name) {
            $wife_names[$n]['script'] = I18N::textScript($wife_name['full']);
        }

        // Collect the pairs in a single pass, keeping same-script and
        // mixed-script pairs apart so the former can be listed first.
        $matched   = array();
        $unmatched = array();
        foreach ($husb_names as $husb_name) {
            foreach ($wife_names as $wife_name) {
                if ($husb_name['type'] == '_MARNM' || $wife_name['type'] == '_MARNM') {
                    continue;
                }
                $combined = array(
                    'type' => $husb_name['type'],
                    'sort' => $husb_name['sort'] . ' + ' . $wife_name['sort'],
                    'full' => $husb_name['full'] . ' + ' . $wife_name['full'],
                );
                if ($husb_name['script'] == $wife_name['script']) {
                    $matched[] = $combined;
                } else {
                    $unmatched[] = $combined;
                }
            }
        }

        // Matched names first, unmatched names second (there may be no matched names).
        // Always assign an array, so the cache is populated (and the documented
        // return type honoured) even when no pairing qualified.
        $this->_getAllNames = array_merge($matched, $unmatched);
    }

    return $this->_getAllNames;
}