/**
 * Compare two UTF-8 strings character by character.
 *
 * Both strings are split with UTF8_str_split() and the elements at equal
 * positions are compared by the value UTF8_ord() returns for them
 * (presumably the Unicode code point -- confirm against that helper).
 * When one string is a prefix of the other, the shorter one sorts first.
 *
 * @param string $text1 first string
 * @param string $text2 second string
 * @return int negative when $text1 sorts first, positive when $text2 sorts
 *             first, zero when both strings are equal
 */
function UTF8_strcmp($text1, $text2) {
	$UTF8_text1 = UTF8_str_split($text1);
	$text1Len = count($UTF8_text1);
	$UTF8_text2 = UTF8_str_split($text2);
	$text2Len = count($UTF8_text2);

	// Only the common prefix can be compared position by position.
	// This must be the minimum of the two LENGTHS: taking min() of the arrays
	// themselves yields an array, and "$i < array" is always true in PHP.
	$minLen = min($text1Len, $text2Len);

	for ($i = 0; $i < $minLen; $i++) {
		$UTF8_ord1 = UTF8_ord($UTF8_text1[$i]);
		$UTF8_ord2 = UTF8_ord($UTF8_text2[$i]);
		if ($UTF8_ord1 < $UTF8_ord2) {
			return -1;
		}
		if ($UTF8_ord1 > $UTF8_ord2) {
			return 1;
		}
	}

	// Common prefix is identical: the shorter string sorts first
	return $text1Len - $text2Len;
}
/**
 * Compare two names for sorting, honouring the alphabet of the current language.
 *
 * Both names are split into alternating runs of text and ASCII digits. Digit
 * runs are compared by numeric value; text runs are compared letter by letter
 * using the position of each letter in $alphabet_upper / $alphabet_lower.
 * Multi-letter sequences registered in $quadgraph, $trigraph and $digraph for
 * the current $LANGUAGE are treated as a single letter. When
 * $DICTIONARY_SORT is enabled for the language, 2-byte letters found in the
 * diacritic tables are compared by their stripped base letter first, and the
 * diacritics only break ties between otherwise identical text runs.
 *
 * Letters that are not part of the alphabet sort before letters that are,
 * "@" is forced to the very end, and two different letters that are both
 * outside the alphabet are ordered by the value UTF8_ord() returns for them.
 *
 * @param string $aName      first name
 * @param string $bName      second name
 * @param bool   $ignoreCase when true, letters are upper-cased with
 *                           UTF8_strtoupper() before they are compared
 * @return int|float negative when $aName sorts first, positive when $bName
 *                   sorts first, zero when the names are equivalent
 */
function compareStrings($aName, $bName, $ignoreCase = true) {
	global $LANGUAGE, $CHARACTER_SET;
	global $alphabet, $alphabet_lower, $alphabet_upper;
	global $digraph, $trigraph, $quadgraph;
	global $DICTIONARY_SORT, $UCDiacritWhole, $UCDiacritStrip, $UCDiacritOrder, $LCDiacritWhole, $LCDiacritStrip, $LCDiacritOrder;

	// Diagnostic aid: an array argument points at a caller bug, so show where the call came from
	if (is_array($aName)) {
		debug_print_backtrace();
	}

	// NOTE(review): presumably populates the alphabet/digraph/diacritic globals used below -- confirm
	getAlphabet();

	if ($LANGUAGE == "danish" || $LANGUAGE == "norwegian") {
		$danishFrom = array("AA", "Aa", "AE", "Ae", "OE", "Oe", "aa", "ae", "oe");
		$danishTo = array("Å", "Å", "Æ", "Æ", "Ø", "Ø", "å", "æ", "ø");
	}

	if ($LANGUAGE == "german") {
		$germanFrom = array("AA", "Aa", "Æ", "AE", "Ae", "Ø", "OE", "Oe", "SS", "Ss", "UE", "Ue", "aa", "æ", "ae", "ø", "oe", "ss", "ue");
		$germanTo = array("Å", "Å", "Ä", "Ä", "Ä", "Ö", "Ö", "Ö", "ß", "ß", "Ü", "Ü", "å", "ä", "ä", "ö", "ö", "ß", "ü");
	}

	//-- split strings into strings and numbers
	//-- (the captured delimiter keeps the digit runs, so text and digits alternate in the result)
	$aParts = preg_split("/(\\d+)/", $aName, -1, PREG_SPLIT_DELIM_CAPTURE);
	$bParts = preg_split("/(\\d+)/", $bName, -1, PREG_SPLIT_DELIM_CAPTURE);

	//-- loop through the arrays of strings and numbers
	$ac = count($aParts);
	$bc = count($bParts);
	for ($j = 0; $j < $ac && $j < $bc; $j++) {
		$aName = $aParts[$j];
		$bName = $bParts[$j];

		//-- sort numbers differently: by value, not by spelling
		if (is_numeric($aName) && is_numeric($bName)) {
			if ($aName != $bName) {
				return $aName - $bName;
			}
			continue;
		}

		//-- Take care of Danish and Norwegian character transformations
		if ($LANGUAGE == "danish" || $LANGUAGE == "norwegian") {
			$aName = str_replace($danishFrom, $danishTo, $aName);
			$bName = str_replace($danishFrom, $danishTo, $bName);
		}

		//-- Take care of German character transformations
		if ($LANGUAGE == "german") {
			$aName = str_replace($germanFrom, $germanTo, $aName);
			$bName = str_replace($germanFrom, $germanTo, $bName);
		}

		//-- get the name lengths (in bytes; the indexes below are byte offsets too)
		$alen = strlen($aName);
		$blen = strlen($bName);

		//-- loop through the characters in the string and if we find one that is
		//-- different between the strings return the difference
		$aIndex = 0;
		$bIndex = 0;
		$aDiacriticValue = "";
		$bDiacriticValue = "";
		while (true) {
			$aMultiLetter = false;
			$bMultiLetter = false;

			// Look for quadgraphs (4 letters that should be treated as 1)
			if (isset($quadgraph[$LANGUAGE])) {
				$aLetter = strtoupper(substr($aName, $aIndex, 4));
				if (isset($quadgraph[$LANGUAGE][$aLetter])) {
					$aMultiLetter = $quadgraph[$LANGUAGE][$aLetter];
					$aCharLen = 4;
				}
				$bLetter = strtoupper(substr($bName, $bIndex, 4));
				if (isset($quadgraph[$LANGUAGE][$bLetter])) {
					$bMultiLetter = $quadgraph[$LANGUAGE][$bLetter];
					$bCharLen = 4;
				}
			}

			// Look for trigraphs (3 letters that should be treated as 1)
			if (isset($trigraph[$LANGUAGE])) {
				if (!$aMultiLetter) {
					$aLetter = strtoupper(substr($aName, $aIndex, 3));
					if (isset($trigraph[$LANGUAGE][$aLetter])) {
						$aMultiLetter = $trigraph[$LANGUAGE][$aLetter];
						$aCharLen = 3;
					}
				}
				if (!$bMultiLetter) {
					$bLetter = strtoupper(substr($bName, $bIndex, 3));
					if (isset($trigraph[$LANGUAGE][$bLetter])) {
						$bMultiLetter = $trigraph[$LANGUAGE][$bLetter];
						$bCharLen = 3;
					}
				}
			}

			// Look for digraphs (2 letters that should be treated as 1)
			// The guard must test $digraph (the declared global); a misspelt
			// name here is never set and silently disables digraph handling.
			if (isset($digraph[$LANGUAGE])) {
				if (!$aMultiLetter) {
					$aLetter = strtoupper(substr($aName, $aIndex, 2));
					if (isset($digraph[$LANGUAGE][$aLetter])) {
						$aMultiLetter = $digraph[$LANGUAGE][$aLetter];
						$aCharLen = 2;
					}
				}
				if (!$bMultiLetter) {
					$bLetter = strtoupper(substr($bName, $bIndex, 2));
					if (isset($digraph[$LANGUAGE][$bLetter])) {
						$bMultiLetter = $digraph[$LANGUAGE][$bLetter];
						$bCharLen = 2;
					}
				}
			}

			// Look for UTF-8 encoded characters: the lead byte tells how long the sequence is
			if (!$aMultiLetter) {
				$aCharLen = 1;
				$aLetter = substr($aName, $aIndex, 1);
				$aOrd = ord($aLetter);
				if (($aOrd & 0xe0) == 0xc0) {
					$aCharLen = 2; // 2-byte sequence
				}
				if (($aOrd & 0xf0) == 0xe0) {
					$aCharLen = 3; // 3-byte sequence
				}
				if (($aOrd & 0xf8) == 0xf0) {
					$aCharLen = 4; // 4-byte sequence
				}
			}
			if (!$bMultiLetter) {
				$bCharLen = 1;
				$bLetter = substr($bName, $bIndex, 1);
				$bOrd = ord($bLetter);
				if (($bOrd & 0xe0) == 0xc0) {
					$bCharLen = 2; // 2-byte sequence
				}
				if (($bOrd & 0xf0) == 0xe0) {
					$bCharLen = 3; // 3-byte sequence
				}
				if (($bOrd & 0xf8) == 0xf0) {
					$bCharLen = 4; // 4-byte sequence
				}
			}

			$aLetter = substr($aName, $aIndex, $aCharLen);
			$bLetter = substr($bName, $bIndex, $bCharLen);

			// !empty() keeps the original truthiness test but tolerates a
			// language that has no $DICTIONARY_SORT entry at all
			if (!empty($DICTIONARY_SORT[$LANGUAGE])) {
				//-- strip diacritics before checking equality
				// Only 2-byte letters are looked up. The byte offset found in a
				// "Whole" table is halved to index the "Strip"/"Order" tables,
				// which are read one byte per entry.
				// Every letter appends exactly one byte to the diacritic value
				// (" " when it carries no known diacritic).
				if ($aCharLen == 2) {
					$aPos = strpos($UCDiacritWhole, $aLetter);
					if ($aPos !== false) {
						$aPos = $aPos >> 1;
						$aLetter = substr($UCDiacritStrip, $aPos, 1);
						$aDiacriticValue .= substr($UCDiacritOrder, $aPos, 1);
					} else {
						$aPos = strpos($LCDiacritWhole, $aLetter);
						if ($aPos !== false) {
							$aPos = $aPos >> 1;
							$aLetter = substr($LCDiacritStrip, $aPos, 1);
							$aDiacriticValue .= substr($LCDiacritOrder, $aPos, 1);
						} else {
							$aDiacriticValue .= " ";
						}
					}
				} else {
					$aDiacriticValue .= " ";
				}

				if ($bCharLen == 2) {
					$bPos = strpos($UCDiacritWhole, $bLetter);
					if ($bPos !== false) {
						$bPos = $bPos >> 1;
						$bLetter = substr($UCDiacritStrip, $bPos, 1);
						$bDiacriticValue .= substr($UCDiacritOrder, $bPos, 1);
					} else {
						$bPos = strpos($LCDiacritWhole, $bLetter);
						if ($bPos !== false) {
							$bPos = $bPos >> 1;
							$bLetter = substr($LCDiacritStrip, $bPos, 1);
							$bDiacriticValue .= substr($LCDiacritOrder, $bPos, 1);
						} else {
							$bDiacriticValue .= " ";
						}
					}
				} else {
					$bDiacriticValue .= " ";
				}
			}

			if ($ignoreCase) {
				$aLetter = UTF8_strtoupper($aLetter);
				$bLetter = UTF8_strtoupper($bLetter);
			}

			if ($aLetter != $bLetter && $bLetter != "" && $aLetter != "") {
				//-- get the position of the letter in the alphabet string
				if ($aMultiLetter) {
					// A multi-letter sequence sorts after its first letter ...
					$sortAfter = substr($aLetter, 0, 1);
					// ... except "CH", which doesn't follow the rule and sorts after "H"
					if ($aLetter == "CH") {
						$sortAfter = "H";
					}
					if ($aLetter == "Ch") {
						$sortAfter = "H";
					}
					if ($aLetter == "ch") {
						$sortAfter = "h";
					}
					$aPos = strpos($alphabet_upper, $sortAfter);
					if ($aPos === false) {
						$aPos = strpos($alphabet_lower, $sortAfter);
					}
				} else {
					$aPos = @strpos($alphabet_upper, $aLetter);
					if ($aPos === false) {
						$aPos = @strpos($alphabet_lower, $aLetter);
					}
				}

				if ($bMultiLetter) {
					$sortAfter = substr($bLetter, 0, 1);
					if ($bLetter == "CH") {
						$sortAfter = "H";
					}
					if ($bLetter == "Ch") {
						$sortAfter = "H";
					}
					if ($bLetter == "ch") {
						$sortAfter = "h";
					}
					$bPos = strpos($alphabet_upper, $sortAfter);
					if ($bPos === false) {
						$bPos = strpos($alphabet_lower, $sortAfter);
					}
				} else {
					$bPos = @strpos($alphabet_upper, $bLetter);
					if ($bPos === false) {
						$bPos = @strpos($alphabet_lower, $bLetter);
					}
				}

				// Insert digraphs and trigraphs into main sequence: widen every
				// position by 3 bits and add the sequence's own rank
				if ($aMultiLetter || $bMultiLetter) {
					$aPos = ((int) $aPos << 3) + (int) $aMultiLetter;
					$bPos = ((int) $bPos << 3) + (int) $bMultiLetter;
				}

				// Positions are compared strictly: a loose "!=" treats false
				// (letter not in the alphabet) as equal to 0 (first letter of the
				// alphabet) and to another false, which would skip the "@" rule
				// and make the binary fallback below unreachable.
				$bothUnknown = ($aPos === false && $bPos === false);
				if ($bothUnknown || $aPos !== $bPos) {
					if ($aLetter == "@") {
						return 1; // Force "@" to the end
					}
					if ($bLetter == "@") {
						return -1; // Force "@" to the end
					}
					if ($bPos !== false && $aPos === false) {
						return -1;
					}
					if ($bPos === false && $aPos !== false) {
						return 1;
					}
					if ($bothUnknown) {
						// Determine the binary value of both letters
						$aValue = UTF8_ord($aLetter);
						$bValue = UTF8_ord($bLetter);
						// Only decide here when the values really differ; otherwise keep scanning
						if ($aValue != $bValue) {
							return $aValue - $bValue;
						}
					} else {
						return $aPos - $bPos;
					}
				}
			}

			$aIndex += $aCharLen; // advance to the 1st byte of the next sequence
			$bIndex += $bCharLen; // advance to the 1st byte of the next sequence
			if ($aIndex >= $alen) {
				break;
			}
			if ($bIndex >= $blen) {
				break;
			}
		}

		//-- if we made it through the loop then check if one name is longer than the
		//-- other, the shorter one should be first
		if ($alen != $blen) {
			return $alen - $blen;
		}

		//-- They're identical: let diacritics (if any) decide
		if ($aDiacriticValue < $bDiacriticValue) {
			return -1;
		}
		if ($aDiacriticValue > $bDiacriticValue) {
			return 1;
		}
	}

	//-- all shared parts are equal: the name with fewer parts goes first
	if ($ac != $bc) {
		return $ac - $bc;
	}

	//-- the strings are exactly the same so return 0
	return 0;
}