Ejemplo n.º 1
0
/**
 * Compare two strings character by character, using UTF8_ord() values.
 *
 * Both inputs are split with UTF8_str_split() and the resulting elements are
 * compared pairwise over the common prefix. The first differing pair decides
 * the result; if the common prefix is identical, the string with fewer
 * elements sorts first.
 *
 * NOTE(review): UTF8_str_split() and UTF8_ord() are defined outside this
 * block; presumably they yield one array element per UTF-8 character and the
 * character's code point respectively -- confirm against their definitions.
 *
 * @param string $text1 First string to compare.
 * @param string $text2 Second string to compare.
 *
 * @return int -1 or 1 at the first differing character; otherwise the
 *             difference between the two element counts (0 when equal).
 */
function UTF8_strcmp($text1, $text2)
{
    $UTF8_text1 = UTF8_str_split($text1);
    $text1Len = count($UTF8_text1);
    $UTF8_text2 = UTF8_str_split($text2);
    $text2Len = count($UTF8_text2);
    // Only the common prefix can be compared pairwise. The bound must be the
    // smaller *count*: min() applied to the arrays themselves returns an
    // array, and "int < array" is always true, so the loop would never stop.
    $minLen = min($text1Len, $text2Len);
    for ($i = 0; $i < $minLen; $i++) {
        $UTF8_ord1 = UTF8_ord($UTF8_text1[$i]);
        $UTF8_ord2 = UTF8_ord($UTF8_text2[$i]);
        if ($UTF8_ord1 < $UTF8_ord2) {
            return -1;
        }
        if ($UTF8_ord1 > $UTF8_ord2) {
            return 1;
        }
    }
    // Common prefix is identical: the shorter string sorts first.
    return $text1Len - $text2Len;
}
Ejemplo n.º 2
0
/**
 * Compare two names for sorting, honouring language-specific collation rules.
 *
 * Each name is split into alternating runs of non-digit text and digits.
 * Digit runs are compared numerically. Text runs are walked one "letter" at
 * a time, where a letter may be:
 *   - a multi-letter group (quadgraph / trigraph / digraph) configured for the
 *     current $LANGUAGE,
 *   - a multi-byte UTF-8 sequence (length taken from the lead byte), or
 *   - a single byte.
 * Letters are ranked by their position in $alphabet_upper / $alphabet_lower;
 * letters found in neither are ranked by UTF8_ord(). "@" is always forced to
 * the end. When $DICTIONARY_SORT is enabled for the language, 2-byte letters
 * listed in the diacritic tables are replaced by their stripped form before
 * comparison and the diacritics are only used as a final tie-breaker.
 *
 * Relies on the globals declared below; getAlphabet() is called first
 * (presumably to populate the alphabet globals -- defined outside this block).
 *
 * @param string $aName      First name.
 * @param string $bName      Second name.
 * @param bool   $ignoreCase When true, letters are passed through
 *                           UTF8_strtoupper() before being compared.
 *
 * @return int|float Negative when $aName sorts first, positive when $bName
 *                   sorts first, 0 when they are considered identical.
 */
function compareStrings($aName, $bName, $ignoreCase = true)
{
    global $LANGUAGE, $CHARACTER_SET;
    global $alphabet, $alphabet_lower, $alphabet_upper;
    global $digraph, $trigraph, $quadgraph;
    global $DICTIONARY_SORT, $UCDiacritWhole, $UCDiacritStrip, $UCDiacritOrder, $LCDiacritWhole, $LCDiacritStrip, $LCDiacritOrder;
    // Debugging aid: an array argument is a caller bug, so show who called us.
    if (is_array($aName)) {
        debug_print_backtrace();
    }
    getAlphabet();
    // Replacement tables are only built for the languages that use them; the
    // same language checks guard their use further down.
    if ($LANGUAGE == "danish" || $LANGUAGE == "norwegian") {
        $danishFrom = array("AA", "Aa", "AE", "Ae", "OE", "Oe", "aa", "ae", "oe");
        $danishTo = array("Å", "Å", "Æ", "Æ", "Ø", "Ø", "å", "æ", "ø");
    }
    if ($LANGUAGE == "german") {
        $germanFrom = array("AA", "Aa", "Æ", "AE", "Ae", "Ø", "OE", "Oe", "SS", "Ss", "UE", "Ue", "aa", "æ", "ae", "ø", "oe", "ss", "ue");
        $germanTo = array("Å", "Å", "Ä", "Ä", "Ä", "Ö", "Ö", "Ö", "ß", "ß", "Ü", "Ü", "å", "ä", "ä", "ö", "ö", "ß", "ü");
    }
    //-- split strings into strings and numbers
    //-- (PREG_SPLIT_DELIM_CAPTURE keeps the digit runs, so text and numbers alternate)
    $aParts = preg_split("/(\\d+)/", $aName, -1, PREG_SPLIT_DELIM_CAPTURE);
    $bParts = preg_split("/(\\d+)/", $bName, -1, PREG_SPLIT_DELIM_CAPTURE);
    //-- loop through the arrays of strings and numbers
    $ac = count($aParts);
    $bc = count($bParts);
    for ($j = 0; $j < $ac && $j < $bc; $j++) {
        $aName = $aParts[$j];
        $bName = $bParts[$j];
        //-- sort numbers differently
        if (is_numeric($aName) && is_numeric($bName)) {
            if ($aName != $bName) {
                return $aName - $bName;
            }
        } else {
            //-- Take care of Danish and Norwegian character transformations
            if ($LANGUAGE == "danish" || $LANGUAGE == "norwegian") {
                $aName = str_replace($danishFrom, $danishTo, $aName);
                $bName = str_replace($danishFrom, $danishTo, $bName);
            }
            // -- Take care of German character transformations
            if ($LANGUAGE == "german") {
                $aName = str_replace($germanFrom, $germanTo, $aName);
                $bName = str_replace($germanFrom, $germanTo, $bName);
            }
            //-- get the name lengths (in bytes)
            $alen = strlen($aName);
            $blen = strlen($bName);
            //-- loop through the characters in the string and if we find one that is different between the strings
            //-- return the difference
            $aIndex = 0;
            $bIndex = 0;
            // One entry is appended per letter; compared only if the names are otherwise identical.
            $aDiacriticValue = "";
            $bDiacriticValue = "";
            while (true) {
                // Holds the configured value of a matched multi-letter group, or false.
                $aMultiLetter = false;
                $bMultiLetter = false;
                // Look for quadgraphs (4 letters that should be treated as 1)
                if (isset($quadgraph[$LANGUAGE])) {
                    $aLetter = strtoupper(substr($aName, $aIndex, 4));
                    if (isset($quadgraph[$LANGUAGE][$aLetter])) {
                        $aMultiLetter = $quadgraph[$LANGUAGE][$aLetter];
                        $aCharLen = 4;
                    }
                    $bLetter = strtoupper(substr($bName, $bIndex, 4));
                    if (isset($quadgraph[$LANGUAGE][$bLetter])) {
                        $bMultiLetter = $quadgraph[$LANGUAGE][$bLetter];
                        $bCharLen = 4;
                    }
                }
                // Look for trigraphs (3 letters that should be treated as 1)
                if (isset($trigraph[$LANGUAGE])) {
                    if (!$aMultiLetter) {
                        $aLetter = strtoupper(substr($aName, $aIndex, 3));
                        if (isset($trigraph[$LANGUAGE][$aLetter])) {
                            $aMultiLetter = $trigraph[$LANGUAGE][$aLetter];
                            $aCharLen = 3;
                        }
                    }
                    if (!$bMultiLetter) {
                        $bLetter = strtoupper(substr($bName, $bIndex, 3));
                        if (isset($trigraph[$LANGUAGE][$bLetter])) {
                            $bMultiLetter = $trigraph[$LANGUAGE][$bLetter];
                            $bCharLen = 3;
                        }
                    }
                }
                // Look for digraphs (2 letters that should be treated as 1)
                // The table is the global $digraph; testing a differently named
                // variable here would leave this branch permanently disabled.
                if (isset($digraph[$LANGUAGE])) {
                    if (!$aMultiLetter) {
                        $aLetter = strtoupper(substr($aName, $aIndex, 2));
                        if (isset($digraph[$LANGUAGE][$aLetter])) {
                            $aMultiLetter = $digraph[$LANGUAGE][$aLetter];
                            $aCharLen = 2;
                        }
                    }
                    if (!$bMultiLetter) {
                        $bLetter = strtoupper(substr($bName, $bIndex, 2));
                        if (isset($digraph[$LANGUAGE][$bLetter])) {
                            $bMultiLetter = $digraph[$LANGUAGE][$bLetter];
                            $bCharLen = 2;
                        }
                    }
                }
                // Look for UTF-8 encoded characters: the lead byte's high bits
                // give the length of the sequence.
                if (!$aMultiLetter) {
                    $aCharLen = 1;
                    $aLetter = substr($aName, $aIndex, 1);
                    $aOrd = ord($aLetter);
                    // 2-byte sequence
                    if (($aOrd & 0xe0) == 0xc0) {
                        $aCharLen = 2;
                    }
                    // 3-byte sequence
                    if (($aOrd & 0xf0) == 0xe0) {
                        $aCharLen = 3;
                    }
                    // 4-byte sequence
                    if (($aOrd & 0xf8) == 0xf0) {
                        $aCharLen = 4;
                    }
                }
                if (!$bMultiLetter) {
                    $bCharLen = 1;
                    $bLetter = substr($bName, $bIndex, 1);
                    $bOrd = ord($bLetter);
                    // 2-byte sequence
                    if (($bOrd & 0xe0) == 0xc0) {
                        $bCharLen = 2;
                    }
                    // 3-byte sequence
                    if (($bOrd & 0xf0) == 0xe0) {
                        $bCharLen = 3;
                    }
                    // 4-byte sequence
                    if (($bOrd & 0xf8) == 0xf0) {
                        $bCharLen = 4;
                    }
                }
                // Re-read each letter with its final length and original case.
                $aLetter = substr($aName, $aIndex, $aCharLen);
                $bLetter = substr($bName, $bIndex, $bCharLen);
                if ($DICTIONARY_SORT[$LANGUAGE]) {
                    //-- strip diacritics before checking equality
                    // The byte offset found in a "Whole" table is halved to index
                    // the matching "Strip"/"Order" tables (presumably the Whole
                    // tables hold 2-byte characters -- confirm where they are built).
                    if ($aCharLen == 2) {
                        $aPos = strpos($UCDiacritWhole, $aLetter);
                        if ($aPos !== false) {
                            $aPos = $aPos >> 1;
                            $aLetter = substr($UCDiacritStrip, $aPos, 1);
                            $aDiacriticValue .= substr($UCDiacritOrder, $aPos, 1);
                        } else {
                            $aPos = strpos($LCDiacritWhole, $aLetter);
                            if ($aPos !== false) {
                                $aPos = $aPos >> 1;
                                $aLetter = substr($LCDiacritStrip, $aPos, 1);
                                $aDiacriticValue .= substr($LCDiacritOrder, $aPos, 1);
                            } else {
                                $aDiacriticValue .= " ";
                            }
                        }
                    } else {
                        $aDiacriticValue .= " ";
                    }
                    if ($bCharLen == 2) {
                        $bPos = strpos($UCDiacritWhole, $bLetter);
                        if ($bPos !== false) {
                            $bPos = $bPos >> 1;
                            $bLetter = substr($UCDiacritStrip, $bPos, 1);
                            $bDiacriticValue .= substr($UCDiacritOrder, $bPos, 1);
                        } else {
                            $bPos = strpos($LCDiacritWhole, $bLetter);
                            if ($bPos !== false) {
                                $bPos = $bPos >> 1;
                                $bLetter = substr($LCDiacritStrip, $bPos, 1);
                                $bDiacriticValue .= substr($LCDiacritOrder, $bPos, 1);
                            } else {
                                $bDiacriticValue .= " ";
                            }
                        }
                    } else {
                        $bDiacriticValue .= " ";
                    }
                }
                if ($ignoreCase) {
                    $aLetter = UTF8_strtoupper($aLetter);
                    $bLetter = UTF8_strtoupper($bLetter);
                }
                if ($aLetter != $bLetter && $bLetter != "" && $aLetter != "") {
                    //-- get the position of the letter in the alphabet string
                    if ($aMultiLetter) {
                        // A multi-letter group is ranked by its first letter...
                        $sortAfter = substr($aLetter, 0, 1);
                        // ...except "CH", which doesn't follow the rule and is ranked by "H"
                        if ($aLetter == "CH") {
                            $sortAfter = "H";
                        }
                        if ($aLetter == "Ch") {
                            $sortAfter = "H";
                        }
                        if ($aLetter == "ch") {
                            $sortAfter = "h";
                        }
                        $aPos = strpos($alphabet_upper, $sortAfter);
                        if ($aPos === false) {
                            $aPos = strpos($alphabet_lower, $sortAfter);
                        }
                    } else {
                        $aPos = @strpos($alphabet_upper, $aLetter);
                        if ($aPos === false) {
                            $aPos = @strpos($alphabet_lower, $aLetter);
                        }
                    }
                    if ($bMultiLetter) {
                        // A multi-letter group is ranked by its first letter...
                        $sortAfter = substr($bLetter, 0, 1);
                        // ...except "CH", which doesn't follow the rule and is ranked by "H"
                        if ($bLetter == "CH") {
                            $sortAfter = "H";
                        }
                        if ($bLetter == "Ch") {
                            $sortAfter = "H";
                        }
                        if ($bLetter == "ch") {
                            $sortAfter = "h";
                        }
                        $bPos = strpos($alphabet_upper, $sortAfter);
                        if ($bPos === false) {
                            $bPos = strpos($alphabet_lower, $sortAfter);
                        }
                    } else {
                        $bPos = @strpos($alphabet_upper, $bLetter);
                        if ($bPos === false) {
                            $bPos = @strpos($alphabet_lower, $bLetter);
                        }
                    }
                    // Insert digraphs and trigraphs into main sequence:
                    // positions are shifted left by 3 bits and the group's
                    // configured value is added in the low bits.
                    if ($aMultiLetter || $bMultiLetter) {
                        $aPos = ((int) $aPos << 3) + (int) $aMultiLetter;
                        $bPos = ((int) $bPos << 3) + (int) $bMultiLetter;
                    }
                    if ($aPos != $bPos) {
                        // Force "@" to the end
                        if ($aLetter == "@") {
                            return 1;
                        }
                        // Force "@" to the end
                        if ($bLetter == "@") {
                            return -1;
                        }
                        // A letter that is not in the alphabet sorts before one that is.
                        if ($bPos !== false && $aPos === false) {
                            return -1;
                        }
                        if ($bPos === false && $aPos !== false) {
                            return 1;
                        }
                        if ($bPos === false && $aPos === false) {
                            // Determine the binary value of both letters
                            $aValue = UTF8_ord($aLetter);
                            $bValue = UTF8_ord($bLetter);
                            return $aValue - $bValue;
                        }
                        return $aPos - $bPos;
                    }
                }
                // advance to the 1st byte of the next sequence
                $aIndex += $aCharLen;
                // advance to the 1st byte of the next sequence
                $bIndex += $bCharLen;
                // Stop as soon as either string is exhausted.
                if ($aIndex >= $alen) {
                    break;
                }
                if ($bIndex >= $blen) {
                    break;
                }
            }
        }
        //-- if we made it through the loop then check if one name is longer than the
        //-- other, the shorter one should be first
        if ($alen != $blen) {
            return $alen - $blen;
        }
        //-- They're identical: let diacritics (if any) decide
        if ($aDiacriticValue < $bDiacriticValue) {
            return -1;
        }
        if ($aDiacriticValue > $bDiacriticValue) {
            return 1;
        }
    }
    // All shared parts matched: the name with fewer parts sorts first.
    if (count($aParts) != count($bParts)) {
        return count($aParts) - count($bParts);
    }
    //-- the strings are exactly the same so return 0
    return 0;
}