/**
 * Sort an array in place, using the configured collator when one is set and
 * this object's own compare() method otherwise.
 *
 * @param array $array    Array to sort; passed by reference and reordered in place.
 * @param bool  $keepKeys When true the key-preserving variants (asort/uasort) are
 *                        used; otherwise the re-indexing variants (sort/usort).
 *
 * @return bool Whatever the underlying sort call reports.
 */
public function sort(&$array, $keepKeys = false)
{
    // A collator, when present, takes precedence over the fallback comparator.
    if (isset($this->collator)) {
        return $keepKeys
            ? $this->collator->asort($array)
            : $this->collator->sort($array);
    }

    // Capture the instance explicitly so the closure can reach compare().
    $self = $this;
    $comparator = function ($left, $right) use ($self) {
        return $self->compare($left, $right);
    };

    return $keepKeys
        ? uasort($array, $comparator)
        : usort($array, $comparator);
}
<?php
// Declare the response as UTF-8 HTML before any markup is sent.
header('Content-Type: text/html; charset=UTF-8');
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8" />
    <title>Collation in PHP</title>
</head>
<body style="font-size: 18pt;">
<?php # Script 14.3 - collation.php

// Sample French words, including accented and capitalised forms.
$words = array('chère', 'côté', 'chaise', 'château', 'chaînette', 'châle', 'Chère', 'côte', 'chemise');

// First pass: PHP's built-in sort(), then print one word per line.
sort($words);
echo '<h3>Using sort()</h3>', implode('<br />', $words);

// Second pass: re-sort the same array with a French (fr_FR) Collator and print again.
$frenchCollator = new Collator('fr_FR');
$frenchCollator->sort($words);
echo '<h3>Using Collator</h3>', implode('<br />', $words);
?>
</body>
</html>
/**
 * Build the list of group header characters and write it, serialized, to
 * "{$IP}/serialized/first-letters-root.ser".
 *
 * Steps:
 *  1. Read "{$this->dataDir}/allkeys.txt" and, for every single-code-point entry
 *     already present in $this->weights, replace its weight with the concatenated
 *     non-zero primary weights found on that line.
 *  2. Group code points that share the same primary weight string into $this->groups.
 *  3. Drop groups whose weight string starts with another group's weight.
 *  4. Choose one header character per group and count how many headers compare
 *     as out of order under a primary-strength root collator.
 *  5. Serialize the header characters to the output file.
 *
 * Exits the process with status 1 when either file cannot be opened.
 *
 * @return void
 */
function generateFirstChars()
{
    $file = fopen("{$this->dataDir}/allkeys.txt", 'r');
    if (!$file) {
        $this->error("Unable to open allkeys.txt");
        exit(1);
    }
    global $IP;
    $outFile = fopen("{$IP}/serialized/first-letters-root.ser", 'w');
    if (!$outFile) {
        $this->error("Unable to open output file first-letters-root.ser");
        exit(1);
    }

    $goodTertiaryChars = array();

    // For each character with an entry in allkeys.txt, overwrite the implicit
    // entry in $this->weights that came from the UCD.
    // Also gather a list of tertiary weights, for use in selecting the group header
    while (false !== ($line = fgets($file))) {
        // We're only interested in single-character weights, pick them out with a regex
        $line = trim($line);
        if (!preg_match('/^([0-9A-F]+)\\s*;\\s*([^#]*)/', $line, $m)) {
            continue;
        }

        $cp = hexdec($m[1]);
        $allWeights = trim($m[2]);
        $primary = '';
        $tertiary = '';

        if (!isset($this->weights[$cp])) {
            // Non-printable, ignore
            continue;
        }
        foreach (StringUtils::explode('[', $allWeights) as $weightStr) {
            // Separate match array so the line-level matches in $m are not clobbered.
            preg_match_all('/[*.]([0-9A-F]+)/', $weightStr, $weightMatches);
            if (!empty($weightMatches[1])) {
                if ($weightMatches[1][0] !== '0000') {
                    $primary .= '.' . $weightMatches[1][0];
                }
                // NOTE(review): assumes every collation element lists at least three
                // weight levels; index 2 is read without an existence check — confirm.
                if ($weightMatches[1][2] !== '0000') {
                    $tertiary .= '.' . $weightMatches[1][2];
                }
            }
        }
        $this->weights[$cp] = $primary;
        if ($tertiary === '.0008' || $tertiary === '.000E') {
            $goodTertiaryChars[$cp] = true;
        }
    }
    fclose($file);

    // Identify groups of characters with the same primary weight
    $this->groups = array();
    asort($this->weights, SORT_STRING);
    $prevWeight = reset($this->weights);
    $group = array();
    foreach ($this->weights as $cp => $weight) {
        if ($weight !== $prevWeight) {
            // Weight changed: store the finished group and start (or resume) the next one.
            $this->groups[$prevWeight] = $group;
            $prevWeight = $weight;
            if (isset($this->groups[$weight])) {
                $group = $this->groups[$weight];
            } else {
                $group = array();
            }
        }
        $group[] = $cp;
    }
    if ($group) {
        $this->groups[$prevWeight] = $group;
    }

    // If one character has a given primary weight sequence, and a second
    // character has a longer primary weight sequence with an initial
    // portion equal to the first character, then remove the second
    // character. This avoids having characters like U+A732 (double A)
    // polluting the basic latin sort area.
    foreach ($this->groups as $weight => $group) {
        if (preg_match('/(\\.[0-9A-F]*)\\./', $weight, $m)) {
            if (isset($this->groups[$m[1]])) {
                unset($this->groups[$weight]);
            }
        }
    }

    ksort($this->groups, SORT_STRING);

    // Identify the header character in each group
    $headerChars = array();
    $prevChar = "";
    $tertiaryCollator = new Collator('root');
    $primaryCollator = new Collator('root');
    $primaryCollator->setStrength(Collator::PRIMARY);
    $numOutOfOrder = 0;
    foreach ($this->groups as $weight => $group) {
        $uncomposedChars = array();
        $goodChars = array();
        foreach ($group as $cp) {
            if (isset($goodTertiaryChars[$cp])) {
                $goodChars[] = $cp;
            }
            if (!isset($this->mappedChars[$cp])) {
                $uncomposedChars[] = $cp;
            }
        }
        // Candidate preference: good-tertiary AND unmapped, then any unmapped,
        // then the whole group.
        $x = array_intersect($goodChars, $uncomposedChars);
        if (!$x) {
            $x = $uncomposedChars;
            if (!$x) {
                $x = $group;
            }
        }

        // Use ICU to pick the lowest sorting character in the selection
        // NOTE(review): $x holds integer code points rather than UTF-8 strings —
        // confirm Collator::sort() orders them as intended.
        $tertiaryCollator->sort($x);
        $cp = $x[0];

        $char = UtfNormal\Utils::codepointToUtf8($cp);
        $headerChars[] = $char;
        if ($primaryCollator->compare($char, $prevChar) <= 0) {
            $numOutOfOrder++;
            /*
            printf( "Out of order: U+%05X > U+%05X\n",
                utf8ToCodepoint( $prevChar ),
                utf8ToCodepoint( $char ) );
            */
        }
        $prevChar = $char;

        if ($this->debugOutFile) {
            fwrite($this->debugOutFile, sprintf("%05X %s %s (%s)\n", $cp, $weight, $char, implode(' ', array_map('UtfNormal\\Utils::codepointToUtf8', $group))));
        }
    }

    print "Out of order: {$numOutOfOrder} / " . count($headerChars) . "\n";

    fwrite($outFile, serialize($headerChars));
    // Close the output handle explicitly so the data is flushed and the
    // descriptor released instead of lingering until process shutdown.
    fclose($outFile);
}
<?php

// Sample Russian words to sort.
$words = array('Малина', 'Клубника', 'Огурец');

// Collator configured for the ru_RU locale.
$russianCollator = new Collator('ru_RU');

// Dump the list as declared.
echo print_r($words, true);

// Sort in place with the collator, then dump the result.
$russianCollator->sort($words);
echo print_r($words, true);
/**
 * Read a CSV source of "name, amount" rows and group donor names by amount.
 *
 * A row whose first column is exactly 'anonymous' stores its raw second column
 * under key 0. Every other row appends its name to the list keyed by
 * intval(amount). Rows with fewer than two columns (for example blank lines)
 * are skipped. The result is ordered by key, highest first; when the intl
 * extension is loaded, each name list is additionally sorted with a
 * 'zh_TW_STROKE' Collator.
 *
 * NOTE(review): a non-anonymous row whose amount evaluates to 0 shares key 0
 * with the anonymous entry — confirm the input never contains such rows.
 *
 * @param string $source_url Path or URL handed to fopen().
 *
 * @return array|false Map of amount => list of names (plus key 0 for the
 *                     anonymous value), or false when the source cannot be opened.
 */
function get_donate_list_from_gdoc($source_url)
{
    // Warnings are suppressed on purpose: failure is reported through the return value.
    $handle = @fopen($source_url, 'r');
    if (!$handle) {
        return false; // failed
    }

    $donate_list = array(); // amount => names
    while (($entry = fgetcsv($handle)) !== false) {
        // fgetcsv() yields array(null) for a blank line; such rows have no
        // amount column and must not be treated as donors.
        if (count($entry) < 2) {
            continue;
        }

        if ($entry[0] === 'anonymous') {
            $donate_list[0] = $entry[1];
            continue;
        }

        $money = intval($entry[1]);
        if (!isset($donate_list[$money])) {
            $donate_list[$money] = array();
        }
        $donate_list[$money][] = $entry[0];
    }
    fclose($handle);

    krsort($donate_list);

    if (extension_loaded('intl')) {
        $collator = new Collator('zh_TW_STROKE');
        // Iterate over keys rather than by reference so no dangling
        // reference into $donate_list is left behind.
        foreach (array_keys($donate_list) as $amount) {
            if ($amount === 0) {
                continue; // key 0 holds the anonymous value, not a name list
            }
            $collator->sort($donate_list[$amount]); // sorting by name
        }
    }

    return $donate_list;
}