/**
 * Changes a string into a URL-friendly string
 *
 * The input is passed through fUTF8::ascii() and fHTML::decode(), trimmed,
 * lowercased and stripped of apostrophes. Every run of characters other than
 * `a-z`, `0-9`, `-` and `_` is then replaced by the delimiter, repeated
 * delimiters are collapsed into one and delimiters at either end are removed.
 *
 * When a maximum length is given and the result is longer, the string is cut
 * at the last delimiter that keeps it within the limit. If that would throw
 * away more than half of the allowed length, it is cut at exactly
 * `$max_length` characters instead. A maximum length that is not greater
 * than zero means no limit.
 *
 * @param  string  $string      The string to convert
 * @param  integer $max_length  The maximum length of the friendly URL
 * @param  string  $delimiter   The delimiter to use between words, defaults to `_`
 * @param  string  |$string
 * @param  string  |$delimiter
 * @return string  The URL-friendly version of the string
 */
public static function makeFriendly($string, $max_length = NULL, $delimiter = NULL)
{
    // This allows omitting the max length, but including a delimiter
    if ($max_length && !is_numeric($max_length)) {
        $delimiter  = $max_length;
        $max_length = NULL;
    }

    $string = fHTML::decode(fUTF8::ascii($string));
    $string = strtolower(trim($string));
    $string = str_replace("'", '', $string);

    // Cast before testing so a NULL delimiter is never handed to a string
    // function (passing NULL to strlen() is deprecated as of PHP 8.1)
    $delimiter = (string) $delimiter;
    if ($delimiter === '') {
        $delimiter = '_';
    }

    // Backslashes and dollar signs are special in a preg_replace() replacement
    $delimiter_replacement = strtr($delimiter, array('\\' => '\\\\', '$' => '\\$'));

    // The group makes the quantifiers below apply to the whole delimiter
    // rather than only to its last character when it is longer than one byte
    $delimiter_regex = '(?:' . preg_quote($delimiter, '#') . ')';

    $string = preg_replace('#[^a-z0-9\\-_]+#', $delimiter_replacement, $string);
    $string = preg_replace('#' . $delimiter_regex . '{2,}#', $delimiter_replacement, $string);

    // With the default delimiter a dash surrounded by whitespace has become
    // `_-_` at this point, which reads better as a plain dash
    $string = preg_replace('#_-_#', '-', $string);
    $string = preg_replace('#(^' . $delimiter_regex . '+|' . $delimiter_regex . '+$)#D', '', $string);

    $length = strlen($string);
    $limit  = (int) $max_length;

    if ($limit > 0 && $length > $limit) {
        // The negative offset restricts strrpos() to delimiters that start at
        // or before index $limit, so cutting at the match can never leave
        // more than $limit characters
        $last_pos = strrpos($string, $delimiter, $limit - $length);

        // No usable delimiter, or one so early that over half of the allowed
        // length would be wasted: fall back to a hard cut at the limit
        if ($last_pos === FALSE || $last_pos < ceil($limit / 2)) {
            $last_pos = $limit;
        }

        $string = substr($string, 0, $last_pos);
    }

    return $string;
}
/**
 * Converts a string into a form that is suitable for use in a URL
 *
 * The input is passed through fUTF8::ascii() and fHTML::decode(), trimmed,
 * lowercased and stripped of apostrophes. Every run of characters other than
 * `a-z`, `0-9` and `-` then becomes a single underscore, and underscores at
 * the start and end of the result are removed.
 *
 * @param  string $string  The string to convert
 * @return string  The URL-friendly version of the string
 */
public static function makeFriendly($string)
{
    $slug = trim(fHTML::decode(fUTF8::ascii($string)));
    $slug = str_replace("'", '', strtolower($slug));

    // Applied strictly in this order, each rule working on the previous result
    $rules = array(
        // Anything that is not a letter, digit or dash becomes an underscore
        '#[^a-z0-9\\-]+#' => '_',
        // Runs of underscores collapse to one
        '#_{2,}#'         => '_',
        // A dash that was surrounded by whitespace is now `_-_`; keep the dash
        '#_-_#'           => '-',
        // No underscores at either end
        '#(^_+|_+$)#D'    => ''
    );

    foreach ($rules as $pattern => $replacement) {
        $slug = preg_replace($pattern, $replacement, $slug);
    }

    return $slug;
}
/**
 * Reduces a word to its stem using the Porter Stemming algorithm
 *
 * Stems are useful for searching, since different forms of a word share one.
 *
 * See http://tartarus.org/~martin/PorterStemmer/ for details about the
 * algorithm.
 *
 * @param  string $word  The word to get the stem of
 * @return string  The stem of the word
 */
public static function stem($word)
{
    // Pattern fragments: an optional run of consonants followed by a vowel,
    // and the same followed by at least one vowel-consonant sequence
    $vowel_fragment   = '^([^aeiou][^aeiouy]*)?[aeiouy]';
    $measure_fragment = $vowel_fragment . '[aeiou]*[^aeiou][^aeiouy]*';

    $has_vowel_regex    = '#' . $vowel_fragment . '#';
    $measure_gt_0_regex = '#' . $measure_fragment . '#';
    $measure_eq_1_regex = '#' . $measure_fragment . '([aeiouy][aeiou]*)?$#';
    $measure_gt_1_regex = '#' . $measure_fragment . '[aeiouy][aeiou]*[^aeiou][^aeiouy]*#';

    // A short consonant-vowel-consonant word whose last letter is not w, x or y
    $short_cvc_regex = '#^[^aeiou][^aeiouy]*[aeiouy][^aeiouwxy]$#';

    $word = strtolower(fUTF8::ascii($word));

    // Words of one or two characters are returned as they are
    if (strlen($word) < 3) {
        return $word;
    }

    // An initial "y" is uppercased so that none of the lowercase vowel
    // classes match it; this is undone just before returning
    if ($word[0] === 'y') {
        $word = 'Y' . substr($word, 1);
    }

    // Step 1a - plurals
    $word = preg_replace('#^(.+?)(?:(ss|i)es|([^s])s)$#', '\\1\\2\\3', $word);

    // Step 1b - "eed", "ed" and "ing"
    if (preg_match('#^(.+?)eed$#', $word, $parts)) {
        if (preg_match($measure_gt_0_regex, $parts[1])) {
            $word = substr($word, 0, -1);
        }

    } elseif (preg_match('#^(.+?)(ed|ing)$#', $word, $parts) && preg_match($has_vowel_regex, $parts[1])) {
        $word = $parts[1];

        // Tidy up what is left after removing the suffix
        if (preg_match('#(at|bl|iz)$#', $word)) {
            $word .= 'e';
        } elseif (preg_match('#([^aeiouylsz])\\1$#', $word)) {
            $word = substr($word, 0, -1);
        } elseif (preg_match($short_cvc_regex, $word)) {
            $word .= 'e';
        }
    }

    // Step 1c - a final "y" becomes "i" when the rest of the word has a vowel
    if (substr($word, -1) === 'y') {
        $without_y = substr($word, 0, -1);
        if (preg_match($has_vowel_regex, $without_y)) {
            $word = $without_y . 'i';
        }
    }

    // Step 2 - the captured suffix is always exactly one key of this table
    $step_2_suffixes = array(
        'ational' => 'ate',
        'tional'  => 'tion',
        'enci'    => 'ence',
        'anci'    => 'ance',
        'izer'    => 'ize',
        'bli'     => 'ble',
        'alli'    => 'al',
        'entli'   => 'ent',
        'eli'     => 'e',
        'ousli'   => 'ous',
        'ization' => 'ize',
        'ation'   => 'ate',
        'ator'    => 'ate',
        'alism'   => 'al',
        'iveness' => 'ive',
        'fulness' => 'ful',
        'ousness' => 'ous',
        'aliti'   => 'al',
        'iviti'   => 'ive',
        'biliti'  => 'ble',
        'logi'    => 'log'
    );
    if (preg_match('#^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$#', $word, $parts)
        && preg_match($measure_gt_0_regex, $parts[1])) {
        $word = $parts[1] . $step_2_suffixes[$parts[2]];
    }

    // Step 3 - same approach with a second table
    $step_3_suffixes = array(
        'icate' => 'ic',
        'ative' => '',
        'alize' => 'al',
        'iciti' => 'ic',
        'ical'  => 'ic',
        'ful'   => '',
        'ness'  => ''
    );
    if (preg_match('#^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$#', $word, $parts)
        && preg_match($measure_gt_0_regex, $parts[1])) {
        $word = $parts[1] . $step_3_suffixes[$parts[2]];
    }

    // Step 4 - drop the suffix entirely when the remainder is long enough
    if (preg_match('#^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize|(?<=[st])ion)$#', $word, $parts)) {
        if (preg_match($measure_gt_1_regex, $parts[1])) {
            $word = $parts[1];
        }
    }

    // Step 5 - final "e" and double "l"
    if (substr($word, -1) === 'e') {
        $without_e = substr($word, 0, -1);

        $long_enough = preg_match($measure_gt_1_regex, $without_e);
        if ($long_enough || (preg_match($measure_eq_1_regex, $without_e) && !preg_match($short_cvc_regex, $without_e))) {
            $word = $without_e;
        }
    }

    if (preg_match('#ll$#', $word) && preg_match($measure_gt_1_regex, $word)) {
        $word = substr($word, 0, -1);
    }

    // Undo the marking of an initial "y"
    if ($word[0] === 'Y') {
        $word = 'y' . substr($word, 1);
    }

    return $word;
}
/**
 * Changes a string into a URL-friendly string
 *
 * The input is passed through fUTF8::ascii() and fHTML::decode(), trimmed,
 * lowercased and stripped of apostrophes. Every run of characters other than
 * `a-z`, `0-9` and `-` is then replaced by a single underscore and
 * underscores at either end are removed.
 *
 * When a maximum length is given and the result is longer, the string is cut
 * at the last underscore that keeps it within the limit. If that would throw
 * away more than half of the allowed length, it is cut at exactly
 * `$max_length` characters instead. A maximum length that is not greater
 * than zero means no limit.
 *
 * @param  string  $string      The string to convert
 * @param  integer $max_length  The maximum length of the friendly URL
 * @return string  The URL-friendly version of the string
 */
public static function makeFriendly($string, $max_length = NULL)
{
    $string = fHTML::decode(fUTF8::ascii($string));
    $string = strtolower(trim($string));
    $string = str_replace("'", '', $string);

    $string = preg_replace('#[^a-z0-9\\-]+#', '_', $string);
    $string = preg_replace('#_{2,}#', '_', $string);

    // A dash surrounded by whitespace has become `_-_` at this point, which
    // reads better as a plain dash
    $string = preg_replace('#_-_#', '-', $string);
    $string = preg_replace('#(^_+|_+$)#D', '', $string);

    $length = strlen($string);
    $limit  = (int) $max_length;

    if ($limit > 0 && $length > $limit) {
        // The negative offset restricts strrpos() to underscores located at
        // or before index $limit, so cutting at the match can never leave
        // more than $limit characters
        $last_pos = strrpos($string, '_', $limit - $length);

        // No usable underscore, or one so early that over half of the allowed
        // length would be wasted: fall back to a hard cut at the limit
        if ($last_pos === FALSE || $last_pos < ceil($limit / 2)) {
            $last_pos = $limit;
        }

        $string = substr($string, 0, $last_pos);
    }

    return $string;
}