Ejemplo n.º 1
0
/**
 * UTF-8 aware counterpart of strpos().
 *
 * Finds the first occurrence of $sub inside $str and returns its position,
 * counted in characters as produced by utf8_get_char(). Returns false when
 * $sub does not occur, when $offset lies beyond the end of $str, or when
 * $sub is empty (in which case an error is also raised via trigger_error()).
 *
 * A negative $offset is taken relative to the end of the string; resolving it
 * needs an additional utf8_length() call, which makes the search slower.
 *
 * Because a match at the very beginning yields 0, which is loosely equal to
 * false, test the result with the === operator.
 *
 * @param  string $str    The string to search in
 * @param  string $sub    The string to search for
 * @param  int    $offset Character position at which to start searching
 * @param  bool   $ci     Whether to match case-insensitively
 * @return int|false The position of the match, or false on failure
 */
function utf8_search($str, $sub, $offset = 0, $ci = false)
{
    // $start   - character position in $str of the current match candidate
    // $matched - how many leading characters of $sub have matched so far
    $start = 0;
    $matched = 0;

    // Resolve an offset given relative to the end of the string.
    if ($offset < 0) {
        $offset = max(0, $offset + utf8_length($str));
    }

    // Consume the characters in front of the requested starting position.
    // $strCursor is deliberately left unset; utf8_get_char() owns its state.
    for (; $start < $offset; ++$start) {
        if (utf8_get_char($str, $strCursor) === false) {
            return false;
        }
    }

    $needle = array(); // accepted code points per $sub position, read lazily
    $window = array(); // code points of $str already read, keyed by position

    for (;;) {
        if (!isset($needle[$matched])) {
            if (utf8_get_char($sub, $subCursor, $cp) === false) {
                if ($matched) {
                    // All of $sub has been consumed and matched.
                    return $start;
                }
                trigger_error('utf8_search: The string to search for is empty');
                return false;
            }
            $accepted = array($cp);
            // For case-insensitive matching also accept the swapped-case form.
            if ($ci && ($swapped = unicode_swapcase($cp)) != $cp) {
                $accepted[] = $swapped;
            }
            $needle[$matched] = $accepted;
        }

        $position = $start + $matched;
        if (!isset($window[$position])) {
            if (utf8_get_char($str, $strCursor, $cp) === false) {
                // $str ran out before $sub was matched completely.
                return false;
            }
            $window[$position] = $cp;
        }

        if (in_array($window[$position], $needle[$matched])) {
            ++$matched;
        } else {
            // Mismatch: drop the candidate's first character and retry one
            // position further, reusing the code points already buffered.
            unset($window[$start]);
            ++$start;
            $matched = 0;
        }
    }
}
Ejemplo n.º 2
0
 /**
  * Iterates over $str with utf8_get_char() and checks both the returned
  * characters and the values reported through its third (by-reference)
  * argument - presumably Unicode code points - against the expected lists.
  *
  * @dataProvider providerUtf8GetChar
  *
  * @param string $str      The string to iterate over
  * @param array  $rchars   The expected characters, in order
  * @param array  $runicode The expected third-argument values, in order
  */
 public function testUtf8GetChar($str, $rchars, $runicode)
 {
     $chars = $unicode = array();
     // $i is intentionally left unset; utf8_get_char() keeps its state in it.
     while (($char = utf8_get_char($str, $i, $cp)) !== false) {
         $chars[] = $char;
         $unicode[] = $cp;
     }
     // assertEquals() takes ($expected, $actual); passing them in that order
     // keeps the labels in PHPUnit's failure diff correct.
     $this->assertEquals($rchars, $chars);
     $this->assertEquals($runicode, $unicode);
 }
Ejemplo n.º 3
0
/**
 * UTF-8 aware replacement for substr() that works with start/end positions.
 *
 * Returns the characters of $str from position $start up to and including
 * position $end. When $end is omitted (null) the slice runs to the end of
 * the string. Negative positions are taken relative to the string length as
 * reported by utf8_length() and are clamped to 0. An empty string is returned
 * when the resolved $start lies after the resolved $end.
 *
 * @param  string   $str   The UTF-8 encoded string
 * @param  int      $start The start position
 * @param  int|null $end   The (inclusive) end position, or null for "to the end"
 * @return string The portion of the string
 */
function utf8_slice($str, $start, $end = null)
{
    $first = (int) $start;
    $last = ($end === null) ? null : (int) $end;

    // Only pay for utf8_length() when a position is relative to the end.
    if ($first < 0 || ($last !== null && $last < 0)) {
        $total = utf8_length($str);
        if ($first < 0) {
            $first = max(0, $first + $total);
        }
        if ($last !== null && $last < 0) {
            $last = max(0, $last + $total);
        }
    }

    if ($last === null) {
        // -1 never equals a character index, so the loop below runs to the end.
        $last = -1;
    } elseif ($first > $last) {
        return '';
    }

    $result = '';
    $index = 0;
    // $cursor is intentionally left unset; utf8_get_char() keeps its state in it.
    while (($char = utf8_get_char($str, $cursor)) !== false) {
        if ($index >= $first) {
            $result .= $char;
        }
        if ($index == $last) {
            break;
        }
        ++$index;
    }

    return $result;
}
Ejemplo n.º 4
0
/**
 * UTF-8 aware replacement for trim().
 *
 * Strips whitespace (or other characters) from the beginning and/or end of
 * a string.
 *
 * @param  string            $str        The UTF-8 encoded string
 * @param  string|array|null $stripchars The characters to strip. null selects
 *                                       the characters built with utf8_chr()
 *                                       from the global
 *                                       $unicode_separators_array; each element
 *                                       of an array is replaced by the result
 *                                       of utf8_get_char() on it (elements for
 *                                       which that returns false are dropped);
 *                                       any other value is split with
 *                                       utf8_split($stripchars, 1).
 * @param  int               $striptype  The optional argument $striptype can be
 *                                       UTF8_STRIP_BOTH, UTF8_STRIP_LEFT, or
 *                                       UTF8_STRIP_RIGHT. If $striptype is not
 *                                       specified it is assumed to be
 *                                       UTF8_STRIP_BOTH.
 * @return string The stripped string
 */
function utf8_strip($str, $stripchars = null, $striptype = UTF8_STRIP_BOTH)
{
    static $defaults;
    global $unicode_separators_array;
    if ($stripchars === null) {
        // Build the default character list once and reuse it across calls.
        if ($defaults === null) {
            foreach ($unicode_separators_array as $cp) {
                $defaults[] = utf8_chr($cp);
            }
        }
        $stripchars = $defaults;
    } elseif (is_array($stripchars)) {
        $chars = array();
        foreach ($stripchars as $char) {
            if (($char = utf8_get_char($char)) !== false) {
                $chars[] = $char;
            }
        }
        $stripchars = $chars;
    } else {
        $stripchars = utf8_split($stripchars, 1);
    }
    $left = $striptype & UTF8_STRIP_LEFT;
    $right = $striptype & UTF8_STRIP_RIGHT;
    $rv = $buffer = '';
    // Compare against false explicitly: a plain truthiness test would end the
    // scan at the first "0" character, because the string "0" is falsy in PHP,
    // and silently truncate the result.
    while (($char = utf8_get_char($str, $i)) !== false) {
        $state = in_array($char, $stripchars, true);
        if ($left) {
            if ($state) {
                // Still inside the leading run of strippable characters.
                continue;
            } else {
                $left = false;
            }
        }
        if ($right) {
            if ($state) {
                // Hold strippable characters back: they are only emitted if a
                // non-strippable character follows, so a trailing run is lost.
                $buffer .= $char;
                continue;
            } else {
                $rv .= $buffer;
                $buffer = '';
            }
        }
        $rv .= $char;
    }
    return $rv;
}
Ejemplo n.º 5
0
/**
 * Capitalizes every word of a string.
 *
 * The first character after a separator (as decided by
 * unicode_is_separator()), and the very first character of the string, is
 * converted with unicode_upcase(); every other non-separator character is
 * converted with unicode_downcase(). Separators are copied unchanged.
 *
 * @param  string $str The UTF-8 encoded string
 * @return string The string with the first character of each word in
 *                uppercase and the remainder in lowercase
 */
function utf8_capwords($str)
{
    $result = '';
    // True while the next non-separator character starts a new word.
    $atWordStart = true;

    // $cursor is intentionally left unset; utf8_get_char() keeps its state in it.
    while (($char = utf8_get_char($str, $cursor, $cp)) !== false) {
        $separator = unicode_is_separator($cp);
        if (!$separator) {
            if ($atWordStart) {
                $converted = unicode_upcase($cp);
            } else {
                $converted = unicode_downcase($cp);
            }
            // Re-encode only when the case conversion actually changed the
            // code point.
            if ($converted != $cp) {
                $char = utf8_chr($converted);
            }
        }
        $result .= $char;
        $atWordStart = $separator;
    }

    return $result;
}