Example #1
0
/**
 * UTF-8 aware replacement for strpos().
 *
 * Finds the first occurrence of $sub inside $str and returns its position.
 * When $sub does not occur, false is returned.
 *
 * A negative $offset is resolved against utf8_length($str), which costs an
 * extra pass over the string, and is clamped to 0 when it still ends up
 * negative. An $offset that lies beyond the end of $str yields false.
 *
 * An empty $sub raises an error via trigger_error() and yields false.
 *
 * Because a valid result can be 0, always test the return value with the
 * === operator rather than relying on truthiness.
 *
 * @param  string $str    The string to search in
 * @param  string $sub    The string to search for
 * @param  int    $offset Position in $str at which the search begins
 * @param  bool   $ci     Whether a character may also match its
 *                        unicode_swapcase() counterpart
 * @return int|false The position of the match, or false on failure
 */
function utf8_search($str, $sub, $offset = 0, $ci = false)
{
    // Resolve a negative offset relative to the end of the string.
    if ($offset < 0) {
        $offset += utf8_length($str);
        if ($offset < 0) {
            $offset = 0;
        }
    }

    // Advance the haystack cursor to the requested start; $start ends up
    // being the position of the current match candidate.
    for ($start = 0; $start < $offset; ++$start) {
        if (utf8_get_char($str, $strCursor) === false) {
            return false;
        }
    }

    // Number of leading needle characters matched at the current candidate.
    $matched = 0;

    for (;;) {
        // Decode the needle lazily, one character at a time.
        if (!isset($needle[$matched])) {
            if (utf8_get_char($sub, $subCursor, $cp) === false) {
                if ($matched) {
                    // Needle exhausted after matching all of it.
                    return $start;
                }
                trigger_error('utf8_search: The string to search for is empty');
                return false;
            }

            // Each needle slot holds the acceptable values for that position.
            $variants = array($cp);
            if ($ci) {
                $swapped = unicode_swapcase($cp);
                if ($swapped != $cp) {
                    $variants[] = $swapped;
                }
            }
            $needle[] = $variants;
        }

        // Decode the haystack lazily as well, caching what has been read so a
        // failed partial match can be retried from the next position.
        $index = $start + $matched;
        if (!isset($window[$index])) {
            if (utf8_get_char($str, $strCursor, $cp) === false) {
                return false;
            }
            $window[$index] = $cp;
        }

        if (in_array($window[$index], $needle[$matched])) {
            ++$matched;
            continue;
        }

        // Mismatch: drop the oldest cached character and restart the needle
        // at the following haystack position.
        unset($window[$start]);
        ++$start;
        $matched = 0;
    }
}
Example #2
0
 /**
  * Checks that utf8_length() returns the expected value for each data set.
  *
  * @dataProvider providerUtf8Length
  *
  * @param mixed $str The input handed to utf8_length()
  * @param mixed $rv  The value utf8_length() is expected to return
  */
 public function testUtf8Length($str, $rv)
 {
     // assertEquals() takes the expected value first and the actual value
     // second; keeping that order makes failure messages report correctly.
     $this->assertEquals($rv, utf8_length($str));
 }
Example #3
0
/**
 * UTF-8 aware replacement for substr().
 *
 * Returns the part of $str that lies between the $start and $end positions.
 * The character at $end is included in the result. When $end is omitted
 * (null) the slice runs to the end of the string.
 *
 * Negative positions are resolved by adding utf8_length($str); a position
 * that is still negative afterwards is clamped to 0. An empty string is
 * returned when $start lies after $end.
 *
 * @param  string   $str   The UTF-8 encoded string
 * @param  int      $start The start position
 * @param  int|null $end   The end position (inclusive), or null for no limit
 * @return string The portion of the string
 */
function utf8_slice($str, $start, $end = null)
{
    $start = (int) $start;
    $end = ($end === null) ? null : (int) $end;

    $negativeStart = $start < 0;
    $negativeEnd = $end !== null && $end < 0;

    // The length is only needed to resolve negative positions.
    if ($negativeStart || $negativeEnd) {
        $length = utf8_length($str);

        if ($negativeStart) {
            $start += $length;
            $start = ($start < 0) ? 0 : $start;
        }

        if ($negativeEnd) {
            $end += $length;
            $end = ($end < 0) ? 0 : $end;
        }
    }

    if ($end !== null && $start > $end) {
        return '';
    }

    $result = '';
    for ($index = 0; ($char = utf8_get_char($str, $cursor)) !== false; ++$index) {
        if ($index >= $start) {
            $result .= $char;
        }
        // A null $end never equals an integer index, so an unbounded slice
        // keeps reading until the string is exhausted.
        if ($index === $end) {
            break;
        }
    }

    return $result;
}