In this library, this class should be used to parse UTF-8 queries.
Inheritance: implements ArrayAccess
Ejemplo n.º 1
0
 /**
  * Gets the starting position of each line.
  *
  * @param string $str String to be analyzed.
  *
  * @return array
  */
 public static function getLines($str)
 {
     // Wrap plain strings only when UTF-aware processing is switched on; in
     // that case each reported position is the position of a character and
     // not the position of a byte.
     if (!$str instanceof UtfString && defined('USE_UTF_STRINGS') && USE_UTF_STRINGS) {
         $str = new UtfString($str);
     }

     // For a plain string the loop below indexes individual bytes, so the
     // loop bound has to be the length in bytes, not in characters; the
     // '8bit' encoding makes `mb_strlen` count bytes. Using the character
     // count instead would stop the scan early on multi-byte input and miss
     // trailing newlines.
     //
     // The length must be measured on `$str`. Measuring the still
     // unassigned `$len` yields a length of 0, which skips the loop entirely
     // and reports a single line for every plain string.
     $len = $str instanceof UtfString ? $str->length() : mb_strlen($str, '8bit');

     // The first line always starts at offset 0; each newline found marks
     // the start of another line at the offset right after it.
     $lines = array(0);
     for ($i = 0; $i < $len; ++$i) {
         if ($str[$i] === "\n") {
             $lines[] = $i + 1;
         }
     }

     return $lines;
 }
Ejemplo n.º 2
0
 public function testGetCharLength()
 {
     // Each entry pairs a lead byte with the character length expected for
     // it; the bit pattern of the byte is noted next to the entry.
     $expectations = array(
         array(0x0, 1),  // 00000000
         array(0x7f, 1), // 01111111
         array(0xc0, 2), // 11000000
         array(0xdf, 2), // 11011111
         array(0xe0, 3), // 11100000
         array(0xef, 3), // 11101111
         array(0xf0, 4), // 11110000
         array(0xf7, 4), // 11110111
         array(0xf8, 5), // 11111000
         array(0xfb, 5), // 11111011
         array(0xfc, 6), // 11111100
         array(0xfd, 6), // 11111101
     );

     // Checked in table order, one assertion per lead byte.
     foreach ($expectations as $expectation) {
         list($leadByte, $expectedLength) = $expectation;
         $this->assertEquals($expectedLength, UtfString::getCharLength(chr($leadByte)));
     }
 }
Ejemplo n.º 3
0
 /**
  * Constructor.
  *
  * @param string|UtfString $str    The query to be lexed.
  * @param bool             $strict Whether strict mode should be enabled or not.
  */
 public function __construct($str, $strict = false)
 {
     // `strlen` is used instead of `mb_strlen` because the lexer needs to
     // parse each byte of the input.
     $len = $str instanceof UtfString ? $str->length() : strlen($str);

     // A plain string is wrapped in a new `UtfString` instance only when
     // `UtfString` usage is enabled through `USE_UTF_STRINGS` and the string
     // actually contains multi-byte characters (its byte length differs
     // from its UTF-8 character count).
     //
     // The constant is checked with `defined()` first: reading an undefined
     // constant throws an `Error` on PHP 8, so an unset flag is treated as
     // "disabled" instead of aborting construction.
     if (!$str instanceof UtfString
         && defined('USE_UTF_STRINGS') && USE_UTF_STRINGS
         && $len !== mb_strlen($str, 'UTF-8')
     ) {
         $str = new UtfString($str);
     }

     $this->str = $str;
     // After wrapping, the length has to come from the `UtfString` itself
     // rather than from the byte count computed above.
     $this->len = $str instanceof UtfString ? $str->length() : $len;
     $this->strict = $strict;

     // Setting the delimiter.
     $this->delimiter = static::$DEFAULT_DELIMITER;

     $this->lex();
 }
Ejemplo n.º 4
0
 /**
  * Constructor.
  *
  * @param string|UtfString $str    The query to be lexed.
  * @param bool             $strict Whether strict mode should be enabled or not.
  */
 public function __construct($str, $strict = false)
 {
     $this->str = $str;

     // A `UtfString` reports its own length; a plain string is measured in
     // bytes with `strlen`.
     if ($str instanceof UtfString) {
         $length = $str->length();
     } else {
         $length = strlen($str);
     }
     $this->len = $length;

     $this->strict = $strict;

     // Lex the query as soon as the state above is in place.
     $this->lex();
 }
Ejemplo n.º 5
0
 /**
  * Constructor.
  *
  * @param string|UtfString $str    The query to be lexed.
  * @param bool             $strict Whether strict mode should be enabled or not.
  */
 public function __construct($str, $strict = false)
 {
     $this->str = $str;

     // Length comes from the `UtfString` when one is given, otherwise it is
     // the byte length of the plain string.
     if ($str instanceof UtfString) {
         $inputLength = $str->length();
     } else {
         $inputLength = strlen($str);
     }
     $this->len = $inputLength;

     $this->strict = $strict;

     // Start from the class-level default delimiter (late static binding).
     $this->delimiter = static::$DEFAULT_DELIMITER;

     // Lex the query once everything above has been initialised.
     $this->lex();
 }