/**
 * Gets the starting position of each line.
 *
 * Offset `0` is always reported as the start of the first line; each
 * newline found at index `$i` adds `$i + 1` as the start of the next line.
 *
 * @param string|UtfString $str String to be analyzed.
 *
 * @return int[] Starting offsets of the lines, in ascending order.
 */
public static function getLines($str)
{
    if (!$str instanceof UtfString && defined('USE_UTF_STRINGS') && USE_UTF_STRINGS) {
        // If the lexer uses UtfString for processing then the position will
        // represent the position of the character and not the position of
        // the byte.
        $str = new UtfString($str);
    }

    // A plain string is indexed byte by byte in the loop below, so the
    // bound must be the length in bytes, not in characters; hence the
    // '8bit' encoding. With a character count, a string containing
    // multi-byte characters would be cut short and trailing newlines
    // would never be reached.
    $len = $str instanceof UtfString ? $str->length() : mb_strlen($str, '8bit');

    $lines = array(0);
    for ($i = 0; $i < $len; ++$i) {
        if ($str[$i] === "\n") {
            // The next line starts right after the newline.
            $lines[] = $i + 1;
        }
    }

    return $lines;
}
/**
 * Constructor.
 *
 * Stores the query, its length and the strictness flag, resets the
 * delimiter to the default one and immediately lexes the input.
 *
 * @param string|UtfString $str    The query to be lexed.
 * @param bool             $strict Whether strict mode should be enabled or not.
 */
public function __construct($str, $strict = false)
{
    // `strlen` is used instead of `mb_strlen` because the lexer needs to
    // parse each byte of the input.
    $len = $str instanceof UtfString ? $str->length() : strlen($str);

    // For multi-byte strings, a new instance of `UtfString` is
    // initialized (only if `UtfString` usage is forced). A string is
    // considered multi-byte when its byte length differs from its UTF-8
    // character count. The `defined()` guard treats a missing
    // `USE_UTF_STRINGS` constant as "disabled" instead of failing on it.
    if (!$str instanceof UtfString
        && defined('USE_UTF_STRINGS') && USE_UTF_STRINGS
        && $len !== mb_strlen($str, 'UTF-8')
    ) {
        $str = new UtfString($str);
    }

    $this->str = $str;
    // After a conversion the length has to come from the `UtfString`.
    $this->len = $str instanceof UtfString ? $str->length() : $len;

    $this->strict = $strict;

    // Setting the delimiter.
    $this->delimiter = static::$DEFAULT_DELIMITER;

    $this->lex();
}
/**
 * Builds the lexer and lexes the given query right away.
 *
 * @param string|UtfString $str    The query to be lexed.
 * @param bool             $strict Whether strict mode should be enabled or not.
 */
public function __construct($str, $strict = false)
{
    $this->str = $str;

    // A `UtfString` reports its own length; plain strings are measured
    // with `strlen`.
    if ($str instanceof UtfString) {
        $this->len = $str->length();
    } else {
        $this->len = strlen($str);
    }

    $this->strict = $strict;

    $this->lex();
}
/**
 * Builds the lexer, resets the delimiter to the default one and lexes
 * the given query right away.
 *
 * @param string|UtfString $str    The query to be lexed.
 * @param bool             $strict Whether strict mode should be enabled or not.
 */
public function __construct($str, $strict = false)
{
    $this->str = $str;

    // A `UtfString` reports its own length; plain strings are measured
    // with `strlen`.
    if ($str instanceof UtfString) {
        $this->len = $str->length();
    } else {
        $this->len = strlen($str);
    }

    $this->strict = $strict;

    // Start from the default delimiter before lexing.
    $this->delimiter = static::$DEFAULT_DELIMITER;

    $this->lex();
}