/**
 * Multibyte-safe fgets() equivalent with transparent character set
 * conversion.
 *
 * Returns the next line from $fh, up to and including the next $eol
 * sequence, converted from $sourceEncoding to $destEncoding. Data is read
 * from the stream in 4096-byte chunks; whatever was read past the end of
 * the returned line is left in $buffer, so the same $buffer variable must
 * be passed on every call made for a given handle.
 *
 * @param resource    $fh             Stream handle to read from.
 * @param string      &$buffer        Read-ahead carried over between calls;
 *                                    consumed before the stream is read and
 *                                    updated before returning.
 * @param string|null $sourceEncoding Encoding of the stream data. When null,
 *                                    no conversion is performed and $eol is
 *                                    matched as given.
 * @param string      $destEncoding   Encoding of the returned line. $eol is
 *                                    assumed to be expressed in this encoding.
 * @param string      $eol            End-of-line sequence to split on.
 * @return string|false The next line, including its EOL sequence when one
 *                      was found, or false when no more data is available.
 */
public static function fgetsMB($fh, &$buffer, $sourceEncoding = null, $destEncoding = 'UTF-8', $eol = PHP_EOL)
{
    /* If source and destination encodings are identical and the EOL
       sequence ends with "\n", the native fgets() is (at least in
       principle) more efficient. fgets() knows nothing about $buffer,
       though, so this shortcut is only safe while no read-ahead is pending;
       otherwise the buffered bytes would be skipped. */
    if ($sourceEncoding !== null
        && $sourceEncoding === $destEncoding
        && substr($eol, -1) === "\n"
        && (string) $buffer === ''
    ) {
        return fgets($fh);
    }

    $needsConversion = $sourceEncoding !== null && $sourceEncoding !== $destEncoding;

    /* EOL sequences have to be searched for in their source-encoded form.
       The encoded forms are cached so the conversion is not repeated on
       every call; the cache is dropped whenever the source encoding
       changes. */
    if ($sourceEncoding !== self::$_lastUsedEncoding) {
        self::$_lastUsedEncoding = $sourceEncoding;
        /* An array, because several different EOL sequences may be looked
           for under the same encoding. */
        self::$_lastUsedEncodingEOL = array();
    }

    if (!isset(self::$_lastUsedEncodingEOL[$eol])) {
        /* This assumes $eol is passed in the destination encoding.
           NOTE(review): the cache key is $eol alone and does not include
           $destEncoding -- confirm callers never reuse the same $eol bytes
           with a different destination encoding. */
        self::$_lastUsedEncodingEOL[$eol] = $sourceEncoding === null
            ? $eol
            : mb_convert_encoding($eol, $sourceEncoding, $destEncoding);
    }

    /* NOTE(review): matching below is byte-wise (strpos), so with
       fixed-width multi-byte encodings a match is not guaranteed to start
       on a character boundary -- confirm against the encodings in use. */
    $encodedEol = self::$_lastUsedEncodingEOL[$eol];
    $eolLen = strlen($encodedEol);

    $buffer = (string) $buffer;
    $line = '';
    $eofReached = feof($fh);

    // First deal with the contents of the buffer, if any.
    if ($buffer !== '') {
        $eolPos = strpos($buffer, $encodedEol);

        if ($eolPos !== false) {
            // A complete line is already buffered; no read is needed.
            $eolPos += $eolLen;
            $line = substr($buffer, 0, $eolPos);
            // Cast: substr() yields false on PHP < 8 when nothing remains.
            $buffer = (string) substr($buffer, $eolPos);

            return $needsConversion
                ? mb_convert_encoding($line, $destEncoding, $sourceEncoding)
                : $line;
        }

        if ($eofReached) {
            /* The remainder of the buffer is only returned as-is once EOF
               has been reached. Otherwise the next chunk must be appended
               to it first, because that is the only way to detect an EOL
               sequence that was split across two reads. */
            $line = $buffer;
            $buffer = '';
        }
    } elseif ($eofReached) {
        return false;
    }

    $chunk = $buffer;
    $buffer = '';
    $eolPos = false;

    while ($eolPos === false && !feof($fh)) {
        $chunklet = fread($fh, 4096);

        if ($chunklet === false) {
            /* Read failure: stop here instead of spinning forever on a
               stream that never reports EOF. */
            break;
        }

        $chunk .= $chunklet;
        $eolPos = strpos($chunk, $encodedEol);
    }

    if ($eolPos === false) {
        $line .= $chunk;
    } else {
        $eolPos += $eolLen;
        $line .= substr($chunk, 0, $eolPos);
        // Keep everything after the line for the next call.
        $buffer = (string) substr($chunk, $eolPos);
    }

    if ($line === '') {
        /* Nothing was buffered and nothing could be read: the end of the
           stream was only discovered by the read itself. Report it the way
           fgets() does rather than returning a phantom empty line. */
        return false;
    }

    if ($needsConversion) {
        /* This is allowed to fail if mbstring isn't available, because
           code that requires this method should fail in such an
           environment. */
        $line = mb_convert_encoding($line, $destEncoding, $sourceEncoding);
    }

    return $line;
}