Example #1
0
 /**
  * Multibyte-safe fgets() equivalent with transparent character set
  * conversion.
  *
  * Reads the next line from $fh, including its terminating EOL sequence when
  * one is present, and returns it converted from $sourceEncoding to
  * $destEncoding. The stream is consumed in 4096-byte chunks; any bytes read
  * past the end of the returned line are stored in $buffer, which the caller
  * must pass back unchanged on the next call for the same handle.
  *
  * @param resource $fh             Open, readable stream.
  * @param string   &$buffer        Read-ahead carried between calls; it is
  *                                 cast to string, so null or '' both mean
  *                                 "nothing buffered yet".
  * @param string   $sourceEncoding Encoding of the stream data; null means
  *                                 the data is returned unconverted.
  * @param string   $destEncoding   Encoding of the returned line. $eol is
  *                                 assumed to be expressed in this encoding.
  * @param string   $eol            Line terminator to split on.
  * @return string|false The next line, or false once no data is left.
  */
 public static function fgetsMB($fh, &$buffer, $sourceEncoding = null, $destEncoding = 'UTF-8', $eol = PHP_EOL)
 {
     $needsConversion = $sourceEncoding !== null && $sourceEncoding !== $destEncoding;

     /* If character set conversion isn't required and the EOL sequence
        ends with "\n", it's (at least in principle) more efficient to fall
        back to the native fgets(). The native call knows nothing about
        $buffer, so the shortcut is only safe while nothing is buffered;
        otherwise previously read bytes would be skipped. */
     if ($sourceEncoding !== null
         && !$needsConversion
         && substr($eol, -1) === "\n"
         && (string) $buffer === ''
     ) {
         return fgets($fh);
     }

     /* When we look for EOL sequences in the data, we need to look for
        their encoded representations; this is a caching mechanism that
        prevents us from having to do the conversion on every call. */
     if ($sourceEncoding !== self::$_lastUsedEncoding) {
         self::$_lastUsedEncoding = $sourceEncoding;
         /* This is an array because there could be multiple possible EOL
            sequences we try to look for on the same encoding. */
         self::$_lastUsedEncodingEOL = array();
     }
     if (!isset(self::$_lastUsedEncodingEOL[$eol])) {
         /* This assumes that the EOL is being passed in the same encoding
            to which we are being asked to convert. */
         self::$_lastUsedEncodingEOL[$eol] = $sourceEncoding === null
             ? $eol
             : mb_convert_encoding($eol, $sourceEncoding, $destEncoding);
     }
     $encodedEol = self::$_lastUsedEncodingEOL[$eol];
     $eolLen = strlen($encodedEol);

     $buffer = (string) $buffer;

     // First deal with the contents of the buffer, if any.
     if ($buffer !== '') {
         $eolPos = strpos($buffer, $encodedEol);
         if ($eolPos !== false) {
             $eolPos += $eolLen;
             $line = substr($buffer, 0, $eolPos);
             /* The cast keeps $buffer a string on PHP < 8, where substr()
                returns false when the offset equals the string length. */
             $buffer = (string) substr($buffer, $eolPos);
             return $needsConversion
                 ? mb_convert_encoding($line, $destEncoding, $sourceEncoding)
                 : $line;
         }
         /* No complete line in the buffer. We never return the remainder on
            its own unless EOF has been reached: it is always prepended to
            the next chunk read, because that is the only way to detect an
            EOL sequence of more than one byte that was split across two
            reads. At EOF the loop below does not run and the remainder is
            returned as the final line. */
     } elseif (feof($fh)) {
         return false;
     }

     $chunk = $buffer;
     $buffer = '';
     $eolPos = false;
     while ($eolPos === false && !feof($fh)) {
         $chunklet = fread($fh, 4096);
         if ($chunklet === false) {
             /* Read failure: stop here instead of spinning forever on a
                stream whose feof() never becomes true. */
             break;
         }
         $chunk .= $chunklet;
         $eolPos = strpos($chunk, $encodedEol);
     }

     if ($eolPos === false) {
         /* feof() only reports true after a read has hit the end of the
            stream, so we can get here with nothing buffered and nothing
            read. That is end of data, not an empty line; report it the way
            fgets() does. */
         if ($chunk === '') {
             return false;
         }
         $line = $chunk;
     } else {
         $eolPos += $eolLen;
         $line = substr($chunk, 0, $eolPos);
         $buffer = (string) substr($chunk, $eolPos);
     }

     /* We are allowing this to fail if mbstring isn't available, because
        code that requires this method should fail in such an environment. */
     return $needsConversion
         ? mb_convert_encoding($line, $destEncoding, $sourceEncoding)
         : $line;
 }