/**
 * Encode the given string using Huffman-encoding.
 *
 * Code bits are written most-significant-bit first into each output byte. If the last
 * byte is only partially filled, its remaining low-order bits are padded with "1" bits.
 *
 * @param string $input Input string containing only symbols that are mapped to codes.
 * @param int $bits Passed by reference, will be populated with the number of bits used to encode
 *                  the input string. Padding bits are not counted and any previous value of the
 *                  variable is overwritten.
 * @return string Huffman-encoded string.
 */
public function encode(string $input, int &$bits = NULL) : string
{
    // Lazily fetch the encoder table; each entry is read below as a [code, length] pair.
    if ($this->symbols === NULL) {
        $this->symbols = $this->code->getEncoderData();
    }

    // Reset the out-parameter so it reports this call only. Without this a reused variable
    // would accumulate across calls and an empty input would leave it NULL instead of 0.
    $bits = 0;

    $encoded = '';
    $offset = 7; // Next free bit position within $byte (7 = most significant bit).
    $byte = 0;

    for ($inputLen = strlen($input), $i = 0; $i < $inputLen; $i++) {
        list($code, $len) = $this->symbols[$input[$i]];

        for ($j = $len - 1; $j >= 0; $j--) {
            // Get next code bit and shift it into the encoded byte at the next bit offset.
            $byte |= ($code >> $j & 1) << $offset--;
            $bits++;

            // Byte is full: flush it and start a new one.
            if ($offset == -1) {
                $offset = 7;
                $encoded .= chr($byte);
                $byte = 0;
            }
        }
    }

    // Pad encoded string with "1" bits.
    if ($offset < 7) {
        do {
            $byte |= 1 << $offset--;
        } while ($offset >= 0);

        $encoded .= chr($byte);
    }

    return $encoded;
}
/**
 * Decode a Huffman-encoded string using the canonical decoder tables.
 *
 * Bits are consumed most-significant-bit first. A code of a given length is accepted as
 * soon as its value is not greater than $this->first[length]; the symbol is then looked up
 * at index ($this->first[length] - code) in $this->symbol[length].
 * NOTE(review): the exact layout of both tables is defined by getCanonicalDecoderData(),
 * which is not visible here; confirm against that method.
 *
 * @param string $input Huffman-encoded string.
 * @param int $bits Maximum number of bits to read from the input. Decoding stops cleanly when this
 *                  budget is used up on a symbol boundary (including a budget of 0).
 * @return string Decoded string.
 *
 * @throws \RuntimeException If the input ends in the middle of a code that is not padding, or if a
 *                           bit sequence is neither a known code nor padding.
 */
protected function decodeCanonical(string $input, int $bits) : string
{
    // Lazily fetch the decoder tables.
    if ($this->symbol === NULL) {
        list($this->symbol, $this->first) = $this->code->getCanonicalDecoderData();
    }

    $decoded = '';
    $buffer = NULL; // Current input byte, or NULL when the next byte must be fetched.
    $len = strlen($input);
    $byteOffset = 0;
    $bitOffset = 7; // Next bit to read within $buffer (7 = most significant bit).

    while (true) {
        $code = 0;
        $codeLen = 0;

        do {
            // Check the bit budget before consuming from it. Decrementing inside the comparison
            // left $bits at -1 after the break, which made the "$bits == 0" guard below
            // unreachable and turned a zero-bit decode into an error instead of a clean return.
            if ($bits === 0) {
                break;
            }
            $bits--;

            if ($buffer === NULL) {
                if ($byteOffset === $len) {
                    // Input exhausted: only acceptable on a symbol boundary or inside padding.
                    if ($code === 0 || $this->isHuffmanPaddingCode($code)) {
                        return $decoded;
                    }

                    throw new \RuntimeException('Cannot read beyond end of Huffman-encoded string');
                }

                $buffer = ord($input[$byteOffset++]);
            }

            // Read next bit and append it as LSB (least significant bit) to the code.
            $code = $code << 1 | $buffer >> $bitOffset-- & 1;

            // Byte fully consumed: force a fetch of the next one on the following read.
            if ($bitOffset === -1) {
                $bitOffset = 7;
                $buffer = NULL;
            }
        } while ($code > $this->first[++$codeLen]);

        // Bit budget ran out exactly on a symbol boundary: nothing left to decode.
        if ($bits === 0 && $codeLen === 0) {
            return $decoded;
        }

        $char = $this->symbol[$codeLen][$this->first[$codeLen] - $code] ?? NULL;

        if ($char !== NULL) {
            $decoded .= $char;

            if ($bits === 0) {
                return $decoded;
            }

            continue;
        }

        // No symbol matched: trailing padding is fine, anything else is an invalid code.
        if ($this->isHuffmanPaddingCode($code)) {
            return $decoded;
        }

        break;
    }

    throw new \RuntimeException('Invalid Huffman code detected');
}