/**
 * Read a BOF record at the current position, validate it and work out
 * which BIFF version it announces.
 *
 * @param int $rqd_stream Stream type the caller requires; compared against the
 *                        stream type stored in the record.
 * @return int BIFF version code: 21, 30, 40, 45 (4W), 50, 70 or 80; 0 when the
 *             version field is not one of the recognised values.
 * @throws XLSParserException When end of file is met, the opcode is not a BOF
 *                            opcode, the record length is outside 4..20, the
 *                            record is truncated, or the stream type is not
 *                            the required/acceptable one.
 */
public function readBOF($rqd_stream)
{
    Helper::debug("reqd: 0x%04x", $rqd_stream);
    $savpos = $this->position;

    $opcode = $this->read2Bytes();
    if ($opcode == MY_EOF) {
        throw new XLSParserException("Unsupported format, or corrupt file: Expected BOF record; met end of file");
    }
    if (!in_array($opcode, Defs::$bofcodes)) {
        throw new XLSParserException("Expected BOF record; found " . substr($this->mem, $savpos, 8));
    }

    $length = $this->read2Bytes();
    if ($length == MY_EOF) {
        throw new XLSParserException("Expected BOF record[1]; met end of file");
    }
    if ($length < 4 || $length > 20) {
        throw new XLSParserException("Invalid length ({$length}) for BOF record type 0x" . dechex($opcode));
    }

    # Short records are padded with NUL bytes up to the nominal length for this
    # opcode so the fixed-offset unpack() calls below always have enough input.
    # (Repeating an empty string, as the code previously did, pads nothing.)
    $padding = str_repeat("\0", max(0, Defs::$boflen[$opcode] - $length));

    $data = $this->read($this->position, $length);
    Helper::debug("getbof(): data=%s", Helper::as_hex($data));
    if (strlen($data) < $length) {
        throw new XLSParserException("Incomplete BOF record[2]; met end of file");
    }
    $data .= $padding;

    # The high byte of the opcode identifies the BOF flavour.
    $version1 = $opcode >> 8;
    # <HH
    list($version2, $streamtype) = array_values(unpack('v2', substr($data, 0, 4)));
    Helper::debug("getbof(): op=0x%04x version2=0x%04x streamtype=0x%04x", $opcode, $version2, $streamtype);

    # 4 = the two 2-byte header fields (opcode, length) read above.
    $bof_offset = $this->position - 4 - $length;
    Helper::debug("getbof(): BOF found at offset %d; savpos=%d", $bof_offset, $savpos);

    $version = $build = $year = 0;
    if ($version1 == 0x8) {
        # <HH
        list($build, $year) = array_values(unpack('v2', substr($data, 4, 4)));
        if ($version2 == 0x600) {
            $version = 80;
        } elseif ($version2 == 0x500) {
            if ($year < 1994 || in_array($build, [2412, 3218, 3321])) {
                $version = 50;
            } else {
                $version = 70;
            }
        } else {
            # dodgy one, created by a 3rd party tool
            $arr = [0x0 => 21, 0x7 => 21, 0x200 => 21, 0x300 => 30, 0x400 => 40];
            $version = isset($arr[$version2]) ? $arr[$version2] : 0;
        }
    } elseif (in_array($version1, [0x4, 0x2, 0x0])) {
        $arr = [0x4 => 40, 0x2 => 30, 0x0 => 21];
        $version = $arr[$version1];
    }

    if ($version == 40 && $streamtype == XL_WORKBOOK_GLOBALS_4W) {
        $version = 45; # i.e. 4W
    }

    Helper::debug(
        "BOF: op=0x%04x vers=0x%04x stream=0x%04x buildid=%d buildyr=%d -> BIFF%d",
        $opcode,
        $version2,
        $streamtype,
        $build,
        $year,
        $version
    );

    $got_globals = $streamtype == XL_WORKBOOK_GLOBALS
        || $version == 45 && $streamtype == XL_WORKBOOK_GLOBALS_4W;
    if ($rqd_stream == XL_WORKBOOK_GLOBALS && $got_globals || $streamtype == $rqd_stream) {
        return $version;
    }
    # Below BIFF5 a worksheet stream is accepted whatever stream was requested.
    if ($version < 50 && $streamtype == XL_WORKSHEET) {
        return $version;
    }
    if ($version >= 50 && $streamtype == 0x100) {
        throw new XLSParserException("Workspace file - no spreadsheet data");
    }

    throw new XLSParserException(sprintf(
        'BOF not workbook/worksheet: op=0x%s vers=0x%s strm=0x%s build = %d year = %d -> BIFF = %d',
        dechex($opcode),
        dechex($version2),
        dechex($streamtype),
        $build,
        $year,
        $version
    ));
}
/**
 * Parse a NOTE record and store the resulting Note in $this->cell_note_map,
 * indexed by row then column.
 *
 * Before BIFF8 the note text is carried in the record itself and may run on
 * into further NOTE records, which are read from the book here. From BIFF8
 * on, the text comes from the entry in $txos whose key is the note's object
 * id; if there is no such entry the note is not stored.
 *
 * @param string  $data Raw payload of the NOTE record.
 * @param MSTxo[] $txos Text objects indexed by object id.
 * @return void
 */
public function handle_note($data, $txos)
{
    $o = new Note();
    $data_len = strlen($data);

    if ($this->biff_version < 80) {
        # <HHH
        list($o->rowx, $o->colx, $expected_bytes) = array_values(unpack("v3", substr($data, 0, 6)));
        $nb = $data_len - 6;
        assert($nb <= $expected_bytes);
        $pieces = [substr($data, 6)];
        $expected_bytes -= $nb;

        # Collect the continuation records until the announced byte count is reached.
        while ($expected_bytes > 0) {
            list($rc2, $data2_len, $data2) = $this->book->readRecordParts();
            assert($rc2 == XL_NOTE);
            # <H2xH: uint16 row marker, two skipped bytes, uint16 chunk length.
            # The former format 'v2/x/va' asked for 7 bytes out of a 6-byte
            # slice and took $nb from the skipped bytes.
            $header = unpack('vrowx/x2/vnb', substr($data2, 0, 6));
            $dummy_rowx = $header['rowx'];
            $nb = $header['nb'];
            assert($dummy_rowx == 0xffff);
            assert($nb == $data2_len - 6);
            $pieces[] = substr($data2, 6);
            $expected_bytes -= $nb;
        }
        assert($expected_bytes == 0);

        # Make sure the book's encoding has been derived.
        # NOTE(review): the text below is stored as raw bytes and is not
        # converted using that encoding - confirm this is intended.
        if (!$this->book->encoding) {
            $this->book->deriveEncoding();
        }

        $o->text = join('', $pieces);
        $o->rich_text_runlist = [[0, 0]];
        $o->show = 0;
        $o->row_hidden = 0;
        $o->col_hidden = 0;
        $o->author = '';
        $o->_object_id = null;
        $this->cell_note_map[$o->rowx][$o->colx] = $o;
        return;
    }

    # Excel 8.0+
    # <HHHH
    list($o->rowx, $o->colx, $option_flags, $o->_object_id) = array_values(unpack("v4", substr($data, 0, 8)));
    $o->show = $option_flags >> 1 & 1;
    $o->row_hidden = $option_flags >> 7 & 1;
    $o->col_hidden = $option_flags >> 8 & 1;

    # XL97 dev kit book says NULL [sic] bytes padding between string count and string data
    # to ensure that string is word-aligned. Appears to be nonsense.
    list($o->author, $endpos) = Helper::unpack_unicode_update_pos($data, 8, 2);

    # There is a random/undefined byte after the author string (not counted in the
    # string length).
    # Issue 4 on github: Google Spreadsheet doesn't write the undefined byte.
    assert(in_array($data_len - $endpos, [0, 1]));

    if (isset($txos[$o->_object_id])) {
        $txo = $txos[$o->_object_id];
        $o->text = $txo->text;
        $o->rich_text_runlist = $txo->rich_text_runlist;
        $this->cell_note_map[$o->rowx][$o->colx] = $o;
    }
}