Example #1
0
 /**
  * Reads a PDF dictionary ("<< ... >>") starting at the current stream position.
  *
  * Keys and values are parsed with read_object(). When the dictionary is
  * followed by the "stream" keyword, the payload is read as well (its size
  * comes from the dictionary's /Length entry), stored under the
  * '__streamdata__' key and the result is built by
  * StreamObject::init_from_dict(). Otherwise the stream is rewound to just
  * after the closing ">>".
  *
  * Malformed input terminates the script through die(), as everywhere else in
  * this reader. The end-of-line check after "stream" is only an assert() and
  * is therefore skipped when assertions are disabled.
  *
  * @param resource $stream Seekable stream positioned on the opening "<<".
  * @param object   $pdf    Reader handed to read_object(); its get_object()
  *                         resolves a /Length given as an IndirectObject.
  *
  * @return mixed The key => value array for a plain dictionary, or the value
  *               returned by StreamObject::init_from_dict() when stream data
  *               followed the dictionary.
  */
 function read_from_stream($stream, $pdf)
 {
     $tmp = fread($stream, 2);
     if ($tmp !== '<<') {
         die("Error reading PDF: dictionary read error.");
     }
     $data = array();
     while (true) {
         $tok = read_non_whitespace($stream);
         if ($tok == '>') {
             // Consume the second '>' of the closing ">>".
             fread($stream, 1);
             break;
         }
         // Not the end yet: push the byte back so read_object() sees the whole key.
         fseek($stream, -1, SEEK_CUR);
         $key = read_object($stream, $pdf);
         // Skip whitespace between key and value, then push back the first value byte.
         read_non_whitespace($stream);
         fseek($stream, -1, SEEK_CUR);
         $value = read_object($stream, $pdf);
         if (array_key_exists($key, $data)) {
             die("Error reading PDF: multiple definitions in dictionary.");
         }
         $data[$key] = $value;
     }

     // Remember where the dictionary ended so we can rewind if no stream follows.
     $pos = ftell($stream);
     $s = read_non_whitespace($stream);
     if ($s == 's' && fread($stream, 5) == 'tream') {
         // Skip blanks after the keyword, then expect the end-of-line marker.
         $eol = fread($stream, 1);
         while ($eol == ' ') {
             $eol = fread($stream, 1);
         }
         assert($eol == "\n" || $eol == "\r");
         if ($eol == "\r") {
             // Skip the byte following the carriage return (the "\n" of a CRLF pair).
             fread($stream, 1);
         }

         if (!array_key_exists('/Length', $data)) {
             die("Error reading PDF: stream dictionary has no /Length entry.");
         }
         $length = $data['/Length'];
         if (is_a($length, 'IndirectObject')) {
             // Resolving the reference may move the stream; restore the position afterwards.
             $t = ftell($stream);
             $length = $pdf->get_object($length);
             fseek($stream, $t, SEEK_SET);
         }

         // fread() rejects a length of 0, but an empty stream is legal.
         $data['__streamdata__'] = $length > 0 ? fread($stream, $length) : '';

         $e = read_non_whitespace($stream);
         $ndstream = fread($stream, 8);
         // Strings are joined with '.', '+' is arithmetic in PHP.
         if (($e . $ndstream) !== "endstream") {
             // The declared length may be one byte too long: look one byte
             // further back for the marker and, if it is there, drop the
             // surplus byte from the payload.
             $after = ftell($stream);
             fseek($stream, -10, SEEK_CUR);
             $end = fread($stream, 9);
             if ($end == "endstream") {
                 $data['__streamdata__'] = substr($data['__streamdata__'], 0, -1);
             } else {
                 fseek($stream, $after, SEEK_SET);
                 die("Error reading PDF: Unable to find 'endstream' marker after stream.");
             }
         }
     } else {
         // Plain dictionary: undo the look-ahead.
         fseek($stream, $pos, SEEK_SET);
     }

     if (array_key_exists('__streamdata__', $data)) {
         return StreamObject::init_from_dict($data);
     }
     return $data;
 }
Example #2
0
 /**
  * Parses the cross-reference information of a PDF and fills the reader state.
  *
  * Starting from the end of the stream it expects, in this order, the
  * "%%EOF" marker, the numeric startxref offset and the "startxref" keyword
  * (read_next_end_line() presumably walks the file backwards line by line).
  * It then follows the chain of cross-reference sections through their /Prev
  * entries, handling both classic "xref" tables and cross-reference streams.
  *
  * Results are stored in:
  *  - $this->xref[generation][object number] = byte offset
  *  - $this->xref_obj_stm[object number]     = array(object stream number, index)
  *  - $this->trailer                         = merged trailer entries
  * In all three, the entry seen first (i.e. from the most recent section)
  * wins; later sections never overwrite it.
  *
  * Malformed input terminates the script through die().
  *
  * @param resource $stream Seekable stream containing the PDF file.
  *
  * @return void
  */
 function read($stream)
 {
     fseek($stream, -1, SEEK_END);
     $line = '';
     while (!$line) {
         $line = $this->read_next_end_line($stream);
     }
     if (substr($line, 0, 5) != '%%EOF') {
         die("Error reading PDF: EOF marker not found.");
     }
     $line = $this->read_next_end_line($stream);
     $startxref = (int) $line;
     $line = $this->read_next_end_line($stream);
     if (substr($line, 0, 9) != 'startxref') {
         die("Error reading PDF: startxref not found.");
     }

     $this->xref = array();
     $this->xref_obj_stm = array();
     $this->trailer = array();

     while (1) {
         fseek($stream, $startxref, SEEK_SET);
         $x = fread($stream, 1);

         if ($x === "x") {
             // Classic cross-reference table introduced by the "xref" keyword.
             $ref = fread($stream, 4);
             if (substr($ref, 0, 3) != 'ref') {
                 die("Error reading PDF: xref table read error.");
             }
             read_non_whitespace($stream);
             fseek($stream, -1, SEEK_CUR);

             // One iteration per subsection: "<first object number> <entry count>".
             while (1) {
                 $num = read_object($stream, $this);
                 read_non_whitespace($stream);
                 fseek($stream, -1, SEEK_CUR);
                 $size = read_object($stream, $this);
                 read_non_whitespace($stream);
                 fseek($stream, -1, SEEK_CUR);

                 $cnt = 0;
                 while ($cnt < $size) {
                     $line = fread($stream, 20);
                     // If the 20th byte is a digit or 't', it already belongs to
                     // what follows the entry (the entry was shorter than 20
                     // bytes), so give it back.
                     $last = substr($line, -1, 1);
                     if (in_array($last, array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't'), true)) {
                         fseek($stream, -1, SEEK_CUR);
                     }
                     $tmp = explode(' ', substr($line, 0, 16));
                     $offset = (int) $tmp[0];
                     $generation = (int) $tmp[1];
                     if (!array_key_exists($generation, $this->xref)) {
                         $this->xref[$generation] = array();
                     }
                     // Test for the key, not the value: an offset of 0 recorded
                     // earlier must not be overwritten by an older section.
                     if (!array_key_exists($num, $this->xref[$generation])) {
                         $this->xref[$generation][$num] = $offset;
                     }
                     $cnt += 1;
                     $num += 1;
                 }

                 read_non_whitespace($stream);
                 fseek($stream, -1, SEEK_CUR);
                 $trailertag = fread($stream, 7);
                 if ($trailertag === 'trailer') {
                     break;
                 }
                 // Not the trailer: another subsection follows, re-read it from its start.
                 fseek($stream, -7, SEEK_CUR);
             }

             read_non_whitespace($stream);
             fseek($stream, -1, SEEK_CUR);
             $new_trailer = read_object($stream, $this);
             foreach ($new_trailer as $key => $value) {
                 if (!array_key_exists($key, $this->trailer)) {
                     $this->trailer[$key] = $value;
                 }
             }
             if (array_key_exists('/Prev', $new_trailer)) {
                 $startxref = $new_trailer['/Prev'];
             } else {
                 break;
             }
         } elseif (is_string($x) && ctype_digit($x)) {
             // Cross-reference stream: an indirect object starts at startxref.
             fseek($stream, -1, SEEK_CUR);
             $hdr = $this->read_object_header($stream);
             $idnum = $hdr[0];
             $generation = $hdr[1];
             $xrefstream = read_object($stream, $this);
             assert($xrefstream->data['/Type'] == '/XRef');
             $this->cache_indirect_object($generation, $idnum, $xrefstream);
             $stream_data = $xrefstream->get_data();
             $cursor = 0;

             // /Index is optional; without it the stream covers objects 0 .. /Size - 1.
             $idx_pairs = array_key_exists('/Index', $xrefstream->data) ? $xrefstream->data['/Index'] : null;
             if (!$idx_pairs) {
                 $idx_pairs = array(0, $xrefstream->data['/Size']);
             }
             // /W holds the byte width of each field of an entry.
             $entry_sizes = $xrefstream->data['/W'];
             $field_count = count($entry_sizes);

             foreach ($this->_pairs($idx_pairs) as $pair) {
                 $num = $pair[0];
                 $size = $pair[1];
                 $cnt = 0;
                 while ($cnt < $size) {
                     // Decode one entry: field 0 is the entry type, fields 1
                     // and 2 are interpreted according to that type. Type 0
                     // (free object) fields are decoded but not stored.
                     for ($i = 0; $i < $field_count; $i++) {
                         $d = substr($stream_data, $cursor, $entry_sizes[$i]);
                         $cursor += $entry_sizes[$i];
                         $di = convert_to_int($d, $entry_sizes[$i]);
                         if ($i == 0) {
                             $xref_type = $di;
                         } elseif ($i == 1) {
                             if ($xref_type == 1) {
                                 $byte_offset = $di;
                             } elseif ($xref_type == 2) {
                                 $objstr_num = $di;
                             }
                         } elseif ($i == 2) {
                             if ($xref_type == 1) {
                                 $generation = $di;
                             } elseif ($xref_type == 2) {
                                 $objstr_idx = $di;
                             }
                         }
                     }

                     if ($xref_type == 1) {
                         // Object stored at a byte offset in the file.
                         if (!array_key_exists($generation, $this->xref)) {
                             $this->xref[$generation] = array();
                         }
                         if (!array_key_exists($num, $this->xref[$generation])) {
                             $this->xref[$generation][$num] = $byte_offset;
                         }
                     } elseif ($xref_type == 2) {
                         // Object stored inside an object stream.
                         if (!array_key_exists($num, $this->xref_obj_stm)) {
                             $this->xref_obj_stm[$num] = array($objstr_num, $objstr_idx);
                         }
                     }
                     $cnt += 1;
                     $num += 1;
                 }
             }

             // The stream dictionary doubles as the trailer for these entries.
             foreach (array('/Root', '/Info', '/ID') as $key) {
                 if (array_key_exists($key, $xrefstream->data) && !array_key_exists($key, $this->trailer)) {
                     $this->trailer[$key] = $xrefstream->data[$key];
                 }
             }
             if (array_key_exists('/Prev', $xrefstream->data)) {
                 $startxref = $xrefstream->data['/Prev'];
             } else {
                 break;
             }
         } else {
             // startxref does not point at an xref section. Search the 20 bytes
             // around it (10 before, 10 from it onwards) for the "xref" keyword
             // and retry from there.
             if (fseek($stream, -11, SEEK_CUR) !== 0) {
                 die("Error reading PDF: xref table not found.");
             }
             $tmp = fread($stream, 20);
             // strpos() reports a miss with false, never with -1.
             $xref_loc = is_string($tmp) ? strpos($tmp, 'xref') : false;
             if ($xref_loc === false) {
                 die("Error reading PDF: xref table not found.");
             }
             $startxref -= 10 - $xref_loc;
         }
     }
 }