Esempio n. 1
0
 /**
  * Builds a character map from the first bfchar section of a CMap stream.
  *
  * Lines ahead of "<n> beginbfchar" are ignored. Between "beginbfchar" and
  * "endbfchar" every "<src> <dst>" hex pair is stored in the map: the key is
  * the decimal form of src, the value is the decimal form of dst passed
  * through $this->utf8(). Parsing stops at the first "endbfchar".
  *
  * @param PdfStream $stream
  * @return CharMap
  * @throws PdfException on an unexpected line inside the section, or when
  *                      the input ends before "endbfchar" was seen
  */
 public function parse(PdfStream $stream)
 {
     $charmap = new CharMap();
     $matches = array();
     $stream->rewind();
     // Possible states: want_begin, want_line_or_end, end
     $state = 'want_begin';
     while ($state !== 'end') {
         $rawLine = $stream->gets();
         // Check for end of input BEFORE trimming: trim(false) yields '',
         // which looks like a blank line and previously kept the loop
         // spinning forever on streams without a bfchar section.
         // NOTE(review): assumes gets() returns a non-string (false/null)
         // once the stream is exhausted — confirm against PdfStream.
         if (!is_string($rawLine)) {
             break;
         }
         $line = trim($rawLine);

         if (1 === preg_match('/^[0-9]+ beginbfchar$/', $line)) {
             if ($state !== 'want_begin') {
                 throw new PdfException('Unexpected beginbfchar');
             }
             $state = 'want_line_or_end';
         } elseif (1 === preg_match('/^endbfchar$/', $line)) {
             if ($state !== 'want_line_or_end') {
                 throw new PdfException('Unexpected endbfchar');
             }
             $state = 'end';
         } elseif (1 === preg_match('/^<([0-9A-Fa-f]+)> <([0-9A-Fa-f]+)>$/', $line, $matches)) {
             if ($state === 'want_line_or_end') {
                 $key = base_convert($matches[1], 16, 10);
                 $value = $this->utf8(base_convert($matches[2], 16, 10));
                 $charmap[$key] = $value;
             } elseif ($state !== 'want_begin') {
                 throw new PdfException('Unknown state');
             }
             // In want_begin a mapping-shaped line is simply skipped.
         } elseif ($state !== 'want_begin') {
             // Anything else is only tolerated before the section starts.
             throw new PdfException('Unkown state (default)');
         }
     }
     if ($state !== 'end') {
         throw new PdfException('end not reached');
     }
     return $charmap;
 }
Esempio n. 2
0
 /**
  * Parses the indirect object ("<id> <revision> obj ... endobj") that is
  * expected at the given stream offset.
  *
  * After the object value, the next key word decides the shape: "endobj"
  * closes a plain object; "stream" requires the value to be a dictionary,
  * which is then wrapped in a PdfStreamObject together with the data start
  * offset and the resolved /Length.
  *
  * @param int $offset position in the stream at which the object header starts
  * @return PdfIndirectObject
  * @throws PdfException if the header, key word, stream line ending,
  *                      "endstream" or "endobj" is not found where expected
  */
 protected function parseObjectFromOffset($offset)
 {
     $matches = array();
     $this->stream->seek($offset);
     // assert "<x> <y> obj"
     $headerPattern = [['/[0-9]/', '+'], ['/\\s/', '+'], ['/[0-9]/', '+'], ['/\\s/', '+'], 'obj', ['/[^a-z]/']];
     if (false === $this->stream->consume($headerPattern, $matches)) {
         throw new PdfException('No object at ' . $offset);
     }
     // Step back over the single trailing non-letter required by the pattern.
     $this->stream->seek(-1, SEEK_CUR);
     $id = (int) $matches[0];
     $revision = (int) $matches[2];
     // Load PDF object
     list($endPos, $value) = $this->objectParser->getNextObjectFromString($this->source, $this->stream->tell());
     $this->stream->seek($endPos);
     // Assert key word
     if (false === $this->stream->consume([['/\\s/', '*'], ['/[a-z]/', '+'], ['/[^a-z]/']], $matches)) {
         throw new PdfException('No key word found at ' . $this->stream->tell());
     }
     $this->stream->seek(-1, SEEK_CUR);
     $keyWord = $matches[1];
     switch ($keyWord) {
         case 'stream':
             if (!$value instanceof PdfDictionary) {
                 throw new PdfException('Value has to be a dictionary');
             }
             // Resolving /Length may move the stream position, hence the
             // push()/pop() bracket around it.
             // NOTE(review): presumably push()/pop() save and restore the
             // position — confirm against PdfStream.
             $this->stream->push();
             $length = $this->resolveRef($value->get('/Length'));
             $this->stream->pop();
             $chars = $this->stream->read(2);
             if ($chars === "\r\n") {
                 // nop
             } elseif (is_string($chars) && $chars !== '' && $chars[0] === "\n") {
                 // Only one of the two bytes read belongs to the line ending.
                 $this->stream->seek(-1, SEEK_CUR);
             } else {
                 // Also reached for an empty/failed read (truncated file),
                 // which previously triggered a string-offset warning first.
                 throw new PdfException('Expected \\n or \\r\\n after stream key word');
             }
             $value = new PdfStreamObject($value, $this->stream->tell(), $length);
             $this->stream->seek($length, SEEK_CUR);
             // Assert "endstream"
             if (!$this->stream->consume([['/\\s/', '*'], 'endstream'])) {
                 throw new PdfException('Expected endstream key word');
             }
             // Assert "endobj"
             if (!$this->stream->consume([['/\\s/', '*'], 'endobj'])) {
                 throw new PdfException('Expected endobj key word');
             }
             break;
         case 'endobj':
             // nop
             break;
         default:
             throw new PdfException('Unexpected key word ' . $keyWord);
     }
     return new PdfIndirectObject($id, $revision, $value);
 }
Esempio n. 3
0
 /**
  * Reads the raw bytes of a stream object and runs them through the
  * decoders named by the dictionary's /Filter entry, in order.
  *
  * The position of the underlying stream is bracketed with push()/pop()
  * while the raw data is read.
  *
  * @param PdfStreamObject $streamObj
  * @return string the decoded data, or the raw data when /Filter is null
  * @throws PdfException if no decoder is registered for a filter
  */
 public function decodeStream(PdfStreamObject $streamObj)
 {
     $this->stream->push();
     $this->stream->seek($streamObj->getStartOffset());
     $rawData = $this->stream->read($streamObj->getLength());
     $this->stream->pop();

     $filters = $streamObj->getDictionary()->get('/Filter');
     // /Filter is optional in PDF; without it the data is stored as-is.
     // NOTE(review): assumes get() yields null for a missing key — confirm.
     if ($filters === null) {
         return $rawData;
     }
     // A filter list may already be a PdfArray. is_array() is false for
     // objects, so test for it explicitly; otherwise the list would be
     // wrapped a second time and the lookup below would get an object key.
     if (!is_array($filters) && !$filters instanceof PdfArray) {
         $filters = new PdfArray(array($filters));
     }

     $decoded = $rawData;
     foreach ($filters as $filter) {
         if (!array_key_exists($filter, $this->decoders)) {
             throw new PdfException(sprintf('No decoder found for %s.', $filter));
         }
         $decoded = $this->decoders[$filter]->decode($decoded);
     }
     return $decoded;
 }
Esempio n. 4
0
 /**
  * Resets this instance and initialises it from the given PDF stream:
  * cross reference table and trailer, object repository, and stream decoder.
  *
  * The trailer's /Info entry is not read here.
  *
  * @param PdfStream $stream
  */
 public function loadFromStream(PdfStream $stream)
 {
     $this->reset();

     // Each step that touches the stream is bracketed by push()/pop().
     // NOTE(review): presumably these save and restore the stream
     // position — confirm against PdfStream.
     $stream->push();
     list($xrefTable, $trailerDict) = $this->crossReferenceTableParser->parse($stream);
     $this->crossReferenceTable = $xrefTable;
     $this->trailer = $trailerDict;
     $stream->pop();

     $stream->push();
     $this->objectRepository = new ObjectRepository($stream, $this->crossReferenceTable);
     $stream->pop();

     $this->stream = $stream;
     $this->streamDecoder = new StreamDecoder($stream);
 }
 /**
  * Reads the "trailer" key word line and the object that follows it.
  *
  * @param PdfStream $stream stream whose next line must be "trailer"
  * @return PdfDictionary the object parsed from the stream contents
  * @throws PdfException if the key word is missing or the parsed object
  *                      is not a dictionary
  */
 protected function parseTrailer(PdfStream $stream)
 {
     $keyWord = trim($stream->gets());
     if ('trailer' !== $keyWord) {
         throw new PdfException('trailer expected');
     }

     $contents = $stream->getContents();
     // Only the second element of the parser result (the object) is needed.
     list(, $trailerDict) = $this->objectParser->getNextObjectFromString($contents, 0);
     if ($trailerDict instanceof PdfDictionary) {
         return $trailerDict;
     }

     throw new PdfException('No dictionary found in trailer');
 }
Esempio n. 6
0
 /**
  * Tokenises the stream contents and collects the text produced by the
  * operators found in it.
  *
  * Operands are buffered until an operator token arrives; the operator is
  * then run via runOperator() and, if its response has type "text", the
  * response content is appended to the result. An array ("[" ... "]") is
  * collapsed into a single PdfArray operand.
  *
  * @param PdfStream $stream
  * @param array $charMaps stored on the instance before processing starts
  * @return string
  * @throws PdfException on an unknown or misplaced token, or when the input
  *                      ends inside an array or with unused operands
  */
 public function getText(PdfStream $stream, array $charMaps)
 {
     $this->charMaps = $charMaps;
     $this->data = array('font' => '');
     $lexer = new TextObjectLexer();
     $stream->rewind();
     $source = $stream->getContents();
     $sourceLength = strlen($source);
     $collected = '';
     // Possible states: default, in_array
     $state = 'default';
     $operands = array();

     for ($offset = 0; $offset < $sourceLength; $offset = $token->newPos) {
         $token = $lexer->getNextTokenFromString($source, $offset);
         $tokenText = substr($source, $offset, $token->newPos - $offset);

         switch ($token->tokenName) {
             case 'white_space':
                 // White space carries no meaning here.
                 break;

             case 'operator':
                 if ($state !== 'default') {
                     throw new PdfException('Unexpected token ' . $token->tokenName);
                 }
                 $result = $this->runOperator($operands, $tokenText);
                 if ($result['type'] === 'text') {
                     $collected .= $result['content'];
                 }
                 // The operator consumed everything buffered so far.
                 $operands = array();
                 break;

             case 'numeric':
             case 'name':
             case 'boolean':
             case 'string':
             case 'null':
                 // Plain operands are buffered both inside and outside arrays.
                 if ($state !== 'default' && $state !== 'in_array') {
                     throw new PdfException('Unexpected token ' . $token->tokenName);
                 }
                 $operands[] = $tokenText;
                 break;

             case 'array_start':
                 if ($state !== 'default') {
                     throw new PdfException('Unexpected token ' . $token->tokenName);
                 }
                 // An array is only accepted as the first operand.
                 if (count($operands) > 0) {
                     throw new PdfException('array_start but operand buffer not emtpy');
                 }
                 $state = 'in_array';
                 break;

             case 'array_end':
                 if ($state !== 'in_array') {
                     throw new PdfException('Unexpected token ' . $token->tokenName);
                 }
                 // Replace the buffered entries by one PdfArray holding them.
                 $pdfArray = new Objects\PdfArray();
                 foreach ($operands as $operand) {
                     $pdfArray[] = $operand;
                 }
                 $operands = array($pdfArray);
                 $state = 'default';
                 break;

             default:
                 throw new PdfException('Unknown token ' . $token->tokenName);
         }
     }

     if ($state !== 'default') {
         throw new PdfException('default state expected, ' . $state . ' given');
     }
     if (count($operands) !== 0) {
         throw new PdfException('Empty operand buffer expected');
     }
     return $collected;
 }