/**
 * Tokenizes the contents of a stream and collects the text produced by its
 * operators.
 *
 * The stream is rewound and read in full, then lexed token by token with a
 * TextObjectLexer. Operands are accumulated until an operator token is
 * reached; the operator is then executed through runOperator() and, when the
 * response is of type 'text', its content is appended to the result.
 * Tokens between array_start and array_end are gathered into a single
 * Objects\PdfArray operand.
 *
 * @param PdfStream $stream   Stream whose contents are lexed
 * @param array     $charMaps Stored in $this->charMaps before lexing starts
 *
 * @return string Concatenated content of all 'text' operator responses
 *
 * @throws PdfException On an unknown token, on a token that is not allowed
 *                      in the current state, or when the input ends inside
 *                      an array or with operands left over
 */
public function getText(PdfStream $stream, array $charMaps)
{
    $this->charMaps = $charMaps;
    $this->data     = array('font' => '');

    $lexer = new TextObjectLexer();

    $stream->rewind();
    $contents = $stream->getContents();
    $length   = strlen($contents);

    $collected = '';

    // Parser mode: either 'default' or 'in_array'
    $mode     = 'default';
    $operands = array();

    for ($position = 0; $position < $length; $position = $token->newPos) {
        $token  = $lexer->getNextTokenFromString($contents, $position);
        $lexeme = substr($contents, $position, $token->newPos - $position);

        switch ($token->tokenName) {
            case 'white_space':
                // White space carries no meaning here
                break;

            case 'operator':
                if ($mode !== 'default') {
                    throw new PdfException('Unexpected token ' . $token->tokenName);
                }

                $response = $this->runOperator($operands, $lexeme);

                if ($response['type'] === 'text') {
                    $collected .= $response['content'];
                }

                // Every operator consumes all operands gathered so far
                $operands = array();
                break;

            case 'numeric':
            case 'name':
            case 'boolean':
            case 'string':
            case 'null':
                if ($mode !== 'default' && $mode !== 'in_array') {
                    throw new PdfException('Unexpected token ' . $token->tokenName);
                }

                $operands[] = $lexeme;
                break;

            case 'array_start':
                if ($mode !== 'default') {
                    throw new PdfException('Unexpected token ' . $token->tokenName);
                }

                if (count($operands) > 0) {
                    throw new PdfException('array_start but operand buffer not emtpy');
                }

                $mode = 'in_array';
                break;

            case 'array_end':
                if ($mode !== 'in_array') {
                    throw new PdfException('Unexpected token ' . $token->tokenName);
                }

                // Collapse the buffered entries into one array operand
                $pdfArray = new Objects\PdfArray();

                foreach ($operands as $item) {
                    $pdfArray[] = $item;
                }

                $operands = array($pdfArray);
                $mode     = 'default';
                break;

            default:
                throw new PdfException('Unknown token ' . $token->tokenName);
        }
    }

    if ($mode !== 'default') {
        throw new PdfException('default state expected, ' . $mode . ' given');
    }

    if (count($operands) !== 0) {
        throw new PdfException('Empty operand buffer expected');
    }

    return $collected;
}
/**
 * Feeds one input from the data provider to TextObjectLexer::tokenize().
 *
 * The annotations below declare that a PdfException is expected for each
 * provided input.
 *
 * @expectedException mermshaus\Pdf\Parser\PdfException
 * @dataProvider providerException
 */
public function testException($test)
{
    $textObjectLexer = new TextObjectLexer();

    $textObjectLexer->tokenize($test);
}