示例#1
0
 /**
  * Extracts the text produced by the operators of a content stream.
  *
  * The stream contents are tokenized with TextObjectLexer. Operand tokens are
  * collected in a buffer until an operator token arrives; the operator is then
  * executed through runOperator() together with the buffered operands, and the
  * buffer is cleared. Operands found between array_start and array_end are
  * wrapped into a single Objects\PdfArray operand.
  *
  * Side effects: replaces $this->charMaps with $charMaps and resets
  * $this->data to array('font' => '').
  *
  * @param PdfStream $stream   Stream to read; it is rewound before its contents are fetched.
  * @param array     $charMaps Stored in $this->charMaps (presumably consulted by
  *                            runOperator() -- confirm against that method).
  *
  * @return string Concatenated 'content' of every runOperator() response whose 'type' is 'text'.
  *
  * @throws PdfException On an unknown or misplaced token, when an array is still
  *                      open at the end of the stream, when operands are left
  *                      over without an operator, or when the lexer returns a
  *                      token that does not advance the read position.
  */
 public function getText(PdfStream $stream, array $charMaps)
 {
     $this->charMaps = $charMaps;
     $this->data = array('font' => '');

     $textObjectLexer = new TextObjectLexer();
     $stream->rewind();
     $source = $stream->getContents();
     $offset = 0;
     $count = strlen($source);
     $text = '';
     // Either 'default' or 'in_array' (between array_start and array_end).
     $state = 'default';
     $operandBuffer = array();

     while ($offset < $count) {
         $token = $textObjectLexer->getNextTokenFromString($source, $offset);
         $tokenContent = substr($source, $offset, $token->newPos - $offset);

         switch ($token->tokenName) {
             case 'white_space':
                 // White space carries no meaning here.
                 break;

             case 'operator':
                 if ($state !== 'default') {
                     throw new PdfException('Unexpected token ' . $token->tokenName);
                 }
                 $response = $this->runOperator($operandBuffer, $tokenContent);
                 if ($response['type'] === 'text') {
                     $text .= $response['content'];
                 }
                 // Operands belong to exactly one operator.
                 $operandBuffer = array();
                 break;

             case 'numeric':
             case 'name':
             case 'boolean':
             case 'string':
             case 'null':
                 // Operands are buffered the same way inside and outside of an
                 // array; $state never holds a value other than those two.
                 $operandBuffer[] = $tokenContent;
                 break;

             case 'array_start':
                 if ($state !== 'default') {
                     throw new PdfException('Unexpected token ' . $token->tokenName);
                 }
                 if (count($operandBuffer) > 0) {
                     throw new PdfException('array_start but operand buffer not emtpy');
                 }
                 $state = 'in_array';
                 break;

             case 'array_end':
                 if ($state !== 'in_array') {
                     throw new PdfException('Unexpected token ' . $token->tokenName);
                 }
                 $newArray = new Objects\PdfArray();
                 foreach ($operandBuffer as $entry) {
                     $newArray[] = $entry;
                 }
                 // The collected entries collapse into one array operand.
                 $operandBuffer = array($newArray);
                 $state = 'default';
                 break;

             default:
                 throw new PdfException('Unknown token ' . $token->tokenName);
         }

         // A token that consumes no input would make this loop spin forever.
         if ($token->newPos <= $offset) {
             throw new PdfException('Lexer did not advance at offset ' . $offset);
         }
         $offset = $token->newPos;
     }

     if ($state !== 'default') {
         throw new PdfException('default state expected, ' . $state . ' given');
     }
     if (count($operandBuffer) !== 0) {
         throw new PdfException('Empty operand buffer expected');
     }

     return $text;
 }
示例#2
0
 /**
  * Runs one input from providerException through TextObjectLexer::tokenize()
  * and passes only if a PdfException is raised.
  *
  * @param mixed $test Data set supplied by providerException; handed to tokenize() unchanged.
  *
  * @expectedException mermshaus\Pdf\Parser\PdfException
  * @dataProvider providerException
  */
 public function testException($test)
 {
     $textObjectLexer = new TextObjectLexer();
     $textObjectLexer->tokenize($test);
 }