Esempio n. 1
0
 /**
  * Collect the document details and store them in $this->details.
  *
  * The details start from the header of the trailer's "Info" entry when the
  * trailer has one (an empty array otherwise); a "Pages" entry holding the
  * page count is then added. If the pages cannot be listed, the count is
  * recorded as 0 instead of letting the exception propagate.
  */
 protected function buildDetails()
 {
     if ($this->trailer->has('Info')) {
         // Seed the details with the document information dictionary.
         /** @var Object $info */
         $info = $this->trailer->get('Info');
         $details = $info->getHeader()->getDetails();
     } else {
         $details = array();
     }

     try {
         $details['Pages'] = count($this->getPages());
     } catch (\Exception $e) {
         // Best effort: a document whose pages cannot be resolved reports none.
         $details['Pages'] = 0;
     }

     $this->details = $details;
 }
Esempio n. 2
0
 /**
  * Create the object instance matching the header's "Type" entry.
  *
  * "XObject" and "Font" headers are further refined through their "Subtype"
  * entry: XObjects become Image or Form instances, fonts use a dedicated
  * Font<Subtype> class when one exists. Every unrecognised type or subtype
  * falls back to a generic Object (or to the base Font class for fonts).
  *
  * @param Document $document
  * @param Header   $header
  * @param string   $content
  *
  * @return Object
  */
 public static function factory(Document $document, Header $header, $content)
 {
     $type = $header->get('Type')->getContent();

     switch ($type) {
         case 'XObject':
             $kind = $header->get('Subtype')->getContent();
             // Loose comparison on purpose: same matching rules as a switch.
             if ($kind == 'Image') {
                 return new Image($document, $header, $content);
             }
             if ($kind == 'Form') {
                 return new Form($document, $header, $content);
             }

             return new Object($document, $header, $content);

         case 'Pages':
             return new Pages($document, $header, $content);

         case 'Page':
             return new Page($document, $header, $content);

         case 'Encoding':
             return new Encoding($document, $header, $content);

         case 'Font':
             // Prefer a specialised font class named after the subtype.
             $fontClass = '\\Smalot\\PdfParser\\Font\\Font' . $header->get('Subtype')->getContent();
             if (!class_exists($fontClass)) {
                 return new Font($document, $header, $content);
             }

             return new $fontClass($document, $header, $content);
     }

     // Any other type is represented by the generic object class.
     return new Object($document, $header, $content);
 }
Esempio n. 3
0
 /**
  * Build the object described by a raw structure and register it.
  *
  * Each part of the structure either replaces the current header (array,
  * dictionary or single element) or provides the stream content. Once all
  * parts are processed, the resulting object is stored in $this->objects
  * under $id unless an object is already registered for that id.
  *
  * A stream whose header type is "ObjStm" is handled differently: its
  * content starts with an index of "<object number> <offset>" pairs followed
  * by the embedded objects. Every embedded object is registered under
  * "<object number>_0" and the method returns without registering the
  * container stream itself.
  *
  * @param string   $id
  * @param array    $structure
  * @param Document $document
  */
 protected function parseObject($id, $structure, $document)
 {
     $header = new Header(array(), $document);
     $content = '';
     foreach ($structure as $part) {
         switch ($part[0]) {
             case '[':
                 $elements = array();
                 foreach ($part[1] as $sub_element) {
                     $elements[] = $this->parseHeaderElement($sub_element[0], $sub_element[1], $document);
                 }
                 $header = new Header($elements, $document);
                 break;
             case '<<':
                 $header = $this->parseHeader($part[1], $document);
                 break;
             case 'stream':
                 $content = isset($part[3][0]) ? $part[3][0] : $part[1];
                 if ($header->get('Type')->equals('ObjStm')) {
                     $match = array();
                     // Split the leading index (pairs of integers) from the embedded objects.
                     preg_match('/^((\\d+\\s+\\d+\\s*)*)(.*)$/s', $content, $match);
                     $content = $match[3];
                     // Extract the individual index entries.
                     $xrefs = preg_split('/(\\d+\\s+\\d+\\s*)/s', $match[1], -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
                     $table = array();
                     foreach ($xrefs as $xref) {
                         // The pattern above accepts any whitespace run between the two
                         // integers, so split on whitespace rather than on a single space.
                         list($object_number, $offset) = preg_split('/\\s+/', trim($xref));
                         $table[$offset] = $object_number;
                     }
                     // Order the entries by offset so that each object ends where the next begins.
                     ksort($table);
                     $numbers = array_values($table);
                     $offsets = array_keys($table);
                     foreach ($offsets as $index => $offset) {
                         $sub_id = $numbers[$index] . '_0';
                         $next_offset = isset($offsets[$index + 1]) ? $offsets[$index + 1] : strlen($content);
                         $sub_content = substr($content, $offset, $next_offset - $offset);
                         $sub_header = Header::parse($sub_content, $document);
                         $this->objects[$sub_id] = Object::factory($document, $sub_header, '');
                     }
                     // The container stream itself is not stored.
                     return;
                 }
                 break;
             default:
                 // NOTE(review): $part is indexed like an array just below, so this
                 // comparison with the string 'null' looks like it can never be
                 // false - confirm whether $part[0] was intended.
                 if ($part != 'null') {
                     $element = $this->parseHeaderElement($part[0], $part[1], $document);
                     if ($element) {
                         $header = new Header(array($element), $document);
                     }
                 }
                 break;
         }
     }
     if (!isset($this->objects[$id])) {
         $this->objects[$id] = Object::factory($document, $header, $content);
     }
 }
Esempio n. 4
0
 /**
  * Checks that the details of a parsed header come back as plain nested PHP
  * arrays, including arrays and dictionaries nested inside arrays.
  */
 public function testGetDetails()
 {
     // Disabled variant based on a sample file, kept for reference:
     //        // Document with text.
     //        $filename = __DIR__ . '/../../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
     //        $parser   = new \Smalot\PdfParser\Parser();
     //        $document = $parser->parseFile($filename);
     //        $object   = $document->getObjectById('3_0');
     //        /** @var \Smalot\PdfParser\Element\ElementArray $kids */
     //        $kids    = $object->get('Kids');
     //        $details = $kids->getDetails();
     //
     //        $this->assert->array($details)->hasSize(1);
     //        $this->assert->string($details[0]['Type'])->isEqualTo('Page');

     // Expected structure for the raw header below.
     $expected = array(
         'Type' => 'Page',
         'Types' => array(8),
         'Sizes' => array(
             1,
             2,
             3,
             4,
             5,
             array('Subtype' => 'XObject'),
             array(8, array(9, array('FontSize' => 10))),
         ),
     );

     $document = new Document();
     $raw = '<</Type/Page/Types[8]/Sizes[1 2 3 4 5 <</Subtype/XObject>> [8 [9 <</FontSize 10>>]]]>>';
     $parsed = Header::parse($raw, $document);
     $actual = $parsed->getDetails();

     // Three top-level keys: Type, Types and Sizes.
     $this->assert->array($actual)->hasSize(3);
     $this->assert->array($actual)->isEqualTo($expected);
 }
Esempio n. 5
0
 /**
  * Exercises Document::getPages() in four situations: an empty document,
  * objects of type Page only, Page objects referenced by Pages nodes, and a
  * Catalog entry pointing at a Pages tree.
  */
 public function testGetPages()
 {
     $page_class = '\\Smalot\\PdfParser\\Page';

     // Missing catalog: either "false" is returned or an exception is raised.
     $document = new \Smalot\PdfParser\Document();
     try {
         $pages = $document->getPages();
         $this->assert->boolean($pages)->isEqualTo(false);
     } catch (\Exception $e) {
         $this->assert->object($e)->isInstanceOf('\\Exception');
     }

     // Builds a Page object from a freshly parsed "/Type/Page" header.
     $new_page = function () use ($document) {
         $header = \Smalot\PdfParser\Header::parse('<</Type/Page>>', $document);

         return new \Smalot\PdfParser\Page($document, $header);
     };
     // Builds a Pages object from the given raw header.
     $new_pages = function ($raw) use ($document) {
         $header = \Smalot\PdfParser\Header::parse($raw, $document);

         return new \Smalot\PdfParser\Pages($document, $header);
     };

     // Listing pages from type Page
     $document->setObjects(array(
         1 => $new_page(),
         2 => $new_page(),
     ));
     $pages = $document->getPages();
     $this->assert->integer(count($pages))->isEqualTo(2);
     for ($i = 0; $i < 2; $i++) {
         $this->assert->object($pages[$i])->isInstanceOf($page_class);
     }

     // Listing pages from type Pages (kids)
     $document->setObjects(array(
         '1_0' => $new_page(),
         '2_0' => $new_page(),
         '3_0' => $new_page(),
         '4_0' => $new_pages('<</Type/Pages/Kids[1 0 R 2 0 R]>>'),
         '5_0' => $new_pages('<</Type/Pages/Kids[3 0 R]>>'),
     ));
     $pages = $document->getPages();
     $this->assert->integer(count($pages))->isEqualTo(3);
     for ($i = 0; $i < 3; $i++) {
         $this->assert->object($pages[$i])->isInstanceOf($page_class);
     }

     // Listing pages from type Catalog
     // NOTE(review): the catalog header is wrapped in a Pages instance here,
     // exactly as in the original test - confirm this is intended.
     $document->setObjects(array(
         '1_0' => $new_page(),
         '2_0' => $new_page(),
         '3_0' => $new_page(),
         '4_0' => $new_pages('<</Type/Pages/Kids[1 0 R 2 0 R]>>'),
         '5_0' => $new_pages('<</Type/Pages/Kids[4 0 R 3 0 R]>>'),
         '6_0' => $new_pages('<</Type/Catalog/Pages 5 0 R >>'),
     ));
     $pages = $document->getPages();
     $this->assert->integer(count($pages))->isEqualTo(3);
     for ($i = 0; $i < 3; $i++) {
         $this->assert->object($pages[$i])->isInstanceOf($page_class);
     }
 }
Esempio n. 6
0
 /**
  * Checks reference resolution through a header: "Font 5 0 R" points at a
  * registered object, while "Resources 8 0 R" points at an id that is not
  * registered in the document.
  */
 public function testResolveXRef()
 {
     $document = new \Smalot\PdfParser\Document();

     $offset = 0;
     $header = \Smalot\PdfParser\Header::parse(
         '<</Type/Page/SubType/Text/Font 5 0 R/Resources 8 0 R>>foo',
         $document,
         $offset
     );

     // Only object 5_0 is registered in the document.
     $page = new \Smalot\PdfParser\Page($document, $header);
     $document->setObjects(array('5_0' => $page));

     $this->assert->object($header->get('Font'))->isInstanceOf('\\Smalot\\PdfParser\\Object');

     try {
         $this->assert->object($header->get('Resources'))->isInstanceOf('\\Smalot\\PdfParser\\Element\\ElementMissing');
         // Reaching this point means nothing was thrown: force a failure.
         $this->assert->boolean(true)->isEqualTo(false);
     } catch (\Exception $e) {
         $this->assert->exception($e)->hasMessage('Missing object reference #8_0.');
     }
 }