/**
 * Collect the document details and store them in $this->details.
 *
 * The details are the entries of the trailer's "Info" object (when the
 * trailer has one) plus a "Pages" entry holding the page count.
 */
protected function buildDetails()
{
    // Start from the Info header details when the trailer exposes them.
    $details = $this->trailer->has('Info')
        ? $this->trailer->get('Info')->getHeader()->getDetails()
        : array();

    // Listing the pages may throw; in that case report zero pages.
    try {
        $pageCount = count($this->getPages());
    } catch (\Exception $e) {
        $pageCount = 0;
    }

    $details['Pages'] = $pageCount;
    $this->details    = $details;
}
/**
 * Instantiate the class matching the header's "Type" entry (and, for the
 * XObject and Font types, its "Subtype" entry).
 *
 * Any type or subtype without a dedicated class yields a generic Object;
 * an unknown font subtype yields a generic Font.
 *
 * @param Document $document
 * @param Header   $header
 * @param string   $content
 *
 * @return Object
 */
public static function factory(Document $document, Header $header, $content)
{
    $type = $header->get('Type')->getContent();

    switch ($type) {
        case 'XObject':
            $subtype = $header->get('Subtype')->getContent();

            switch ($subtype) {
                case 'Image':
                    return new Image($document, $header, $content);

                case 'Form':
                    return new Form($document, $header, $content);
            }

            // XObject with any other subtype.
            return new Object($document, $header, $content);

        case 'Pages':
            return new Pages($document, $header, $content);

        case 'Page':
            return new Page($document, $header, $content);

        case 'Encoding':
            return new Encoding($document, $header, $content);

        case 'Font':
            // Look for a class dedicated to this font subtype.
            $fontClass = '\\Smalot\\PdfParser\\Font\\Font' . $header->get('Subtype')->getContent();

            if (class_exists($fontClass)) {
                return new $fontClass($document, $header, $content);
            }

            return new Font($document, $header, $content);
    }

    // No dedicated class for this type.
    return new Object($document, $header, $content);
}
/**
 * Build the object identified by $id from its parsed structure and register
 * it in $this->objects (unless an object with that id is already present).
 *
 * Each part of $structure is dispatched on its first item:
 *  - '['      : the sub-elements become a new header;
 *  - '<<'     : the dictionary is parsed into a new header;
 *  - 'stream' : the stream supplies the object's content. When the current
 *               header's Type equals 'ObjStm', the embedded objects are
 *               registered individually under "<number>_0" and the method
 *               returns without registering the stream itself;
 *  - other    : the part is parsed as a single header element.
 *
 * @param string   $id
 * @param array    $structure
 * @param Document $document
 */
protected function parseObject($id, $structure, $document)
{
    $header  = new Header(array(), $document);
    $content = '';

    foreach ($structure as $part) {
        switch ($part[0]) {
            case '[':
                $elements = array();

                foreach ($part[1] as $sub_element) {
                    $elements[] = $this->parseHeaderElement($sub_element[0], $sub_element[1], $document);
                }

                $header = new Header($elements, $document);
                break;

            case '<<':
                $header = $this->parseHeader($part[1], $document);
                break;

            case 'stream':
                $content = isset($part[3][0]) ? $part[3][0] : $part[1];

                if ($header->get('Type')->equals('ObjStm')) {
                    $match = array();

                    // Split the leading "number offset" pairs from the packed object data.
                    if (!preg_match('/^((\\d+\\s+\\d+\\s*)*)(.*)$/s', $content, $match)) {
                        // The pattern matches any input, so this only happens on a
                        // PCRE failure; there is nothing to extract in that case.
                        return;
                    }

                    $stream_content = $match[3];

                    // Extract the individual "number offset" pairs.
                    $xrefs = preg_split('/(\\d+\\s+\\d+\\s*)/s', $match[1], -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

                    $table = array();

                    foreach ($xrefs as $xref) {
                        // The pairs were matched with \s+ between the two numbers, so
                        // split on any whitespace run; a single-space explode() breaks
                        // on newlines, tabs or repeated spaces.
                        list($object_number, $offset) = preg_split('/\\s+/', trim($xref));
                        $table[$offset] = $object_number;
                    }

                    // Order by offset so each object ends where the next one starts.
                    ksort($table);

                    $numbers = array_values($table);
                    $offsets = array_keys($table);

                    foreach ($offsets as $index => $offset) {
                        $next_offset = isset($offsets[$index + 1]) ? $offsets[$index + 1] : strlen($stream_content);
                        $sub_content = substr($stream_content, $offset, $next_offset - $offset);
                        $sub_header  = Header::parse($sub_content, $document);

                        $this->objects[$numbers[$index] . '_0'] = Object::factory($document, $sub_header, '');
                    }

                    // The object stream itself is not stored.
                    return;
                }
                break;

            default:
                if ($part != 'null') {
                    $element = $this->parseHeaderElement($part[0], $part[1], $document);

                    if ($element) {
                        $header = new Header(array($element), $document);
                    }
                }
                break;
        }
    }

    if (!isset($this->objects[$id])) {
        $this->objects[$id] = Object::factory($document, $header, $content);
    }
}
/**
 * getDetails() on a parsed header must return plain PHP arrays, including
 * for nested arrays and dictionaries.
 */
public function testGetDetails()
{
    // NOTE: a variant of this test driven by the sample file
    // Document1_pdfcreator_nocompressed.pdf (details of the "Kids" entry of
    // object 3_0, expected to hold a single 'Page') is currently disabled.

    $document = new Document();
    $raw      = '<</Type/Page/Types[8]/Sizes[1 2 3 4 5 <</Subtype/XObject>> [8 [9 <</FontSize 10>>]]]>>';

    $expected = array(
        'Type'  => 'Page',
        'Types' => array(8),
        'Sizes' => array(
            1,
            2,
            3,
            4,
            5,
            array('Subtype' => 'XObject'),
            array(8, array(9, array('FontSize' => 10))),
        ),
    );

    $parsed  = Header::parse($raw, $document);
    $details = $parsed->getDetails();

    $this->assert->array($details)->hasSize(3);
    $this->assert->array($details)->isEqualTo($expected);
}
/**
 * getPages() is exercised against four object sets: an empty document,
 * bare Page objects, Pages nodes with kids, and a Catalog entry pointing
 * at a Pages tree.
 */
public function testGetPages()
{
    $pageClass      = '\\Smalot\\PdfParser\\Page';
    $pageDictionary = '<</Type/Page>>';

    // 1) No catalog at all: either false is returned or an exception is thrown.
    $document = new \Smalot\PdfParser\Document();

    try {
        $pages = $document->getPages();
        $this->assert->boolean($pages)->isEqualTo(false);
    } catch (\Exception $e) {
        $this->assert->object($e)->isInstanceOf('\\Exception');
    }

    // 2) Pages are listed straight from the objects of type Page.
    $objects = array();
    foreach (array(1, 2) as $key) {
        $pageHeader    = \Smalot\PdfParser\Header::parse($pageDictionary, $document);
        $objects[$key] = new \Smalot\PdfParser\Page($document, $pageHeader);
    }
    $document->setObjects($objects);

    $pages = $document->getPages();
    $this->assert->integer(count($pages))->isEqualTo(2);
    for ($i = 0; $i < 2; $i++) {
        $this->assert->object($pages[$i])->isInstanceOf($pageClass);
    }

    // 3) Pages are listed through the kids of the objects of type Pages.
    $objects = array();
    foreach (array('1_0', '2_0', '3_0') as $key) {
        $pageHeader    = \Smalot\PdfParser\Header::parse($pageDictionary, $document);
        $objects[$key] = new \Smalot\PdfParser\Page($document, $pageHeader);
    }

    $kidsDictionary = '<</Type/Pages/Kids[1 0 R 2 0 R]>>';
    $kidsHeader     = \Smalot\PdfParser\Header::parse($kidsDictionary, $document);
    $objects['4_0'] = new \Smalot\PdfParser\Pages($document, $kidsHeader);

    $kidsDictionary = '<</Type/Pages/Kids[3 0 R]>>';
    $kidsHeader     = \Smalot\PdfParser\Header::parse($kidsDictionary, $document);
    $objects['5_0'] = new \Smalot\PdfParser\Pages($document, $kidsHeader);

    $document->setObjects($objects);

    $pages = $document->getPages();
    $this->assert->integer(count($pages))->isEqualTo(3);
    for ($i = 0; $i < 3; $i++) {
        $this->assert->object($pages[$i])->isInstanceOf($pageClass);
    }

    // 4) Pages are listed starting from the object of type Catalog.
    $objects = array();
    foreach (array('1_0', '2_0', '3_0') as $key) {
        $pageHeader    = \Smalot\PdfParser\Header::parse($pageDictionary, $document);
        $objects[$key] = new \Smalot\PdfParser\Page($document, $pageHeader);
    }

    $kidsDictionary = '<</Type/Pages/Kids[1 0 R 2 0 R]>>';
    $kidsHeader     = \Smalot\PdfParser\Header::parse($kidsDictionary, $document);
    $objects['4_0'] = new \Smalot\PdfParser\Pages($document, $kidsHeader);

    $kidsDictionary = '<</Type/Pages/Kids[4 0 R 3 0 R]>>';
    $kidsHeader     = \Smalot\PdfParser\Header::parse($kidsDictionary, $document);
    $objects['5_0'] = new \Smalot\PdfParser\Pages($document, $kidsHeader);

    $catalogDictionary = '<</Type/Catalog/Pages 5 0 R >>';
    $catalogHeader     = \Smalot\PdfParser\Header::parse($catalogDictionary, $document);
    $objects['6_0']    = new \Smalot\PdfParser\Pages($document, $catalogHeader);

    $document->setObjects($objects);

    $pages = $document->getPages();
    $this->assert->integer(count($pages))->isEqualTo(3);
    for ($i = 0; $i < 3; $i++) {
        $this->assert->object($pages[$i])->isInstanceOf($pageClass);
    }
}
/**
 * A header reference to a registered object ("5 0 R") must resolve to that
 * object; a reference to an unregistered one ("8 0 R") must either yield an
 * ElementMissing or raise the "Missing object reference" exception.
 */
public function testResolveXRef()
{
    $document = new \Smalot\PdfParser\Document();

    $raw    = '<</Type/Page/SubType/Text/Font 5 0 R/Resources 8 0 R>>foo';
    $offset = 0;
    $header = \Smalot\PdfParser\Header::parse($raw, $document, $offset);

    // Only object 5_0 is registered; 8_0 is deliberately left out.
    $page = new \Smalot\PdfParser\Page($document, $header);
    $document->setObjects(array('5_0' => $page));

    $this->assert->object($header->get('Font'))->isInstanceOf('\\Smalot\\PdfParser\\Object');

    try {
        $this->assert->object($header->get('Resources'))->isInstanceOf('\\Smalot\\PdfParser\\Element\\ElementMissing');

        // Deliberately failing assertion, reached only when no exception was raised above.
        $this->assert->boolean(true)->isEqualTo(false);
    } catch (\Exception $e) {
        $this->assert->exception($e)->hasMessage('Missing object reference #8_0.');
    }
}