/**
 * Loads the writer-lorem.pdf fixture through PdfDocument and checks that
 * the document reports exactly 40 objects.
 */
public function testSomething()
{
    $file = __DIR__ . '/data/pdfs/writer-lorem.pdf';

    // fopen() returns false when the fixture is missing or unreadable.
    // Fail right here with a clear message instead of handing false to
    // PdfStream and getting an unrelated error further down.
    $handle = fopen($file, 'rb');
    $this->assertTrue(is_resource($handle), 'Unable to open fixture: ' . $file);

    $pdfDocument = new PdfDocument();
    $pdfDocument->loadFromStream(new PdfStream($handle));

    // assertCount() reports the expected and the actual number of elements
    // when the check fails, unlike assertEquals() on a bare count().
    $this->assertCount(40, $pdfDocument->getObjects());
}
<?php

/**
 * Example script: loads the writer-lorem.pdf test fixture and, for every
 * object returned by PdfDocument::getObjects(), prints a paragraph with the
 * object's id and revision followed by a var_dump() of its value.
 */

use mermshaus\Pdf\Parser\PdfDocument;
use mermshaus\Pdf\Parser\PdfStream;

require __DIR__ . '/../../bootstrap.php';

$file = __DIR__ . '/../../../tests/mermshaus/Tests/Pdf/Parser/data/pdfs/writer-lorem.pdf';

// fopen() returns false on failure. Stop with an explicit error instead of
// passing false on to PdfStream.
$handle = fopen($file, 'rb');

if ($handle === false) {
    throw new \RuntimeException('Unable to open PDF file: ' . $file);
}

$pdf = new PdfDocument();
$pdf->loadFromStream(new PdfStream($handle));

foreach ($pdf->getObjects() as $object) {
    // Heading line in the form "<id>:<revision>"
    echo '<p>' . $object->getId() . ':' . $object->getRevision() . '</p>';

    var_dump($object->getValue());
}
<?php

/**
 * Example script: loads the writer-lorem.pdf test fixture and walks the
 * entries of the page tree's /Kids list. For each entry it resolves the
 * /Resources and /Font references and builds two lookup tables keyed by
 * the keys of the /Font dictionary.
 */

use mermshaus\Pdf\Parser\CharMapParser;
use mermshaus\Pdf\Parser\PdfDocument;
use mermshaus\Pdf\Parser\PdfStream;
use mermshaus\Pdf\Parser\TextObjectParser;

require __DIR__ . '/../../bootstrap.php';

$pdf = new PdfDocument();

// Shorthand for $pdf->resolveRef($value)
$rr = function ($value) use($pdf) {
    return $pdf->resolveRef($value);
};

// Exposes a string as a stream handle: the string is base64-encoded into a
// data:// URI, which is then opened with fopen() for binary reading.
$stringToStream = function ($string) {
    $streamData = 'data://text/plain;base64,' . base64_encode($string);

    return fopen($streamData, 'rb');
};

$file = __DIR__ . '/../../../tests/mermshaus/Tests/Pdf/Parser/data/pdfs/writer-lorem.pdf';

$pdf->loadFromStream(new PdfStream(fopen($file, 'rb')));

$pageTree = $pdf->getPageTree();

foreach ($pageTree->get('/Kids') as $kid) {
    // Resolve the chain kid -> /Resources -> /Font
    $page = $rr($kid);
    $resources = $rr($page->get('/Resources'));
    $font = $rr($resources->get('/Font'));

    // Both tables are reset for every /Kids entry.
    // $fontmap: /Font key => resolved font entry
    // $toUnicodeMap: /Font key => resolved /ToUnicode entry (only for
    // font entries that have one)
    $fontmap = array();
    $toUnicodeMap = array();

    foreach ($font->getKeys() as $key) {
        $fontmap[$key] = $rr($font->get($key));

        // NOTE(review): this resolves the same reference as the line above
        // a second time; presumably $fontmap[$key] could be reused here.
        // Confirm that resolveRef() has no side effects before changing.
        $tmp = $rr($font->get($key));

        if ($tmp->has('/ToUnicode')) {
            $toUnicodeMap[$key] = $rr($tmp->get('/ToUnicode'));
        }
    }
$second .= $char; break; } } return $second; }

/**
 * Escapes a string for output in an HTML document.
 *
 * Delegates to htmlspecialchars() with ENT_QUOTES | ENT_SUBSTITUTE and the
 * UTF-8 charset: both single and double quotes are converted, and invalid
 * code unit sequences are replaced instead of producing an empty string.
 *
 * @todo There are still cases that will make htmlspecialchars fail without
 *       ENT_SUBSTITUTE
 *
 * @param string $s Text to escape
 * @return string Escaped text
 */
function e($s)
{
    return htmlspecialchars($s, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

require __DIR__ . '/../../bootstrap.php';

$pdf = new PdfDocument();

$file = __DIR__ . '/../../../tests/mermshaus/Tests/Pdf/Parser/data/pdfs/writer-lorem.pdf';

$pdf->loadFromStream(new PdfStream(fopen($file, 'rb')));

// One entry per stream object: 'title' identifies the object by id and
// revision, 'content' holds the decoded stream data after it has been passed
// through convertInvalidUtf8CharacersToHex().
$decodedStreams = array();

foreach ($pdf->getObjects() as $object) {
    $value = $object->getValue();

    // Objects whose value is not a PdfStreamObject are skipped
    if ($value instanceof PdfStreamObject) {
        $decodedStreams[] = array(
            'title' => 'Object (' . $object->getId() . ' ' . $object->getRevision() . ')',
            'content' => convertInvalidUtf8CharacersToHex($pdf->decodeStream($value))
        );
    }
}

// Sent before the first byte of the HTML output below
header('Content-Type: text/html; charset=UTF-8');

?> <!DOCTYPE html> <html lang="en">