Beispiel #1
0
    // Fetch the page for this kid via $rr, then descend to its /Resources and
    // /Font dictionaries.
    // NOTE(review): $rr is defined outside this excerpt; presumably it resolves
    // indirect object references — confirm.
    $page = $rr($kid);
    $resources = $rr($page->get('/Resources'));
    $font = $rr($resources->get('/Font'));
    $fontmap = array();
    $toUnicodeMap = array();
    foreach ($font->getKeys() as $key) {
        // Resolve each font entry once and reuse the result for both the font
        // map and the /ToUnicode lookup instead of resolving it twice.
        $tmp = $rr($font->get($key));
        $fontmap[$key] = $tmp;
        if ($tmp->has('/ToUnicode')) {
            $toUnicodeMap[$key] = $rr($tmp->get('/ToUnicode'));
        }
    }
    // Parse every decoded /ToUnicode stream into a character map, stored under
    // the same key as the font entry it came from.
    $charMaps = array();
    foreach ($toUnicodeMap as $key => $data) {
        $charMapParser = new CharMapParser();
        $charMaps[$key] = $charMapParser->parse(new PdfStream($stringToStream($pdf->decodeStream($data))));
    }
    $contents = $rr($page->get('/Contents'));
    $textObjectParser = new TextObjectParser();
    echo '<pre>';
    // The text originates from the PDF file, so it must be escaped before it is
    // embedded in HTML; otherwise "<", ">" or "&" in the document would break
    // the markup or inject elements. ENT_SUBSTITUTE keeps invalid byte
    // sequences from turning the whole output into an empty string.
    // NOTE(review): assumes getText() returns a UTF-8 string — confirm.
    echo htmlspecialchars(
        $textObjectParser->getText(new PdfStream($stringToStream($pdf->decodeStream($contents))), $charMaps),
        ENT_QUOTES | ENT_SUBSTITUTE,
        'UTF-8'
    );
    echo '</pre>';
}
?>
<style>
/*<![CDATA[*/

/* Page-wide styling: light grey background, with 10px of padding above and
   below the content and 50px on the left and right. */
body {
    background: #eee;
    padding: 10px 50px;
}
Beispiel #2
0
{
    /**
     * @todo There are still cases that will make htmlspecialchars fail without
     * ENT_SUBSTITUTE
     */
    return htmlspecialchars($s, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
// Pull in the project bootstrap before any project classes are used.
require __DIR__ . '/../../bootstrap.php';
$pdf = new PdfDocument();
$file = __DIR__ . '/../../../tests/mermshaus/Tests/Pdf/Parser/data/pdfs/writer-lorem.pdf';
// fopen() returns false when the file cannot be opened; fail with a clear
// message here instead of handing a non-resource to PdfStream.
$handle = fopen($file, 'rb');
if ($handle === false) {
    throw new \RuntimeException('Unable to open PDF file: ' . $file);
}
$pdf->loadFromStream(new PdfStream($handle));
// Collect a title and the decoded content of every stream object in the
// document for the template that follows.
$decodedStreams = array();
foreach ($pdf->getObjects() as $object) {
    $value = $object->getValue();
    if ($value instanceof PdfStreamObject) {
        $decodedStreams[] = array(
            'title' => 'Object (' . $object->getId() . ' ' . $object->getRevision() . ')',
            'content' => convertInvalidUtf8CharacersToHex($pdf->decodeStream($value)),
        );
    }
}
// Declare the response encoding before any markup is emitted.
header('Content-Type: text/html; charset=UTF-8');
?>
<!DOCTYPE html>

<html lang="en">

    <head>
        <meta charset="UTF-8" />
        <title>title</title>
        <style>
/*<![CDATA[*/
pre {
    word-break: break-all;