コード例 #1
0
    if ($cur_flow_id != -1) {
        if ($cur_para_id != -1) {
            $cur_para_id = -1;
            echo nl2br("</Para>\n");
        }
        echo nl2br("</Flow>\n");
    }
    $txt->Destroy();
    $doc->Close();
    echo nl2br("-----------------------------------------------------------\n");
}
if ($example5_low_level) {
    $doc = new PDFDoc($input_path);
    $doc->InitSecurityHandler();
    // Example 1. Extract all text content from the document
    $reader = new ElementReader();
    //  Read every page
    for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) {
        $reader->Begin($itr->Current());
        DumpAllText($reader);
        $reader->End();
    }
    // Example 2. Extract text content based on the
    // selection rectangle.
    echo nl2br("\n----------------------------------------------------");
    echo nl2br("\nExtract text based on the selection rectangle.");
    echo nl2br("\n----------------------------------------------------\n");
    $first_page = $doc->GetPage(1);
    $s1 = ReadTextFromRect($first_page, new Rect(27.0, 392.0, 563.0, 534.0), $reader);
    echo nl2br("\nField 1: " . $s1);
    $s1 = ReadTextFromRect($first_page, new Rect(28.0, 551.0, 106.0, 623.0), $reader);
コード例 #2
0
                $reader->FormBegin();
                ImageExtract($reader);
                $reader->End();
                break;
        }
    }
}
// Bring up the PDFNet library.
PDFNet::Initialize();

// Example 1:
// Walk the display list of every page and pass the reader to
// ImageExtract(). Going through the display list is the approach
// that makes image positioning information and DPI obtainable.
$doc = new PDFDoc($input_path . "newsletter.pdf");
$doc->InitSecurityHandler();
$reader = new ElementReader();

// Visit each page in iterator order; the iterator is advanced at the
// bottom of the loop body.
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $reader->Begin($itr->Current());
    ImageExtract($reader);
    $reader->End();
    $itr->Next();
}

$doc->Close();
echo nl2br("Done...\n");
echo nl2br("----------------------------------------------------------------\n");

// Example 2:
// Extract images by scanning the low-level document. The document is
// opened again because the first instance was closed above.
$doc = new PDFDoc($input_path . "newsletter.pdf");
$doc->InitSecurityHandler();
$cos_doc = $doc->GetSDFDoc();
$image_counter = 0;
コード例 #3
0
} else {
    echo nl2br("This document does not contain any logical structure.\n");
}
echo nl2br("\nDone 1.\n");
echo nl2br("____________________________________________________________\n");
echo nl2br("Sample 2 - Get parent logical structure elements from\n");
echo nl2br("layout elements.\n");

// Sample 2: run ProcessElements() over the content of every page.
$reader = new ElementReader();
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $reader->Begin($itr->Current());
    ProcessElements($reader);
    $reader->End();
    $itr->Next();
}
echo nl2br("\nDone 2.\n");
echo nl2br("____________________________________________________________\n");
echo nl2br("Sample 3 - 'XML style' extraction of PDF logical structure and page content.\n");

// Sample 3, first pass: create one (initially empty) map entry per page,
// keyed by the page index, and let ProcessElements2() work on that entry.
$mcid_doc_map = array();
$reader = new ElementReader();
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $reader->Begin($itr->Current());
    $page_index = $itr->Current()->GetIndex();
    $mcid_doc_map[$page_index] = array();
    ProcessElements2($reader, $mcid_doc_map[$page_index]);
    $reader->End();
    $itr->Next();
}

// Sample 3, second pass: hand each top-level kid of the structure tree,
// together with the per-page map, to ProcessStructElement2().
$tree = $doc->GetStructTree();
if ($tree->IsValid()) {
    $i = 0;
    while ($i < $tree->GetNumKids()) {
        ProcessStructElement2($tree->GetKid($i), $mcid_doc_map, 0);
        ++$i;
    }
}
echo nl2br("\nDone 3.\n");
コード例 #4
0
            case Element::e_path:
                $data = $element->GetPathData();
                $points = $data->GetPoints();
                break;
            case Element::e_text:
                $data = $element->GetTextString();
                echo nl2br($data . "\n");
                break;
            case Element::e_form:
                $reader->FormBegin();
                ProcessElements($reader);
                $reader->End();
                break;
        }
    }
}
// Bring up the PDFNet library.
PDFNet::Initialize();

// Sample 1: run ProcessElements() over every page of newsletter.pdf.
echo nl2br("__________________________________________________\n");
echo nl2br("Sample 1 - Extract text data from all pages in the document.\n");
echo nl2br("Opening the input pdf...\n");
$doc = new PDFDoc($input_path . "newsletter.pdf");
$doc->InitSecurityHandler();
// NOTE(review): $pgnum is not read anywhere in the code visible here;
// confirm whether it is still needed.
$pgnum = $doc->GetPageCount();

// One reader instance is reused for each page in turn.
$page_reader = new ElementReader();
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $page_reader->Begin($itr->Current());
    ProcessElements($page_reader);
    $page_reader->End();
    $itr->Next();
}
echo nl2br("Done.\n");
コード例 #5
0
// documents that don't need to be saved/read from a disk.
PDFNet::Initialize();

// Read tiger.pdf into a memory buffer: map the file, ask for its size,
// then read that many bytes through a FilterReader.
$file = new MappedFile($input_path . "tiger.pdf");
$file_sz = $file->FileSize();
$file_reader = new FilterReader($file);
$mem = $file_reader->Read($file_sz);

// Build a parallel list holding the ord() value of each position in $mem.
// NOTE(review): $test is not read anywhere in the code visible here;
// confirm whether it is still needed.
$test = array();
$i = 0;
while ($i < strlen($mem)) {
    $test[] = ord($mem[$i]);
    $i++;
}

// Open the document directly from the buffer rather than from a path.
$doc = new PDFDoc($mem, $file_sz);
$doc->InitSecurityHandler();
$num_pages = $doc->GetPageCount();

// Writer/reader pair used by the page-duplication loop that follows.
$writer = new ElementWriter();
$reader = new ElementReader();
// Create a duplicate of every page. The path-only filter inside the copy loop
// is commented out, so every element is copied, not just path objects.
for ($i = 1; $i <= $num_pages; ++$i) {
    $itr = $doc->GetPageIterator(2 * $i - 1);
    $reader->Begin($itr->Current());
    $new_page = $doc->PageCreate($itr->Current()->GetMediaBox());
    $next_page = $itr;
    $next_page->Next();
    $doc->PageInsert($next_page, $new_page);
    $writer->Begin($new_page);
    while (($element = $reader->Next()) != null) {
        //if ($element->GetType() == Element::e_path)
        $writer->WriteElement($element);
    }
    $writer->End();
    $reader->End();
コード例 #6
0
// Set the text render mode of the current text element to e_clip_text
// and write the element out.
$gstate = $element->GetGState();
$gstate->SetTextRenderMode(GState::e_clip_text);
$writer->WriteElement($element);
// Finish the block of text
$writer->WriteElement($builder->CreateTextEnd());
// Draw an image that will be clipped by the above text
$writer->WriteElement($builder->CreateImage($img, 10.0, 100.0, 1300.0, 720.0));
$writer->End();
// Save changes to the current page
$doc->PagePushBack($page);
// Start a new page ------------------------------------
//
// This example illustrates how to embed an external font in a PDF document.
// It also shows how ElementReader can be used to copy and modify
// Elements between pages.
$reader = new ElementReader();
// Start reading Elements from the last page. We will copy all Elements to
// a new page but will modify the font associated with text.
$reader->Begin($doc->GetPage($doc->GetPageCount()));
$page = $doc->PageCreate(new Rect(0.0, 0.0, 1300.0, 794.0));
// Begin writing to this page
$writer->Begin($page);
// Reset the GState to default
$builder->Reset();
// Embed an external font in the document.
$font = Font::CreateTrueTypeFont($doc->GetSDFDoc(), $input_path . "font.ttf");
while (($element = $reader->Next()) != null) {
    if ($element->GetType() == Element::e_text) {
        $element->GetGState()->SetFont($font, 12);
    }
    $writer->WriteElement($element);
コード例 #7
0
                $map[$objNum] = $o;
                $writer->WriteElement($element);
                break;
            default:
                $writer->WriteElement($element);
        }
    }
}
// Initialize the PDFNet library.
PDFNet::Initialize();
echo nl2br("-------------------------------------------------\n");
// Open the test file
echo nl2br("Opening the input file...\n");
$doc = new PDFDoc($input_path . $input_filename);
$doc->InitSecurityHandler();
// A single writer/reader pair is created here and reused by the page loop below.
$writer = new ElementWriter();
$reader = new ElementReader();
// Page iterator consumed by the while loop that follows.
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $page = $itr->Current();
    $reader->Begin($page);
    $writer->Begin($page, ElementWriter::e_replacement, false);
    $map1 = array();
    ProcessElements($reader, $writer, $map1);
    $writer->End();
    $reader->End();
    $map2 = array();
    while (!(empty($map1) && empty($map2))) {
        foreach ($map1 as $k => $v) {
            $obj = $v;
            $writer->Begin($obj);
            $reader->Begin($obj, $page->GetResourceDict());