// Tail of an earlier example whose opening lies before the visible part of
// this file. When a flow id (and, inside it, a paragraph id) is set, emit the
// matching closing tags; presumably the opening tags were echoed earlier.
if ($cur_flow_id != -1) {
    if ($cur_para_id != -1) {
        // Reset the paragraph id before emitting its closing tag.
        $cur_para_id = -1;
        echo nl2br("</Para>\n");
    }
    echo nl2br("</Flow>\n");
}

// Destroy the $txt object and close the document used by that example.
$txt->Destroy();
$doc->Close();
echo nl2br("-----------------------------------------------------------\n");
} // closes a block that was opened before the visible part of this file

// Low-level text extraction; runs only when $example5_low_level is truthy.
// NOTE(review): this block is not closed within the visible part of the file.
if ($example5_low_level) {
    $doc = new PDFDoc($input_path);
    $doc->InitSecurityHandler();

    // Example 1. Extract all text content from the document
    $reader = new ElementReader();

    // Read every page. DumpAllText() is defined outside this view;
    // presumably it prints the text of each element -- confirm.
    for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) {
        $reader->Begin($itr->Current());
        DumpAllText($reader);
        $reader->End();
    }

    // Example 2. Extract text content based on the
    // selection rectangle.
    echo nl2br("\n----------------------------------------------------");
    echo nl2br("\nExtract text based on the selection rectangle.");
    echo nl2br("\n----------------------------------------------------\n");

    $first_page = $doc->GetPage(1);

    // ReadTextFromRect() is defined outside this view. It receives the page,
    // a Rect and the shared reader; its return value is echoed as text.
    $s1 = ReadTextFromRect($first_page, new Rect(27.0, 392.0, 563.0, 534.0), $reader);
    echo nl2br("\nField 1: " . $s1);

    // $s1 is reused for a second rectangle; where it is printed lies beyond
    // the visible part of the file.
    $s1 = ReadTextFromRect($first_page, new Rect(28.0, 551.0, 106.0, 623.0), $reader);
// Tail of ImageExtract(), whose head lies before the visible part of this
// file: descend into a form element with the same reader, recurse, and return
// to the parent display list.
$reader->FormBegin();
ImageExtract($reader);
$reader->End();
break;
}
}
}

// Initialize PDFNet
PDFNet::Initialize();

// Example 1:
// Walk the display list of every page and hand each one to ImageExtract().
// Traversing the display list makes image positioning information and DPI
// available.
$doc = new PDFDoc($input_path . "newsletter.pdf");
$doc->InitSecurityHandler();
$reader = new ElementReader();

// Visit the pages one at a time, advancing the iterator at the end of each pass.
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $reader->Begin($itr->Current());
    ImageExtract($reader);
    $reader->End();
    $itr->Next();
}
$doc->Close();

echo nl2br("Done...\n"),
    nl2br("----------------------------------------------------------------\n");

// Example 2:
// Extract images by scanning the low-level document. The document is opened
// again and its SDF document is fetched for that scan.
$doc = new PDFDoc($input_path . "newsletter.pdf");
$doc->InitSecurityHandler();
$image_counter = 0;
$cos_doc = $doc->GetSDFDoc();
} else {
    echo nl2br("This document does not contain any logical structure.\n");
}

echo nl2br("\nDone 1.\n"),
    nl2br("____________________________________________________________\n"),
    nl2br("Sample 2 - Get parent logical structure elements from\n"),
    nl2br("layout elements.\n");

// Sample 2: run ProcessElements() over the content of every page.
$reader = new ElementReader();
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $reader->Begin($itr->Current());
    ProcessElements($reader);
    $reader->End();
    $itr->Next();
}

echo nl2br("\nDone 2.\n"),
    nl2br("____________________________________________________________\n"),
    nl2br("Sample 3 - 'XML style' extraction of PDF logical structure and page content.\n");

// Sample 3, first pass: give every page its own (initially empty) entry in
// $mcid_doc_map, keyed by the page index, and let ProcessElements2() work on
// that entry.
$mcid_doc_map = array();
$reader = new ElementReader();
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $reader->Begin($itr->Current());
    $mcid_doc_map[$itr->Current()->GetIndex()] = array();
    ProcessElements2($reader, $mcid_doc_map[$itr->Current()->GetIndex()]);
    $reader->End();
    $itr->Next();
}

// Sample 3, second pass: when the structure tree is valid, hand each of its
// top-level kids to ProcessStructElement2() together with the map built above.
$tree = $doc->GetStructTree();
if ($tree->IsValid()) {
    $i = 0;
    while ($i < $tree->GetNumKids()) {
        ProcessStructElement2($tree->GetKid($i), $mcid_doc_map, 0);
        ++$i;
    }
}

echo nl2br("\nDone 3.\n");
case Element::e_path:
    // Fetch the path data and its points; neither is used further in the
    // visible code.
    $data = $element->GetPathData();
    $points = $data->GetPoints();
    break;
case Element::e_text:
    // Print the text string of the element.
    $data = $element->GetTextString();
    echo nl2br($data . "\n");
    break;
case Element::e_form:
    // Descend into the form, process its elements recursively, then return.
    $reader->FormBegin();
    ProcessElements($reader);
    $reader->End();
    break;
}
}
}

PDFNet::Initialize();

// Extract text data from all pages in the document
echo nl2br("__________________________________________________\n"),
    nl2br("Sample 1 - Extract text data from all pages in the document.\n"),
    nl2br("Opening the input pdf...\n");

$doc = new PDFDoc($input_path . "newsletter.pdf");
$doc->InitSecurityHandler();
$pgnum = $doc->GetPageCount();

// Feed every page through ProcessElements(), advancing the iterator at the
// end of each pass.
$page_reader = new ElementReader();
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $page_reader->Begin($itr->Current());
    ProcessElements($page_reader);
    $page_reader->End();
    $itr->Next();
}

echo nl2br("Done.\n");
// documents that don't need to be saved/read from a disk.
PDFNet::Initialize();

// Read a PDF document in a memory buffer.
$file = new MappedFile($input_path . "tiger.pdf");
$file_sz = $file->FileSize();
$file_reader = new FilterReader($file);
$mem = $file_reader->Read($file_sz);

// Collect the ordinal value of every byte of $mem into $test.
// NOTE(review): $test is not read anywhere in the visible code -- confirm
// whether later code uses it; otherwise this loop is dead work.
$test = array();
for ($i = 0; $i < strlen($mem); $i++) {
    $test[] = ord($mem[$i]);
}

// Open the document from the buffer and its size.
$doc = new PDFDoc($mem, $file_sz);
$doc->InitSecurityHandler();
$num_pages = $doc->GetPageCount();

$writer = new ElementWriter();
$reader = new ElementReader();

// Create a duplicate of every page but copy only path objects
// NOTE(review): the path-only filter inside the inner loop is commented out,
// so every element is currently copied -- confirm which behavior is intended.
// NOTE(review): this loop is not closed within the visible part of the file.
for ($i = 1; $i <= $num_pages; ++$i) {
    // One page is inserted per pass, so the i-th original page is expected at
    // position 2*i - 1 by the time it is visited.
    $itr = $doc->GetPageIterator(2 * $i - 1);
    $reader->Begin($itr->Current());
    $new_page = $doc->PageCreate($itr->Current()->GetMediaBox());

    // Assigning an object copies the handle, so $next_page and $itr refer to
    // the same iterator; advancing one advances both.
    $next_page = $itr;
    $next_page->Next();

    // Presumably inserts $new_page in front of the page $next_page points at,
    // i.e. right after the page being read -- confirm against the API.
    $doc->PageInsert($next_page, $new_page);

    $writer->Begin($new_page);
    while (($element = $reader->Next()) != null) {
        //if ($element->GetType() == Element::e_path)
        $writer->WriteElement($element);
    }
    $writer->End();
    $reader->End();
// Continuation of a page that was started before the visible part of this
// file: switch the current text element to the clip-text render mode and
// write it out.
$gstate = $element->GetGState();
$gstate->SetTextRenderMode(GState::e_clip_text);
$writer->WriteElement($element);

// Finish the block of text
$writer->WriteElement($builder->CreateTextEnd());

// Draw an image that will be clipped by the above text
$writer->WriteElement($builder->CreateImage($img, 10.0, 100.0, 1300.0, 720.0));

$writer->End(); // save changes to the current page
$doc->PagePushBack($page);

// Start a new page ------------------------------------
//
// The example illustrates how to embed the external font in a PDF document.
// The example also shows how ElementReader can be used to copy and modify
// Elements between pages.
$reader = new ElementReader();

// Start reading Elements from the last page. We will copy all Elements to
// a new page but will modify the font associated with text.
// (The last page is the one pushed back just above.)
$reader->Begin($doc->GetPage($doc->GetPageCount()));

$page = $doc->PageCreate(new Rect(0.0, 0.0, 1300.0, 794.0));
$writer->Begin($page); // begin writing to this page
$builder->Reset(); // Reset the GState to default

// Embed an external font in the document.
$font = Font::CreateTrueTypeFont($doc->GetSDFDoc(), $input_path . "font.ttf");

// Copy every element to the new page; text elements get the embedded font at
// size 12 first.
// NOTE(review): this loop is not closed within the visible part of the file.
while (($element = $reader->Next()) != null) {
    if ($element->GetType() == Element::e_text) {
        $element->GetGState()->SetFont($font, 12);
    }
    $writer->WriteElement($element);
// Tail of ProcessElements(), whose head lies before the visible part of this
// file: remember the object $o under its object number in $map, write the
// element, and write any other element type unchanged.
$map[$objNum] = $o;
$writer->WriteElement($element);
break;
default:
    $writer->WriteElement($element);
}
}
}

PDFNet::Initialize();
echo nl2br("-------------------------------------------------\n");

// Open the test file
echo nl2br("Opening the input file...\n");
$doc = new PDFDoc($input_path . $input_filename);
$doc->InitSecurityHandler();

$writer = new ElementWriter();
$reader = new ElementReader();

// Process the pages one by one.
// NOTE(review): the statement that advances $itr is not in the visible part
// of the file; this loop and the two nested ones are not closed in view.
$itr = $doc->GetPageIterator();
while ($itr->HasNext()) {
    $page = $itr->Current();

    // Read the page and write to the same page with
    // ElementWriter::e_replacement; presumably this replaces the existing
    // page content -- confirm against the API.
    $reader->Begin($page);
    $writer->Begin($page, ElementWriter::e_replacement, false);

    // ProcessElements() receives $map1; judging by the visible tail of that
    // function it stores objects in it keyed by object number.
    $map1 = array();
    ProcessElements($reader, $writer, $map1);
    $writer->End();
    $reader->End();

    // Keep going until neither map holds any objects.
    $map2 = array();
    while (!(empty($map1) && empty($map2))) {
        foreach ($map1 as $k => $v) {
            // Open the stored object for writing and for reading; the reader
            // is given the page's resource dictionary.
            $obj = $v;
            $writer->Begin($obj);
            $reader->Begin($obj, $page->GetResourceDict());