/**
 * Drains the given reader, handling each element according to its type.
 *
 * - Element::e_path: fetches the path data and its points (the values are
 *   not used further here).
 * - Element::e_text: echoes the element's text string plus a trailing
 *   newline, passed through nl2br().
 * - Element::e_form: calls FormBegin() on the reader, processes the
 *   elements it then yields by recursing, and calls End() afterwards.
 *
 * Elements of any other type are ignored.
 *
 * @param object $reader Reader exposing Next(), FormBegin() and End();
 *                       presumably a PDFNet ElementReader - confirm at the call site.
 *
 * @return void
 */
function ProcessElements($reader)
{
    while (true) {
        $current = $reader->Next();

        // Loose comparison on purpose: iteration stops on any value that is == null.
        if ($current == null) {
            return;
        }

        $kind = $current->GetType();

        if ($kind == Element::e_path) {
            $pathData = $current->GetPathData();
            $pathPoints = $pathData->GetPoints();
        } elseif ($kind == Element::e_text) {
            $text = $current->GetTextString();
            echo nl2br($text . "\n");
        } elseif ($kind == Element::e_form) {
            // Recurse between FormBegin() and End() to handle the form's own elements.
            $reader->FormBegin();
            ProcessElements($reader);
            $reader->End();
        }
    }
}
// Fragment from the middle of a larger script. It starts inside a branch whose
// opening `if` lies before this excerpt (the `} else {` below pairs with it) and
// ends by opening `if ($tree->IsValid()) {`, whose body is not visible here.
//
// Sample 1 (tail): for every kid of $tree, calls ProcessStructElement() with the
//   kid and 0 (presumably the starting depth/indent - confirm against that function).
// Sample 2: creates an ElementReader and, for each page of $doc, runs
//   Begin(page) / ProcessElements($reader) / End().
// Sample 3: builds $mcid_doc_map with one array per page, keyed by the page's
//   GetIndex(), and hands that array to ProcessElements2() (presumably taken by
//   reference so it can be filled - confirm), then fetches the struct tree again.
//
// NOTE(review): on this single physical line the inline `//` comment extends to
// the end of the line, so PHP treats everything after it as comment text. The
// original line breaks must be restored before this code can run.
echo nl2br("Document has a StructTree root.\n"); for ($i = 0; $i < $tree->GetNumKids(); ++$i) { // Recursively get structure info for all child elements. ProcessStructElement($tree->GetKid($i), 0); } } else { echo nl2br("This document does not contain any logical structure.\n"); } echo nl2br("\nDone 1.\n"); echo nl2br("____________________________________________________________\n"); echo nl2br("Sample 2 - Get parent logical structure elements from\n"); echo nl2br("layout elements.\n"); $reader = new ElementReader(); for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) { $reader->Begin($itr->Current()); ProcessElements($reader); $reader->End(); } echo nl2br("\nDone 2.\n"); echo nl2br("____________________________________________________________\n"); echo nl2br("Sample 3 - 'XML style' extraction of PDF logical structure and page content.\n"); $mcid_doc_map = array(); $reader = new ElementReader(); for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) { $reader->Begin($itr->Current()); $mcid_doc_map[$itr->Current()->GetIndex()] = array(); ProcessElements2($reader, $mcid_doc_map[$itr->Current()->GetIndex()]); $reader->End(); } $tree = $doc->GetStructTree(); if ($tree->IsValid()) {
/**
 * Reads elements from the reader until it is exhausted and dispatches each
 * one to the handler matching its type.
 *
 * - Element::e_path       -> ProcessPath($reader, $element)
 * - Element::e_text_begin -> ProcessText($reader)
 * - Element::e_form       -> FormBegin(), recursive ProcessElements(), End()
 * - Element::e_image      -> ProcessImage($element)
 *
 * Elements of any other type are skipped.
 *
 * @param object $reader Reader exposing Next(), FormBegin() and End();
 *                       presumably a PDFNet ElementReader - confirm at the call site.
 *
 * @return void
 */
function ProcessElements($reader)
{
    // Loose `!= NULL` kept as in the original loop condition.
    for ($element = $reader->Next(); $element != NULL; $element = $reader->Next()) {
        $type = $element->GetType();

        if ($type == Element::e_path) {
            ProcessPath($reader, $element);
        } elseif ($type == Element::e_text_begin) {
            ProcessText($reader);
        } elseif ($type == Element::e_form) {
            // Recurse between FormBegin() and End() to handle the form's own elements.
            $reader->FormBegin();
            ProcessElements($reader);
            $reader->End();
        } elseif ($type == Element::e_image) {
            ProcessImage($element);
        }
    }
}
// Rewrites the content of every page in $doc and then saves the document.
//
// Per page: the reader is started on the page and the writer is started on the
// same page in ElementWriter::e_replacement mode, then ProcessElements() runs
// over the pair. $map1 is handed to ProcessElements() (presumably by reference,
// so it can collect further objects to process - confirm against its signature).
// Each collected object is then processed the same way, using the page's
// resource dictionary, with newly collected objects gathered in $map2.
$writer = new ElementWriter();
$reader = new ElementReader();

for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) {
    $page = $itr->Current();

    $reader->Begin($page);
    $writer->Begin($page, ElementWriter::e_replacement, false);

    $map1 = array();
    ProcessElements($reader, $writer, $map1);

    $writer->End();
    $reader->End();

    $map2 = array();

    // Keep going until neither the current batch nor the next batch has entries.
    while (!empty($map1) || !empty($map2)) {
        foreach ($map1 as $k => $v) {
            $obj = $v;

            $writer->Begin($obj);
            $reader->Begin($obj, $page->GetResourceDict());
            ProcessElements($reader, $writer, $map2);
            $reader->End();
            $writer->End();

            // Entry handled; drop it from the current batch.
            unset($map1[$k]);
        }

        // Promote the objects found during this pass to be the next batch.
        if (empty($map1) && !empty($map2)) {
            $map1 += $map2;
            $map2 = array();
        }
    }
}

$doc->Save($output_path . $output_filename, SDFDoc::e_remove_unused);
echo nl2br("Done. Result saved in " . $output_filename . "...\n");