Пример #1
0
/**
 * Reads elements from $reader until it runs out and handles each one by type.
 *
 * - Path elements: their path data and its points are retrieved; the points
 *   are not used any further inside this function.
 * - Text elements: the text string is echoed through nl2br() with a trailing
 *   newline appended.
 * - Form elements: FormBegin() is called on the reader, the nested content is
 *   processed by a recursive call, and End() is called afterwards.
 *
 * Elements of any other type are skipped.
 *
 * @param mixed $reader Object exposing Next(), FormBegin() and End()
 *                      (presumably an ElementReader; confirm against callers).
 * @return void
 */
function ProcessElements($reader)
{
    while (true) {
        $current = $reader->Next();
        // Loose comparison is kept so that any value equal to null ends the walk.
        if ($current == null) {
            break;
        }

        // Loose equality mirrors the matching rules of a switch statement.
        $kind = $current->GetType();
        if ($kind == Element::e_path) {
            // The points are fetched but nothing below consumes them.
            $pathPoints = $current->GetPathData()->GetPoints();
        } elseif ($kind == Element::e_text) {
            $text = $current->GetTextString();
            echo nl2br($text . "\n");
        } elseif ($kind == Element::e_form) {
            // Descend into the form, process its content, then step back out.
            $reader->FormBegin();
            ProcessElements($reader);
            $reader->End();
        }
    }
}
    // NOTE(review): this fragment starts inside an if-block whose opening
    // condition is not visible here; $tree is assumed to have been assigned
    // before this point. Confirm against the complete sample.
    // Branch taken when the condition held: announce the structure tree root
    // and visit each of its direct kids.
    echo nl2br("Document has a StructTree root.\n");
    for ($i = 0; $i < $tree->GetNumKids(); ++$i) {
        // Recursively get structure info for all child elements.
        // The second argument is 0 for these top-level kids (presumably the
        // starting nesting depth; verify in ProcessStructElement).
        ProcessStructElement($tree->GetKid($i), 0);
    }
} else {
    // Branch taken otherwise: report that there is no logical structure.
    echo nl2br("This document does not contain any logical structure.\n");
}
// Close out sample 1 and print the banner for sample 2.
echo nl2br("\nDone 1.\n");
echo nl2br("____________________________________________________________\n");
echo nl2br("Sample 2 - Get parent logical structure elements from\n");
echo nl2br("layout elements.\n");
// Sample 2: open a reader on every page of $doc in turn and hand it to
// ProcessElements(); the reader is closed with End() after each page.
$reader = new ElementReader();
for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) {
    $reader->Begin($itr->Current());
    ProcessElements($reader);
    $reader->End();
}
echo nl2br("\nDone 2.\n");
echo nl2br("____________________________________________________________\n");
echo nl2br("Sample 3 - 'XML style' extraction of PDF logical structure and page content.\n");
// Sample 3: build $mcid_doc_map with one array per page, keyed by the value
// returned from the page's GetIndex().
$mcid_doc_map = array();
$reader = new ElementReader();
for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) {
    $reader->Begin($itr->Current());
    // Start the page's entry empty before handing it to ProcessElements2().
    $mcid_doc_map[$itr->Current()->GetIndex()] = array();
    // NOTE(review): ProcessElements2() presumably takes the second argument by
    // reference and fills the per-page array; its definition is not visible
    // here, so confirm.
    ProcessElements2($reader, $mcid_doc_map[$itr->Current()->GetIndex()]);
    $reader->End();
}
// Fetch the structure tree; the body of the following if-block is not part of
// this fragment.
$tree = $doc->GetStructTree();
if ($tree->IsValid()) {
/**
 * Drains $reader and hands every element to the handler matching its type.
 *
 * - Path elements go to ProcessPath() together with the reader.
 * - A text-begin element triggers ProcessText() on the reader.
 * - Form elements are entered with FormBegin(), processed by a recursive
 *   call, and left again with End().
 * - Image elements go to ProcessImage().
 *
 * Elements of any other type are skipped.
 *
 * @param mixed $reader Object exposing Next(), FormBegin() and End()
 *                      (presumably an ElementReader; confirm against callers).
 * @return void
 */
function ProcessElements($reader)
{
    // Loose comparison is kept so that any value equal to null stops the loop.
    for ($item = $reader->Next(); $item != null; $item = $reader->Next()) {
        // Loose equality mirrors the matching rules of a switch statement.
        $kind = $item->GetType();

        if ($kind == Element::e_path) {
            ProcessPath($reader, $item);
        } elseif ($kind == Element::e_text_begin) {
            ProcessText($reader);
        } elseif ($kind == Element::e_form) {
            // Descend into the form, process its content, then step back out.
            $reader->FormBegin();
            ProcessElements($reader);
            $reader->End();
        } elseif ($kind == Element::e_image) {
            ProcessImage($item);
        }
    }
}
Пример #4
0
// Runs ProcessElements() over every page of $doc with a reader/writer pair,
// then over every object queued up while doing so, and finally saves the
// document and reports the output file name.
$writer = new ElementWriter();
$reader = new ElementReader();
for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) {
    $page = $itr->Current();

    // Pass over the page itself. $map1 is handed to ProcessElements(), which
    // presumably fills it with objects that need a pass of their own
    // (TODO confirm against its definition).
    $map1 = array();
    $reader->Begin($page);
    $writer->Begin($page, ElementWriter::e_replacement, false);
    ProcessElements($reader, $writer, $map1);
    $writer->End();
    $reader->End();

    // Keep going for as long as either map still holds pending entries.
    $map2 = array();
    while (!empty($map1) || !empty($map2)) {
        foreach ($map1 as $k => $v) {
            $obj = $v;
            // The reader is opened on the object together with the page's
            // resource dictionary; anything found meanwhile goes into $map2.
            $writer->Begin($obj);
            $reader->Begin($obj, $page->GetResourceDict());
            ProcessElements($reader, $writer, $map2);
            $reader->End();
            $writer->End();

            // This entry is handled; remove it so $map1 drains.
            unset($map1[$k]);
        }

        // Once $map1 is drained, promote the newly found entries for the
        // next round and start collecting afresh.
        if (empty($map1) && !empty($map2)) {
            $map1 = $map1 + $map2;
            $map2 = array();
        }
    }
}

$doc->Save($output_path . $output_filename, SDFDoc::e_remove_unused);
echo nl2br("Done. Result saved in " . $output_filename . "...\n");