/**
 * Collects the text fragments shown by this object's content stream.
 *
 * The stream is split into text sections and commands through
 * getSectionsText() / getCommandsText(). Every text-showing operator
 * (', Tj, TJ) contributes one decoded entry, and every 'Do' operator
 * contributes the text of the referenced XObject when a page is available.
 *
 * @param Page|null $page Page used to look up fonts and XObjects. When null,
 *                        font changes ('Tf') are ignored and the default
 *                        font keeps being used; XObjects are skipped.
 *
 * @return array Text fragments in the order they appear in the stream.
 * @throws \Exception
 */
public function getTextArray(Page $page = null)
{
    $text = array();
    $sections = $this->getSectionsText($this->content);
    $current_font = new Font($this->document);

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);

        foreach ($commands as $command) {
            switch ($command[self::OPERATOR]) {
                // set text font and size
                case 'Tf':
                    // Without a page there is nowhere to look the font up;
                    // keep the current font instead of dereferencing null.
                    if (!is_null($page)) {
                        list($id, ) = preg_split('/\\s/s', $command[self::COMMAND]);
                        $id = trim($id, '/');
                        $current_font = $page->getFont($id);
                    }
                    break;

                // show text
                case "'":
                case 'Tj':
                    // Wrap the single command so it has the same shape as a TJ list.
                    $command[self::COMMAND] = array($command);
                    // no break
                case 'TJ':
                    if (is_null($current_font)) {
                        // Fallback when the font lookup returned nothing:
                        // keep the raw, undecoded operand.
                        // TODO : Improve
                        $text[] = $command[self::COMMAND][0][self::COMMAND];
                        // 'break' rather than 'continue': inside a switch both
                        // behave the same, but 'continue' warns on PHP >= 7.3.
                        break;
                    }

                    $text[] = $current_font->decodeText($command[self::COMMAND]);
                    break;

                // paint an external object
                case 'Do':
                    if (!is_null($page)) {
                        $args = preg_split('/\\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        if ($xobject = $page->getXObject($id)) {
                            $text[] = $xobject->getText($page);
                        }
                    }
                    break;

                // Operators that produce no entry here: Tc, Td, TD, TL, Tm,
                // Ts, Tw, Tz, T*, Da, rg, RG, re, co, cs, gs, en, sc, SC,
                // g, G, V, vo, Vo and anything unrecognised.
                default:
                    break;
            }
        }
    }

    return $text;
}
/**
 * Builds a plain-text rendering of this object's content stream.
 *
 * Decoded text from the text-showing operators (', Tj, TJ) is concatenated;
 * positioning operators are turned into whitespace heuristically:
 *  - Td : newline when x <= 0 or y went down, space when x went right;
 *  - TD : newline when y < 0, otherwise space when x <= 0;
 *  - Tm : newline when y differs from the previous Tm by more than 10;
 *  - TL : space;
 *  - Tz, T* : newline.
 *
 * @param Page|null $page Page used to look up fonts. When null, font changes
 *                        ('Tf') are ignored and the default font keeps being
 *                        used.
 *
 * @return string The extracted text followed by a single trailing space.
 * @throws \Exception
 */
public function getText(Page $page = null)
{
    $text = '';
    $sections = $this->getSectionsText($this->content);
    $current_font = new Font($this->document);

    // Last operands seen for Td and Tm; false until the first occurrence.
    $current_position_td = array('x' => false, 'y' => false);
    $current_position_tm = array('x' => false, 'y' => false);

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);

        foreach ($commands as $command) {
            switch ($command[self::OPERATOR]) {
                // move text current point
                case 'Td':
                    $args = preg_split('/\\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);

                    $moved_down = $current_position_td['y'] !== false
                        && floatval($y) < floatval($current_position_td['y']);
                    $moved_right = $current_position_td['x'] !== false
                        && floatval($x) > floatval($current_position_td['x']);

                    if (floatval($x) <= 0 || $moved_down) {
                        // vertical offset
                        $text .= "\n";
                    } elseif ($moved_right) {
                        // horizontal offset
                        $text .= ' ';
                    }

                    $current_position_td = array('x' => $x, 'y' => $y);
                    break;

                // move text current point and set leading
                case 'TD':
                    $args = preg_split('/\\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);

                    if (floatval($y) < 0) {
                        $text .= "\n";
                    } elseif (floatval($x) <= 0) {
                        $text .= ' ';
                    }
                    break;

                // set text font and size
                case 'Tf':
                    // Without a page there is nowhere to look the font up;
                    // keep the current font instead of dereferencing null.
                    if (!is_null($page)) {
                        list($id, ) = preg_split('/\\s/s', $command[self::COMMAND]);
                        $id = trim($id, '/');
                        $current_font = $page->getFont($id);
                    }
                    break;

                // show text
                case "'":
                case 'Tj':
                    // Wrap the single command so it has the same shape as a TJ list.
                    $command[self::COMMAND] = array($command);
                    // no break
                case 'TJ':
                    if (is_null($current_font)) {
                        // Fallback when the font lookup returned nothing:
                        // append the raw, undecoded operand.
                        // TODO : Improve
                        $text .= $command[self::COMMAND][0][self::COMMAND];
                        // 'break' rather than 'continue': inside a switch both
                        // behave the same, but 'continue' warns on PHP >= 7.3.
                        break;
                    }

                    $text .= $current_font->decodeText($command[self::COMMAND]);
                    break;

                // set leading
                case 'TL':
                    $text .= ' ';
                    break;

                // set text matrix
                case 'Tm':
                    $args = preg_split('/\\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);

                    if ($current_position_tm['y'] !== false) {
                        $delta = abs(floatval($y) - floatval($current_position_tm['y']));
                        // A vertical jump above this threshold is treated as a new line.
                        if ($delta > 10) {
                            $text .= "\n";
                        }
                    }

                    $current_position_tm = array('x' => $x, 'y' => $y);
                    break;

                // set horizontal scaling
                case 'Tz':
                // move to start of next line
                case 'T*':
                    $text .= "\n";
                    break;

                // Operators that add nothing to the output: Tc, Ts, Tw, Da,
                // Do (XObject text is not expanded by this method), rg, RG,
                // re, co, cs, gs, en, sc, SC, g, G, V, vo, Vo and anything
                // unrecognised.
                default:
                    break;
            }
        }
    }

    return $text . ' ';
}
/**
 * Builds a text rendering of this object's content stream, line by line.
 *
 * Text is accumulated in $collected_text through $this->newLine() and
 * $this->appendToLine() and finally joined by $this->implodeCollectedText().
 * Those helpers are defined elsewhere; this method only decides when a new
 * line starts and what gets appended to the current one.
 *
 * Text shown right after a font-size drop of 3..8 combined with a vertical
 * shift of 4..8 is wrapped in <sup>...</sup>.
 *
 * When the DEBUG_SMALOT_OBJECT constant is defined and truthy, a trace of
 * every operator is echoed.
 *
 * @param Page|null $page Page used to look up fonts and XObjects. When null,
 *                        the default font keeps being used and XObjects are
 *                        skipped.
 *
 * @return string Whatever implodeCollectedText() returns for the collected lines.
 * @throws \Exception
 */
public function getText(Page $page = null)
{
    $bDebug = defined('DEBUG_SMALOT_OBJECT') && DEBUG_SMALOT_OBJECT;
    $collected_text = false;
    $sections = $this->getSectionsText($this->content);
    $current_font = new Font($this->document);

    // Last Td operands, and the running position used for line placement.
    $current_position_td = array('x' => false, 'y' => false);
    $current_position_tm = array('x' => false, 'y' => false);

    // State used to detect superscript text.
    $prev_font_size = false;
    $font_size = false;
    $y_delta = 0;
    $font_size_delta = 0;

    foreach ($sections as $ks => $section) {
        // Drop entries once they are picked up (presumably to limit memory use).
        unset($sections[$ks]);
        $commands = $this->getCommandsText($section);
        unset($section);

        foreach ($commands as $kc => $command) {
            unset($commands[$kc]);

            if ($bDebug) {
                echo $command[self::OPERATOR] . ':';
            }

            switch ($command[self::OPERATOR]) {
                // set character spacing
                case 'Tc':
                    if ($bDebug) {
                        echo 'Command:' . $command[self::COMMAND];
                    }
                    break;

                // move text current point
                case 'Td':
                    $args = preg_split('/\\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    // Truncate y toward zero to two decimals.
                    if ($y > 0) {
                        $y = floor(100 * $y) / 100;
                    } elseif ($y < 0) {
                        $y = ceil(100 * $y) / 100;
                    }
                    $x = array_pop($args);

                    if ($bDebug) {
                        echo '/x=' . $x . '/y=' . $y;
                    }

                    $moved_down = $current_position_td['y'] !== false
                        && floatval($y) < floatval($current_position_td['y']);

                    if (floatval($x) <= 0 || $moved_down) {
                        // vertical offset: start a new line 11 units lower
                        if ($bDebug) {
                            echo '/LF';
                        }
                        $current_position_tm['x'] = 0;
                        $current_position_tm['y'] -= 11;
                        $this->newLine('', $current_position_tm['x'], $current_position_tm['y'], $collected_text);
                    }
                    // A pure horizontal offset intentionally adds no space:
                    // per the original author's note it was removed because
                    // in some cases it put a blank in front of Latvian letters.

                    $current_position_td = array('x' => $x, 'y' => $y);
                    break;

                // move text current point and set leading
                case 'TD':
                    $args = preg_split('/\\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);

                    if ($bDebug) {
                        echo '/x=' . $x . '/y=' . $y;
                    }

                    // Nothing collected yet: open the first line at this position.
                    if (empty($collected_text)) {
                        if ($bDebug) {
                            echo '/LF';
                        }
                        $this->newLine('', $x, $y, $collected_text);
                        $current_position_tm = array('x' => $x, 'y' => $y);
                        if ($bDebug) {
                            echo '|:added new empty collect';
                        }
                    }

                    // An upward move is remembered for the superscript check.
                    if (floatval($y) > 0) {
                        $y_delta = $y;
                    }

                    if (floatval($y) < 0) {
                        // Downward move: shift the running position and open a new line.
                        $current_position_tm['y'] += $y;
                        $current_position_tm['x'] += $x;
                        if ($bDebug) {
                            echo '/LF';
                        }
                        $this->newLine('', $current_position_tm['x'], $current_position_tm['y'], $collected_text);
                        if ($bDebug) {
                            echo '/fixY=' . $current_position_tm['y'] . '|:added new empty collect';
                        }
                    } elseif (floatval($x) <= 0) {
                        $this->appendToLine(' ', $collected_text);
                        if ($bDebug) {
                            echo '|:horizontal offset';
                        }
                    }
                    break;

                // set text font and size
                case 'Tf':
                    if ($bDebug) {
                        echo 'command:' . $command[self::COMMAND];
                    }

                    $prev_font_size = $font_size;
                    // Pad so that a command without a size operand yields
                    // null instead of an undefined-offset notice.
                    list($id, $font_size) = array_pad(preg_split('/\\s/s', $command[self::COMMAND]), 2, null);
                    $font_size_delta = $prev_font_size - $font_size;
                    $id = trim($id, '/');

                    // Without a page there is nowhere to look the font up;
                    // keep the current font instead of dereferencing null.
                    if (!is_null($page)) {
                        $current_font = $page->getFont($id);
                    }

                    if ($bDebug) {
                        echo '/fontId=' . $id . '/size=' . $font_size;
                    }
                    break;

                // show text
                case "'":
                case 'Tj':
                    // Wrap the single command so it has the same shape as a TJ list.
                    $command[self::COMMAND] = array($command);
                    // no break
                case 'TJ':
                    $text = '';

                    if (is_null($current_font)) {
                        // Fallback when the font lookup returned nothing:
                        // use the raw, undecoded operand.
                        // TODO : Improve
                        $text .= $command[self::COMMAND][0][self::COMMAND];
                        // Append the fallback so it is not silently dropped.
                        $this->appendToLine($text, $collected_text);
                        if ($bDebug) {
                            echo '/fd:' . $font_size_delta . '/yd:' . $y_delta . '/AddText:' . $text;
                        }
                        // 'break' rather than 'continue': inside a switch both
                        // behave the same, but 'continue' warns on PHP >= 7.3.
                        break;
                    }

                    $text .= $current_font->decodeText($command[self::COMMAND]);

                    // Superscript detection: smaller font plus a small upward shift.
                    // (Original note, translated from Latvian, roughly: "identifies
                    // a 'prim' article" — presumably superscripted article numbers;
                    // TODO confirm.)
                    if ($font_size_delta > 3 && $font_size_delta < 8
                        && $y_delta > 4 && $y_delta < 8
                        && trim($text) != ''
                    ) {
                        $text = '<sup>' . trim($text) . '</sup>';
                    }

                    $this->appendToLine($text, $collected_text);

                    if ($bDebug) {
                        echo '/fsd:' . $font_size_delta . '/yd:' . $y_delta . '/AddText:' . $text;
                    }
                    break;

                // set leading
                case 'TL':
                    if ($bDebug) {
                        echo 'add space';
                    }
                    $this->appendToLine(' ', $collected_text);
                    break;

                // set text matrix
                case 'Tm':
                    $args = preg_split('/\\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);

                    if ($bDebug) {
                        echo '/x=' . $x . '/y=' . $y;
                    }

                    $y_delta = $y - $current_position_tm['y'];

                    // Same superscript heuristic as for shown text: a small
                    // upward shift after a font-size drop stays on the current
                    // line, and the running position is left unchanged.
                    if ($font_size_delta > 3 && $prev_font_size - $font_size < 8
                        && $y_delta > 4 && $y_delta < 8
                    ) {
                        break;
                    }

                    if ($bDebug) {
                        echo '/LF';
                    }
                    $this->newLine('', $x, $y, $collected_text);
                    $current_position_tm = array('x' => $x, 'y' => $y);
                    break;

                // set horizontal scaling
                case 'Tz':
                // move to start of next line
                case 'T*':
                    // Both start a new line 16 units lower; per the original
                    // author's note the value 16 was picked by guess.
                    $current_position_tm['y'] -= 16;
                    if ($bDebug) {
                        echo '/new_y=' . $current_position_tm['y'] . '/add new collect';
                    }
                    $this->newLine('', 0, $current_position_tm['y'], $collected_text);
                    break;

                // paint an external object
                case 'Do':
                    if (!is_null($page)) {
                        $args = preg_split('/\\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        if ($xobject = $page->getXObject($id)) {
                            $text = $xobject->getText($page);
                            $this->newLine($text, $current_position_tm['x'], $current_position_tm['y'], $collected_text);
                            if ($bDebug) {
                                echo $text;
                            }
                        }
                    }
                    break;

                // Operators with no effect here: Ts, Tw, Da, rg, RG, re, co,
                // cs, gs, en, sc, SC, g, G, V, vo, Vo and anything unrecognised.
                default:
                    break;
            }

            if ($bDebug) {
                echo PHP_EOL;
            }
        }
    }

    return $this->implodeCollectedText($collected_text);
}