/**
 * Extracts the text content of a PDF file using PDF2Text.
 *
 * Decoding is first attempted with the decoder's current settings. If that
 * produces an empty result, decoding is retried with the unicode (multibyte)
 * setting enabled. Extraction is best-effort: an empty string is returned
 * when the zlib extension is not loaded or when either decoding attempt
 * throws an exception.
 *
 * @access private
 *
 * @param string $filename Path of the PDF file, passed to PDF2Text::setFilename().
 *
 * @return string The value of PDF2Text::output(), or '' when extraction is not possible.
 */
protected static function pdf2text($filename)
{
    // NOTE(review): zlib is presumably needed by PDF2Text to inflate compressed
    // streams; without it the original code bails out early, and so do we.
    if (!extension_loaded('zlib')) {
        return '';
    }

    $pdf = new PDF2Text();
    $pdf->setFilename($filename);

    try {
        $pdf->decodePDF();
        $content = $pdf->output();

        if ($content == '') {
            // Try with different multibyte setting. This retry lives inside the
            // try block so that a failure here is handled like a failure of
            // the first attempt instead of escaping to the caller.
            $pdf->setUnicode(true);
            $pdf->decodePDF();
            $content = $pdf->output();
        }
    } catch (Exception $e) {
        return '';
    }

    return $content;
}
/**
 * Extracts the text of the PDF named by the 'pdf-file' argument and writes it
 * next to the source file, using the same path with ".txt" appended.
 *
 * @param array $arguments Command arguments; 'pdf-file' holds the PDF path.
 * @param array $options   Command options; unicode decoding is enabled when
 *                         the 'unicode' key is set.
 */
protected function execute(array $arguments = array(), array $options = array())
{
    $pdfPath = $arguments['pdf-file'];
    $useUnicode = isset($options['unicode']);

    $this->logLine(
        sprintf('Extracting text from pdf file: %s', $pdfPath),
        nbLogger::COMMENT
    );

    // Configure the decoder and run it.
    $decoder = new PDF2Text();
    $decoder->setFilename($pdfPath);
    $decoder->setUnicode($useUnicode);
    $decoder->decodePDF();

    // Dump the decoded text into "<pdf-file>.txt".
    $targetPath = sprintf('%s.txt', $pdfPath);
    file_put_contents($targetPath, $decoder->output());

    $this->logLine('Text extracted!', nbLogger::COMMENT);
}
/**
 * Returns the text decoded from a PDF file by PDF2Text.
 *
 * A first pass uses the decoder's current settings; when it yields an empty
 * result, a second pass is made with the unicode (multibyte) setting switched
 * on and its output is returned instead. Exceptions raised by the decoder are
 * not caught here.
 *
 * @access private
 *
 * @param string $filename Path of the PDF file, passed to PDF2Text::setFilename().
 *
 * @return string The value of PDF2Text::output() from the last pass performed.
 */
protected static function pdf2text($filename)
{
    $decoder = new PDF2Text();
    $decoder->setFilename($filename);

    // First pass: current decoder settings.
    $decoder->decodePDF();
    $text = $decoder->output();
    if ($text != '') {
        return $text;
    }

    // Second pass: try with different multibyte setting.
    $decoder->setUnicode(true);
    $decoder->decodePDF();

    return $decoder->output();
}
/**
 * Saves the record; for a new record it first updates the URI and, for PDF
 * and plain-text files, stores the file's text as extracted info.
 *
 * For 'application/pdf' the text is decoded with PDF2Text (retrying with the
 * unicode setting when the first pass yields nothing). For 'text/plain' the
 * file content is read directly. In both cases extraction is best-effort:
 * when no text can be obtained the extracted info is left untouched and the
 * record is saved anyway.
 *
 * @param Doctrine_Connection $conn Optional connection forwarded to parent::save().
 */
public function save(Doctrine_Connection $conn = null)
{
    if ($this->isNew()) {
        $this->changeUri();

        $mimeType = $this->getMimeType();

        if ($mimeType == 'application/pdf') {
            $pdf = new PDF2Text();
            $pdf->setFilename($this->getFullURI());
            $content = '';

            try {
                $pdf->decodePDF();
                $content = $pdf->output();

                if ($content == '') {
                    // Try with different multibyte setting.
                    $pdf->setUnicode(true);
                    $pdf->decodePDF();
                    $content = $pdf->output();
                }
            } catch (Exception $e) {
                // Deliberately ignored: a PDF that cannot be decoded must not
                // prevent the record from being saved.
            }

            if ($content != '') {
                // NOTE(review): utf8_encode() assumes ISO-8859-1 input and is
                // deprecated as of PHP 8.2; revisit when upgrading PHP.
                $this->setExtractedInfo(utf8_encode($content));
            }
        } elseif ($mimeType == 'text/plain') {
            $content = file_get_contents($this->getFullURI());

            // file_get_contents() returns false on failure; skip the setter
            // rather than storing an empty string derived from false.
            if ($content !== false) {
                $this->setExtractedInfo(utf8_encode($content));
            }
        }
    }

    parent::save($conn);
}