/**
  * @access private
  */
 protected static function pdf2text($filename)
 {
     // Best-effort text extraction from a PDF via PDF2Text.
     //
     // $filename is handed unchanged to PDF2Text::setFilename().
     // Returns whatever PDF2Text::output() produced, or '' when the zlib
     // extension is not loaded or when any step of the extraction throws.

     // NOTE(review): presumably PDF2Text needs zlib to inflate compressed
     // PDF streams - confirm against the PDF2Text implementation.
     if (!extension_loaded('zlib')) {
         return '';
     }

     $pdf = new PDF2Text();
     $pdf->setFilename($filename);

     try {
         $pdf->decodePDF();
         $content = $pdf->output();
         if ($content == '') {
             // Nothing came out: try again with the other multibyte setting.
             // The retry stays inside the try block so that a failure on the
             // second pass is handled exactly like a failure on the first.
             $pdf->setUnicode(true);
             $pdf->decodePDF();
             $content = $pdf->output();
         }
     } catch (Exception $e) {
         // Extraction is optional for callers; report "no text" instead of failing.
         return '';
     }

     return $content;
 }
// Example #2
0
 protected function execute(array $arguments = array(), array $options = array())
 {
     // Extracts the text of the PDF named by the 'pdf-file' argument and
     // writes it to a file whose name is the PDF path with ".txt" appended.
     $pdfPath = $arguments['pdf-file'];

     $startMessage = sprintf('Extracting text from pdf file: %s', $pdfPath);
     $this->logLine($startMessage, nbLogger::COMMENT);

     // Unicode mode depends only on whether a non-null 'unicode' option is
     // present, not on the option's value.
     $useUnicode = isset($options['unicode']);

     $extractor = new PDF2Text();
     $extractor->setFilename($pdfPath);
     $extractor->setUnicode($useUnicode);
     $extractor->decodePDF();

     $targetPath = sprintf('%s.txt', $pdfPath);
     file_put_contents($targetPath, $extractor->output());

     $this->logLine('Text extracted!', nbLogger::COMMENT);
 }
 /**
  * @access private
  */
 protected static function pdf2text($filename)
 {
     // Runs PDF2Text over $filename and returns the result of output().
     // Nothing is caught here, so anything PDF2Text throws reaches the caller.
     $extractor = new PDF2Text();
     $extractor->setFilename($filename);

     // First pass with the extractor's initial settings.
     $extractor->decodePDF();
     $text = $extractor->output();
     if ($text != '') {
         return $text;
     }

     // Empty result: decode once more with the unicode (multibyte) setting on
     // and return that output, whether or not it is empty.
     $extractor->setUnicode(true);
     $extractor->decodePDF();

     return $extractor->output();
 }
 public function save(Doctrine_Connection $conn = null)
 {
     // Saves the record through parent::save($conn). For a record that is
     // still new, the URI is changed first and, for PDF and plain-text files,
     // the file's text is stored via setExtractedInfo() before saving.
     // Text extraction is best-effort: a failure never prevents the save.
     if ($this->isNew()) {
         $this->changeUri();

         // Read once; the two branches below are mutually exclusive.
         $mimeType = $this->getMimeType();

         if ($mimeType === 'application/pdf') {
             $pdf = new PDF2Text();
             $pdf->setFilename($this->getFullURI());
             $content = '';
             try {
                 $pdf->decodePDF();
                 $content = $pdf->output();
                 if ($content == '') {
                     // try with different multibyte setting
                     $pdf->setUnicode(true);
                     $pdf->decodePDF();
                     $content = $pdf->output();
                 }
             } catch (Exception $e) {
                 // Deliberately ignored: an unreadable PDF just means no
                 // extracted info is stored for this record.
             }
             if ($content != '') {
                 // NOTE(review): utf8_encode() treats its input as ISO-8859-1
                 // and is deprecated as of PHP 8.2 - confirm the encoding of
                 // PDF2Text output before replacing it.
                 $this->setExtractedInfo(utf8_encode($content));
             }
         } elseif ($mimeType === 'text/plain') {
             $content = file_get_contents($this->getFullURI());
             // file_get_contents() returns false when the file cannot be
             // read; skip the update instead of storing an empty string.
             if ($content !== false) {
                 $this->setExtractedInfo(utf8_encode($content));
             }
         }
     }

     parent::save($conn);
 }