Example #1
0
 /**
  * Extracts the text content of a stored file and writes it onto the entity.
  *
  * The extractor is chosen by the file's MIME type:
  *  - application/pdf                          -> pdftotext
  *  - application/vnd.oasis.opendocument.text -> unoconv
  *  - image/png, image/jpeg, image/gif         -> tesseract (OCR, language "deu")
  *
  * The tool's output lines are joined with single spaces and passed to
  * setContent() only when the tool exits with status 0; otherwise the entity
  * is left untouched. For any other MIME type a message is printed and the
  * script is terminated.
  *
  * @param \AppBundle\Entity\File $file File entity whose content is to be extracted.
  */
 public function extractContent(\AppBundle\Entity\File $file)
 {
     // Pick the command template for the MIME type; %s receives the
     // shell-escaped file path further down.
     switch ($file->getMimeType()) {
         case 'application/pdf':
             // pdftotext is documented at https://wiki.ubuntuusers.de/poppler-utils/
             $format = 'pdftotext %s -';
             break;
         case 'application/vnd.oasis.opendocument.text':
             // See http://stackoverflow.com/a/3293756
             // Install with: sudo apt-get install unoconv
             $format = 'unoconv --format=txt --stdout %s';
             break;
         // Word (.doc) files have no extractor here; for a possible approach
         // see http://superuser.com/a/186791
         case 'image/png':
         case 'image/jpeg':
         case 'image/gif':
             // Install with: sudo apt-get install tesseract-ocr tesseract-ocr-deu
             print "BILD";
             $format = 'tesseract %s stdout -l deu';
             break;
         default:
             print "Kein Extractor implementiert";
             die;
     }

     $path = $this->storage . $file->getPath() . '/' . $file->getFilename();

     // escapeshellarg() quotes the path as one shell argument, so quotes,
     // "$" or backticks in a stored path or filename cannot break out of the
     // command (the previous hand-written "%s" quoting did not prevent that).
     $cmd = sprintf($format, escapeshellarg($path));

     // exec() appends to an existing array, so start from an empty one.
     $output = array();
     exec($cmd, $output, $return_var);

     // Strict comparison: only a real exit status of 0 counts as success.
     if ($return_var === 0) {
         $file->setContent(join(" ", $output));
     }
 }
Example #2
0
 /**
  * Builds sample documents with attached files and stores them via the manager.
  *
  * Eleven documents (numbered 0 to 10) are created. Each one gets two or three
  * files (numbered from 0); the upper file number is drawn with mt_rand(1, 2).
  * Every file and document is persisted, a progress line is printed for each,
  * and a single flush() is issued once all entities have been queued.
  *
  * @param ObjectManager $manager Manager used to persist and flush the entities.
  */
 public function load(ObjectManager $manager)
 {
     /*
      * Disabled: emptying the tables before loading.
      *
      *     $connection = $manager->getConnection();
      *     $dbPlatform = $connection->getDatabasePlatform();
      *     # $connection->query('SET FOREIGN_KEY_CHECKS=0');
      *     $q = $dbPlatform->getTruncateTableSql('documents');
      *     $connection->executeUpdate($q);
      *     $q = $dbPlatform->getTruncateTableSql('files');
      *     $connection->executeUpdate($q);
      *     # $connection->query('SET FOREIGN_KEY_CHECKS=1');
      */
     foreach (range(0, 10) as $docNumber) {
         $document = new Document();
         $document->setTitle('Der Titel Nr. ' . $docNumber);
         $document->setDocDate(new \DateTime());
         $document->setPath('/home/jacek/dir');
         $document->setCreated(new \DateTime());
         $document->setTags('tag1 tag2 tag3');

         // File numbers run from 0 up to and including this random bound.
         $lastFileNumber = mt_rand(1, 2);
         foreach (range(0, $lastFileNumber) as $fileNumber) {
             $file = new File();
             $file->setTitle('Titel der Datei Nr. ' . $fileNumber);
             $file->setFilename('Dateiname.ext');
             $file->setPath('/home/jacek/dir');
             $file->setFiletype('ext');
             $file->setContent('blablabla');
             $file->setCreated(new \DateTime());
             $file->setDocument($document);

             $manager->persist($file);
             echo "File angelegt: {$file->getTitle()} \n";
         }

         // The document is queued after its files, as before.
         $manager->persist($document);
         echo "Doc angelegt: {$document->getTitle()} \n";
     }

     // One flush for the whole batch.
     $manager->flush();
 }