/**
 * Extracts plain text from a stored file using an external command-line
 * tool selected by the file's MIME type, and stores it via setContent().
 *
 * The file location is built by concatenating $this->storage, the file's
 * path, a '/' and the file's filename. The tool's output lines are joined
 * with a single space. If the tool exits with a non-zero status the
 * file's content is left untouched.
 *
 * Supported MIME types and the tools they require:
 *  - application/pdf: pdftotext (poppler-utils),
 *    see https://wiki.ubuntuusers.de/poppler-utils/
 *  - application/vnd.oasis.opendocument.text: unoconv
 *    (sudo apt-get install unoconv), see http://stackoverflow.com/a/3293756
 *  - image/png, image/jpeg, image/gif: tesseract with the German language
 *    pack (sudo apt-get install tesseract-ocr tesseract-ocr-deu)
 *
 * TODO: Word (.doc) documents are not handled yet,
 * see http://superuser.com/a/186791
 *
 * @param \AppBundle\Entity\File $file File whose content should be extracted.
 */
public function extractContent(\AppBundle\Entity\File $file)
{
    // Each template has exactly one %s placeholder, which receives the
    // shell-escaped file location below.
    switch ($file->getMimeType()) {
        case 'application/pdf':
            $template = 'pdftotext %s -';
            break;

        case 'application/vnd.oasis.opendocument.text':
            $template = 'unoconv --format=txt --stdout %s';
            break;

        case 'image/png':
        case 'image/jpeg':
        case 'image/gif':
            $template = 'tesseract %s stdout -l deu';
            break;

        default:
            // NOTE(review): terminating the whole script for an unsupported
            // MIME type is harsh; kept unchanged because callers may rely on it.
            print "Kein Extractor implementiert";
            die;
    }

    $location = $this->storage . $file->getPath() . '/' . $file->getFilename();

    // escapeshellarg() quotes the argument itself, so file names containing
    // quotes, '$', backticks or ';' cannot break out of the command.
    $cmd = sprintf($template, escapeshellarg($location));

    // exec() appends to an existing array, so start from an empty one.
    $output = [];
    exec($cmd, $output, $exitCode);

    if ($exitCode === 0) {
        $file->setContent(join(" ", $output));
    }
}
/**
 * Populates the object manager with sample data.
 *
 * Creates eleven documents (numbered 0 to 10). Each document receives two
 * or three files (numbered from 0), the count being chosen at random.
 * Files are persisted before the document they belong to, a progress line
 * is printed for every persisted entity, and everything is flushed once
 * at the end.
 *
 * No tables are truncated here; existing rows are left as they are.
 *
 * @param ObjectManager $manager Manager used to persist and flush the entities.
 */
public function load(ObjectManager $manager)
{
    foreach (range(0, 10) as $docNumber) {
        $document = new Document();
        $document->setTitle(sprintf('Der Titel Nr. %d', $docNumber));
        $document->setDocDate(new \DateTime());
        $document->setPath('/home/jacek/dir');
        $document->setCreated(new \DateTime());
        $document->setTags('tag1 tag2 tag3');

        // Highest file index for this document: 1 or 2, i.e. 2 or 3 files.
        $lastFileNumber = mt_rand(1, 2);

        foreach (range(0, $lastFileNumber) as $fileNumber) {
            $file = new File();
            $file->setTitle(sprintf('Titel der Datei Nr. %d', $fileNumber));
            $file->setFilename('Dateiname.ext');
            $file->setPath('/home/jacek/dir');
            $file->setFiletype('ext');
            $file->setContent('blablabla');
            $file->setCreated(new \DateTime());
            $file->setDocument($document);

            $manager->persist($file);
            echo 'File angelegt: ' . $file->getTitle() . " \n";
        }

        $manager->persist($document);
        echo 'Doc angelegt: ' . $document->getTitle() . " \n";
    }

    $manager->flush();
}