Ejemplo n.º 1
0
 /**
  * Run the CERMINE content extractor against the input document.
  *
  * Assembles a Java command line that invokes
  * pl.edu.icm.cermine.PdfNLMContentExtractor on $this->inputFile, sends the
  * command's STDOUT to $this->outputFile, and stores the success flag and
  * the captured output in $this->status and $this->output.
  *
  * @return void
  */
 public function convert()
 {
     // Extraction is the only CERMINE action performed here; other
     // actions may be split out later.
     $this->logger->infoTranslate('cermine.cermine.startExtraction');

     $extractor = new Command();

     // <jre> -cp <cermine jar> <extractor class> -path <input file>
     $extractor->setCommand($this->config['jre']);
     $extractor->addSwitch('-cp', $this->config['cerminejar']);
     $extractor->addArgument('pl.edu.icm.cermine.PdfNLMContentExtractor');
     $extractor->addSwitch('-path', $this->inputFile);

     // Shell redirections apply left to right: STDERR is pointed at the
     // original STDOUT (so it can be captured), then STDOUT itself is
     // sent to the destination file.
     $extractor->addRedirect('2>&1 >' . $this->outputFile);

     $commandLine = $extractor->getCommand();
     $this->logger->debugTranslate('cermine.cermine.executeCommandLog', $commandLine);

     // Run the extraction and record its outcome.
     $extractor->execute();
     $this->status = $extractor->isSuccess();
     $this->output = $extractor->getOutputString();

     $this->logger->debugTranslate('cermine.cermine.executeCommandOutputLog', $this->getOutput());
 }
Ejemplo n.º 2
0
 /**
  * Convert the DOCX input document with the configured meTypeset command.
  *
  * The command is run with the debug and --nogit switches, the literal
  * document type "docx", the input file and the output directory. The
  * success flag and captured output end up in $this->status and
  * $this->output.
  *
  * @return void
  *
  * @throws \Exception When no input file or no output directory is set
  */
 public function convert()
 {
     $typesetter = new Command();

     // PYTHONIOENCODING has to be exported first, otherwise Python fails
     // with unicode issues.
     $baseCommand = 'export PYTHONIOENCODING=UTF-8; ' . $this->config['command'];
     $typesetter->setCommand($baseCommand);

     // Debug switch
     $typesetter->addSwitch('-d');
     // Disable git debug filesystem
     $typesetter->addSwitch('--nogit');
     // Input document type
     $typesetter->addArgument('docx');

     // Input file (mandatory)
     if (!$this->inputFile) {
         throw new \Exception('No input file given');
     }
     $typesetter->addArgument($this->inputFile);

     // Output directory (mandatory)
     if (!$this->outputDirectory) {
         throw new \Exception('No output directory given');
     }
     $typesetter->addArgument($this->outputDirectory);

     // Point STDERR at the original STDOUT so it is captured in
     // $this->output, and discard the command's own STDOUT.
     $typesetter->addRedirect('2>&1 >/dev/null');

     $commandLine = $typesetter->getCommand();
     $this->logger->debugTranslate('nlmxmlconversion.metypeset.executeCommandLog', $commandLine);

     // Run the conversion and record its outcome.
     $typesetter->execute();
     $this->status = $typesetter->isSuccess();
     $this->output = $typesetter->getOutputString();

     $this->logger->debugTranslate('nlmxmlconversion.metypeset.executeCommandOutputLog', $this->output);
 }
Ejemplo n.º 3
0
 /**
  * Convert the references XML to BibTeX.
  *
  * Runs the configured command on $this->inputFile with STDERR discarded
  * and, if the command succeeded, writes its captured output to
  * $this->outputFile. $this->status is true only when both the command
  * and the write succeeded.
  *
  * @return void
  */
 public function convert()
 {
     $this->logger->debugTranslate('bibtexconversion.converter.startLog');
     $command = new Command();
     $command->setCommand($this->config['command']);
     $command->addArgument($this->inputFile);
     // Only STDOUT (the BibTeX output) is wanted; drop STDERR.
     $command->addRedirect('2> /dev/null');
     $this->logger->debugTranslate('bibtexconversion.converter.xml2bib.commandLog', $command->getCommand());
     // Run the xml2bib conversion
     $command->execute();
     $bibtex = $command->getOutputString();
     $this->logger->debugTranslate('bibtexconversion.converter.OutputLog', $bibtex);
     $this->status = $command->isSuccess();
     if ($this->status) {
         // file_put_contents() returns false on failure; a failed write
         // must not be reported as a successful conversion.
         $this->status = (file_put_contents($this->outputFile, $bibtex) !== false);
     }
 }
Ejemplo n.º 4
0
 /**
  * Extract named entities from the article body and write them as JSON.
  *
  * The article body text is dumped to a temporary file, piped through the
  * configured NER command with the configured model, the command output
  * is parsed into entities, and the entities are serialized as
  * pretty-printed JSON into $this->outputFile. The temporary file is
  * removed on every exit path, including failures.
  *
  * @return void
  *
  * @throws \Exception When the NER command fails or the output file
  *                    cannot be written
  */
 public function convert()
 {
     // load article body text
     $filepath = $this->extractArticleBodyTextIntoTempFile();
     try {
         $this->logger->infoTranslate('ner.extractor.startExtraction');
         $command = new Command();
         // The temp path is quoted so that spaces or shell metacharacters
         // in it cannot break (or inject into) the pipeline.
         $command->setCommand(
             'cat ' . escapeshellarg($filepath) . " | {$this->config['ner']} {$this->config['model']}"
         );
         $this->logger->debugTranslate('ner.extractor.executeCommandLog', $command->getCommand());
         // Execute the conversion
         $command->execute();
         $this->status = $command->isSuccess();
         $this->output = $command->getOutputString();
         if (!$this->status) {
             throw new \Exception("NER command did not run successfully");
         }
         // extract named entities from command output
         $entities = $this->parseCommandOutput($this->output);
         // serialization to json document
         // NOTE(review): utf8_encode() is deprecated as of PHP 8.2; kept
         // here to preserve the existing handling of the output path.
         $written = file_put_contents(utf8_encode($this->outputFile), json_encode($entities, JSON_PRETTY_PRINT));
         if ($written === false) {
             throw new \Exception("Could not write NER output file");
         }
     } finally {
         // clean up, even when an exception is on its way out
         if (file_exists($filepath)) {
             unlink($filepath);
         }
     }
 }
Ejemplo n.º 5
0
 /**
  * Run the configured biblatex2xml command on the BibTeX input file and
  * parse its output into an XML document.
  *
  * STDERR of the command is discarded. $this->status is set to the
  * command's success flag.
  *
  * @return DOMDocument|false Parsed reference list, or false when the
  *                           command failed or its output is not
  *                           loadable XML
  */
 protected function biblatex2xmlConvert()
 {
     $converter = new Command();
     $converter->setCommand($this->config['command']);
     $converter->addArgument($this->inputFileBibtex);
     $converter->addRedirect('2> /dev/null');

     $commandLine = $converter->getCommand();
     $this->logger->debugTranslate('bibtexreferencesconversion.converter.biblatex2xml.commandLog', $commandLine);

     // Run the biblatex2xml conversion
     $converter->execute();
     $this->logger->debugTranslate('bibtexreferencesconversion.converter.OutputLog', $converter->getOutputString());

     $this->status = $converter->isSuccess();
     if (!$this->status) {
         return false;
     }

     // Parse the command output; on failure log the libxml errors.
     $document = new DOMDocument();
     $loaded = $document->loadXML($converter->getOutputString());
     if ($loaded) {
         return $document;
     }

     $this->logger->debugTranslate('bibtexreferencesconversion.converter.biblatex2xml.noDOMLog', $this->libxmlErrors());
     return false;
 }
Ejemplo n.º 6
0
 /**
  * Do the wkhtmltopdf conversion.
  *
  * Converts document.html inside $this->outputTmpPath into
  * $this->outputFile using the configured wkhtmltopdf command. The
  * success flag and the combined STDOUT/STDERR output are stored in
  * $this->status and $this->output.
  *
  * @return void
  *
  * @throws \Exception When the temporary output path or the output file
  *                    is not set
  */
 protected function execute()
 {
     $command = new Command();
     // Set the base command
     $command->setCommand($this->config['wkhtmltopdf']['command']);
     // Add the input file.  The guard has to test the directory: the
     // concatenated path always ends in '/document.html' and is therefore
     // never empty, so testing it could never fail.
     if (!$this->outputTmpPath) {
         throw new \Exception('No input file given');
     }
     $inputFile = $this->outputTmpPath . '/document.html';
     $command->addArgument($inputFile);
     // Add the output file
     if (!$this->outputFile) {
         throw new \Exception('No output file given');
     }
     $command->addArgument($this->outputFile);
     // Redirect STDERR to STDOUT to capture it in $this->output
     $command->addRedirect('2>&1');
     $this->logger->debugTranslate('pdfconversion.wkhtmltopdf.executeCommandLog', $command->getCommand());
     // Execute the conversion
     $command->execute();
     $this->status = $command->isSuccess();
     $this->output = $command->getOutputString();
     $this->logger->debugTranslate('pdfconversion.wkhtmltopdf.executeCommandOutputLog', $this->output);
 }
Ejemplo n.º 7
0
 /**
  * Run the citation parser in "extract_citations" mode.
  *
  * The success flag and captured output are stored in $this->status and
  * $this->output; afterwards parsCitCleanup() is called for the
  * references file.
  *
  * @param string $referencesFile Reference file to parse
  *
  * @return void
  */
 protected function parsCitExecute($referencesFile)
 {
     // Assemble: <command> -m extract_citations <references file>
     $parsCit = new Command();
     $parsCit->setCommand($this->config['command']);
     $parsCit->addSwitch('-m', 'extract_citations');
     $parsCit->addArgument($referencesFile);

     $commandLine = $parsCit->getCommand();
     $this->logger->debugTranslate('referencesconversion.converter.parsCit.commandLog', $commandLine);

     // Run ParsCit and record its outcome.
     $parsCit->execute();
     $this->status = $parsCit->isSuccess();
     $this->output = $parsCit->getOutputString();

     // Remove the temporary files
     $this->parsCitCleanup($referencesFile);
 }
Ejemplo n.º 8
0
 /**
  * Convert the NLM XML document to Epub.
  *
  * The jats2epub script writes to hardcoded directories relative to its
  * working directory, so the conversion runs inside a private temporary
  * directory created with mktemp (UNIX only). If a "metypeset/media"
  * directory exists next to the input file, it is copied into the work
  * directory and passed to the script. On success the single generated
  * .epub file is moved to $this->outputFile. The temporary directory is
  * removed on every exit path once it has been created.
  *
  * $this->status is false when any step fails; $this->output holds the
  * jats2epub output once the script has been run.
  *
  * @return void
  */
 public function convert()
 {
     $this->logger->debugTranslate('epubconversion.converter.startLog');
     // This mktemp code should probably be factored out.
     $this->logger->debugTranslate('epubconversion.converter.startMktemp');
     // Make our own temp directory for the script's hardcoded output
     // directories, so we can clean up without stepping on other
     // conversions' toes.
     $sysTmp = rtrim(sys_get_temp_dir(), '/');
     $mktemp = new Command();
     $mktemp->setCommand('mktemp');
     $mktemp->addSwitch('-d');
     $mktemp->addArgument($sysTmp . '/jats2epub.XXXXX');
     $mktemp->addRedirect('2>&1');
     $mktemp->execute();
     // mktemp prints the directory name followed by a newline; strip it
     // before using the value as a path.
     $thisTmp = trim($mktemp->getOutputString());
     // Never continue (and never run del_dir) on anything that is not an
     // existing directory.
     if (!$mktemp->isSuccess() || $thisTmp === '' || !is_dir($thisTmp)) {
         $this->logger->infoTranslate('epubconversion.converter.errorMktemp', $mktemp->getOutputString());
         $this->status = false;
         return;
     }

     try {
         // We are going to cd to the working directory, so we need an
         // absolute path to the command.  realpath() returns false when
         // the path cannot be resolved.
         $cmdStr = realpath($this->config['command']);
         if ($cmdStr === false) {
             $this->status = false;
             $this->logger->infoTranslate(
                 'epubconversion.converter.errorJats2epub',
                 'Command not found: ' . $this->config['command']
             );
             return;
         }

         $command = new Command();
         // Do our conversion work in the temp directory.  Both paths are
         // quoted so that spaces or shell metacharacters cannot break
         // the command line.
         $command->setCommand('cd ' . escapeshellarg($thisTmp) . ' && ' . escapeshellarg($cmdStr));
         $command->addArgument($this->inputFile);

         // Look for a media directory.  If it exists, copy it to a
         // subdirectory of our temp work space and pass that to
         // jats2epub.
         $mediaDir = dirname($this->inputFile) . '/metypeset/media';
         if (file_exists($mediaDir)) {
             $jatsMediaDir = $thisTmp . "/extras";
             @mkdir($jatsMediaDir);
             $this->copy_dir($mediaDir, $jatsMediaDir);
             $command->addArgument($jatsMediaDir);
         }

         // Redirect STDERR to STDOUT to capture it in $this->output
         $command->addRedirect('2>&1');
         $this->logger->debugTranslate('epubconversion.converter.startJats2epub');

         // Execute the conversion
         $command->execute();
         $this->status = $command->isSuccess();
         $this->output = $command->getOutputString();
         if (!$this->status) {
             $this->logger->infoTranslate('epubconversion.converter.errorJats2epub', $this->output);
             return;
         }
         $this->logger->debugTranslate('epubconversion.converter.executeCommandOutputLog', $this->output);

         // Find the output file(s).  glob() may return false on error.
         $outfiles = glob($thisTmp . '/output_final/*.epub');
         if ($outfiles === false || count($outfiles) != 1) {
             $this->status = false;
             $this->logger->infoTranslate('epubconversion.converter.errorGlob');
             return;
         }

         // Exactly one candidate file: move it to the target.  A failed
         // move must not be reported as a successful conversion.
         if (!rename($outfiles[0], $this->outputFile)) {
             $this->status = false;
             return;
         }
         $this->logger->debugTranslate('epubconversion.converter.endLog');
     } finally {
         // Clean up after ourselves on success and on every failure.
         $this->del_dir($thisTmp);
     }
 }
Ejemplo n.º 9
0
 /**
  * Execute the pandoc conversion.
  *
  * Takes a text file listing the reference ids used in the NLMXML
  * document (each prefixed with @, one per line) and a BibTeX file
  * containing the bibliography, and formats the references according to
  * the provided citation style file. The success flag and captured output
  * are stored in $this->status and $this->output.
  *
  * @return void
  */
 protected function execute()
 {
     $pandoc = new Command();

     // Pandoc expects $HOME to be set, so the command is prefixed with a
     // HOME assignment pointing at the output path.
     $homePrefix = 'HOME=' . $this->outputPath . ' ';
     $pandoc->setCommand($homePrefix . $this->config['command']);

     // Typographically correct output
     $pandoc->addSwitch('--smart');
     // Bibliography file
     $pandoc->addSwitch('--bibliography', $this->inputFileBibtex);
     // Citation style file
     $pandoc->addSwitch('--csl', $this->citationStyleFile);
     // Reference file
     $pandoc->addArgument($this->referencesFile);

     $commandLine = $pandoc->getCommand();
     $this->logger->debugTranslate('citationstyleconversion.converter.pandoc.commandLog', $commandLine);

     // Run pandoc and record its outcome.
     $pandoc->execute();
     $this->status = $pandoc->isSuccess();
     $this->output = $pandoc->getOutputString();

     $this->logger->debugTranslate('citationstyleconversion.converter.OutputLog', $pandoc->getOutputString());
 }
Ejemplo n.º 10
0
 /**
  * Add the XMP sidecar to the PDF document.
  *
  * Runs the configured exiftool command with -TagsFromFile, naming the
  * XMP file first and the PDF file second. The success flag and the
  * combined STDOUT/STDERR output are stored in $this->status and
  * $this->output.
  *
  * @return void
  */
 protected function addXmpSidecar()
 {
     $exiftool = new Command();
     $exiftool->setCommand($this->config['exiftool']['command']);

     // Allow duplicates to be extracted, and be verbose.
     $exiftool->addSwitch('-duplicates');
     $exiftool->addSwitch('-verbose');

     // Read the tags from the XMP sidecar ...
     $exiftool->addSwitch('-TagsFromFile');
     $exiftool->addArgument($this->outputFileXmp);
     // ... with the PDF file as the other file argument.
     $exiftool->addArgument($this->inputFilePdf);

     // Redirect STDERR to STDOUT to capture it in $this->output
     $exiftool->addRedirect('2>&1');

     $commandLine = $exiftool->getCommand();
     $this->logger->debugTranslate('xmpconversion.exiftool.executePdfCommandLog', $commandLine);

     // Add the XMP sidecar and record the outcome.
     $exiftool->execute();
     $this->status = $exiftool->isSuccess();
     $this->output = $exiftool->getOutputString();

     $this->logger->debugTranslate('xmpconversion.exiftool.executePdfCommandOutputLog', $this->output);
 }
Ejemplo n.º 11
0
 /**
  * Convert the document with the configured unoconv command.
  *
  * Optional verbosity (-vvv) and filter (-f) switches are added when
  * $this->verbose / $this->filter are set. The success flag and the
  * combined STDOUT/STDERR output are stored in $this->status and
  * $this->output.
  *
  * @return void
  *
  * @throws \Exception When no output file or no input file is set
  */
 public function convert()
 {
     $unoconv = new Command();

     // Base command.  Note that unoconv will not work unless HOME points
     // to a writeable directory; this method does not set HOME itself.
     $unoconv->setCommand($this->config['command']);

     // Optional verbosity switch
     if ($this->verbose) {
         $unoconv->addSwitch('-vvv');
     }

     // Optional filter
     if ($this->filter) {
         $unoconv->addSwitch('-f', $this->filter);
     }

     // Output file (mandatory)
     if (!$this->outputFile) {
         throw new \Exception('No output file given');
     }
     $unoconv->addSwitch('-o', $this->outputFile);

     // Input file (mandatory)
     if (!$this->inputFile) {
         throw new \Exception('No input file given');
     }
     $unoconv->addArgument($this->inputFile);

     // Redirect STDERR to STDOUT to capture it in $this->output
     $unoconv->addRedirect('2>&1');

     $commandLine = $unoconv->getCommand();
     $this->logger->debugTranslate('docxconversion.unoconv.executeCommandLog', $commandLine);

     // Run the conversion and record its outcome.
     $unoconv->execute();
     $this->status = $unoconv->isSuccess();
     $this->output = $unoconv->getOutputString();

     $this->logger->debugTranslate('docxconversion.unoconv.executeCommandOutputLog', $this->getOutput());
 }