/**
 * Convert the references XML to BibTeX.
 *
 * Runs the configured command on the input file with STDERR discarded and,
 * if the command succeeds, writes its captured output to the output file.
 * The status is false when the command fails or when the output file
 * cannot be written.
 *
 * @return void
 */
public function convert()
{
    $this->logger->debugTranslate('bibtexconversion.converter.startLog');

    $command = new Command();
    $command->setCommand($this->config['command']);
    $command->addArgument($this->inputFile);
    // Discard STDERR
    $command->addRedirect('2> /dev/null');

    $this->logger->debugTranslate(
        'bibtexconversion.converter.xml2bib.commandLog',
        $command->getCommand()
    );

    // Run the xml2bib conversion
    $command->execute();
    $output = $command->getOutputString();

    $this->logger->debugTranslate('bibtexconversion.converter.OutputLog', $output);

    $this->status = $command->isSuccess();
    if ($this->status) {
        // file_put_contents() returns false on failure; a failed write must
        // not be reported as a successful conversion.
        $this->status = file_put_contents($this->outputFile, $output) !== false;
    }
}
/**
 * Extract content from the document.
 *
 * Runs the CERMINE PdfNLMContentExtractor class through the configured JRE.
 * The extractor's STDOUT is redirected into the output file, while its
 * STDERR is routed to the stream captured in $this->output.
 *
 * @return void
 */
public function convert()
{
    // In the future, we might break this into multiple different
    // kinds of actions supported by CERMINE, but for now it only
    // performs extraction.
    $this->logger->infoTranslate('cermine.cermine.startExtraction');

    $command = new Command();

    // Run Java or JRE...
    $command->setCommand($this->config['jre']);
    // ... with CERMINE in the classpath, ...
    $command->addSwitch('-cp', $this->config['cerminejar']);
    // ... the content extraction command, ...
    $command->addArgument('pl.edu.icm.cermine.PdfNLMContentExtractor');
    // ... the input file, ...
    $command->addSwitch('-path', $this->inputFile);

    // Send STDERR to STDOUT, so we can capture it, but send STDOUT to our
    // destination. The redirect is passed as a raw shell fragment, so the
    // path is quoted here to survive spaces and shell metacharacters.
    $command->addRedirect('2>&1 >' . escapeshellarg($this->outputFile));

    $this->logger->debugTranslate(
        'cermine.cermine.executeCommandLog',
        $command->getCommand()
    );

    // Execute the conversion
    $command->execute();

    $this->status = $command->isSuccess();
    $this->output = $command->getOutputString();

    $this->logger->debugTranslate(
        'cermine.cermine.executeCommandOutputLog',
        $this->getOutput()
    );
}
/**
 * Convert the document.
 *
 * Extracts the article body text into a temporary file, pipes it through
 * the configured NER command and model, parses the command output into
 * named entities and writes them as pretty-printed JSON to the output file.
 *
 * @return void
 *
 * @throws \Exception If the NER command does not run successfully
 */
public function convert()
{
    // Load the article body text into a temporary file
    $filepath = $this->extractArticleBodyTextIntoTempFile();

    $this->logger->infoTranslate('ner.extractor.startExtraction');

    $command = new Command();
    $command->setCommand("cat {$filepath} | {$this->config['ner']} {$this->config['model']}");

    $this->logger->debugTranslate('ner.extractor.executeCommandLog', $command->getCommand());

    // Execute the conversion
    $command->execute();

    $this->status = $command->isSuccess();
    $this->output = $command->getOutputString();

    // The temporary file is only needed while the command runs. Remove it
    // before anything below can throw, so a failed run does not leak it.
    if (file_exists($filepath)) {
        unlink($filepath);
    }

    if (!$this->status) {
        throw new \Exception("NER command did not run successfully");
    }

    // Extract named entities from the command output
    $entities = $this->parseCommandOutput($this->output);

    // Serialize to a JSON document.
    // NOTE(review): utf8_encode() is deprecated as of PHP 8.2; it is kept
    // here so the target file name stays exactly as before.
    $written = file_put_contents(
        utf8_encode($this->outputFile),
        json_encode($entities, JSON_PRETTY_PRINT)
    );
    if ($written === false) {
        // Do not report success when the result could not be stored
        $this->status = false;
    }
}
/**
 * Converts the output from the Bibtex conversion into an XML document
 * using the configured biblatex2xml command.
 *
 * The command's STDERR is discarded. $this->status is set from the command
 * result; it is left untouched when only the XML parsing fails.
 *
 * @return DOMDocument|false Document containing a parsed reference list, or
 *                           false if the command fails or its output cannot
 *                           be parsed as XML
 */
protected function biblatex2xmlConvert()
{
    $command = new Command();
    $command->setCommand($this->config['command']);
    $command->addArgument($this->inputFileBibtex);
    // Discard STDERR
    $command->addRedirect('2> /dev/null');

    $this->logger->debugTranslate(
        'bibtexreferencesconversion.converter.biblatex2xml.commandLog',
        $command->getCommand()
    );

    // Run the biblatex2xml conversion
    $command->execute();
    $xml = $command->getOutputString();

    $this->logger->debugTranslate('bibtexreferencesconversion.converter.OutputLog', $xml);

    $this->status = $command->isSuccess();
    if (!$this->status) {
        return false;
    }

    $dom = new DOMDocument();

    // DOMDocument::loadXML() does not report an empty source through libxml;
    // depending on the PHP version it emits a warning or throws. Treat empty
    // output like any other unparsable result instead.
    if ((string) $xml === '' || !$dom->loadXML($xml)) {
        $this->logger->debugTranslate(
            'bibtexreferencesconversion.converter.biblatex2xml.noDOMLog',
            $this->libxmlErrors()
        );
        return false;
    }

    return $dom;
}
/**
 * Do the wkhtmltopdf conversion.
 *
 * Converts the file "document.html" inside the temporary output path into
 * the output file, using the configured wkhtmltopdf command. STDERR is
 * merged into STDOUT so that it ends up in $this->output.
 *
 * @return void
 *
 * @throws \Exception If the temporary output path or the output file is not set
 */
protected function execute()
{
    $command = new Command();

    // Set the base command
    $command->setCommand($this->config['wkhtmltopdf']['command']);

    // Add the input file. The path is always a non-empty string once
    // '/document.html' is appended, so the directory it is built from is
    // what has to be validated.
    if ((string) $this->outputTmpPath === '') {
        throw new \Exception('No input file given');
    }
    $inputFile = $this->outputTmpPath . '/document.html';
    $command->addArgument($inputFile);

    // Add the output file
    if (!$this->outputFile) {
        throw new \Exception('No output file given');
    }
    $command->addArgument($this->outputFile);

    // Redirect STDERR to STDOUT to capture it in $this->output
    $command->addRedirect('2>&1');

    $this->logger->debugTranslate(
        'pdfconversion.wkhtmltopdf.executeCommandLog',
        $command->getCommand()
    );

    // Execute the conversion
    $command->execute();

    $this->status = $command->isSuccess();
    $this->output = $command->getOutputString();

    $this->logger->debugTranslate(
        'pdfconversion.wkhtmltopdf.executeCommandOutputLog',
        $this->output
    );
}
/**
 * Runs the citation parser.
 *
 * Invokes the configured command in "extract_citations" mode on the given
 * file, stores the result in $this->status and $this->output, and then
 * passes the file to parsCitCleanup().
 *
 * @param string $referencesFile Reference file to parse
 *
 * @return void
 */
protected function parsCitExecute($referencesFile)
{
    // Assemble the ParsCit invocation
    $parsCit = new Command();
    $parsCit->setCommand($this->config['command']);
    $parsCit->addSwitch('-m', 'extract_citations');
    $parsCit->addArgument($referencesFile);

    $commandLine = $parsCit->getCommand();
    $this->logger->debugTranslate(
        'referencesconversion.converter.parsCit.commandLog',
        $commandLine
    );

    // Perform the extraction and record its outcome
    $parsCit->execute();
    $this->status = $parsCit->isSuccess();
    $this->output = $parsCit->getOutputString();

    // Clean up regardless of whether the run succeeded
    $this->parsCitCleanup($referencesFile);
}
/**
 * Convert the document.
 *
 * Calls the configured command for a "docx" input with the debug and
 * no-git switches, passing the input file and the output directory. The
 * command's STDOUT is discarded; its STDERR is what gets captured in
 * $this->output.
 *
 * @return void
 *
 * @throws \Exception If the input file or the output directory is not set
 */
public function convert()
{
    $meTypeset = new Command();

    // PYTHONIOENCODING has to be exported first, otherwise Python fails
    // with unicode issues
    $baseCommand = 'export PYTHONIOENCODING=UTF-8; ' . $this->config['command'];
    $meTypeset->setCommand($baseCommand);

    // Debug output on, git debug filesystem off
    $meTypeset->addSwitch('-d');
    $meTypeset->addSwitch('--nogit');

    // Type of the input document
    $meTypeset->addArgument('docx');

    // Input file (mandatory)
    if (!$this->inputFile) {
        throw new \Exception('No input file given');
    }
    $meTypeset->addArgument($this->inputFile);

    // Output directory (mandatory)
    if (!$this->outputDirectory) {
        throw new \Exception('No output directory given');
    }
    $meTypeset->addArgument($this->outputDirectory);

    // Route STDERR to the captured stream and drop the regular STDOUT
    $meTypeset->addRedirect('2>&1 >/dev/null');

    $this->logger->debugTranslate(
        'nlmxmlconversion.metypeset.executeCommandLog',
        $meTypeset->getCommand()
    );

    // Run the conversion and keep its result
    $meTypeset->execute();
    $this->status = $meTypeset->isSuccess();
    $this->output = $meTypeset->getOutputString();

    $this->logger->debugTranslate(
        'nlmxmlconversion.metypeset.executeCommandOutputLog',
        $this->output
    );
}
/**
 * Convert the NLM XML document to Epub.
 *
 * Creates a private scratch directory with mktemp, runs the configured
 * jats2epub command from inside it (optionally passing a copy of the
 * "metypeset/media" directory found next to the input file), and moves the
 * single resulting .epub file to the output file. The scratch directory is
 * removed on success and on every failure after it has been created.
 *
 * $this->status is false if any step fails. UNIX only.
 *
 * @return void
 */
public function convert()
{
    $this->logger->debugTranslate('epubconversion.converter.startLog');

    // This mktemp code should probably be factored out.
    $this->logger->debugTranslate('epubconversion.converter.startMktemp');

    // The jats2epub script uses hardcoded directories for output. We make
    // our own temp directory for those to live in, so we can clean them up
    // without stepping on other conversions' toes. UNIX ONLY.
    $sysTmp = rtrim(sys_get_temp_dir(), '/');

    $mktemp = new Command();
    $mktemp->setCommand('mktemp');
    $mktemp->addSwitch('-d');
    $mktemp->addArgument($sysTmp . '/jats2epub.XXXXX');
    $mktemp->addRedirect('2>&1');
    $mktemp->execute();

    // Guard against stray whitespace and against an empty or bogus path:
    // the directory is used for "cd" and is later deleted recursively.
    $thisTmp = trim((string) $mktemp->getOutputString());
    if (!$mktemp->isSuccess() || $thisTmp === '' || !is_dir($thisTmp)) {
        $this->logger->infoTranslate(
            'epubconversion.converter.errorMktemp',
            $mktemp->getOutputString()
        );
        $this->status = false;
        return;
    }

    // We're going to cd to the working directory, so we need an absolute
    // path to the command. realpath() returns false if it cannot be
    // resolved; without this check the input file itself would end up in
    // command position.
    $cmdStr = realpath($this->config['command']);
    if ($cmdStr === false) {
        $this->status = false;
        $this->output = 'Command not found: ' . $this->config['command'];
        $this->logger->infoTranslate('epubconversion.converter.errorJats2epub', $this->output);
        $this->del_dir($thisTmp);
        return;
    }

    // Do our conversion work in the scratch directory. Both paths are
    // quoted because setCommand() receives a raw shell string here.
    $command = new Command();
    $command->setCommand('cd ' . escapeshellarg($thisTmp) . ' && ' . escapeshellarg($cmdStr));
    $command->addArgument($this->inputFile);

    // Look for a media directory. If it exists, copy it to a subdirectory
    // of our temp work space, then point jats2epub at that copy.
    $mediaDir = dirname($this->inputFile) . '/metypeset/media';
    if (file_exists($mediaDir)) {
        $jatsMediaDir = $thisTmp . "/extras";
        @mkdir($jatsMediaDir);
        $this->copy_dir($mediaDir, $jatsMediaDir);
        $command->addArgument($jatsMediaDir);
    }

    // Redirect STDERR to STDOUT to capture it in $this->output
    $command->addRedirect('2>&1');

    $this->logger->debugTranslate('epubconversion.converter.startJats2epub');

    // Execute the conversion
    $command->execute();

    $this->status = $command->isSuccess();
    $this->output = $command->getOutputString();

    if (!$this->status) {
        $this->logger->infoTranslate('epubconversion.converter.errorJats2epub', $this->output);
        $this->del_dir($thisTmp);
        return;
    }

    $this->logger->debugTranslate(
        'epubconversion.converter.executeCommandOutputLog',
        $this->output
    );

    // Find the output file(s). glob() returns false on error, which must
    // not be passed to count().
    $outfiles = glob($thisTmp . '/output_final/*.epub');
    if ($outfiles === false || count($outfiles) != 1) {
        $this->status = false;
        $this->logger->infoTranslate('epubconversion.converter.errorGlob');
        $this->del_dir($thisTmp);
        return;
    }

    // Exactly one candidate file: move it to the target. A failed move
    // means there is no output file, so it is reflected in the status.
    $this->status = rename($outfiles[0], $this->outputFile);

    // Clean up after ourselves
    $this->del_dir($thisTmp);

    $this->logger->debugTranslate('epubconversion.converter.endLog');
}
/**
 * Execute the pandoc conversion
 *
 * This step takes a text file that lists the reference id's (prefixed with
 * @, one line each) used in the NLMXML document and a bibtex file
 * containing the bibliography and formats them according to a provided
 * citation style file
 *
 * The result of the run is stored in $this->status and $this->output.
 *
 * @return void
 */
protected function execute()
{
    $command = new Command();

    // Pandoc expects $HOME to be set. The prefix is part of the raw command
    // string, so the path is quoted to keep paths containing spaces or
    // shell metacharacters intact.
    $commandPrefix = 'HOME=' . escapeshellarg($this->outputPath) . ' ';
    $command->setCommand($commandPrefix . $this->config['command']);

    // Produce typographically correct output
    $command->addSwitch('--smart');

    // Add the bibliography file
    $command->addSwitch('--bibliography', $this->inputFileBibtex);

    // Add the citation style file
    $command->addSwitch('--csl', $this->citationStyleFile);

    // Add the reference file
    $command->addArgument($this->referencesFile);

    $this->logger->debugTranslate(
        'citationstyleconversion.converter.pandoc.commandLog',
        $command->getCommand()
    );

    // Run the pandoc conversion
    $command->execute();

    $this->status = $command->isSuccess();
    $this->output = $command->getOutputString();

    $this->logger->debugTranslate(
        'citationstyleconversion.converter.OutputLog',
        $command->getOutputString()
    );
}
/**
 * Add the XMP sidecar to the PDF document.
 *
 * Invokes the configured exiftool command with -TagsFromFile, giving it the
 * XMP output file followed by the PDF input file. STDERR is merged into
 * STDOUT, and the result is stored in $this->status and $this->output.
 *
 * @return void
 */
protected function addXmpSidecar()
{
    $exiftool = new Command();
    $exiftool->setCommand($this->config['exiftool']['command']);

    // Switches: allow duplicates, verbose output, read tags from a file
    $exiftool->addSwitch('-duplicates');
    $exiftool->addSwitch('-verbose');
    $exiftool->addSwitch('-TagsFromFile');

    // Positional arguments: the XMP sidecar first, then the PDF
    $exiftool->addArgument($this->outputFileXmp);
    $exiftool->addArgument($this->inputFilePdf);

    // Merge STDERR into STDOUT so both end up in $this->output
    $exiftool->addRedirect('2>&1');

    $commandLine = $exiftool->getCommand();
    $this->logger->debugTranslate(
        'xmpconversion.exiftool.executePdfCommandLog',
        $commandLine
    );

    // Run exiftool and record the outcome
    $exiftool->execute();
    $this->status = $exiftool->isSuccess();
    $this->output = $exiftool->getOutputString();

    $this->logger->debugTranslate(
        'xmpconversion.exiftool.executePdfCommandOutputLog',
        $this->output
    );
}
/**
 * Convert the document.
 *
 * Builds and runs the configured unoconv command: optional verbosity and
 * filter switches, the mandatory output file (-o) and the input file.
 * STDERR is merged into STDOUT, and the result is stored in $this->status
 * and $this->output.
 *
 * @return void
 *
 * @throws \Exception If the output file or the input file is not set
 */
public function convert()
{
    $unoconv = new Command();

    // Base command. Note that unoconv won't work unless HOME points to a
    // writeable directory; this method does not set HOME itself.
    $unoconv->setCommand($this->config['command']);

    // Optional: verbose output
    if ($this->verbose) {
        $unoconv->addSwitch('-vvv');
    }

    // Optional: conversion filter
    if ($this->filter) {
        $unoconv->addSwitch('-f', $this->filter);
    }

    // Mandatory: where to write the result
    if (!$this->outputFile) {
        throw new \Exception('No output file given');
    }
    $unoconv->addSwitch('-o', $this->outputFile);

    // Mandatory: what to convert
    if (!$this->inputFile) {
        throw new \Exception('No input file given');
    }
    $unoconv->addArgument($this->inputFile);

    // Merge STDERR into STDOUT so both end up in $this->output
    $unoconv->addRedirect('2>&1');

    $commandLine = $unoconv->getCommand();
    $this->logger->debugTranslate(
        'docxconversion.unoconv.executeCommandLog',
        $commandLine
    );

    // Run the conversion and record the outcome
    $unoconv->execute();
    $this->status = $unoconv->isSuccess();
    $this->output = $unoconv->getOutputString();

    $this->logger->debugTranslate(
        'docxconversion.unoconv.executeCommandOutputLog',
        $this->getOutput()
    );
}