Esempio n. 1
0
 /**
  * Convert the word-processor input document to DOCX.
  *
  * Feeds the job's JOB_CONVERSION_STAGE_WP_IN document to the Unoconv
  * converter service (filter "docx7"), targeting "document.docx" inside
  * the job's document path. If the converter reports success, the result
  * is registered on the job as a new document and the job advances to
  * JOB_CONVERSION_STAGE_DOCX; otherwise the job is flagged
  * JOB_STATUS_FAILED and its conversion stage is left untouched.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the job has no WP_IN stage document
  */
 public function process(Job $job)
 {
     $converter = $this->sm->get('DocxConversion\\Model\\Converter\\Unoconv');

     // Look up the source document for this step
     $source = $job->getStageDocument(JOB_CONVERSION_STAGE_WP_IN);
     if (!$source) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Configure the converter and run it
     $converter->setFilter('docx7');
     $converter->setInputFile($source->path);
     $docxPath = $job->getDocumentPath() . '/document.docx';
     $converter->setOutputFile($docxPath);
     $converter->convert();

     if ($converter->getStatus()) {
         // Register the converted file as a new document of the job
         $dao = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
         $result = $dao->getInstance();
         $result->path = $docxPath;
         $result->mimeType = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
         $result->job = $job;
         $result->conversionStage = JOB_CONVERSION_STAGE_DOCX;
         $job->documents[] = $result;
         $job->conversionStage = JOB_CONVERSION_STAGE_DOCX;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 2
0
 /**
  * Run named-entity extraction on the merged XML document.
  *
  * Hands the job's JOB_CONVERSION_STAGE_XML_MERGE document to the
  * NERExtractor service, targeting "named-entities.json" inside the job's
  * document path. On success the JSON file is registered on the job and
  * the job advances to JOB_CONVERSION_STAGE_NER_EXTRACT; on failure the
  * job is flagged JOB_STATUS_FAILED and its stage is left untouched.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the job has no XML_MERGE stage document
  */
 public function process(Job $job)
 {
     $ner = $this->sm->get('NERExtraction\\Model\\Converter\\NERExtractor');

     // The extractor reads the merged XML document
     $mergedXml = $job->getStageDocument(JOB_CONVERSION_STAGE_XML_MERGE);
     if (!$mergedXml) {
         throw new \Exception("Couldn't find the stage document");
     }

     // Run the extraction
     $ner->setInputFile($mergedXml->path);
     $jsonPath = $job->getDocumentPath() . '/named-entities.json';
     $ner->setOutputFile($jsonPath);
     $ner->convert();

     if ($ner->getStatus()) {
         // Register the extracted entities as a new document of the job
         $dao = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
         $entities = $dao->getInstance();
         $entities->path = $jsonPath;
         $entities->mimeType = 'application/json';
         $entities->job = $job;
         $entities->conversionStage = JOB_CONVERSION_STAGE_NER_EXTRACT;
         $job->documents[] = $entities;
         $job->conversionStage = JOB_CONVERSION_STAGE_NER_EXTRACT;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 3
0
 /**
  * Produce a Bibtex file from the parsed references document.
  *
  * Passes the job's JOB_CONVERSION_STAGE_REFERENCES document to the Bibtex
  * converter service, targeting "document.bib" inside the job's document
  * path. The job is moved to JOB_CONVERSION_STAGE_BIBTEX as soon as the
  * converter has run, regardless of its outcome; on failure the job is
  * additionally flagged JOB_STATUS_FAILED and no document is registered.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the job has no REFERENCES stage document
  */
 public function process(Job $job)
 {
     $converter = $this->sm->get('BibtexConversion\\Model\\Converter\\Bibtex');

     // Look up the references document produced by an earlier step
     $source = $job->getStageDocument(JOB_CONVERSION_STAGE_REFERENCES);
     if (!$source) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Run the Bibtex conversion
     $bibPath = $job->getDocumentPath() . '/document.bib';
     $converter->setInputFile($source->path);
     $converter->setOutputFile($bibPath);
     $converter->convert();

     // The stage is recorded before the status is inspected
     $job->conversionStage = JOB_CONVERSION_STAGE_BIBTEX;

     if ($converter->getStatus()) {
         // Register the Bibtex file as a new document of the job
         $dao = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
         $result = $dao->getInstance();
         $result->path = $bibPath;
         $result->job = $job;
         $result->conversionStage = JOB_CONVERSION_STAGE_BIBTEX;
         $job->documents[] = $result;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 4
0
 /**
  * Bundle the job's output documents into a zip archive.
  *
  * Creates "documents.zip" inside the job's document path, adds every
  * document returned by Job::getOutputDocuments() and registers the
  * archive on the job as a new document in JOB_CONVERSION_STAGE_ZIP.
  * Entries are stored under their path relative to the job's document
  * path when they live below it, and under their full path otherwise.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the zip extension is missing, or the archive
  *                    cannot be opened, filled or written
  */
 public function process(Job $job)
 {
     if (!class_exists('\\ZipArchive')) {
         throw new \Exception('Zip extension is missing');
     }

     $documentPath = $job->getDocumentPath();
     $zipFile = $documentPath . '/documents.zip';

     // CREATE | OVERWRITE always starts from an empty archive, so a stale
     // file that could not be removed is never silently appended to.
     $zip = new \ZipArchive();
     if ($zip->open($zipFile, \ZipArchive::CREATE | \ZipArchive::OVERWRITE) !== true) {
         throw new \Exception('Couldn\'t create zip archive');
     }

     $prefix = $documentPath . '/';
     $prefixLength = strlen($prefix);
     foreach ($job->getOutputDocuments() as $document) {
         // Strip the document directory only when it is a leading prefix;
         // a plain str_replace would also remove it from inside the path.
         $entryName = $document->path;
         if (strncmp($entryName, $prefix, $prefixLength) === 0) {
             $entryName = substr($entryName, $prefixLength);
         }

         if (!$zip->addFile($document->path, $entryName)) {
             throw new \Exception('Couldn\'t add "' . $document->path . '" to the zip archive');
         }
     }

     // close() returns false when writing the archive fails, so its
     // result decides whether the archive may be registered.
     if (!$zip->close()) {
         throw new \Exception('Couldn\'t write zip archive');
     }

     // Register the archive as a new document of the job
     $documentDAO = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
     $zipDocument = $documentDAO->getInstance();
     $zipDocument->path = $zipFile;
     $zipDocument->job = $job;
     $zipDocument->conversionStage = JOB_CONVERSION_STAGE_ZIP;
     $job->documents[] = $zipDocument;
     $job->conversionStage = JOB_CONVERSION_STAGE_ZIP;

     return $job;
 }
Esempio n. 5
0
 /**
  * Extract the references from the job's XML document.
  *
  * Uses the JOB_CONVERSION_STAGE_PDF_EXTRACT document for PDF input and
  * the JOB_CONVERSION_STAGE_NLMXML document otherwise, and passes it to
  * the References converter service targeting "document.bib.xml" inside
  * the job's document path. The job is moved to
  * JOB_CONVERSION_STAGE_REFERENCES once the converter has run; on failure
  * it is additionally flagged JOB_STATUS_FAILED. On success the output is
  * registered on the job and $job->referenceParsingSuccess is set to true.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the required stage document is missing
  */
 public function process(Job $job)
 {
     $parser = $this->sm->get('ReferencesConversion\\Model\\Converter\\References');

     // The source stage depends on the kind of input the job started with
     $sourceStage = $job->inputFileFormat == JOB_INPUT_TYPE_PDF
         ? JOB_CONVERSION_STAGE_PDF_EXTRACT
         : JOB_CONVERSION_STAGE_NLMXML;
     $source = $job->getStageDocument($sourceStage);
     if (!$source) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Run the reference parser
     $referencesPath = $job->getDocumentPath() . '/document.bib.xml';
     $parser->setInputFile($source->path);
     $parser->setOutputDirectory($job->getDocumentPath());
     $parser->setOutputFile($referencesPath);
     $parser->convert();

     // The stage is recorded before the status is inspected
     $job->conversionStage = JOB_CONVERSION_STAGE_REFERENCES;

     if ($parser->getStatus()) {
         // Register the parsed references as a new document of the job
         $dao = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
         $result = $dao->getInstance();
         $result->path = $referencesPath;
         $result->job = $job;
         $result->conversionStage = JOB_CONVERSION_STAGE_REFERENCES;
         $job->documents[] = $result;

         // Mark the reference parsing as successful; this flag
         // influences which conversion steps will be executed.
         $job->referenceParsingSuccess = true;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 6
0
 /**
  * Apply the job's citation style to the HTML document.
  *
  * Collects three stage documents (NLMXML, Bibtex and HTML) and passes
  * their paths, together with $job->citationStyleFile, to the Pandoc
  * converter service. The job is moved to
  * JOB_CONVERSION_STAGE_CITATIONSTYLE once the converter has run; on
  * failure it is additionally flagged JOB_STATUS_FAILED. No new document
  * is created: on success the existing HTML document is re-tagged with
  * the job's new conversion stage.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if any of the three stage documents is missing
  */
 public function process(Job $job)
 {
     $pandoc = $this->sm->get('CitationstyleConversion\\Model\\Converter\\Pandoc');

     // Prefer the NLMXML with replaced references; if that step did not
     // produce a document, use the NLMXML from before the replacement
     $nlmxml = $job->getStageDocument(JOB_CONVERSION_STAGE_BIBTEXREFERENCES)
         ?: $job->getStageDocument(JOB_CONVERSION_STAGE_NLMXML);
     if (!$nlmxml) {
         throw new \Exception('Couldn\'t find the NLMXML stage document');
     }

     // Bibtex document
     $bibtex = $job->getStageDocument(JOB_CONVERSION_STAGE_BIBTEX);
     if (!$bibtex) {
         throw new \Exception('Couldn\'t find the Bibtex stage document');
     }

     // HTML document
     $html = $job->getStageDocument(JOB_CONVERSION_STAGE_HTML);
     if (!$html) {
         throw new \Exception('Couldn\'t find the HTML stage document');
     }

     // Run the citation style conversion
     $pandoc->setInputFileNlmxml($nlmxml->path);
     $pandoc->setInputFileBibtex($bibtex->path);
     $pandoc->setInputFileHtml($html->path);
     $pandoc->setCitationStyleFile($job->citationStyleFile);
     $pandoc->convert();

     // The stage is recorded before the status is inspected
     $job->conversionStage = JOB_CONVERSION_STAGE_CITATIONSTYLE;

     if ($pandoc->getStatus()) {
         // Re-tag the HTML document with the job's new stage
         $html->conversionStage = $job->conversionStage;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 7
0
 /**
  * Convert the job's NLMXML document to Epub.
  *
  * Picks the first available document among the XML_MERGE,
  * BIBTEXREFERENCES and PDF_EXTRACT stages (in that order) and passes it
  * to the Epub converter service, targeting "document.epub" inside the
  * job's document path. The job is moved to JOB_CONVERSION_STAGE_EPUB once
  * the converter has run; on failure it is additionally flagged
  * JOB_STATUS_FAILED and no document is registered.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if none of the candidate stage documents exists
  */
 public function process(Job $job)
 {
     $converter = $this->sm->get('EpubConversion\\Model\\Converter\\Epub');

     // Candidate source stages in order of preference: merged XML,
     // reference-corrected XML, XML as extracted from PDF.
     $candidateStages = array(
         JOB_CONVERSION_STAGE_XML_MERGE,
         JOB_CONVERSION_STAGE_BIBTEXREFERENCES,
         JOB_CONVERSION_STAGE_PDF_EXTRACT,
     );
     $source = null;
     foreach ($candidateStages as $stage) {
         $source = $job->getStageDocument($stage);
         if ($source) {
             break;
         }
     }
     if (!$source) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Run the Epub conversion
     $epubPath = $job->getDocumentPath() . '/document.epub';
     $converter->setInputFile($source->path);
     $converter->setOutputFile($epubPath);
     $converter->convert();

     // The stage is recorded before the status is inspected
     $job->conversionStage = JOB_CONVERSION_STAGE_EPUB;

     if ($converter->getStatus()) {
         // Register the Epub file as a new document of the job
         $dao = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
         $result = $dao->getInstance();
         $result->path = $epubPath;
         $result->job = $job;
         $result->conversionStage = JOB_CONVERSION_STAGE_EPUB;
         $job->documents[] = $result;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 8
0
 /**
  * Set flags and state depending on input
  *
  * Detects the MIME type of the job's JOB_CONVERSION_STAGE_UNCONVERTED
  * document. A type of "application/pdf" marks the job as PDF input;
  * every other result, including a failed detection, marks it as
  * word-processor input. The chosen stage (PDF_IN or WP_IN) is written
  * to both the job and the input document.
  *
  * @param Job $job
  *
  * @return Job $job
  *
  * @throws \Exception if input stage document can’t be found
  */
 public function process(Job $job)
 {
     // Fetch the initial input document.
     $unconvertedDocument = $job->getStageDocument(JOB_CONVERSION_STAGE_UNCONVERTED);
     if (!$unconvertedDocument) {
         throw new \Exception("Couldn’t find the initial input document");
     }

     // Use the object API so the fileinfo handle is released when $finfo
     // goes out of scope; the handle from finfo_open() was never closed.
     $finfo = new \finfo(FILEINFO_MIME_TYPE);
     $mimeType = $finfo->file($unconvertedDocument->path);

     // Strict comparison: a failed detection must never match.
     if ($mimeType === 'application/pdf') {
         $inputFileFormat = JOB_INPUT_TYPE_PDF;
         $inputStage = JOB_CONVERSION_STAGE_PDF_IN;
     } else {
         $inputFileFormat = JOB_INPUT_TYPE_WP;
         $inputStage = JOB_CONVERSION_STAGE_WP_IN;
     }

     $job->inputFileFormat = $inputFileFormat;
     $job->conversionStage = $inputStage;
     $unconvertedDocument->conversionStage = $inputStage;

     return $job;
 }
Esempio n. 9
0
 /**
  * Convert the DOCX document to NLMXML with meTypeset.
  *
  * Runs the Metypeset converter service on the job's
  * JOB_CONVERSION_STAGE_DOCX document, writing into the "metypeset"
  * sub-directory of the job's document path. The expected result,
  * "nlm/<input file name>.xml" inside that directory, is copied to
  * "document_metypeset.xml" and registered on the job as a new document in
  * JOB_CONVERSION_STAGE_NLMXML. The job is flagged JOB_STATUS_FAILED, and
  * no document is registered, when the converter reports failure, the
  * expected file is missing, or the copy fails.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the job has no DOCX stage document
  */
 public function process(Job $job)
 {
     $metypeset = $this->sm->get('NlmxmlConversion\\Model\\Converter\\Metypeset');

     // Fetch the document to convert
     $docxDocument = $job->getStageDocument(JOB_CONVERSION_STAGE_DOCX);
     if (!$docxDocument) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Convert the document
     $metypeset->setInputFile($docxDocument->path);
     $outputDirectory = $job->getDocumentPath() . '/metypeset';
     $metypeset->setOutputDirectory($outputDirectory);
     $metypeset->convert();

     // The result is looked up under nlm/, named after the input file
     $xmlFile = $docxDocument->getFileName(true) . '.xml';
     $meTypesetOutputPath = $outputDirectory . '/nlm/' . $xmlFile;
     if (!$metypeset->getStatus() || !file_exists($meTypesetOutputPath)) {
         $job->status = JOB_STATUS_FAILED;
         return $job;
     }

     // A failed copy would otherwise register a file that does not exist,
     // so it is treated like any other conversion failure.
     $outputPath = $job->getDocumentPath() . '/document_metypeset.xml';
     if (!@copy($meTypesetOutputPath, $outputPath)) {
         $job->status = JOB_STATUS_FAILED;
         return $job;
     }

     // Register the NLMXML file as a new document of the job
     $documentDAO = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
     $nlmxmlDocument = $documentDAO->getInstance();
     $nlmxmlDocument->path = $outputPath;
     $nlmxmlDocument->job = $job;
     $nlmxmlDocument->conversionStage = JOB_CONVERSION_STAGE_NLMXML;
     $job->documents[] = $nlmxmlDocument;
     $job->conversionStage = JOB_CONVERSION_STAGE_NLMXML;

     return $job;
 }
Esempio n. 10
0
 /**
  * Extract XML content from the job's PDF with CERMINE.
  *
  * Uses the JOB_CONVERSION_STAGE_PDF_IN document for PDF input and the
  * JOB_CONVERSION_STAGE_WP_PDF document otherwise, and passes it to the
  * Cermine converter service targeting "document_from_pdf.xml" inside the
  * job's document path. On success the XML file is registered on the job
  * and the job advances to JOB_CONVERSION_STAGE_PDF_EXTRACT; on failure
  * the job is flagged JOB_STATUS_FAILED and its stage is left untouched.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the required PDF stage document is missing
  */
 public function process(Job $job)
 {
     $extractor = $this->sm->get('Cermine\\Model\\Converter\\Cermine');

     // The source stage depends on the kind of input the job started with
     $sourceStage = $job->inputFileFormat == JOB_INPUT_TYPE_PDF
         ? JOB_CONVERSION_STAGE_PDF_IN
         : JOB_CONVERSION_STAGE_WP_PDF;
     $source = $job->getStageDocument($sourceStage);
     if (!$source) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Run the extraction
     $extractor->setInputFile($source->path);
     $xmlPath = $job->getDocumentPath() . '/document_from_pdf.xml';
     $extractor->setOutputFile($xmlPath);
     $extractor->convert();

     if ($extractor->getStatus()) {
         // Register the extracted XML as a new document of the job
         $dao = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
         $result = $dao->getInstance();
         $result->path = $xmlPath;
         $result->mimeType = 'application/xml';
         $result->job = $job;
         $result->conversionStage = JOB_CONVERSION_STAGE_PDF_EXTRACT;
         $job->documents[] = $result;
         $job->conversionStage = JOB_CONVERSION_STAGE_PDF_EXTRACT;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 11
0
 /**
  * Create and add the XMP sidecar
  *
  * Passes the job's NLMXML document and its JOB_CONVERSION_STAGE_PDF
  * document to the Xmp converter service. The job is moved to
  * JOB_CONVERSION_STAGE_XMP once the converter has run; on failure it is
  * additionally flagged JOB_STATUS_FAILED. No new document is created: on
  * success the existing PDF document is re-tagged as
  * JOB_CONVERSION_STAGE_XMP.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the NLMXML or the PDF stage document is missing
  */
 public function process(Job $job)
 {
     $xmp = $this->sm->get('XmpConversion\\Model\\Converter\\Xmp');

     // Prefer the NLMXML with replaced references; if that step did not
     // produce a document, use the NLMXML from before the replacement
     $nlmxml = $job->getStageDocument(JOB_CONVERSION_STAGE_BIBTEXREFERENCES)
         ?: $job->getStageDocument(JOB_CONVERSION_STAGE_NLMXML);
     if (!$nlmxml) {
         throw new \Exception('Couldn\'t find the NLMXML stage document');
     }

     // PDF document the converter operates on
     $pdfDocument = $job->getStageDocument(JOB_CONVERSION_STAGE_PDF);
     if (!$pdfDocument) {
         throw new \Exception('Couldn\'t find the PDF stage document');
     }

     $xmp->setInputFileNlmxml($nlmxml->path);
     $xmp->setInputFilePdf($pdfDocument->path);
     $xmp->convert();

     // The stage is recorded before the status is inspected
     $job->conversionStage = JOB_CONVERSION_STAGE_XMP;

     if ($xmp->getStatus()) {
         // Nothing new was created; only the PDF document's stage changes
         $pdfDocument->conversionStage = JOB_CONVERSION_STAGE_XMP;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 12
0
 /**
  * Convert the HTML to PDF
  *
  * Uses the job's JOB_CONVERSION_STAGE_CITATIONSTYLE document when there
  * is one and the JOB_CONVERSION_STAGE_HTML document otherwise, and passes
  * it to the Pdf converter service targeting "document.pdf" inside the
  * job's document path. The job is moved to JOB_CONVERSION_STAGE_PDF once
  * the converter has run; on failure it is additionally flagged
  * JOB_STATUS_FAILED and no document is registered.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if neither stage document exists
  */
 public function process(Job $job)
 {
     $converter = $this->sm->get('PdfConversion\\Model\\Converter\\Pdf');

     // Prefer the document with converted citations and fall back to the
     // unconverted HTML stage document
     $source = $job->getStageDocument(JOB_CONVERSION_STAGE_CITATIONSTYLE);
     if (!$source) {
         $source = $job->getStageDocument(JOB_CONVERSION_STAGE_HTML);
     }
     if (!$source) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Run the PDF conversion
     $pdfPath = $job->getDocumentPath() . '/document.pdf';
     $converter->setInputFile($source->path);
     $converter->setOutputFile($pdfPath);
     $converter->convert();

     // The stage is recorded before the status is inspected
     $job->conversionStage = JOB_CONVERSION_STAGE_PDF;

     if ($converter->getStatus()) {
         // Register the PDF file as a new document of the job
         $dao = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
         $result = $dao->getInstance();
         $result->path = $pdfPath;
         $result->job = $job;
         $result->conversionStage = JOB_CONVERSION_STAGE_PDF;
         $job->documents[] = $result;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 13
0
 /**
  * Replace reference list
  *
  * Passes the job's XML document (PDF_EXTRACT stage for PDF input, NLMXML
  * stage otherwise) and its JOB_CONVERSION_STAGE_BIBTEX document to the
  * Bibtexreferences converter service. The job is moved to
  * JOB_CONVERSION_STAGE_BIBTEXREFERENCES once the converter has run; on
  * failure it is additionally flagged JOB_STATUS_FAILED. No new document
  * is created: on success the XML document is re-tagged as
  * JOB_CONVERSION_STAGE_BIBTEXREFERENCES.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the XML or the Bibtex stage document is missing
  */
 public function process(Job $job)
 {
     $replacer = $this->sm->get('BibtexreferencesConversion\\Model\\Converter\\Bibtexreferences');

     // XML document whose references are to be replaced; its stage
     // depends on the kind of input the job started with
     $xmlStage = $job->inputFileFormat == JOB_INPUT_TYPE_PDF
         ? JOB_CONVERSION_STAGE_PDF_EXTRACT
         : JOB_CONVERSION_STAGE_NLMXML;
     $xml = $job->getStageDocument($xmlStage);
     if (!$xml) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Bibtex document that supplies the new reference list
     $bibtex = $job->getStageDocument(JOB_CONVERSION_STAGE_BIBTEX);
     if (!$bibtex) {
         throw new \Exception('Couldn\'t find the stage document');
     }

     // Run the replacement
     $replacer->setInputFileNlmxml($xml->path);
     $replacer->setInputFileBibtex($bibtex->path);
     $replacer->convert();

     // The stage is recorded before the status is inspected
     $job->conversionStage = JOB_CONVERSION_STAGE_BIBTEXREFERENCES;

     if ($replacer->getStatus()) {
         // Nothing new was created; only the XML document's stage changes
         $xml->conversionStage = JOB_CONVERSION_STAGE_BIBTEXREFERENCES;
     } else {
         $job->status = JOB_STATUS_FAILED;
     }

     return $job;
 }
Esempio n. 14
0
 /**
  * Merge the XML outputs to one file.
  *
  * Produces "document.xml" inside the job's document path and registers
  * it on the job as a new document in JOB_CONVERSION_STAGE_XML_MERGE.
  *
  * - PDF input: the CERMINE document (BIBTEXREFERENCES stage if present,
  *   PDF_EXTRACT stage otherwise) is copied as is.
  * - Other input: the meTypeset document (BIBTEXREFERENCES stage if
  *   present, NLMXML stage otherwise) and the PDF_EXTRACT document are
  *   combined by the Merge converter service.
  *
  * The job is moved to JOB_CONVERSION_STAGE_XML_MERGE before the copy or
  * merge runs. If either fails, the job is flagged JOB_STATUS_FAILED and
  * no document is registered.
  *
  * @param Job $job
  * @return Job $job
  * @throws \Exception if the CERMINE or meTypeset output is missing
  */
 public function process(Job $job)
 {
     $mergedXML = $this->sm->get('MergeXMLOutputs\\Model\\Converter\\Merge');

     // For PDF input the reference-corrected document is preferred; in
     // every other case the document extracted from the PDF is used.
     $cermineDocument = null;
     if ($job->inputFileFormat == JOB_INPUT_TYPE_PDF) {
         $cermineDocument = $job->getStageDocument(JOB_CONVERSION_STAGE_BIBTEXREFERENCES);
     }
     if (!$cermineDocument) {
         $cermineDocument = $job->getStageDocument(JOB_CONVERSION_STAGE_PDF_EXTRACT);
     }
     if (!$cermineDocument) {
         throw new \Exception('Couldn\'t find the CERMINE output');
     }

     $outputFile = $job->getDocumentPath() . '/document.xml';
     $job->conversionStage = JOB_CONVERSION_STAGE_XML_MERGE;

     if ($job->inputFileFormat == JOB_INPUT_TYPE_PDF) {
         // Nothing to merge with: the CERMINE output is the final XML.
         // A failed copy would otherwise register a file that does not
         // exist, so it is treated like a failed merge.
         if (!@copy($cermineDocument->path, $outputFile)) {
             $job->status = JOB_STATUS_FAILED;
             return $job;
         }
     } else {
         // Prefer the meTypeset document with replaced references
         $meTypesetDocument = $job->getStageDocument(JOB_CONVERSION_STAGE_BIBTEXREFERENCES);
         if (!$meTypesetDocument) {
             $meTypesetDocument = $job->getStageDocument(JOB_CONVERSION_STAGE_NLMXML);
         }
         if (!$meTypesetDocument) {
             throw new \Exception('Couldn\'t find the meTypeset output');
         }

         $mergedXML->setInputFileNlmxml($meTypesetDocument->path);
         $mergedXML->setInputFileCermine($cermineDocument->path);
         $mergedXML->setOutputFile($outputFile);
         $mergedXML->convert();
         if (!$mergedXML->getStatus()) {
             $job->status = JOB_STATUS_FAILED;
             return $job;
         }
     }

     // Register the merged XML as a new document of the job
     $documentDAO = $this->sm->get('Manager\\Model\\DAO\\DocumentDAO');
     $mergedXMLDocument = $documentDAO->getInstance();
     $mergedXMLDocument->path = $outputFile;
     $mergedXMLDocument->job = $job;
     $mergedXMLDocument->conversionStage = JOB_CONVERSION_STAGE_XML_MERGE;
     $job->documents[] = $mergedXMLDocument;

     return $job;
 }