public function __construct($originalXliffFilename, $segments, $trg_lang = null, $outputFile = null)
 {
     self::$INTERNAL_TAG_PLACEHOLDER = "§" . base64_encode(openssl_random_pseudo_bytes(3, $_crypto_strong));
     if (is_resource($outputFile)) {
         $this->outputFP = $outputFile;
         rewind($this->outputFP);
     } else {
         $this->outputFP = fopen($outputFile, 'w+');
     }
     if (!($this->originalFP = fopen($originalXliffFilename, "r"))) {
         die("could not open XML input");
     }
     $this->segments = $segments;
     $this->target_lang = $trg_lang;
     $this->sourceInTarget = false;
 }
 public function doAction()
 {
     $debug = array();
     $debug['total'][] = time();
     //get job language and data
     //Fixed Bug: need a specific job, because we need The target Language
     //Removed from within the foreach cycle, the job is always the same....
     $jobData = $this->jobInfo = getJobData($this->id_job, $this->password);
     $pCheck = new AjaxPasswordCheck();
     //check for Password correctness
     if (empty($jobData) || !$pCheck->grantJobAccessByJobData($jobData, $this->password)) {
         $msg = "Error : wrong password provided for download \n\n " . var_export($_POST, true) . "\n";
         Log::doLog($msg);
         Utils::sendErrMailReport($msg);
         return null;
     }
     $debug['get_file'][] = time();
     $files_job = getFilesForJob($this->id_job, $this->id_file);
     $debug['get_file'][] = time();
     $nonew = 0;
     $output_content = array();
     /*
      * the procedure is now as follows:
      * 1)original file is loaded from DB into RAM and the flushed in a temp file on disk; a file handler is obtained
      * 2)RAM gets freed from original content
      * 3)the file is read chunk by chunk by a stream parser: for each tran-unit that is encountered,
      *     target is replaced (or added) with the corresponding translation among segments
      *     the current string in the buffer is flushed on standard output
      * 4)the temporary file is deleted by another process after some time
      *
      */
     //file array is chuncked. Each chunk will be used for a parallel conversion request.
     $files_job = array_chunk($files_job, self::FILES_CHUNK_SIZE);
     foreach ($files_job as $chunk) {
         $converter = new FileFormatConverter();
         $files_buffer = array();
         foreach ($chunk as $file) {
             $mime_type = $file['mime_type'];
             $fileID = $file['id_file'];
             $current_filename = $file['filename'];
             $original_xliff = $file['xliff_file'];
             //get path
             $path = INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/' . $fileID . '/' . $current_filename . "_" . uniqid('', true) . '.sdlxliff';
             //make dir if doesn't exist
             if (!file_exists(dirname($path))) {
                 Log::doLog('exec ("chmod 666 ' . escapeshellarg($path) . '");');
                 mkdir(dirname($path), 0777, true);
                 exec("chmod 666 " . escapeshellarg($path));
             }
             //create file
             $fp = fopen($path, 'w+');
             //flush file to disk
             fwrite($fp, $original_xliff);
             //free memory, as we can work with file on disk now
             unset($original_xliff);
             $debug['get_segments'][] = time();
             $data = getSegmentsDownload($this->id_job, $this->password, $fileID, $nonew);
             $debug['get_segments'][] = time();
             //create a secondary indexing mechanism on segments' array; this will be useful
             //prepend a string so non-trans unit id ( ex: numerical ) are not overwritten
             //clean also not valid xml entities ( charactes with ascii < 32 and different from 0A, 0D and 09
             $regexpEntity = '/&#x(0[0-8BCEF]|1[0-9A-F]|7F);/u';
             //remove binary chars in some xliff files
             $regexpAscii = '/([\\x{00}-\\x{1F}\\x{7F}]{1})/u';
             foreach ($data as $i => $k) {
                 $data['matecat|' . $k['internal_id']][] = $i;
                 //FIXME: temporary patch
                 $data[$i]['translation'] = str_replace('<x id="nbsp"/>', '&#xA0;', $data[$i]['translation']);
                 $data[$i]['segment'] = str_replace('<x id="nbsp"/>', '&#xA0;', $data[$i]['segment']);
                 $sanitized_src = preg_replace($regexpAscii, '', $data[$i]['segment']);
                 $sanitized_trg = preg_replace($regexpAscii, '', $data[$i]['translation']);
                 $sanitized_src = preg_replace($regexpEntity, '', $sanitized_src);
                 $sanitized_trg = preg_replace($regexpEntity, '', $sanitized_trg);
                 if ($sanitized_src != null) {
                     $data[$i]['segment'] = $sanitized_src;
                 }
                 if ($sanitized_trg != null) {
                     $data[$i]['translation'] = $sanitized_trg;
                 }
             }
             $debug['replace'][] = time();
             //instatiate parser
             $xsp = new XliffSAXTranslationReplacer($path, $data, Langs_Languages::getInstance()->getLangRegionCode($jobData['target']), $fp);
             if ($this->download_type == 'omegat') {
                 $xsp->setSourceInTarget(true);
             }
             //run parsing
             Log::doLog("work on " . $fileID . " " . $current_filename);
             $xsp->replaceTranslation();
             fclose($fp);
             unset($xsp);
             $debug['replace'][] = time();
             $output_xliff = file_get_contents($path . '.out.sdlxliff');
             $output_content[$fileID]['documentContent'] = $output_xliff;
             $output_content[$fileID]['filename'] = $current_filename;
             unset($output_xliff);
             if ($this->forceXliff) {
                 $file_info_details = pathinfo($output_content[$fileID]['filename']);
                 $output_content[$fileID]['filename'] = $file_info_details['filename'] . ".out.sdlxliff";
             }
             //TODO set a flag in database when file uploaded to know if this file is a proprietary xlf converted
             //TODO so we can load from database the original file blob ONLY when needed
             /**
              * Conversion Enforce
              */
             $convertBackToOriginal = true;
             try {
                 //if it is a not converted file ( sdlxliff ) we have an empty field original_file
                 //so we can simplify all the logic with:
                 // is empty original_file? if it is, we don't need conversion back because
                 // we already have an sdlxliff or an accepted file
                 $file['original_file'] = @gzinflate($file['original_file']);
                 if (!INIT::$CONVERSION_ENABLED || empty($file['original_file']) && $mime_type == 'sdlxliff' || $this->forceXliff) {
                     $convertBackToOriginal = false;
                     Log::doLog("SDLXLIFF: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                 } else {
                     //TODO: dos2unix ??? why??
                     //force unix type files
                     Log::doLog("NO SDLXLIFF, Conversion enforced: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                 }
             } catch (Exception $e) {
                 Log::doLog($e->getMessage());
             }
             if ($convertBackToOriginal) {
                 $output_content[$fileID]['out_xliff_name'] = $path . '.out.sdlxliff';
                 $output_content[$fileID]['source'] = $jobData['source'];
                 $output_content[$fileID]['target'] = $jobData['target'];
                 $files_buffer[$fileID] = $output_content[$fileID];
             } elseif ($this->forceXliff) {
                 $this->cleanFilePath($output_content[$fileID]['documentContent']);
             }
         }
         $debug['do_conversion'][] = time();
         $convertResult = $converter->multiConvertToOriginal($files_buffer, $chosen_machine = false);
         foreach (array_keys($files_buffer) as $fileID) {
             $output_content[$fileID]['documentContent'] = $this->removeTargetMarks($convertResult[$fileID]['documentContent'], $files_buffer[$fileID]['filename']);
             //in case of .strings, they are required to be in UTF-16
             //get extension to perform file detection
             $extension = pathinfo($output_content[$fileID]['filename'], PATHINFO_EXTENSION);
             if (strtoupper($extension) == 'STRINGS') {
                 //use this function to convert stuff
                 $encodingConvertedFile = CatUtils::convertEncoding('UTF-16', $output_content[$fileID]['documentContent']);
                 //strip previously added BOM
                 $encodingConvertedFile[1] = $converter->stripBOM($encodingConvertedFile[1], 16);
                 //store new content
                 $output_content[$fileID]['documentContent'] = $encodingConvertedFile[1];
                 //trash temporary data
                 unset($encodingConvertedFile);
             }
         }
         //            $output_content[ $fileID ][ 'documentContent' ] = $convertResult[ 'documentContent' ];
         unset($convertResult);
         $debug['do_conversion'][] = time();
     }
     //set the file Name
     $pathinfo = pathinfo($this->fname);
     $this->filename = $pathinfo['filename'] . "_" . $jobData['target'] . "." . $pathinfo['extension'];
     //qui prodest to check download type?
     if ($this->download_type == 'omegat') {
         $this->filename .= ".zip";
         $tmsService = new TMSService();
         $tmsService->setOutputType('tm');
         /**
          * @var $tmFile SplTempFileObject
          */
         $tmFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);
         $tmsService->setOutputType('mt');
         /**
          * @var $mtFile SplTempFileObject
          */
         $mtFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);
         $tm_id = uniqid('tm');
         $mt_id = uniqid('mt');
         $output_content[$tm_id] = array('documentContent' => '', 'filename' => $pathinfo['filename'] . "_" . $jobData['target'] . "_TM . tmx");
         foreach ($tmFile as $lineNumber => $content) {
             $output_content[$tm_id]['documentContent'] .= $content;
         }
         $output_content[$mt_id] = array('documentContent' => '', 'filename' => $pathinfo['filename'] . "_" . $jobData['target'] . "_MT . tmx");
         foreach ($mtFile as $lineNumber => $content) {
             $output_content[$mt_id]['documentContent'] .= $content;
         }
         $this->createOmegaTZip($output_content, $jobData['source'], $jobData['target']);
         //add zip archive content here;
     } else {
         if (count($output_content) > 1) {
             if ($pathinfo['extension'] != 'zip') {
                 if ($this->forceXliff) {
                     $this->filename = $this->id_job . ".zip";
                 } else {
                     $this->filename = $pathinfo['basename'] . ".zip";
                 }
             }
             $this->composeZip($output_content, $jobData['source']);
             //add zip archive content here;
         } else {
             //always an array with 1 element, pop it, Ex: array( array() )
             $output_content = array_pop($output_content);
             $this->setContent($output_content);
         }
     }
     $debug['total'][] = time();
     Utils::deleteDir(INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/');
 }
Exemplo n.º 3
0
 public function doAction()
 {
     $debug = array();
     $debug['total'][] = time();
     //get job language and data
     //Fixed Bug: need a specific job, because we need The target Language
     //Removed from within the foreach cycle, the job is always the same....
     $jobData = $this->jobInfo = getJobData($this->id_job, $this->password);
     $pCheck = new AjaxPasswordCheck();
     //check for Password correctness
     if (empty($jobData) || !$pCheck->grantJobAccessByJobData($jobData, $this->password)) {
         $msg = "Error : wrong password provided for download \n\n " . var_export($_POST, true) . "\n";
         Log::doLog($msg);
         Utils::sendErrMailReport($msg);
         return null;
     }
     $debug['get_file'][] = time();
     //get storage object
     $fs = new FilesStorage();
     $files_job = $fs->getFilesForJob($this->id_job, $this->id_file);
     $debug['get_file'][] = time();
     $nonew = 0;
     $output_content = array();
     $thereIsAZipFile = false;
     /*
       the procedure:
       1)original xliff file is read directly from disk; a file handler is obtained
       2)the file is read chunk by chunk by a stream parser: for each trans-unit that is encountered, target is replaced (or added) with the corresponding translation obtained from the DB
       3)the parsed portion of xliff in the buffer is flushed on temporary file
       4)the temporary file is sent to the converter and an original file is obtained
       5)the temporary file is deleted
     */
     //file array is chuncked. Each chunk will be used for a parallel conversion request.
     $files_job = array_chunk($files_job, self::FILES_CHUNK_SIZE);
     foreach ($files_job as $chunk) {
         $converter = new FileFormatConverter();
         $files_to_be_converted = array();
         foreach ($chunk as $file) {
             $mime_type = $file['mime_type'];
             $fileID = $file['id_file'];
             $current_filename = $file['filename'];
             //get path for the output file converted to know it's right extension
             $_fileName = explode(DIRECTORY_SEPARATOR, $file['xliffFilePath']);
             $outputPath = INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/' . $fileID . '/' . uniqid('', true) . "_.out." . array_pop($_fileName);
             //make dir if doesn't exist
             if (!file_exists(dirname($outputPath))) {
                 Log::doLog('Create Directory ' . escapeshellarg(dirname($outputPath)) . '');
                 mkdir(dirname($outputPath), 0775, true);
             }
             $debug['get_segments'][] = time();
             $data = getSegmentsDownload($this->id_job, $this->password, $fileID, $nonew);
             $debug['get_segments'][] = time();
             //prepare regexp for nest step
             $regexpEntity = '/&#x(0[0-8BCEF]|1[0-9A-F]|7F);/u';
             $regexpAscii = '/([\\x{00}-\\x{1F}\\x{7F}]{1})/u';
             foreach ($data as $i => $k) {
                 //create a secondary indexing mechanism on segments' array; this will be useful
                 //prepend a string so non-trans unit id ( ex: numerical ) are not overwritten
                 $data['matecat|' . $k['internal_id']][] = $i;
                 //FIXME: temporary patch
                 $data[$i]['translation'] = str_replace('<x id="nbsp"/>', '&#xA0;', $data[$i]['translation']);
                 $data[$i]['segment'] = str_replace('<x id="nbsp"/>', '&#xA0;', $data[$i]['segment']);
                 //remove binary chars in some xliff files
                 $sanitized_src = preg_replace($regexpAscii, '', $data[$i]['segment']);
                 $sanitized_trg = preg_replace($regexpAscii, '', $data[$i]['translation']);
                 //clean invalid xml entities ( charactes with ascii < 32 and different from 0A, 0D and 09
                 $sanitized_src = preg_replace($regexpEntity, '', $sanitized_src);
                 $sanitized_trg = preg_replace($regexpEntity, '', $sanitized_trg);
                 if ($sanitized_src != null) {
                     $data[$i]['segment'] = $sanitized_src;
                 }
                 if ($sanitized_trg != null) {
                     $data[$i]['translation'] = $sanitized_trg;
                 }
             }
             $debug['replace'][] = time();
             //instatiate parser
             $xsp = new XliffSAXTranslationReplacer($file['xliffFilePath'], $data, Langs_Languages::getInstance()->getLangRegionCode($jobData['target']), $outputPath);
             if ($this->download_type == 'omegat') {
                 $xsp->setSourceInTarget(true);
             }
             //run parsing
             Log::doLog("work on " . $fileID . " " . $current_filename);
             $xsp->replaceTranslation();
             //free memory
             unset($xsp);
             unset($data);
             $debug['replace'][] = time();
             $output_content[$fileID]['document_content'] = file_get_contents($outputPath);
             $output_content[$fileID]['output_filename'] = $current_filename;
             if ($this->forceXliff) {
                 $file_info_details = FilesStorage::pathinfo_fix($output_content[$fileID]['output_filename']);
                 //clean the output filename by removing
                 // the unique hash identifier 55e5739b467109.05614837_.out.Test_English.doc.sdlxliff
                 $output_content[$fileID]['output_filename'] = preg_replace('#[0-9a-f]+\\.[0-9_]+\\.out\\.#i', '', FilesStorage::basename_fix($outputPath));
             }
             /**
              * Conversion Enforce
              */
             $convertBackToOriginal = true;
             try {
                 //if it is a not converted file ( sdlxliff ) we have originalFile equals to xliffFile (it has just been copied)
                 $file['original_file'] = file_get_contents($file['originalFilePath']);
                 if (!INIT::$CONVERSION_ENABLED || ($file['originalFilePath'] == $file['xliffFilePath'] and $mime_type == 'sdlxliff') or $this->forceXliff) {
                     $convertBackToOriginal = false;
                     Log::doLog("SDLXLIFF: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                 } else {
                     //TODO: dos2unix ??? why??
                     //force unix type files
                     Log::doLog("NO SDLXLIFF, Conversion enforced: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                 }
             } catch (Exception $e) {
                 Log::doLog($e->getMessage());
             }
             if ($convertBackToOriginal) {
                 $output_content[$fileID]['out_xliff_name'] = $outputPath;
                 $output_content[$fileID]['source'] = $jobData['source'];
                 $output_content[$fileID]['target'] = $jobData['target'];
                 $files_to_be_converted[$fileID] = $output_content[$fileID];
             } elseif ($this->forceXliff) {
                 $this->cleanFilePath($output_content[$fileID]['document_content']);
             }
         }
         $debug['do_conversion'][] = time();
         $convertResult = $converter->multiConvertToOriginal($files_to_be_converted, $chosen_machine = false);
         foreach (array_keys($files_to_be_converted) as $fileID) {
             $output_content[$fileID]['document_content'] = $this->ifGlobalSightXliffRemoveTargetMarks($convertResult[$fileID]['document_content'], $files_to_be_converted[$fileID]['output_filename']);
             //in case of .strings, they are required to be in UTF-16
             //get extension to perform file detection
             $extension = FilesStorage::pathinfo_fix($output_content[$fileID]['output_filename'], PATHINFO_EXTENSION);
             if (strtoupper($extension) == 'STRINGS') {
                 //use this function to convert stuff
                 $encodingConvertedFile = CatUtils::convertEncoding('UTF-16', $output_content[$fileID]['document_content']);
                 //strip previously added BOM
                 $encodingConvertedFile[1] = $converter->stripBOM($encodingConvertedFile[1], 16);
                 //store new content
                 $output_content[$fileID]['document_content'] = $encodingConvertedFile[1];
                 //trash temporary data
                 unset($encodingConvertedFile);
             }
         }
         //            $output_content[ $fileID ][ 'document_content' ] = $convertResult[ 'document_content' ];
         unset($convertResult);
         $debug['do_conversion'][] = time();
     }
     foreach ($output_content as $idFile => $fileInformations) {
         $zipPathInfo = ZipArchiveExtended::zipPathInfo($output_content[$idFile]['output_filename']);
         if (is_array($zipPathInfo)) {
             $thereIsAZipFile = true;
             $output_content[$idFile]['zipfilename'] = $zipPathInfo['zipfilename'];
             $output_content[$idFile]['zipinternalPath'] = $zipPathInfo['dirname'];
             $output_content[$idFile]['output_filename'] = $zipPathInfo['basename'];
         }
     }
     //set the file Name
     $pathinfo = FilesStorage::pathinfo_fix($this->fname);
     $this->_filename = $pathinfo['filename'] . "_" . $jobData['target'] . "." . $pathinfo['extension'];
     //qui prodest to check download type?
     if ($this->download_type == 'omegat') {
         $this->_filename .= ".zip";
         $tmsService = new TMSService();
         $tmsService->setOutputType('tm');
         /**
          * @var $tmFile SplTempFileObject
          */
         $tmFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);
         $tmsService->setOutputType('mt');
         /**
          * @var $mtFile SplTempFileObject
          */
         $mtFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);
         $tm_id = uniqid('tm');
         $mt_id = uniqid('mt');
         $output_content[$tm_id] = array('document_content' => '', 'output_filename' => $pathinfo['filename'] . "_" . $jobData['target'] . "_TM . tmx");
         foreach ($tmFile as $lineNumber => $content) {
             $output_content[$tm_id]['document_content'] .= $content;
         }
         $output_content[$mt_id] = array('document_content' => '', 'output_filename' => $pathinfo['filename'] . "_" . $jobData['target'] . "_MT . tmx");
         foreach ($mtFile as $lineNumber => $content) {
             $output_content[$mt_id]['document_content'] .= $content;
         }
         $this->createOmegaTZip($output_content, $jobData['source'], $jobData['target']);
         //add zip archive content here;
     } else {
         $output_content = $this->getOutputContentsWithZipFiles($output_content);
         if (count($output_content) > 1) {
             //cast $output_content elements to ZipContentObject
             foreach ($output_content as $key => $__output_content_elem) {
                 $output_content[$key] = new ZipContentObject($__output_content_elem);
             }
             if ($pathinfo['extension'] != 'zip') {
                 if ($this->forceXliff) {
                     $this->_filename = $this->id_job . ".zip";
                 } else {
                     $this->_filename = $pathinfo['basename'] . ".zip";
                 }
             }
             $this->content = self::composeZip($output_content);
             //add zip archive content here;
         } else {
             //always an array with 1 element, pop it, Ex: array( array() )
             $output_content = array_pop($output_content);
             $this->setContent($output_content);
         }
     }
     $debug['total'][] = time();
     try {
         Utils::deleteDir(INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/');
     } catch (Exception $e) {
         Log::doLog('Failed to delete dir:' . $e->getMessage());
     }
 }