/**
 * Prepares the download of a job's original files.
 *
 * Fetches the original files of the job from FilesStorage, builds one
 * output/input filename pair per file, drops duplicated pairs (files that
 * come from the same zip archive produce identical pairs) and wraps what is
 * left in ZipContentObject instances.
 *
 * When download_type is 'all' and more than one item remains, the items are
 * packed with composeZip() and the download name is forced to end in ".zip";
 * a single item, or any other download_type, is handed to setContent().
 *
 * Side effects: sets $this->id_project and $this->project_date from the
 * first row, and possibly $this->_filename and $this->content.
 */
public function doAction() {
    $storage  = new FilesStorage();
    $jobFiles = $storage->getOriginalFilesForJob($this->id_job, $this->id_file, $this->password);

    // Every row carries the same project data, so row zero is representative.
    $this->id_project   = $jobFiles[0]['id_project'];
    $this->project_date = $jobFiles[0]['create_date'];

    $entries = array();
    foreach ($jobFiles as $row) {
        $rowId       = $row['id_file'];
        $archiveInfo = ZipArchiveExtended::zipPathInfo($row['filename']);

        if (!is_array($archiveInfo)) {
            // No zip path information: the file is served under its own name.
            $entries[$rowId]['output_filename'] = $row['filename'];
            $entries[$rowId]['input_filename']  = $row['originalFilePath'];
            continue;
        }

        // The file lives inside a zip: point at the archive itself.
        $entries[$rowId]['output_filename'] = $archiveInfo['zipfilename'];
        $entries[$rowId]['input_filename']  = $storage->getOriginalZipPath(
            $this->project_date,
            $this->id_project,
            $archiveInfo['zipfilename']
        );
    }

    // array_unique() compares elements by their string representation and
    // keeps the first occurrence, so each entry is serialized before the
    // comparison and restored afterwards. Several files of one zip collapse
    // into a single entry this way.
    $serialized = array_map('serialize', $entries);
    $distinct   = array_unique($serialized);
    $entries    = array_map('unserialize', $distinct);

    $zipItems = array();
    foreach ($entries as $rowId => $entry) {
        $zipItems[$rowId] = new ZipContentObject($entry);
    }

    if ($this->download_type != 'all') {
        $this->setContent($zipItems);
        return;
    }

    $itemCount = count($zipItems);

    if ($itemCount == 1) {
        $this->setContent($zipItems);
        return;
    }

    if ($itemCount > 1) {
        $this->_filename = $this->fname;

        $nameParts = FilesStorage::pathinfo_fix($this->fname);
        if ($nameParts['extension'] != 'zip') {
            $this->_filename = $nameParts['basename'] . ".zip";
        }

        // Pack every remaining item into one archive.
        $this->content = self::composeZip($zipItems);
    }
}
/**
 * Builds the translated download for a job and stores it on the controller.
 *
 * Steps, as performed below:
 *  - load the job and verify the password (on failure: log, send a mail
 *    report and return null);
 *  - for every file of the job, stream its XLIFF through
 *    SdlXliffSAXTranslationReplacer into a temporary output file;
 *  - unless conversion is disabled, XLIFF output is forced, or the file is an
 *    XLIFF that was never converted, send the result back through
 *    FileFormatConverter, in chunks of self::FILES_CHUNK_SIZE files grouped
 *    by converter version;
 *  - deliver an OmegaT package (files plus TM and MT TMX exports), a zip of
 *    several files, or a single file;
 *  - delete the job's temporary download directory.
 *
 * @return null|void null when the access check fails; otherwise nothing is returned
 * @throws Exception rethrown, after logging, when assembling a non-OmegaT download fails
 */
public function doAction() {
    // The job is the same for every file, so it is loaded once up front;
    // its target language is needed further down.
    $jobData = $this->jobInfo = getJobData($this->id_job, $this->password);

    // Check for password correctness.
    $pCheck = new AjaxPasswordCheck();
    if (empty($jobData) || !$pCheck->grantJobAccessByJobData($jobData, $this->password)) {
        $msg = "Error : wrong password provided for download \n\n " . var_export($_POST, true) . "\n";
        Log::doLog($msg);
        Utils::sendErrMailReport($msg);

        return null;
    }

    // Get storage object.
    $fs        = new FilesStorage();
    $files_job = $fs->getFilesForJob($this->id_job, $this->id_file);

    $nonew          = 0;
    $output_content = array();

    /*
       The procedure:
       1) the original xliff file is read directly from disk; a file handler is obtained
       2) the file is read chunk by chunk by a stream parser: for each trans-unit that is
          encountered, target is replaced (or added) with the corresponding translation
          obtained from the DB
       3) the parsed portion of xliff in the buffer is flushed on a temporary file
       4) the temporary file is sent to the converter and an original file is obtained
       5) the temporary file is deleted
     */

    // Loop-invariant patterns used to sanitize segments:
    // - numeric entities for control characters other than 09, 0A and 0D;
    // - raw control characters.
    $regexpEntity = '/&#x(0[0-8BCEF]|1[0-9A-F]|7F);/u';
    $regexpAscii  = '/([\\x{00}-\\x{1F}\\x{7F}]{1})/u';

    // Split the job files by the converter version each one needs.
    $files_job_by_converter_version = array();
    foreach ($files_job as $file) {
        $fileType = DetectProprietaryXliff::getInfo($file['xliffFilePath']);
        $files_job_by_converter_version[$fileType['converter_version']][] = $file;
    }

    // Process files one converter version at a time.
    foreach ($files_job_by_converter_version as $converter_version => $version_files) {
        // Each chunk is sent to the converter in a single multi-file request.
        $chunks = array_chunk($version_files, self::FILES_CHUNK_SIZE);

        foreach ($chunks as $chunk) {
            $converter             = new FileFormatConverter($converter_version);
            $files_to_be_converted = array();

            foreach ($chunk as $file) {
                $fileID           = $file['id_file'];
                $current_filename = $file['filename'];

                // The temporary output keeps the xliff file name so that its
                // extension can be recognised later on.
                $_fileName  = explode(DIRECTORY_SEPARATOR, $file['xliffFilePath']);
                $outputPath = INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/' . $fileID . '/' . uniqid('', true) . "_.out." . array_pop($_fileName);

                // Make the directory if it does not exist yet.
                if (!file_exists(dirname($outputPath))) {
                    Log::doLog('Create Directory ' . escapeshellarg(dirname($outputPath)) . '');
                    mkdir(dirname($outputPath), 0775, true);
                }

                $data = getSegmentsDownload($this->id_job, $this->password, $fileID, $nonew);

                foreach ($data as $i => $k) {
                    // Secondary index on the segments array, keyed by trans-unit id.
                    // The prefix keeps non trans-unit keys (e.g. numeric ones)
                    // from being overwritten.
                    $data['matecat|' . $k['internal_id']][] = $i;

                    //FIXME: temporary patch
                    // NOTE(review): the placeholder is named "nbsp" but the replacement
                    // literal reads as a plain space here - confirm that a non-breaking
                    // space was not intended.
                    $data[$i]['translation'] = str_replace('<x id="nbsp"/>', ' ', $data[$i]['translation']);
                    $data[$i]['segment']     = str_replace('<x id="nbsp"/>', ' ', $data[$i]['segment']);

                    // Remove binary chars found in some xliff files.
                    $sanitized_src = preg_replace($regexpAscii, '', $data[$i]['segment']);
                    $sanitized_trg = preg_replace($regexpAscii, '', $data[$i]['translation']);

                    // Remove entities that are not valid in XML.
                    $sanitized_src = preg_replace($regexpEntity, '', $sanitized_src);
                    $sanitized_trg = preg_replace($regexpEntity, '', $sanitized_trg);

                    // Loose comparison on purpose: preg_replace() yields null on
                    // failure, and an empty result is skipped as well, so the
                    // un-sanitized text is kept in both cases.
                    if ($sanitized_src != null) {
                        $data[$i]['segment'] = $sanitized_src;
                    }
                    if ($sanitized_trg != null) {
                        $data[$i]['translation'] = $sanitized_trg;
                    }
                }

                // Instantiate the parser.
                $xsp = new SdlXliffSAXTranslationReplacer(
                    $file['xliffFilePath'],
                    $data,
                    Langs_Languages::getInstance()->getLangRegionCode($jobData['target']),
                    $outputPath
                );

                if ($this->download_type == 'omegat') {
                    $xsp->setSourceInTarget(true);
                }

                // Run parsing.
                Log::doLog("work on " . $fileID . " " . $current_filename);
                $xsp->replaceTranslation();

                // Free memory.
                unset($xsp);
                unset($data);

                $output_content[$fileID]['document_content'] = file_get_contents($outputPath);
                $output_content[$fileID]['output_filename']  = $current_filename;

                $fileType = DetectProprietaryXliff::getInfo($file['xliffFilePath']);

                if ($this->forceXliff) {
                    // Clean the output filename by removing the unique hash identifier,
                    // e.g. 55e5739b467109.05614837_.out.Test_English.doc.sdlxliff
                    $output_content[$fileID]['output_filename'] = preg_replace('#[0-9a-f]+\\.[0-9_]+\\.out\\.#i', '', FilesStorage::basename_fix($outputPath));

                    if ($fileType['proprietary_short_name'] === 'matecat_converter') {
                        // Set the XLIFF extension to .xlf.
                        // Internally .sdlxliff is still the default extension for the
                        // XLIFF behind the projects; the name is only changed here,
                        // just before the file is downloaded by the user.
                        $output_content[$fileID]['output_filename'] = preg_replace("|\\.sdlxliff\$|i", ".xlf", $output_content[$fileID]['output_filename']);
                    }
                }

                /**
                 * Conversion Enforce
                 */
                $convertBackToOriginal = true;

                // When the 'proprietary' flag is false the xliff was never passed
                // to a converter, because it is handled directly.
                $xliffWasNotConverted = $fileType['proprietary'] === false;

                // An unconverted file has its original path equal to its xliff path.
                $isPlainXliff = $file['originalFilePath'] == $file['xliffFilePath'] && $xliffWasNotConverted;

                if (!INIT::$CONVERSION_ENABLED || $isPlainXliff || $this->forceXliff) {
                    $convertBackToOriginal = false;
                    Log::doLog("SDLXLIFF: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                } else {
                    //TODO: dos2unix ??? why??
                    Log::doLog("NO SDLXLIFF, Conversion enforced: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                }

                if ($convertBackToOriginal) {
                    $output_content[$fileID]['out_xliff_name'] = $outputPath;
                    $output_content[$fileID]['source']         = $jobData['source'];
                    $output_content[$fileID]['target']         = $jobData['target'];

                    $files_to_be_converted[$fileID] = $output_content[$fileID];
                } elseif ($this->forceXliff) {
                    $this->cleanFilePath($output_content[$fileID]['document_content']);
                }
            }

            // The inline assignment only labels the second argument.
            $convertResult = $converter->multiConvertToOriginal($files_to_be_converted, $chosen_machine = false);

            foreach (array_keys($files_to_be_converted) as $fileID) {
                $output_content[$fileID]['document_content'] = $this->ifGlobalSightXliffRemoveTargetMarks(
                    $convertResult[$fileID]['document_content'],
                    $files_to_be_converted[$fileID]['output_filename']
                );

                // .strings files are required to be in UTF-16; the extension is
                // used to detect them.
                $extension = FilesStorage::pathinfo_fix($output_content[$fileID]['output_filename'], PATHINFO_EXTENSION);

                if (strtoupper($extension) == 'STRINGS') {
                    $encodingConvertedFile = CatUtils::convertEncoding('UTF-16', $output_content[$fileID]['document_content']);

                    // Strip the previously added BOM.
                    $encodingConvertedFile[1] = $converter->stripBOM($encodingConvertedFile[1], 16);

                    // Store the new content and trash the temporary data.
                    $output_content[$fileID]['document_content'] = $encodingConvertedFile[1];
                    unset($encodingConvertedFile);
                }
            }

            unset($convertResult);
        }
    }

    // Files that came from a zip get their archive name and internal path
    // recorded, and their output name reduced to the base name.
    foreach ($output_content as $idFile => $fileInformations) {
        $zipPathInfo = ZipArchiveExtended::zipPathInfo($output_content[$idFile]['output_filename']);
        if (is_array($zipPathInfo)) {
            $output_content[$idFile]['zipfilename']     = $zipPathInfo['zipfilename'];
            $output_content[$idFile]['zipinternalPath'] = $zipPathInfo['dirname'];
            $output_content[$idFile]['output_filename'] = $zipPathInfo['basename'];
        }
    }

    // Set the file name: <name>_<target language>.<extension>
    $pathinfo        = FilesStorage::pathinfo_fix($this->fname);
    $this->_filename = $pathinfo['filename'] . "_" . $jobData['target'] . "." . $pathinfo['extension'];

    if ($this->download_type == 'omegat') {
        $this->_filename .= ".zip";

        $tmsService = new TMSService();

        $tmsService->setOutputType('tm');
        /**
         * @var $tmFile SplTempFileObject
         */
        $tmFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);

        $tmsService->setOutputType('mt');
        /**
         * @var $mtFile SplTempFileObject
         */
        $mtFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);

        $tm_id = uniqid('tm');
        $mt_id = uniqid('mt');

        // The exports are named "<name>_<target>_TM.tmx" / "..._MT.tmx"; the
        // literals must not contain blanks around the dot, otherwise the
        // extension is no longer "tmx".
        $output_content[$tm_id] = array(
            'document_content' => '',
            'output_filename'  => $pathinfo['filename'] . "_" . $jobData['target'] . "_TM.tmx"
        );
        foreach ($tmFile as $content) {
            $output_content[$tm_id]['document_content'] .= $content;
        }

        $output_content[$mt_id] = array(
            'document_content' => '',
            'output_filename'  => $pathinfo['filename'] . "_" . $jobData['target'] . "_MT.tmx"
        );
        foreach ($mtFile as $content) {
            $output_content[$mt_id]['document_content'] .= $content;
        }

        // Add the zip archive content here.
        $this->createOmegaTZip($output_content, $jobData['source'], $jobData['target']);
    } else {
        try {
            $output_content = $this->getOutputContentsWithZipFiles($output_content);

            if (count($output_content) > 1) {
                // Cast $output_content elements to ZipContentObject.
                foreach ($output_content as $key => $__output_content_elem) {
                    $output_content[$key] = new ZipContentObject($__output_content_elem);
                }

                if ($pathinfo['extension'] != 'zip') {
                    if ($this->forceXliff) {
                        $this->_filename = $this->id_job . ".zip";
                    } else {
                        $this->_filename = $pathinfo['basename'] . ".zip";
                    }
                }

                // Add the zip archive content here.
                $this->content = self::composeZip($output_content);
            } else {
                // Always an array with 1 element, pop it. Ex: array( array() )
                $output_content = array_pop($output_content);
                $this->setContent($output_content);
            }
        } catch (Exception $e) {
            $msg = "\n\n Error retrieving file content, Conversion failed??? \n\n Error: {$e->getMessage()} \n\n" . var_export($e->getTraceAsString(), true);
            $msg .= "\n\n Request: " . var_export($_REQUEST, true);

            Log::$fileName = 'fatal_errors.txt';
            Log::doLog($msg);
            Utils::sendErrMailReport($msg);

            $this->unlockToken(array("code" => -110, "message" => "Download failed. Please contact " . INIT::$SUPPORT_MAIL));

            // Rethrow to avoid sending headers and empty file content with the finalize method.
            // NOTE(review): the temporary download directory is not removed on this path.
            throw $e;
        }
    }

    // Best-effort cleanup of the temporary files of this job.
    try {
        Utils::deleteDir(INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/');
    } catch (Exception $e) {
        Log::doLog('Failed to delete dir:' . $e->getMessage());
    }
}