/**
 * Builds the translated download for the current job and hands it to the response.
 *
 * Steps, in order:
 *  1. load the job and verify the supplied password (on failure: log, mail a report, return null);
 *  2. for every file of the job, dump its XLIFF to a temp file and let
 *     XliffSAXTranslationReplacer write the translated copy next to it (".out.sdlxliff");
 *  3. send the files that need it through FileFormatConverter::multiConvertToOriginal(),
 *     one request per chunk of self::FILES_CHUNK_SIZE files;
 *  4. publish the result as an OmegaT zip, a plain zip (more than one file) or a single document;
 *  5. delete the job's temp download directory.
 *
 * @return null only the failed password check returns explicitly
 */
public function doAction()
{
    // The job is loaded once, up front: it is the same for every file and carries the target language.
    $jobData = $this->jobInfo = getJobData($this->id_job, $this->password);

    $pCheck = new AjaxPasswordCheck();
    if (empty($jobData) || !$pCheck->grantJobAccessByJobData($jobData, $this->password)) {
        // NOTE(review): the whole $_POST payload ends up in the log and in the error mail;
        // confirm that it cannot contain anything sensitive.
        $msg = "Error : wrong password provided for download \n\n " . var_export($_POST, true) . "\n";
        Log::doLog($msg);
        Utils::sendErrMailReport($msg);

        return null;
    }

    $files_job = getFilesForJob($this->id_job, $this->id_file);

    $nonew = 0;
    $output_content = array();

    // Numeric entities for characters below 0x20 (other than 09, 0A, 0D) and for 7F.
    $regexpEntity = '/&#x(0[0-8BCEF]|1[0-9A-F]|7F);/u';
    // Raw control characters found in some xliff files.
    $regexpAscii = '/([\\x{00}-\\x{1F}\\x{7F}]{1})/u';

    // Each chunk of files is sent to the converter in a single multi-file request.
    $files_job = array_chunk($files_job, self::FILES_CHUNK_SIZE);

    foreach ($files_job as $chunk) {
        $converter = new FileFormatConverter();
        $files_buffer = array();

        foreach ($chunk as $file) {
            $mime_type = $file['mime_type'];
            $fileID = $file['id_file'];
            $current_filename = $file['filename'];
            $original_xliff = $file['xliff_file'];

            $path = INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/' . $fileID . '/' . $current_filename . "_" . uniqid('', true) . '.sdlxliff';

            // The file itself is created by fopen() below, so only its directory has to be prepared here.
            if (!file_exists(dirname($path))) {
                mkdir(dirname($path), 0777, true);
            }

            // Flush the XLIFF to disk and drop the in-memory copy; the parser works on the file handle.
            $fp = fopen($path, 'w+');
            fwrite($fp, $original_xliff);
            unset($original_xliff);

            $data = getSegmentsDownload($this->id_job, $this->password, $fileID, $nonew);

            // Secondary index: "matecat|<internal_id>" => list of row positions. The prefix keeps
            // these keys from colliding with the numeric row keys. foreach walks a copy of $data,
            // so the keys added here are not visited by this loop.
            foreach ($data as $i => $k) {
                $data['matecat|' . $k['internal_id']][] = $i;

                //FIXME: temporary patch
                // NOTE(review): the replacement literal is reproduced exactly as it appears in this
                // file; confirm whether a non-breaking space was intended.
                $data[$i]['translation'] = str_replace('<x id="nbsp"/>', ' ', $data[$i]['translation']);
                $data[$i]['segment'] = str_replace('<x id="nbsp"/>', ' ', $data[$i]['segment']);

                $sanitized_src = preg_replace($regexpAscii, '', $data[$i]['segment']);
                $sanitized_trg = preg_replace($regexpAscii, '', $data[$i]['translation']);

                $sanitized_src = preg_replace($regexpEntity, '', $sanitized_src);
                $sanitized_trg = preg_replace($regexpEntity, '', $sanitized_trg);

                // NOTE(review): the loose comparison also skips an empty sanitised result, so a text
                // made only of stripped characters is left untouched, exactly like a PCRE failure.
                // Kept as is on purpose; tighten only after checking how the replacer treats ''.
                if ($sanitized_src != null) {
                    $data[$i]['segment'] = $sanitized_src;
                }
                if ($sanitized_trg != null) {
                    $data[$i]['translation'] = $sanitized_trg;
                }
            }

            $xsp = new XliffSAXTranslationReplacer(
                $path,
                $data,
                Langs_Languages::getInstance()->getLangRegionCode($jobData['target']),
                $fp
            );

            if ($this->download_type == 'omegat') {
                $xsp->setSourceInTarget(true);
            }

            Log::doLog("work on " . $fileID . " " . $current_filename);
            $xsp->replaceTranslation();

            fclose($fp);
            unset($xsp);

            // The translated document is read back from "<temp file>.out.sdlxliff".
            $output_xliff = file_get_contents($path . '.out.sdlxliff');

            $output_content[$fileID]['documentContent'] = $output_xliff;
            $output_content[$fileID]['filename'] = $current_filename;
            unset($output_xliff);

            if ($this->forceXliff) {
                $file_info_details = pathinfo($output_content[$fileID]['filename']);
                $output_content[$fileID]['filename'] = $file_info_details['filename'] . ".out.sdlxliff";
            }

            //TODO set a flag in database when file uploaded to know if this file is a proprietary xlf converted
            //TODO so we can load from database the original file blob ONLY when needed

            /**
             * Conversion Enforce
             */
            $convertBackToOriginal = true;
            try {
                // A file uploaded as sdlxliff has an empty original_file, so an empty blob together
                // with the sdlxliff mime type means there is nothing to convert back to.
                $file['original_file'] = @gzinflate($file['original_file']);

                if (!INIT::$CONVERSION_ENABLED || empty($file['original_file']) && $mime_type == 'sdlxliff' || $this->forceXliff) {
                    $convertBackToOriginal = false;
                    Log::doLog("SDLXLIFF: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                } else {
                    //TODO: dos2unix ??? why??
                    Log::doLog("NO SDLXLIFF, Conversion enforced: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                }
            } catch (Exception $e) {
                Log::doLog($e->getMessage());
            }

            if ($convertBackToOriginal) {
                $output_content[$fileID]['out_xliff_name'] = $path . '.out.sdlxliff';
                $output_content[$fileID]['source'] = $jobData['source'];
                $output_content[$fileID]['target'] = $jobData['target'];

                $files_buffer[$fileID] = $output_content[$fileID];
            } elseif ($this->forceXliff) {
                $this->cleanFilePath($output_content[$fileID]['documentContent']);
            }
        }

        $convertResult = $converter->multiConvertToOriginal($files_buffer, false);

        foreach (array_keys($files_buffer) as $fileID) {
            $output_content[$fileID]['documentContent'] = $this->removeTargetMarks(
                $convertResult[$fileID]['documentContent'],
                $files_buffer[$fileID]['filename']
            );

            // .strings files are required to be in UTF-16; detect them by extension.
            $extension = pathinfo($output_content[$fileID]['filename'], PATHINFO_EXTENSION);
            if (strtoupper($extension) == 'STRINGS') {
                $encodingConvertedFile = CatUtils::convertEncoding('UTF-16', $output_content[$fileID]['documentContent']);

                // strip previously added BOM
                $encodingConvertedFile[1] = $converter->stripBOM($encodingConvertedFile[1], 16);

                $output_content[$fileID]['documentContent'] = $encodingConvertedFile[1];
                unset($encodingConvertedFile);
            }
        }

        unset($convertResult);
    }

    // Download name: "<name>_<target>.<extension>". pathinfo() omits the 'extension' key when
    // fname has none, so fall back to an empty string instead of reading an undefined index.
    $pathinfo = pathinfo($this->fname);
    $fnameExtension = isset($pathinfo['extension']) ? $pathinfo['extension'] : '';
    $this->filename = $pathinfo['filename'] . "_" . $jobData['target'] . "." . $fnameExtension;

    if ($this->download_type == 'omegat') {
        $this->filename .= ".zip";

        $tmsService = new TMSService();

        $tmsService->setOutputType('tm');
        /**
         * @var $tmFile SplTempFileObject
         */
        $tmFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);

        $tmsService->setOutputType('mt');
        /**
         * @var $mtFile SplTempFileObject
         */
        $mtFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);

        $tm_id = uniqid('tm');
        $mt_id = uniqid('mt');

        // NOTE(review): "_TM . tmx" / "_MT . tmx" contain spaces around the dot, which looks like a
        // formatter accident; left untouched because createOmegaTZip() is not visible from here.
        $output_content[$tm_id] = array(
            'documentContent' => '',
            'filename'        => $pathinfo['filename'] . "_" . $jobData['target'] . "_TM . tmx"
        );
        foreach ($tmFile as $content) {
            $output_content[$tm_id]['documentContent'] .= $content;
        }

        $output_content[$mt_id] = array(
            'documentContent' => '',
            'filename'        => $pathinfo['filename'] . "_" . $jobData['target'] . "_MT . tmx"
        );
        foreach ($mtFile as $content) {
            $output_content[$mt_id]['documentContent'] .= $content;
        }

        $this->createOmegaTZip($output_content, $jobData['source'], $jobData['target']);
    } elseif (count($output_content) > 1) {
        if ($fnameExtension != 'zip') {
            if ($this->forceXliff) {
                $this->filename = $this->id_job . ".zip";
            } else {
                $this->filename = $pathinfo['basename'] . ".zip";
            }
        }

        $this->composeZip($output_content, $jobData['source']);
    } else {
        // At most one document is left: pop it out of its wrapping array.
        $output_content = array_pop($output_content);
        $this->setContent($output_content);
    }

    Utils::deleteDir(INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/');
}
/**
 * Rebuilds every file of the job with its translations injected and stores the
 * download name and body on $this->filename / $this->content.
 *
 * For each segment row the translation (or the source text, when the translation is
 * empty) is wrapped in its <mrk> if it has one and queued; consecutive rows sharing
 * the same internal_id are concatenated. The queued text is then written into the
 * <target> of the matching <trans-unit>, or a new <target> is inserted when the unit
 * has none. Units that cannot be located are left as they are.
 *
 * Filename specs: https://app.asana.com/0/1096066951381/2263196383117
 *
 * NOTE(review): the lazy ".*?" parts of the patterns run with the "s" flag and are not
 * anchored to a single trans-unit, so a unit without <target> followed by one that has
 * it may be matched as one span — confirm against real files.
 *
 * @return void
 */
public function doAction()
{
    $files_job = getFilesForJob($this->id_job, $this->id_file);

    $output_content = array();

    foreach ($files_job as $file) {
        $id_file = $file['id_file'];
        $current_filename = $file['filename'];
        $original = $file['original_file'];

        $data = getSegmentsDownload($this->id_job, $this->password, $id_file);

        $transunit_translation = "";

        foreach ($data as $i => $seg) {
            $end_tags = "";
            $translation = empty($seg['translation']) ? $seg['segment'] : $seg['translation'];

            // simplexml_load_string() returns false on a parse failure, but a successfully parsed
            // empty element is also falsy, hence the strict comparison.
            $xml_valid = @simplexml_load_string("<placeholder>{$translation}</placeholder>");
            if ($xml_valid === false) {
                // Collect the closing-tag-like chunks so they can be re-attached after the <mrk> wrapper.
                $temp = preg_split("|\\<|si", $translation);

                $item = end($temp);
                if (preg_match('|/.*?>\\W*$|si', $item)) {
                    $end_tags .= "<{$item}";
                }
                while ($item = prev($temp)) {
                    if (preg_match('|/.*?>\\W*$|si', $item)) {
                        // prepend, since the chunks are walked backwards
                        $end_tags = "<{$item}{$end_tags}";
                    }
                }

                $translation = str_replace($end_tags, "", $translation);
            }

            if (!empty($seg['mrk_id'])) {
                $translation = "<mrk mtype=\"seg\" mid=\"" . $seg['mrk_id'] . "\">{$translation}</mrk>";
            }

            $transunit_translation .= $seg['prev_tags'] . $translation . $end_tags . $seg['succ_tags'];

            if (isset($data[$i + 1]) && $seg['internal_id'] == $data[$i + 1]['internal_id']) {
                // The next row is another <mrk> of the same source unit: keep queueing.
                continue;
            }

            // A former patch that appended "</g>" to translations starting with "<g" was removed:
            // the closing tag may legitimately sit in the middle of the segment.

            // The id is data, not a pattern: quote it (including the "|" delimiter).
            $unitId = preg_quote($seg['internal_id'], '|');
            $unitHead = '(<trans-unit id="' . $unitId . '".*?>.*?)(<source.*?>.*?</source>.*?)(<seg-source.*?>.*?</seg-source>.*?)?';
            $patternWithTarget = '|' . $unitHead . '(<target.*?>).*?(</target>)(.*?)(</trans-unit>)|si';
            $patternWithoutTarget = '|' . $unitHead . '(.*?</trans-unit>)|si';

            // "\" and "$" are special in a preg_replace() replacement; escape them so text such
            // as "$5" or "\1" inside a translation is inserted literally instead of being
            // expanded as a backreference.
            $literalTranslation = strtr($transunit_translation, array('\\' => '\\\\', '$' => '\\$'));

            // The queue is flushed here whether or not the unit is found, otherwise the text of
            // an unmatched unit would be prepended to the next one.
            $transunit_translation = "";

            // First try: the unit already has a <target>, replace its content.
            $hits = 0;
            $updated = preg_replace(
                $patternWithTarget,
                '$1$2$3$4' . $literalTranslation . '$5$6$7',
                $original,
                -1,
                $hits
            );

            if ($updated === null || $hits === 0) {
                // Second try: no <target> yet, insert one before the rest of the unit.
                $hits = 0;
                $updated = preg_replace(
                    $patternWithoutTarget,
                    '$1$2$3<target>' . $literalTranslation . '</target>$4',
                    $original,
                    -1,
                    $hits
                );
            }

            // Neither pattern matched, or PCRE failed (null): keep the document unchanged so that
            // in the worst case the original file is returned.
            if ($updated !== null && $hits > 0) {
                $original = $updated;
            }
        }

        $output_content[$id_file]['content'] = $original;
        $output_content[$id_file]['filename'] = $current_filename;
    }

    // Several files requested together are delivered as a zip named after fname.
    if ($this->download_type == 'all' && count($output_content) > 1) {
        $this->filename = $this->fname;
        $pathinfo = pathinfo($this->fname);
        // pathinfo() omits 'extension' when the name has none.
        if (!isset($pathinfo['extension']) || $pathinfo['extension'] != 'zip') {
            $this->filename = $pathinfo['basename'] . ".zip";
        }
        $this->content = $this->composeZip($output_content);

        return;
    }

    // Otherwise the document itself is delivered (the last one wins if there are several);
    // names without an xliff-family extension get ".sdlxliff" appended.
    foreach ($output_content as $oc) {
        $pathinfo = pathinfo($oc['filename']);
        $extension = isset($pathinfo['extension']) ? $pathinfo['extension'] : '';

        $this->filename = $oc['filename'];
        if (!in_array($extension, array("xliff", "sdlxliff", "xlf"), true)) {
            $this->filename = $pathinfo['basename'] . ".sdlxliff";
        }
        $this->content = $oc['content'];
    }
}