コード例 #1
0
 /**
  * Convert the XLIFF uploaded as $_FILES['xliff'] back to its original format.
  *
  * On success $this->content receives a JSON string with the keys
  * fileName, fileContent (base64 encoded), size, type and message; note
  * that size and type are computed on the uploaded XLIFF, not on the
  * converted document.
  * On failure $this->error is set to true and $this->errorMessage is filled.
  *
  * @return void
  */
 public function doAction()
 {
     $uploaded_path = isset($_FILES['xliff']['tmp_name']) ? $_FILES['xliff']['tmp_name'] : null;
     if (!is_string($uploaded_path) || $uploaded_path === '') {
         // Nothing usable was posted under the 'xliff' field
         $this->error = true;
         $this->errorMessage = "No XLIFF file was uploaded.";
         return;
     }
     // Just add the XLIFF extension, the DetectProprietaryXliff class needs it
     $file_path = $uploaded_path . '.xlf';
     // move_uploaded_file() returns false when the source is not a valid
     // upload or cannot be moved: in that case there is nothing to convert
     if (!move_uploaded_file($uploaded_path, $file_path)) {
         $this->error = true;
         $this->errorMessage = "The uploaded XLIFF file could not be processed.";
         return;
     }
     // Detect XLIFF type
     $fileInfo = DetectProprietaryXliff::getInfo($file_path);
     $converterVersion = $fileInfo['converter_version'];
     $converter = new FileFormatConverter($converterVersion);
     // A single document is sent, indexed by 1; the result uses the same index
     $conversion = $converter->multiConvertToOriginal(array(1 => array('document_content' => file_get_contents($file_path))));
     if ($conversion[1]['isSuccess'] !== true) {
         $this->error = true;
         $this->errorMessage = $conversion[1]['errorMessage'];
     } else {
         $this->content = json_encode(array("fileName" => $conversion[1]['filename'], "fileContent" => base64_encode($conversion[1]['document_content']), "size" => filesize($file_path), "type" => mime_content_type($file_path), "message" => "File downloaded! Check your download folder"));
     }
     // Once moved, the file is no longer cleaned up by PHP at the end of
     // the request, so remove the renamed copy ourselves
     if (is_file($file_path)) {
         unlink($file_path);
     }
 }
コード例 #2
0
 /**
  * Build a FileFormatConverter working with old or new converters.
  *
  * The usable converter instances are read from the 'converters' db table
  * (only rows with status_active = 1 and status_offline = 0; when
  * INIT::$USE_ONLY_STABLE_CONVERTERS is set, only rows with stable = 1).
  *
  * $converterVersion can be "legacy", "latest", or something like "1.0.0":
  *  - legacy: rows whose conversion_api_version is NOT LIKE "open %";
  *  - latest: rows having the MAX conversion_api_version among the
  *    "open %" ones;
  *  - a version: rows whose conversion_api_version equals the value
  *    returned by self::getClosestConversionApiVersion(), which is expected
  *    to pick the requested version or the closest higher one.
  * New converters are expected to have a conversion_api_version value like
  * "open 1.0.0".
  */
 public function __construct($converterVersion = null)
 {
     $this->converterVersion = $converterVersion;
     $this->opt['httpheader'] = array("Content-Type:multipart/form-data;charset=UTF-8");
     $this->lang_handler = Langs_Languages::getInstance();

     // Blank record describing a single conversion, readable as properties
     $emptyConversion = array(
         'ip_machine' => null,
         'ip_client' => null,
         'path_name' => null,
         'file_name' => null,
         'path_backup' => null,
         'file_size' => 0,
         'direction' => null,
         'error_message' => null,
         'src_lang' => null,
         'trg_lang' => null,
         'status' => 'ok',
         'conversion_time' => 0,
     );
     $this->conversionObject = new ArrayObject($emptyConversion, ArrayObject::ARRAY_AS_PROPS);

     // The converters instances list lives in the database
     $db = Database::obtain();

     // Pieces shared by every flavour of the query
     $onlineFilter = ' AND status_active = 1 AND status_offline = 0';
     $selectConverters = 'SELECT ip_converter, cpu_weight, ip_storage, segmentation_rule FROM converters WHERE status_active = 1 AND status_offline = 0';
     $stableFilter = '';
     if (INIT::$USE_ONLY_STABLE_CONVERTERS) {
         $stableFilter = ' AND stable = 1';
     }

     if ($this->converterVersion == Constants_ConvertersVersions::LEGACY) {
         // Old converters only
         $query = $selectConverters . $stableFilter . ' AND conversion_api_version NOT LIKE "open %"';
     } elseif ($this->converterVersion == Constants_ConvertersVersions::LATEST) {
         // New converters, highest available version
         $latestVersionSubquery = 'SELECT MAX(conversion_api_version) FROM converters WHERE conversion_api_version LIKE "open %"' . $stableFilter . $onlineFilter;
         $query = $selectConverters . ' AND conversion_api_version = (' . $latestVersionSubquery . ')';
     } else {
         // New converters, specific (or closest) version
         $closest_conversion_api_version = self::getClosestConversionApiVersion($this->converterVersion);
         $query = $selectConverters . $stableFilter . ' AND conversion_api_version = "' . $closest_conversion_api_version . '"';
     }
     $rows = $db->fetch_array($query);

     // These three lookup tables are static although they should not be:
     // the class is built around them and a refactoring is too risky.
     // As a patch they are emptied on every instantiation, so that new
     // converters never mix with old converters.
     self::$converters = array();
     self::$Storage_Lookup_IP_Map = array();
     self::$converter2segmRule = array();
     foreach ($rows as $row) {
         $ip = $row['ip_converter'];
         self::$converters[$ip] = $row['cpu_weight'];
         self::$Storage_Lookup_IP_Map[$ip] = $row['ip_storage'];
         self::$converter2segmRule[$ip] = $row['segmentation_rule'];
     }
     //        self::$converters = array('10.30.1.32' => 1);//for debugging purposes
     //        self::$Storage_Lookup_IP_Map = array('10.30.1.32' => '10.30.1.32');//for debugging purposes
     $this->storage_lookup_map = self::$Storage_Lookup_IP_Map;
 }
コード例 #3
0
 /**
  * Build the download for a job: translated XLIFFs, optionally converted
  * back to their original format, delivered as a single file or as a zip.
  *
  * Steps, as performed below:
  *  1. load the job data and check the password; on failure the error is
  *     logged, mailed and the method returns null;
  *  2. group the job files by the converter version reported by
  *     DetectProprietaryXliff::getInfo();
  *  3. per group, in chunks of self::FILES_CHUNK_SIZE files, write a
  *     translated XLIFF to INIT::$TMP_DOWNLOAD and, unless conversion is
  *     disabled / not needed / XLIFF is forced, send it to the converter;
  *  4. re-encode .strings results to UTF-16;
  *  5. package the result (OmegaT zip, generic zip or single content);
  *  6. remove the job's temporary download directory.
  *
  * @return null only the access-denied path returns explicitly
  * @throws Exception rethrown after logging when packaging fails in the
  *                   non-OmegaT branch
  */
 public function doAction()
 {
     //get job language and data
     //Fixed Bug: need a specific job, because we need The target Language
     //Removed from within the foreach cycle, the job is always the same....
     $jobData = $this->jobInfo = getJobData($this->id_job, $this->password);
     $pCheck = new AjaxPasswordCheck();
     //check for Password correctness
     if (empty($jobData) || !$pCheck->grantJobAccessByJobData($jobData, $this->password)) {
         // NOTE(review): the whole $_POST is dumped into the log and the mail report
         $msg = "Error : wrong password provided for download \n\n " . var_export($_POST, true) . "\n";
         Log::doLog($msg);
         Utils::sendErrMailReport($msg);
         return null;
     }
     //get storage object
     $fs = new FilesStorage();
     $files_job = $fs->getFilesForJob($this->id_job, $this->id_file);
     // passed as-is to getSegmentsDownload() for every file
     $nonew = 0;
     // per-file results, indexed by file id
     $output_content = array();
     /*
       the procedure:
       1)original xliff file is read directly from disk; a file handler is obtained
       2)the file is read chunk by chunk by a stream parser: for each trans-unit that is encountered, target is replaced (or added) with the corresponding translation obtained from the DB
       3)the parsed portion of xliff in the buffer is flushed on temporary file
       4)the temporary file is sent to the converter and an original file is obtained
       5)the temporary file is deleted
     */
     // This array will contain all the files of $files_job split by
     // converter version.
     $files_job_by_converter_version = array();
     // Detect the converter's version to use for each file, then store
     // file info accordingly.
     foreach ($files_job as $file) {
         $fileType = DetectProprietaryXliff::getInfo($file['xliffFilePath']);
         $files_job_by_converter_version[$fileType['converter_version']][] = $file;
     }
     // Process files according to the converters' versions, one version
     // at a time
     foreach ($files_job_by_converter_version as $converter_version => $files_job) {
         //file array is chunked. Each chunk will be used for a parallel conversion request.
         $files_job = array_chunk($files_job, self::FILES_CHUNK_SIZE);
         foreach ($files_job as $chunk) {
             // a new converter instance is created for every chunk
             $converter = new FileFormatConverter($converter_version);
             // files of this chunk that must go through the converter, indexed by file id
             $files_to_be_converted = array();
             foreach ($chunk as $file) {
                 // NOTE(review): $mime_type is not read again in this method
                 $mime_type = $file['mime_type'];
                 $fileID = $file['id_file'];
                 $current_filename = $file['filename'];
                 //get path for the converted output file, to know its right extension
                 $_fileName = explode(DIRECTORY_SEPARATOR, $file['xliffFilePath']);
                 // <tmp>/<id_job>/<id_file>/<uniqid>_.out.<xliff file name>
                 $outputPath = INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/' . $fileID . '/' . uniqid('', true) . "_.out." . array_pop($_fileName);
                 //make dir if doesn't exist
                 if (!file_exists(dirname($outputPath))) {
                     Log::doLog('Create Directory ' . escapeshellarg(dirname($outputPath)) . '');
                     mkdir(dirname($outputPath), 0775, true);
                 }
                 $data = getSegmentsDownload($this->id_job, $this->password, $fileID, $nonew);
                 //prepare regexp for next step
                 // numeric entities for 0x00-0x1F and 0x7F, except &#x09; &#x0A; &#x0D;
                 $regexpEntity = '/&#x(0[0-8BCEF]|1[0-9A-F]|7F);/u';
                 // raw characters 0x00-0x1F and 0x7F
                 // NOTE(review): this class also covers tab, LF and CR — confirm that stripping them is intended
                 $regexpAscii = '/([\\x{00}-\\x{1F}\\x{7F}]{1})/u';
                 foreach ($data as $i => $k) {
                     //create a secondary indexing mechanism on segments' array; this will be useful
                     //prepend a string so non-trans unit id ( ex: numerical ) are not overwritten
                     $data['matecat|' . $k['internal_id']][] = $i;
                     //FIXME: temporary patch
                     $data[$i]['translation'] = str_replace('<x id="nbsp"/>', '&#xA0;', $data[$i]['translation']);
                     $data[$i]['segment'] = str_replace('<x id="nbsp"/>', '&#xA0;', $data[$i]['segment']);
                     //remove binary chars in some xliff files
                     $sanitized_src = preg_replace($regexpAscii, '', $data[$i]['segment']);
                     $sanitized_trg = preg_replace($regexpAscii, '', $data[$i]['translation']);
                     //clean invalid xml entities ( characters with ascii < 32 and different from 0A, 0D and 09 )
                     $sanitized_src = preg_replace($regexpEntity, '', $sanitized_src);
                     $sanitized_trg = preg_replace($regexpEntity, '', $sanitized_trg);
                     // loose comparison: the original text is kept both when preg_replace
                     // failed (null) and when the sanitized text is an empty string
                     if ($sanitized_src != null) {
                         $data[$i]['segment'] = $sanitized_src;
                     }
                     if ($sanitized_trg != null) {
                         $data[$i]['translation'] = $sanitized_trg;
                     }
                 }
                 //instantiate parser
                 $xsp = new SdlXliffSAXTranslationReplacer($file['xliffFilePath'], $data, Langs_Languages::getInstance()->getLangRegionCode($jobData['target']), $outputPath);
                 if ($this->download_type == 'omegat') {
                     $xsp->setSourceInTarget(true);
                 }
                 //run parsing
                 Log::doLog("work on " . $fileID . " " . $current_filename);
                 $xsp->replaceTranslation();
                 //free memory
                 unset($xsp);
                 unset($data);
                 $output_content[$fileID]['document_content'] = file_get_contents($outputPath);
                 $output_content[$fileID]['output_filename'] = $current_filename;
                 // NOTE(review): same call already made for this file while grouping by converter version
                 $fileType = DetectProprietaryXliff::getInfo($file['xliffFilePath']);
                 if ($this->forceXliff) {
                     //clean the output filename by removing
                     // the unique hash identifier 55e5739b467109.05614837_.out.Test_English.doc.sdlxliff
                     $output_content[$fileID]['output_filename'] = preg_replace('#[0-9a-f]+\\.[0-9_]+\\.out\\.#i', '', FilesStorage::basename_fix($outputPath));
                     if ($fileType['proprietary_short_name'] === 'matecat_converter') {
                         // Set the XLIFF extension to .xlf
                         // Internally, MateCat continues using .sdlxliff as default
                         // extension for the XLIFF behind the projects.
                         // Changing this behavior requires a huge refactoring that
                         // it's scheduled for future versions.
                         // We quickly fixed the behaviour from the user standpoint
                         // using the following line of code, that changes the XLIFF's
                         // extension just a moment before it is downloaded by the user.
                         $output_content[$fileID]['output_filename'] = preg_replace("|\\.sdlxliff\$|i", ".xlf", $output_content[$fileID]['output_filename']);
                     }
                 }
                 /**
                  * Conversion Enforce
                  */
                 $convertBackToOriginal = true;
                 //if it is a not converted file ( sdlxliff ) we have originalFile equals to xliffFile (it has just been copied)
                 // NOTE(review): 'original_file' is stored on the loop copy of $file and is not read again in this method
                 $file['original_file'] = file_get_contents($file['originalFilePath']);
                 // When the 'proprietary' flag is set to false, the xliff
                 // is not passed to any converter, because is handled
                 // directly inside MateCAT.
                 $xliffWasNotConverted = $fileType['proprietary'] === false;
                 // precedence: 'and' binds tighter than 'or', and both bind looser than '||',
                 // so this reads: (!CONVERSION_ENABLED || (same path and not converted)) or forceXliff
                 if (!INIT::$CONVERSION_ENABLED || ($file['originalFilePath'] == $file['xliffFilePath'] and $xliffWasNotConverted) or $this->forceXliff) {
                     $convertBackToOriginal = false;
                     Log::doLog("SDLXLIFF: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                 } else {
                     //TODO: dos2unix ??? why??
                     //force unix type files
                     Log::doLog("NO SDLXLIFF, Conversion enforced: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                 }
                 if ($convertBackToOriginal) {
                     // queue the translated XLIFF for back conversion
                     $output_content[$fileID]['out_xliff_name'] = $outputPath;
                     $output_content[$fileID]['source'] = $jobData['source'];
                     $output_content[$fileID]['target'] = $jobData['target'];
                     $files_to_be_converted[$fileID] = $output_content[$fileID];
                 } elseif ($this->forceXliff) {
                     // presumably works on the content by reference (the return value is ignored) — verify in cleanFilePath()
                     $this->cleanFilePath($output_content[$fileID]['document_content']);
                 }
             }
             // NOTE(review): called even when $files_to_be_converted is empty; the
             // inline assignment only passes false as second argument
             $convertResult = $converter->multiConvertToOriginal($files_to_be_converted, $chosen_machine = false);
             foreach (array_keys($files_to_be_converted) as $fileID) {
                 // NOTE(review): the conversion outcome is not checked before its content is used
                 $output_content[$fileID]['document_content'] = $this->ifGlobalSightXliffRemoveTargetMarks($convertResult[$fileID]['document_content'], $files_to_be_converted[$fileID]['output_filename']);
                 //in case of .strings, they are required to be in UTF-16
                 //get extension to perform file detection
                 $extension = FilesStorage::pathinfo_fix($output_content[$fileID]['output_filename'], PATHINFO_EXTENSION);
                 if (strtoupper($extension) == 'STRINGS') {
                     //use this function to convert stuff
                     $encodingConvertedFile = CatUtils::convertEncoding('UTF-16', $output_content[$fileID]['document_content']);
                     //strip previously added BOM
                     $encodingConvertedFile[1] = $converter->stripBOM($encodingConvertedFile[1], 16);
                     //store new content
                     $output_content[$fileID]['document_content'] = $encodingConvertedFile[1];
                     //trash temporary data
                     unset($encodingConvertedFile);
                 }
             }
             unset($convertResult);
         }
     }
     // When zipPathInfo() returns an array, split the name into zip file
     // name, internal path and base name
     foreach ($output_content as $idFile => $fileInformations) {
         $zipPathInfo = ZipArchiveExtended::zipPathInfo($output_content[$idFile]['output_filename']);
         if (is_array($zipPathInfo)) {
             $output_content[$idFile]['zipfilename'] = $zipPathInfo['zipfilename'];
             $output_content[$idFile]['zipinternalPath'] = $zipPathInfo['dirname'];
             $output_content[$idFile]['output_filename'] = $zipPathInfo['basename'];
         }
     }
     //set the file Name
     // NOTE(review): assumes $this->fname has an extension, otherwise 'extension' may be missing — confirm
     $pathinfo = FilesStorage::pathinfo_fix($this->fname);
     $this->_filename = $pathinfo['filename'] . "_" . $jobData['target'] . "." . $pathinfo['extension'];
     //who benefits from checking the download type here?
     if ($this->download_type == 'omegat') {
         $this->_filename .= ".zip";
         $tmsService = new TMSService();
         $tmsService->setOutputType('tm');
         /**
          * @var $tmFile SplTempFileObject
          */
         $tmFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);
         $tmsService->setOutputType('mt');
         /**
          * @var $mtFile SplTempFileObject
          */
         $mtFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);
         $tm_id = uniqid('tm');
         $mt_id = uniqid('mt');
         // NOTE(review): the "_TM . tmx" / "_MT . tmx" suffixes contain spaces around the dot;
         // check whether createOmegaTZip() relies on them before normalizing
         $output_content[$tm_id] = array('document_content' => '', 'output_filename' => $pathinfo['filename'] . "_" . $jobData['target'] . "_TM . tmx");
         // the exported file is iterated line by line and appended to the content
         foreach ($tmFile as $lineNumber => $content) {
             $output_content[$tm_id]['document_content'] .= $content;
         }
         $output_content[$mt_id] = array('document_content' => '', 'output_filename' => $pathinfo['filename'] . "_" . $jobData['target'] . "_MT . tmx");
         foreach ($mtFile as $lineNumber => $content) {
             $output_content[$mt_id]['document_content'] .= $content;
         }
         $this->createOmegaTZip($output_content, $jobData['source'], $jobData['target']);
         //add zip archive content here;
     } else {
         try {
             $output_content = $this->getOutputContentsWithZipFiles($output_content);
             if (count($output_content) > 1) {
                 //cast $output_content elements to ZipContentObject
                 foreach ($output_content as $key => $__output_content_elem) {
                     $output_content[$key] = new ZipContentObject($__output_content_elem);
                 }
                 // several files: the download becomes a zip, named after the job id
                 // when XLIFF is forced, after the requested file name otherwise
                 if ($pathinfo['extension'] != 'zip') {
                     if ($this->forceXliff) {
                         $this->_filename = $this->id_job . ".zip";
                     } else {
                         $this->_filename = $pathinfo['basename'] . ".zip";
                     }
                 }
                 $this->content = self::composeZip($output_content);
                 //add zip archive content here;
             } else {
                 //always an array with 1 element, pop it, Ex: array( array() )
                 $output_content = array_pop($output_content);
                 $this->setContent($output_content);
             }
         } catch (Exception $e) {
             $msg = "\n\n Error retrieving file content, Conversion failed??? \n\n Error: {$e->getMessage()} \n\n" . var_export($e->getTraceAsString(), true);
             $msg .= "\n\n Request: " . var_export($_REQUEST, true);
             // the log target is switched for this message and not restored afterwards
             Log::$fileName = 'fatal_errors.txt';
             Log::doLog($msg);
             Utils::sendErrMailReport($msg);
             $this->unlockToken(array("code" => -110, "message" => "Download failed. Please contact " . INIT::$SUPPORT_MAIL));
             throw $e;
             // rethrown to avoid sending headers and empty file content with the finalize method
         }
     }
     // best-effort cleanup of the job's temporary download directory
     try {
         Utils::deleteDir(INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/');
     } catch (Exception $e) {
         Log::doLog('Failed to delete dir:' . $e->getMessage());
     }
 }
コード例 #4
0
 /**
  * Validate an uploaded file and make sure a converted XLIFF package for it
  * exists, either from the cache or by converting it now.
  *
  * The outcome is reported through $this->result ('code' and 'errors'):
  *  - code 1: the file can be used (native XLIFF or package available);
  *  - code -6: uploaded file not found;
  *  - code -7: proprietary XLIFF while conversion is disabled (the file is deleted);
  *  - code -8: exception while detecting the XLIFF type;
  *  - code -100: conversion failed (user oriented message);
  *  - code -103: the cache package could not be created;
  *  - code -110: the converted file cannot be converted back.
  *
  * @return int|bool 0 when the flow completed (also after a -100 error),
  *                  -1 for the -6/-7/-8 errors, false for the -110/-103 errors
  */
 public function doAction()
 {
     $this->file_name = html_entity_decode($this->file_name, ENT_QUOTES);
     $file_path = $this->intDir . DIRECTORY_SEPARATOR . $this->file_name;
     if (!file_exists($file_path)) {
         $this->result['code'] = -6;
         // No Good, Default
         $this->result['errors'][] = array("code" => -6, "message" => "Error during upload. Please retry.", 'debug' => FilesStorage::basename_fix($this->file_name));
         return -1;
     }
     //XLIFF Conversion management
     //cyclomatic complexity 9999999 ..... but it works, for now.
     try {
         $fileType = DetectProprietaryXliff::getInfo($file_path);
         if (DetectProprietaryXliff::isXliffExtension() || DetectProprietaryXliff::getMemoryFileType()) {
             if (INIT::$CONVERSION_ENABLED) {
                 //conversion enforce
                 if (!INIT::$FORCE_XLIFF_CONVERSION) {
                     //if file is not proprietary AND Enforce is disabled
                     //we take it as is
                     if (!$fileType['proprietary'] || DetectProprietaryXliff::getMemoryFileType()) {
                         $this->result['code'] = 1;
                         // OK for client
                         //This file has to be linked to cache!
                         return 0;
                         //ok don't convert a standard sdlxliff
                     }
                 } else {
                     // if conversion enforce is active
                     // we force all xliff files but not files produced by
                     // SDL Studio or by the MateCAT converters, because we
                     // can handle them
                     if ($fileType['proprietary_short_name'] == 'matecat_converter' || $fileType['proprietary_short_name'] == 'trados' || DetectProprietaryXliff::getMemoryFileType()) {
                         $this->result['code'] = 1;
                         // OK for client
                         $this->result['errors'][] = array("code" => 0, "message" => "OK");
                         return 0;
                         //ok don't convert a standard sdlxliff
                     }
                 }
             } elseif ($fileType['proprietary']) {
                 // conversion disabled: a proprietary XLIFF cannot be handled
                 unlink($file_path);
                 $this->result['code'] = -7;
                 // No Good, Default
                 $this->result['errors'][] = array("code" => -7, "message" => 'Matecat Open-Source does not support ' . ucwords($fileType['proprietary_name']) . '. Use MatecatPro.', 'debug' => FilesStorage::basename_fix($this->file_name));
                 return -1;
             } elseif (!$fileType['proprietary']) {
                 $this->result['code'] = 1;
                 // OK for client
                 $this->result['errors'][] = array("code" => 0, "message" => "OK");
                 return 0;
                 //ok don't convert a standard sdlxliff
             }
         }
     } catch (Exception $e) {
         //try catch not used because of exception no more raised
         $this->result['code'] = -8;
         // No Good, Default
         $this->result['errors'][] = array("code" => -8, "message" => $e->getMessage());
         Log::doLog($e->getMessage());
         return -1;
     }
     //compute hash to locate the file in the cache
     $sha1 = sha1_file($file_path);
     //initialize path variable
     $cachedXliffPath = false;
     //get storage object
     $fs = new FilesStorage();
     //TODO: REMOVE SET ENVIRONMENT FOR LEGACY CONVERSION INSTANCES
     if (INIT::$LEGACY_CONVERSION !== false) {
         // this overwrites the static setting, not just a local copy
         INIT::$SAVE_SHASUM_FOR_FILES_LOADED = false;
     }
     //if already present in database cache get the converted without convert it again
     if (INIT::$SAVE_SHASUM_FOR_FILES_LOADED) {
         //move the file in the right directory from the packages to the file dir
         $cachedXliffPath = $fs->getXliffFromCache($sha1, $this->source_lang);
         if (!$cachedXliffPath) {
             Log::doLog("Failed to fetch xliff for {$sha1} from disk cache (is file there?)");
         }
     }
     //if invalid or no cached version
     if (!isset($cachedXliffPath) or empty($cachedXliffPath)) {
         //we have to convert it
         // By default, use always the new converters...
         $converterVersion = Constants_ConvertersVersions::LATEST;
         if ($this->segmentation_rule !== null) {
             // ...but new converters don't support custom segmentation rules.
             // if $this->segmentation_rule is set use the old ones.
             $converterVersion = Constants_ConvertersVersions::LEGACY;
         }
         //TODO: REMOVE SET ENVIRONMENT FOR LEGACY CONVERSION INSTANCES
         if (INIT::$LEGACY_CONVERSION !== false) {
             $converterVersion = Constants_ConvertersVersions::LEGACY;
         }
         $converter = new FileFormatConverter($converterVersion);
         // with a comma separated list of targets only the first one is used for the conversion
         if (strpos($this->target_lang, ',') !== false) {
             $single_language = explode(',', $this->target_lang);
             $single_language = $single_language[0];
         } else {
             $single_language = $this->target_lang;
         }
         $convertResult = $converter->convertToSdlxliff($file_path, $this->source_lang, $single_language, false, $this->segmentation_rule);
         if ($convertResult['isSuccess'] == 1) {
             /* try to back convert the file */
             $output_content = array();
             $output_content['out_xliff_name'] = $file_path . '.out.sdlxliff';
             $output_content['source'] = $this->source_lang;
             $output_content['target'] = $single_language;
             $output_content['content'] = $convertResult['xliffContent'];
             $output_content['filename'] = $this->file_name;
             $back_convertResult = $converter->convertToOriginal($output_content);
             /* try to back convert the file */
             if ($back_convertResult['isSuccess'] == false) {
                 //custom error message passed directly to javascript client and displayed as is
                 $convertResult['errorMessage'] = "Error: there is a problem with this file, it cannot be converted back to the original one.";
                 $this->result['code'] = -110;
                 $this->result['errors'][] = array("code" => -110, "message" => $convertResult['errorMessage'], 'debug' => FilesStorage::basename_fix($this->file_name));
                 return false;
             }
             //store converted content on a temporary path on disk (and off RAM)
             $cachedXliffPath = tempnam("/tmp", "MAT_XLF");
             file_put_contents($cachedXliffPath, $convertResult['xliffContent']);
             unset($convertResult['xliffContent']);
             /*
               store the converted file in the cache
               put a reference in the upload dir to the cache dir, so that from the UUID we can reach the converted file in the cache
               (this is independent by the "save xliff for caching" options, since we always end up storing original and xliff on disk)
             */
             //save in cache
             // NOTE(review): the temporary file is not removed here; presumably makeCachePackage() moves it — verify
             $res_insert = $fs->makeCachePackage($sha1, $this->source_lang, $file_path, $cachedXliffPath);
             if (!$res_insert) {
                 //custom error message passed directly to javascript client and displayed as is
                 $convertResult['errorMessage'] = "Error: File upload failed because you have MateCat running in multiple tabs. Please close all other MateCat tabs in your browser.";
                 $this->result['code'] = -103;
                 $this->result['errors'][] = array("code" => -103, "message" => $convertResult['errorMessage'], 'debug' => FilesStorage::basename_fix($this->file_name));
                 unset($cachedXliffPath);
                 return false;
             }
         } else {
             $file = FilesStorage::pathinfo_fix($this->file_name);
             // pathinfo-style arrays may lack the 'extension' key (file name
             // without a dot): read it once, guarded, to avoid undefined index notices
             $extension = isset($file['extension']) ? $file['extension'] : '';
             switch ($extension) {
                 case 'docx':
                     $defaultError = "Importing error. Try opening and saving the document with a new name. If this does not work, try converting to DOC.";
                     break;
                 case 'doc':
                 case 'rtf':
                     $defaultError = "Importing error. Try opening and saving the document with a new name. If this does not work, try converting to DOCX.";
                     break;
                 case 'inx':
                     $defaultError = "Importing Error. Try to commit changes in InDesign before importing.";
                     break;
                 case 'idml':
                     $defaultError = "Importing Error. MateCat does not support this version of InDesign, try converting it to a previous one.";
                     break;
                 default:
                     $defaultError = "Importing error. Try opening and saving the document with a new name.";
                     break;
             }
             // Map the converter's raw error to a message suitable for the
             // user; the order of the checks matters, the first match wins
             if (stripos($convertResult['errorMessage'], "failed to create SDLXLIFF.") !== false || stripos($convertResult['errorMessage'], "COM target does not implement IDispatch") !== false) {
                 $convertResult['errorMessage'] = "Error: failed importing file.";
             } elseif (stripos($convertResult['errorMessage'], "Unable to open Excel file - it may be password protected") !== false) {
                 $convertResult['errorMessage'] = $convertResult['errorMessage'] . " Try to remove protection using the Unprotect Sheet command on Windows Excel.";
             } elseif (stripos($convertResult['errorMessage'], "The document contains unaccepted changes") !== false) {
                 $convertResult['errorMessage'] = "The document contains track changes. Accept all changes before uploading it.";
             } elseif (stripos($convertResult['errorMessage'], "Error: Could not find file") !== false || stripos($convertResult['errorMessage'], "tw4winMark") !== false) {
                 $convertResult['errorMessage'] = $defaultError;
             } elseif (stripos($convertResult['errorMessage'], "Attempted to read or write protected memory") !== false) {
                 $convertResult['errorMessage'] = $defaultError;
             } elseif (stripos($convertResult['errorMessage'], "The document was created in Microsoft Word 97 or earlier") !== false) {
                 // strict check: stripos() returns 0 (falsy) when the text is at the very beginning of the message
                 $convertResult['errorMessage'] = $defaultError;
             } elseif ($extension == 'csv' && empty($convertResult['errorMessage'])) {
                 $convertResult['errorMessage'] = "This CSV file is not eligible to be imported due internal wrong format. Try to convert in TXT using UTF8 encoding";
             } elseif (empty($convertResult['errorMessage'])) {
                 $convertResult['errorMessage'] = "Failed to convert file. Internal error. Please Try again.";
             } elseif (stripos($convertResult['errorMessage'], "DocumentFormat.OpenXml.dll") !== false) {
                 //this error is triggered on DOCX when converter's parser can't decode some regions of the file
                 $convertResult['errorMessage'] = "Conversion error. Try opening and saving the document with a new name. If this does not work, try converting to DOC.";
             } elseif ($extension == 'idml') {
                 $convertResult['errorMessage'] = $defaultError;
             } elseif (stripos($convertResult['errorMessage'], "Error: The source language of the file") !== false) {
                 //Error: The source language of the file (English (United States)) is different from the project source language.
                 //we take the error, is good
             } else {
                 $convertResult['errorMessage'] = "Import error. Try converting it to a compatible file format (e.g. doc > docx, xlsx > xls)";
             }
             //custom error message passed directly to javascript client and displayed as is
             $this->result['code'] = -100;
             $this->result['errors'][] = array("code" => -100, "message" => $convertResult['errorMessage'], "debug" => $file['basename']);
         }
     }
     //if everything went well and we've obtained a path toward a valid package (original+xliff), either via cache or conversion
     if (isset($cachedXliffPath) and !empty($cachedXliffPath)) {
         //FILE Found in cache, destroy the already present shasum for other languages ( if user swapped languages )
         $uploadDir = INIT::$UPLOAD_REPOSITORY . DIRECTORY_SEPARATOR . $this->cookieDir;
         $fs->deleteHashFromUploadDir($uploadDir, $sha1 . "|" . $this->source_lang);
         //put reference to cache in upload dir to link cache to session
         $fs->linkSessionToCache($sha1, $this->source_lang, $this->cookieDir, FilesStorage::basename_fix($file_path));
         //a usable package is available, give positive feedback
         $this->result['code'] = 1;
     }
     return 0;
 }
コード例 #5
0
 /**
  * Build the translated file(s) of a job and hand them over for download.
  *
  * Flow, as implemented below:
  *  1) load the job data and check the password; on failure the error is logged,
  *     mailed through Utils::sendErrMailReport() and the method returns early;
  *  2) for every file of the job, write the stored XLIFF to a temporary file and run
  *     XliffSAXTranslationReplacer on it; the result is read back from
  *     "<temp path>.out.sdlxliff";
  *  3) files that must go back to their original format are sent to
  *     FileFormatConverter::multiConvertToOriginal(), one call per chunk of
  *     self::FILES_CHUNK_SIZE files;
  *  4) the output is delivered through createOmegaTZip(), composeZip() or setContent(),
  *     depending on $this->download_type and on the number of produced files;
  *  5) the temporary download directory of the job is deleted.
  *
  * @return null No meaningful value is returned; the only explicit return is the early
  *              exit taken when the password check fails.
  */
 public function doAction()
 {
     // Per-phase timestamps; nothing inside this method reads them back.
     $debug = array();
     $debug['total'][] = time();
     //get job language and data
     //Fixed Bug: need a specific job, because we need The target Language
     //Removed from within the foreach cycle, the job is always the same....
     $jobData = $this->jobInfo = getJobData($this->id_job, $this->password);
     $pCheck = new AjaxPasswordCheck();
     //check for Password correctness
     if (empty($jobData) || !$pCheck->grantJobAccessByJobData($jobData, $this->password)) {
         $msg = "Error : wrong password provided for download \n\n " . var_export($_POST, true) . "\n";
         Log::doLog($msg);
         Utils::sendErrMailReport($msg);
         return null;
     }
     $debug['get_file'][] = time();
     $files_job = getFilesForJob($this->id_job, $this->id_file);
     $debug['get_file'][] = time();
     $nonew = 0;
     // Collected output, keyed by file id: 'documentContent' and 'filename' (plus conversion metadata when needed)
     $output_content = array();
     /*
      * the procedure is now as follows:
      * 1)original file is loaded from DB into RAM and the flushed in a temp file on disk; a file handler is obtained
      * 2)RAM gets freed from original content
      * 3)the file is read chunk by chunk by a stream parser: for each tran-unit that is encountered,
      *     target is replaced (or added) with the corresponding translation among segments
      *     the current string in the buffer is flushed on standard output
      * 4)the temporary file is deleted by another process after some time
      *
      */
     //file array is chunked. Each chunk will be used for a parallel conversion request.
     $files_job = array_chunk($files_job, self::FILES_CHUNK_SIZE);
     foreach ($files_job as $chunk) {
         $converter = new FileFormatConverter();
         // Files of this chunk that must be converted back to their original format
         $files_buffer = array();
         foreach ($chunk as $file) {
             $mime_type = $file['mime_type'];
             $fileID = $file['id_file'];
             $current_filename = $file['filename'];
             $original_xliff = $file['xliff_file'];
             //get path (uniqid makes the temp file name unique per request)
             $path = INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/' . $fileID . '/' . $current_filename . "_" . uniqid('', true) . '.sdlxliff';
             //make dir if doesn't exist
             if (!file_exists(dirname($path))) {
                 Log::doLog('exec ("chmod 666 ' . escapeshellarg($path) . '");');
                 mkdir(dirname($path), 0777, true);
                 // NOTE(review): $path is only created by the fopen() below, so this chmod
                 // targets a file that does not exist yet — confirm the intent.
                 exec("chmod 666 " . escapeshellarg($path));
             }
             //create file
             $fp = fopen($path, 'w+');
             //flush file to disk
             fwrite($fp, $original_xliff);
             //free memory, as we can work with file on disk now
             unset($original_xliff);
             $debug['get_segments'][] = time();
             $data = getSegmentsDownload($this->id_job, $this->password, $fileID, $nonew);
             $debug['get_segments'][] = time();
             //create a secondary indexing mechanism on segments' array; this will be useful
             //prepend a string so non-trans unit id ( ex: numerical ) are not overwritten
             //clean also not valid xml entities ( characters with ascii < 32 and different from 0A, 0D and 09 )
             $regexpEntity = '/&#x(0[0-8BCEF]|1[0-9A-F]|7F);/u';
             //remove binary chars in some xliff files
             // NOTE(review): unlike the entity pattern above, this range also matches TAB, LF and CR — confirm that is wanted.
             $regexpAscii = '/([\\x{00}-\\x{1F}\\x{7F}]{1})/u';
             // foreach works on a copy of $data, so the 'matecat|...' keys added below are not visited by this loop
             foreach ($data as $i => $k) {
                 $data['matecat|' . $k['internal_id']][] = $i;
                 //FIXME: temporary patch
                 $data[$i]['translation'] = str_replace('<x id="nbsp"/>', '&#xA0;', $data[$i]['translation']);
                 $data[$i]['segment'] = str_replace('<x id="nbsp"/>', '&#xA0;', $data[$i]['segment']);
                 $sanitized_src = preg_replace($regexpAscii, '', $data[$i]['segment']);
                 $sanitized_trg = preg_replace($regexpAscii, '', $data[$i]['translation']);
                 $sanitized_src = preg_replace($regexpEntity, '', $sanitized_src);
                 $sanitized_trg = preg_replace($regexpEntity, '', $sanitized_trg);
                 // preg_replace() yields null only when it fails; in that case the original text is kept.
                 // The comparison is strict so that a text made only of stripped characters
                 // (sanitized to '') is really replaced instead of silently keeping the raw bytes.
                 if ($sanitized_src !== null) {
                     $data[$i]['segment'] = $sanitized_src;
                 }
                 if ($sanitized_trg !== null) {
                     $data[$i]['translation'] = $sanitized_trg;
                 }
             }
             $debug['replace'][] = time();
             //instantiate parser
             $xsp = new XliffSAXTranslationReplacer($path, $data, Langs_Languages::getInstance()->getLangRegionCode($jobData['target']), $fp);
             if ($this->download_type == 'omegat') {
                 $xsp->setSourceInTarget(true);
             }
             //run parsing
             Log::doLog("work on " . $fileID . " " . $current_filename);
             $xsp->replaceTranslation();
             fclose($fp);
             unset($xsp);
             $debug['replace'][] = time();
             // the translated XLIFF is read back from the ".out.sdlxliff" sibling of the temp file
             $output_xliff = file_get_contents($path . '.out.sdlxliff');
             $output_content[$fileID]['documentContent'] = $output_xliff;
             $output_content[$fileID]['filename'] = $current_filename;
             unset($output_xliff);
             if ($this->forceXliff) {
                 // when the XLIFF itself is requested, the delivered name gets the ".out.sdlxliff" suffix
                 $file_info_details = pathinfo($output_content[$fileID]['filename']);
                 $output_content[$fileID]['filename'] = $file_info_details['filename'] . ".out.sdlxliff";
             }
             //TODO set a flag in database when file uploaded to know if this file is a proprietary xlf converted
             //TODO so we can load from database the original file blob ONLY when needed
             /**
              * Conversion Enforce
              */
             $convertBackToOriginal = true;
             try {
                 //if it is a not converted file ( sdlxliff ) we have an empty field original_file
                 //so we can simplify all the logic with:
                 // is empty original_file? if it is, we don't need conversion back because
                 // we already have an sdlxliff or an accepted file
                 $file['original_file'] = @gzinflate($file['original_file']);
                 // precedence: !CONVERSION_ENABLED || (empty original && mime is sdlxliff) || forceXliff
                 if (!INIT::$CONVERSION_ENABLED || empty($file['original_file']) && $mime_type == 'sdlxliff' || $this->forceXliff) {
                     $convertBackToOriginal = false;
                     Log::doLog("SDLXLIFF: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                 } else {
                     //TODO: dos2unix ??? why??
                     //force unix type files
                     Log::doLog("NO SDLXLIFF, Conversion enforced: {$file['filename']} --- " . var_export($convertBackToOriginal, true));
                 }
             } catch (Exception $e) {
                 Log::doLog($e->getMessage());
             }
             if ($convertBackToOriginal) {
                 // queue the file for the back-conversion request of this chunk
                 $output_content[$fileID]['out_xliff_name'] = $path . '.out.sdlxliff';
                 $output_content[$fileID]['source'] = $jobData['source'];
                 $output_content[$fileID]['target'] = $jobData['target'];
                 $files_buffer[$fileID] = $output_content[$fileID];
             } elseif ($this->forceXliff) {
                 $this->cleanFilePath($output_content[$fileID]['documentContent']);
             }
         }
         $debug['do_conversion'][] = time();
         // the inline assignment only labels the second argument; the value passed is false
         $convertResult = $converter->multiConvertToOriginal($files_buffer, $chosen_machine = false);
         foreach (array_keys($files_buffer) as $fileID) {
             $output_content[$fileID]['documentContent'] = $this->removeTargetMarks($convertResult[$fileID]['documentContent'], $files_buffer[$fileID]['filename']);
             //in case of .strings, they are required to be in UTF-16
             //get extension to perform file detection
             $extension = pathinfo($output_content[$fileID]['filename'], PATHINFO_EXTENSION);
             if (strtoupper($extension) == 'STRINGS') {
                 //use this function to convert stuff
                 $encodingConvertedFile = CatUtils::convertEncoding('UTF-16', $output_content[$fileID]['documentContent']);
                 //strip previously added BOM
                 $encodingConvertedFile[1] = $converter->stripBOM($encodingConvertedFile[1], 16);
                 //store new content
                 $output_content[$fileID]['documentContent'] = $encodingConvertedFile[1];
                 //trash temporary data
                 unset($encodingConvertedFile);
             }
         }
         //            $output_content[ $fileID ][ 'documentContent' ] = $convertResult[ 'documentContent' ];
         unset($convertResult);
         $debug['do_conversion'][] = time();
     }
     //set the file Name
     $pathinfo = pathinfo($this->fname);
     $this->filename = $pathinfo['filename'] . "_" . $jobData['target'] . "." . $pathinfo['extension'];
     //qui prodest to check download type?
     if ($this->download_type == 'omegat') {
         $this->filename .= ".zip";
         $tmsService = new TMSService();
         $tmsService->setOutputType('tm');
         /**
          * @var $tmFile SplTempFileObject
          */
         $tmFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);
         $tmsService->setOutputType('mt');
         /**
          * @var $mtFile SplTempFileObject
          */
         $mtFile = $tmsService->exportJobAsTMX($this->id_job, $this->password, $jobData['source'], $jobData['target']);
         $tm_id = uniqid('tm');
         $mt_id = uniqid('mt');
         // NOTE(review): the "_TM . tmx" / "_MT . tmx" suffixes contain spaces around the dot — confirm this is intended.
         $output_content[$tm_id] = array('documentContent' => '', 'filename' => $pathinfo['filename'] . "_" . $jobData['target'] . "_TM . tmx");
         // append the exported TMX to the entry, one iterated item at a time
         foreach ($tmFile as $lineNumber => $content) {
             $output_content[$tm_id]['documentContent'] .= $content;
         }
         $output_content[$mt_id] = array('documentContent' => '', 'filename' => $pathinfo['filename'] . "_" . $jobData['target'] . "_MT . tmx");
         foreach ($mtFile as $lineNumber => $content) {
             $output_content[$mt_id]['documentContent'] .= $content;
         }
         $this->createOmegaTZip($output_content, $jobData['source'], $jobData['target']);
         //add zip archive content here;
     } else {
         if (count($output_content) > 1) {
             // several files: deliver a zip, renaming the download unless it already has a zip extension
             if ($pathinfo['extension'] != 'zip') {
                 if ($this->forceXliff) {
                     $this->filename = $this->id_job . ".zip";
                 } else {
                     $this->filename = $pathinfo['basename'] . ".zip";
                 }
             }
             $this->composeZip($output_content, $jobData['source']);
             //add zip archive content here;
         } else {
             //always an array with 1 element, pop it, Ex: array( array() )
             $output_content = array_pop($output_content);
             $this->setContent($output_content);
         }
     }
     $debug['total'][] = time();
     // remove every temporary file created for this job
     Utils::deleteDir(INIT::$TMP_DOWNLOAD . '/' . $this->id_job . '/');
 }
コード例 #6
0
 /**
  * Convert an uploaded file to XLIFF, or accept it as-is when no conversion is needed.
  *
  * The outcome is reported through $this->result: 'code' is set to 1 when an XLIFF-type
  * upload is accepted without conversion and to a negative value on failure, and one entry
  * is appended to 'errors' in each of those cases. After a successful conversion or cache
  * hit 'code' keeps the initial 0.
  *
  * Result codes set here:
  *   -1   missing file name            -6   uploaded file not found on disk
  *   -7   proprietary XLIFF while conversion is disabled
  *   -8   exception during XLIFF type detection
  *   -100 conversion failed            -101 cached XLIFF could not be written to disk
  *   -102 cache insert failed          -103 converted XLIFF could not be written to disk
  *   -110 converted file could not be converted back to the original format
  *
  * @return mixed false for codes -1, -110 and -103; -1 for codes -6, -7 and -8;
  *               0 when the file is accepted without conversion; null in every other case
  *               (including code -102 and a completed conversion).
  */
 public function doAction()
 {
     $this->result['code'] = 0;
     // No Good, Default
     if (empty($this->file_name)) {
         $this->result['code'] = -1;
         // No Good, Default
         $this->result['errors'][] = array("code" => -1, "message" => "Error: missing file name.");
         return false;
     }
     $this->file_name = html_entity_decode($this->file_name, ENT_QUOTES);
     $file_path = $this->intDir . DIRECTORY_SEPARATOR . $this->file_name;
     if (!file_exists($file_path)) {
         $this->result['code'] = -6;
         // No Good, Default
         $this->result['errors'][] = array("code" => -6, "message" => "Error during upload. Please retry.");
         return -1;
     }
     //get uploaded file from disk
     $original_content = file_get_contents($file_path);
     // the SHA1 of the raw content is the key of the conversion cache
     $sha1 = sha1($original_content);
     //if already present in database cache get the converted without convert it again
     // ($xliffContent stays undefined when the cache is disabled; see the isset() check further down)
     if (INIT::$SAVE_SHASUM_FOR_FILES_LOADED) {
         $xliffContent = getXliffBySHA1($sha1, $this->source_lang, $this->target_lang, $this->cache_days, $this->segmentation_rule);
     }
     //XLIFF Conversion management
     //cyclomatic complexity 9999999 ..... but it works, for now.
     try {
         $fileType = DetectProprietaryXliff::getInfo($file_path);
         if (DetectProprietaryXliff::isXliffExtension()) {
             if (INIT::$CONVERSION_ENABLED) {
                 //conversion enforce
                 if (!INIT::$FORCE_XLIFF_CONVERSION) {
                     //ONLY IDIOM is forced to be converted
                     //if file is not proprietary like idiom AND Enforce is disabled
                     //we take it as is
                     if (!$fileType['proprietary'] || $fileType['info']['extension'] == 'tmx') {
                         $this->result['code'] = 1;
                         // OK for client
                         $this->result['errors'][] = array("code" => 0, "message" => "OK");
                         return 0;
                         //ok don't convert a standard sdlxliff
                     }
                 } else {
                     //if conversion enforce is active
                     //we force all xliff files but not files produced by SDL Studio because we can handle them
                     if ($fileType['proprietary_short_name'] == 'trados' || $fileType['info']['extension'] == 'tmx') {
                         $this->result['code'] = 1;
                         // OK for client
                         $this->result['errors'][] = array("code" => 0, "message" => "OK");
                         return 0;
                         //ok don't convert a standard sdlxliff
                     }
                 }
             } elseif ($fileType['proprietary']) {
                 // conversion is disabled and the file is a proprietary XLIFF: reject it and drop the upload
                 unlink($file_path);
                 $this->result['code'] = -7;
                 // No Good, Default
                 $this->result['errors'][] = array("code" => -7, "message" => 'Matecat Open-Source does not support ' . ucwords($fileType['proprietary_name']) . '. Use MatecatPro.', 'debug' => basename($this->file_name));
                 return -1;
             } elseif (!$fileType['proprietary']) {
                 $this->result['code'] = 1;
                 // OK for client
                 $this->result['errors'][] = array("code" => 0, "message" => "OK");
                 return 0;
                 //ok don't convert a standard sdlxliff
             }
         }
     } catch (Exception $e) {
         //try catch not used because of exception no more raised
         $this->result['code'] = -8;
         // No Good, Default
         $this->result['errors'][] = array("code" => -8, "message" => $e->getMessage());
         Log::doLog($e->getMessage());
         return -1;
     }
     //there is a cached copy of conversion? inflate
     if (isset($xliffContent) && !empty($xliffContent)) {
         $xliffContent = gzinflate($xliffContent);
         $res = $this->put_xliff_on_file($xliffContent, $this->intDir);
         if (!$res) {
             //custom error message passed directly to javascript client and displayed as is
             $convertResult['errorMessage'] = "Error: failed to save converted file from cache to disk";
             $this->result['code'] = -101;
             $this->result['errors'][] = array("code" => -101, "message" => $convertResult['errorMessage'], 'debug' => basename($this->file_name));
         }
         //else we have to convert it
     } else {
         // keep only the deflated copy of the upload; it is stored in the cache after a successful conversion
         $original_content_zipped = gzdeflate($original_content, 5);
         unset($original_content);
         $converter = new FileFormatConverter($this->segmentation_rule);
         // target_lang may be a comma separated list: the conversion uses only the first language
         if (strpos($this->target_lang, ',') !== false) {
             $single_language = explode(',', $this->target_lang);
             $single_language = $single_language[0];
         } else {
             $single_language = $this->target_lang;
         }
         $convertResult = $converter->convertToSdlxliff($file_path, $this->source_lang, $single_language, false, $this->segmentation_rule);
         if ($convertResult['isSuccess'] == 1) {
             /* try to back convert the file */
             $output_content = array();
             $output_content['out_xliff_name'] = $file_path . '.out.sdlxliff';
             $output_content['source'] = $this->source_lang;
             $output_content['target'] = $single_language;
             $output_content['content'] = $convertResult['xliffContent'];
             $output_content['filename'] = $this->file_name;
             $back_convertResult = $converter->convertToOriginal($output_content);
             /* try to back convert the file */
             if ($back_convertResult['isSuccess'] == false) {
                 //custom error message passed directly to javascript client and displayed as is
                 $convertResult['errorMessage'] = "Error: there is a problem with this file, it cannot be converted back to the original one.";
                 $this->result['code'] = -110;
                 $this->result['errors'][] = array("code" => -110, "message" => $convertResult['errorMessage'], 'debug' => basename($this->file_name));
                 return false;
             }
             //$uid = $convertResult['uid']; // to be stored in the database
             $xliffContent = $convertResult['xliffContent'];
             $xliffContentZipped = gzdeflate($xliffContent, 5);
             //cache the converted file
             if (INIT::$SAVE_SHASUM_FOR_FILES_LOADED) {
                 $res_insert = insertFileIntoMap($sha1, $this->source_lang, $this->target_lang, $original_content_zipped, $xliffContentZipped, $this->segmentation_rule);
                 if ($res_insert < 0) {
                     //custom error message passed directly to javascript client and displayed as is
                     $convertResult['errorMessage'] = "Error: File too large";
                     $this->result['code'] = -102;
                     $this->result['errors'][] = array("code" => -102, "message" => $convertResult['errorMessage'], 'debug' => basename($this->file_name));
                     return;
                 }
             }
             unset($xliffContentZipped);
             $res = $this->put_xliff_on_file($xliffContent, $this->intDir);
             if (!$res) {
                 //custom error message passed directly to javascript client and displayed as is
                 $convertResult['errorMessage'] = "Error: failed to save file on disk";
                 $this->result['code'] = -103;
                 $this->result['errors'][] = array("code" => -103, "message" => $convertResult['errorMessage'], 'debug' => basename($this->file_name));
                 return false;
             }
         } else {
             // Conversion failed: map the converter's raw message to a text suitable for the client.
             $file = pathinfo($this->file_name);
             // pathinfo() leaves out the 'extension' key for names without a dot
             $extension = isset($file['extension']) ? $file['extension'] : '';
             switch ($extension) {
                 case 'docx':
                     $defaultError = "Importing error. Try opening and saving the document with a new name. If this does not work, try converting to DOC.";
                     break;
                 case 'doc':
                 case 'rtf':
                     $defaultError = "Importing error. Try opening and saving the document with a new name. If this does not work, try converting to DOCX.";
                     break;
                 case 'inx':
                     $defaultError = "Importing Error. Try to commit changes in InDesign before importing.";
                     break;
                 case 'idml':
                     $defaultError = "Importing Error. MateCat does not support this version of InDesign, try converting it to a previous one.";
                     break;
                 default:
                     $defaultError = "Importing error. Try opening and saving the document with a new name.";
                     break;
             }
             // The first matching rule wins, so the order of the branches matters.
             if (stripos($convertResult['errorMessage'], "failed to create SDLXLIFF.") !== false || stripos($convertResult['errorMessage'], "COM target does not implement IDispatch") !== false) {
                 $convertResult['errorMessage'] = "Error: failed importing file.";
             } elseif (stripos($convertResult['errorMessage'], "Unable to open Excel file - it may be password protected") !== false) {
                 $convertResult['errorMessage'] = $convertResult['errorMessage'] . " Try to remove protection using the Unprotect Sheet command on Windows Excel.";
             } elseif (stripos($convertResult['errorMessage'], "The document contains unaccepted changes") !== false) {
                 $convertResult['errorMessage'] = "The document contains track changes. Accept all changes before uploading it.";
             } elseif (stripos($convertResult['errorMessage'], "Error: Could not find file") !== false || stripos($convertResult['errorMessage'], "tw4winMark") !== false) {
                 $convertResult['errorMessage'] = $defaultError;
             } elseif (stripos($convertResult['errorMessage'], "Attempted to read or write protected memory") !== false) {
                 $convertResult['errorMessage'] = $defaultError;
             } elseif (stripos($convertResult['errorMessage'], "The document was created in Microsoft Word 97 or earlier") !== false) {
                 // stripos() returns 0 for a match at the very start of the message, hence the explicit comparison with false
                 $convertResult['errorMessage'] = $defaultError;
             } elseif ($extension == 'csv' && empty($convertResult['errorMessage'])) {
                 $convertResult['errorMessage'] = "This CSV file is not eligible to be imported due internal wrong format. Try to convert in TXT using UTF8 encoding";
             } elseif (empty($convertResult['errorMessage'])) {
                 $convertResult['errorMessage'] = "Failed to convert file. Internal error. Please Try again.";
             } elseif (stripos($convertResult['errorMessage'], "DocumentFormat.OpenXml.dll") !== false) {
                 //this error is triggered on DOCX when converter's parser can't decode some regions of the file
                 $convertResult['errorMessage'] = "Conversion error. Try opening and saving the document with a new name. If this does not work, try converting to DOC.";
             } elseif ($extension == 'idml') {
                 $convertResult['errorMessage'] = $defaultError;
             } else {
                 $convertResult['errorMessage'] = "Import error. Try converting it to a compatible file format (e.g. doc > docx, xlsx > xls)";
             }
             //custom error message passed directly to javascript client and displayed as is
             $this->result['code'] = -100;
             $this->result['errors'][] = array("code" => -100, "message" => $convertResult['errorMessage'], "debug" => $file['basename']);
         }
     }
 }