Ejemplo n.º 1
0
 /**
  * Strip <seg-source> elements and <mrk> tags from every trans-unit of a GlobalSight XLIFF.
  *
  * Nothing is changed when the path does not carry an XLIFF extension. When the
  * first 100 bytes of the content hold no XML declaration, the content is treated
  * as not being UTF-8 (e.g. UTF-16): it is converted to UTF-8 for processing and
  * converted back to the charset reported by CatUtils::convertEncoding() before
  * being returned. Tags are removed only when the first 1024 bytes are detected
  * as a GlobalSight file.
  *
  * @param string $documentContent raw content of the file
  * @param string $path            path of the file, used only to inspect its extension
  *
  * @return string the (possibly cleaned) document content
  */
 public function ifGlobalSightXliffRemoveTargetMarks($documentContent, $path)
 {
     $extension = FilesStorage::pathinfo_fix($path);
     if (!DetectProprietaryXliff::isXliffExtension($extension)) {
         return $documentContent;
     }

     // A missing XML declaration at the start is taken as "not UTF-8" (e.g. UTF-16).
     $is_utf8 = stripos(substr($documentContent, 0, 100), "<?xml ") !== false;
     $original_charset = 'utf-8';
     if (!$is_utf8) {
         list($original_charset, $documentContent) = CatUtils::convertEncoding('UTF-8', $documentContent);
     }

     // Only the head of the document is inspected, to avoid copying very large files.
     $detect_result = DetectProprietaryXliff::getInfoByStringData(substr($documentContent, 0, 1024));

     // Strict comparison: a non-string value must never match the vendor name.
     if ($detect_result['proprietary_short_name'] === 'globalsight') {
         // Applied in this order: first drop seg-source elements, then unwrap mrk tags.
         // NOTE: a SAX parser would avoid the in-memory copy made for every trans-unit.
         $cleanup_patterns = array(
             '|<seg-source.*?</seg-source>|si',
             '#<mrk[^>]+>|</mrk>#si',
         );

         $trans_units = explode('<trans-unit', $documentContent);
         $total = count($trans_units);

         // Index 0 is the XLIFF header that precedes the first trans-unit: skip it.
         for ($pos = 1; $pos < $total; $pos++) {
             $trans_unit = $trans_units[$pos];
             foreach ($cleanup_patterns as $pattern) {
                 $cleaned = preg_replace($pattern, '', $trans_unit);
                 // preg_replace() yields null on a PCRE error (e.g. backtrack limit);
                 // keep the previous text instead of silently dropping the trans-unit.
                 if ($cleaned !== null) {
                     $trans_unit = $cleaned;
                 }
             }
             $trans_units[$pos] = $trans_unit;
         }

         $documentContent = implode('<trans-unit', $trans_units);
     }

     if (!$is_utf8) {
         // Restore the original charset; only the converted content (index 1) is needed.
         $converted = CatUtils::convertEncoding($original_charset, $documentContent);
         $documentContent = $converted[1];
     }

     return $documentContent;
 }
Ejemplo n.º 2
0
 /**
  * Post a file to a converter machine and return the parsed conversion result.
  *
  * TXT and STRINGS files are first re-encoded to UTF-8 and given a BOM when they
  * lack one. The content is written to a temporary file that is referenced in the
  * POST payload and removed before the method returns or rethrows.
  *
  * Side effects: sets $this->ip, fills $this->conversionObject with the details
  * of the conversion and writes two log lines.
  *
  * @param string      $file_path              path of the file to convert
  * @param string      $source_lang            source language, mapped through lang_handler->getLangRegionCode()
  * @param string      $target_lang            target language, mapped through lang_handler->getLangRegionCode()
  * @param bool|string $chosen_by_user_machine converter address to use; when falsy one is picked by pickRandConverter()
  * @param mixed       $segm_rule              forwarded to pickRandConverter()
  *
  * @return mixed whatever $this->__parseOutput() returns for the decoded response
  *
  * @throws Exception when the file does not exist, cannot be read, or the temporary copy cannot be written
  */
 public function convertToSdlxliff($file_path, $source_lang, $target_lang, $chosen_by_user_machine = false, $segm_rule = null)
 {
     if (!file_exists($file_path)) {
         throw new Exception("Conversion Error : the file <{$file_path}> not exists");
     }

     $fileContent = file_get_contents($file_path);
     if ($fileContent === false) {
         throw new Exception("Conversion Error : unable to read the file <{$file_path}>");
     }

     $extension = pathinfo($file_path, PATHINFO_EXTENSION);
     $filename = pathinfo($file_path, PATHINFO_FILENAME);
     $upperExtension = strtoupper($extension);

     if ($upperExtension === 'TXT' || $upperExtension === 'STRINGS') {
         if ($upperExtension === 'STRINGS') {
             // .strings files may be UTF-16: let the shared helper convert them.
             $convertedFile = CatUtils::convertEncoding('UTF-8', $fileContent);
             $fileContent = $convertedFile[1];
         } else {
             $encoding = mb_detect_encoding($fileContent);
             // mb_detect_encoding() returns false when it cannot decide; in that
             // case the content is left as it is rather than fed to iconv().
             if ($encoding !== false && $encoding !== 'UTF-8') {
                 $recoded = iconv($encoding, "UTF-8//IGNORE", $fileContent);
                 // iconv() returns false on failure: never replace the content with it.
                 if ($recoded !== false) {
                     $fileContent = $recoded;
                 }
             }
         }

         if (!$this->hasBOM($fileContent)) {
             $fileContent = $this->addBOM($fileContent);
         }
     }

     // Random name for the temporary copy that is posted to the converter.
     $tmp_name = tempnam("/tmp", "MAT_FW");
     if ($tmp_name === false) {
         throw new Exception("Conversion Error : unable to create a temporary file for <{$file_path}>");
     }

     // catch + rethrow (rather than finally) keeps this working on old PHP
     // runtimes while still guaranteeing the temporary file is removed.
     try {
         $fileSize = file_put_contents($tmp_name, $fileContent);
         if ($fileSize === false) {
             throw new Exception("Conversion Error : unable to write the temporary file for <{$file_path}>");
         }

         // Release the in-memory copy; the temporary file is what gets posted.
         unset($fileContent);

         $data = array();
         // File pointer for the POST.
         $data['documentContent'] = "@{$tmp_name}";

         // Pick the converter: the one requested by the caller, or a random one.
         if (!$chosen_by_user_machine) {
             $this->ip = $this->pickRandConverter($segm_rule);
         } else {
             $this->ip = $chosen_by_user_machine;
         }

         $url = "{$this->ip}:{$this->port}/{$this->toXliffFunction}";
         $data['fileExtension'] = $extension;
         $data['fileName'] = "{$filename}.{$extension}";
         $data['sourceLocale'] = $this->lang_handler->getLangRegionCode($source_lang);
         $data['targetLocale'] = $this->lang_handler->getLangRegionCode($target_lang);

         log::doLog($this->ip . " start conversion to xliff of {$file_path}");
         $start_time = microtime(true);
         $curl_result = $this->curl_post($url, $data, $this->opt);
         $time_diff = microtime(true) - $start_time;
         log::doLog($this->ip . " took {$time_diff} secs for {$file_path}");

         // Record the details of this conversion.
         $this->conversionObject->ip_machine = $this->ip;
         $this->conversionObject->ip_client = Utils::getRealIpAddr();
         $this->conversionObject->path_name = $file_path;
         $this->conversionObject->file_name = $data['fileName'];
         $this->conversionObject->direction = 'fw';
         $this->conversionObject->src_lang = $data['sourceLocale'];
         $this->conversionObject->trg_lang = $data['targetLocale'];
         $this->conversionObject->file_size = $fileSize;
         $this->conversionObject->conversion_time = $time_diff;

         $decode = json_decode($curl_result, true);
         // Drop the raw response before parsing the decoded one.
         $curl_result = null;
         $res = $this->__parseOutput($decode);
     } catch (Exception $e) {
         if (file_exists($tmp_name)) {
             unlink($tmp_name);
         }
         throw $e;
     }

     // Remove the temporary file.
     unlink($tmp_name);

     return $res;
 }