/**
 * Backwards compatibility method and forward
 *
 * Dumps the xliff (and the original file, when one is stored) of a database-hosted
 * file into a temporary directory, builds a cache package and a file package from
 * them, then removes the temporary directory.
 *
 * Works by Reference variable: 'sha1_original_file' is rewritten in place when it
 * does not yet contain a directory separator.
 *
 * @param array $fileMetaData [
 *                            "id_file",
 *                            "id_project",  (read only when sha1_original_file must be rebuilt)
 *                            "filename",
 *                            "source",
 *                            "mime_type",
 *                            "sha1_original_file"
 *                            ]
 *
 * @throws \Exception any exception raised by the underlying steps, re-thrown once
 *                    the temporary directory has been removed
 */
private function migrateFileDB2FS(&$fileMetaData) {

    //create temporary storage to place stuff
    $tempDir = "/tmp" . DIRECTORY_SEPARATOR . uniqid("", true);
    mkdir($tempDir, 0755);

    try {

        //fetch xliff from the files database
        $xliffContent = $this->getXliffFromDB($fileMetaData['id_file']);

        //try pulling the original content too (if it's empty it means that it was an unconverted xliff)
        $fileContent = $this->getOriginalFromDB($fileMetaData['id_file']);

        if (!empty($fileContent)) {
            //it's a converted file: the xliff gets an extension appended to the
            //original name, chosen according to the detected xliff flavour
            $result = DetectProprietaryXliff::getInfoByStringData($xliffContent);
            if ($result['proprietary_short_name'] === 'trados') {
                $tempXliff = $tempDir . DIRECTORY_SEPARATOR . $fileMetaData['filename'] . ".sdlxliff";
            } else {
                $tempXliff = $tempDir . DIRECTORY_SEPARATOR . $fileMetaData['filename'] . ".xlf";
            }

            //create the original file and flush its content
            $tempOriginal = $tempDir . DIRECTORY_SEPARATOR . $fileMetaData['filename'];
            file_put_contents($tempOriginal, $fileContent);

            //get hash, based on original
            $sha1 = sha1($fileContent);

            //free memory
            unset($fileContent);
        } else {
            //it's an unconverted xliff: the temporary file keeps the original name
            $tempXliff = $tempDir . DIRECTORY_SEPARATOR . $fileMetaData['filename'];

            //there is no original file
            $tempOriginal = false;

            //get hash, based on the xliff itself
            $sha1 = sha1($xliffContent);
        }

        //flush xliff file content
        file_put_contents($tempXliff, $xliffContent);

        //free memory
        unset($xliffContent);

        //a stored hash without directory separator has no date prefix yet:
        //rebuild it as <project creation date, Ymd>/<sha1> and persist it
        if (stripos($fileMetaData['sha1_original_file'], DIRECTORY_SEPARATOR) === false) {

            //ids are forced to integers because they are concatenated into SQL
            $idProject = (int)$fileMetaData['id_project'];
            $idFile    = (int)$fileMetaData['id_file'];

            $query   = "select create_date from projects where id = {$idProject}";
            $db      = Database::obtain();
            $results = $db->fetch_array($query);

            $dateHashPath = date_create($results[0]['create_date'])->format('Ymd') . DIRECTORY_SEPARATOR . $sha1;
            $db->update('files', array("sha1_original_file" => $dateHashPath), 'id = ' . $idFile);

            //update Reference
            $fileMetaData['sha1_original_file'] = $dateHashPath;
        }

        //build a cache package
        $this->makeCachePackage($sha1, $fileMetaData['source'], $tempOriginal, $tempXliff);

        //build a file package
        $this->moveFromCacheToFileDir(
            $fileMetaData['sha1_original_file'],
            $fileMetaData['source'],
            $fileMetaData['id_file']
        );

    } catch (\Exception $e) {
        //do not leave the temporary directory behind when a step fails
        Utils::deleteDir($tempDir);
        throw $e;
    }

    //clean temporary stuff
    Utils::deleteDir($tempDir);
}
/**
 * Remove the mrk tags (and the seg-source elements) if the file is an xliff
 * and it is a GlobalSight file
 *
 * Also, check for encoding and transform utf16 to utf8 and back
 *
 * @param string $documentContent raw content of the file
 * @param string $path            path of the file, used only to check its extension
 *
 * @return string the content, cleaned when it is a GlobalSight xliff, untouched otherwise
 */
public function ifGlobalSightXliffRemoveTargetMarks($documentContent, $path) {

    $extension = FilesStorage::pathinfo_fix($path);
    if (!DetectProprietaryXliff::isXliffExtension($extension)) {
        return $documentContent;
    }

    $is_utf8          = true;
    $original_charset = 'utf-8'; //overwritten below when a conversion takes place

    //when the xml declaration is not readable in the first 100 bytes the content is
    //treated as not UTF-8 (e.g. UTF-16) and converted; the returned charset is kept
    //so that the content can be converted back before returning
    if (stripos(substr($documentContent, 0, 100), "<?xml ") === false) {
        $is_utf8 = false;
        list($original_charset, $documentContent) = CatUtils::convertEncoding('UTF-8', $documentContent);
    }

    //avoid in memory copy of very large files if possible
    $detect_result = DetectProprietaryXliff::getInfoByStringData(substr($documentContent, 0, 1024));

    //clean mrk tags for GlobalSight application compatibility
    //this should be a sax parser instead of in memory copy for every trans-unit
    if ($detect_result['proprietary_short_name'] == 'globalsight') {

        //applied in this order: drop whole seg-source elements, then strip the
        //mrk open/close tags while keeping the text they wrap
        $patterns = array(
            '|<seg-source.*?</seg-source>|si',
            '#<mrk[^>]+>|</mrk>#si',
        );

        // Getting Trans-units
        $trans_units = explode('<trans-unit', $documentContent);

        foreach ($trans_units as $pos => $trans_unit) {

            // First element in the XLIFF split is the header, not the first file
            if ($pos === 0) {
                continue;
            }

            foreach ($patterns as $pattern) {
                //preg_replace returns null on a PCRE error (e.g. backtrack limit):
                //keep the previous text instead of silently emptying the trans-unit
                $cleaned = preg_replace($pattern, '', $trans_unit);
                if ($cleaned !== null) {
                    $trans_unit = $cleaned;
                }
            }

            $trans_units[$pos] = $trans_unit;
        } // End of trans-units

        $documentContent = implode('<trans-unit', $trans_units);
    }

    //restore the original encoding
    if (!$is_utf8) {
        list(, $documentContent) = CatUtils::convertEncoding($original_charset, $documentContent);
    }

    return $documentContent;
}