Ejemplo n.º 1
0
 /**
  * Backward/forward compatibility helper: moves a file that is still stored
  * in the database into the filesystem storage.
  *
  * The xliff (and the original file, when one exists) is written into a
  * throw-away temporary directory, packaged through makeCachePackage() and
  * then handed to moveFromCacheToFileDir(). The temporary directory is removed
  * afterwards, also when one of the steps throws.
  *
  * Works on a reference: when "sha1_original_file" does not yet contain a
  * directory separator it is rewritten (in the `files` table and in the
  * passed array) to "<project creation date as Ymd>/<sha1>".
  *
  * @param array $fileMetaData [
  *                          "id_file",
  *                          "id_project",
  *                          "filename",
  *                          "source",
  *                          "mime_type",
  *                          "sha1_original_file"
  *                      ]
  *
  * @throws \Exception rethrows whatever the migration steps throw, after cleanup
  */
 private function migrateFileDB2FS(&$fileMetaData)
 {
     // Ids end up inside SQL text below: force them to integers first.
     $idFile = (int)$fileMetaData['id_file'];
     $idProject = (int)$fileMetaData['id_project'];

     // create temporary storage to place stuff
     $tempDir = "/tmp" . DIRECTORY_SEPARATOR . uniqid("", true);
     mkdir($tempDir, 0755);

     try {
         // fetch xliff from the files database
         $xliffContent = $this->getXliffFromDB($fileMetaData['id_file']);
         // try pulling the original content too
         // (if it's empty it means that it was an unconverted xliff)
         $fileContent = $this->getOriginalFromDB($fileMetaData['id_file']);

         if (!empty($fileContent)) {
             // It's a converted file: the xliff gets a derived name whose
             // extension depends on the detected xliff flavour.
             $result = DetectProprietaryXliff::getInfoByStringData($xliffContent);
             if ($result['proprietary_short_name'] == 'trados') {
                 $tempXliff = $tempDir . DIRECTORY_SEPARATOR . $fileMetaData['filename'] . ".sdlxliff";
             } else {
                 $tempXliff = $tempDir . DIRECTORY_SEPARATOR . $fileMetaData['filename'] . ".xlf";
             }

             // the original keeps its own file name
             $tempOriginal = $tempDir . DIRECTORY_SEPARATOR . $fileMetaData['filename'];
             file_put_contents($tempOriginal, $fileContent);

             // hash is based on the original content
             $sha1 = sha1($fileContent);

             // free memory
             unset($fileContent);
         } else {
             // Unconverted xliff: the xliff itself carries the original name
             // and there is no separate original file.
             $tempXliff = $tempDir . DIRECTORY_SEPARATOR . $fileMetaData['filename'];
             $tempOriginal = false;
             $sha1 = sha1($xliffContent);
         }

         // flush xliff file content
         file_put_contents($tempXliff, $xliffContent);
         // free memory
         unset($xliffContent);

         // A value without a directory separator has not been turned into a
         // "<date>/<hash>" path yet.
         if (strpos($fileMetaData['sha1_original_file'], DIRECTORY_SEPARATOR) === false) {
             $query = "select create_date from projects where id = {$idProject}";
             $db = Database::obtain();
             $results = $db->fetch_array($query);

             $dateHashPath = date_create($results[0]['create_date'])->format('Ymd') . DIRECTORY_SEPARATOR . $sha1;
             $db->update('files', array("sha1_original_file" => $dateHashPath), 'id = ' . $idFile);

             // update Reference
             $fileMetaData['sha1_original_file'] = $dateHashPath;
         }

         // build a cache package
         $this->makeCachePackage($sha1, $fileMetaData['source'], $tempOriginal, $tempXliff);
         // build a file package
         $this->moveFromCacheToFileDir($fileMetaData['sha1_original_file'], $fileMetaData['source'], $fileMetaData['id_file']);
     } catch (\Exception $e) {
         // do not leave the temporary directory behind on failure
         Utils::deleteDir($tempDir);
         throw $e;
     }

     // clean temporary stuff
     Utils::deleteDir($tempDir);
 }
Ejemplo n.º 2
0
 /**
  * Strips <seg-source> elements and <mrk> tags from every trans-unit of an
  * xliff document that is detected as a GlobalSight file.
  *
  * Content whose path does not have an xliff extension is returned untouched.
  * Content without an "<?xml " declaration in its first 100 bytes is converted
  * to UTF-8 before processing and converted back to the charset reported by
  * CatUtils::convertEncoding() before being returned (the original author
  * describes this as the UTF-16 case).
  *
  * @param $documentContent
  * @param $path
  *
  * @return string
  */
 public function ifGlobalSightXliffRemoveTargetMarks($documentContent, $path)
 {
     // NOTE(review): presumably pathinfo()-style data for $path — confirm against FilesStorage
     $pathInfo = FilesStorage::pathinfo_fix($path);
     if (!DetectProprietaryXliff::isXliffExtension($pathInfo)) {
         return $documentContent;
     }

     $sourceCharset = 'utf-8';

     // No XML declaration near the start: treat the content as not UTF-8
     $needsRoundTrip = stripos(substr($documentContent, 0, 100), "<?xml ") === false;
     if ($needsRoundTrip) {
         list($sourceCharset, $documentContent) = CatUtils::convertEncoding('UTF-8', $documentContent);
     }

     // Only the head of the document is inspected, to avoid copying very large files
     $xliffInfo = DetectProprietaryXliff::getInfoByStringData(substr($documentContent, 0, 1024));

     // Clean mrk tags for GlobalSight application compatibility.
     // This should be a sax parser instead of an in-memory copy for every trans-unit.
     if ($xliffInfo['proprietary_short_name'] == 'globalsight') {
         $chunks = explode('<trans-unit', $documentContent);

         // Chunk 0 is the header that precedes the first trans-unit: skip it
         for ($i = 1, $total = count($chunks); $i < $total; $i++) {
             // drop the seg-source elements first, then the mrk open/close tags
             $chunks[$i] = preg_replace(
                 '#<mrk[^>]+>|</mrk>#si',
                 '',
                 preg_replace('|<seg-source.*?</seg-source>|si', '', $chunks[$i])
             );
         }

         $documentContent = implode('<trans-unit', $chunks);
     }

     if ($needsRoundTrip) {
         // only the converted content is needed; the reported charset is ignored
         list(, $documentContent) = CatUtils::convertEncoding($sourceCharset, $documentContent);
     }

     return $documentContent;
 }