Ejemplo n.º 1
0
 /**
  * Extract sources and pre-translations from an (sdl)xliff file and put them in the database.
  *
  * The XLIFF content is parsed with Xliff_Parser; every translatable trans-unit is turned into
  * one row (or one row per <seg-source> entry when the unit is already segmented) of the
  * `segments` table. Rows are buffered in $this->projectStructure['segments'][$fid] and
  * inserted in chunks of 200. Targets that differ from their source are stored in
  * $this->projectStructure['translations'], keyed by trans-unit id, as an ArrayObject with
  * offsets: 0 = segment id, 1 = internal id, 2 = escaped target, 3 = segment hash.
  *
  * @param $xliff_file_content raw XLIFF content handed to Xliff_Parser::Xliff2Array()
  * @param $fid                file id; interpolated unquoted into SQL, so it is expected to be numeric
  *
  * @throws Exception code -4 on parsing errors, -1 when no visible segment was found,
  *                   -2 when an INSERT fails
  */
 protected function _extractSegments($xliff_file_content, $fid)
 {
     //create the structure for multiple files: one bucket of SQL value tuples per file id
     $this->projectStructure['segments']->offsetSet($fid, new ArrayObject(array()));
     $xliff_obj = new Xliff_Parser();
     $xliff = $xliff_obj->Xliff2Array($xliff_file_content);
     // Checking that parsing went well
     if (isset($xliff['parser-errors']) or !isset($xliff['files'])) {
         // 'parser-errors' may be missing when only the 'files' key is absent: don't join an undefined value
         $parser_errors = isset($xliff['parser-errors']) ? join("\n", (array) $xliff['parser-errors']) : "no files found";
         Log::doLog("Xliff Import: Error parsing. " . $parser_errors);
         throw new Exception("Xliff Import: Error parsing. Check Log file.", -4);
     }
     //number of segments flagged as visible in the cattool; the import is rejected when it stays at zero
     //(an xliff can contain several <file> tags, so it is accumulated across all of them)
     $fileCounter_Show_In_Cattool = 0;
     foreach ($xliff['files'] as $xliff_file) {
         if (!array_key_exists('trans-units', $xliff_file)) {
             continue;
         }
         //extract internal reference base64 files and store their index in $this->projectStructure
         $this->_extractFileReferences($fid, $xliff_file);
         //the file reference does not change while the trans-units of this <file> are processed
         if ($this->projectStructure['file_references']->offsetExists($fid)) {
             $file_reference = (int) $this->projectStructure['file_references'][$fid];
         } else {
             $file_reference = 'NULL';
         }
         foreach ($xliff_file['trans-units'] as $xliff_trans_unit) {
             if (!isset($xliff_trans_unit['attr']['translate'])) {
                 $xliff_trans_unit['attr']['translate'] = 'yes';
             }
             if ($xliff_trans_unit['attr']['translate'] == "no") {
                 //nothing to translate: no row is created and the visible counter is not incremented
                 continue;
             }
             // If the XLIFF is already segmented (has <seg-source>)
             if (isset($xliff_trans_unit['seg-source'])) {
                 foreach ($xliff_trans_unit['seg-source'] as $position => $seg_source) {
                     //the flag is decided per segment: an empty <mrk> must not hide the ones that follow it
                     $show_in_cattool = 1;
                     $tempSeg = trim(strip_tags($seg_source['raw-content']));
                     //init tags
                     $seg_source['mrk-ext-prec-tags'] = '';
                     $seg_source['mrk-ext-succ-tags'] = '';
                     if ($tempSeg === '') {
                         $show_in_cattool = 0;
                     } else {
                         $extract_external = $this->_strip_external($seg_source['raw-content']);
                         $seg_source['mrk-ext-prec-tags'] = $extract_external['prec'];
                         $seg_source['mrk-ext-succ-tags'] = $extract_external['succ'];
                         $seg_source['raw-content'] = $extract_external['seg'];
                         if (isset($xliff_trans_unit['seg-target'][$position]['raw-content'])) {
                             $target_extract_external = $this->_strip_external($xliff_trans_unit['seg-target'][$position]['raw-content']);
                             //a target equal to the source (tags stripped, entities decoded) or a purely
                             //numeric source cannot be told apart from "not translated", so it is skipped;
                             //this means that tag ids inside such a target are lost if different from source
                             $src = strip_tags(html_entity_decode($extract_external['seg'], ENT_QUOTES, 'UTF-8'));
                             $trg = strip_tags(html_entity_decode($target_extract_external['seg'], ENT_QUOTES, 'UTF-8'));
                             if ($src != $trg && !is_numeric($src)) {
                                 $target_extract_external['seg'] = CatUtils::raw2DatabaseXliff($target_extract_external['seg']);
                                 $target = $this->dbHandler->escape($target_extract_external['seg']);
                                 //add an empty string to avoid casting to int: 0001 -> 1
                                 //useful for idiom internal xliff id
                                 $this->projectStructure['translations']->offsetSet("" . $xliff_trans_unit['attr']['id'], new ArrayObject(array(2 => $target)));
                                 //seg-source and target translation can have different mrk id
                                 //override the seg-source surrounding mrk-id with them of target
                                 $seg_source['mrk-ext-prec-tags'] = $target_extract_external['prec'];
                                 $seg_source['mrk-ext-succ-tags'] = $target_extract_external['succ'];
                             }
                         }
                     }
                     $mid = $this->dbHandler->escape($seg_source['mid']);
                     $ext_tags = $this->dbHandler->escape($seg_source['ext-prec-tags']);
                     $source = $this->dbHandler->escape(CatUtils::raw2DatabaseXliff($seg_source['raw-content']));
                     //NOTE(review): here the hash is taken on the raw content, while the unsegmented branch
                     //hashes the converted and escaped source - confirm which one readers of segment_hash expect
                     $source_hash = $this->dbHandler->escape(md5($seg_source['raw-content']));
                     $ext_succ_tags = $this->dbHandler->escape($seg_source['ext-succ-tags']);
                     $num_words = CatUtils::segment_raw_wordcount($seg_source['raw-content'], $xliff_file['attr']['source-language']);
                     $trans_unit_id = $this->dbHandler->escape($xliff_trans_unit['attr']['id']);
                     $mrk_ext_prec_tags = $this->dbHandler->escape($seg_source['mrk-ext-prec-tags']);
                     $mrk_ext_succ_tags = $this->dbHandler->escape($seg_source['mrk-ext-succ-tags']);
                     $this->projectStructure['segments'][$fid]->append("('{$trans_unit_id}',{$fid},{$file_reference},'{$source}','{$source_hash}',{$num_words},'{$mid}','{$ext_tags}','{$ext_succ_tags}',{$show_in_cattool},'{$mrk_ext_prec_tags}','{$mrk_ext_succ_tags}')");
                     //increment the counter for not empty segments
                     $fileCounter_Show_In_Cattool += $show_in_cattool;
                 }
             } else {
                 $show_in_cattool = 1;
                 $tempSeg = trim(strip_tags($xliff_trans_unit['source']['raw-content']));
                 $prec_tags = null;
                 $succ_tags = null;
                 if (empty($tempSeg)) {
                     $show_in_cattool = 0;
                 } else {
                     $extract_external = $this->_strip_external($xliff_trans_unit['source']['raw-content']);
                     $prec_tags = empty($extract_external['prec']) ? null : $extract_external['prec'];
                     $succ_tags = empty($extract_external['succ']) ? null : $extract_external['succ'];
                     $xliff_trans_unit['source']['raw-content'] = $extract_external['seg'];
                     if (isset($xliff_trans_unit['target']['raw-content'])) {
                         $target_extract_external = $this->_strip_external($xliff_trans_unit['target']['raw-content']);
                         //a target identical to the source is not stored as a pre-translation
                         if ($xliff_trans_unit['source']['raw-content'] != $target_extract_external['seg']) {
                             $target = CatUtils::raw2DatabaseXliff($target_extract_external['seg']);
                             $target = $this->dbHandler->escape($target);
                             //add an empty string to avoid casting to int: 0001 -> 1
                             //useful for idiom internal xliff id
                             $this->projectStructure['translations']->offsetSet("" . $xliff_trans_unit['attr']['id'], new ArrayObject(array(2 => $target)));
                         }
                     }
                 }
                 $source = $xliff_trans_unit['source']['raw-content'];
                 //the word count is done on the raw source, before the database conversion and escaping
                 $num_words = CatUtils::segment_raw_wordcount($source, $xliff_file['attr']['source-language']);
                 //applying escaping after raw count
                 $source = $this->dbHandler->escape(CatUtils::raw2DatabaseXliff($source));
                 $source_hash = $this->dbHandler->escape(md5($source));
                 $trans_unit_id = $this->dbHandler->escape($xliff_trans_unit['attr']['id']);
                 if (!is_null($prec_tags)) {
                     $prec_tags = $this->dbHandler->escape($prec_tags);
                 }
                 if (!is_null($succ_tags)) {
                     $succ_tags = $this->dbHandler->escape($succ_tags);
                 }
                 //null tags are interpolated as empty strings between the quotes
                 $this->projectStructure['segments'][$fid]->append("('{$trans_unit_id}',{$fid}, {$file_reference},'{$source}','{$source_hash}',{$num_words},NULL,'{$prec_tags}','{$succ_tags}',{$show_in_cattool},NULL,NULL)");
                 //increment the counter for not empty segments
                 $fileCounter_Show_In_Cattool += $show_in_cattool;
             }
         }
     }
     //empty() on an ArrayObject is always false (objects are truthy), so the rows are counted explicitly
     if (count($this->projectStructure['segments'][$fid]) == 0 || $fileCounter_Show_In_Cattool == 0) {
         Log::doLog("Segment import - no segments found\n");
         throw new Exception("Segment import - no segments found", -1);
     }
     $baseQuery = "INSERT INTO segments ( internal_id, id_file, id_file_part, segment, segment_hash, raw_word_count, xliff_mrk_id, xliff_ext_prec_tags, xliff_ext_succ_tags, show_in_cattool,xliff_mrk_ext_prec_tags,xliff_mrk_ext_succ_tags) values ";
     Log::doLog("Segments: Total Rows to insert: " . count($this->projectStructure['segments'][$fid]));
     //split the query in to chunks if there are too much segments
     $this->projectStructure['segments'][$fid]->exchangeArray(array_chunk($this->projectStructure['segments'][$fid]->getArrayCopy(), 200));
     Log::doLog("Segments: Total Queries to execute: " . count($this->projectStructure['segments'][$fid]));
     foreach ($this->projectStructure['segments'][$fid] as $i => $chunk) {
         $this->dbHandler->query($baseQuery . join(",\n", $chunk));
         Log::doLog("Segments: Executed Query " . ($i + 1));
         if ($this->dbHandler->get_error_number()) {
             //NOTE(review): mysql_error() is kept from the original code; it only exists with the legacy
             //mysql extension - switch to the handler's own error accessor if it has one
             $db_error = mysql_error();
             Log::doLog("Segment import - DB Error: " . $db_error . " - \n");
             //$chunk is an array: report which chunk failed instead of interpolating it ("Array")
             throw new Exception("Segment import - DB Error: " . $db_error . " - chunk " . ($i + 1), -2);
         }
     }
     if (!empty($this->projectStructure['translations'])) {
         //read back the ids generated for this file and attach them to the collected pre-translations
         $last_segments_query = "SELECT id, internal_id, segment_hash from segments WHERE id_file = %u";
         $last_segments_query = sprintf($last_segments_query, $fid);
         $_last_segments = $this->dbHandler->fetch_array($last_segments_query);
         foreach ($_last_segments as $row) {
             if ($this->projectStructure['translations']->offsetExists("" . $row['internal_id'])) {
                 $this->projectStructure['translations']["" . $row['internal_id']]->offsetSet(0, $row['id']);
                 $this->projectStructure['translations']["" . $row['internal_id']]->offsetSet(1, $row['internal_id']);
                 //WARNING offset 2 are the target translations
                 $this->projectStructure['translations']["" . $row['internal_id']]->offsetSet(3, $row['segment_hash']);
             }
         }
     }
 }
Ejemplo n.º 2
0
/**
 * Import the segments (and the targets, when present) of an XLIFF file into the database.
 *
 * Every translatable trans-unit becomes one row of `segments`, or one row per <seg-source>
 * entry when the unit is already segmented. When the unit has a non-empty target, a
 * TRANSLATED row is added to `segment_translations` for the last segment inserted for
 * that unit.
 *
 * @param $files_path directory that contains $file
 * @param $file       file name; only the xliff, sdlxliff and xlf extensions are managed
 * @param $pid        not used by this function
 * @param $fid        file id, stored in segments.id_file
 * @param $jid        job id, stored in segment_translations.id_job
 *
 * @return bool true when at least one segment was imported and every query succeeded;
 *              false on unsupported extension, unreadable file, parse error, DB error
 *              or when nothing was imported
 */
function extractSegments($files_path, $file, $pid, $fid, $jid)
{
    // NOTE(review): the legacy mysql_* API is kept because it is what this function has always used
    $mysql_link = mysql_connect(INIT::$DB_SERVER, INIT::$DB_USER, INIT::$DB_PASS);
    if (!$mysql_link || !mysql_select_db(INIT::$DB_DATABASE, $mysql_link)) {
        log::doLog("File import - DB Error: " . mysql_error() . "\n");
        return false;
    }
    // Checking Extentions (a file name without extension has no 'extension' key in pathinfo())
    $info = pathinfo($file);
    $extension = isset($info['extension']) ? $info['extension'] : '';
    if (!in_array($extension, array('xliff', 'sdlxliff', 'xlf'), true)) {
        log::doLog("Xliff Import: Extension " . $extension . " not managed");
        return false;
    }
    $content = file_get_contents("{$files_path}/{$file}");
    if ($content === false) {
        log::doLog("Xliff Import: Unable to read file {$files_path}/{$file}");
        return false;
    }
    $xliff_obj = new Xliff_Parser();
    $xliff = $xliff_obj->Xliff2Array($content);
    // Checking that parsing went well
    if (isset($xliff['parser-errors']) or !isset($xliff['files'])) {
        // 'parser-errors' may be missing when only the 'files' key is absent
        $parser_errors = isset($xliff['parser-errors']) ? join("\n", (array) $xliff['parser-errors']) : "no files found";
        log::doLog("Xliff Import: Error parsing. " . $parser_errors);
        return false;
    }
    // $fid is interpolated unquoted and $jid quoted: make both safe for the SQL strings below
    $id_file = (int) $fid;
    $id_job = mysql_real_escape_string($jid, $mysql_link);
    $insert_segment = "INSERT INTO segments (internal_id,id_file, segment, raw_word_count, xliff_mrk_id, xliff_ext_prec_tags, xliff_ext_succ_tags, show_in_cattool)\n                             values ";
    $insert_translation = "INSERT INTO segment_translations (id_segment, id_job,status, translation, translation_date, time_to_edit, match_type, context_hash, eq_word_count, suggestions_array, suggestion, suggestion_match, suggestion_source, suggestion_position)\n                                 values ";
    // $ret only goes from true to false, so a failure is never hidden by a later success
    $ret = true;
    $imported = 0;
    foreach ($xliff['files'] as $xliff_file) {
        if (!isset($xliff_file['trans-units'])) {
            continue;
        }
        foreach ($xliff_file['trans-units'] as $xliff_trans_unit) {
            if (!isset($xliff_trans_unit['attr']['translate'])) {
                $xliff_trans_unit['attr']['translate'] = 'yes';
            }
            if ($xliff_trans_unit['attr']['translate'] == "no") {
                log::doLog("Xliff Import: Skipping segment marked as non-translatable: " . $xliff_trans_unit['source']['raw-content']);
                continue;
            }
            // Value tuples of this trans-unit: one per <seg-source> entry, or a single one
            $segment_rows = array();
            $trans_unit_id = mysql_real_escape_string($xliff_trans_unit['attr']['id'], $mysql_link);
            if (isset($xliff_trans_unit['seg-source'])) {
                // The XLIFF is already segmented: keep every segment, not only the last one
                foreach ($xliff_trans_unit['seg-source'] as $seg_source) {
                    $tempSeg = trim(stripTagsFromSource2($seg_source['raw-content']));
                    $show_in_cattool = empty($tempSeg) ? 0 : 1;
                    $mid = mysql_real_escape_string($seg_source['mid'], $mysql_link);
                    $ext_tags = mysql_real_escape_string($seg_source['ext-prec-tags'], $mysql_link);
                    $source = mysql_real_escape_string($seg_source['raw-content'], $mysql_link);
                    $ext_succ_tags = mysql_real_escape_string($seg_source['ext-succ-tags'], $mysql_link);
                    $num_words = CatUtils::segment_raw_wordcount($seg_source['raw-content']);
                    $segment_rows[] = "('{$trans_unit_id}',{$id_file},'{$source}',{$num_words},'{$mid}','{$ext_tags}','{$ext_succ_tags}',{$show_in_cattool})";
                }
            } else {
                $tempSeg = trim(stripTagsFromSource2($xliff_trans_unit['source']['raw-content']));
                $show_in_cattool = empty($tempSeg) ? 0 : 1;
                $source = mysql_real_escape_string($xliff_trans_unit['source']['raw-content'], $mysql_link);
                $num_words = CatUtils::segment_raw_wordcount($xliff_trans_unit['source']['raw-content']);
                $segment_rows[] = "('{$trans_unit_id}',{$id_file},'{$source}',{$num_words},NULL,NULL,NULL,{$show_in_cattool})";
            }
            // One INSERT per segment so that mysql_insert_id() identifies the last row of the unit
            $segment_inserted = false;
            $last_id = 0;
            foreach ($segment_rows as $segment_row) {
                $query_segment = $insert_segment . $segment_row;
                if (mysql_query($query_segment, $mysql_link)) {
                    $segment_inserted = true;
                    $last_id = mysql_insert_id($mysql_link);
                    $imported++;
                } else {
                    log::doLog("File import - DB Error: " . mysql_error() . " - {$query_segment}\n");
                    $ret = false;
                }
            }
            // Without a stored segment there is no valid id to attach the translation to
            if (!$segment_inserted || !isset($xliff_trans_unit['target'])) {
                continue;
            }
            $target = mysql_real_escape_string($xliff_trans_unit['target']['raw-content'], $mysql_link);
            if (empty($target)) {
                continue;
            }
            // NOTE(review): for a pre-segmented unit the whole target is attached to its last segment,
            // as in the original code - confirm this is the intended mapping
            $query_segment_translations = $insert_translation . "('{$last_id}', '{$id_job}','TRANSLATED','{$target}',NULL,NULL,NULL,NULL, NULL, NULL, '{$target}',NULL,NULL,NULL)";
            if (!mysql_query($query_segment_translations, $mysql_link)) {
                log::doLog("File import - DB Error: " . mysql_error() . " - {$query_segment_translations}\n");
                $ret = false;
            }
        }
    }
    if ($imported == 0) {
        // Nothing translatable was stored: report a failure instead of returning an undefined value
        log::doLog("Xliff Import: no segments imported");
        return false;
    }
    return $ret;
}