Ejemplo n.º 1
0
 /**
  * Extract sources and pre-translations from an (sdl)xliff file and put them in the database.
  *
  * The parsed XLIFF is walked file by file and trans-unit by trans-unit:
  *  - trans-units flagged translate="no" are skipped and produce no row;
  *  - pre-segmented trans-units (with <seg-source>) produce one row per <mrk>;
  *  - all other trans-units produce a single row built from <source>.
  * Targets that differ from their source are collected in
  * $this->projectStructure['translations'], keyed by the trans-unit id, and are
  * completed with the database id / hash of the inserted segments at the end.
  *
  * @param mixed      $xliff_file_content XLIFF content, handed as-is to Xliff_Parser::Xliff2Array()
  *                                       (presumably the raw file content as a string)
  * @param int|string $fid                id of the file owning the segments; used as key inside
  *                                       $this->projectStructure and as id_file in the queries
  *
  * @throws Exception code -4 when the XLIFF cannot be parsed, -1 when no segment is
  *                   visible in the cattool, -2 when an INSERT fails
  */
 protected function _extractSegments($xliff_file_content, $fid)
 {
     //create the structure for multiple files
     $this->projectStructure['segments']->offsetSet($fid, new ArrayObject(array()));

     $xliff_obj = new Xliff_Parser();
     $xliff = $xliff_obj->Xliff2Array($xliff_file_content);

     //checking that parsing went well
     $has_parser_errors = isset($xliff['parser-errors']);
     if ($has_parser_errors || !isset($xliff['files'])) {
         //'parser-errors' can be missing when only the 'files' key is absent:
         //never join() an undefined value
         $details = $has_parser_errors ? join("\n", (array) $xliff['parser-errors']) : "no files found in the parsed xliff";
         Log::doLog("Xliff Import: Error parsing. " . $details);
         throw new Exception("Xliff Import: Error parsing. Check Log file.", -4);
     }

     //the file id is written unquoted inside the SQL tuples: force it to an integer
     $id_file = (int) $fid;

     //number of stored segments that are visible in the cattool
     //for correctness: we could have more <file> tags in the xliff
     $fileCounter_Show_In_Cattool = 0;

     foreach ($xliff['files'] as $xliff_file) {
         if (!array_key_exists('trans-units', $xliff_file)) {
             continue;
         }

         //extract internal reference base64 files and store their index in $this->projectStructure
         $this->_extractFileReferences($fid, $xliff_file);

         //this method does not write 'file_references' after this point,
         //so the value is resolved once per <file> instead of once per segment
         if ($this->projectStructure['file_references']->offsetExists($fid)) {
             $file_reference = (int) $this->projectStructure['file_references'][$fid];
         } else {
             $file_reference = 'NULL';
         }

         foreach ($xliff_file['trans-units'] as $xliff_trans_unit) {
             //a missing "translate" attribute is handled as translate="yes"
             if (isset($xliff_trans_unit['attr']['translate']) && $xliff_trans_unit['attr']['translate'] == "no") {
                 //no segments to translate: nothing is stored and the counter is not incremented
                 continue;
             }

             $trans_unit_id = $this->dbHandler->escape($xliff_trans_unit['attr']['id']);

             if (isset($xliff_trans_unit['seg-source'])) {
                 //the XLIFF is already segmented (has <seg-source>): one row for every <mrk>
                 foreach ($xliff_trans_unit['seg-source'] as $position => $seg_source) {
                     //reset the flag for every <mrk>, otherwise a single empty <mrk>
                     //would hide all the following ones of the same trans-unit
                     $show_in_cattool = 1;
                     $mrk_prec_tags = '';
                     $mrk_succ_tags = '';
                     $raw_source = $seg_source['raw-content'];

                     if (trim(strip_tags($raw_source)) === '') {
                         //only tags and/or white spaces: stored but not shown
                         $show_in_cattool = 0;
                     } else {
                         $extract_external = $this->_strip_external($raw_source);
                         $mrk_prec_tags = $extract_external['prec'];
                         $mrk_succ_tags = $extract_external['succ'];
                         $raw_source = $extract_external['seg'];

                         if (isset($xliff_trans_unit['seg-target'][$position]['raw-content'])) {
                             $target_extract_external = $this->_strip_external($xliff_trans_unit['seg-target'][$position]['raw-content']);

                             //a target equal to the source can't be told apart from an untranslated one,
                             //so it is discarded; this means that we lose the tag ids inside the target
                             //if they are different from the source ones
                             $src = strip_tags(html_entity_decode($extract_external['seg'], ENT_QUOTES, 'UTF-8'));
                             $trg = strip_tags(html_entity_decode($target_extract_external['seg'], ENT_QUOTES, 'UTF-8'));

                             //targets of purely numeric sources are never stored as pre-translations;
                             //with a non numeric $src the strict check is equivalent to the loose one
                             if (!is_numeric($src) && $src !== $trg) {
                                 $target = $this->dbHandler->escape(CatUtils::raw2DatabaseXliff($target_extract_external['seg']));

                                 //string cast to avoid casting to int: 0001 -> 1
                                 //useful for idiom internal xliff id
                                 //NOTE(review): the key is the trans-unit id, so a later <mrk> of the
                                 //same trans-unit overwrites the translation of an earlier one
                                 $this->projectStructure['translations']->offsetSet((string) $xliff_trans_unit['attr']['id'], new ArrayObject(array(2 => $target)));

                                 //seg-source and target translation can have different mrk id:
                                 //override the seg-source surrounding mrk tags with those of the target
                                 $mrk_prec_tags = $target_extract_external['prec'];
                                 $mrk_succ_tags = $target_extract_external['succ'];
                             }
                         }
                     }

                     $mid = $this->dbHandler->escape($seg_source['mid']);
                     $ext_tags = $this->dbHandler->escape($seg_source['ext-prec-tags']);
                     $source = $this->dbHandler->escape(CatUtils::raw2DatabaseXliff($raw_source));
                     //here the hash is computed on the raw (not escaped) content
                     $source_hash = $this->dbHandler->escape(md5($raw_source));
                     $ext_succ_tags = $this->dbHandler->escape($seg_source['ext-succ-tags']);
                     $num_words = CatUtils::segment_raw_wordcount($raw_source, $xliff_file['attr']['source-language']);
                     $mrk_ext_prec_tags = $this->dbHandler->escape($mrk_prec_tags);
                     $mrk_ext_succ_tags = $this->dbHandler->escape($mrk_succ_tags);

                     $this->projectStructure['segments'][$fid]->append("('{$trans_unit_id}',{$id_file},{$file_reference},'{$source}','{$source_hash}',{$num_words},'{$mid}','{$ext_tags}','{$ext_succ_tags}',{$show_in_cattool},'{$mrk_ext_prec_tags}','{$mrk_ext_succ_tags}')");

                     //count every visible row
                     $fileCounter_Show_In_Cattool += $show_in_cattool;
                 }
             } else {
                 //not segmented: a single row built from <source>
                 $show_in_cattool = 1;
                 $prec_tags = '';
                 $succ_tags = '';
                 $raw_source = $xliff_trans_unit['source']['raw-content'];

                 //strict check instead of empty(): a source made of the single char "0" is real content
                 if (trim(strip_tags($raw_source)) === '') {
                     $show_in_cattool = 0;
                 } else {
                     $extract_external = $this->_strip_external($raw_source);
                     if (!empty($extract_external['prec'])) {
                         $prec_tags = $this->dbHandler->escape($extract_external['prec']);
                     }
                     if (!empty($extract_external['succ'])) {
                         $succ_tags = $this->dbHandler->escape($extract_external['succ']);
                     }
                     $raw_source = $extract_external['seg'];

                     if (isset($xliff_trans_unit['target']['raw-content'])) {
                         $target_extract_external = $this->_strip_external($xliff_trans_unit['target']['raw-content']);

                         //a target equal to the source is handled as not translated
                         if ($raw_source != $target_extract_external['seg']) {
                             $target = $this->dbHandler->escape(CatUtils::raw2DatabaseXliff($target_extract_external['seg']));

                             //string cast to avoid casting to int: 0001 -> 1
                             //useful for idiom internal xliff id
                             $this->projectStructure['translations']->offsetSet((string) $xliff_trans_unit['attr']['id'], new ArrayObject(array(2 => $target)));
                         }
                     }
                 }

                 //the word count is done on the raw content, the escaping is applied after it
                 $num_words = CatUtils::segment_raw_wordcount($raw_source, $xliff_file['attr']['source-language']);
                 $source = $this->dbHandler->escape(CatUtils::raw2DatabaseXliff($raw_source));
                 //NOTE(review): unlike the segmented branch, the hash is computed on the
                 //converted and escaped string; left untouched because existing hashes may rely on it
                 $source_hash = $this->dbHandler->escape(md5($source));

                 //missing external tags are stored as empty strings, not as NULL
                 $this->projectStructure['segments'][$fid]->append("('{$trans_unit_id}',{$id_file}, {$file_reference},'{$source}','{$source_hash}',{$num_words},NULL,'{$prec_tags}','{$succ_tags}',{$show_in_cattool},NULL,NULL)");

                 $fileCounter_Show_In_Cattool += $show_in_cattool;
             }
         }
     }

     //empty() on an ArrayObject is always false: count the rows instead.
     //A generic Exception is used on purpose, its code tells the failures apart
     if (count($this->projectStructure['segments'][$fid]) == 0 || $fileCounter_Show_In_Cattool == 0) {
         Log::doLog("Segment import - no segments found\n");
         throw new Exception("Segment import - no segments found", -1);
     }

     $baseQuery = "INSERT INTO segments ( internal_id, id_file, id_file_part, segment, segment_hash, raw_word_count, xliff_mrk_id, xliff_ext_prec_tags, xliff_ext_succ_tags, show_in_cattool,xliff_mrk_ext_prec_tags,xliff_mrk_ext_succ_tags) values ";

     Log::doLog("Segments: Total Rows to insert: " . count($this->projectStructure['segments'][$fid]));

     //split the query in chunks of 200 rows if there are too many segments;
     //from here on the structure holds the chunks, not the single rows
     $this->projectStructure['segments'][$fid]->exchangeArray(array_chunk($this->projectStructure['segments'][$fid]->getArrayCopy(), 200));

     Log::doLog("Segments: Total Queries to execute: " . count($this->projectStructure['segments'][$fid]));

     foreach ($this->projectStructure['segments'][$fid] as $i => $chunk) {
         $this->dbHandler->query($baseQuery . join(",\n", $chunk));
         Log::doLog("Segments: Executed Query " . ($i + 1));

         if ($this->dbHandler->get_error_number()) {
             //NOTE(review): mysql_error() belongs to ext/mysql, which was removed in PHP 7;
             //replace it with the error message accessor of the db handler, if there is one
             $db_error = mysql_error();
             Log::doLog("Segment import - DB Error: " . $db_error . " - \n");
             //$chunk is an array of tuples: join it, interpolating it would only print "Array"
             throw new Exception("Segment import - DB Error: " . $db_error . " - " . join(",\n", $chunk), -2);
         }
     }

     //empty() on an ArrayObject is always false: skip the lookup when there are no pre-translations
     if (count($this->projectStructure['translations']) > 0) {
         $last_segments_query = sprintf("SELECT id, internal_id, segment_hash from segments WHERE id_file = %u", $fid);
         $_last_segments = $this->dbHandler->fetch_array($last_segments_query);

         if (!empty($_last_segments)) {
             foreach ($_last_segments as $row) {
                 $internal_id = (string) $row['internal_id'];
                 if (!$this->projectStructure['translations']->offsetExists($internal_id)) {
                     continue;
                 }

                 //WARNING offset 2 holds the target translation, set while parsing
                 $translation = $this->projectStructure['translations'][$internal_id];
                 $translation->offsetSet(0, $row['id']);
                 $translation->offsetSet(1, $row['internal_id']);
                 $translation->offsetSet(3, $row['segment_hash']);
             }
         }
     }
 }