Beispiel #1
0
 /**
  * Builds the edit-log view rows, the aggregated statistics and the pagination
  * data for the current job.
  *
  * Segments whose id is lower than self::$start_id are not rendered; they are
  * only used to work out the "previous page" boundary.
  *
  * @param bool $use_ter_diff When true the diff and the TER score come from
  *                           MyMemory::diff_tercpp(); otherwise the diff comes
  *                           from MyMemory::diff_html() and TER is reported as 0.
  *
  * @return array array($output_data, $stats, $pagination) where $output_data is a
  *               list of EditLog_EditLogSegmentClientStruct, $stats is a map of
  *               formatted statistics and $pagination is whatever
  *               evaluatePagination() returns.
  *
  * @throws Exception With code -1 when the DAO returns no segments.
  */
 private function getEditLogData($use_ter_diff = false)
 {
     $editLogDao = new EditLog_EditLogDao(Database::obtain());
     $data = $editLogDao->getSegments($this->getJid(), $this->getPassword(), self::$start_id);

     // Index the translation mismatches by segment hash.
     // The rows must be copied into a separate map: writing into the foreach
     // value variable only changes the per-iteration copy and is lost.
     $translationMismatchList = $editLogDao->getTranslationMismatches($this->getJid());
     $translationMismatchMap = array();
     foreach ($translationMismatchList as $translMismRow) {
         $translationMismatchMap[$translMismRow['segment_hash']] = (bool) $translMismRow['translation_mismatch'];
     }

     $__pagination_prev = PHP_INT_MAX;
     // Lower sentinel used in place of PHP_INT_MIN.
     $__pagination_next = -2147483648;

     if (!$data) {
         throw new Exception('There are no changes in this job', -1);
     }

     $stats = array();
     $stats['total-word-count'] = 0;

     $stat_too_slow = array();
     $stat_too_fast = array();
     $stat_mt = array();
     $stat_rwc = array();
     $stat_pee = array();
     $stat_ter = array();
     $output_data = array();

     // Placeholder patterns and their replacements do not depend on the segment,
     // so they are built once instead of on every iteration.
     $array_patterns = array(
         rtrim(CatUtils::lfPlaceholderRegex, 'g'),
         rtrim(CatUtils::crPlaceholderRegex, 'g'),
         rtrim(CatUtils::crlfPlaceholderRegex, 'g'),
         rtrim(CatUtils::tabPlaceholderRegex, 'g'),
         rtrim(CatUtils::nbspPlaceholderRegex, 'g'),
     );
     $array_replacements_csv = array('\\n', '\\r', '\\r\\n', '\\t', Utils::unicode2chr(0xa0));
     $array_replacements = array(
         '<span class="_0A"></span><br />',
         '<span class="_0D"></span><br />',
         '<span class="_0D0A"></span><br />',
         '<span class="_tab">&#9;</span>',
         '<span class="_nbsp">&nbsp;</span>',
     );

     foreach ($data as $seg) {
         // Segments before the current page only contribute to the "previous" boundary.
         if ($seg->id < self::$start_id) {
             if ($seg->id <= $__pagination_prev) {
                 $__pagination_prev = $seg->id;
             }
             continue;
         }
         if ($seg->id > $__pagination_next) {
             $__pagination_next = $seg->id;
         }

         $displaySeg = new EditLog_EditLogSegmentClientStruct($seg->toArray());
         $displaySeg->suggestion_match .= "%";
         $displaySeg->job_id = $this->jid;

         $tte = CatUtils::parse_time_to_edit($displaySeg->time_to_edit);
         $displaySeg->display_time_to_edit = "{$tte['1']}m:{$tte['2']}s";

         $stat_rwc[] = $seg->raw_word_count;

         // By definition we cannot have a 0 word sentence. It is probably a - or a tag,
         // so we want to consider at least a word (display value only).
         if ($seg->raw_word_count < 1) {
             $displaySeg->raw_word_count = 1;
         }

         //todo: remove this
         $displaySeg->secs_per_word = $seg->getSecsPerWord();
         $displaySeg->stats_valid = (
             $displaySeg->secs_per_word < self::EDIT_TIME_SLOW_CUT
             && $displaySeg->secs_per_word > self::EDIT_TIME_FAST_CUT
         );

         // Stats: word counts of the segments edited too slowly / too quickly.
         if ($displaySeg->secs_per_word >= self::EDIT_TIME_SLOW_CUT) {
             $stat_too_slow[] = $seg->raw_word_count;
         }
         if ($displaySeg->secs_per_word <= self::EDIT_TIME_FAST_CUT) {
             $stat_too_fast[] = $seg->raw_word_count;
         }
         $displaySeg->secs_per_word .= "s";

         // Post-editing effort, clamped to the 0..100 range and weighted by word count.
         $displaySeg->pe_effort_perc = $displaySeg->getPeePerc();
         if ($displaySeg->pe_effort_perc < 0) {
             $displaySeg->pe_effort_perc = 0;
         }
         if ($displaySeg->pe_effort_perc > 100) {
             $displaySeg->pe_effort_perc = 100;
         }
         $stat_pee[] = $displaySeg->pe_effort_perc * $seg->raw_word_count;
         $displaySeg->pe_effort_perc .= "%";

         $lh = Langs_Languages::getInstance();
         $lang = $lh->getIsoCode($lh->getLocalizedName($seg->job_target));

         $sug_for_diff = CatUtils::placehold_xliff_tags($seg->suggestion);
         $tra_for_diff = CatUtils::placehold_xliff_tags($seg->translation);

         if ($use_ter_diff) {
             $ter = MyMemory::diff_tercpp($sug_for_diff, $tra_for_diff, $lang);
         } else {
             $ter = array();
         }

         // Read the TER result defensively instead of silencing warnings with "@":
         // index 0 is used as the diff markup, index 1 as the TER ratio.
         $ter_ratio = isset($ter[1]) ? $ter[1] : 0;
         $diff_ter = isset($ter[0]) ? $ter[0] : null;

         $displaySeg->ter = $ter_ratio * 100;
         $stat_ter[] = $displaySeg->ter * $seg->raw_word_count;
         $displaySeg->ter = round($ter_ratio * 100) . "%";

         // Compare as strings: a loose "!=" treats numeric strings such as "1.0"
         // and "1" as equal and would hide a real edit.
         if ((string) $seg->suggestion !== (string) $seg->translation) {
             if ($use_ter_diff) {
                 // force use of third party ter diff
                 $displaySeg->diff = $diff_ter;
             } else {
                 // we will use diff_PE until ter_diff will not work properly
                 $displaySeg->diff = MyMemory::diff_html($sug_for_diff, $tra_for_diff);
             }
         } else {
             $displaySeg->diff = '';
         }
         $displaySeg->diff = CatUtils::restore_xliff_tags_for_view($displaySeg->diff);

         // BUG: While suggestions source is not correctly set
         if ($displaySeg->suggestion_match === "85%" || $displaySeg->suggestion_match === "86%") {
             $displaySeg->suggestion_source = 'Machine Translation';
             $stat_mt[] = $seg->raw_word_count;
         } else {
             $displaySeg->suggestion_source = 'TM';
         }

         // CSV flavour: placeholders become escaped control sequences.
         $displaySeg->source_csv = preg_replace($array_patterns, $array_replacements_csv, $seg->source);
         $displaySeg->translation_csv = preg_replace($array_patterns, $array_replacements_csv, $seg->translation);
         $displaySeg->sug_csv = preg_replace($array_patterns, $array_replacements_csv, $displaySeg->suggestion_view);
         $displaySeg->diff_csv = preg_replace($array_patterns, $array_replacements_csv, $displaySeg->diff);

         // HTML flavour: placeholders become marker spans.
         // NOTE(review): source, translation and suggestion_view are overwritten by
         // rawxliff2view() right below, so only the diff keeps these replacements.
         // Kept as-is to preserve the current output - confirm whether this is intended.
         $displaySeg->source = preg_replace($array_patterns, $array_replacements, $seg->source);
         $displaySeg->translation = preg_replace($array_patterns, $array_replacements, $seg->translation);
         $displaySeg->suggestion_view = preg_replace($array_patterns, $array_replacements, $displaySeg->suggestion_view);
         $displaySeg->diff = preg_replace($array_patterns, $array_replacements, $displaySeg->diff);

         $displaySeg->source = trim(CatUtils::rawxliff2view($seg->source));
         $displaySeg->suggestion_view = trim(CatUtils::rawxliff2view($seg->suggestion));
         $displaySeg->translation = trim(CatUtils::rawxliff2view($seg->translation));

         if ($seg->mt_qe == 0) {
             $displaySeg->mt_qe = 'N/A';
         }

         // 1 when the segment hash is flagged as a translation mismatch, 0 otherwise.
         $segment_hash = $displaySeg->segment_hash;
         $displaySeg->num_translation_mismatch = isset($translationMismatchMap[$segment_hash])
             ? (int) $translationMismatchMap[$segment_hash]
             : 0;

         $displaySeg->evaluateWarningString();
         $output_data[] = $displaySeg;
     }

     $pagination = $this->evaluatePagination($__pagination_prev, $__pagination_next + 1);
     $globalStats = $this->evaluateGlobalStats();

     $stats['valid-word-count'] = $globalStats['raw_words'];
     //TODO: this will not work anymore
     $stats['edited-word-count'] = array_sum($stat_rwc);

     if ($stats['edited-word-count'] > 0) {
         $stats['too-slow-words'] = round(array_sum($stat_too_slow) / $stats['edited-word-count'], 2) * 100;
         $stats['too-fast-words'] = round(array_sum($stat_too_fast) / $stats['edited-word-count'], 2) * 100;
         $stats['avg-pee'] = round(array_sum($stat_pee) / array_sum($stat_rwc)) . "%";
         $stats['avg-ter'] = round(array_sum($stat_ter) / array_sum($stat_rwc)) . "%";
     }

     // Guard the division: with no edited words the MT share is reported as 0
     // instead of dividing by zero.
     if ($stats['edited-word-count'] > 0) {
         $stats['mt-words'] = round(array_sum($stat_mt) / $stats['edited-word-count'], 2) * 100;
     } else {
         $stats['mt-words'] = 0;
     }
     $stats['tm-words'] = 100 - $stats['mt-words'];

     $stats['total-valid-tte'] = round($globalStats['tot_tte']);

     // Weighted average taken from the global stats (value divided by 1000 here).
     $stats['avg-secs-per-word'] = round($globalStats['secs_per_word'] / 1000, 1);

     // Eight working hours per day; guard against a zero average.
     $words_per_day = 0;
     if ($stats['avg-secs-per-word'] > 0) {
         $words_per_day = round(3600 * 8 / $stats['avg-secs-per-word']);
     }
     $stats['est-words-per-day'] = number_format($words_per_day, 0, '.', ',');

     // Last minute formatting (after calculations)
     $temp = CatUtils::parse_time_to_edit(round($stats['total-valid-tte']));
     $stats['total-valid-tte'] = "{$temp['0']}h:{$temp['1']}m:{$temp['2']}s";
     $stats['total-tte-seconds'] = $temp[0] * 3600 + $temp[1] * 60 + $temp[2];

     // The global average overrides the per-page value computed above.
     $stats['avg-pee'] = round($globalStats['avg_pee'], 2);
     $stats['avg-pee'] .= "%";

     return array($output_data, $stats, $pagination);
 }