/**
 * Builds the per-segment edit log rows of a job together with aggregate editing statistics.
 *
 * Every row returned by getEditLog() is enriched in place with display fields
 * (time to edit, seconds per word, post-editing effort, TER, diff, suggestion source,
 * CSV-friendly and HTML-friendly renderings of source / suggestion / translation).
 * Word-count weighted statistics are accumulated while iterating.
 *
 * A segment counts as "valid" for the timing statistics only when its seconds-per-word
 * value lies strictly between the fast cut (0.25) and the slow cut (30).
 *
 * @param mixed $jid          Job id; forwarded to getEditLog() and copied into each row as 'jid'.
 * @param mixed $password     Job password; forwarded to getEditLog().
 * @param bool  $use_ter_diff When true the TER score and the diff come from MyMemory::diff_tercpp();
 *                            otherwise TER is reported as 0 and the diff comes from MyMemory::diff_html().
 *
 * @return array|false false when getEditLog() returns nothing, otherwise array($data, $stats).
 */
public static function getEditingLogData($jid, $password, $use_ter_diff = false)
{
    $data = getEditLog($jid, $password);
    if (!$data) {
        return false;
    }

    $slow_cut = 30;
    $fast_cut = 0.25;

    $stats = array();
    $stats['total-word-count'] = 0;

    // All accumulators are initialised up front so that array_sum() never receives
    // an undefined variable when no segment falls into a given bucket.
    $stat_rwc       = array();
    $stat_valid_rwc = array();
    $stat_valid_tte = array();
    $stat_too_slow  = array();
    $stat_too_fast  = array();
    $stat_pee       = array();
    $stat_ter       = array();
    $stat_mt        = array();

    // Loop-invariant helpers.
    $lh = Langs_Languages::getInstance();

    $array_patterns = array(
        rtrim(self::lfPlaceholderRegex, 'g'),
        rtrim(self::crPlaceholderRegex, 'g'),
        rtrim(self::crlfPlaceholderRegex, 'g'),
        rtrim(self::tabPlaceholderRegex, 'g'),
        rtrim(self::nbspPlaceholderRegex, 'g'),
    );
    $array_replacements_csv = array('\\n', '\\r', '\\r\\n', '\\t', Utils::unicode2chr(0xa0));
    $array_replacements = array(
        '<span class="_0A"></span><br />',
        '<span class="_0D"></span><br />',
        '<span class="_0D0A"></span><br />',
        '<span class="_tab">	</span>',
        '<span class="_nbsp"> </span>',
    );

    foreach ($data as &$seg) {
        $seg['sm'] .= "%";
        $seg['jid'] = $jid;

        $tte = self::parse_time_to_edit($seg['tte']);
        $seg['time_to_edit'] = "{$tte['1']}m:{$tte['2']}s";

        // The edited word count uses the raw value, recorded before the clamp below.
        $stat_rwc[] = $seg['rwc'];

        // By definition we cannot have a 0 word sentence. It is probably a "-" or a tag,
        // so we want to consider at least one word.
        if ($seg['rwc'] < 1) {
            $seg['rwc'] = 1;
        }

        // 'tte' is divided by 1000 before being divided by the word count.
        $seg['secs-per-word'] = round($seg['tte'] / 1000 / $seg['rwc'], 1);

        if ($seg['secs-per-word'] < $slow_cut and $seg['secs-per-word'] > $fast_cut) {
            $seg['stats-valid']       = 'Yes';
            $seg['stats-valid-color'] = '';
            $seg['stats-valid-style'] = '';
            $stat_valid_rwc[] = $seg['rwc'];
            $stat_valid_tte[] = $seg['tte'];
        } else {
            $seg['stats-valid']       = 'No';
            $seg['stats-valid-color'] = '#ee6633';
            $seg['stats-valid-style'] = 'border:2px solid #EE6633';
        }

        // Stats
        if ($seg['secs-per-word'] >= $slow_cut) {
            $stat_too_slow[] = $seg['rwc'];
        }
        if ($seg['secs-per-word'] <= $fast_cut) {
            $stat_too_fast[] = $seg['rwc'];
        }

        // Post-editing effort, clamped to the 0..100 range and weighted by word count.
        $seg['pe_effort_perc'] = round((1 - MyMemory::TMS_MATCH($seg['sug'], $seg['translation'])) * 100);
        if ($seg['pe_effort_perc'] < 0) {
            $seg['pe_effort_perc'] = 0;
        }
        if ($seg['pe_effort_perc'] > 100) {
            $seg['pe_effort_perc'] = 100;
        }
        $stat_pee[] = $seg['pe_effort_perc'] * $seg['rwc'];
        $seg['pe_effort_perc'] .= "%";

        $lang = $lh->getIsoCode($lh->getLocalizedName($seg['target_lang']));

        // Note: decoding HTML entities here (html_entity_decode) was tried as a patch,
        // but it produced warnings when accessing indexes.
        $sug_for_diff = self::placehold_xliff_tags($seg['sug']);
        $tra_for_diff = self::placehold_xliff_tags($seg['translation']);

        $ter = $use_ter_diff ? MyMemory::diff_tercpp($sug_for_diff, $tra_for_diff, $lang) : array();

        // Missing indexes are treated as "no score" / "no diff" instead of silencing notices with @.
        $ter_score = isset($ter[1]) ? $ter[1] * 100 : 0;
        $diff_ter  = isset($ter[0]) ? $ter[0] : null;

        $stat_ter[] = $ter_score * $seg['rwc'];
        $seg['ter'] = round($ter_score) . "%";

        // Compare as strings: a loose != would treat numeric-looking segments
        // such as "1.0" and "1" as identical and hide the edit.
        if ((string) $seg['sug'] !== (string) $seg['translation']) {
            if ($use_ter_diff) {
                // force use of third party ter diff
                $seg['diff'] = $diff_ter;
            } else {
                // we will use diff_PE until ter_diff will not work properly
                $seg['diff'] = MyMemory::diff_html($sug_for_diff, $tra_for_diff);
            }
        } else {
            $seg['diff'] = '';
        }
        $seg['diff'] = self::restore_xliff_tags_for_view($seg['diff']);

        // BUG: While suggestions source is not correctly set
        if ($seg['sm'] == "85%" or $seg['sm'] == "86%") {
            $seg['ss'] = 'Machine Translation';
            $stat_mt[] = $seg['rwc'];
        } else {
            $seg['ss'] = 'Translation Memory';
        }

        $seg['sug_view']    = trim(CatUtils::rawxliff2view($seg['sug']));
        $seg['source']      = trim(CatUtils::rawxliff2view($seg['source']));
        $seg['translation'] = trim(CatUtils::rawxliff2view($seg['translation']));

        // CSV rendering: placeholders become escaped control sequences.
        $seg['source_csv']      = preg_replace($array_patterns, $array_replacements_csv, $seg['source']);
        $seg['translation_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['translation']);
        $seg['sug_csv']         = preg_replace($array_patterns, $array_replacements_csv, $seg['sug_view']);
        $seg['diff_csv']        = preg_replace($array_patterns, $array_replacements_csv, $seg['diff']);

        // HTML rendering: placeholders become marker spans.
        $seg['source']      = preg_replace($array_patterns, $array_replacements, $seg['source']);
        $seg['translation'] = preg_replace($array_patterns, $array_replacements, $seg['translation']);
        $seg['sug_view']    = preg_replace($array_patterns, $array_replacements, $seg['sug_view']);
        $seg['diff']        = preg_replace($array_patterns, $array_replacements, $seg['diff']);

        if ($seg['mt_qe'] == 0) {
            $seg['mt_qe'] = 'N/A';
        }
    }
    // Break the reference left behind by the by-reference foreach.
    unset($seg);

    $stats['edited-word-count'] = array_sum($stat_rwc);
    $stats['valid-word-count']  = array_sum($stat_valid_rwc);

    if ($stats['edited-word-count'] > 0) {
        $stats['too-slow-words'] = round(array_sum($stat_too_slow) / $stats['edited-word-count'], 2) * 100;
        $stats['too-fast-words'] = round(array_sum($stat_too_fast) / $stats['edited-word-count'], 2) * 100;
        $stats['avg-pee']        = round(array_sum($stat_pee) / $stats['edited-word-count']) . "%";
        $stats['avg-ter']        = round(array_sum($stat_ter) / $stats['edited-word-count']) . "%";
        $stats['mt-words']       = round(array_sum($stat_mt) / $stats['edited-word-count'], 2) * 100;
    } else {
        // No edited words: avoid dividing by zero.
        $stats['mt-words'] = 0;
    }
    $stats['tm-words'] = 100 - $stats['mt-words'];

    $total_valid_tte = array_sum($stat_valid_tte);
    $stats['total-valid-tte'] = round($total_valid_tte / 1000);

    // Weighted average (total valid time over total valid words); 0 when there are no valid words.
    if ($stats['valid-word-count'] > 0) {
        $stats['avg-secs-per-word'] = round($stats['total-valid-tte'] / $stats['valid-word-count'], 1);
    } else {
        $stats['avg-secs-per-word'] = 0;
    }

    // Estimate for an 8 hour day; reported as 0 when no average speed is available.
    if ($stats['avg-secs-per-word'] != 0) {
        $words_per_day = round(3600 * 8 / $stats['avg-secs-per-word']);
    } else {
        $words_per_day = 0;
    }
    $stats['est-words-per-day'] = number_format($words_per_day, 0, '.', ',');

    // Last minute formatting (after calculations)
    $temp = self::parse_time_to_edit(round($total_valid_tte));
    $stats['total-valid-tte']   = "{$temp['0']}h:{$temp['1']}m:{$temp['2']}s";
    $stats['total-tte-seconds'] = $temp[0] * 3600 + $temp[1] * 60 + $temp[2];

    return array($data, $stats);
}
/**
 * Builds the edit log page for the current job: display structs, statistics and pagination.
 *
 * Segments are loaded through EditLog_EditLogDao starting from self::$start_id. Each
 * segment at or after the start id is turned into an EditLog_EditLogSegmentClientStruct
 * enriched with display fields (time to edit, seconds per word, post-editing effort,
 * TER, diff, suggestion source, CSV and HTML renderings, translation mismatch flag).
 * Segments before the start id only feed the "previous page" pagination marker.
 *
 * @param bool $use_ter_diff When true the TER score and the diff come from MyMemory::diff_tercpp();
 *                           otherwise TER is reported as 0 and the diff comes from MyMemory::diff_html().
 *
 * @return array array($output_data, $stats, $pagination)
 *
 * @throws Exception with code -1 when the DAO returns no segments.
 */
private function getEditLogData($use_ter_diff = false)
{
    $editLogDao = new EditLog_EditLogDao(Database::obtain());
    $data = $editLogDao->getSegments($this->getJid(), $this->getPassword(), self::$start_id);

    // Get translation mismatches and index them by segment hash. The map has to be a
    // separate array: writing into the loop variable only changes a copy, and the
    // lookup further down would then never find a hash.
    $translationMismatchList = $editLogDao->getTranslationMismatches($this->getJid());
    $mismatchByHash = array();
    foreach ($translationMismatchList as $translMismRow) {
        $mismatchByHash[$translMismRow['segment_hash']] = (bool) $translMismRow['translation_mismatch'];
    }

    $__pagination_prev = PHP_INT_MAX;
    $__pagination_next = -2147483648; //PHP_INT_MIN

    if (!$data) {
        throw new Exception('There are no changes in this job', -1);
    }

    $stats = array();
    $stats['total-word-count'] = 0;

    $stat_rwc      = array();
    $stat_too_slow = array();
    $stat_too_fast = array();
    $stat_ter      = array();
    $stat_mt       = array();
    $output_data   = array();

    // Loop-invariant helpers.
    $lh = Langs_Languages::getInstance();

    $array_patterns = array(
        rtrim(CatUtils::lfPlaceholderRegex, 'g'),
        rtrim(CatUtils::crPlaceholderRegex, 'g'),
        rtrim(CatUtils::crlfPlaceholderRegex, 'g'),
        rtrim(CatUtils::tabPlaceholderRegex, 'g'),
        rtrim(CatUtils::nbspPlaceholderRegex, 'g'),
    );
    $array_replacements_csv = array('\\n', '\\r', '\\r\\n', '\\t', Utils::unicode2chr(0xa0));
    $array_replacements = array(
        '<span class="_0A"></span><br />',
        '<span class="_0D"></span><br />',
        '<span class="_0D0A"></span><br />',
        '<span class="_tab">	</span>',
        '<span class="_nbsp"> </span>',
    );

    foreach ($data as $seg) {
        // If the segment is before the current one it only contributes to the
        // "previous" pagination marker (the smallest id seen before the start id).
        if ($seg->id < self::$start_id) {
            if ($seg->id <= $__pagination_prev) {
                $__pagination_prev = $seg->id;
            }
            continue;
        }

        // Track the largest id on this page for the "next" pagination marker.
        if ($seg->id > $__pagination_next) {
            $__pagination_next = $seg->id;
        }

        $displaySeg = new EditLog_EditLogSegmentClientStruct($seg->toArray());
        $displaySeg->suggestion_match .= "%";
        $displaySeg->job_id = $this->jid;

        $tte = CatUtils::parse_time_to_edit($displaySeg->time_to_edit);
        $displaySeg->display_time_to_edit = "{$tte['1']}m:{$tte['2']}s";

        $stat_rwc[] = $seg->raw_word_count;

        // By definition we cannot have a 0 word sentence. It is probably a "-" or a tag,
        // so we want to consider at least one word.
        if ($seg->raw_word_count < 1) {
            $displaySeg->raw_word_count = 1;
        }

        //todo: remove this
        $displaySeg->secs_per_word = $seg->getSecsPerWord();

        // Valid only when strictly between the fast and the slow cut.
        $displaySeg->stats_valid = (
            $displaySeg->secs_per_word < self::EDIT_TIME_SLOW_CUT
            && $displaySeg->secs_per_word > self::EDIT_TIME_FAST_CUT
        );

        // Stats
        if ($displaySeg->secs_per_word >= self::EDIT_TIME_SLOW_CUT) {
            $stat_too_slow[] = $seg->raw_word_count;
        }
        if ($displaySeg->secs_per_word <= self::EDIT_TIME_FAST_CUT) {
            $stat_too_fast[] = $seg->raw_word_count;
        }
        $displaySeg->secs_per_word .= "s";

        // Post-editing effort, clamped to the 0..100 range for display.
        // The aggregate avg-pee value comes from evaluateGlobalStats() below.
        $displaySeg->pe_effort_perc = $displaySeg->getPeePerc();
        if ($displaySeg->pe_effort_perc < 0) {
            $displaySeg->pe_effort_perc = 0;
        }
        if ($displaySeg->pe_effort_perc > 100) {
            $displaySeg->pe_effort_perc = 100;
        }
        $displaySeg->pe_effort_perc .= "%";

        $lang = $lh->getIsoCode($lh->getLocalizedName($seg->job_target));

        $sug_for_diff = CatUtils::placehold_xliff_tags($seg->suggestion);
        $tra_for_diff = CatUtils::placehold_xliff_tags($seg->translation);

        $ter = $use_ter_diff ? MyMemory::diff_tercpp($sug_for_diff, $tra_for_diff, $lang) : array();

        // Missing indexes are treated as "no score" / "no diff" instead of silencing notices with @.
        $ter_score = isset($ter[1]) ? $ter[1] * 100 : 0;
        $diff_ter  = isset($ter[0]) ? $ter[0] : null;

        $stat_ter[] = $ter_score * $seg->raw_word_count;
        $displaySeg->ter = round($ter_score) . "%";

        // Compare as strings: a loose != would treat numeric-looking segments
        // such as "1.0" and "1" as identical and hide the edit.
        if ((string) $seg->suggestion !== (string) $seg->translation) {
            if ($use_ter_diff) {
                // force use of third party ter diff
                $displaySeg->diff = $diff_ter;
            } else {
                // we will use diff_PE until ter_diff will not work properly
                $displaySeg->diff = MyMemory::diff_html($sug_for_diff, $tra_for_diff);
            }
        } else {
            $displaySeg->diff = '';
        }
        $displaySeg->diff = CatUtils::restore_xliff_tags_for_view($displaySeg->diff);

        // BUG: While suggestions source is not correctly set
        if ($displaySeg->suggestion_match == "85%" || $displaySeg->suggestion_match == "86%") {
            $displaySeg->suggestion_source = 'Machine Translation';
            $stat_mt[] = $seg->raw_word_count;
        } else {
            $displaySeg->suggestion_source = 'TM';
        }

        // The view conversion must run BEFORE the placeholder replacements: done afterwards
        // it overwrites the marker spans, and sug_csv would read suggestion_view before
        // this method has assigned it.
        $displaySeg->source          = trim(CatUtils::rawxliff2view($seg->source));
        $displaySeg->suggestion_view = trim(CatUtils::rawxliff2view($seg->suggestion));
        $displaySeg->translation     = trim(CatUtils::rawxliff2view($seg->translation));

        // CSV rendering: placeholders become escaped control sequences.
        $displaySeg->source_csv      = preg_replace($array_patterns, $array_replacements_csv, $seg->source);
        $displaySeg->translation_csv = preg_replace($array_patterns, $array_replacements_csv, $seg->translation);
        $displaySeg->sug_csv         = preg_replace($array_patterns, $array_replacements_csv, $displaySeg->suggestion_view);
        $displaySeg->diff_csv        = preg_replace($array_patterns, $array_replacements_csv, $displaySeg->diff);

        // HTML rendering: placeholders become marker spans.
        $displaySeg->source          = preg_replace($array_patterns, $array_replacements, $displaySeg->source);
        $displaySeg->translation     = preg_replace($array_patterns, $array_replacements, $displaySeg->translation);
        $displaySeg->suggestion_view = preg_replace($array_patterns, $array_replacements, $displaySeg->suggestion_view);
        $displaySeg->diff            = preg_replace($array_patterns, $array_replacements, $displaySeg->diff);

        if ($seg->mt_qe == 0) {
            $displaySeg->mt_qe = 'N/A';
        }

        // 1 when the hash is flagged as a translation mismatch, 0 otherwise (also when unknown).
        $displaySeg->num_translation_mismatch = isset($mismatchByHash[$displaySeg->segment_hash])
            ? (int) $mismatchByHash[$displaySeg->segment_hash]
            : 0;

        $displaySeg->evaluateWarningString();

        $output_data[] = $displaySeg;
    }

    $pagination  = $this->evaluatePagination($__pagination_prev, $__pagination_next + 1);
    $globalStats = $this->evaluateGlobalStats();

    $stats['valid-word-count'] = $globalStats['raw_words'];

    //TODO: this will not work anymore
    $stats['edited-word-count'] = array_sum($stat_rwc);

    if ($stats['edited-word-count'] > 0) {
        $stats['too-slow-words'] = round(array_sum($stat_too_slow) / $stats['edited-word-count'], 2) * 100;
        $stats['too-fast-words'] = round(array_sum($stat_too_fast) / $stats['edited-word-count'], 2) * 100;
        $stats['avg-ter']        = round(array_sum($stat_ter) / $stats['edited-word-count']) . "%";
        $stats['mt-words']       = round(array_sum($stat_mt) / $stats['edited-word-count'], 2) * 100;
    } else {
        // No edited words on this page: avoid dividing by zero.
        $stats['mt-words'] = 0;
    }
    $stats['tm-words'] = 100 - $stats['mt-words'];

    $stats['total-valid-tte'] = round($globalStats['tot_tte']);

    // Weighted average taken from the global stats.
    $stats['avg-secs-per-word'] = round($globalStats['secs_per_word'] / 1000, 1);

    // Estimate for an 8 hour day; reported as 0 when no average speed is available.
    if ($stats['avg-secs-per-word'] != 0) {
        $words_per_day = round(3600 * 8 / $stats['avg-secs-per-word']);
    } else {
        $words_per_day = 0;
    }
    $stats['est-words-per-day'] = number_format($words_per_day, 0, '.', ',');

    // Last minute formatting (after calculations)
    $temp = CatUtils::parse_time_to_edit(round($stats['total-valid-tte']));
    $stats['total-valid-tte']   = "{$temp['0']}h:{$temp['1']}m:{$temp['2']}s";
    $stats['total-tte-seconds'] = $temp[0] * 3600 + $temp[1] * 60 + $temp[2];

    $stats['avg-pee'] = round($globalStats['avg_pee'], 2) . "%";

    return array($output_data, $stats, $pagination);
}