/**
 * Re-computes the job-level post-editing-effort (PEE) aggregate after the
 * translation of one segment has changed, and stores it in the `jobs` table.
 *
 * The PEE of the old and of the new translation are each clamped to the
 * [0, 100] range and weighted by the segment's equivalent word count; the
 * old weighted value is subtracted from, and the new one added to,
 * $this->jobData['avg_post_editing_effort'].
 *
 * @param array $old_translation Previous translation row; the keys 'eq_word_count',
 *                               'suggestion' and 'translation' are read.
 * @param array $new_translation New translation row; the key 'translation' is read.
 */
private function updateJobPEE(array $old_translation, array $new_translation)
 {
     $segmentEquivalentWordCount = $old_translation['eq_word_count'];
     $segment = new EditLog_EditLogSegmentClientStruct(array('suggestion' => $old_translation['suggestion'], 'translation' => $old_translation['translation']));

     // PEE of the previous translation, clamped to [0, 100].
     $oldPEE = max(0, min(100, $segment->getPeePerc()));
     $oldPee_weighted = $oldPEE * $segmentEquivalentWordCount;

     // Swap in the new translation on the same struct.
     // NOTE(review): pe_effort_perc is reset to null presumably so that
     // getPeePerc() recomputes instead of returning a cached value - confirm.
     $segment->translation = $new_translation['translation'];
     $segment->pe_effort_perc = null;
     $newPEE = max(0, min(100, $segment->getPeePerc()));
     $newPee_weighted = $newPEE * $segmentEquivalentWordCount;

     $newTotalJobPee = $this->jobData['avg_post_editing_effort'] - $oldPee_weighted + $newPee_weighted;

     // The format string needs one conversion spec per argument: with a
     // literal in place of '%s' the password argument was silently dropped
     // and the WHERE clause could never match the job's real password.
     // NOTE(review): the password is interpolated into the SQL text as-is;
     // escape or bind it if it can ever originate from user input.
     $queryUpdateJob = "update jobs\n                                set avg_post_editing_effort = %f\n                                where id = %d and password = '%s'";
     $db = Database::obtain();
     $db->query(sprintf($queryUpdateJob, $newTotalJobPee, $this->id_job, $this->password));
 }
Example #2
0
 /**
  * Builds the data for one edit-log page of the current job: the list of
  * display-ready segments, the aggregate statistics and the pagination info.
  *
  * Segments are loaded through EditLog_EditLogDao::getSegments(). Segments
  * whose id is lower than self::$start_id are only used to find the id of
  * the previous page and are not rendered.
  *
  * @param bool $use_ter_diff When true the diff and the TER score come from
  *                           MyMemory::diff_tercpp(); otherwise the diff comes
  *                           from MyMemory::diff_html() and TER is reported as 0.
  *
  * @return array Three elements, in order: the array of
  *               EditLog_EditLogSegmentClientStruct rows, the stats array and
  *               the value returned by evaluatePagination().
  *
  * @throws Exception With code -1 when the DAO returns no segments.
  */
 private function getEditLogData($use_ter_diff = false)
 {
     $editLogDao = new EditLog_EditLogDao(Database::obtain());
     $data = $editLogDao->getSegments($this->getJid(), $this->getPassword(), self::$start_id);

     // Index the translation-mismatch flags by segment hash. The map has to
     // be a separate array: writing into the loop variable (as was done
     // before) discards the value at the end of every iteration.
     $translationMismatchList = $editLogDao->getTranslationMismatches($this->getJid());
     $translationMismatchMap = array();
     foreach ($translationMismatchList as $translMismRow) {
         $translationMismatchMap[$translMismRow['segment_hash']] = (bool) $translMismRow['translation_mismatch'];
     }

     // Running minimum of the ids before the page / maximum of the ids in the page.
     $__pagination_prev = PHP_INT_MAX;
     $__pagination_next = -2147483648;
     //PHP_INT_MIN
     $stat_too_slow = array();
     $stat_too_fast = array();
     if (!$data) {
         throw new Exception('There are no changes in this job', -1);
     }
     $stats['total-word-count'] = 0;
     $stat_mt = array();
     $stat_rwc = array();
     $stat_ter = array();
     $output_data = array();

     // Loop-invariant helpers: language lookup and the placeholder patterns
     // with their CSV and HTML replacements.
     $lh = Langs_Languages::getInstance();
     $array_patterns = array(rtrim(CatUtils::lfPlaceholderRegex, 'g'), rtrim(CatUtils::crPlaceholderRegex, 'g'), rtrim(CatUtils::crlfPlaceholderRegex, 'g'), rtrim(CatUtils::tabPlaceholderRegex, 'g'), rtrim(CatUtils::nbspPlaceholderRegex, 'g'));
     $array_replacements_csv = array('\\n', '\\r', '\\r\\n', '\\t', Utils::unicode2chr(0xa0));
     $array_replacements = array('<span class="_0A"></span><br />', '<span class="_0D"></span><br />', '<span class="_0D0A"></span><br />', '<span class="_tab">&#9;</span>', '<span class="_nbsp">&nbsp;</span>');

     foreach ($data as $seg) {
         //if the segment is before the current one
         if ($seg->id < self::$start_id) {
             if ($seg->id <= $__pagination_prev) {
                 $__pagination_prev = $seg->id;
             }
             continue;
         }
         if ($seg->id > $__pagination_next) {
             $__pagination_next = $seg->id;
         }
         $displaySeg = new EditLog_EditLogSegmentClientStruct($seg->toArray());
         $displaySeg->suggestion_match .= "%";
         $displaySeg->job_id = $this->jid;
         $tte = CatUtils::parse_time_to_edit($displaySeg->time_to_edit);
         $displaySeg->display_time_to_edit = "{$tte['1']}m:{$tte['2']}s";
         $stat_rwc[] = $seg->raw_word_count;
         // by definition we cannot have a 0 word sentence. It is probably a - or a tag, so we want to consider at least a word.
         // Only the display copy is adjusted; the statistics below keep using $seg->raw_word_count.
         if ($seg->raw_word_count < 1) {
             $displaySeg->raw_word_count = 1;
         }
         //todo: remove this
         $displaySeg->secs_per_word = $seg->getSecsPerWord();
         $displaySeg->stats_valid = $displaySeg->secs_per_word < self::EDIT_TIME_SLOW_CUT && $displaySeg->secs_per_word > self::EDIT_TIME_FAST_CUT;
         // Stats
         if ($displaySeg->secs_per_word >= self::EDIT_TIME_SLOW_CUT) {
             $stat_too_slow[] = $seg->raw_word_count;
         }
         if ($displaySeg->secs_per_word <= self::EDIT_TIME_FAST_CUT) {
             $stat_too_fast[] = $seg->raw_word_count;
         }
         $displaySeg->secs_per_word .= "s";

         // Post-editing effort, clamped to [0, 100] before being formatted.
         $displaySeg->pe_effort_perc = $displaySeg->getPeePerc();
         if ($displaySeg->pe_effort_perc < 0) {
             $displaySeg->pe_effort_perc = 0;
         }
         if ($displaySeg->pe_effort_perc > 100) {
             $displaySeg->pe_effort_perc = 100;
         }
         $displaySeg->pe_effort_perc .= "%";

         $lang = $lh->getIsoCode($lh->getLocalizedName($seg->job_target));
         $sug_for_diff = CatUtils::placehold_xliff_tags($seg->suggestion);
         $tra_for_diff = CatUtils::placehold_xliff_tags($seg->translation);
         if ($use_ter_diff) {
             $ter = MyMemory::diff_tercpp($sug_for_diff, $tra_for_diff, $lang);
         } else {
             $ter = array();
         }
         // Index 1 is used as the TER score and index 0 as the diff; both
         // fall back to an empty value when the TER diff was not requested.
         $terScore = isset($ter[1]) ? $ter[1] : 0;
         $diff_ter = isset($ter[0]) ? $ter[0] : null;
         $displaySeg->ter = $terScore * 100;
         $stat_ter[] = $displaySeg->ter * $seg->raw_word_count;
         $displaySeg->ter = round($terScore * 100) . "%";
         if ($seg->suggestion != $seg->translation) {
             //force use of third party ter diff
             if ($use_ter_diff) {
                 $displaySeg->diff = $diff_ter;
             } else {
                 // we will use diff_PE until ter_diff will not work properly
                 $displaySeg->diff = MyMemory::diff_html($sug_for_diff, $tra_for_diff);
             }
         } else {
             $displaySeg->diff = '';
         }
         $displaySeg->diff = CatUtils::restore_xliff_tags_for_view($displaySeg->diff);
         // BUG: While suggestions source is not correctly set
         if ($displaySeg->suggestion_match == "85%" || $displaySeg->suggestion_match == "86%") {
             $displaySeg->suggestion_source = 'Machine Translation';
             $stat_mt[] = $seg->raw_word_count;
         } else {
             $displaySeg->suggestion_source = 'TM';
         }

         // CSV flavour: placeholders become escaped control sequences.
         $displaySeg->source_csv = preg_replace($array_patterns, $array_replacements_csv, $seg->source);
         $displaySeg->translation_csv = preg_replace($array_patterns, $array_replacements_csv, $seg->translation);
         $displaySeg->sug_csv = preg_replace($array_patterns, $array_replacements_csv, $displaySeg->suggestion_view);
         $displaySeg->diff_csv = preg_replace($array_patterns, $array_replacements_csv, $displaySeg->diff);

         // HTML flavour: placeholders become marker spans.
         $displaySeg->source = preg_replace($array_patterns, $array_replacements, $seg->source);
         $displaySeg->translation = preg_replace($array_patterns, $array_replacements, $seg->translation);
         $displaySeg->suggestion_view = preg_replace($array_patterns, $array_replacements, $displaySeg->suggestion_view);
         $displaySeg->diff = preg_replace($array_patterns, $array_replacements, $displaySeg->diff);
         // NOTE(review): the three assignments below replace the values just
         // written for source, suggestion_view and translation; only diff
         // keeps the span replacement. Kept as-is - confirm this is intended.
         $displaySeg->source = trim(CatUtils::rawxliff2view($seg->source));
         $displaySeg->suggestion_view = trim(CatUtils::rawxliff2view($seg->suggestion));
         $displaySeg->translation = trim(CatUtils::rawxliff2view($seg->translation));
         if ($seg->mt_qe == 0) {
             $displaySeg->mt_qe = 'N/A';
         }
         // 1 when the segment hash is flagged as a translation mismatch, 0 otherwise.
         $segmentHash = $displaySeg->segment_hash;
         $displaySeg->num_translation_mismatch = isset($translationMismatchMap[$segmentHash]) ? (int) $translationMismatchMap[$segmentHash] : 0;
         $displaySeg->evaluateWarningString();
         $output_data[] = $displaySeg;
     }
     $pagination = $this->evaluatePagination($__pagination_prev, $__pagination_next + 1);
     $globalStats = $this->evaluateGlobalStats();
     $stats['valid-word-count'] = $globalStats['raw_words'];
     //TODO: this will not work anymore
     $stats['edited-word-count'] = array_sum($stat_rwc);
     if ($stats['edited-word-count'] > 0) {
         $stats['too-slow-words'] = round(array_sum($stat_too_slow) / $stats['edited-word-count'], 2) * 100;
         $stats['too-fast-words'] = round(array_sum($stat_too_fast) / $stats['edited-word-count'], 2) * 100;
         $stats['avg-ter'] = round(array_sum($stat_ter) / $stats['edited-word-count']) . "%";
         $stats['mt-words'] = round(array_sum($stat_mt) / $stats['edited-word-count'], 2) * 100;
     } else {
         // No edited words on this page: avoid dividing by zero.
         $stats['mt-words'] = 0.0;
     }
     $stats['tm-words'] = 100 - $stats['mt-words'];
     $stats['total-valid-tte'] = round($globalStats['tot_tte']);
     // Non weighted...
     // $stats['avg-secs-per-word'] = round(array_sum($stat_spw)/count($stat_spw),1);
     // Weighted
     $stats['avg-secs-per-word'] = round($globalStats['secs_per_word'] / 1000, 1);
     // 8 hours of work per day; reported as 0 when no average speed is available.
     $wordsPerDay = $stats['avg-secs-per-word'] != 0 ? round(3600 * 8 / $stats['avg-secs-per-word']) : 0;
     $stats['est-words-per-day'] = number_format($wordsPerDay, 0, '.', ',');
     // Last minute formatting (after calculations)
     $temp = CatUtils::parse_time_to_edit(round($stats['total-valid-tte']));
     $stats['total-valid-tte'] = "{$temp['0']}h:{$temp['1']}m:{$temp['2']}s";
     $stats['total-tte-seconds'] = $temp[0] * 3600 + $temp[1] * 60 + $temp[2];
     // The average PEE is taken from the job-wide stats, not from this page.
     $stats['avg-pee'] = round($globalStats['avg_pee'], 2);
     $stats['avg-pee'] .= "%";
     return array($output_data, $stats, $pagination);
 }
 /**
  * Re-computes the job-level post-editing-effort (PEE) aggregate after the
  * translation of the current segment has changed, and stores it in the
  * `jobs` table.
  *
  * The segment's raw word count is loaded through Segments_SegmentDao and
  * used as the weight. The aggregate is adjusted according to whether the
  * segment is valid for the edit log before and after the change:
  *  - valid before: the old weighted PEE is subtracted;
  *  - valid after: the new weighted PEE is added;
  *  - valid in neither state: nothing is written.
  *
  * @param array $old_translation Previous translation row; the keys 'suggestion',
  *                               'translation' and 'time_to_edit' are read.
  * @param array $new_translation New translation row; the keys 'translation'
  *                               and 'time_to_edit' are read.
  */
 private function updateJobPEE(array $old_translation, array $new_translation)
 {
     $segmentDao = new Segments_SegmentDao(Database::obtain());
     $segment_original = $segmentDao->getById($this->id_segment);
     $segmentRawWordCount = $segment_original->raw_word_count;

     // The working struct carries the cumulated time to edit (old + new);
     // the clone keeps the state before this edit, with the old time only.
     $segment = new EditLog_EditLogSegmentClientStruct(array('suggestion' => $old_translation['suggestion'], 'translation' => $old_translation['translation'], 'raw_word_count' => $segmentRawWordCount, 'time_to_edit' => $old_translation['time_to_edit'] + $new_translation['time_to_edit']));
     $oldSegment = clone $segment;
     $oldSegment->time_to_edit = $old_translation['time_to_edit'];

     $oldPEE = $segment->getPeePerc();
     $oldPee_weighted = $oldPEE * $segmentRawWordCount;

     // NOTE(review): pe_effort_perc is reset to null presumably so that
     // getPeePerc() recomputes instead of returning a cached value - confirm.
     $segment->translation = $new_translation['translation'];
     $segment->pe_effort_perc = null;
     $newPEE = $segment->getPeePerc();
     $newPee_weighted = $newPEE * $segmentRawWordCount;

     $isValidNow = $segment->isValidForEditLog();
     $wasValidBefore = $oldSegment->isValidForEditLog();
     if (!$isValidNow && !$wasValidBefore) {
         // The segment never contributed to the aggregate and still does not.
         return;
     }

     $newTotalJobPee = $this->jobData['avg_post_editing_effort'];
     if ($wasValidBefore) {
         $newTotalJobPee -= $oldPee_weighted;
     }
     if ($isValidNow) {
         $newTotalJobPee += $newPee_weighted;
     }

     // The format string needs one conversion spec per argument: with a
     // literal in place of '%s' the password argument was silently dropped
     // and the WHERE clause could never match the job's real password.
     // NOTE(review): the password is interpolated into the SQL text as-is;
     // escape or bind it if it can ever originate from user input.
     $queryUpdateJob = "update jobs\n                                set avg_post_editing_effort = %f\n                                where id = %d and password = '%s'";
     $db = Database::obtain();
     $db->query(sprintf($queryUpdateJob, $newTotalJobPee, $this->id_job, $this->password));
 }