/**
 * Returns the post-editing effort percentage of this segment.
 *
 * When no value is cached in $this->pe_effort_perc, it is computed as
 * round((1 - MyMemory::TMS_MATCH(suggestion, translation)) * 100), limited to
 * the 0..100 range (as the other post-editing effort computations do) and
 * stored on the object. A value that is already set is returned untouched.
 *
 * @return float|string
 */
public function getPeePerc()
{
    if (is_null($this->pe_effort_perc)) {
        $effort = round((1 - MyMemory::TMS_MATCH($this->suggestion, $this->translation)) * 100);

        // Float bounds keep the computed value a float, as round() returns one.
        $this->pe_effort_perc = max(0.0, min(100.0, $effort));
    }

    return $this->pe_effort_perc;
}
/**
 * Builds the per-segment editing log of a job together with aggregate statistics.
 *
 * Every row returned by getEditLog() is enriched in place with display fields
 * (time to edit, seconds per word, validity flags, post-editing effort, TER,
 * diff, suggestion source, CSV and HTML renderings of the texts). While doing
 * so, word-weighted totals are collected and turned into the $stats array.
 *
 * A segment is "valid" for the timing statistics when its seconds-per-word
 * value lies strictly between the fast and slow cut-offs.
 *
 * Ratios whose denominator is zero (no edited words, no valid words, or an
 * average speed that rounds to zero) are reported as 0 instead of dividing
 * by zero.
 *
 * @param mixed  $jid          Job identifier; passed to getEditLog() and copied into each segment.
 * @param string $password     Passed to getEditLog().
 * @param bool   $use_ter_diff When true, TER score and diff come from MyMemory::diff_tercpp();
 *                             otherwise TER is 0 and the diff comes from MyMemory::diff_html().
 *
 * @return array|false array($segments, $stats), or false when getEditLog() returns nothing.
 */
public static function getEditingLogData($jid, $password, $use_ter_diff = false)
{
    $data = getEditLog($jid, $password);
    if (!$data) {
        return false;
    }

    // Seconds-per-word cut-offs: at or above $slow_cut is "too slow",
    // at or below $fast_cut is "too fast".
    $slow_cut = 30;
    $fast_cut = 0.25;

    $stats = array('total-word-count' => 0);

    // Accumulators are initialised up front so array_sum() never sees an
    // undefined variable when no segment feeds one of them.
    $stat_rwc = array();
    $stat_valid_rwc = array();
    $stat_valid_tte = array();
    $stat_too_slow = array();
    $stat_too_fast = array();
    $stat_pee = array();
    $stat_ter = array();
    $stat_mt = array();

    // Loop-invariant helpers, built once instead of once per segment.
    $lh = Langs_Languages::getInstance();
    $array_patterns = array(
        rtrim(self::lfPlaceholderRegex, 'g'),
        rtrim(self::crPlaceholderRegex, 'g'),
        rtrim(self::crlfPlaceholderRegex, 'g'),
        rtrim(self::tabPlaceholderRegex, 'g'),
        rtrim(self::nbspPlaceholderRegex, 'g'),
    );
    $array_replacements_csv = array('\\n', '\\r', '\\r\\n', '\\t', Utils::unicode2chr(0xa0));
    // The characters inside the _tab and _nbsp spans are literal whitespace
    // characters and must be kept exactly as they are.
    $array_replacements = array(
        '<span class="_0A"></span><br />',
        '<span class="_0D"></span><br />',
        '<span class="_0D0A"></span><br />',
        '<span class="_tab">	</span>',
        '<span class="_nbsp"> </span>',
    );

    foreach ($data as &$seg) {
        $seg['sm'] .= "%";
        $seg['jid'] = $jid;

        $tte = self::parse_time_to_edit($seg['tte']);
        $seg['time_to_edit'] = "{$tte['1']}m:{$tte['2']}s";

        // The raw word count is recorded before the "at least one word" correction below.
        $stat_rwc[] = $seg['rwc'];

        // By definition we cannot have a 0 word sentence. It is probably a "-" or a tag,
        // so we want to consider at least a word.
        if ($seg['rwc'] < 1) {
            $seg['rwc'] = 1;
        }

        $seg['secs-per-word'] = round($seg['tte'] / 1000 / $seg['rwc'], 1);

        if ($seg['secs-per-word'] < $slow_cut && $seg['secs-per-word'] > $fast_cut) {
            $seg['stats-valid'] = 'Yes';
            $seg['stats-valid-color'] = '';
            $seg['stats-valid-style'] = '';

            $stat_valid_rwc[] = $seg['rwc'];
            $stat_valid_tte[] = $seg['tte'];
        } else {
            $seg['stats-valid'] = 'No';
            $seg['stats-valid-color'] = '#ee6633';
            $seg['stats-valid-style'] = 'border:2px solid #EE6633';
        }

        // Stats
        if ($seg['secs-per-word'] >= $slow_cut) {
            $stat_too_slow[] = $seg['rwc'];
        }
        if ($seg['secs-per-word'] <= $fast_cut) {
            $stat_too_fast[] = $seg['rwc'];
        }

        // Post-editing effort, limited to 0..100 and weighted by word count for the average.
        $seg['pe_effort_perc'] = round((1 - MyMemory::TMS_MATCH($seg['sug'], $seg['translation'])) * 100);
        if ($seg['pe_effort_perc'] < 0) {
            $seg['pe_effort_perc'] = 0;
        }
        if ($seg['pe_effort_perc'] > 100) {
            $seg['pe_effort_perc'] = 100;
        }
        $stat_pee[] = $seg['pe_effort_perc'] * $seg['rwc'];
        $seg['pe_effort_perc'] .= "%";

        $lang = $lh->getIsoCode($lh->getLocalizedName($seg['target_lang']));

        $sug_for_diff = self::placehold_xliff_tags($seg['sug']);
        $tra_for_diff = self::placehold_xliff_tags($seg['translation']);

        $ter = $use_ter_diff ? MyMemory::diff_tercpp($sug_for_diff, $tra_for_diff, $lang) : array();

        // Explicit checks instead of "@": a missing score counts as 0, a missing diff as null.
        $ter_score = (is_array($ter) && isset($ter[1])) ? $ter[1] : 0;
        $diff_ter = (is_array($ter) && isset($ter[0])) ? $ter[0] : null;

        // Unrounded value feeds the weighted average; the rounded one is what gets displayed.
        $seg['ter'] = $ter_score * 100;
        $stat_ter[] = $seg['ter'] * $seg['rwc'];
        $seg['ter'] = round($ter_score * 100) . "%";

        // Compared as strings so numeric-looking texts ("1.0" vs "1") are not treated as identical.
        if ((string) $seg['sug'] !== (string) $seg['translation']) {
            if ($use_ter_diff) {
                // force use of third party ter diff
                $seg['diff'] = $diff_ter;
            } else {
                // diff_html is used as long as the TER diff is not requested
                $seg['diff'] = MyMemory::diff_html($sug_for_diff, $tra_for_diff);
            }
        } else {
            $seg['diff'] = '';
        }
        $seg['diff'] = self::restore_xliff_tags_for_view($seg['diff']);

        // BUG: While suggestions source is not correctly set
        if ($seg['sm'] == "85%" || $seg['sm'] == "86%") {
            $seg['ss'] = 'Machine Translation';
            $stat_mt[] = $seg['rwc'];
        } else {
            $seg['ss'] = 'Translation Memory';
        }

        $seg['sug_view'] = trim(CatUtils::rawxliff2view($seg['sug']));
        $seg['source'] = trim(CatUtils::rawxliff2view($seg['source']));
        $seg['translation'] = trim(CatUtils::rawxliff2view($seg['translation']));

        // CSV renderings are produced first, from the texts that still contain the placeholders.
        $seg['source_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['source']);
        $seg['translation_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['translation']);
        $seg['sug_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['sug_view']);
        $seg['diff_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['diff']);

        // HTML renderings then replace the placeholders in the view fields themselves.
        $seg['source'] = preg_replace($array_patterns, $array_replacements, $seg['source']);
        $seg['translation'] = preg_replace($array_patterns, $array_replacements, $seg['translation']);
        $seg['sug_view'] = preg_replace($array_patterns, $array_replacements, $seg['sug_view']);
        $seg['diff'] = preg_replace($array_patterns, $array_replacements, $seg['diff']);

        if ($seg['mt_qe'] == 0) {
            $seg['mt_qe'] = 'N/A';
        }
    }
    // Break the reference so the last row of $data is not left aliased to $seg.
    unset($seg);

    $stats['edited-word-count'] = array_sum($stat_rwc);
    $stats['valid-word-count'] = array_sum($stat_valid_rwc);

    if ($stats['edited-word-count'] > 0) {
        $stats['too-slow-words'] = round(array_sum($stat_too_slow) / $stats['edited-word-count'], 2) * 100;
        $stats['too-fast-words'] = round(array_sum($stat_too_fast) / $stats['edited-word-count'], 2) * 100;
        $stats['avg-pee'] = round(array_sum($stat_pee) / $stats['edited-word-count']) . "%";
        $stats['avg-ter'] = round(array_sum($stat_ter) / $stats['edited-word-count']) . "%";
        $stats['mt-words'] = round(array_sum($stat_mt) / $stats['edited-word-count'], 2) * 100;
        $stats['tm-words'] = 100 - $stats['mt-words'];
    } else {
        // No edited words at all: every share is reported as zero.
        $stats['too-slow-words'] = 0;
        $stats['too-fast-words'] = 0;
        $stats['avg-pee'] = "0%";
        $stats['avg-ter'] = "0%";
        $stats['mt-words'] = 0;
        $stats['tm-words'] = 0;
    }

    // Seconds, used for the weighted average below and replaced by a formatted string afterwards.
    $stats['total-valid-tte'] = round(array_sum($stat_valid_tte) / 1000);

    // Weighted average
    $stats['avg-secs-per-word'] = $stats['valid-word-count'] > 0
        ? round($stats['total-valid-tte'] / $stats['valid-word-count'], 1)
        : 0;

    // Eight working hours; the average can round to zero even with valid segments.
    $words_per_day = $stats['avg-secs-per-word'] > 0
        ? round(3600 * 8 / $stats['avg-secs-per-word'])
        : 0;
    $stats['est-words-per-day'] = number_format($words_per_day, 0, '.', ',');

    // Last minute formatting (after calculations)
    $temp = self::parse_time_to_edit(round(array_sum($stat_valid_tte)));
    $stats['total-valid-tte'] = "{$temp['0']}h:{$temp['1']}m:{$temp['2']}s";
    $stats['total-tte-seconds'] = $temp[0] * 3600 + $temp[1] * 60 + $temp[2];

    return array($data, $stats);
}
/**
 * Daemon loop that computes the average post-editing effort of completed jobs.
 *
 * Each cycle looks up the completed jobs with an id greater than the last
 * processed one, walks them in windows of self::NR_OF_JOBS ids and, for every
 * job, reads its translated segments in windows of self::NR_OF_SEGS ids. The
 * word-weighted post-editing effort and the summed time to edit are written
 * to the job row, then the job id is stored in self::$last_job_file_name.
 * Afterwards the loop sleeps for self::$sleeptime seconds, until
 * self::$RUNNING becomes false.
 *
 * @param mixed $args Not used by this method.
 */
function main($args)
{
    $db = Database::obtain();

    // The query templates never change, so they are built once.
    $queryMaxJob = "select min(id) as min, max(id) as max\n from jobs\n where completed = 1\n and id > %d";
    $queryFirst = "select id, password, job_first_segment, job_last_segment\n from jobs\n where completed = 1\n and id >= %d and id <= %d";
    $querySegments = "select suggestion,\n translation,\n raw_word_count,\n time_to_edit\n from segment_translations st\n join segments s on st.id_segment = s.id\n and s.id between %d and %d\n where status='translated'\n and id_job = %d\n and show_in_cattool = 1\n and id_segment > %d\n limit %d";
    // NOTE(review): the password is placed into the statement as text. It is read from the
    // jobs table here, but it should be escaped or bound through the Database API.
    $queryUpdateJob = "update jobs\n set avg_post_editing_effort = %f,\n total_time_to_edit = %f\n where id = %d and password = '%s'";

    $lastProcessedJob = (int) file_get_contents(self::$last_job_file_name);

    do {
        $minJobMaxJob = $db->query_first(sprintf($queryMaxJob, (int) $lastProcessedJob));

        // min()/max() yield NULL when no job matches; in that case nothing must be processed.
        $hasJobs = isset($minJobMaxJob['min'], $minJobMaxJob['max']);
        $maxJob = $hasJobs ? (int) $minJobMaxJob['max'] : 0;
        $minJob = $hasJobs ? (int) $minJobMaxJob['min'] : 0;

        $start = time();

        // "<=" so that a single pending job (min == max) is processed as well.
        for ($firstJob = $minJob; self::$RUNNING && $hasJobs && $firstJob <= $maxJob; $firstJob += self::NR_OF_JOBS) {
            // Inclusive, non-overlapping id window, capped at the highest id found above.
            $lastJobInWindow = min($firstJob + self::NR_OF_JOBS - 1, $maxJob);
            $jobs = $db->fetch_array(sprintf($queryFirst, $firstJob, $lastJobInWindow));
            $jobsCount = count($jobs);

            // iterate over completed jobs, evaluate PEE and save it in the job row
            for ($j = 0; self::$RUNNING && $j < $jobsCount; $j++) {
                $job = $jobs[$j];

                // BEGIN TRANSACTION
                $db->begin();

                $_jid = $job['id'];
                $_password = $job['password'];
                $_job_first_segment = (int) $job['job_first_segment'];
                $_job_last_segment = (int) $job['job_last_segment'];

                $message = "job {$_jid} -> " . ($_job_last_segment - $_job_first_segment) . " segments";
                Log::doLog($message);
                echo $message . "\n";

                $raw_post_editing_effort_job = 0;
                $raw_wc_job = 0;
                $time_to_edit_job = 0;

                for ($firstSeg = $_job_first_segment; $firstSeg <= $_job_last_segment; $firstSeg += self::NR_OF_SEGS) {
                    $message = "starting from segment {$firstSeg}";
                    Log::doLog($message);
                    echo $message . "\n";

                    // The "between" bounds restrict the query to this window and the
                    // "id_segment >" bound is set one below its start, so the first segment
                    // of the job is included and no row is read by two windows.
                    // Assumes at most one translation row per segment and job, so the
                    // limit cannot truncate a window - TODO confirm.
                    $lastSegInWindow = min($firstSeg + self::NR_OF_SEGS - 1, $_job_last_segment);
                    $segments = $db->fetch_array(
                        sprintf($querySegments, $firstSeg, $lastSegInWindow, $_jid, $firstSeg - 1, self::NR_OF_SEGS)
                    );

                    foreach ($segments as $segment) {
                        $post_editing_effort = round(
                            (1 - MyMemory::TMS_MATCH($segment['suggestion'], $segment['translation'])) * 100
                        );

                        if ($post_editing_effort < 0) {
                            $post_editing_effort = 0;
                        } elseif ($post_editing_effort > 100) {
                            $post_editing_effort = 100;
                        }

                        $raw_wc_job += $segment['raw_word_count'];
                        $time_to_edit_job += $segment['time_to_edit'];
                        $raw_post_editing_effort_job += $post_editing_effort * $segment['raw_word_count'];
                    }

                    // short pause between windows (usleep() takes microseconds)
                    usleep(100);
                }

                // A job without counted words has no effort to average.
                $job_pee = $raw_wc_job > 0 ? round($raw_post_editing_effort_job / $raw_wc_job, 3) : 0;

                $message = "job pee: {$job_pee}\njob time to edit: {$time_to_edit_job}\nWriting into DB";
                Log::doLog($message);
                echo $message . "\n";

                $db->query(sprintf($queryUpdateJob, $job_pee, $time_to_edit_job, $_jid, $_password));

                Log::doLog("done");
                echo "done.\n";

                // The update is only committed once the progress marker has been stored.
                if (file_put_contents(self::$last_job_file_name, $_jid) === false) {
                    $db->rollback();
                    Utils::sendErrMailReport("", "[JobPostEditingEffortRunner] Failed to process job {$_jid}");
                    self::$RUNNING = false;
                    break;
                }

                // COMMIT TRANSACTION
                $db->commit();

                // Keep the in-memory marker in step with the file, so the next cycle
                // does not select the jobs handled in this one again.
                $lastProcessedJob = max($lastProcessedJob, (int) $_jid);
            }
        }

        $message = "took " . (time() - $start) . " seconds";
        Log::doLog($message);
        echo $message . "\n";

        Log::doLog("sleeping for 1 month");
        echo "sleeping for 1 month\n";

        if (self::$RUNNING) {
            sleep(self::$sleeptime);
        }
    } while (self::$RUNNING);
}
/**
 * Builds the per-segment editing log of a job together with aggregate statistics.
 *
 * Every row returned by getEditLog() is enriched in place with display fields
 * (time to edit, seconds per word, validity flags, post-editing effort, diff,
 * suggestion source). While doing so, word-weighted totals are collected and
 * turned into the $stats array.
 *
 * A segment is "valid" for the timing statistics when its seconds-per-word
 * value lies strictly between the fast and slow cut-offs.
 *
 * Ratios whose denominator is zero (no edited words, no valid words, or an
 * average speed that rounds to zero) are reported as 0 instead of dividing
 * by zero.
 *
 * @param mixed  $jid      Job identifier; passed to getEditLog() and copied into each segment.
 * @param string $password Passed to getEditLog().
 *
 * @return array|false array($segments, $stats), or false when getEditLog() returns nothing.
 */
public static function getEditingLogData($jid, $password)
{
    $data = getEditLog($jid, $password);
    if (!$data) {
        return false;
    }

    // Seconds-per-word cut-offs: at or above $slow_cut is "too slow",
    // at or below $fast_cut is "too fast".
    $slow_cut = 30;
    $fast_cut = 0.25;

    $stats = array('total-word-count' => 0);

    // Accumulators are initialised up front so array_sum() never sees an
    // undefined variable when no segment feeds one of them.
    $stat_rwc = array();
    $stat_valid_rwc = array();
    $stat_valid_tte = array();
    $stat_too_slow = array();
    $stat_too_fast = array();
    $stat_pee = array();
    $stat_mt = array();

    foreach ($data as &$seg) {
        $seg['source'] = trim($seg['source']);
        $seg['sm'] .= "%";
        $seg['jid'] = $jid;

        $tte = self::parse_time_to_edit($seg['tte']);
        $seg['time_to_edit'] = "{$tte['1']}m:{$tte['2']}s";

        // The raw word count is recorded before the "at least one word" correction below.
        $stat_rwc[] = $seg['rwc'];

        // By definition we cannot have a 0 word sentence. It is probably a "-" or a tag,
        // so we want to consider at least a word.
        if ($seg['rwc'] < 1) {
            $seg['rwc'] = 1;
        }

        $seg['secs-per-word'] = round($seg['tte'] / 1000 / $seg['rwc'], 1);

        if ($seg['secs-per-word'] < $slow_cut && $seg['secs-per-word'] > $fast_cut) {
            $seg['stats-valid'] = 'Yes';
            $seg['stats-valid-color'] = '';
            $seg['stats-valid-style'] = '';

            $stat_valid_rwc[] = $seg['rwc'];
            $stat_valid_tte[] = $seg['tte'];
        } else {
            $seg['stats-valid'] = 'No';
            $seg['stats-valid-color'] = '#ee6633';
            $seg['stats-valid-style'] = 'border:2px solid #EE6633';
        }

        // Stats
        if ($seg['secs-per-word'] >= $slow_cut) {
            $stat_too_slow[] = $seg['rwc'];
        }
        if ($seg['secs-per-word'] <= $fast_cut) {
            $stat_too_fast[] = $seg['rwc'];
        }

        // Post-editing effort, limited to 0..100 and weighted by word count for the average.
        $seg['pe_effort_perc'] = round((1 - MyMemory::TMS_MATCH($seg['sug'], $seg['translation'])) * 100);
        if ($seg['pe_effort_perc'] < 0) {
            $seg['pe_effort_perc'] = 0;
        }
        if ($seg['pe_effort_perc'] > 100) {
            $seg['pe_effort_perc'] = 100;
        }
        $stat_pee[] = $seg['pe_effort_perc'] * $seg['rwc'];
        $seg['pe_effort_perc'] .= "%";

        $seg['sug_view'] = html_entity_decode($seg['sug']);

        // Compared as strings so numeric-looking texts ("1.0" vs "1") are not treated as identical.
        if ((string) $seg['sug'] !== (string) $seg['translation']) {
            $seg['diff'] = MyMemory::diff_html($seg['sug'], $seg['translation']);
        } else {
            $seg['diff'] = '';
        }

        // BUG: While suggestions source is not correctly set
        if ($seg['sm'] == "85%" || $seg['sm'] == "86%") {
            $seg['ss'] = 'Machine Translation';
            $stat_mt[] = $seg['rwc'];
        } else {
            $seg['ss'] = 'Translation Memory';
        }
    }
    // Break the reference so the last row of $data is not left aliased to $seg.
    unset($seg);

    $stats['edited-word-count'] = array_sum($stat_rwc);
    $stats['valid-word-count'] = array_sum($stat_valid_rwc);

    if ($stats['edited-word-count'] > 0) {
        $stats['too-slow-words'] = round(array_sum($stat_too_slow) / $stats['edited-word-count'], 2) * 100;
        $stats['too-fast-words'] = round(array_sum($stat_too_fast) / $stats['edited-word-count'], 2) * 100;
        $stats['avg-pee'] = round(array_sum($stat_pee) / $stats['edited-word-count']) . "%";
        $stats['mt-words'] = round(array_sum($stat_mt) / $stats['edited-word-count'], 2) * 100;
        $stats['tm-words'] = 100 - $stats['mt-words'];
    } else {
        // No edited words at all: every share is reported as zero.
        $stats['too-slow-words'] = 0;
        $stats['too-fast-words'] = 0;
        $stats['avg-pee'] = "0%";
        $stats['mt-words'] = 0;
        $stats['tm-words'] = 0;
    }

    // Seconds, used for the weighted average below and replaced by a formatted string afterwards.
    $stats['total-valid-tte'] = round(array_sum($stat_valid_tte) / 1000);

    // Weighted average
    $stats['avg-secs-per-word'] = $stats['valid-word-count'] > 0
        ? round($stats['total-valid-tte'] / $stats['valid-word-count'], 1)
        : 0;

    // Eight working hours; the average can round to zero even with valid segments.
    $words_per_day = $stats['avg-secs-per-word'] > 0
        ? round(3600 * 8 / $stats['avg-secs-per-word'])
        : 0;
    $stats['est-words-per-day'] = number_format($words_per_day, 0, '.', ',');

    // Last minute formatting (after calculations)
    $temp = self::parse_time_to_edit(round(array_sum($stat_valid_tte)));
    $stats['total-valid-tte'] = "{$temp['0']}h:{$temp['1']}m:{$temp['2']}s";

    return array($data, $stats);
}
/**
 * Computes the post-editing effort of this segment as a percentage.
 *
 * The value is round((1 - MyMemory::TMS_MATCH(suggestion, translation)) * 100),
 * limited to the 0..100 range.
 *
 * @return float|int
 */
public function getPEE()
{
    $effort = round((1 - MyMemory::TMS_MATCH($this->suggestion, $this->translation)) * 100);

    // Below the range: report no effort at all.
    if ($effort < 0) {
        return 0;
    }

    // Above the range: cap at full effort; otherwise hand back the rounded value.
    return $effort > 100 ? 100 : $effort;
}