/**
 * Builds a match from one associative array or from positional arguments.
 *
 * Supported call shapes (read through func_get_args()):
 *  - one array: its keys are used as they are, except that 'match' is
 *    multiplied by 100 and suffixed with "%", 'last-update-date' is reduced
 *    to Y-m-d, an empty 'created-by' becomes "Anonymous" and 'prop' is
 *    JSON-decoded (or defaults to an empty array);
 *  - five or six non-array values: raw segment, raw translation, match,
 *    created-by, last-update-date and an optional prop value.
 *
 * Any key still missing afterwards falls back to the default listed below.
 *
 * @throws Exception when no argument is given, or when an array is followed by further arguments
 */
public function __construct()
{
    $args = func_get_args();
    if (empty($args)) {
        throw new Exception("No args defined for " . __CLASS__ . " constructor");
    }

    $argCount = count($args);
    $firstIsArray = is_array($args[0]);

    if ($firstIsArray and $argCount > 1) {
        throw new Exception("Invalid arg 1 " . __CLASS__ . " constructor");
    }

    // Every accepted call shape is normalised into this single array.
    $match = array();

    if ($firstIsArray) {
        $match = $args[0];

        // The keys are checked before being read: a partial array must not raise notices.
        if (isset($match['last-update-date']) and $match['last-update-date'] == "0000-00-00 00:00:00") {
            $match['last-update-date'] = "0000-00-00";
        }
        if (!empty($match['last-update-date']) and $match['last-update-date'] != '0000-00-00') {
            $match['last-update-date'] = date("Y-m-d", strtotime($match['last-update-date']));
        }
        if (empty($match['created-by'])) {
            $match['created-by'] = "Anonymous";
        }

        // A missing score is rendered as "0%", which is what the unchecked read used to yield.
        $score = isset($match['match']) ? $match['match'] : 0;
        $match['match'] = ($score * 100) . "%";

        $match['prop'] = isset($match['prop']) ? json_decode($match['prop']) : array();
    } elseif ($argCount == 5 or $argCount == 6) {
        // The sixth argument (prop) is optional. It used to be unreachable because
        // only calls with exactly five arguments entered this branch.
        $match['segment'] = CatUtils::rawxliff2view($args[0]);
        $match['raw_segment'] = $args[0];
        $match['translation'] = CatUtils::rawxliff2view($args[1]);
        $match['raw_translation'] = $args[1];
        $match['match'] = $args[2];
        $match['created-by'] = $args[3];
        $match['last-update-date'] = $args[4];
        $match['prop'] = isset($args[5]) ? $args[5] : array();
    }

    // property name => array( key in $match, default when the key is absent )
    $fields = array(
        'id' => array('id', '0'),
        'create_date' => array('create-date', '0000-00-00'),
        'segment' => array('segment', ''),
        'raw_segment' => array('raw_segment', ''),
        'translation' => array('translation', ''),
        'source_note' => array('source_note', ''),
        'target_note' => array('target_note', ''),
        'raw_translation' => array('raw_translation', ''),
        'quality' => array('quality', 0),
        'reference' => array('reference', ''),
        'usage_count' => array('usage-count', 0),
        'subject' => array('subject', ''),
        'created_by' => array('created-by', ''),
        'last_updated_by' => array('last-updated-by', ''),
        'last_update_date' => array('last-update-date', '0000-00-00'),
        'match' => array('match', 0),
        'memory_key' => array('key', ''),
        // 'prop' is absent only for unsupported argument counts; default it like the other call shapes do.
        'prop' => array('prop', array()),
    );

    foreach ($fields as $property => $spec) {
        $key = $spec[0];
        $default = $spec[1];
        $this->{$property} = array_key_exists($key, $match) ? $match[$key] : $default;
    }
}
/**
 * Runs CatUtils::rawxliff2view() on a tagged segment and compares the result
 * with the expected fixture. As written here, the input and the expected
 * value are the same literal.
 */
public function testRawXliff2View()
{
    $source_seg = <<<SRC
<g id="43">bang & olufsen < 3 ' > 1</g> <x id="33"/>
SRC;
    $source_expected = <<<SRC
<g id="43">bang & olufsen < 3 ' > 1</g> <x id="33"/>
SRC;

    // Keep the input untouched so a failure report can show it next to the result.
    $actual = CatUtils::rawxliff2view($source_seg);

    // assertEquals() takes the expected value first; with the arguments swapped
    // a failure message would label the two values the wrong way round.
    $this->assertEquals($source_expected, $actual);
}
/**
 * Wraps a decoded MT engine response.
 *
 * The error member always starts as an empty MT_ERROR. When the response is
 * an array it may additionally provide:
 *  - "data": the first translation's text, converted with CatUtils::rawxliff2view();
 *  - "error": the payload used to build the MT_ERROR that replaces the empty one.
 *
 * @param mixed $result decoded response; anything that is not an array is ignored
 */
public function __construct($result)
{
    $this->error = new MT_ERROR();

    if (!is_array($result)) {
        return;
    }

    if (array_key_exists("data", $result)) {
        $engineText = $result['data']['translations'][0]['translatedText'];
        $this->translatedText = CatUtils::rawxliff2view($engineText);
    }

    if (array_key_exists("error", $result)) {
        $this->error = new MT_ERROR($result['error']);
    }
}
/**
 * Wraps a decoded TM response.
 *
 * responseData, responseDetails and responseStatus are copied when present
 * and default to an empty string. Every entry of a non-empty 'matches' array
 * becomes an Engines_Results_MyMemory_Matches object: the original segment
 * and translation are kept under raw_segment / raw_translation, and the
 * segment / translation keys receive the CatUtils::rawxliff2view() version.
 *
 * @param mixed $result decoded response
 */
public function __construct($result)
{
    foreach (array('responseData', 'responseDetails', 'responseStatus') as $field) {
        $this->{$field} = isset($result[$field]) ? $result[$field] : '';
    }

    if (!is_array($result) || empty($result) || !array_key_exists('matches', $result)) {
        return;
    }

    $rawMatches = $result['matches'];
    if (!is_array($rawMatches) || empty($rawMatches)) {
        return;
    }

    foreach ($rawMatches as $rawMatch) {
        $viewMatch = $rawMatch;
        $viewMatch['raw_segment'] = $rawMatch['segment'];
        $viewMatch['segment'] = CatUtils::rawxliff2view($rawMatch['segment']);
        $viewMatch['raw_translation'] = $rawMatch['translation'];
        $viewMatch['translation'] = CatUtils::rawxliff2view($rawMatch['translation']);

        $this->matches[] = new Engines_Results_MyMemory_Matches($viewMatch);
    }
}
/**
 * Controller action: collects the translation mismatches of the current
 * segment and stores them in $this->result.
 *
 * The 'available' value returned by countThisTranslatedHashInJob() is exposed
 * as 'prop_available'; getTranslationsMismatches() is queried only when that
 * value is non-zero. Each returned row is filed under 'editable' or
 * 'not_editable' according to its 'editable' flag.
 *
 * @return mixed nothing is returned explicitly; the outcome is written to $this->result
 */
function doAction()
{
    $this->parseIDSegment();

    $propagationInfo = countThisTranslatedHashInJob($this->id_job, $this->password, $this->id_segment);
    $availablePropagations = intval($propagationInfo['available']);

    $mismatchRows = array();
    if ($availablePropagations) {
        $mismatchRows = getTranslationsMismatches($this->id_job, $this->password, $this->id_segment);
    }

    $payload = array(
        'editable' => array(),
        'not_editable' => array(),
        'prop_available' => $availablePropagations,
    );

    foreach ($mismatchRows as $row) {
        // Both buckets carry the same entry shape; only the destination differs.
        $bucket = $row['editable'] ? 'editable' : 'not_editable';

        $payload[$bucket][] = array(
            'translation' => CatUtils::rawxliff2view($row['translation']),
            'TOT' => $row['TOT'],
            'involved_id' => explode(",", $row['involved_id']),
        );
    }

    $this->result['code'] = 1;
    $this->result['data'] = $payload;
}
/**
 * Builds the editing log of a job: one decorated row per segment plus
 * aggregate statistics.
 *
 * Each row returned by getEditLog() is enriched in place with display fields
 * (time to edit, seconds per word, validity flag, post-editing effort, TER,
 * diff, suggestion source, view/CSV renderings of source, suggestion,
 * translation and diff).
 *
 * @param mixed $jid          job id, passed to getEditLog() and copied into every row
 * @param mixed $password     job password, passed to getEditLog()
 * @param bool  $use_ter_diff when true, TER and diff come from MyMemory::diff_tercpp();
 *                            otherwise TER is reported as 0 and the diff comes from MyMemory::diff_html()
 *
 * @return array|false array( $data, $stats ), or false when getEditLog() yields nothing
 */
public static function getEditingLogData($jid, $password, $use_ter_diff = false)
{
    $data = getEditLog($jid, $password);
    if (!$data) {
        return false;
    }

    // A segment is "valid" for statistics only when its seconds-per-word value
    // lies strictly between these two bounds.
    $slow_cut = 30;
    $fast_cut = 0.25;

    $stats = array('total-word-count' => 0);

    // Every accumulator starts empty so that array_sum() below never receives an
    // undefined variable (e.g. when no segment falls inside the valid range).
    $stat_rwc = array();
    $stat_valid_rwc = array();
    $stat_valid_tte = array();
    $stat_too_slow = array();
    $stat_too_fast = array();
    $stat_pee = array();
    $stat_ter = array();
    $stat_mt = array();

    // Loop-invariant helpers, built once.
    $lh = Langs_Languages::getInstance();

    $array_patterns = array(
        rtrim(self::lfPlaceholderRegex, 'g'),
        rtrim(self::crPlaceholderRegex, 'g'),
        rtrim(self::crlfPlaceholderRegex, 'g'),
        rtrim(self::tabPlaceholderRegex, 'g'),
        rtrim(self::nbspPlaceholderRegex, 'g'),
    );
    $array_replacements_csv = array('\\n', '\\r', '\\r\\n', '\\t', Utils::unicode2chr(0xa0));
    $array_replacements = array(
        '<span class="_0A"></span><br />',
        '<span class="_0D"></span><br />',
        '<span class="_0D0A"></span><br />',
        '<span class="_tab">	</span>',
        '<span class="_nbsp"> </span>',
    );

    foreach ($data as &$seg) {
        $seg['sm'] .= "%";
        $seg['jid'] = $jid;

        $tte = self::parse_time_to_edit($seg['tte']);
        $seg['time_to_edit'] = "{$tte['1']}m:{$tte['2']}s";

        // The edited-word total uses the raw count, before the clamp below.
        $stat_rwc[] = $seg['rwc'];

        // by definition we cannot have a 0 word sentence. It is probably a - or a tag,
        // so we want to consider at least a word.
        if ($seg['rwc'] < 1) {
            $seg['rwc'] = 1;
        }

        $seg['secs-per-word'] = round($seg['tte'] / 1000 / $seg['rwc'], 1);

        if ($seg['secs-per-word'] < $slow_cut and $seg['secs-per-word'] > $fast_cut) {
            $seg['stats-valid'] = 'Yes';
            $seg['stats-valid-color'] = '';
            $seg['stats-valid-style'] = '';

            $stat_valid_rwc[] = $seg['rwc'];
            $stat_valid_tte[] = $seg['tte'];
        } else {
            $seg['stats-valid'] = 'No';
            $seg['stats-valid-color'] = '#ee6633';
            $seg['stats-valid-style'] = 'border:2px solid #EE6633';
        }

        // Stats
        if ($seg['secs-per-word'] >= $slow_cut) {
            $stat_too_slow[] = $seg['rwc'];
        }
        if ($seg['secs-per-word'] <= $fast_cut) {
            $stat_too_fast[] = $seg['rwc'];
        }

        // Post-editing effort, clamped to the 0..100 range.
        $seg['pe_effort_perc'] = round((1 - MyMemory::TMS_MATCH($seg['sug'], $seg['translation'])) * 100);
        if ($seg['pe_effort_perc'] < 0) {
            $seg['pe_effort_perc'] = 0;
        }
        if ($seg['pe_effort_perc'] > 100) {
            $seg['pe_effort_perc'] = 100;
        }
        $stat_pee[] = $seg['pe_effort_perc'] * $seg['rwc'];
        $seg['pe_effort_perc'] .= "%";

        $lang = $lh->getIsoCode($lh->getLocalizedName($seg['target_lang']));

        $sug_for_diff = self::placehold_xliff_tags($seg['sug']);
        $tra_for_diff = self::placehold_xliff_tags($seg['translation']);

        $ter = $use_ter_diff ? MyMemory::diff_tercpp($sug_for_diff, $tra_for_diff, $lang) : array();

        // Without the TER engine $ter is empty: read both slots defensively
        // instead of silencing the notices with "@".
        $ter_score = isset($ter[1]) ? $ter[1] : 0;
        $diff_ter = isset($ter[0]) ? $ter[0] : null;

        $seg['ter'] = $ter_score * 100;
        $stat_ter[] = $seg['ter'] * $seg['rwc'];
        $seg['ter'] = round($ter_score * 100) . "%";

        if ($seg['sug'] != $seg['translation']) {
            // force use of third party ter diff when requested;
            // otherwise diff_html is used until ter_diff works properly
            $seg['diff'] = $use_ter_diff ? $diff_ter : MyMemory::diff_html($sug_for_diff, $tra_for_diff);
        } else {
            $seg['diff'] = '';
        }
        $seg['diff'] = self::restore_xliff_tags_for_view($seg['diff']);

        // BUG: While suggestions source is not correctly set, an 85% / 86%
        // suggestion match is treated as Machine Translation.
        if ($seg['sm'] == "85%" or $seg['sm'] == "86%") {
            $seg['ss'] = 'Machine Translation';
            $stat_mt[] = $seg['rwc'];
        } else {
            $seg['ss'] = 'Translation Memory';
        }

        $seg['sug_view'] = trim(CatUtils::rawxliff2view($seg['sug']));
        $seg['source'] = trim(CatUtils::rawxliff2view($seg['source']));
        $seg['translation'] = trim(CatUtils::rawxliff2view($seg['translation']));

        // CSV renderings are taken before the HTML placeholders are injected.
        $seg['source_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['source']);
        $seg['translation_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['translation']);
        $seg['sug_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['sug_view']);
        $seg['diff_csv'] = preg_replace($array_patterns, $array_replacements_csv, $seg['diff']);

        $seg['source'] = preg_replace($array_patterns, $array_replacements, $seg['source']);
        $seg['translation'] = preg_replace($array_patterns, $array_replacements, $seg['translation']);
        $seg['sug_view'] = preg_replace($array_patterns, $array_replacements, $seg['sug_view']);
        $seg['diff'] = preg_replace($array_patterns, $array_replacements, $seg['diff']);

        if ($seg['mt_qe'] == 0) {
            $seg['mt_qe'] = 'N/A';
        }
    }
    // Break the reference so later writes to $seg cannot touch the last row.
    unset($seg);

    $stats['edited-word-count'] = array_sum($stat_rwc);
    $stats['valid-word-count'] = array_sum($stat_valid_rwc);

    if ($stats['edited-word-count'] > 0) {
        $stats['too-slow-words'] = round(array_sum($stat_too_slow) / $stats['edited-word-count'], 2) * 100;
        $stats['too-fast-words'] = round(array_sum($stat_too_fast) / $stats['edited-word-count'], 2) * 100;
        $stats['avg-pee'] = round(array_sum($stat_pee) / array_sum($stat_rwc)) . "%";
        $stats['avg-ter'] = round(array_sum($stat_ter) / array_sum($stat_rwc)) . "%";
    }

    // The three ratios below fall back to 0 instead of dividing by zero when the
    // job has no edited words or no valid segment.
    $stats['mt-words'] = $stats['edited-word-count'] > 0
        ? round(array_sum($stat_mt) / $stats['edited-word-count'], 2) * 100
        : 0;
    $stats['tm-words'] = 100 - $stats['mt-words'];

    $stats['total-valid-tte'] = round(array_sum($stat_valid_tte) / 1000);

    // Weighted average (total valid time over total valid words)
    $stats['avg-secs-per-word'] = $stats['valid-word-count'] > 0
        ? round($stats['total-valid-tte'] / $stats['valid-word-count'], 1)
        : 0;

    $words_per_day = $stats['avg-secs-per-word'] != 0
        ? round(3600 * 8 / $stats['avg-secs-per-word'])
        : 0;
    $stats['est-words-per-day'] = number_format($words_per_day, 0, '.', ',');

    // Last minute formatting (after calculations)
    $temp = self::parse_time_to_edit(round(array_sum($stat_valid_tte)));
    $stats['total-valid-tte'] = "{$temp['0']}h:{$temp['1']}m:{$temp['2']}s";
    $stats['total-tte-seconds'] = $temp[0] * 3600 + $temp[1] * 60 + $temp[2];

    return array($data, $stats);
}
/**
 * Controller action: records the current segment and reports the next
 * segment id together with the segment's revision data.
 *
 * Validation errors (wrong password, missing segment id, missing job id) are
 * accumulated in $this->result['errors']; when any is present nothing else
 * is done.
 *
 * When the segment is split and the current chunk is not the last one, the
 * next id is "<id_segment>-<split_num + 1>". Otherwise the next id is picked
 * by fetchStatus() from the list returned by getNextSegment(); in revision
 * mode it is searched with the TRANSLATED status first and with the APPROVED
 * status as a fallback.
 */
public function doAction()
{
    $this->parseIDSegment();

    // Job data is needed for the password check
    $job_data = getJobData((int) $this->id_job);

    $passwordChecker = new AjaxPasswordCheck();
    if (!$passwordChecker->grantJobAccessByJobData($job_data, $this->password)) {
        $this->result['errors'][] = array("code" => -10, "message" => "wrong password");
    }
    if (empty($this->id_segment)) {
        $this->result['errors'][] = array("code" => -1, "message" => "missing segment id");
    }
    if (empty($this->id_job)) {
        $this->result['errors'][] = array("code" => -2, "message" => "missing Job id");
    }

    // no action on errors
    if (!empty($this->result['errors'])) {
        return;
    }

    $splitQuery = TranslationsSplit_SplitStruct::getStruct();
    $splitQuery->id_segment = $this->id_segment;
    $splitQuery->id_job = $this->id_job;

    $splitDao = new TranslationsSplit_SplitDAO(Database::obtain());
    $splitRows = $splitDao->read($splitQuery);

    // Split check: is this segment split, and is the current chunk the last one?
    $segmentIsSplit = count($splitRows) > 0;
    $onLastChunk = true;

    if ($segmentIsSplit) {
        $splitInfo = array_shift($splitRows);
        $onLastChunk = $this->split_num == count($splitInfo->source_chunk_lengths) - 1;

        if (!$onLastChunk) {
            $nextSegmentId = $this->id_segment . "-" . ($this->split_num + 1);
        }
    }

    // Only a non-final chunk of a split segment already knows its successor.
    if (!($segmentIsSplit && !$onLastChunk)) {
        $segmentList = getNextSegment($this->id_segment, $this->id_job, $this->password, (bool) self::isRevision());

        if (self::isRevision()) {
            $nextSegmentId = fetchStatus($this->id_segment, $segmentList, Constants_TranslationStatus::STATUS_TRANSLATED);
            if (!$nextSegmentId) {
                $nextSegmentId = fetchStatus($this->id_segment, $segmentList, Constants_TranslationStatus::STATUS_APPROVED);
            }
        } else {
            $nextSegmentId = fetchStatus($this->id_segment, $segmentList);
        }
    }

    setCurrentSegmentInsert($this->id_segment, $this->id_job, $this->password);

    $this->result['code'] = 1;
    $this->result['data'] = array();

    // Segment revision information
    $reviseDao = new Revise_ReviseDAO(Database::obtain());
    $reviseQuery = Revise_ReviseStruct::getStruct();
    $reviseQuery->id_job = $this->id_job;
    $reviseQuery->id_segment = $this->id_segment;

    $reviseRows = $reviseDao->read($reviseQuery);
    if (count($reviseRows) > 0) {
        $reviseRow = $reviseRows[0];
    } else {
        $reviseRow = Revise_ReviseStruct::getStruct();
    }

    $reviseRow = Revise_ReviseStruct::setDefaultValues($reviseRow);
    $reviseForClient = self::prepareReviseStructReturnValues($reviseRow);

    $this->result['nextSegmentId'] = $nextSegmentId;
    $this->result['error_data'] = $reviseForClient;
    $this->result['original'] = CatUtils::rawxliff2view($reviseRow->original_translation);
}
/**
 * Controller action: loads a window of segments and groups them by file.
 *
 * After the password check, the rows returned by getMoreSegments() are
 * walked once. Job-level members of the controller (pname, cid, pid, ...)
 * are filled from the first row that provides them. A per-file entry is
 * created in $this->data the first time a file id is met. Each row, stripped
 * of its job/file-level columns and with segment and translation converted
 * for the view, is appended to that file's 'segments' list.
 *
 * The outcome is stored in $this->result['data'] ('files' and 'where'); a
 * failed password check only adds error -10 to $this->result['errors'].
 */
public function doAction()
{
    // Job data is needed for the password check
    $job_data = getJobData($this->jid);

    $passwordChecker = new AjaxPasswordCheck();
    if (!$passwordChecker->grantJobAccessByJobData($job_data, $this->password)) {
        $this->result['errors'][] = array("code" => -10, "message" => "wrong password");

        return;
    }

    $lang_handler = Langs_Languages::getInstance();

    if ($this->ref_segment == '') {
        $this->ref_segment = 0;
    }

    $data = getMoreSegments($this->jid, $this->password, $this->step, $this->ref_segment, $this->where);
    $this->prepareNotes($data);

    // controller member => row column it is taken from when still empty
    $jobLevelFields = array(
        'pname' => 'pname',
        'last_opened_segment' => 'last_opened_segment',
        'cid' => 'cid',
        'pid' => 'pid',
        'tid' => 'tid',
        'create_date' => 'create_date',
        'source_code' => 'source',
        'target_code' => 'target',
    );

    foreach ($data as $row) {
        // When paging backwards, stop at the reference segment.
        if ($this->where == 'before' && (double) $row['sid'] >= (double) $this->ref_segment) {
            break;
        }

        foreach ($jobLevelFields as $member => $column) {
            if (empty($this->{$member})) {
                $this->{$member} = $row[$column];
            }
        }

        // Short language codes: upper-cased part before the first dash
        if (empty($this->source)) {
            $sourceParts = explode("-", $row['source']);
            $this->source = strtoupper($sourceParts[0]);
        }
        if (empty($this->target)) {
            $targetParts = explode("-", $row['target']);
            $this->target = strtoupper($targetParts[0]);
        }
        if (empty($this->err)) {
            $this->err = $row['serialized_errors_list'];
        }

        $id_file = $row['id_file'];
        $fileKey = "{$id_file}";

        if (!isset($this->data[$fileKey])) {
            $fileEntry = array();
            $fileEntry['jid'] = $row['jid'];
            $fileEntry["filename"] = ZipArchiveExtended::getFileName($row['filename']);
            $fileEntry["mime_type"] = $row['mime_type'];
            $fileEntry['source'] = $lang_handler->getLocalizedName($row['source']);
            $fileEntry['target'] = $lang_handler->getLocalizedName($row['target']);
            $fileEntry['source_code'] = $row['source'];
            $fileEntry['target_code'] = $row['target'];
            $fileEntry['segments'] = array();

            $this->data[$fileKey] = $fileEntry;
        }

        // Job/file-level columns are not repeated inside every segment.
        unset(
            $row['id_file'],
            $row['source'],
            $row['target'],
            $row['source_code'],
            $row['target_code'],
            $row['mime_type'],
            $row['filename'],
            $row['jid'],
            $row['pid'],
            $row['cid'],
            $row['tid'],
            $row['pname'],
            $row['create_date'],
            $row['id_segment_end'],
            $row['id_segment_start'],
            $row['serialized_errors_list']
        );

        $row['parsed_time_to_edit'] = CatUtils::parse_time_to_edit($row['time_to_edit']);

        // Missing chunk information is replaced by its JSON default before decoding.
        if ($row['source_chunk_lengths'] === null) {
            $row['source_chunk_lengths'] = '[]';
        }
        if ($row['target_chunk_lengths'] === null) {
            $row['target_chunk_lengths'] = '{"len":[0],"statuses":["DRAFT"]}';
        }
        $row['source_chunk_lengths'] = json_decode($row['source_chunk_lengths'], true);
        $row['target_chunk_lengths'] = json_decode($row['target_chunk_lengths'], true);

        $splitSource = CatUtils::reApplySegmentSplit($row['segment'], $row['source_chunk_lengths']);
        $row['segment'] = CatUtils::rawxliff2view($splitSource);

        $splitTarget = CatUtils::reApplySegmentSplit($row['translation'], $row['target_chunk_lengths']['len']);
        $row['translation'] = CatUtils::rawxliff2view($splitTarget);

        $this->attachNotes($row);

        $this->data[$fileKey]['segments'][] = $row;
    }

    $this->result['data']['files'] = $this->data;
    $this->result['data']['where'] = $this->where;
}
/**
 * Controller action: collects TM / MT suggestions for a text and stores them
 * in $this->result['data']['matches'].
 *
 * Flow, as visible in this method:
 *  1. validate the request members (id_segment only outside concordance
 *     search, text, id_job) and check the job password;
 *  2. pick source/target languages from the job data (swapped when
 *     switch_languages is set during a concordance search);
 *  3. query the TM engine when one applies, then a custom MT engine when
 *     id_mt_engine > 1;
 *  4. merge, sort and truncate the matches to num_results;
 *  5. for a first match scoring in [90, 100) try to realign tag ids and
 *     promote the match to 99% / 100%;
 *  6. post-process every match (MT labelling, confidence formatting,
 *     concordance highlighting).
 *
 * NOTE(review): the early exits are not uniform: validation and password
 * failures return -1, the TM-key failure returns nothing, and success
 * returns nothing.
 *
 * @return int|null -1 on validation or password errors, otherwise no value
 */
public function doAction()
{
    if (!$this->concordance_search) {
        //execute these lines only in segment contribution search,
        //in case of user concordance search skip these lines
        //because segment can be optional
        if (empty($this->id_segment)) {
            $this->result['errors'][] = array("code" => -1, "message" => "missing id_segment");
        }
    }
    if (is_null($this->text) || $this->text === '') {
        $this->result['errors'][] = array("code" => -2, "message" => "missing text");
    }
    if (empty($this->id_job)) {
        $this->result['errors'][] = array("code" => -3, "message" => "missing id_job");
    }
    // A missing result count is not an error: fall back to the configured default.
    if (empty($this->num_results)) {
        $this->num_results = INIT::$DEFAULT_NUM_RESULTS_FROM_TM;
    }
    if (!empty($this->result['errors'])) {
        return -1;
    }

    //get Job Infos, we need only a row of jobs ( split )
    $this->jobData = getJobData($this->id_job, $this->password);
    $pCheck = new AjaxPasswordCheck();

    //check for Password correctness
    if (empty($this->jobData) || !$pCheck->grantJobAccessByJobData($this->jobData, $this->password)) {
        $this->result['errors'][] = array("code" => -10, "message" => "wrong password");
        return -1;
    }

    /*
     * string manipulation strategy
     */
    if (!$this->concordance_search) {
        // Segment contribution search: the text is converted with view2rawxliff()
        // before being sent to the engines.
        $this->text = CatUtils::view2rawxliff($this->text);
        $this->source = $this->jobData['source'];
        $this->target = $this->jobData['target'];
    } else {
        // Pattern => replacement pairs, applied to every match at the end of this method.
        $regularExpressions = $this->tokenizeSourceSearch();
        if ($this->switch_languages) {
            /*
             * switch languages from user concordances search on the target language value
             * Example:
             * Job is in
             *      source: it_IT,
             *      target: de_DE
             *
             * user perform a right click for concordance help on a german word or phrase
             * we want result in italian from german source
             */
            $this->source = $this->jobData['target'];
            $this->target = $this->jobData['source'];
        } else {
            $this->source = $this->jobData['source'];
            $this->target = $this->jobData['target'];
        }
    }

    $this->id_mt_engine = $this->jobData['id_mt_engine'];
    $this->id_tms = $this->jobData['id_tms'];
    $this->tm_keys = $this->jobData['tm_keys'];

    $config = array();
    if ($this->id_tms == 1) {
        /**
         * MyMemory Enabled
         */
        $config['get_mt'] = true;
        $config['mt_only'] = false;
        if ($this->id_mt_engine != 1) {
            /**
             * Don't get MT contribution from MyMemory ( Custom MT )
             */
            $config['get_mt'] = false;
        }
        $_TMS = $this->id_tms;
    } else {
        if ($this->id_tms == 0 && $this->id_mt_engine == 1) {
            /**
             * MyMemory disabled but MT Enabled and it is NOT a Custom one
             * So tell to MyMemory to get MT only
             */
            $config['get_mt'] = true;
            $config['mt_only'] = true;
            $_TMS = 1;
            /* MyMemory */
        }
    }

    /**
     * if No TM server and No MT selected $_TMS is not defined
     * so we want not to perform TMS Call
     */
    if (isset($_TMS)) {
        /**
         * @var $tms Engines_MyMemory
         */
        $tms = Engine::getInstance($_TMS);
        // Values set above (get_mt / mt_only) override the engine defaults.
        $config = array_merge($tms->getConfigStruct(), $config);
        $config['segment'] = $this->text;
        $config['source'] = $this->source;
        $config['target'] = $this->target;
        $config['email'] = INIT::$MYMEMORY_API_KEY;
        $config['id_user'] = array();
        $config['num_result'] = $this->num_results;
        $config['isConcordance'] = $this->concordance_search;

        //get job's TM keys
        $this->checkLogin();
        try {
            if (self::isRevision()) {
                $this->userRole = TmKeyManagement_Filter::ROLE_REVISOR;
            }
            $tm_keys = TmKeyManagement_TmKeyManagement::getJobTmKeys($this->tm_keys, 'r', 'tm', $this->uid, $this->userRole);
            if (is_array($tm_keys) && !empty($tm_keys)) {
                foreach ($tm_keys as $tm_key) {
                    $config['id_user'][] = $tm_key->key;
                }
            }
        } catch (Exception $e) {
            $this->result['errors'][] = array("code" => -11, "message" => "Cannot retrieve TM keys info.");
            Log::doLog($e->getMessage());
            // NOTE(review): returns no value here, unlike the -1 of the earlier error exits.
            return;
        }

        $tms_match = $tms->get($config);
        $tms_match = $tms_match->get_matches_as_array();
    }

    if ($this->id_mt_engine > 1) {
        /**
         * @var $mt Engines_Moses
         */
        $mt = Engine::getInstance($this->id_mt_engine);
        // The MT engine gets a fresh configuration; nothing from the TM call is reused.
        $config = $mt->getConfigStruct();
        $config['segment'] = $this->text;
        $config['source'] = $this->source;
        $config['target'] = $this->target;
        $config['id_user'] = INIT::$MYMEMORY_API_KEY;
        $config['segid'] = $this->id_segment;
        $mt_result = $mt->get($config);
        if (isset($mt_result['error']['code'])) {
            // The engine error is reported to the client and the MT result is discarded.
            $mt_result['error']['created_by_type'] = 'MT';
            $this->result['errors'][] = $mt_result['error'];
            $mt_result = false;
        }
    }

    // $tms_match / $mt_result may be undefined at this point; empty() tolerates that.
    $matches = array();
    if (!empty($tms_match)) {
        $matches = $tms_match;
    }
    if (!empty($mt_result)) {
        $matches[] = $mt_result;
        usort($matches, array("getContributionController", "__compareScore"));
        //this is necessary since usort sorts is ascending order, thus inverting the ranking
        $matches = array_reverse($matches);
    }
    $matches = array_slice($matches, 0, $this->num_results);

    // $firstMatchVal stays undefined when there is no first match with a score.
    isset($matches[0]['match']) ? $firstMatchVal = floatval($matches[0]['match']) : null;
    if (isset($firstMatchVal) && $firstMatchVal >= 90 && $firstMatchVal < 100) {
        // Compare the requested text and the found segment without tags,
        // with runs of spaces collapsed and in lower case.
        $srcSearch = strip_tags($this->text);
        $segmentFound = strip_tags($matches[0]['raw_segment']);
        $srcSearch = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $srcSearch));
        $segmentFound = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $segmentFound));

        // NOTE(review): when both stripped strings are empty (tag-only text) the
        // divisor is log10(1) = 0 — confirm this cannot happen or guard it.
        $fuzzy = levenshtein($srcSearch, $segmentFound) / log10(mb_strlen($srcSearch . $segmentFound) + 1);

        //levenshtein handle max 255 chars per string and returns -1, so fuzzy var can be less than 0 !!
        if ($srcSearch == $segmentFound || $fuzzy < 2.5 && $fuzzy >= 0) {
            $qaRealign = new QA($this->text, html_entity_decode($matches[0]['raw_translation']));
            $qaRealign->tryRealignTagID();
            $log_prepend = "CLIENT REALIGN IDS PROCEDURE | ";
            if (!$qaRealign->thereAreErrors()) {
                // Realignment succeeded: show the requested text as the matched segment,
                // use the normalized target, and report 100% only for identical texts.
                $matches[0]['segment'] = CatUtils::rawxliff2view($this->text);
                $matches[0]['translation'] = CatUtils::rawxliff2view($qaRealign->getTrgNormalized());
                $matches[0]['match'] = $fuzzy == 0 ? '100%' : '99%';
            } else {
                Log::doLog($log_prepend . 'Realignment Failed. Skip. Segment: ' . $this->__postInput['id_segment']);
            }
        }
    }

    /* New Feature only if this is not a MT and if it is a ( 90 =< MATCH < 100 ) */
    if (!$this->concordance_search) {
        //execute these lines only in segment contribution search,
        //in case of user concordance search skip these lines
        $res = $this->setSuggestionReport($matches);
        if (is_array($res) and array_key_exists("error", $res)) {
            // error occurred: currently ignored
        }
    }

    // NOTE(review): $match is iterated by reference and never unset afterwards.
    foreach ($matches as &$match) {
        if (strpos($match['created_by'], 'MT') !== false) {
            $match['match'] = 'MT';
            $QA = new PostProcess($match['raw_segment'], $match['raw_translation']);
            $QA->realignMTSpaces();
            //this should every time be ok because MT preserve tags, but we use the check on the errors
            //for logic correctness
            if (!$QA->thereAreErrors()) {
                $match['raw_translation'] = $QA->getTrgNormalized();
                $match['translation'] = CatUtils::rawxliff2view($match['raw_translation']);
            } else {
                Log::doLog($QA->getErrors());
            }
        }
        if ($match['created_by'] == 'MT!') {
            $match['created_by'] = 'MT';
            //MyMemory returns MT!
        } else {
            $match['created_by'] = $this->__changeSuggestionSource($match);
        }
        if (!empty($match['sentence_confidence'])) {
            $match['sentence_confidence'] = round($match['sentence_confidence'], 0) . "%";
        }
        if ($this->concordance_search) {
            // Concordance results are shown as plain text: entities decoded, tags removed,
            // runs of spaces collapsed.
            $match['segment'] = strip_tags(html_entity_decode($match['segment']));
            $match['segment'] = preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $match['segment']);
            //Do something with &$match, tokenize strings and send to client
            $match['segment'] = preg_replace(array_keys($regularExpressions), array_values($regularExpressions), $match['segment']);
            $match['translation'] = strip_tags(html_entity_decode($match['translation']));
        }
    }

    $this->result['data']['matches'] = $matches;
}
/**
 * Builds one page of the editing log: display structs for the segments,
 * aggregate statistics and pagination data.
 *
 * Segments whose id is lower than self::$start_id are not displayed; they
 * only contribute to the "previous page" boundary.
 *
 * @param bool $use_ter_diff when true, TER and diff come from MyMemory::diff_tercpp();
 *                           otherwise TER is reported as 0 and the diff comes from MyMemory::diff_html()
 *
 * @return array array( $output_data, $stats, $pagination )
 *
 * @throws Exception with code -1 when the DAO returns no segments
 */
private function getEditLogData($use_ter_diff = false)
{
    $editLogDao = new EditLog_EditLogDao(Database::obtain());
    $data = $editLogDao->getSegments($this->getJid(), $this->getPassword(), self::$start_id);

    // Index the translation mismatches by segment hash. The map has to be built
    // in its own array: writing into the loop variable only changes a copy of
    // the row, which made every later lookup by hash miss.
    $mismatchByHash = array();
    $translationMismatchList = $editLogDao->getTranslationMismatches($this->getJid());
    foreach ($translationMismatchList as $translMismRow) {
        $mismatchByHash[$translMismRow['segment_hash']] = (bool) $translMismRow['translation_mismatch'];
    }

    $__pagination_prev = PHP_INT_MAX;
    $__pagination_next = -2147483648; //PHP_INT_MIN

    if (!$data) {
        throw new Exception('There are no changes in this job', -1);
    }

    $stats = array('total-word-count' => 0);

    $stat_too_slow = array();
    $stat_too_fast = array();
    $stat_mt = array();
    $stat_valid_rwc = array();
    $stat_rwc = array();
    $stat_pee = array();
    $stat_ter = array();
    $output_data = array();

    // Loop-invariant helpers, built once.
    $lh = Langs_Languages::getInstance();

    $array_patterns = array(
        rtrim(CatUtils::lfPlaceholderRegex, 'g'),
        rtrim(CatUtils::crPlaceholderRegex, 'g'),
        rtrim(CatUtils::crlfPlaceholderRegex, 'g'),
        rtrim(CatUtils::tabPlaceholderRegex, 'g'),
        rtrim(CatUtils::nbspPlaceholderRegex, 'g'),
    );
    $array_replacements_csv = array('\\n', '\\r', '\\r\\n', '\\t', Utils::unicode2chr(0xa0));
    $array_replacements = array(
        '<span class="_0A"></span><br />',
        '<span class="_0D"></span><br />',
        '<span class="_0D0A"></span><br />',
        '<span class="_tab">	</span>',
        '<span class="_nbsp"> </span>',
    );

    foreach ($data as $seg) {
        //if the segment is before the current one
        if ($seg->id < self::$start_id) {
            if ($seg->id <= $__pagination_prev) {
                $__pagination_prev = $seg->id;
            }
            continue;
        }
        if ($seg->id > $__pagination_next) {
            $__pagination_next = $seg->id;
        }

        $displaySeg = new EditLog_EditLogSegmentClientStruct($seg->toArray());
        $displaySeg->suggestion_match .= "%";
        $displaySeg->job_id = $this->jid;

        $tte = CatUtils::parse_time_to_edit($displaySeg->time_to_edit);
        $displaySeg->display_time_to_edit = "{$tte['1']}m:{$tte['2']}s";

        $stat_rwc[] = $seg->raw_word_count;

        // by definition we cannot have a 0 word sentence. It is probably a - or a tag,
        // so we want to consider at least a word (display value only).
        if ($seg->raw_word_count < 1) {
            $displaySeg->raw_word_count = 1;
        }

        //todo: remove this
        $displaySeg->secs_per_word = $seg->getSecsPerWord();

        if ($displaySeg->secs_per_word < self::EDIT_TIME_SLOW_CUT && $displaySeg->secs_per_word > self::EDIT_TIME_FAST_CUT) {
            $displaySeg->stats_valid = true;
            $stat_valid_rwc[] = $seg->raw_word_count;
        } else {
            $displaySeg->stats_valid = false;
        }

        // Stats
        if ($displaySeg->secs_per_word >= self::EDIT_TIME_SLOW_CUT) {
            $stat_too_slow[] = $seg->raw_word_count;
        }
        if ($displaySeg->secs_per_word <= self::EDIT_TIME_FAST_CUT) {
            $stat_too_fast[] = $seg->raw_word_count;
        }
        $displaySeg->secs_per_word .= "s";

        // Post-editing effort, clamped to the 0..100 range.
        $displaySeg->pe_effort_perc = $displaySeg->getPeePerc();
        if ($displaySeg->pe_effort_perc < 0) {
            $displaySeg->pe_effort_perc = 0;
        }
        if ($displaySeg->pe_effort_perc > 100) {
            $displaySeg->pe_effort_perc = 100;
        }
        $stat_pee[] = $displaySeg->pe_effort_perc * $seg->raw_word_count;
        $displaySeg->pe_effort_perc .= "%";

        $lang = $lh->getIsoCode($lh->getLocalizedName($seg->job_target));

        $sug_for_diff = CatUtils::placehold_xliff_tags($seg->suggestion);
        $tra_for_diff = CatUtils::placehold_xliff_tags($seg->translation);

        $ter = $use_ter_diff ? MyMemory::diff_tercpp($sug_for_diff, $tra_for_diff, $lang) : array();

        // Without the TER engine $ter is empty: read both slots defensively
        // instead of silencing the notices with "@".
        $ter_score = isset($ter[1]) ? $ter[1] : 0;
        $diff_ter = isset($ter[0]) ? $ter[0] : null;

        $displaySeg->ter = $ter_score * 100;
        $stat_ter[] = $displaySeg->ter * $seg->raw_word_count;
        $displaySeg->ter = round($ter_score * 100) . "%";

        if ($seg->suggestion != $seg->translation) {
            // force use of third party ter diff when requested;
            // otherwise diff_html is used until ter_diff works properly
            $displaySeg->diff = $use_ter_diff ? $diff_ter : MyMemory::diff_html($sug_for_diff, $tra_for_diff);
        } else {
            $displaySeg->diff = '';
        }
        $displaySeg->diff = CatUtils::restore_xliff_tags_for_view($displaySeg->diff);

        // BUG: While suggestions source is not correctly set, an 85% / 86%
        // suggestion match is treated as Machine Translation.
        if ($displaySeg->suggestion_match == "85%" || $displaySeg->suggestion_match == "86%") {
            $displaySeg->suggestion_source = 'Machine Translation';
            $stat_mt[] = $seg->raw_word_count;
        } else {
            $displaySeg->suggestion_source = 'TM';
        }

        $displaySeg->source_csv = preg_replace($array_patterns, $array_replacements_csv, $seg->source);
        $displaySeg->translation_csv = preg_replace($array_patterns, $array_replacements_csv, $seg->translation);
        $displaySeg->sug_csv = preg_replace($array_patterns, $array_replacements_csv, $displaySeg->suggestion_view);
        $displaySeg->diff_csv = preg_replace($array_patterns, $array_replacements_csv, $displaySeg->diff);

        // NOTE(review): the placeholder replacements written to source, translation
        // and suggestion_view here are overwritten by the rawxliff2view() assignments
        // just below; only diff keeps them. Order kept as found — confirm which
        // rendering is intended before changing it.
        $displaySeg->source = preg_replace($array_patterns, $array_replacements, $seg->source);
        $displaySeg->translation = preg_replace($array_patterns, $array_replacements, $seg->translation);
        $displaySeg->suggestion_view = preg_replace($array_patterns, $array_replacements, $displaySeg->suggestion_view);
        $displaySeg->diff = preg_replace($array_patterns, $array_replacements, $displaySeg->diff);

        $displaySeg->source = trim(CatUtils::rawxliff2view($seg->source));
        $displaySeg->suggestion_view = trim(CatUtils::rawxliff2view($seg->suggestion));
        $displaySeg->translation = trim(CatUtils::rawxliff2view($seg->translation));

        if ($seg->mt_qe == 0) {
            $displaySeg->mt_qe = 'N/A';
        }

        // 1 when the segment hash is flagged as a translation mismatch, 0 otherwise.
        $segmentHash = $displaySeg->segment_hash;
        $displaySeg->num_translation_mismatch = isset($mismatchByHash[$segmentHash])
            ? (int) $mismatchByHash[$segmentHash]
            : 0;

        $displaySeg->evaluateWarningString();

        $output_data[] = $displaySeg;
    }

    $pagination = $this->evaluatePagination($__pagination_prev, $__pagination_next + 1);
    $globalStats = $this->evaluateGlobalStats();

    $stats['valid-word-count'] = $globalStats['raw_words'];

    //TODO: this will not work anymore
    $stats['edited-word-count'] = array_sum($stat_rwc);

    if ($stats['edited-word-count'] > 0) {
        $stats['too-slow-words'] = round(array_sum($stat_too_slow) / $stats['edited-word-count'], 2) * 100;
        $stats['too-fast-words'] = round(array_sum($stat_too_fast) / $stats['edited-word-count'], 2) * 100;
        $stats['avg-pee'] = round(array_sum($stat_pee) / array_sum($stat_rwc)) . "%";
        $stats['avg-ter'] = round(array_sum($stat_ter) / array_sum($stat_rwc)) . "%";
    }

    // Both ratios below fall back to 0 instead of dividing by zero.
    $stats['mt-words'] = $stats['edited-word-count'] > 0
        ? round(array_sum($stat_mt) / $stats['edited-word-count'], 2) * 100
        : 0;
    $stats['tm-words'] = 100 - $stats['mt-words'];

    $stats['total-valid-tte'] = round($globalStats['tot_tte']);

    // Weighted average, taken from the global statistics
    $stats['avg-secs-per-word'] = round($globalStats['secs_per_word'] / 1000, 1);

    $words_per_day = $stats['avg-secs-per-word'] != 0
        ? round(3600 * 8 / $stats['avg-secs-per-word'])
        : 0;
    $stats['est-words-per-day'] = number_format($words_per_day, 0, '.', ',');

    // Last minute formatting (after calculations)
    $temp = CatUtils::parse_time_to_edit(round($stats['total-valid-tte']));
    $stats['total-valid-tte'] = "{$temp['0']}h:{$temp['1']}m:{$temp['2']}s";
    $stats['total-tte-seconds'] = $temp[0] * 3600 + $temp[1] * 60 + $temp[2];

    // The job-wide average replaces the page-local one computed above.
    $stats['avg-pee'] = round($globalStats['avg_pee'], 2);
    $stats['avg-pee'] .= "%";

    return array($output_data, $stats, $pagination);
}
/**
 * Loads a batch of segments relative to the reference segment and groups
 * them by file into $this->data, then publishes the result under
 * $this->result['data'].
 *
 * Side effects visible in this method:
 *  - normalises an empty $this->ref_segment to 0;
 *  - fills project-level fields (pname, cid, pid, tid, create_date,
 *    source/target codes, ...) from the first row that carries them, but
 *    only when the field is still empty;
 *  - sets $this->filetype_handler for each processed segment;
 *  - writes $this->result['data']['files'] and $this->result['data']['where'].
 *
 * @return void
 */
public function doAction() {
    $lang_handler = languages::getInstance("en");

    if ($this->ref_segment == '') {
        $this->ref_segment = 0;
    }

    // CASMACAT extension start
    if ($this->casIsReplaying) {
        $data = getMoreSegmentsWithoutTranslation($this->jid, $this->password, $this->step, $this->ref_segment, $this->where);
    } else {
        $data = getMoreSegments($this->jid, $this->password, $this->step, $this->ref_segment, $this->where);
    }
    // CASMACAT extension end

    foreach ($data as $seg) {
        // When paging backwards, stop at the first row that is not strictly
        // before the reference segment.
        if ($this->where == 'before' && (double) $seg['sid'] >= (double) $this->ref_segment) {
            break;
        }

        // Project-level metadata: taken from the first row that provides it.
        if (empty($this->pname)) {
            $this->pname = $seg['pname'];
        }
        if (empty($this->last_opened_segment)) {
            $this->last_opened_segment = $seg['last_opened_segment'];
        }
        if (empty($this->cid)) {
            $this->cid = $seg['cid'];
        }
        if (empty($this->pid)) {
            $this->pid = $seg['pid'];
        }
        if (empty($this->tid)) {
            $this->tid = $seg['tid'];
        }
        if (empty($this->create_date)) {
            $this->create_date = $seg['create_date'];
        }
        if (empty($this->source_code)) {
            $this->source_code = $seg['source'];
        }
        if (empty($this->target_code)) {
            $this->target_code = $seg['target'];
        }
        if (empty($this->source)) {
            // Upper-cased part of the language code before the first "-".
            $source_parts = explode("-", $seg['source']);
            $this->source = strtoupper($source_parts[0]);
        }
        if (empty($this->target)) {
            $target_parts = explode("-", $seg['target']);
            $this->target = strtoupper($target_parts[0]);
        }

        $id_file  = $seg['id_file'];
        $file_key = "{$id_file}";

        if (!isset($this->data[$file_key])) {
            // File stats are only used when the file entry is created, so
            // they are fetched once per file instead of once per segment.
            $file_stats = CatUtils::getStatsForFile($id_file);

            $this->data[$file_key] = array(
                'jid'              => $seg['jid'],
                'filename'         => $seg['filename'],
                'mime_type'        => $seg['mime_type'],
                'id_segment_start' => $seg['id_segment_start'],
                'id_segment_end'   => $seg['id_segment_end'],
                'source'           => $lang_handler->iso2Language($seg['source']),
                'target'           => $lang_handler->iso2Language($seg['target']),
                'source_code'      => $seg['source'],
                'target_code'      => $seg['target'],
                'file_stats'       => $file_stats,
                'segments'         => array(),
            );
        }

        // Must be built before 'mime_type' is stripped from the row below.
        $this->filetype_handler = new filetype($seg['mime_type']);

        // Drop the file/project-level columns: they already live on the file
        // entry or on $this, and should not be repeated on every segment.
        unset(
            $seg['id_file'],
            $seg['source'],
            $seg['target'],
            $seg['source_code'],
            $seg['target_code'],
            $seg['mime_type'],
            $seg['filename'],
            $seg['jid'],
            $seg['pid'],
            $seg['cid'],
            $seg['tid'],
            $seg['pname'],
            $seg['create_date'],
            $seg['id_segment_end'],
            $seg['id_segment_start']
        );

        $seg['segment'] = $this->filetype_handler->parse($seg['segment']);

        // By design the web interface does not show xliff_ext_prec_tags and
        // xliff_ext_succ_tags, so they are not concatenated to the segment.
        $seg['segment']             = CatUtils::rawxliff2view($seg['segment']);
        $seg['translation']         = CatUtils::rawxliff2view($seg['translation']);
        $seg['parsed_time_to_edit'] = $this->parse_time_to_edit($seg['time_to_edit']);

        $this->data[$file_key]['segments'][] = $seg;
    }

    $this->result['data']['files'] = $this->data;
    $this->result['data']['where'] = $this->where;
}
/**
 * Builds the response object from a decoded result array.
 *
 * Reads 'responseData', 'responseStatus', the optional 'responseDetails'
 * (defaults to '') and, when present and non-empty, the 'matches' list.
 * Each match is wrapped in a TMS_GET_MATCHES instance and appended to
 * $this->matches.
 *
 * For every match the unconverted text is kept in 'raw_segment' /
 * 'raw_translation' and the view form (CatUtils::rawxliff2view) is stored
 * in 'segment' / 'translation', mirroring what the five-argument form of
 * the TMS_GET_MATCHES constructor does.
 *
 * @param array $result decoded response
 */
public function __construct($result) {
    $this->responseData    = $result['responseData'];
    $this->responseDetails = isset($result['responseDetails']) ? $result['responseDetails'] : '';
    $this->responseStatus  = $result['responseStatus'];

    if (!is_array($result) || !array_key_exists('matches', $result)) {
        return;
    }

    $matches = $result['matches'];
    if (!is_array($matches) || empty($matches)) {
        return;
    }

    foreach ($matches as $match) {
        // Capture the raw values BEFORE converting them for display;
        // previously raw_translation was copied after the conversion and
        // therefore held the view form instead of the raw one.
        if (!array_key_exists('raw_segment', $match)) {
            $match['raw_segment'] = $match['segment'];
        }
        $match['raw_translation'] = $match['translation'];

        $match['segment']     = CatUtils::rawxliff2view($match['segment']);
        $match['translation'] = CatUtils::rawxliff2view($match['translation']);

        $this->matches[] = new TMS_GET_MATCHES($match);
    }
}