/**
 * Collects the TM / MT contributions (or the concordance results) for the requested text
 * and stores them in $this->result['data']['matches'].
 *
 * Every problem met along the way is appended to $this->result['errors'].
 *
 * @return int|null -1 when the request is invalid, the job password check fails or the
 *                  TM keys cannot be retrieved; nothing (null) otherwise
 */
public function doAction() {

    if (!$this->concordance_search) {
        // id_segment is mandatory only for a segment contribution search:
        // a user concordance search may legitimately omit it
        if (empty($this->id_segment)) {
            $this->result['errors'][] = array("code" => -1, "message" => "missing id_segment");
        }
    }

    if (is_null($this->text) || $this->text === '') {
        $this->result['errors'][] = array("code" => -2, "message" => "missing text");
    }

    if (empty($this->id_job)) {
        $this->result['errors'][] = array("code" => -3, "message" => "missing id_job");
    }

    if (empty($this->num_results)) {
        $this->num_results = INIT::$DEFAULT_NUM_RESULTS_FROM_TM;
    }

    if (!empty($this->result['errors'])) {
        return -1;
    }

    // get Job Infos, we need only a row of jobs ( split )
    $this->jobData = getJobData($this->id_job, $this->password);

    // check for Password correctness
    $pCheck = new AjaxPasswordCheck();
    if (empty($this->jobData) || !$pCheck->grantJobAccessByJobData($this->jobData, $this->password)) {
        $this->result['errors'][] = array("code" => -10, "message" => "wrong password");

        return -1;
    }

    /*
     * string manipulation strategy
     */
    if (!$this->concordance_search) {
        $this->source = $this->jobData['source'];
        $this->target = $this->jobData['target'];
    } else {
        $regularExpressions = $this->tokenizeSourceSearch();

        if ($this->switch_languages) {
            /*
             * Switch languages when the user runs a concordance search on the target language.
             * Example: the job is source it_IT, target de_DE; the user right-clicks a german
             * word or phrase asking for concordance help, so we want italian results
             * from a german source.
             */
            $this->source = $this->jobData['target'];
            $this->target = $this->jobData['source'];
        } else {
            $this->source = $this->jobData['source'];
            $this->target = $this->jobData['target'];
        }
    }

    $this->id_mt_engine = $this->jobData['id_mt_engine'];
    $this->id_tms       = $this->jobData['id_tms'];
    $this->tm_keys      = $this->jobData['tm_keys'];

    $config = array();
    if ($this->id_tms == 1) {
        /**
         * MyMemory Enabled
         */
        $config['get_mt']  = true;
        $config['mt_only'] = false;
        if ($this->id_mt_engine != 1) {
            /**
             * Don't get MT contribution from MyMemory ( Custom MT )
             */
            $config['get_mt'] = false;
        }
        $_TMS = $this->id_tms;
    } else {
        if ($this->id_tms == 0 && $this->id_mt_engine == 1) {
            /**
             * MyMemory disabled but MT Enabled and it is NOT a Custom one
             * So tell to MyMemory to get MT only
             */
            $config['get_mt']  = true;
            $config['mt_only'] = true;
            $_TMS = 1; /* MyMemory */
        }
    }

    /**
     * If neither a TM server nor MT is selected, $_TMS is not defined
     * and the TMS call must not be performed
     */
    if (isset($_TMS)) {
        /**
         * @var $tms Engines_MyMemory
         */
        $tms = Engine::getInstance($_TMS);

        $config = array_merge($tms->getConfigStruct(), $config);
        $config['segment']       = $this->text;
        $config['source']        = $this->source;
        $config['target']        = $this->target;
        $config['email']         = INIT::$MYMEMORY_API_KEY;
        $config['id_user']       = array();
        $config['num_result']    = $this->num_results;
        $config['isConcordance'] = $this->concordance_search;

        // get job's TM keys
        $this->checkLogin();
        try {
            if (self::isRevision()) {
                $this->userRole = TmKeyManagement_Filter::ROLE_REVISOR;
            }

            $tm_keys = TmKeyManagement_TmKeyManagement::getJobTmKeys($this->tm_keys, 'r', 'tm', $this->uid, $this->userRole);
            if (is_array($tm_keys) && !empty($tm_keys)) {
                foreach ($tm_keys as $tm_key) {
                    $config['id_user'][] = $tm_key->key;
                }
            }
        } catch (Exception $e) {
            $this->result['errors'][] = array("code" => -11, "message" => "Cannot retrieve TM keys info.");
            Log::doLog($e->getMessage());

            // same failure signal as every other error path of this method
            return -1;
        }

        $tms_match = $tms->get($config);
        $tms_match = $tms_match->get_matches_as_array();
    }

    if ($this->id_mt_engine > 1) {
        /**
         * @var $mt Engines_Moses
         */
        $mt = Engine::getInstance($this->id_mt_engine);

        $config = $mt->getConfigStruct();
        $config['segment'] = $this->text;
        $config['source']  = $this->source;
        $config['target']  = $this->target;
        $config['id_user'] = INIT::$MYMEMORY_API_KEY;
        $config['segid']   = $this->id_segment;

        $mt_result = $mt->get($config);
        if (isset($mt_result['error']['code'])) {
            $mt_result['error']['created_by_type'] = 'MT';
            $this->result['errors'][] = $mt_result['error'];
            $mt_result = false;
        }
    }

    $matches = array();
    if (!empty($tms_match)) {
        $matches = $tms_match;
    }

    if (!empty($mt_result)) {
        $matches[] = $mt_result;
        usort($matches, array("getContributionController", "__compareScore"));
        // usort sorts in ascending order: reverse to get the best match first
        $matches = array_reverse($matches);
    }

    $matches = array_slice($matches, 0, $this->num_results);

    $firstMatchVal = isset($matches[0]['match']) ? floatval($matches[0]['match']) : null;
    if ($firstMatchVal !== null && $firstMatchVal >= 90 && $firstMatchVal < 100) {

        $srcSearch    = strip_tags($this->text);
        $segmentFound = strip_tags($matches[0]['raw_segment']);
        $srcSearch    = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $srcSearch));
        $segmentFound = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $segmentFound));

        // log10(1) is 0: that happens only when both stripped strings are empty, i.e. they are
        // identical, so the distance is 0 and the division must be skipped.
        $normalizer = log10(mb_strlen($srcSearch . $segmentFound) + 1);
        $fuzzy      = $normalizer > 0 ? levenshtein($srcSearch, $segmentFound) / $normalizer : 0;

        // levenshtein handles max 255 chars per string on older PHP versions and returns -1,
        // so $fuzzy can be less than 0 !!
        if ($srcSearch == $segmentFound || ($fuzzy < 2.5 && $fuzzy >= 0)) {

            $qaRealign = new QA($this->text, html_entity_decode($matches[0]['raw_translation']));
            $qaRealign->tryRealignTagID();

            $log_prepend = "CLIENT REALIGN IDS PROCEDURE | ";
            if (!$qaRealign->thereAreErrors()) {
                $matches[0]['segment']     = CatUtils::rawxliff2view($this->text);
                $matches[0]['translation'] = CatUtils::rawxliff2view($qaRealign->getTrgNormalized());
                $matches[0]['match']       = $fuzzy == 0 ? '100%' : '99%';
            } else {
                Log::doLog($log_prepend . 'Realignment Failed. Skip. Segment: ' . $this->__postInput['id_segment']);
            }
        }
    }

    /* New Feature only if this is not a MT and if it is a ( 90 =< MATCH < 100 ) */
    if (!$this->concordance_search) {
        // executed only for a segment contribution search, skipped for a user concordance search
        $res = $this->setSuggestionReport($matches);
        if (is_array($res) and array_key_exists("error", $res)) {
            // error occurred
        }
    }

    foreach ($matches as &$match) {

        if (strpos($match['created_by'], 'MT') !== false) {
            $match['match'] = 'MT';

            $QA = new PostProcess($match['raw_segment'], $match['raw_translation']);
            $QA->realignMTSpaces();

            // this should always be ok because MT preserves tags,
            // but the errors are checked anyway for logic correctness
            if (!$QA->thereAreErrors()) {
                $match['raw_translation'] = $QA->getTrgNormalized();
                $match['translation']     = CatUtils::rawxliff2view($match['raw_translation']);
            } else {
                Log::doLog($QA->getErrors());
            }
        }

        if ($match['created_by'] == 'MT!') {
            $match['created_by'] = 'MT'; // MyMemory returns MT!
        } else {
            $match['created_by'] = $this->__changeSuggestionSource($match);
        }

        if (!empty($match['sentence_confidence'])) {
            $match['sentence_confidence'] = round($match['sentence_confidence'], 0) . "%";
        }

        if ($this->concordance_search) {
            $match['segment'] = strip_tags(html_entity_decode($match['segment']));
            $match['segment'] = preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $match['segment']);

            // tokenize the strings before sending them to the client
            $match['segment'] = preg_replace(array_keys($regularExpressions), array_values($regularExpressions), $match['segment']);

            $match['translation'] = strip_tags(html_entity_decode($match['translation']));
        }
    }
    // break the reference left behind by the by-reference foreach above
    unset($match);

    $this->result['data']['matches'] = $matches;
}
/**
 * Runs PostProcess::realignMTSpaces() on a real source/target pair and expects no errors,
 * no warnings, and a normalized target whose <g> tags no longer contain padding spaces.
 */
public function testRealString1() {

    // heredoc on purpose: the \t escape must be expanded to a real tab between the two tags
    $source_seg = <<<SRC
<g id="1877">31-235</g>\t<g id="1878">The default PR upper alarm is120.</g>
SRC;

    $target_seg = <<<TRG
<g id="1877"> 31-235 </g><g id="1878"> L'impostazione predefinita PR IS120 allarme. </g>
TRG;

    $source_seg = CatUtils::view2rawxliff($source_seg);
    $target_seg = CatUtils::view2rawxliff($target_seg);

    $check = new PostProcess($source_seg, $target_seg);
    $check->realignMTSpaces();

    $warnings = $check->getWarnings();
    $errors   = $check->getErrors();

    $this->assertFalse($check->thereAreErrors());
    $this->assertFalse($check->thereAreWarnings());

    // expected value first, so that a failure message reports the right side as "expected"
    $this->assertCount(1, $warnings);
    $this->assertEquals(0, $warnings[0]->outcome);

    $this->assertCount(1, $errors);
    $this->assertEquals(0, $errors[0]->outcome);

    $normalized = $check->getTrgNormalized();

    // trick: the strings are not exactly the same, there is a tab between the tags in the source string
    $this->assertEquals('<g id="1877">31-235</g><g id="1878">L\'impostazione predefinita PR IS120 allarme.</g>', $normalized);
}
// Snapshot of the best match ( index 0 ) and of the whole matches list
$suggestion_source = $matches[0]['created_by'];
$suggestion_json   = json_encode($matches);
$suggestion_match  = $matches[0]['match'];

// Equivalent words: payable rate of the resulting match type applied to the raw word count
$equivalentWordMapping = json_decode($payable_rates, true);
$new_match_type        = getNewMatchType($tm_match_type, $fast_match_type, $equivalentWordMapping, empty($matches[0]['memory_key']));
$eq_words              = $equivalentWordMapping[$new_match_type] * $raw_wc / 100;
$standard_words        = $eq_words;

if ($new_match_type != 'MT') {

    // not MT: perform only the tag check
    $check = new PostProcess($text, $suggestion);
    $check->performTagCheckOnly();

    $err_json = $check->thereAreErrors() ? $check->getErrorsJSON() : '';

} else {

    // the first match is MT: standard words use the NO_MATCH rate, then perform the QA realignment
    $standard_words = $equivalentWordMapping["NO_MATCH"] * $raw_wc / 100;

    $check = new PostProcess($matches[0]['raw_segment'], $suggestion);
    $check->realignMTSpaces();

    // this should always be ok because MT preserves tags,
    // but the errors are checked anyway for logic correctness
    if ($check->thereAreErrors()) {
        $err_json = $check->getErrorsJSON();
    } else {
        $suggestion = CatUtils::view2rawxliff($check->getTrgNormalized());
        $err_json   = '';
    }
}
/**
 * Builds the analysis record for the first match of the queued segment, stores it through
 * setSuggestionUpdate() and then updates the project counters.
 *
 * @param QueueElement $queueElement
 *
 * @throws Exception
 * @throws ReQueueException when setSuggestionUpdate() returns a negative value
 */
protected function _updateRecord(QueueElement $queueElement) {

    $params     = $queueElement->params;
    $firstMatch = $this->_matches[0];

    // anything created by MT ( case insensitive ) is handled as an "MT" match
    $tm_match_type = (stripos($firstMatch['created_by'], "MT") !== false) ? "MT" : $firstMatch['match'];

    // replace every not closed <x ...> tag inside the suggestion with a correctly closed one
    $suggestion = preg_replace('|<x([^/]*?)>|', '<x\\1/>', \CatUtils::view2rawxliff($firstMatch['raw_translation']));

    $equivalentWordMapping = json_decode($params->payable_rates, true);
    $new_match_type        = $this->_getNewMatchType($tm_match_type, $params->match_type, $equivalentWordMapping, empty($firstMatch['memory_key']));

    $eq_words       = $equivalentWordMapping[$new_match_type] * $params->raw_word_count / 100;
    $standard_words = $eq_words;

    if ($new_match_type != 'MT') {

        // not MT: perform only the tag check
        $postProcess = new \PostProcess($params->segment, $suggestion);
        $postProcess->performTagCheckOnly();

        $err_json = $postProcess->thereAreErrors() ? $postProcess->getErrorsJSON() : '';

    } else {

        // the first match is MT: standard words use the NO_MATCH rate, then perform the QA realignment
        $standard_words = $equivalentWordMapping["NO_MATCH"] * $params->raw_word_count / 100;

        $postProcess = new \PostProcess($firstMatch['raw_segment'], $suggestion);
        $postProcess->realignMTSpaces();

        // this should always be ok because MT preserves tags,
        // but the errors are checked anyway for logic correctness
        if ($postProcess->thereAreErrors()) {
            $err_json = $postProcess->getErrorsJSON();
        } else {
            $suggestion = \CatUtils::view2rawxliff($postProcess->getTrgNormalized());
            $err_json   = '';
        }
    }

    $mt_qe = !empty($firstMatch['sentence_confidence']) ? floatval($firstMatch['sentence_confidence']) : null;

    // a non empty source collapses to 'MT' / 'TM'; an empty one is stored untouched
    $suggestion_source = $firstMatch['created_by'];
    if (!empty($suggestion_source)) {
        $suggestion_source = (strpos($suggestion_source, "MT") === false) ? 'TM' : 'MT';
    }

    $tm_data = array(
        'id_job'                 => $params->id_job,
        'id_segment'             => $params->id_segment,
        'suggestions_array'      => json_encode($this->_matches),
        'suggestion'             => $suggestion,
        'match_type'             => $new_match_type,
        'eq_word_count'          => $eq_words,
        'standard_word_count'    => $standard_words,
        'translation'            => $suggestion,
        'tm_analysis_status'     => "DONE",
        'warning'                => (int) $postProcess->thereAreErrors(),
        'serialized_errors_list' => $err_json,
        'mt_qe'                  => $mt_qe,
        'suggestion_source'      => $suggestion_source,
        'suggestion_match'       => $firstMatch['match'],
    );

    // a 100% match is stored as already translated when pretranslate_100 is set
    if ($tm_data['suggestion_match'] == "100%" && $params->pretranslate_100) {
        $tm_data['status'] = \Constants_TranslationStatus::STATUS_TRANSLATED;
    }

    $affectedRows = setSuggestionUpdate($tm_data);

    if ($affectedRows < 0) {
        $this->_doLog("**** Error occurred during the storing (UPDATE) of the suggestions for the segment {$tm_data['id_segment']}");
        throw new ReQueueException("**** Error occurred during the storing (UPDATE) of the suggestions for the segment {$tm_data['id_segment']}", self::ERR_REQUEUE);
    }

    if ($affectedRows == 0) {
        // There was not a fast Analysis??? Impossible.
        $this->_doLog("No row found: " . $tm_data['id_segment'] . "-" . $tm_data['id_job']);
    } else {
        $this->_doLog("Row found: " . $tm_data['id_segment'] . "-" . $tm_data['id_job'] . " - UPDATED.");
    }

    // set redis cache
    $this->_incrementAnalyzedCount($params->pid, $eq_words, $standard_words);
    $this->_decSegmentsToAnalyzeOfWaitingProjects($params->pid);
    $this->_tryToCloseProject($params->pid);
}