/**
 * Collects translation suggestions for the requested text.
 *
 * Steps, in order:
 *  1. validate the request parameters (id_segment, text, id_job, num_results);
 *  2. load the job row and verify the password against it;
 *  3. pick the source/target languages (optionally switched for a concordance search);
 *  4. query the TM server and/or the custom MT engine configured on the job;
 *  5. merge and rank the matches, then try a tag ID realignment on the best one;
 *  6. post-process every match and store the list in $this->result['data']['matches'].
 *
 * Errors are appended to $this->result['errors'].
 *
 * @return int|null -1 when validation, the password check or the TM keys lookup fails,
 *                  null (no explicit return) otherwise
 */
public function doAction() {

    if (!$this->concordance_search) {
        // id_segment is required only for a segment contribution search:
        // in a user concordance search the segment can be optional
        if (empty($this->id_segment)) {
            $this->result['errors'][] = array("code" => -1, "message" => "missing id_segment");
        }
    }

    if (is_null($this->text) || $this->text === '') {
        $this->result['errors'][] = array("code" => -2, "message" => "missing text");
    }

    if (empty($this->id_job)) {
        $this->result['errors'][] = array("code" => -3, "message" => "missing id_job");
    }

    if (empty($this->num_results)) {
        $this->num_results = INIT::$DEFAULT_NUM_RESULTS_FROM_TM;
    }

    if (!empty($this->result['errors'])) {
        return -1;
    }

    // get Job Infos, we need only a row of jobs ( split )
    $this->jobData = getJobData($this->id_job, $this->password);

    // check for Password correctness
    $pCheck = new AjaxPasswordCheck();
    if (empty($this->jobData) || !$pCheck->grantJobAccessByJobData($this->jobData, $this->password)) {
        $this->result['errors'][] = array("code" => -10, "message" => "wrong password");
        return -1;
    }

    /*
     * string manipulation strategy
     */
    if (!$this->concordance_search) {
        // segment contribution search: pass the text through CatUtils::view2rawxliff
        $this->text   = CatUtils::view2rawxliff($this->text);
        $this->source = $this->jobData['source'];
        $this->target = $this->jobData['target'];
    } else {
        // pattern => replacement map, applied to every match segment at the end of this method
        $regularExpressions = $this->tokenizeSourceSearch();

        if ($this->switch_languages) {
            /*
             * Switch languages when the user runs a concordance search on the target language.
             * Example: the job is source it_IT, target de_DE and the user asks for
             * concordance help on a German word or phrase: we want Italian results
             * from a German source.
             */
            $this->source = $this->jobData['target'];
            $this->target = $this->jobData['source'];
        } else {
            $this->source = $this->jobData['source'];
            $this->target = $this->jobData['target'];
        }
    }

    $this->id_mt_engine = $this->jobData['id_mt_engine'];
    $this->id_tms       = $this->jobData['id_tms'];
    $this->tm_keys      = $this->jobData['tm_keys'];

    // explicit defaults, so the merge step below never reads an undefined variable
    $tms_match = array();
    $mt_result = false;

    $config = array();
    if ($this->id_tms == 1) {
        /**
         * MyMemory Enabled
         */
        $config['get_mt']  = true;
        $config['mt_only'] = false;

        if ($this->id_mt_engine != 1) {
            /**
             * Don't get MT contribution from MyMemory ( Custom MT )
             */
            $config['get_mt'] = false;
        }

        $_TMS = $this->id_tms;
    } else {
        if ($this->id_tms == 0 && $this->id_mt_engine == 1) {
            /**
             * MyMemory disabled but MT Enabled and it is NOT a Custom one
             * So tell to MyMemory to get MT only
             */
            $config['get_mt']  = true;
            $config['mt_only'] = true;

            $_TMS = 1; /* MyMemory */
        }
    }

    /**
     * If no TM server and no MT are selected $_TMS is not defined,
     * so the TMS call is skipped.
     */
    if (isset($_TMS)) {

        /**
         * @var $tms Engines_MyMemory
         */
        $tms = Engine::getInstance($_TMS);

        $config = array_merge($tms->getConfigStruct(), $config);
        $config['segment']       = $this->text;
        $config['source']        = $this->source;
        $config['target']        = $this->target;
        $config['email']         = INIT::$MYMEMORY_API_KEY;
        $config['id_user']       = array();
        $config['num_result']    = $this->num_results;
        $config['isConcordance'] = $this->concordance_search;

        // get job's TM keys
        $this->checkLogin();

        try {
            if (self::isRevision()) {
                $this->userRole = TmKeyManagement_Filter::ROLE_REVISOR;
            }

            $tm_keys = TmKeyManagement_TmKeyManagement::getJobTmKeys($this->tm_keys, 'r', 'tm', $this->uid, $this->userRole);

            if (is_array($tm_keys) && !empty($tm_keys)) {
                foreach ($tm_keys as $tm_key) {
                    $config['id_user'][] = $tm_key->key;
                }
            }
        } catch (Exception $e) {
            $this->result['errors'][] = array("code" => -11, "message" => "Cannot retrieve TM keys info.");
            Log::doLog($e->getMessage());

            // same failure value as the other error exits of this method
            return -1;
        }

        $tms_match = $tms->get($config);
        $tms_match = $tms_match->get_matches_as_array();
    }

    if ($this->id_mt_engine > 1) {
        /**
         * @var $mt Engines_Moses
         */
        $mt = Engine::getInstance($this->id_mt_engine);

        $config = $mt->getConfigStruct();
        $config['segment'] = $this->text;
        $config['source']  = $this->source;
        $config['target']  = $this->target;
        $config['id_user'] = INIT::$MYMEMORY_API_KEY;
        $config['segid']   = $this->id_segment;

        $mt_result = $mt->get($config);

        if (isset($mt_result['error']['code'])) {
            // report the engine error and drop the MT contribution
            $mt_result['error']['created_by_type'] = 'MT';
            $this->result['errors'][] = $mt_result['error'];
            $mt_result = false;
        }
    }

    $matches = array();

    if (!empty($tms_match)) {
        $matches = $tms_match;
    }

    if (!empty($mt_result)) {
        $matches[] = $mt_result;
        usort($matches, array("getContributionController", "__compareScore"));
        // this is necessary since usort sorts in ascending order, thus inverting the ranking
        $matches = array_reverse($matches);
    }

    $matches = array_slice($matches, 0, $this->num_results);

    /*
     * Tag ID realignment: attempted only when the best match scores in [90, 100)
     */
    $firstMatchVal = isset($matches[0]['match']) ? floatval($matches[0]['match']) : null;

    if ($firstMatchVal !== null && $firstMatchVal >= 90 && $firstMatchVal < 100) {

        // compare the two texts without tags, with runs of spaces collapsed, case-insensitively
        $srcSearch    = strip_tags($this->text);
        $segmentFound = strip_tags($matches[0]['raw_segment']);
        $srcSearch    = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $srcSearch));
        $segmentFound = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $segmentFound));

        // When both stripped strings are empty (tag-only segments) the divisor log10(1)
        // is zero: treat them as identical instead of dividing by zero.
        $totalLength = mb_strlen($srcSearch . $segmentFound);
        $fuzzy       = $totalLength > 0
                ? levenshtein($srcSearch, $segmentFound) / log10($totalLength + 1)
                : 0;

        // levenshtein may return -1 for strings longer than 255 chars, so $fuzzy can be less than 0
        if ($srcSearch == $segmentFound || ($fuzzy < 2.5 && $fuzzy >= 0)) {

            $qaRealign = new QA($this->text, html_entity_decode($matches[0]['raw_translation']));
            $qaRealign->tryRealignTagID();

            $log_prepend = "CLIENT REALIGN IDS PROCEDURE | ";
            if (!$qaRealign->thereAreErrors()) {
                $matches[0]['segment']     = CatUtils::rawxliff2view($this->text);
                $matches[0]['translation'] = CatUtils::rawxliff2view($qaRealign->getTrgNormalized());
                $matches[0]['match']       = $fuzzy == 0 ? '100%' : '99%';
            } else {
                Log::doLog($log_prepend . 'Realignment Failed. Skip. Segment: ' . $this->__postInput['id_segment']);
            }
        }
    }

    if (!$this->concordance_search) {
        // executed only in segment contribution search, skipped for a user concordance search
        $res = $this->setSuggestionReport($matches);
        if (is_array($res) and array_key_exists("error", $res)) {
            // error occurred: no handling is performed here
        }
    }

    foreach ($matches as &$match) {

        if (strpos($match['created_by'], 'MT') !== false) {
            $match['match'] = 'MT';

            $QA = new PostProcess($match['raw_segment'], $match['raw_translation']);
            $QA->realignMTSpaces();

            // this should always be ok because MT preserves tags,
            // but the errors are checked anyway for logic correctness
            if (!$QA->thereAreErrors()) {
                $match['raw_translation'] = $QA->getTrgNormalized();
                $match['translation']     = CatUtils::rawxliff2view($match['raw_translation']);
            } else {
                Log::doLog($QA->getErrors());
            }
        }

        if ($match['created_by'] == 'MT!') {
            $match['created_by'] = 'MT'; // MyMemory returns MT!
        } else {
            $match['created_by'] = $this->__changeSuggestionSource($match);
        }

        if (!empty($match['sentence_confidence'])) {
            $match['sentence_confidence'] = round($match['sentence_confidence'], 0) . "%";
        }

        if ($this->concordance_search) {
            $match['segment'] = strip_tags(html_entity_decode($match['segment']));
            $match['segment'] = preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $match['segment']);

            // tokenize the strings before sending them to the client
            $match['segment'] = preg_replace(array_keys($regularExpressions), array_values($regularExpressions), $match['segment']);

            $match['translation'] = strip_tags(html_entity_decode($match['translation']));
        }
    }
    // break the reference left by the by-reference loop before the array is stored
    unset($match);

    $this->result['data']['matches'] = $matches;
}
/**
 * Runs PostProcess::realignMTSpaces() on a real segment pair.
 *
 * The source has a tab between its two <g> tags and the target has extra spaces
 * inside the tags. The test expects no errors and no warnings, a single entry with
 * outcome 0 in both the warnings list and the errors list, and a normalized target
 * without the extra spaces.
 */
public function testRealString1() {

    // heredoc: "\t" below is interpreted as a real tab character
    $source_seg = <<<SRC
<g id="1877">31-235</g>\t<g id="1878">The default PR upper alarm is120.</g>
SRC;

    $target_seg = <<<TRG
<g id="1877"> 31-235 </g><g id="1878"> L'impostazione predefinita PR IS120 allarme. </g>
TRG;

    $source_seg = CatUtils::view2rawxliff($source_seg);
    $target_seg = CatUtils::view2rawxliff($target_seg);

    $check = new PostProcess($source_seg, $target_seg);
    $check->realignMTSpaces();

    $warnings = $check->getWarnings();
    $errors   = $check->getErrors();

    $this->assertFalse($check->thereAreErrors());
    $this->assertFalse($check->thereAreWarnings());

    // expected value first, actual value second, so failure messages read correctly
    $this->assertEquals(1, count($warnings));
    $this->assertEquals(0, $warnings[0]->outcome);

    $this->assertEquals(1, count($errors));
    $this->assertEquals(0, $errors[0]->outcome);

    $normalized = $check->getTrgNormalized();

    // trick: the strings are not exactly the same, there's a tab between the tags in the source string
    $this->assertEquals('<g id="1877">31-235</g><g id="1878">L\'impostazione predefinita PR IS120 allarme.</g>', $normalized);
}