/**
 * Collects the translation suggestions for the current request and stores them
 * in $this->result['data']['matches'].
 *
 * Steps, in order:
 *  - validate the request fields (id_segment is required only when this is not a
 *    concordance search) and default num_results;
 *  - load the job row and verify the password;
 *  - pick the language pair (swapped when a concordance search asks for it);
 *  - query the TM engine (when enabled for the job) and/or the custom MT engine;
 *  - merge, rank and truncate the matches, try a tag ID realignment on the first
 *    match when it scores in [90, 100), then normalize every match for the client.
 *
 * Errors are appended to $this->result['errors'].
 *
 * @return int|null -1 when validation, the password check or the TM keys lookup fails,
 *                  nothing (null) when the method runs to completion
 */
public function doAction()
 {
     if (!$this->concordance_search) {
         //execute these lines only in segment contribution search,
         //in case of user concordance search skip these lines
         //because segment can be optional
         if (empty($this->id_segment)) {
             $this->result['errors'][] = array("code" => -1, "message" => "missing id_segment");
         }
     }
     if (is_null($this->text) || $this->text === '') {
         $this->result['errors'][] = array("code" => -2, "message" => "missing text");
     }
     if (empty($this->id_job)) {
         $this->result['errors'][] = array("code" => -3, "message" => "missing id_job");
     }
     if (empty($this->num_results)) {
         $this->num_results = INIT::$DEFAULT_NUM_RESULTS_FROM_TM;
     }
     if (!empty($this->result['errors'])) {
         return -1;
     }
     //get Job Infos, we need only a row of jobs ( split )
     $this->jobData = getJobData($this->id_job, $this->password);
     $pCheck = new AjaxPasswordCheck();
     //check for Password correctness
     if (empty($this->jobData) || !$pCheck->grantJobAccessByJobData($this->jobData, $this->password)) {
         $this->result['errors'][] = array("code" => -10, "message" => "wrong password");
         return -1;
     }
     /*
      * string manipulation strategy
      *
      */
     if (!$this->concordance_search) {
         //segment contribution search: convert the text and use the job language pair as is
         $this->text = CatUtils::view2rawxliff($this->text);
         $this->source = $this->jobData['source'];
         $this->target = $this->jobData['target'];
     } else {
         //pattern => replacement map, applied to every returned segment at the end of this method
         $regularExpressions = $this->tokenizeSourceSearch();
         if ($this->switch_languages) {
             /*
              *
              * switch languages from user concordances search on the target language value
              * Example:
              * Job is in
              *      source: it_IT,
              *      target: de_DE
              *
              * user perform a right click for concordance help on a german word or phrase
              * we want result in italian from german source
              *
              */
             $this->source = $this->jobData['target'];
             $this->target = $this->jobData['source'];
         } else {
             $this->source = $this->jobData['source'];
             $this->target = $this->jobData['target'];
         }
     }
     $this->id_mt_engine = $this->jobData['id_mt_engine'];
     $this->id_tms = $this->jobData['id_tms'];
     $this->tm_keys = $this->jobData['tm_keys'];
     $config = array();
     if ($this->id_tms == 1) {
         /**
          * MyMemory Enabled
          */
         $config['get_mt'] = true;
         $config['mt_only'] = false;
         if ($this->id_mt_engine != 1) {
             /**
              * Don't get MT contribution from MyMemory ( Custom MT )
              */
             $config['get_mt'] = false;
         }
         $_TMS = $this->id_tms;
     } else {
         if ($this->id_tms == 0 && $this->id_mt_engine == 1) {
             /**
              * MyMemory disabled but MT Enabled and it is NOT a Custom one
              * So tell to MyMemory to get MT only
              */
             $config['get_mt'] = true;
             $config['mt_only'] = true;
             $_TMS = 1;
             /* MyMemory */
         }
     }
     /**
      * if No TM server and No MT selected $_TMS is not defined
      * so we want not to perform TMS Call
      *
      */
     if (isset($_TMS)) {
         /**
          * @var $tms Engines_MyMemory
          */
         $tms = Engine::getInstance($_TMS);
         //the flags set above take precedence over the engine defaults
         $config = array_merge($tms->getConfigStruct(), $config);
         $config['segment'] = $this->text;
         $config['source'] = $this->source;
         $config['target'] = $this->target;
         $config['email'] = INIT::$MYMEMORY_API_KEY;
         $config['id_user'] = array();
         $config['num_result'] = $this->num_results;
         $config['isConcordance'] = $this->concordance_search;
         //get job's TM keys
         $this->checkLogin();
         try {
             if (self::isRevision()) {
                 $this->userRole = TmKeyManagement_Filter::ROLE_REVISOR;
             }
             $tm_keys = TmKeyManagement_TmKeyManagement::getJobTmKeys($this->tm_keys, 'r', 'tm', $this->uid, $this->userRole);
             if (is_array($tm_keys) && !empty($tm_keys)) {
                 foreach ($tm_keys as $tm_key) {
                     $config['id_user'][] = $tm_key->key;
                 }
             }
         } catch (Exception $e) {
             $this->result['errors'][] = array("code" => -11, "message" => "Cannot retrieve TM keys info.");
             Log::doLog($e->getMessage());
             //same failure value used by the other error exits of this method
             return -1;
         }
         $tms_match = $tms->get($config);
         $tms_match = $tms_match->get_matches_as_array();
     }
     if ($this->id_mt_engine > 1) {
         /**
          * @var $mt Engines_Moses
          */
         $mt = Engine::getInstance($this->id_mt_engine);
         $config = $mt->getConfigStruct();
         $config['segment'] = $this->text;
         $config['source'] = $this->source;
         $config['target'] = $this->target;
         $config['id_user'] = INIT::$MYMEMORY_API_KEY;
         $config['segid'] = $this->id_segment;
         $mt_result = $mt->get($config);
         if (isset($mt_result['error']['code'])) {
             //report the engine error to the client and drop the MT contribution
             $mt_result['error']['created_by_type'] = 'MT';
             $this->result['errors'][] = $mt_result['error'];
             $mt_result = false;
         }
     }
     //$tms_match and $mt_result may be undefined here: empty() tolerates that
     $matches = array();
     if (!empty($tms_match)) {
         $matches = $tms_match;
     }
     if (!empty($mt_result)) {
         $matches[] = $mt_result;
         usort($matches, array("getContributionController", "__compareScore"));
         //this is necessary since usort sorts is ascending order, thus inverting the ranking
         $matches = array_reverse($matches);
     }
     $matches = array_slice($matches, 0, $this->num_results);
     if (isset($matches[0]['match'])) {
         $firstMatchVal = floatval($matches[0]['match']);
     }
     if (isset($firstMatchVal) && $firstMatchVal >= 90 && $firstMatchVal < 100) {
         $srcSearch = strip_tags($this->text);
         $segmentFound = strip_tags($matches[0]['raw_segment']);
         //collapse runs of spaces and compare case-insensitively
         $srcSearch = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $srcSearch));
         $segmentFound = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $segmentFound));
         //when both strings are empty once the tags are stripped log10(1) is 0:
         //skip the division (it would be a division by zero) and use distance 0, the strings being identical
         $normalizer = log10(mb_strlen($srcSearch . $segmentFound) + 1);
         $fuzzy = $normalizer > 0 ? levenshtein($srcSearch, $segmentFound) / $normalizer : 0;
         //levenshtein handle max 255 chars per string and returns -1, so fuzzy var can be less than 0 !!
         if ($srcSearch == $segmentFound || $fuzzy < 2.5 && $fuzzy >= 0) {
             $qaRealign = new QA($this->text, html_entity_decode($matches[0]['raw_translation']));
             $qaRealign->tryRealignTagID();
             $log_prepend = "CLIENT REALIGN IDS PROCEDURE | ";
             if (!$qaRealign->thereAreErrors()) {
                 /*
                 Log::doLog( $log_prepend . " - Requested Segment: " . var_export( $this->__postInput, true) );
                 Log::doLog( $log_prepend . "Fuzzy: " . $fuzzy .  " - Try to Execute Tag ID Realignment." );
                 Log::doLog( $log_prepend . "TMS RAW RESULT:" );
                 Log::doLog( $log_prepend . var_export($matches[0], true) );
                 Log::doLog( $log_prepend . "Realignment Success:");
                 */
                 $matches[0]['segment'] = CatUtils::rawxliff2view($this->text);
                 $matches[0]['translation'] = CatUtils::rawxliff2view($qaRealign->getTrgNormalized());
                 $matches[0]['match'] = $fuzzy == 0 ? '100%' : '99%';
                 /*
                                     Log::doLog( $log_prepend . "View Segment:     " . var_export($matches[0]['segment'], true) );
                                     Log::doLog( $log_prepend . "View Translation: " . var_export($matches[0]['translation'], true) );
                 */
             } else {
                 Log::doLog($log_prepend . 'Realignment Failed. Skip. Segment: ' . $this->__postInput['id_segment']);
             }
         }
     }
     /* New Feature only if this is not a MT and if it is a ( 90 =< MATCH < 100 ) */
     if (!$this->concordance_search) {
         //execute these lines only in segment contribution search,
         //in case of user concordance search skip these lines
         $res = $this->setSuggestionReport($matches);
         if (is_array($res) and array_key_exists("error", $res)) {
             //best effort: the matches are still returned to the client, but leave a trace of the failure
             Log::doLog("setSuggestionReport error: " . var_export($res['error'], true));
         }
     }
     foreach ($matches as &$match) {
         if (strpos($match['created_by'], 'MT') !== false) {
             $match['match'] = 'MT';
             $QA = new PostProcess($match['raw_segment'], $match['raw_translation']);
             $QA->realignMTSpaces();
             //this should every time be ok because MT preserve tags, but we use the check on the errors
             //for logic correctness
             if (!$QA->thereAreErrors()) {
                 $match['raw_translation'] = $QA->getTrgNormalized();
                 $match['translation'] = CatUtils::rawxliff2view($match['raw_translation']);
             } else {
                 Log::doLog($QA->getErrors());
             }
         }
         if ($match['created_by'] == 'MT!') {
             $match['created_by'] = 'MT';
             //MyMemory returns MT!
         } else {
             $match['created_by'] = $this->__changeSuggestionSource($match);
         }
         if (!empty($match['sentence_confidence'])) {
             $match['sentence_confidence'] = round($match['sentence_confidence'], 0) . "%";
         }
         if ($this->concordance_search) {
             $match['segment'] = strip_tags(html_entity_decode($match['segment']));
             $match['segment'] = preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $match['segment']);
             //Do something with &$match, tokenize strings and send to client
             $match['segment'] = preg_replace(array_keys($regularExpressions), array_values($regularExpressions), $match['segment']);
             $match['translation'] = strip_tags(html_entity_decode($match['translation']));
         }
     }
     //break the reference left by the loop so the last element of $matches is a plain value again
     unset($match);
     $this->result['data']['matches'] = $matches;
 }
예제 #2
0
    /**
     * Runs PostProcess::realignMTSpaces() on a real-world pair of tagged strings whose
     * target has extra spaces inside the tags, and checks that no error or warning is
     * raised and that the normalized target has those spaces removed.
     */
    public function testRealString1()
    {
        $source_seg = <<<SRC
<g id="1877">31-235</g>\t<g id="1878">The default PR upper alarm is120.</g>
SRC;
        $target_seg = <<<TRG
<g id="1877"> 31-235 </g><g id="1878"> L'impostazione predefinita PR IS120 allarme. </g>
TRG;
        $source_seg = CatUtils::view2rawxliff($source_seg);
        $target_seg = CatUtils::view2rawxliff($target_seg);
        $check = new PostProcess($source_seg, $target_seg);
        $check->realignMTSpaces();
        $warnings = $check->getWarnings();
        $errors = $check->getErrors();
        $this->assertFalse($check->thereAreErrors());
        $this->assertFalse($check->thereAreWarnings());
        //both lists are expected to hold a single entry whose outcome is 0
        $this->assertEquals(1, count($warnings));
        $this->assertEquals(0, $warnings[0]->outcome);
        $this->assertEquals(1, count($errors));
        $this->assertEquals(0, $errors[0]->outcome);
        $normalized = $check->getTrgNormalized();
        //trick strings are not exactly the same .. there's a tab between tags in source string
        $this->assertEquals('<g id="1877">31-235</g><g id="1878">L\'impostazione predefinita PR IS120 allarme.</g>', $normalized);
    }
예제 #3
0
 // Keep the first match's score and origin, plus a JSON copy of the whole match list.
 $suggestion_json = json_encode($matches);
 $suggestion_match = $matches[0]['match'];
 $suggestion_source = $matches[0]['created_by'];
 // Map of match type => percentage applied to the raw word count.
 $equivalentWordMapping = json_decode($payable_rates, true);
 $new_match_type = getNewMatchType(
     $tm_match_type,
     $fast_match_type,
     $equivalentWordMapping,
     empty($matches[0]['memory_key'])
 );
 $eq_words = $equivalentWordMapping[$new_match_type] * $raw_wc / 100;
 // Standard words equal the equivalent words unless the match turns out to be MT (see below).
 $standard_words = $eq_words;
 if ($new_match_type != 'MT') {
     // Not MT: run the tag check only.
     $check = new PostProcess($text, $suggestion);
     $check->performTagCheckOnly();
     //_TimeStampMsg( $check->getErrors() );
     $err_json = $check->thereAreErrors() ? $check->getErrorsJSON() : '';
 } else {
     // The first match is MT: count standard words with the NO_MATCH rate and realign the spaces.
     $standard_words = $equivalentWordMapping["NO_MATCH"] * $raw_wc / 100;
     $check = new PostProcess($matches[0]['raw_segment'], $suggestion);
     $check->realignMTSpaces();
     // MT is expected to preserve tags, so errors should not happen here;
     // the check is kept for logic correctness.
     if ($check->thereAreErrors()) {
         $err_json = $check->getErrorsJSON();
     } else {
         $err_json = '';
         $suggestion = CatUtils::view2rawxliff($check->getTrgNormalized());
     }
 }
예제 #4
0
 /**
  * Stores the first entry of $this->_matches as the suggestion of the analysed segment
  * and then updates the project counters.
  *
  * The match type and the word counts are derived from the payable rates carried by the
  * queue element. The suggestion goes through PostProcess (space realignment for MT,
  * tag check otherwise) and the resulting row is handed to setSuggestionUpdate().
  *
  * @param QueueElement $queueElement element whose params describe the segment under analysis
  *
  * @throws Exception
  * @throws ReQueueException when setSuggestionUpdate() returns a negative value
  */
 protected function _updateRecord(QueueElement $queueElement)
 {
     $params = $queueElement->params;
     $bestMatch = $this->_matches[0];

     // Anything created by an MT engine is typed "MT", whatever its score is.
     $tm_match_type = stripos($bestMatch['created_by'], "MT") !== false ? "MT" : $bestMatch['match'];

     // Close the <x ...> tags left unclosed inside the suggestion.
     $suggestion = preg_replace('|<x([^/]*?)>|', '<x\\1/>', \CatUtils::view2rawxliff($bestMatch['raw_translation']));

     $equivalentWordMapping = json_decode($params->payable_rates, true);
     $new_match_type = $this->_getNewMatchType(
         $tm_match_type,
         $params->match_type,
         $equivalentWordMapping,
         empty($bestMatch['memory_key'])
     );
     $eq_words = $equivalentWordMapping[$new_match_type] * $params->raw_word_count / 100;

     if ($new_match_type != 'MT') {
         // Not MT: standard words equal the equivalent words, and only the tag check runs.
         $standard_words = $eq_words;
         $check = new \PostProcess($params->segment, $suggestion);
         $check->performTagCheckOnly();
         //_TimeStampMsg( $check->getErrors() );
         $err_json = $check->thereAreErrors() ? $check->getErrorsJSON() : '';
     } else {
         // MT: standard words use the NO_MATCH rate and the spaces get realigned.
         $standard_words = $equivalentWordMapping["NO_MATCH"] * $params->raw_word_count / 100;
         $check = new \PostProcess($bestMatch['raw_segment'], $suggestion);
         $check->realignMTSpaces();
         // MT is expected to preserve tags, so errors should not happen here;
         // the check is kept for logic correctness.
         if ($check->thereAreErrors()) {
             $err_json = $check->getErrorsJSON();
         } else {
             $suggestion = \CatUtils::view2rawxliff($check->getTrgNormalized());
             $err_json = '';
         }
     }

     $mt_qe = !empty($bestMatch['sentence_confidence']) ? floatval($bestMatch['sentence_confidence']) : null;

     // A non-empty source is reduced to either 'MT' or 'TM'.
     $suggestion_source = $bestMatch['created_by'];
     if (!empty($suggestion_source)) {
         $suggestion_source = strpos($suggestion_source, "MT") === false ? 'TM' : 'MT';
     }

     $tm_data = array(
         'id_job' => $params->id_job,
         'id_segment' => $params->id_segment,
         'suggestions_array' => json_encode($this->_matches),
         'suggestion' => $suggestion,
         'match_type' => $new_match_type,
         'eq_word_count' => $eq_words,
         'standard_word_count' => $standard_words,
         'translation' => $suggestion,
         'tm_analysis_status' => "DONE",
         'warning' => (int) $check->thereAreErrors(),
         'serialized_errors_list' => $err_json,
         'mt_qe' => $mt_qe,
         'suggestion_source' => $suggestion_source,
         'suggestion_match' => $bestMatch['match'],
     );
     // Exact matches are marked as translated when the pretranslate_100 param is set.
     if ($tm_data['suggestion_match'] == "100%" && $params->pretranslate_100) {
         $tm_data['status'] = \Constants_TranslationStatus::STATUS_TRANSLATED;
     }

     $updateRes = setSuggestionUpdate($tm_data);
     if ($updateRes < 0) {
         $failure = "**** Error occurred during the storing (UPDATE) of the suggestions for the segment {$tm_data['id_segment']}";
         $this->_doLog($failure);
         throw new ReQueueException($failure, self::ERR_REQUEUE);
     }
     $rowLabel = $tm_data['id_segment'] . "-" . $tm_data['id_job'];
     if ($updateRes == 0) {
         //There was not a fast Analysis??? Impossible.
         $this->_doLog("No row found: " . $rowLabel);
     } else {
         $this->_doLog("Row found: " . $rowLabel . " - UPDATED.");
     }

     //set redis cache
     $pid = $params->pid;
     $this->_incrementAnalyzedCount($pid, $eq_words, $standard_words);
     $this->_decSegmentsToAnalyzeOfWaitingProjects($pid);
     $this->_tryToCloseProject($pid);
 }