Esempio n. 1
0
    /**
     * Runs QA::performConsistencyCheck() on a source/target pair whose <g> tags
     * match but whose spacing inside the tags differs, and verifies that only
     * notices are raised and that the normalized target equals the expected text.
     *
     * The warning and error lists are each expected to hold a single entry with
     * outcome 0 while thereAreWarnings()/thereAreErrors() report false
     * (presumably a "no issue" placeholder; confirm against the QA class).
     */
    public function testSpaces_1()
    {
        $source_seg = <<<SRC
<g id="pt2">WASHINGTON </g><g id="pt3">— The Treasury Department and Internal Revenue Service today requested public comment on issues relating to the shared responsibility provisions included in the Affordable Care Act that will apply to certain employers starting in 2014.</g>
SRC;
        $target_seg = <<<TRG
<g id="pt2"> WASHINGTON </g><g id="pt3">- Il Dipartimento del Tesoro e Internal Revenue Service di oggi hanno chiesto un commento pubblico sulle questioni relative alle disposizioni di responsabilità condivise incluse nel Affordable Care Act che verranno applicate a certi datori di lavoro a partire dal 2014. </g>
TRG;
        $source_seg = CatUtils::view2rawxliff($source_seg);
        $target_seg = CatUtils::view2rawxliff($target_seg);

        $check = new QA($source_seg, $target_seg);
        $check->performConsistencyCheck();

        $notices = $check->getNotices();
        $warnings = $check->getWarnings();
        $errors = $check->getErrors();

        $this->assertFalse($check->thereAreErrors());
        $this->assertFalse($check->thereAreWarnings());
        $this->assertTrue($check->thereAreNotices());

        // Expected value goes first so that a failure message reports the
        // right "expected"/"actual" pair.
        $this->assertCount(2, $notices);
        $this->assertEquals(1100, $notices[0]->outcome);
        $this->assertCount(1, $warnings);
        $this->assertEquals(0, $warnings[0]->outcome);
        $this->assertCount(1, $errors);
        $this->assertEquals(0, $errors[0]->outcome);

        $normalized = $check->getTrgNormalized();
        $this->assertEquals('<g id="pt2"> WASHINGTON </g><g id="pt3">- Il Dipartimento del Tesoro e Internal Revenue Service di oggi hanno chiesto un commento pubblico sulle questioni relative alle disposizioni di responsabilità condivise incluse nel Affordable Care Act che verranno applicate a certi datori di lavoro a partire dal 2014. </g>', $normalized);
    }
 /**
  * Validates the request, loads the job, asks the TM server and/or the MT engine
  * for contributions to the current text and stores the resulting matches in
  * $this->result['data']['matches'].
  *
  * Validation and lookup failures are appended to $this->result['errors'].
  *
  * @return int|null -1 when the request is rejected (missing fields, wrong password,
  *                  TM keys not retrievable); no value once the matches have been stored.
  */
 public function doAction()
 {
     if (!$this->concordance_search) {
         // id_segment is mandatory only for a segment contribution search;
         // a user concordance search may omit it.
         if (empty($this->id_segment)) {
             $this->result['errors'][] = array("code" => -1, "message" => "missing id_segment");
         }
     }
     if (is_null($this->text) || $this->text === '') {
         $this->result['errors'][] = array("code" => -2, "message" => "missing text");
     }
     if (empty($this->id_job)) {
         $this->result['errors'][] = array("code" => -3, "message" => "missing id_job");
     }
     if (empty($this->num_results)) {
         $this->num_results = INIT::$DEFAULT_NUM_RESULTS_FROM_TM;
     }
     if (!empty($this->result['errors'])) {
         return -1;
     }

     // Get the job info; only one row of jobs (split) is needed.
     $this->jobData = getJobData($this->id_job, $this->password);
     $pCheck = new AjaxPasswordCheck();
     // Check the password.
     if (empty($this->jobData) || !$pCheck->grantJobAccessByJobData($this->jobData, $this->password)) {
         $this->result['errors'][] = array("code" => -10, "message" => "wrong password");
         return -1;
     }

     /*
      * String manipulation strategy
      */
     if (!$this->concordance_search) {
         $this->text = CatUtils::view2rawxliff($this->text);
         $this->source = $this->jobData['source'];
         $this->target = $this->jobData['target'];
     } else {
         $regularExpressions = $this->tokenizeSourceSearch();
         if ($this->switch_languages) {
             /*
              * Switch languages when the user runs a concordance search on a
              * target-language value.
              * Example:
              * Job is in
              *      source: it_IT,
              *      target: de_DE
              *
              * The user right-clicks a German word or phrase for concordance help:
              * we want results in Italian from the German source.
              */
             $this->source = $this->jobData['target'];
             $this->target = $this->jobData['source'];
         } else {
             $this->source = $this->jobData['source'];
             $this->target = $this->jobData['target'];
         }
     }

     $this->id_mt_engine = $this->jobData['id_mt_engine'];
     $this->id_tms = $this->jobData['id_tms'];
     $this->tm_keys = $this->jobData['tm_keys'];

     $config = array();
     if ($this->id_tms == 1) {
         /**
          * MyMemory enabled
          */
         $config['get_mt'] = true;
         $config['mt_only'] = false;
         if ($this->id_mt_engine != 1) {
             /**
              * Don't get the MT contribution from MyMemory (custom MT)
              */
             $config['get_mt'] = false;
         }
         $_TMS = $this->id_tms;
     } else {
         if ($this->id_tms == 0 && $this->id_mt_engine == 1) {
             /**
              * MyMemory disabled but MT enabled and it is NOT a custom one,
              * so tell MyMemory to return MT only
              */
             $config['get_mt'] = true;
             $config['mt_only'] = true;
             $_TMS = 1;
             /* MyMemory */
         }
     }

     /**
      * If neither a TM server nor MT is selected, $_TMS is not defined
      * and the TMS call is skipped.
      */
     if (isset($_TMS)) {
         /**
          * @var $tms Engines_MyMemory
          */
         $tms = Engine::getInstance($_TMS);
         $config = array_merge($tms->getConfigStruct(), $config);
         $config['segment'] = $this->text;
         $config['source'] = $this->source;
         $config['target'] = $this->target;
         $config['email'] = INIT::$MYMEMORY_API_KEY;
         $config['id_user'] = array();
         $config['num_result'] = $this->num_results;
         $config['isConcordance'] = $this->concordance_search;

         // Get the job's TM keys.
         $this->checkLogin();
         try {
             if (self::isRevision()) {
                 $this->userRole = TmKeyManagement_Filter::ROLE_REVISOR;
             }
             $tm_keys = TmKeyManagement_TmKeyManagement::getJobTmKeys($this->tm_keys, 'r', 'tm', $this->uid, $this->userRole);
             if (is_array($tm_keys) && !empty($tm_keys)) {
                 foreach ($tm_keys as $tm_key) {
                     $config['id_user'][] = $tm_key->key;
                 }
             }
         } catch (Exception $e) {
             $this->result['errors'][] = array("code" => -11, "message" => "Cannot retrieve TM keys info.");
             Log::doLog($e->getMessage());
             // Same failure code as the other rejected-request exits above.
             return -1;
         }

         $tms_match = $tms->get($config);
         $tms_match = $tms_match->get_matches_as_array();
     }

     if ($this->id_mt_engine > 1) {
         /**
          * @var $mt Engines_Moses
          */
         $mt = Engine::getInstance($this->id_mt_engine);
         $config = $mt->getConfigStruct();
         $config['segment'] = $this->text;
         $config['source'] = $this->source;
         $config['target'] = $this->target;
         $config['id_user'] = INIT::$MYMEMORY_API_KEY;
         $config['segid'] = $this->id_segment;
         $mt_result = $mt->get($config);
         if (isset($mt_result['error']['code'])) {
             $mt_result['error']['created_by_type'] = 'MT';
             $this->result['errors'][] = $mt_result['error'];
             $mt_result = false;
         }
     }

     $matches = array();
     if (!empty($tms_match)) {
         $matches = $tms_match;
     }
     if (!empty($mt_result)) {
         $matches[] = $mt_result;
         usort($matches, array("getContributionController", "__compareScore"));
         // usort sorts in ascending order, which inverts the ranking: reverse it.
         $matches = array_reverse($matches);
     }
     $matches = array_slice($matches, 0, $this->num_results);

     // Tag ID realignment: attempted only when the first match is in 90 <= match < 100.
     $firstMatchVal = isset($matches[0]['match']) ? floatval($matches[0]['match']) : null;
     if ($firstMatchVal !== null && $firstMatchVal >= 90 && $firstMatchVal < 100) {
         $srcSearch = strip_tags($this->text);
         $segmentFound = strip_tags($matches[0]['raw_segment']);
         $srcSearch = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $srcSearch));
         $segmentFound = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $segmentFound));

         if ($srcSearch === $segmentFound) {
             // Identical text: distance is zero by definition. Short-circuiting also
             // avoids 0 / log10(1) (division by zero) when both strings are empty,
             // e.g. for tag-only segments.
             $fuzzy = 0;
         } else {
             // At least one string is non-empty here, so the divisor is > 0.
             // Before PHP 8.0 levenshtein() handles at most 255 chars per string and
             // returns -1 beyond that, so $fuzzy can be negative.
             $fuzzy = levenshtein($srcSearch, $segmentFound) / log10(mb_strlen($srcSearch . $segmentFound) + 1);
         }

         if ($fuzzy >= 0 && $fuzzy < 2.5) {
             $qaRealign = new QA($this->text, html_entity_decode($matches[0]['raw_translation']));
             $qaRealign->tryRealignTagID();
             $log_prepend = "CLIENT REALIGN IDS PROCEDURE | ";
             if (!$qaRealign->thereAreErrors()) {
                 $matches[0]['segment'] = CatUtils::rawxliff2view($this->text);
                 $matches[0]['translation'] = CatUtils::rawxliff2view($qaRealign->getTrgNormalized());
                 $matches[0]['match'] = $fuzzy == 0 ? '100%' : '99%';
             } else {
                 Log::doLog($log_prepend . 'Realignment Failed. Skip. Segment: ' . $this->__postInput['id_segment']);
             }
         }
     }

     if (!$this->concordance_search) {
         // Executed only for a segment contribution search; skipped for a
         // user concordance search.
         $res = $this->setSuggestionReport($matches);
         if (is_array($res) and array_key_exists("error", $res)) {
             // NOTE(review): an error reported here is currently ignored.
         }
     }

     foreach ($matches as &$match) {
         if (strpos($match['created_by'], 'MT') !== false) {
             $match['match'] = 'MT';
             $QA = new PostProcess($match['raw_segment'], $match['raw_translation']);
             $QA->realignMTSpaces();
             // This should always succeed because MT preserves tags, but the
             // errors are checked anyway for logical correctness.
             if (!$QA->thereAreErrors()) {
                 $match['raw_translation'] = $QA->getTrgNormalized();
                 $match['translation'] = CatUtils::rawxliff2view($match['raw_translation']);
             } else {
                 Log::doLog($QA->getErrors());
             }
         }
         if ($match['created_by'] == 'MT!') {
             // MyMemory returns "MT!"
             $match['created_by'] = 'MT';
         } else {
             $match['created_by'] = $this->__changeSuggestionSource($match);
         }
         if (!empty($match['sentence_confidence'])) {
             $match['sentence_confidence'] = round($match['sentence_confidence'], 0) . "%";
         }
         if ($this->concordance_search) {
             $match['segment'] = strip_tags(html_entity_decode($match['segment']));
             $match['segment'] = preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $match['segment']);
             // Apply the search-term expressions built by tokenizeSourceSearch()
             // before sending the segment to the client.
             $match['segment'] = preg_replace(array_keys($regularExpressions), array_values($regularExpressions), $match['segment']);
             $match['translation'] = strip_tags(html_entity_decode($match['translation']));
         }
     }
     // Break the reference left by the by-reference foreach so the last
     // element of $matches cannot be modified through $match afterwards.
     unset($match);

     $this->result['data']['matches'] = $matches;
 }
 /**
  * Appends one segment, wrapped in its surrounding tags, to the text of a trans-unit.
  *
  * The segment source is used when there is no translation. For a non-locked segment
  * whose translation fails the QA tag check, the source is emitted between the
  * UNTRANSLATED_CONTENT markers instead and the mismatch is logged.
  *
  * @param array  $seg                    segment row; reads 'segment', 'translation', 'locked',
  *                                       'mrk_id', 'mrk_prev_tags', 'mrk_succ_tags',
  *                                       'prev_tags' and 'succ_tags'
  * @param string $trans_unit_translation text accumulated so far for the trans-unit
  *
  * @return string the accumulated text with this segment appended
  */
 protected function prepareSegment($seg, $trans_unit_translation = "")
 {
     // No view -> xliff transformation is needed: the values come from the database.
     // A source/source QA check makes no sense, so it runs only when a translation exists.
     $hasTranslation = !(is_null($seg['translation']) || $seg['translation'] == '');
     $content = $hasTranslation ? $seg['translation'] : $seg['segment'];

     if ($hasTranslation && empty($seg['locked'])) {
         // Consistency check on tags only.
         $tagCheck = new QA($seg['segment'], $content);
         $tagCheck->performTagCheckOnly();
         if ($tagCheck->thereAreErrors()) {
             $content = '|||UNTRANSLATED_CONTENT_START|||' . $seg['segment'] . '|||UNTRANSLATED_CONTENT_END|||';
             Log::doLog("tag mismatch on\n" . print_r($seg, true) . "\n(because of: " . print_r($tagCheck->getErrors(), true) . ")");
         }
     }

     if ($seg['mrk_id'] !== null) {
         // Re-wrap the content in its <mrk> segmentation marker.
         $content = "<mrk mid=\"" . $seg['mrk_id'] . "\" mtype=\"seg\">" . $seg['mrk_prev_tags'] . $content . $seg['mrk_succ_tags'] . "</mrk>";
     }

     return $trans_unit_translation . $seg['prev_tags'] . $content . $seg['succ_tags'];
 }
Esempio n. 4
0
 // Flag the match type as MT when the first match was created by an MT source.
 if (stripos($matches[0]['created_by'], "MT") !== false) {
     $tm_match_type = "MT";
 }
 // $firstMatchVal is set only when the first match carries a 'match' score.
 isset($matches[0]['match']) ? $firstMatchVal = floatval($matches[0]['match']) : null;
 // Tag ID realignment is attempted only for scores in 90 <= match < 100.
 if (isset($firstMatchVal) && $firstMatchVal >= 90 && $firstMatchVal < 100) {
     // Compare the two sources without tags, with runs of spaces collapsed, lower-cased.
     $srcSearch = strip_tags($text);
     $segmentFound = strip_tags($matches[0]['raw_segment']);
     $srcSearch = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $srcSearch));
     $segmentFound = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $segmentFound));
     // Edit distance scaled by log10 of the combined length (+1).
     // NOTE(review): when both strings are empty this is 0 / log10(1), i.e. a division by zero.
     $fuzzy = @levenshtein($srcSearch, $segmentFound) / log10(mb_strlen($srcSearch . $segmentFound) + 1);
     // levenshtein handles at most 255 chars per string and returns -1 beyond that
     // (before PHP 8.0), so $fuzzy can be less than 0.
     // && binds tighter than ||: identical text, OR a distance strictly between 0 and 2.5.
     if ($srcSearch == $segmentFound || $fuzzy < 2.5 && $fuzzy > 0) {
         $qaRealign = new QA($text, html_entity_decode($matches[0]['raw_translation']));
         $qaRealign->tryRealignTagID();
         $log_prepend = $UNIQUID . " - SERVER REALIGN IDS PROCEDURE | ";
         if (!$qaRealign->thereAreErrors()) {
             /*
             _TimeStampMsg( $log_prepend . " - Requested Segment: " . var_export( $objQueue, true ) );
             _TimeStampMsg( $log_prepend . "Fuzzy: " . $fuzzy . " - Try to Execute Tag ID Realignment." );
             _TimeStampMsg( $log_prepend . "TMS RAW RESULT:" );
             _TimeStampMsg( $log_prepend . var_export( $matches[ 0 ], true ) );

             _TimeStampMsg( $log_prepend . "Realignment Success:" );
             */
             // Realignment succeeded: keep the normalized translation and raise the score
             // ('100%' for a zero distance, '99%' otherwise).
             $matches[0]['raw_translation'] = $qaRealign->getTrgNormalized();
             $matches[0]['match'] = $fuzzy == 0 ? '100%' : '99%';
             //_TimeStampMsg( $log_prepend . "Raw Translation: " . var_export( $matches[ 0 ]['raw_translation'], true ) );
         } else {
             _TimeStampMsg($log_prepend . 'Realignment Failed. Skip. Segment: ' . $objQueue['id_segment']);
         }
     }
Esempio n. 5
0
 /**
  * Tries to realign the tag IDs of the first match with those of the requested segment.
  *
  * Runs only when the first match scores in the range 90 <= match < 100 and its source,
  * once stripped of tags, lower-cased and with runs of spaces collapsed, is identical or
  * very close to the requested segment. On success the first match receives the
  * normalized translation and its score becomes '100%' (identical text) or '99%'.
  *
  * NOTE(review): the previous header stated this applies only to non-MT matches, but no
  * such check is made here; presumably the caller filters them. Confirm.
  *
  * @param QueueElement $queueElement element whose params->segment and params->id_segment are read
  *
  */
 protected function _tryRealignTagID(QueueElement $queueElement)
 {
     $firstMatchVal = isset($this->_matches[0]['match']) ? floatval($this->_matches[0]['match']) : null;
     if ($firstMatchVal === null || $firstMatchVal < 90 || $firstMatchVal >= 100) {
         return;
     }

     $srcSearch = strip_tags($queueElement->params->segment);
     $segmentFound = strip_tags($this->_matches[0]['raw_segment']);
     $srcSearch = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $srcSearch));
     $segmentFound = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $segmentFound));

     if ($srcSearch === $segmentFound) {
         // Identical text: distance is zero by definition. Short-circuiting avoids
         // 0 / log10(1) (division by zero) when both strings are empty, and keeps
         // identical strings longer than 255 chars at '100%' on PHP < 8.
         $fuzzy = 0;
     } else {
         // At least one string is non-empty here, so the divisor is > 0.
         // Before PHP 8.0 levenshtein() handles at most 255 chars per string: it warns
         // (hence the @) and returns -1, so $fuzzy can be negative.
         $fuzzy = @levenshtein($srcSearch, $segmentFound) / log10(mb_strlen($srcSearch . $segmentFound) + 1);
     }

     if ($fuzzy < 0 || $fuzzy >= 2.5) {
         return;
     }

     $qaRealign = new \QA($queueElement->params->segment, html_entity_decode($this->_matches[0]['raw_translation']));
     $qaRealign->tryRealignTagID();
     $log_prepend = uniqid('', true) . " - SERVER REALIGN IDS PROCEDURE | ";
     if (!$qaRealign->thereAreErrors()) {
         $this->_matches[0]['raw_translation'] = $qaRealign->getTrgNormalized();
         $this->_matches[0]['match'] = $fuzzy == 0 ? '100%' : '99%';
     } else {
         $this->_doLog($log_prepend . 'Realignment Failed. Skip. Segment: ' . $queueElement->params->id_segment);
     }
 }
 // Serialize all matches and remember where the best one comes from.
 $suggestion_json = json_encode($matches);
 $suggestion_source = $matches[0]['created_by'];
 // Payable rates decoded as an associative array, indexed below by match type.
 $equivalentWordMapping = json_decode($payable_rates, true);
 $new_match_type = getNewMatchType($tm_match_type, $fast_match_type, $equivalentWordMapping, empty($matches[0]['memory_key']));
 //echo "sid is $sid ";
 // Equivalent words: raw word count weighted by the rate (a percentage) of the match type.
 $eq_words = $equivalentWordMapping[$new_match_type] * $raw_wc / 100;
 $standard_words = $eq_words;
 // For MT matches the standard word count uses the NO_MATCH rate instead.
 if ($new_match_type == 'MT') {
     $standard_words = $equivalentWordMapping["NO_MATCH"] * $raw_wc / 100;
 }
 // MT quality estimation: the sentence confidence as a float, or null when absent/empty.
 !empty($matches[0]['sentence_confidence']) ? $mt_qe = floatval($matches[0]['sentence_confidence']) : ($mt_qe = null);
 // Tag-only QA check of the suggestion against the source text.
 $check = new QA($text, $suggestion);
 $check->performTagCheckOnly();
 //log::doLog($check->getErrors(true));
 echo "--- (child {$my_pid}) : sid={$sid} --- \$tm_match_type={$tm_match_type}, \$fast_match_type={$fast_match_type}, \$new_match_type={$new_match_type}, \$equivalentWordMapping[\$new_match_type]=" . $equivalentWordMapping[$new_match_type] . ", \$raw_wc={$raw_wc},\$standard_words={$standard_words},\$eq_words={$eq_words}\n";
 // Keep the QA errors as JSON, or an empty string when the check passed.
 if ($check->thereAreErrors()) {
     $err_json = $check->getErrorsJSON();
 } else {
     $err_json = '';
 }
 // NOTE(review): this prints exactly the same debug line as the echo above; looks like a duplicate, confirm.
 echo "--- (child {$my_pid}) : sid={$sid} --- \$tm_match_type={$tm_match_type}, \$fast_match_type={$fast_match_type}, \$new_match_type={$new_match_type}, \$equivalentWordMapping[\$new_match_type]=" . $equivalentWordMapping[$new_match_type] . ", \$raw_wc={$raw_wc},\$standard_words={$standard_words},\$eq_words={$eq_words}\n";
 // Collect the analysis result for this segment.
 $tm_data = array();
 $tm_data['id_job'] = $jid;
 $tm_data['id_segment'] = $sid;
 $tm_data['suggestions_array'] = $suggestion_json;
 $tm_data['suggestion'] = $suggestion;
 $tm_data['suggestion_match'] = $suggestion_match;
 $tm_data['suggestion_source'] = $suggestion_source;
 $tm_data['match_type'] = $new_match_type;
 $tm_data['eq_word_count'] = $eq_words;
 $tm_data['standard_word_count'] = $standard_words;