コード例 #1
0
ファイル: QueueHandler.php プロジェクト: spMohanty/MateCat
 /**
  * Seeds (or just reports) the Redis counters that track the TM analysis
  * progress of a project.
  *
  * The caller that wins the per-project init semaphore reads the project
  * summary, stores the total number of segments and adds the already
  * analyzed ones to the "done" counter; every key gets a 24 hour TTL.
  * Callers that lose the semaphore only read the stored values and log them.
  *
  * @param $objQueue    queue element; 'id_segment', 'id_job' and 'pid' are read
  * @param $process_pid identifier of the child process, used in log lines only
  */
 public function initializeTMAnalysis($objQueue, $process_pid)
 {
     $segmentId = $objQueue['id_segment'];
     $jobId = $objQueue['id_job'];
     $projectId = $objQueue['pid'];

     // every key written here lives for 24 hours
     $ttl = 60 * 60 * 24;
     $semaphoreKey = Constants_AnalysisRedisKeys::PROJECT_INIT_SEMAPHORE . $projectId;
     $totalKey = Constants_AnalysisRedisKeys::PROJECT_TOT_SEGMENTS . $projectId;
     $doneKey = Constants_AnalysisRedisKeys::PROJECT_NUM_SEGMENTS_DONE . $projectId;

     // only the first caller for this project gets a truthy answer
     $lockWon = $this->getRedisClient()->setnx($semaphoreKey, true);

     if (empty($lockWon)) {
         // the counters were already seeded by someone else: just report them
         $storedTotal = $this->getRedisClient()->get($totalKey);
         $storedDone = $this->getRedisClient()->get($doneKey);
         Log::doLog("--- (child {$process_pid}) : found {$storedTotal} segments for PID {$projectId} in Redis");
         Log::doLog("--- (child {$process_pid}) : analyzed {$storedDone} segments for PID {$projectId} in Redis");
     } else {
         $this->getRedisClient()->expire($semaphoreKey, $ttl);

         // the last row of the summary is the rollup row with the project totals
         $summaryRows = getProjectSegmentsTranslationSummary($projectId);
         $rollup = array_pop($summaryRows);
         Log::doLog($rollup);

         $this->getRedisClient()->setex($totalKey, $ttl, $rollup['project_segments']);
         $this->getRedisClient()->incrby($doneKey, $rollup['num_analyzed']);
         $this->getRedisClient()->expire($doneKey, $ttl);
         Log::doLog("--- (child {$process_pid}) : found " . $rollup['project_segments'] . " segments for PID {$projectId}");
     }

     Log::doLog("--- (child {$process_pid}) : fetched data for segment {$segmentId}-{$jobId}. Project ID is {$projectId}");
 }
コード例 #2
0
ファイル: TMAnalysisWorker.php プロジェクト: bcrazvan/MateCat
 /**
  * Marks a project as DONE once every one of its segments has been analyzed.
  *
  * Reads the project counters from Redis; when the total equals the number
  * of analyzed segments and this worker wins the ending semaphore, the
  * project status and word counts are written, the project id is removed
  * from the subscribed queue list and the word count of every job is
  * initialized.
  *
  * @param $_project_id id of the project to check
  */
 protected function _tryToCloseProject($_project_id)
 {
     $totalSegments = $this->_queueHandler->getRedisClient()->get(RedisKeys::PROJECT_TOT_SEGMENTS . $_project_id);
     $analyzedSegments = $this->_queueHandler->getRedisClient()->get(RedisKeys::PROJECT_NUM_SEGMENTS_DONE . $_project_id);
     // the word counters are divided by 1000 when read back
     $equivalentWordCount = $this->_queueHandler->getRedisClient()->get(RedisKeys::PROJ_EQ_WORD_COUNT . $_project_id) / 1000;
     $standardWordCount = $this->_queueHandler->getRedisClient()->get(RedisKeys::PROJ_ST_WORD_COUNT . $_project_id) / 1000;

     $this->_doLog("--- (Worker {$this->_workerPid}) : count segments in project {$_project_id} = " . $totalSegments . "");
     $this->_doLog("--- (Worker {$this->_workerPid}) : Analyzed segments in project {$_project_id} = " . $analyzedSegments . "");

     if (empty($totalSegments)) {
         $this->_doLog("--- (Worker {$this->_workerPid}) : WARNING !!! error while counting segments in projects {$_project_id} skipping and continue ");
         return;
     }

     // still some segments to analyze: nothing to close yet
     if ($totalSegments - $analyzedSegments != 0) {
         return;
     }

     // only one worker is allowed to close the project
     $endingKey = RedisKeys::PROJECT_ENDING_SEMAPHORE . $_project_id;
     if (!$this->_queueHandler->getRedisClient()->setnx($endingKey, 1)) {
         return;
     }
     $this->_queueHandler->getRedisClient()->expire($endingKey, 60 * 60 * 24);

     $jobRows = getProjectSegmentsTranslationSummary($_project_id);
     // drop the trailing rollup row, only the per-job rows are needed below
     array_pop($jobRows);

     $this->_doLog("--- (Worker {$this->_workerPid}) : analysis project {$_project_id} finished : change status to DONE");
     changeProjectStatus($_project_id, \Constants_ProjectStatus::STATUS_DONE);
     changeTmWc($_project_id, $equivalentWordCount, $standardWordCount);

     // remove this project from the list held by the subscribed queue
     $this->_queueHandler->getRedisClient()->lrem($this->_mySubscribedQueue->redis_key, 0, $_project_id);

     $this->_doLog("--- (Worker {$this->_workerPid}) : trying to initialize job total word count.");
     foreach ($jobRows as $jobRow) {
         $wordCounter = new \WordCount_Counter();
         $wordCounter->initializeJobWordCount($jobRow['id_job'], $jobRow['password']);
     }
 }
コード例 #3
0
/**
 * Stores the fast-analysis word counts of a project and, when TM analysis is
 * enabled, publishes the surviving segments to the analysis queue.
 *
 * Steps: build one `segment_translations` row per segment/job pair, upsert the
 * rows in chunks of 200 inside one transaction, update
 * `projects.fast_analysis_wc`, commit, then send every remaining report
 * element to INIT::$QUEUE_NAME once per target language.
 *
 * @param mixed $pid                   project id
 * @param array $fastReport            passed by reference; keys are split as
 *                                     "<segment part>-<id_job>:<password>[,<id_job>:<password>...]".
 *                                     Elements are pruned or enriched in place and
 *                                     their 'wc' entry is removed.
 * @param array $equivalentWordMapping match_type => percentage applied to the raw word count
 * @param bool  $perform_Tms_Analysis  when false nothing is queued and the job word
 *                                     counters are initialized immediately
 *
 * @return int the DB error code negated on failure, otherwise $db->affected_rows
 * @throws Exception when setting the queue totals or publishing an element fails
 */
function insertFastAnalysis($pid, &$fastReport, $equivalentWordMapping, $perform_Tms_Analysis = true)
{
    $db = Database::obtain();
    $data = array();
    $amqHandler = new Analysis_QueueHandler();
    $total_eq_wc = 0;
    // the key order of $data defines the column order of the INSERT statement
    $data['id_segment'] = null;
    $data['id_job'] = null;
    $data['segment_hash'] = null;
    $data['match_type'] = null;
    $data['eq_word_count'] = null;
    $data['standard_word_count'] = null;
    $segment_translations = "INSERT INTO `segment_translations` ( " . implode(", ", array_keys($data)) . " ) VALUES ";
    $st_values = array();
    foreach ($fastReport as $k => $v) {
        $jid_fid = explode("-", $k);
        $list_id_jobs_password = $jid_fid[1];
        if (array_key_exists($v['match_type'], $equivalentWordMapping)) {
            $eq_word = $v['wc'] * $equivalentWordMapping[$v['match_type']] / 100;
        } else {
            $eq_word = $v['wc'];
        }
        $total_eq_wc += $eq_word;
        $standard_words = $eq_word;
        if ($v['match_type'] == "INTERNAL" or $v['match_type'] == "MT") {
            $standard_words = $v['wc'] * $equivalentWordMapping["NO_MATCH"] / 100;
        }
        // 'wc' must not travel with the element to the queue
        unset($fastReport[$k]['wc']);
        $list_id_jobs_password = explode(',', $list_id_jobs_password);
        foreach ($list_id_jobs_password as $id_job) {
            list($id_job, $job_pass) = explode(":", $id_job);
            $data['id_job'] = (int) $id_job;
            // Read from the iteration copy: $fastReport[$k] may be pruned below, and
            // reading it here gave id_segment 0 for every job after the first one.
            $data['id_segment'] = (int) $v['id_segment'];
            $data['segment_hash'] = $db->escape($v['segment_hash']);
            $data['match_type'] = $db->escape($v['match_type']);
            if (!empty($v['segment_hash']) && empty($data['segment_hash'])) {
                // escaping returned an empty value for a non-empty hash: keep the raw one and alert
                $data['segment_hash'] = $v['segment_hash'];
                $msg = "mysql_real_escape_string failed!!! String was empty. Replaced with original {$v['segment_hash']}";
                _TimeStampMsg($msg);
                Utils::sendErrMailReport("<strong>{$msg}</strong>", "Fast Analysis mysql_real_escape_string failed.");
            }
            $data['eq_word_count'] = (double) $eq_word;
            $data['standard_word_count'] = (double) $standard_words;
            $st_values[] = " ( '" . implode("', '", array_values($data)) . "' )";
        }
        // The keep/prune decision does not depend on the job, so take it once per
        // segment, after all of its rows have been built.
        if ((double) $eq_word > 0 && $perform_Tms_Analysis) {
            /**
             *
             * IMPORTANT
             * id_job will be taken from languages ( 80415:fr-FR,80416:it-IT )
             */
            $fastReport[$k]['pid'] = (int) $pid;
            $fastReport[$k]['date_insert'] = date_create()->format('Y-m-d H:i:s');
            $fastReport[$k]['eq_word_count'] = (double) $eq_word;
            $fastReport[$k]['standard_word_count'] = (double) $standard_words;
        } else {
            // this segment must not be sent to the TM analysis queue
            unset($fastReport[$k]);
        }
    }
    unset($data);
    $chunks_st = array_chunk($st_values, 200);
    _TimeStampMsg('Insert Segment Translations: ' . count($st_values));
    _TimeStampMsg('Queries: ' . count($chunks_st));
    //USE the MySQL InnoDB isolation Level to protect from thread high concurrency access
    $db->query('SET autocommit=0');
    $db->query('START TRANSACTION');
    foreach ($chunks_st as $k => $chunk) {
        $query_st = $segment_translations . implode(", ", $chunk) . " ON DUPLICATE KEY UPDATE\n            match_type = VALUES( match_type ),\n                       eq_word_count = VALUES( eq_word_count ),\n                       standard_word_count = VALUES( standard_word_count )\n                           ";
        $db->query($query_st);
        _TimeStampMsg("Executed " . ($k + 1));
        $err = $db->get_error();
        if ($err['error_code'] != 0) {
            // Close the open transaction and restore autocommit, exactly like the
            // projects update failure path below; $err was captured beforehand.
            $db->query('ROLLBACK');
            $db->query('SET autocommit=1');
            $amqHandler->disconnect();
            _TimeStampMsg($err);
            return $err['error_code'] * -1;
        }
    }
    _TimeStampMsg("Memory: " . memory_get_usage(true) / (1024 * 1024) . "MB");
    unset($st_values);
    unset($chunks_st);
    _TimeStampMsg("Memory: " . memory_get_usage(true) / (1024 * 1024) . "MB");
    /*
     * IF NO TM ANALYSIS, upload the jobs global word count
     */
    if (!$perform_Tms_Analysis) {
        $_details = getProjectSegmentsTranslationSummary($pid);
        _TimeStampMsg("--- trying to initialize job total word count.");
        // drop the trailing rollup row, only the per-job rows are iterated
        array_pop($_details);
        foreach ($_details as $job_info) {
            $counter = new WordCount_Counter();
            $counter->initializeJobWordCount($job_info['id_job'], $job_info['password']);
        }
    }
    $data2 = array('fast_analysis_wc' => $total_eq_wc);
    // the id is cast because the clause is built by string concatenation
    $where = " id = " . (int) $pid;
    $db->update('projects', $data2, $where);
    $err = $db->get_error();
    $errno = $err['error_code'];
    if ($errno != 0) {
        $db->query('ROLLBACK');
        $db->query('SET autocommit=1');
        $amqHandler->disconnect();
        _TimeStampMsg($err);
        return $errno * -1;
    }
    $db->query('COMMIT');
    $db->query('SET autocommit=1');
    if (count($fastReport)) {
        _TimeStampMsg('Insert Segment Translations Queue: ' . count($fastReport));
        _TimeStampMsg('Queries: ' . count($fastReport));
        try {
            $amqHandler->setTotal(array('qid' => $pid, 'queueName' => INIT::$QUEUE_NAME));
        } catch (Exception $e) {
            Utils::sendErrMailReport($e->getMessage() . "" . $e->getTraceAsString(), "Fast Analysis set Total values failed.");
            _TimeStampMsg($e->getMessage() . "" . $e->getTraceAsString());
            throw $e;
        }
        $time_start = microtime(true);
        foreach ($fastReport as $k => $queue_element) {
            try {
                $languages_job = explode(",", $queue_element['target']);
                //now target holds more than one language ex: ( 80415:fr-FR,80416:it-IT )
                //in memory replacement avoid duplication of the segment list
                //send in queue every element * number of languages
                foreach ($languages_job as $_language) {
                    list($id_job, $language) = explode(":", $_language);
                    $queue_element['target'] = $language;
                    $queue_element['id_job'] = $id_job;
                    $jsonObj = json_encode($queue_element);
                    Utils::raiseJsonExceptionError();
                    $amqHandler->send(INIT::$QUEUE_NAME, $jsonObj, array('persistent' => $amqHandler->persistent));
                    _TimeStampMsg("AMQ Set Executed " . ($k + 1));
                }
            } catch (Exception $e) {
                Utils::sendErrMailReport($e->getMessage() . "" . $e->getTraceAsString(), "Fast Analysis set queue failed.");
                _TimeStampMsg($e->getMessage() . "" . $e->getTraceAsString());
                throw $e;
            }
        }
        _TimeStampMsg('Done in ' . (microtime(true) - $time_start) . " seconds.");
        _TimeStampMsg("Memory: " . memory_get_usage(true) / (1024 * 1024) . "MB");
        // only drops the local reference, the caller's array is left untouched
        unset($fastReport);
        _TimeStampMsg("Memory: " . memory_get_usage(true) / (1024 * 1024) . "MB");
    }
    $amqHandler->disconnect();
    return $db->affected_rows;
}
コード例 #4
0
ファイル: FastAnalysis.php プロジェクト: bcrazvan/MateCat
 /**
  * Stores the fast-analysis word counts of a project and, when TM analysis is
  * enabled, publishes the surviving segments to the selected analysis queue.
  *
  * Steps: build one `segment_translations` row per segment/job pair, upsert the
  * rows in chunks of 200 inside one transaction, update
  * `projects.fast_analysis_wc`, commit, then send every remaining element to
  * the queue once per target language.
  *
  * @param mixed $pid                   project id
  * @param array $fastResultData        passed by reference; keys look like
  *                                     "21529088-42593:b433193493c6,42594:b4331aacf3d4"
  *                                     (segment part, then id_job:password pairs).
  *                                     Elements are pruned or enriched in place and
  *                                     their 'wc' entry is removed.
  * @param array $equivalentWordMapping match_type => percentage applied to the raw word count
  * @param bool  $perform_Tms_Analysis  when false nothing is queued and the job word
  *                                     counters are initialized immediately
  *
  * @return int the exception code negated on a DB failure, otherwise $db->affected_rows
  * @throws Exception when setting the queue totals or publishing an element fails
  */
 protected function _insertFastAnalysis($pid, &$fastResultData, $equivalentWordMapping, $perform_Tms_Analysis = true)
 {
     $db = Database::obtain();
     $data = array();
     $total_eq_wc = 0;
     // the key order of $data defines the column order of the INSERT statement
     $data['id_segment'] = null;
     $data['id_job'] = null;
     $data['segment_hash'] = null;
     $data['match_type'] = null;
     $data['eq_word_count'] = null;
     $data['standard_word_count'] = null;
     $segment_translations = "INSERT INTO `segment_translations` ( " . implode(", ", array_keys($data)) . " ) VALUES ";
     $st_values = array();
     foreach ($fastResultData as $k => $v) {
         // only to remember the meaning of $k
         // EX: 21529088-42593:b433193493c6,42594:b4331aacf3d4
         $jid_pass = explode("-", $k);
         $list_id_jobs_password = $jid_pass[1];
         if (array_key_exists($v['match_type'], $equivalentWordMapping)) {
             $eq_word = $v['wc'] * $equivalentWordMapping[$v['match_type']] / 100;
         } else {
             $eq_word = $v['wc'];
         }
         $standard_words = $eq_word;
         if ($v['match_type'] == "INTERNAL" or $v['match_type'] == "MT") {
             $standard_words = $v['wc'] * $equivalentWordMapping["NO_MATCH"] / 100;
         }
         $total_eq_wc += $eq_word;
         $list_id_jobs_password = explode(',', $list_id_jobs_password);
         foreach ($list_id_jobs_password as $id_job) {
             list($id_job, $job_pass) = explode(":", $id_job);
             $data['id_job'] = (int) $id_job;
             // Read from the iteration copy: $fastResultData[$k] may be pruned below,
             // and reading it here gave id_segment 0 for every job after the first one.
             $data['id_segment'] = (int) $v['id_segment'];
             $data['segment_hash'] = $db->escape($v['segment_hash']);
             $data['match_type'] = $db->escape($v['match_type']);
             $data['eq_word_count'] = (double) $eq_word;
             $data['standard_word_count'] = (double) $standard_words;
             $st_values[] = " ( '" . implode("', '", array_values($data)) . "' )";
         }
         //WE TRUST ON THE FAST ANALYSIS RESULTS FOR THE WORD COUNT
         //here we are pruning the segments that must not be sent to the engines for the TM analysis
         //because we multiply the word_count with the equivalentWordMapping ( and this can be 0 for some values )
         //we must check the raw 'wc' value and not $data[ 'eq_word_count' ]
         //
         // The decision does not depend on the job, so it is taken once per segment
         // after all of its rows have been built.
         if (!$perform_Tms_Analysis) {
             // TM analysis disabled: no segment goes to the queue
             unset($fastResultData[$k]);
         } elseif ($v['wc'] > 0) {
             /**
              *
              * IMPORTANT
              * id_job will be taken from languages ( 80415:fr-FR,80416:it-IT )
              */
             $fastResultData[$k]['pid'] = (int) $pid;
             $fastResultData[$k]['date_insert'] = date_create()->format('Y-m-d H:i:s');
             $fastResultData[$k]['eq_word_count'] = (double) $eq_word;
             $fastResultData[$k]['standard_word_count'] = (double) $standard_words;
             // 'wc' is no longer needed and must not be sent to the queue
             unset($fastResultData[$k]['wc']);
         } else {
             Log::doLog('Skipped Fast Segment: ' . var_export($fastResultData[$k], true));
             // this segment must not be sent to the TM analysis queue
             unset($fastResultData[$k]);
         }
     }
     unset($data);
     $chunks_st = array_chunk($st_values, 200);
     self::_TimeStampMsg('Insert Segment Translations: ' . count($st_values));
     self::_TimeStampMsg('Queries: ' . count($chunks_st));
     //USE the MySQL InnoDB isolation Level to protect from thread high concurrency access
     $db->query('SET autocommit=0');
     $db->query('START TRANSACTION');
     foreach ($chunks_st as $k => $chunk) {
         $query_st = $segment_translations . implode(", ", $chunk) . " ON DUPLICATE KEY UPDATE\n                        match_type = VALUES( match_type ),\n                        eq_word_count = VALUES( eq_word_count ),\n                        standard_word_count = VALUES( standard_word_count )\n                ";
         try {
             self::_TimeStampMsg("Executed " . ($k + 1));
             $db->query($query_st);
         } catch (PDOException $e) {
             // Close the open transaction and restore autocommit, exactly like the
             // projects update failure path below.
             $db->query('ROLLBACK');
             $db->query('SET autocommit=1');
             self::_TimeStampMsg($e->getMessage());
             // NOTE(review): PDOException codes can be SQLSTATE strings; confirm that
             // callers cope with the value produced by this multiplication.
             return $e->getCode() * -1;
         }
     }
     unset($st_values);
     unset($chunks_st);
     /*
      * IF NO TM ANALYSIS, upload the jobs global word count
      */
     if (!$perform_Tms_Analysis) {
         $_details = getProjectSegmentsTranslationSummary($pid);
         self::_TimeStampMsg("--- trying to initialize job total word count.");
         //Don't remove, needed to drop the trailing rollup row
         array_pop($_details);
         foreach ($_details as $job_info) {
             $counter = new WordCount_Counter();
             $counter->initializeJobWordCount($job_info['id_job'], $job_info['password']);
         }
     }
     $data2 = array('fast_analysis_wc' => $total_eq_wc);
     // the id is cast because the clause is built by string concatenation
     $where = " id = " . (int) $pid;
     try {
         $db->update('projects', $data2, $where);
     } catch (PDOException $e) {
         $db->query('ROLLBACK');
         $db->query('SET autocommit=1');
         self::_TimeStampMsg($e->getMessage());
         return $e->getCode() * -1;
     }
     $db->query('COMMIT');
     $db->query('SET autocommit=1');
     $totalSegmentsToAnalyze = count($fastResultData);
     /*
      *  The MT engine id is taken from the first element of the list: the code
      *  relies on it being the same for every record of the project.
      *  reset() returns false on an empty list, so guard the lookup and pass
      *  null in that case.
      */
     $first_element = reset($fastResultData);
     $id_mt_engine = ($first_element !== false) ? $first_element['id_mt_engine'] : null;
     $queueInfo = $this->_getQueueAddressesByPriority($totalSegmentsToAnalyze, $id_mt_engine);
     if ($totalSegmentsToAnalyze) {
         self::_TimeStampMsg("Publish Segment Translations to the queue --> {$queueInfo->queue_name}: " . count($fastResultData));
         self::_TimeStampMsg('Elements: ' . count($fastResultData));
         try {
             $this->_setTotal(array('pid' => $pid, 'queueInfo' => $queueInfo));
         } catch (Exception $e) {
             Utils::sendErrMailReport($e->getMessage() . "" . $e->getTraceAsString(), "Fast Analysis set Total values failed.");
             self::_TimeStampMsg($e->getMessage() . "" . $e->getTraceAsString());
             throw $e;
         }
         $time_start = microtime(true);
         foreach ($fastResultData as $k => $queue_element) {
             try {
                 $languages_job = explode(",", $queue_element['target']);
                 //now target holds more than one language ex: ( 80415:fr-FR,80416:it-IT )
                 //in memory replacement avoid duplication of the segment list
                 //send in queue every element * number of languages
                 foreach ($languages_job as $_language) {
                     list($id_job, $language) = explode(":", $_language);
                     $queue_element['target'] = $language;
                     $queue_element['id_job'] = $id_job;
                     $element = new QueueElement();
                     $element->params = $queue_element;
                     $element->classLoad = '\\Analysis\\Workers\\TMAnalysisWorker';
                     self::$queueHandler->send($queueInfo->queue_name, $element, array('persistent' => self::$queueHandler->persistent));
                     self::_TimeStampMsg("AMQ Set Executed " . ($k + 1));
                 }
             } catch (Exception $e) {
                 Utils::sendErrMailReport($e->getMessage() . "" . $e->getTraceAsString(), "Fast Analysis set queue failed.");
                 self::_TimeStampMsg($e->getMessage() . "" . $e->getTraceAsString());
                 throw $e;
             }
         }
         self::_TimeStampMsg('Done in ' . (microtime(true) - $time_start) . " seconds.");
         // only drops the local reference, the caller's array is left untouched
         unset($fastResultData);
     }
     return $db->affected_rows;
 }