/**
 * Prepares the Redis counters used to track a project's TM analysis and logs
 * the segment that has just been fetched.
 *
 * The caller that wins the per-project init semaphore (SETNX) loads the project
 * summary, stores the total segment count and adds the already analyzed count,
 * each with a 24 hour expiry. Every other caller only reads the stored counters
 * and logs them.
 *
 * @param $objQueue    queue element; 'id_segment', 'id_job' and 'pid' are read
 * @param $process_pid identifier of the calling child, used only in log lines
 */
public function initializeTMAnalysis($objQueue, $process_pid)
{
    $sid = $objQueue['id_segment'];
    $jid = $objQueue['id_job'];
    $pid = $objQueue['pid'];

    $oneDay       = 60 * 60 * 24;
    $redis        = $this->getRedisClient();
    $semaphoreKey = Constants_AnalysisRedisKeys::PROJECT_INIT_SEMAPHORE . $pid;
    $totalKey     = Constants_AnalysisRedisKeys::PROJECT_TOT_SEGMENTS . $pid;
    $doneKey      = Constants_AnalysisRedisKeys::PROJECT_NUM_SEGMENTS_DONE . $pid;

    // Only one caller per project gets a non-empty answer from SETNX.
    $gotSemaphore = $redis->setnx($semaphoreKey, true);

    if (empty($gotSemaphore)) {
        // Somebody else already initialized (or is initializing) the project:
        // just report what is currently stored.
        $_existingPid = $redis->get($totalKey);
        $_analyzed    = $redis->get($doneKey);

        Log::doLog("--- (child {$process_pid}) : found {$_existingPid} segments for PID {$pid} in Redis");
        Log::doLog("--- (child {$process_pid}) : analyzed {$_analyzed} segments for PID {$pid} in Redis");
    } else {
        // Keep the semaphore for 24 hours.
        $redis->expire($semaphoreKey, $oneDay);

        // The last row of the summary is the rollup for the whole project.
        $summaryRows = getProjectSegmentsTranslationSummary($pid);
        $rollup      = array_pop($summaryRows);
        Log::doLog($rollup);

        $redis->setex($totalKey, $oneDay, $rollup['project_segments']);
        $redis->incrby($doneKey, $rollup['num_analyzed']);
        $redis->expire($doneKey, $oneDay);

        Log::doLog("--- (child {$process_pid}) : found " . $rollup['project_segments'] . " segments for PID {$pid}");
    }

    Log::doLog("--- (child {$process_pid}) : fetched data for segment {$sid}-{$jid}. Project ID is {$pid}");
}
/**
 * Marks a project as DONE once every one of its segments has been analyzed.
 *
 * Reads the project counters from Redis. When the total equals the analyzed
 * count, and this worker wins the per-project ending semaphore (SETNX, kept
 * for 24 hours), it sets the project status to DONE, stores the TM word
 * counts, removes the project from the subscribed queue list and initializes
 * the word count of every job in the project summary.
 *
 * @param $_project_id
 */
protected function _tryToCloseProject($_project_id)
{
    $redis = $this->_queueHandler->getRedisClient();

    $segmentsTotal    = $redis->get(RedisKeys::PROJECT_TOT_SEGMENTS . $_project_id);
    $segmentsAnalyzed = $redis->get(RedisKeys::PROJECT_NUM_SEGMENTS_DONE . $_project_id);
    // The word-count counters are divided by 1000 before being handed to changeTmWc().
    $equivalentWc     = $redis->get(RedisKeys::PROJ_EQ_WORD_COUNT . $_project_id) / 1000;
    $standardWc       = $redis->get(RedisKeys::PROJ_ST_WORD_COUNT . $_project_id) / 1000;

    $this->_doLog("--- (Worker {$this->_workerPid}) : count segments in project {$_project_id} = " . $segmentsTotal);
    $this->_doLog("--- (Worker {$this->_workerPid}) : Analyzed segments in project {$_project_id} = " . $segmentsAnalyzed);

    if (empty($segmentsTotal)) {
        $this->_doLog("--- (Worker {$this->_workerPid}) : WARNING !!! error while counting segments in projects {$_project_id} skipping and continue ");
        return;
    }

    // Still something left to analyze.
    if ($segmentsTotal - $segmentsAnalyzed != 0) {
        return;
    }

    // Another worker is already closing this project.
    $endingSemaphore = RedisKeys::PROJECT_ENDING_SEMAPHORE . $_project_id;
    if (!$redis->setnx($endingSemaphore, 1)) {
        return;
    }
    $redis->expire($endingSemaphore, 60 * 60 * 24);

    $_analyzed_report = getProjectSegmentsTranslationSummary($_project_id);
    // Drop the trailing rollup row so that only per-job rows are iterated below.
    array_pop($_analyzed_report);

    $this->_doLog("--- (Worker {$this->_workerPid}) : analysis project {$_project_id} finished : change status to DONE");

    changeProjectStatus($_project_id, \Constants_ProjectStatus::STATUS_DONE);
    changeTmWc($_project_id, $equivalentWc, $standardWc);

    // Remove this project from the list held under the subscribed queue key.
    $redis->lrem($this->_mySubscribedQueue->redis_key, 0, $_project_id);

    $this->_doLog("--- (Worker {$this->_workerPid}) : trying to initialize job total word count.");

    foreach ($_analyzed_report as $jobRow) {
        $wordCounter = new \WordCount_Counter();
        $wordCounter->initializeJobWordCount($jobRow['id_job'], $jobRow['password']);
    }
}
/**
 * Stores the fast-analysis word counts of a project and, when TM analysis is
 * enabled, publishes the remaining segments to the analysis queue.
 *
 * Steps:
 *  1. builds one `segment_translations` row per (segment, job) pair and upserts
 *     the rows in chunks of 200 inside a single transaction;
 *  2. when TM analysis is disabled, initializes the word count of every job
 *     in the project summary;
 *  3. writes the summed equivalent word count to `projects.fast_analysis_wc`
 *     and commits;
 *  4. sends every element still present in $fastReport to INIT::$QUEUE_NAME,
 *     once per target language.
 *
 * @param $pid                   project id
 * @param $fastReport            (by reference) list keyed by
 *                               "<id_segment>-<id_job>:<x>,<id_job>:<x>,...";
 *                               'wc', 'match_type', 'segment_hash', 'id_segment'
 *                               and 'target' are read from each element.
 *                               Elements are modified / removed in place.
 * @param $equivalentWordMapping map match_type => percentage applied to 'wc'
 * @param $perform_Tms_Analysis  when false nothing is queued and the job word
 *                               counts are initialized here
 *
 * @return int the DB error code multiplied by -1 when a query fails,
 *             otherwise $db->affected_rows
 * @throws Exception rethrown when setting the queue total or sending fails
 */
function insertFastAnalysis($pid, &$fastReport, $equivalentWordMapping, $perform_Tms_Analysis = true)
{
    $db         = Database::obtain();
    $amqHandler = new Analysis_QueueHandler();

    $total_eq_wc = 0;

    // Row template: the key order defines both the column list and the order
    // of the values of every tuple, so it must not change.
    $data                        = array();
    $data['id_segment']          = null;
    $data['id_job']              = null;
    $data['segment_hash']        = null;
    $data['match_type']          = null;
    $data['eq_word_count']       = null;
    $data['standard_word_count'] = null;

    $segment_translations = "INSERT INTO `segment_translations` ( " . implode(", ", array_keys($data)) . " ) VALUES ";
    $st_values            = array();

    foreach ($fastReport as $k => $v) {

        $keyParts              = explode("-", $k);
        $list_id_jobs_password = explode(',', $keyParts[1]);

        if (array_key_exists($v['match_type'], $equivalentWordMapping)) {
            $eq_word = $v['wc'] * $equivalentWordMapping[$v['match_type']] / 100;
        } else {
            $eq_word = $v['wc'];
        }
        $total_eq_wc += $eq_word;

        // INTERNAL and MT matches are counted as NO_MATCH in the standard word count.
        $standard_words = $eq_word;
        if ($v['match_type'] == "INTERNAL" or $v['match_type'] == "MT") {
            $standard_words = $v['wc'] * $equivalentWordMapping["NO_MATCH"] / 100;
        }

        // Everything except id_job is the same for all the jobs of the segment.
        // The values are read from $v (the per-iteration copy) so they do not
        // depend on what has already been removed from $fastReport.
        $data['id_segment']   = (int) $v['id_segment'];
        $data['segment_hash'] = $db->escape($v['segment_hash']);
        $data['match_type']   = $db->escape($v['match_type']);

        if (!empty($v['segment_hash']) && empty($data['segment_hash'])) {
            // Escaping returned an empty value for a non-empty hash: fall back
            // to the raw hash and report the problem.
            $data['segment_hash'] = $v['segment_hash'];
            $msg = "mysql_real_escape_string failed!!! String was empty. Replaced with original {$v['segment_hash']}";
            _TimeStampMsg($msg);
            Utils::sendErrMailReport("<strong>{$msg}</strong>", "Fast Analysis mysql_real_escape_string failed.");
        }

        $data['eq_word_count']       = (double) $eq_word;
        $data['standard_word_count'] = (double) $standard_words;

        foreach ($list_id_jobs_password as $jobEntry) {
            list($id_job) = explode(":", $jobEntry);
            $data['id_job'] = (int) $id_job;
            $st_values[]    = " ( '" . implode("', '", array_values($data)) . "' )";
        }

        // The queue decision is taken once per segment, after all of its rows
        // have been built. Removing the element inside the job loop made the
        // following jobs of the same segment read a missing 'id_segment'.
        if ($data['eq_word_count'] > 0 && $perform_Tms_Analysis) {
            /**
             * IMPORTANT
             * id_job will be taken from languages ( 80415:fr-FR,80416:it-IT )
             */
            unset($fastReport[$k]['wc']);
            $fastReport[$k]['pid']                 = (int) $pid;
            $fastReport[$k]['date_insert']         = date_create()->format('Y-m-d H:i:s');
            $fastReport[$k]['eq_word_count']       = (double) $eq_word;
            $fastReport[$k]['standard_word_count'] = (double) $standard_words;
        } else {
            // Nothing to analyze for this segment: keep it out of the queue.
            unset($fastReport[$k]);
        }
    }

    unset($data);

    $chunks_st = array_chunk($st_values, 200);

    _TimeStampMsg('Insert Segment Translations: ' . count($st_values));
    _TimeStampMsg('Queries: ' . count($chunks_st));

    // USE the MySQL InnoDB isolation Level to protect from thread high concurrency access
    $db->query('SET autocommit=0');
    $db->query('START TRANSACTION');

    foreach ($chunks_st as $k => $chunk) {

        $query_st = $segment_translations . implode(", ", $chunk) . " ON DUPLICATE KEY UPDATE\n match_type = VALUES( match_type ),\n eq_word_count = VALUES( eq_word_count ),\n standard_word_count = VALUES( standard_word_count )\n ";

        $db->query($query_st);
        _TimeStampMsg("Executed " . ($k + 1));

        $err = $db->get_error();
        if ($err['error_code'] != 0) {
            // Do not leave the connection inside a half-written transaction
            // with autocommit disabled (same cleanup as the update failure below).
            $db->query('ROLLBACK');
            $db->query('SET autocommit=1');
            _TimeStampMsg($err);
            return $err['error_code'] * -1;
        }
    }

    _TimeStampMsg("Memory: " . memory_get_usage(true) / (1024 * 1024) . "MB");
    unset($st_values);
    unset($chunks_st);
    _TimeStampMsg("Memory: " . memory_get_usage(true) / (1024 * 1024) . "MB");

    /*
     * IF NO TM ANALYSIS, upload the jobs global word count
     */
    if (!$perform_Tms_Analysis) {
        $_details = getProjectSegmentsTranslationSummary($pid);

        _TimeStampMsg("--- trying to initialize job total word count.");

        // The last row is the project rollup: drop it, only job rows are needed.
        array_pop($_details);

        foreach ($_details as $job_info) {
            $counter = new WordCount_Counter();
            $counter->initializeJobWordCount($job_info['id_job'], $job_info['password']);
        }
    }

    $data2 = array('fast_analysis_wc' => $total_eq_wc);
    $where = " id = {$pid}";
    $db->update('projects', $data2, $where);

    $err   = $db->get_error();
    $errno = $err['error_code'];
    if ($errno != 0) {
        $db->query('ROLLBACK');
        $db->query('SET autocommit=1');
        _TimeStampMsg($err);
        return $errno * -1;
    }

    $db->query('COMMIT');
    $db->query('SET autocommit=1');

    if (count($fastReport)) {

        _TimeStampMsg('Insert Segment Translations Queue: ' . count($fastReport));
        _TimeStampMsg('Queries: ' . count($fastReport));

        try {
            $amqHandler->setTotal(array('qid' => $pid, 'queueName' => INIT::$QUEUE_NAME));
        } catch (Exception $e) {
            Utils::sendErrMailReport($e->getMessage() . "" . $e->getTraceAsString(), "Fast Analysis set Total values failed.");
            _TimeStampMsg($e->getMessage() . "" . $e->getTraceAsString());
            throw $e;
        }

        $time_start = microtime(true);

        foreach ($fastReport as $k => $queue_element) {

            try {
                // target holds more than one language, ex: ( 80415:fr-FR,80416:it-IT ).
                // The same element is re-used in memory and sent once per language,
                // to avoid duplicating the segment list.
                $languages_job = explode(",", $queue_element['target']);

                foreach ($languages_job as $_language) {
                    list($id_job, $language) = explode(":", $_language);

                    $queue_element['target'] = $language;
                    $queue_element['id_job'] = $id_job;

                    $jsonObj = json_encode($queue_element);
                    Utils::raiseJsonExceptionError();

                    $amqHandler->send(INIT::$QUEUE_NAME, $jsonObj, array('persistent' => $amqHandler->persistent));

                    // NOTE(review): $k is the string key of $fastReport, not a
                    // sequential index — confirm this is the number meant to be logged.
                    _TimeStampMsg("AMQ Set Executed " . ($k + 1));
                }
            } catch (Exception $e) {
                Utils::sendErrMailReport($e->getMessage() . "" . $e->getTraceAsString(), "Fast Analysis set queue failed.");
                _TimeStampMsg($e->getMessage() . "" . $e->getTraceAsString());
                throw $e;
            }
        }

        _TimeStampMsg('Done in ' . (microtime(true) - $time_start) . " seconds.");

        _TimeStampMsg("Memory: " . memory_get_usage(true) / (1024 * 1024) . "MB");
        // $fastReport is a reference parameter: this only drops the local name.
        unset($fastReport);
        _TimeStampMsg("Memory: " . memory_get_usage(true) / (1024 * 1024) . "MB");
    }

    $amqHandler->disconnect();

    return $db->affected_rows;
}
/**
 * Stores the fast-analysis word counts of a project and, when TM analysis is
 * enabled, publishes the segments that still need it to the analysis queue.
 *
 * Steps:
 *  1. builds one `segment_translations` row per (segment, job) pair and upserts
 *     the rows in chunks of 200 inside a single transaction;
 *  2. when TM analysis is disabled, initializes the word count of every job
 *     in the project summary;
 *  3. writes the summed equivalent word count to `projects.fast_analysis_wc`
 *     and commits;
 *  4. wraps every element still present in $fastResultData in a QueueElement
 *     and sends it, once per target language, to the queue returned by
 *     _getQueueAddressesByPriority().
 *
 * @param $pid                   project id
 * @param $fastResultData        (by reference) list keyed like
 *                               "21529088-42593:b433193493c6,42594:b4331aacf3d4";
 *                               'wc', 'match_type', 'segment_hash', 'id_segment',
 *                               'id_mt_engine' and 'target' are read from the
 *                               elements, which are modified / removed in place
 * @param $equivalentWordMapping map match_type => percentage applied to 'wc'
 * @param $perform_Tms_Analysis  when false nothing is queued and the job word
 *                               counts are initialized here
 *
 * @return int the PDOException code multiplied by -1 when a query fails,
 *             otherwise $db->affected_rows
 * @throws Exception rethrown when setting the queue total or sending fails
 */
protected function _insertFastAnalysis($pid, &$fastResultData, $equivalentWordMapping, $perform_Tms_Analysis = true)
{
    $db = Database::obtain();

    $total_eq_wc = 0;

    // Row template: the key order defines both the column list and the order
    // of the values of every tuple, so it must not change.
    $data                        = array();
    $data['id_segment']          = null;
    $data['id_job']              = null;
    $data['segment_hash']        = null;
    $data['match_type']          = null;
    $data['eq_word_count']       = null;
    $data['standard_word_count'] = null;

    $segment_translations = "INSERT INTO `segment_translations` ( " . implode(", ", array_keys($data)) . " ) VALUES ";
    $st_values            = array();

    foreach ($fastResultData as $k => $v) {

        // Meaning of $k, EX: 21529088-42593:b433193493c6,42594:b4331aacf3d4
        $jid_pass              = explode("-", $k);
        $list_id_jobs_password = explode(',', $jid_pass[1]);

        if (array_key_exists($v['match_type'], $equivalentWordMapping)) {
            $eq_word = $v['wc'] * $equivalentWordMapping[$v['match_type']] / 100;
        } else {
            $eq_word = $v['wc'];
        }

        // INTERNAL and MT matches are counted as NO_MATCH in the standard word count.
        $standard_words = $eq_word;
        if ($v['match_type'] == "INTERNAL" or $v['match_type'] == "MT") {
            $standard_words = $v['wc'] * $equivalentWordMapping["NO_MATCH"] / 100;
        }

        $total_eq_wc += $eq_word;

        // Everything except id_job is the same for all the jobs of the segment.
        // The values are read from $v (the per-iteration copy) so they do not
        // depend on what has already been removed from $fastResultData.
        $data['id_segment']          = (int) $v['id_segment'];
        $data['segment_hash']        = $db->escape($v['segment_hash']);
        $data['match_type']          = $db->escape($v['match_type']);
        $data['eq_word_count']       = (double) $eq_word;
        $data['standard_word_count'] = (double) $standard_words;

        foreach ($list_id_jobs_password as $jobEntry) {
            list($id_job) = explode(":", $jobEntry);
            $data['id_job'] = (int) $id_job;
            $st_values[]    = " ( '" . implode("', '", array_values($data)) . "' )";
        }

        // The queue decision is taken once per segment, after all of its rows
        // have been built. Removing the element inside the job loop made the
        // following jobs of the same segment read a missing 'id_segment'.
        if (!$perform_Tms_Analysis) {
            // TM analysis is disabled: no segment must be sent to the queue.
            unset($fastResultData[$k]);
        } elseif ($v['wc'] > 0) {
            // WE TRUST ON THE FAST ANALYSIS RESULTS FOR THE WORD COUNT.
            // The raw 'wc' is checked instead of the equivalent word count,
            // because the mapping percentage can be 0 for some match types.
            /**
             * IMPORTANT
             * id_job will be taken from languages ( 80415:fr-FR,80416:it-IT )
             */
            $fastResultData[$k]['pid']                 = (int) $pid;
            $fastResultData[$k]['date_insert']         = date_create()->format('Y-m-d H:i:s');
            $fastResultData[$k]['eq_word_count']       = (double) $eq_word;
            $fastResultData[$k]['standard_word_count'] = (double) $standard_words;

            // 'wc' is no longer needed and must not be sent to the queue.
            unset($fastResultData[$k]['wc']);
        } else {
            // No words: this segment must not be sent to the TM analysis queue.
            Log::doLog('Skipped Fast Segment: ' . var_export($fastResultData[$k], true));
            unset($fastResultData[$k]);
        }
    }

    unset($data);

    $chunks_st = array_chunk($st_values, 200);

    self::_TimeStampMsg('Insert Segment Translations: ' . count($st_values));
    self::_TimeStampMsg('Queries: ' . count($chunks_st));

    // USE the MySQL InnoDB isolation Level to protect from thread high concurrency access
    $db->query('SET autocommit=0');
    $db->query('START TRANSACTION');

    foreach ($chunks_st as $k => $chunk) {

        $query_st = $segment_translations . implode(", ", $chunk) . " ON DUPLICATE KEY UPDATE\n match_type = VALUES( match_type ),\n eq_word_count = VALUES( eq_word_count ),\n standard_word_count = VALUES( standard_word_count )\n ";

        try {
            self::_TimeStampMsg("Executed " . ($k + 1));
            $db->query($query_st);
        } catch (PDOException $e) {
            // Do not leave the connection inside a half-written transaction
            // with autocommit disabled (same cleanup as the update failure below).
            $db->query('ROLLBACK');
            $db->query('SET autocommit=1');
            self::_TimeStampMsg($e->getMessage());
            // NOTE(review): PDOException::getCode() can be a non-numeric
            // SQLSTATE string — confirm callers cope with the resulting value.
            return $e->getCode() * -1;
        }
    }

    unset($st_values);
    unset($chunks_st);

    /*
     * IF NO TM ANALYSIS, upload the jobs global word count
     */
    if (!$perform_Tms_Analysis) {
        $_details = getProjectSegmentsTranslationSummary($pid);

        self::_TimeStampMsg("--- trying to initialize job total word count.");

        // Don't remove: the pop is needed to drop the rollup row.
        array_pop($_details);

        foreach ($_details as $job_info) {
            $counter = new WordCount_Counter();
            $counter->initializeJobWordCount($job_info['id_job'], $job_info['password']);
        }
    }

    $data2 = array('fast_analysis_wc' => $total_eq_wc);
    $where = " id = {$pid}";

    try {
        $db->update('projects', $data2, $where);
    } catch (PDOException $e) {
        $db->query('ROLLBACK');
        $db->query('SET autocommit=1');
        self::_TimeStampMsg($e->getMessage());
        return $e->getCode() * -1;
    }

    $db->query('COMMIT');
    $db->query('SET autocommit=1');

    $totalSegmentsToAnalyze = count($fastResultData);

    /*
     * The MT engine id is taken from the first element of the list: the value
     * is expected to be the same for every record of the project.
     * reset() returns false on an empty list, so the index is guarded and
     * null is passed in that case.
     */
    $first_element = reset($fastResultData);
    $id_mt_engine  = (is_array($first_element) && isset($first_element['id_mt_engine']))
        ? $first_element['id_mt_engine']
        : null;

    $queueInfo = $this->_getQueueAddressesByPriority($totalSegmentsToAnalyze, $id_mt_engine);

    if ($totalSegmentsToAnalyze) {

        self::_TimeStampMsg("Publish Segment Translations to the queue --> {$queueInfo->queue_name}: " . count($fastResultData));
        self::_TimeStampMsg('Elements: ' . count($fastResultData));

        try {
            $this->_setTotal(array('pid' => $pid, 'queueInfo' => $queueInfo));
        } catch (Exception $e) {
            Utils::sendErrMailReport($e->getMessage() . "" . $e->getTraceAsString(), "Fast Analysis set Total values failed.");
            self::_TimeStampMsg($e->getMessage() . "" . $e->getTraceAsString());
            throw $e;
        }

        $time_start = microtime(true);

        foreach ($fastResultData as $k => $queue_element) {

            try {
                // target holds more than one language, ex: ( 80415:fr-FR,80416:it-IT ).
                // The same element is re-used in memory and sent once per language,
                // to avoid duplicating the segment list.
                $languages_job = explode(",", $queue_element['target']);

                foreach ($languages_job as $_language) {
                    list($id_job, $language) = explode(":", $_language);

                    $queue_element['target'] = $language;
                    $queue_element['id_job'] = $id_job;

                    $element            = new QueueElement();
                    $element->params    = $queue_element;
                    $element->classLoad = '\\Analysis\\Workers\\TMAnalysisWorker';

                    self::$queueHandler->send($queueInfo->queue_name, $element, array('persistent' => self::$queueHandler->persistent));

                    // NOTE(review): $k is the string key of $fastResultData, not
                    // a sequential index — confirm this is the number meant to be logged.
                    self::_TimeStampMsg("AMQ Set Executed " . ($k + 1));
                }
            } catch (Exception $e) {
                Utils::sendErrMailReport($e->getMessage() . "" . $e->getTraceAsString(), "Fast Analysis set queue failed.");
                self::_TimeStampMsg($e->getMessage() . "" . $e->getTraceAsString());
                throw $e;
            }
        }

        self::_TimeStampMsg('Done in ' . (microtime(true) - $time_start) . " seconds.");

        // $fastResultData is a reference parameter: this only drops the local name.
        unset($fastResultData);
    }

    return $db->affected_rows;
}