public function mlfindattributesAction() { $instance = $instances = $message = $tree_descriptor = $model_html = $config = ''; $possible_models = $possible_models_id = $other_models = array(); $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); if (array_key_exists('dump', $_GET)) { $dump = $_GET["dump"]; unset($_GET["dump"]); } if (array_key_exists('pass', $_GET)) { $pass = $_GET["pass"]; unset($_GET["pass"]); } $this->buildFilters(array('current_model' => array('type' => 'selectOne', 'default' => null, 'label' => 'Model to use: ', 'generateChoices' => function () { return array(); }, 'parseFunction' => function () { $choice = isset($_GET['current_model']) ? $_GET['current_model'] : array(""); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'unseen' => array('type' => 'checkbox', 'default' => 1, 'label' => 'Predict with unseen atributes ⚠', 'parseFunction' => function () { $choice = isset($_GET['unseen']) && !isset($_GET['unseen']) ? 0 : 1; return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'minexetime' => array('default' => 0), 'valid' => array('default' => 0), 'filter' => array('default' => 0), 'prepares' => array('default' => 1))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('current_model', 'unseen')))); $where_configs = $this->filters->getWhereClause(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $learnParams = $this->filters->getFiltersSelectedChoices(array('current_model', 'unseen')); $param_current_model = $learnParams['current_model']; $unseen = $learnParams['unseen'] ? true : false; // FIXME PATCH FOR PARAM LIBRARIES WITHOUT LEGACY $where_configs = str_replace("AND .", "AND ", $where_configs); $jsonData = $jsonHeader = "[]"; $mae = $rae = 0; // compose instance $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, $unseen); $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, $unseen); $instances = MLUtils::generateInstances($this->filters, $param_names, $params, $unseen, $db); // Model for filling MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = ''; if (!is_null($possible_models_id) && in_array($param_current_model, $possible_models_id)) { $current_model = $param_current_model; } // Other models for filling $where_models = ''; if (!empty($possible_models_id)) { $where_models = " WHERE id_learner NOT IN ('" . implode("','", $possible_models_id) . "')"; } $result = $dbml->query("SELECT id_learner FROM aloja_ml.learners" . $where_models); foreach ($result as $row) { $other_models[] = $row['id_learner']; } if (!empty($possible_models_id) || $current_model != "") { $result = $dbml->query("SELECT id_learner, model, algorithm, CASE WHEN `id_learner` IN ('" . implode("','", $possible_models_id) . "') THEN 'COMPATIBLE' ELSE 'NOT MATCHED' END AS compatible FROM aloja_ml.learners"); foreach ($result as $row) { $model_html = $model_html . "<li>" . $row['id_learner'] . " => " . $row['algorithm'] . " : " . $row['compatible'] . " : " . $row['model'] . "</li>"; } if ($current_model == "") { $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, p.id_learner FROM aloja_ml.predictions p, aloja_ml.learners l WHERE l.id_learner = p.id_learner AND p.id_learner IN ('" . implode("','", $possible_models_id) . "') AND predict_code > 0 ORDER BY MAE LIMIT 1"; $result = $dbml->query($query); $row = $result->fetch(); $current_model = $row['id_learner']; } $config = $instance . '-' . $current_model . '-' . ($unseen ? 'U' : 'R'); $is_cached_mysql = $dbml->query("SELECT count(*) as total FROM aloja_ml.trees WHERE id_findattrs = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; $tmp_file = md5($config) . '.tmp'; $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); $finished_process = $in_process && (int) shell_exec('ls ' . getcwd() . '/cache/query/' . md5($config) . '-*.lock | wc -w ') == count($instances); if (!$in_process && !$finished_process && !$is_cached) { // Retrieve file model from DB $query = "SELECT file FROM aloja_ml.model_storage WHERE id_hash='" . $current_model . "' AND type='learner';"; $result = $dbml->query($query); $row = $result->fetch(); $content = $row['file']; $filemodel = getcwd() . '/cache/query/' . $current_model . '-object.rds'; $fp = fopen($filemodel, 'w'); fwrite($fp, $content); fclose($fp); // Run the predictor exec('cd ' . getcwd() . '/cache/query ; touch ' . md5($config) . '.lock ; rm -f ' . $tmp_file); $count = 1; foreach ($instances as $inst) { exec(getcwd() . '/resources/queue -d -c "cd ' . getcwd() . '/cache/query ; ../../resources/aloja_cli.r -m aloja_predict_instance -l ' . $current_model . ' -p inst_predict=\'' . $inst . '\' -v | grep -v \'Prediction\' >>' . $tmp_file . ' 2>/dev/null; touch ' . md5($config) . '-' . $count++ . '.lock" >/dev/null 2>&1 &'); } } $finished_process = (int) shell_exec('ls ' . getcwd() . '/cache/query/' . md5($config) . '-*.lock | wc -w ') == count($instances); if ($finished_process && !$is_cached) { // Read results and dump to DB $i = 0; $token = 0; $token_i = 0; $query = "INSERT IGNORE INTO aloja_ml.predictions (id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,id_cluster,datanodes,vm_OS,vm_cores,vm_RAM,provider,vm_size,type,bench_type,hadoop_version,pred_time,id_learner,instance,predict_code) VALUES "; if (($handle = fopen(getcwd() . '/cache/query/' . $tmp_file, "r")) !== FALSE) { while (($line = fgets($handle, 1000)) !== FALSE && $i < 1000) { if ($line == '') { break; } // Fetch Real Value $inst_aux = preg_split("/\\s+/", $line); $query_var = "SELECT AVG(exe_time) as AVG, id_exec, outlier FROM aloja_ml.predictions WHERE instance = '" . $inst_aux[1] . "' AND predict_code > 0"; $result = $dbml->query($query_var); $row = $result->fetch(); $realexecval = is_null($row['AVG']) || $row['outlier'] == 2 ? 0 : $row['AVG']; $realid_exec = is_null($row['id_exec']) || $row['outlier'] == 2 ? 0 : $row['id_exec']; $query_var = "SELECT count(*) as num FROM aloja_ml.predictions WHERE instance = '" . $inst_aux[1] . "' AND id_learner = '" . $current_model . "'"; $result = $dbml->query($query_var); $row = $result->fetch(); // Insert instance values if ($row['num'] == 0) { $token_i = 1; $selected_instance = preg_replace('/,Cmp(\\d+),/', ',${1},', $inst_aux[1]); $selected_instance = preg_replace('/,Cl(\\d+),/', ',${1},', $selected_instance); if ($token > 0) { $query = $query . ","; } $token = 1; $query = $query . "('" . $realid_exec . "','" . $realexecval . "','" . str_replace(",", "','", $selected_instance) . "','" . $inst_aux[2] . "','" . $current_model . "','" . $inst_aux[1] . "','0') "; } $i++; if ($i % 100 == 0 && $token_i > 0) { if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving into DB'); } $query = "INSERT IGNORE INTO aloja_ml.predictions (id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,id_cluster,datanodes,vm_OS,vm_cores,vm_RAM,provider,vm_size,type,bench_type,hadoop_version,pred_time,id_learner,instance,predict_code) VALUES "; $token = 0; $token_i = 0; } } if ($token_i > 0) { if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving into DB'); } } // Descriptive Tree $tree_descriptor = shell_exec(getcwd() . '/resources/aloja_cli.r -m aloja_representative_tree -p method=ordered:dump_file="' . getcwd() . '/cache/query/' . $tmp_file . '":output="html" -v 2> /dev/null'); $tree_descriptor = substr($tree_descriptor, 5, -2); $query = "INSERT INTO aloja_ml.trees (id_findattrs,id_learner,instance,model,tree_code) VALUES ('" . md5($config) . "','" . $current_model . "','" . $instance . "','" . $model_info . "','" . $tree_descriptor . "')"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving tree into DB'); } // remove remaining locks shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.lock'); // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '.tmp'); $is_cached = true; } fclose($handle); } if (!$is_cached) { $jsonData = $jsonHeader = $jsonColumns = $jsonColor = '[]'; $must_wait = 'YES'; if (isset($dump)) { $dbml = null; echo "1"; exit(0); } if (isset($pass)) { $dbml = null; return "1"; } } else { if (isset($pass) && $pass == 2) { $dbml = null; return "2"; } // Fetch results and compose JSON $header = array('Benchmark', 'Net', 'Disk', 'Maps', 'IO.SFS', 'Rep', 'IO.FBuf', 'Comp', 'Blk.Size', 'Cluster', 'Datanodes', 'VM.OS', 'VM.Cores', 'VM.RAM', 'Provider', 'VM.Size', 'Type', 'Bench.Type', 'Version', 'Prediction', 'Observed'); $jsonHeader = '[{title:""}'; foreach ($header as $title) { $jsonHeader = $jsonHeader . ',{title:"' . $title . '"}'; } $jsonHeader = $jsonHeader . ']'; $query = "SELECT @i:=@i+1 as num, instance, AVG(pred_time) as pred_time, AVG(exe_time) as exe_time FROM aloja_ml.predictions, (SELECT @i:=0) d WHERE id_learner='" . $current_model . "' " . $where_configs . " GROUP BY instance"; $result = $dbml->query($query); $jsonData = '['; foreach ($result as $row) { if ($jsonData != '[') { $jsonData = $jsonData . ','; } $jsonData = $jsonData . "['" . $row['num'] . "','" . str_replace(",", "','", $row['instance']) . "','" . $row['pred_time'] . "','" . $row['exe_time'] . "']"; } $jsonData = $jsonData . ']'; foreach (range(1, 33) as $value) { $jsonData = str_replace('Cmp' . $value, Utils::getCompressionName($value), $jsonData); } // Fetch MAE & RAE values $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE FROM aloja_ml.predictions WHERE id_learner='" . md5($config) . "' AND predict_code > 0"; $result = $dbml->query($query); $row = $result->fetch(); $mae = $row['MAE']; $rae = $row['RAE']; // Dump case if (isset($dump)) { echo "ID" . str_replace(array("[", "]", "{title:\"", "\"}"), array('', '', ''), $jsonHeader) . "\n"; echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonData); $dbml = null; exit(0); } if (isset($pass) && $pass == 1) { $retval = "ID" . str_replace(array("[", "]", "{title:\"", "\"}"), array('', '', ''), $jsonHeader) . "\n"; $retval .= str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonData); $dbml = null; return $retval; } // Display Descriptive Tree $query = "SELECT tree_code FROM aloja_ml.trees WHERE id_findattrs = '" . md5($config) . "'"; $result = $dbml->query($query); $row = $result->fetch(); $tree_descriptor = $row['tree_code']; } } else { $message = "There are no prediction models trained for such parameters. Train at least one model in 'ML Prediction' section."; $must_wait = 'NO'; if (isset($dump)) { echo "-1"; exit(0); } if (isset($pass)) { return "-1"; } } $dbml = null; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $jsonData = $jsonHeader = "[]"; $must_wait = 'NO'; $mae = $rae = 0; $dbml = null; if (isset($pass)) { return "-2"; } } $return_params = array('instance' => $instance, 'jsonData' => $jsonData, 'jsonHeader' => $jsonHeader, 'models' => $model_html, 'models_id' => $possible_models_id, 'other_models_id' => $other_models, 'current_model' => $current_model, 'message' => $message, 'mae' => $mae, 'rae' => $rae, 'must_wait' => $must_wait, 'instance' => $instance, 'instances' => implode("<br/> ", $instances), 'model_info' => $model_info, 'id_findattr' => md5($config), 'unseen' => $unseen, 'tree_descriptor' => $tree_descriptor); $this->filters->setCurrentChoices('current_model', array_merge($possible_models_id, array('---Other models---'), $other_models)); return $this->render('mltemplate/mlfindattributes.html.twig', $return_params); }
public static function getIndexPrecExps(&$jsonPrecexps, &$jsonPrecexpsHeader, $dbml) { $query = "SELECT id_precision, model, dataslice, creation_time FROM aloja_ml.precisions GROUP BY id_precision"; $rows = $dbml->query($query); $jsonPrecexps = '['; foreach ($rows as $row) { $url = MLUtils::revertModelToURL($row['model'], $row['dataslice'], 'presets=none&submit=&'); $model_display = MLUtils::display_models_noasts($row['model']); $slice_display = MLUtils::display_models_noasts($row['dataslice']); $jsonPrecexps = $jsonPrecexps . ($jsonPrecexps == '[' ? '' : ',') . "['" . $row['id_precision'] . "','" . $model_display . "','" . $slice_display . "','" . $row['creation_time'] . "','<a href=\\'/mlprecision?" . $url . "\\'>View</a> <a href=\\'/mlclearcache?rmp=" . $row['id_precision'] . "\\'>Remove</a>']"; } $jsonPrecexps = $jsonPrecexps . ']'; $jsonPrecexpsHeader = "[{'title':'ID'},{'title':'Attribute Selection'},{'title':'Advanced Filters'},{'title':'Creation'},{'title':'Actions'}]"; }
public function mlparamEvaluationAction() { $rows = $categories = $series = ''; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain_ml'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $where_configs = ''; $preset = null; if (count($_GET) <= 1 || count($_GET) == 2 && array_key_exists('parameval', $_GET) || count($_GET) == 2 && array_key_exists('current_model', $_GET)) { $preset = Utils::setDefaultPreset($db, 'mlparameval'); } $selPreset = isset($_GET['presets']) ? $_GET['presets'] : "none"; $params = array(); $param_names = array('benchs', 'nets', 'disks', 'mapss', 'iosfs', 'replications', 'iofilebufs', 'comps', 'blk_sizes', 'id_clusters', 'datanodess', 'bench_types', 'vm_sizes', 'vm_coress', 'vm_RAMs', 'types'); // Order is important foreach ($param_names as $p) { $params[$p] = Utils::read_params($p, $where_configs, FALSE); sort($params[$p]); } $money = Utils::read_params('money', $where_configs); $paramEval = isset($_GET['parameval']) && $_GET['parameval'] != '' ? $_GET['parameval'] : 'maps'; $minExecs = isset($_GET['minexecs']) ? $_GET['minexecs'] : -1; $minExecsFilter = ""; // FIXME PATCH FOR PARAM LIBRARIES WITHOUT LEGACY $where_configs = str_replace("AND .", "AND ", $where_configs); $where_configs = str_replace("`id_cluster`", "e.`id_cluster`", $where_configs); if ($minExecs > 0) { $minExecsFilter = "HAVING COUNT(*) > {$minExecs}"; } $filter_execs = DBUtils::getFilterExecs(); $options = Utils::getFilterOptions($db); $paramOptions = array(); foreach ($options[$paramEval] as $option) { if ($paramEval == 'id_cluster') { $paramOptions[] = $option['name']; } else { if ($paramEval == 'comp') { $paramOptions[] = Utils::getCompressionName($option[$paramEval]); } else { if ($paramEval == 'net') { $paramOptions[] = Utils::getNetworkName($option[$paramEval]); } else { if ($paramEval == 'disk') { $paramOptions[] = Utils::getDisksName($option[$paramEval]); } else { $paramOptions[] = $option[$paramEval]; } } } } } $benchOptions = $db->get_rows("SELECT DISTINCT bench FROM execs e LEFT JOIN clusters c ON e.id_cluster = c.id_cluster WHERE 1 {$filter_execs} {$where_configs} GROUP BY {$paramEval}, bench order by {$paramEval}"); // get the result rows $query = "SELECT count(*) as count, {$paramEval}, e.id_exec, exec as conf, bench, " . "exe_time, avg(exe_time) avg_exe_time, min(exe_time) min_exe_time " . "from execs e LEFT JOIN clusters c ON e.id_cluster = c.id_cluster WHERE 1 {$filter_execs} {$where_configs}" . "GROUP BY {$paramEval}, bench {$minExecsFilter} order by bench,{$paramEval}"; $rows = $db->get_rows($query); if (!$rows) { throw new \Exception("No results for query!"); } $arrayBenchs = array(); foreach ($paramOptions as $param) { foreach ($benchOptions as $bench) { $arrayBenchs[$bench['bench']][$param] = null; $arrayBenchs[$bench['bench']][$param]['y'] = 0; $arrayBenchs[$bench['bench']][$param]['count'] = 0; } } $series = array(); $bench = ''; foreach ($rows as $row) { if ($paramEval == 'comp') { $row[$paramEval] = Utils::getCompressionName($row['comp']); } else { if ($paramEval == 'id_cluster') { $row[$paramEval] = Utils::getClusterName($row[$paramEval], $db); } else { if ($paramEval == 'net') { $row[$paramEval] = Utils::getNetworkName($row['net']); } else { if ($paramEval == 'disk') { $row[$paramEval] = Utils::getDisksName($row['disk']); } else { if ($paramEval == 'iofilebuf') { $row[$paramEval] /= 1024; } } } } } $arrayBenchs[$row['bench']][$row[$paramEval]]['y'] = round((int) $row['avg_exe_time'], 2); $arrayBenchs[$row['bench']][$row[$paramEval]]['count'] = (int) $row['count']; } // ---------------------------------------------------- // Add predictions to the series // ---------------------------------------------------- $jsonData = $jsonHeader = "[]"; $instance = ""; $arrayBenchs_pred = array(); // FIXME PATCH FOR PARAM LIBRARIES WITHOUT LEGACY $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($param_names, $params, true, $db); $model_info = MLUtils::generateModelInfo($param_names, $params, true, $db); $instances = MLUtils::generateInstances($param_names, $params, true, $db); // model for filling $possible_models = $possible_models_id = array(); MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = ""; if (array_key_exists('current_model', $_GET) && in_array($_GET['current_model'], $possible_models_id)) { $current_model = $_GET['current_model']; } if (!empty($possible_models_id)) { if ($current_model == "") { $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, p.id_learner FROM predictions p, learners l WHERE l.id_learner = p.id_learner AND p.id_learner IN ('" . implode("','", $possible_models_id) . "') AND predict_code > 0 ORDER BY MAE LIMIT 1"; $result = $dbml->query($query); $row = $result->fetch(); $current_model = $row['id_learner']; } $config = $instance . '-' . $current_model . "-parameval"; $query_cache = "SELECT count(*) as total FROM trees WHERE id_learner = '" . $current_model . "' AND model = '" . $model_info . "'"; $is_cached_mysql = $dbml->query($query_cache); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; $ret_data = null; if (!$is_cached) { // Call to MLFindAttributes, to fetch data $_GET['pass'] = 2; $_GET['unseen'] = 1; $_GET['current_model'] = $current_model; $mlfa1 = new MLFindAttributesController(); $mlfa1->container = $this->container; $ret_data = $mlfa1->mlfindattributesAction(); if ($ret_data == 1) { $must_wait = "YES"; $jsonData = $jsonHeader = '[]'; } else { $is_cached_mysql = $dbml->query($query_cache); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; } } if ($is_cached) { $must_wait = 'NO'; $query = "SELECT count(*) as count, {$paramEval}, bench, exe_time, avg(pred_time) avg_pred_time, min(pred_time) min_pred_time " . "FROM predictions e WHERE e.id_learner = '" . $current_model . "' {$filter_execs} {$where_configs}" . "GROUP BY {$paramEval}, bench {$minExecsFilter} order by bench, {$paramEval}"; $result = $dbml->query($query); // Initialize array foreach ($paramOptions as $param) { foreach ($benchOptions as $bench) { $arrayBenchs_pred[$bench['bench'] . '_pred'][$param] = null; $arrayBenchs_pred[$bench['bench'] . '_pred'][$param]['y'] = 0; $arrayBenchs_pred[$bench['bench'] . '_pred'][$param]['count'] = 0; } } foreach ($result as $row) { $bench_n = $row['bench'] . '_pred'; $class = $row[$paramEval]; if ($paramEval == 'comp') { $value = Utils::getCompressionName($class); } else { if ($paramEval == 'id_cluster') { $value = Utils::getClusterName($class, $db); } else { if ($paramEval == 'net') { $value = Utils::getNetworkName($class); } else { if ($paramEval == 'disk') { $value = Utils::getDisksName($class); } else { if ($paramEval == 'iofilebuf') { $value = $class / 1024; } else { $value = $class; } } } } } if (!in_array($value, $paramOptions)) { $paramOptions[] = $value; foreach ($benchOptions as $bench) { $arrayBenchs_pred[$bench['bench'] . '_pred'][$value] = null; $arrayBenchs_pred[$bench['bench'] . '_pred'][$value]['y'] = 0; $arrayBenchs_pred[$bench['bench'] . '_pred'][$value]['count'] = 0; $arrayBenchs[$bench['bench']][$value] = null; $arrayBenchs[$bench['bench']][$value]['y'] = 0; $arrayBenchs[$bench['bench']][$value]['count'] = 0; } } $arrayBenchs_pred[$bench_n][$value]['y'] = (int) $row['avg_pred_time']; $arrayBenchs_pred[$bench_n][$value]['count'] = (int) $row['count']; } } } // ---------------------------------------------------- // END - Add predictions to the series // ---------------------------------------------------- asort($paramOptions); foreach ($arrayBenchs as $key => $arrayBench) { $caregories = ''; $data_a = null; $data_p = null; foreach ($paramOptions as $param) { if ($arrayBenchs[$key][$param]['count'] > 0 && empty($arrayBenchs_pred) || !empty($arrayBenchs_pred) && ($arrayBenchs_pred[$key . '_pred'][$param]['count'] > 0 || $arrayBenchs[$key][$param]['count'] > 0)) { $data_a[] = $arrayBenchs[$key][$param]; if (!empty($arrayBenchs_pred)) { $data_p[] = $arrayBenchs_pred[$key . '_pred'][$param]; } $categories = $categories . "'{$param} " . Utils::getParamevalUnit($paramEval) . "',"; // FIXME - Redundant n times performed... don't care now } } $series[] = array('name' => $key, 'data' => $data_a); if (!empty($arrayBenchs_pred)) { $series[] = array('name' => $key . '_pred', 'data' => $data_p); } } $series = json_encode($series); if (!empty($arrayBenchs_pred)) { $colors = "['#7cb5ec','#9cd5fc','#434348','#636368','#90ed7d','#b0fd9d','#f7a35c','#f7c37c','#8085e9','#a0a5f9','#f15c80','#f17ca0','#e4d354','#f4f374','#8085e8','#a0a5f8','#8d4653','#ad6673','#91e8e1','#b1f8f1']"; } else { $colors = "['#7cb5ec','#434348','#90ed7d','#f7a35c','#8085e9','#f15c80','#e4d354','#8085e8','#8d4653','#91e8e1']"; } } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $series = $jsonHeader = $colors = '[]'; $instance = $current_model = ''; $possible_models = $possible_models_id = array(); $must_wait = 'NO'; } echo $this->container->getTwig()->render('mltemplate/mlconfigperf.html.twig', array('selected' => 'mlparameval', 'title' => 'Improvement of Hadoop Execution by SW and HW Configurations', 'categories' => $categories, 'series' => $series, 'benchs' => $params['benchs'], 'nets' => $params['nets'], 'disks' => $params['disks'], 'blk_sizes' => $params['blk_sizes'], 'comps' => $params['comps'], 'id_clusters' => $params['id_clusters'], 'mapss' => $params['mapss'], 'replications' => $params['replications'], 'iosfs' => $params['iosfs'], 'iofilebufs' => $params['iofilebufs'], 'datanodess' => $params['datanodess'], 'bench_types' => $params['bench_types'], 'vm_sizes' => $params['vm_sizes'], 'vm_coress' => $params['vm_coress'], 'vm_RAMs' => $params['vm_RAMs'], 'types' => $params['types'], 'money' => $money, 'paramEval' => $paramEval, 'instance' => $instance, 'models' => '<li>' . implode('</li><li>', $possible_models) . '</li>', 'models_id' => $possible_models_id, 'current_model' => $current_model, 'gammacolors' => $colors, 'must_wait' => $must_wait, 'preset' => $preset, 'selPreset' => $selPreset, 'options' => Utils::getFilterOptions($db))); }
public function mlpredictionAction() { $jsonExecs = array(); $instance = $error_stats = ''; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); if (array_key_exists('dump', $_GET)) { $dump = $_GET["dump"]; unset($_GET["dump"]); } if (array_key_exists('pass', $_GET)) { $pass = $_GET["pass"]; unset($_GET["pass"]); } $this->buildFilters(array('learn' => array('type' => 'selectOne', 'default' => array('regtree'), 'label' => 'Learning method: ', 'generateChoices' => function () { return array('regtree', 'nneighbours', 'nnet', 'polyreg'); }, 'beautifier' => function ($value) { $labels = array('regtree' => 'Regression Tree', 'nneighbours' => 'k-NN', 'nnet' => 'NNets', 'polyreg' => 'PolyReg-3'); return $labels[$value]; }, 'parseFunction' => function () { $choice = isset($_GET['learn']) ? $_GET['learn'] : array('regtree'); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'umodel' => array('type' => 'checkbox', 'default' => 1, 'label' => 'Unrestricted to new values', 'parseFunction' => function () { $choice = isset($_GET['submit']) && !isset($_GET['umodel']) ? 0 : 1; return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('learn', 'umodel')))); $where_configs = $this->filters->getWhereClause(); $params = array(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); $learnParams = $this->filters->getFiltersSelectedChoices(array('learn', 'umodel')); $learn_param = $learnParams['learn']; $unrestricted = $learnParams['umodel'] ? true : false; $where_configs = str_replace("id_cluster", "e.id_cluster", $where_configs); $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, $unrestricted); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, $unrestricted); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); $config = $model_info . ' ' . $learn_param . ' ' . ($unrestricted ? 'U' : 'R') . ' ' . $slice_info; $learn_options = 'saveall=' . md5($config); if ($learn_param == 'regtree') { $learn_method = 'aloja_regtree'; $learn_options .= ':prange=0,20000'; } else { if ($learn_param == 'nneighbours') { $learn_method = 'aloja_nneighbors'; $learn_options .= ':kparam=3'; } else { if ($learn_param == 'nnet') { $learn_method = 'aloja_nnet'; $learn_options .= ':prange=0,20000'; } else { if ($learn_param == 'polyreg') { $learn_method = 'aloja_linreg'; $learn_options .= ':ppoly=3:prange=0,20000'; } } } } $cache_ds = getcwd() . '/cache/query/' . md5($config) . '-cache.csv'; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM aloja_ml.learners WHERE id_learner = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['num'] > 0; $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.fin'); if (!$is_cached && !$in_process && !$finished_process) { // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'net' => 'Net', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'datanodes' => 'Datanodes', 'vm_OS' => 'VM.OS', 'vm_cores' => 'VM.Cores', 'vm_RAM' => 'VM.RAM', 'provider' => 'Provider', 'vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type', 'hadoop_version' => 'Hadoop.Version'); $headers = array_keys($header_names); $names = array_values($header_names); // dump the result to csv $query = "SELECT " . implode(",", $headers) . " FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, $names, ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // run the R processor exec('cd ' . getcwd() . '/cache/query ; touch ' . getcwd() . '/cache/query/' . md5($config) . '.lock'); exec('cd ' . getcwd() . '/cache/query ; ' . getcwd() . '/resources/queue -c "' . getcwd() . '/resources/aloja_cli.r -d ' . $cache_ds . ' -m ' . $learn_method . ' -p ' . $learn_options . ' > /dev/null 2>&1; rm -f ' . getcwd() . '/cache/query/' . md5($config) . '.lock; touch ' . md5($config) . '.fin" > /dev/null 2>&1 -p 1 &'); } $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.fin'); if ($in_process) { $jsonExecs = "[]"; $must_wait = "YES"; $max_x = $max_y = 0; if (isset($dump)) { echo "1"; exit(0); } if (isset($pass)) { return 1; } } else { $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM aloja_ml.learners WHERE id_learner = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['num'] > 0; if (!$is_cached) { // register model to DB $query = "INSERT IGNORE INTO aloja_ml.learners (id_learner,instance,model,algorithm,dataslice)"; $query = $query . " VALUES ('" . md5($config) . "','" . $instance . "','" . substr($model_info, 1) . "','" . $learn_param . "','" . $slice_info . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving model into DB'); } // read results of the CSV and dump to DB foreach (array("tt", "tv", "tr") as $value) { if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-' . $value . '.csv', 'r')) !== FALSE) { $header = fgetcsv($handle, 1000, ","); $token = 0; $insertions = 0; $query = "INSERT IGNORE INTO aloja_ml.predictions (id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,id_cluster,datanodes,vm_OS,vm_cores,vm_RAM,provider,vm_size,type,bench_type,hadoop_version,pred_time,id_learner,instance,predict_code) VALUES "; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $specific_instance = implode(",", array_slice($data, 2, 21)); $specific_data = implode(",", $data); $specific_data = preg_replace('/,Cmp(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+),/', ',${1},', $specific_data); $specific_data = str_replace(",", "','", $specific_data); $query_var = "SELECT count(*) as num FROM aloja_ml.predictions WHERE instance = '" . $specific_instance . "' AND id_learner = '" . md5($config) . "'"; $result = $dbml->query($query_var); $row = $result->fetch(); // Insert instance values if ($row['num'] == 0) { if ($token != 0) { $query = $query . ","; } $token = 1; $insertions = 1; $query = $query . "('" . $specific_data . "','" . md5($config) . "','" . $specific_instance . "','" . ($value == 'tt' ? 3 : ($value == 'tv' ? 2 : 1)) . "') "; } } if ($insertions > 0) { if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving into DB'); } } fclose($handle); } } // Store file model to DB $filemodel = getcwd() . '/cache/query/' . md5($config) . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO aloja_ml.model_storage (id_hash,type,file) VALUES ('" . md5($config) . "','learner','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file model into DB'); } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.csv'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.fin'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.dat'); } $must_wait = "NO"; $count = 0; $max_x = $max_y = 0; $error_stats = ''; $query = "SELECT exe_time, pred_time, instance FROM aloja_ml.predictions WHERE id_learner='" . md5($config) . "' AND exe_time > 100 LIMIT 5000"; // FIXME - CLUMPSY PATCH FOR BYPASS THE BUG FROM HIGHCHARTS... REMEMBER TO ERASE THIS LIMIT WHEN THE BUG IS SOLVED $result = $dbml->query($query); foreach ($result as $row) { $jsonExecs[$count]['y'] = (int) $row['exe_time']; $jsonExecs[$count]['x'] = (int) $row['pred_time']; $jsonExecs[$count]['mydata'] = $row['instance']; if ((int) $row['exe_time'] > $max_y) { $max_y = (int) $row['exe_time']; } if ((int) $row['pred_time'] > $max_x) { $max_x = (int) $row['pred_time']; } $count++; } $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, predict_code FROM aloja_ml.predictions WHERE id_learner='" . md5($config) . "' AND predict_code > 0 AND exe_time > 100 GROUP BY predict_code"; $result = $dbml->query($query); foreach ($result as $row) { $error_stats = $error_stats . 'Dataset: ' . ($row['predict_code'] == 1 ? 'tr' : ($row['predict_code'] == 2 ? 'tv' : 'tt')) . ' => MAE: ' . $row['MAE'] . ' RAE: ' . $row['RAE'] . '<br/>'; } if (isset($dump)) { $data = json_encode($jsonExecs); echo "Observed, Predicted, Execution\n"; echo str_replace(array('},{"y":', '"x":', '"mydata":', '[{"y":', '"}]'), array("\n", '', '', '', ''), $data); exit(0); } if (isset($pass)) { $data = json_encode($jsonExecs); $retval = "Observed, Predicted, Execution\n"; $retval = $retval . str_replace(array('},{"y":', '"x":', '"mydata":', '[{"y":', '"}]'), array("\n", '', '', '', ''), $data); return $retval; } } $dbml = null; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $jsonExecs = '[]'; $max_x = $max_y = 0; $must_wait = 'NO'; $dbml = null; } $return_params = array('jsonExecs' => json_encode($jsonExecs), 'max_p' => min(array($max_x, $max_y)), 'unrestricted' => $unrestricted, 'learn' => $learn_param, 'must_wait' => $must_wait, 'instance' => $instance, 'model_info' => $model_info, 'slice_info' => $slice_info, 'id_learner' => md5($config), 'error_stats' => $error_stats); return $this->render('mltemplate/mlprediction.html.twig', $return_params); }
public function mlsummariesAction() { $displaydata = $message = ''; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain_ml'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $where_configs = ''; $preset = null; if (count($_GET) <= 1) { $preset = Utils::setDefaultPreset($db, 'mlsummaries'); } $selPreset = isset($_GET['presets']) ? $_GET['presets'] : "none"; $params = array(); $param_names = array('benchs', 'nets', 'disks', 'mapss', 'iosfs', 'replications', 'iofilebufs', 'comps', 'blk_sizes', 'id_clusters', 'datanodess', 'bench_types', 'vm_sizes', 'vm_coress', 'vm_RAMs', 'types'); // Order is important foreach ($param_names as $p) { $params[$p] = Utils::read_params($p, $where_configs, FALSE); sort($params[$p]); } $separate_feat = 'joined'; if (array_key_exists('feature', $_GET)) { $separate_feat = $_GET['feature']; } // compose instance $instance = MLUtils::generateSimpleInstance($param_names, $params, true, $db); $model_info = MLUtils::generateModelInfo($param_names, $params, true, $db); $config = $model_info . ' ' . $separate_feat . ' SUMMARY'; $cache_ds = getcwd() . '/cache/query/' . md5($config) . '-cache.csv'; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM summaries WHERE id_summaries = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['num'] > 0; if (!$is_cached) { // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'net' => 'Net', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'name' => 'Cl.Name', 'datanodes' => 'Datanodes', 'headnodes' => 'Headnodes', 'vm_OS' => 'VM.OS', 'vm_cores' => 'VM.Cores', 'vm_RAM' => 'VM.RAM', 'provider' => 'Provider', 'vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type'); $headers = array_keys($header_names); $names = array_values($header_names); // dump the result to csv $query = "SELECT " . implode(",", $headers) . " FROM execs e LEFT JOIN clusters c ON e.id_cluster = c.id_cluster WHERE e.valid = TRUE AND e.exe_time > 100" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, $names, ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // launch query $command = 'cd ' . getcwd() . '/cache/query; ../../resources/aloja_cli.r -m aloja_print_summaries -d ' . $cache_ds . ' -p ' . ($separate_feat != 'joined' ? 'sname=' . $separate_feat . ':' : '') . 'fprint=' . md5($config) . ':fwidth=1000:html=1'; #fwidth=135 $output = shell_exec($command); // Save to DB if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-summary.data', 'r')) !== FALSE) { $displaydata = ""; while (($data = fgets($handle)) !== FALSE) { $displaydata = $displaydata . $data; } fclose($handle); $displaydata = str_replace('\'', '\\\'', $displaydata); // register model to DB $query = "INSERT INTO summaries (id_summaries,instance,model,summary)"; $query = $query . " VALUES ('" . md5($config) . "','" . $instance . "','" . substr($model_info, 1) . "','" . $displaydata . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving model into DB'); } } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '-summary.data'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '-cache.csv'); } // Read results of the DB $is_cached_mysql = $dbml->query("SELECT summary FROM summaries WHERE id_summaries = '" . md5($config) . "' LIMIT 1"); $tmp_result = $is_cached_mysql->fetch(); $displaydata = $tmp_result['summary']; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $displaydata = $separate_feat = ''; } echo $this->container->getTwig()->render('mltemplate/mlsummaries.html.twig', array('selected' => 'mlsummaries', 'displaydata' => $displaydata, 'benchs' => $params['benchs'], 'nets' => $params['nets'], 'disks' => $params['disks'], 'blk_sizes' => $params['blk_sizes'], 'comps' => $params['comps'], 'id_clusters' => $params['id_clusters'], 'mapss' => $params['mapss'], 'replications' => $params['replications'], 'iosfs' => $params['iosfs'], 'iofilebufs' => $params['iofilebufs'], 'datanodess' => $params['datanodess'], 'bench_types' => $params['bench_types'], 'vm_sizes' => $params['vm_sizes'], 'vm_coress' => $params['vm_coress'], 'vm_RAMs' => $params['vm_RAMs'], 'types' => $params['types'], 'feature' => $separate_feat, 'message' => $message, 'preset' => $preset, 'selPreset' => $selPreset, 'options' => Utils::getFilterOptions($db))); }
public function mlparamEvaluationAction() { $rows = $categories = $series = $instance = $model_info = $config = $current_model = $slice_info = ''; $arrayBenchs_pred = $possible_models = $possible_models_id = $other_models = array(); $jsonData = $jsonHeader = "[]"; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); if (array_key_exists('parameval', $_GET)) { $paramEval = isset($_GET['parameval']) && Utils::get_GET_string('parameval') != '' ? Utils::get_GET_string('parameval') : 'maps'; unset($_GET["parameval"]); } $this->buildFilters(array('current_model' => array('type' => 'selectOne', 'default' => null, 'label' => 'Model to use: ', 'generateChoices' => function () { return array(); }, 'parseFunction' => function () { $choice = isset($_GET['current_model']) ? $_GET['current_model'] : array(""); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'minExecs' => array('default' => 0, 'type' => 'inputNumber', 'label' => 'Minimum executions:', 'parseFunction' => function () { return 0; }, 'filterGroup' => 'basic'), 'minexetime' => array('default' => 0), 'valid' => array('default' => 0), 'filter' => array('default' => 0), 'prepares' => array('default' => 0))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('current_model')))); $where_configs = $this->filters->getWhereClause(); $params = array(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); $param_variables = $this->filters->getFiltersSelectedChoices(array('current_model', 'minExecs')); $param_current_model = $param_variables['current_model']; $minExecs = $param_variables['minExecs']; $where_configs = str_replace("AND .", "AND ", $where_configs); $where_configs = str_replace("id_cluster", "e.id_cluster", $where_configs); $minExecsFilter = ""; if ($minExecs > 0) { $minExecsFilter = "HAVING COUNT(*) > {$minExecs}"; } $filter_execs = DBUtils::getFilterExecs(); $options = $this->filters->getFilterChoices(); $paramOptions = array(); foreach ($options[$paramEval] as $option) { if ($paramEval == 'comp') { $paramOptions[] = Utils::getCompressionName($option); } else { if ($paramEval == 'net') { $paramOptions[] = Utils::getNetworkName($option); } else { if ($paramEval == 'disk') { $paramOptions[] = Utils::getDisksName($option); } else { $paramOptions[] = $option; } } } } $param_eval_query = $paramEval == 'id_cluster' ? 'e.id_cluster' : $paramEval; $benchOptions = $db->get_rows("SELECT DISTINCT bench FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE 1 {$filter_execs} {$where_configs} GROUP BY {$param_eval_query}, bench order by {$param_eval_query}"); // get the result rows $query = "SELECT count(*) as count, {$param_eval_query}, e.id_exec, exec as conf, bench, " . "exe_time, avg(exe_time) avg_exe_time, min(exe_time) min_exe_time " . "from aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE 1 {$filter_execs} {$where_configs}" . "GROUP BY {$param_eval_query},bench {$minExecsFilter} order by bench,{$param_eval_query}"; $rows = $db->get_rows($query); if (!$rows) { throw new \Exception("No results for query!"); } $arrayBenchs = array(); foreach ($paramOptions as $param) { foreach ($benchOptions as $bench) { $arrayBenchs[$bench['bench']][$param] = null; $arrayBenchs[$bench['bench']][$param]['y'] = 0; $arrayBenchs[$bench['bench']][$param]['count'] = 0; } } $series = array(); $bench = ''; foreach ($rows as $row) { if ($paramEval == 'comp') { $row[$paramEval] = Utils::getCompressionName($row['comp']); } else { if ($paramEval == 'net') { $row[$paramEval] = Utils::getNetworkName($row['net']); } else { if ($paramEval == 'disk') { $row[$paramEval] = Utils::getDisksName($row['disk']); } else { if ($paramEval == 'iofilebuf') { $row[$paramEval] /= 1024; } } } } $arrayBenchs[$row['bench']][$row[$paramEval]]['y'] = round((int) $row['avg_exe_time'], 2); $arrayBenchs[$row['bench']][$row[$paramEval]]['count'] = (int) $row['count']; } // ---------------------------------------------------- // Add predictions to the series // ---------------------------------------------------- $param_variables = $this->filters->getFiltersSelectedChoices(array('current_model')); $param_current_model = $param_variables['current_model']; $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, true); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, true); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); // model for filling MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = ''; if (!is_null($possible_models_id) && in_array($param_current_model, $possible_models_id)) { $current_model = $param_current_model; } // Other models for filling $where_models = ''; if (!empty($possible_models_id)) { $where_models = " WHERE id_learner NOT IN ('" . implode("','", $possible_models_id) . "')"; } $result = $dbml->query("SELECT id_learner FROM aloja_ml.learners" . $where_models); foreach ($result as $row) { $other_models[] = $row['id_learner']; } if (!empty($possible_models_id)) { if ($current_model == "") { $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, p.id_learner FROM aloja_ml.predictions p, aloja_ml.learners l WHERE l.id_learner = p.id_learner AND p.id_learner IN ('" . implode("','", $possible_models_id) . "') AND predict_code > 0 ORDER BY MAE LIMIT 1"; $result = $dbml->query($query); $row = $result->fetch(); $current_model = $row['id_learner']; } $config = $instance . '-' . $current_model . ' ' . $slice_info . "-parameval"; $query_cache = "SELECT count(*) as total FROM aloja_ml.trees WHERE id_learner = '" . $current_model . "' AND model = '" . $model_info . "'"; $is_cached_mysql = $dbml->query($query_cache); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; $ret_data = null; if (!$is_cached) { // Call to MLFindAttributes, to fetch data $_GET['pass'] = 2; $_GET['unseen'] = 1; $_GET['current_model'] = $current_model; $mlfa1 = new MLFindAttributesController(); $mlfa1->container = $this->container; $ret_data = $mlfa1->mlfindattributesAction(); if ($ret_data == 1) { $must_wait = "YES"; $jsonData = $jsonHeader = '[]'; } else { $is_cached_mysql = $dbml->query($query_cache); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; } } if ($is_cached) { $must_wait = 'NO'; $query = "SELECT count(*) as count, {$param_eval_query}, bench, exe_time, avg(pred_time) avg_pred_time, min(pred_time) min_pred_time " . "FROM aloja_ml.predictions e WHERE e.id_learner = '" . $current_model . "' {$filter_execs} {$where_configs}" . "GROUP BY {$param_eval_query}, bench {$minExecsFilter} order by bench,{$param_eval_query}"; $result = $dbml->query($query); // Initialize array foreach ($paramOptions as $param) { foreach ($benchOptions as $bench) { $arrayBenchs_pred[$bench['bench'] . '_pred'][$param] = null; $arrayBenchs_pred[$bench['bench'] . '_pred'][$param]['y'] = 0; $arrayBenchs_pred[$bench['bench'] . '_pred'][$param]['count'] = 0; } } foreach ($result as $row) { $bench_n = $row['bench'] . '_pred'; $class = $row[$paramEval]; if ($paramEval == 'comp') { $value = Utils::getCompressionName($class); } else { if ($paramEval == 'id_cluster') { $value = Utils::getClusterName($class, $db); } else { if ($paramEval == 'net') { $value = Utils::getNetworkName($class); } else { if ($paramEval == 'disk') { $value = Utils::getDisksName($class); } else { if ($paramEval == 'iofilebuf') { $value = $class / 1024; } else { $value = $class; } } } } } if (!in_array($value, $paramOptions)) { $paramOptions[] = $value; foreach ($benchOptions as $bench) { $arrayBenchs_pred[$bench['bench'] . '_pred'][$value] = null; $arrayBenchs_pred[$bench['bench'] . '_pred'][$value]['y'] = 0; $arrayBenchs_pred[$bench['bench'] . '_pred'][$value]['count'] = 0; $arrayBenchs[$bench['bench']][$value] = null; $arrayBenchs[$bench['bench']][$value]['y'] = 0; $arrayBenchs[$bench['bench']][$value]['count'] = 0; } } $arrayBenchs_pred[$bench_n][$value]['y'] = (int) $row['avg_pred_time']; $arrayBenchs_pred[$bench_n][$value]['count'] = (int) $row['count']; } } } // ---------------------------------------------------- // END - Add predictions to the series // ---------------------------------------------------- asort($paramOptions); foreach ($arrayBenchs as $key => $arrayBench) { $caregories = ''; $data_a = null; $data_p = null; foreach ($paramOptions as $param) { if ($arrayBenchs[$key][$param]['count'] > 0 && empty($arrayBenchs_pred) || !empty($arrayBenchs_pred) && ($arrayBenchs_pred[$key . '_pred'][$param]['count'] > 0 || $arrayBenchs[$key][$param]['count'] > 0)) { $data_a[] = $arrayBenchs[$key][$param]; if (!empty($arrayBenchs_pred)) { $data_p[] = $arrayBenchs_pred[$key . '_pred'][$param]; } $categories = $categories . "'{$param} " . Utils::getParamevalUnit($paramEval) . "',"; // FIXME - Redundant n times performed... don't care now } } $series[] = array('name' => $key, 'data' => $data_a); if (!empty($arrayBenchs_pred)) { $series[] = array('name' => $key . '_pred', 'data' => $data_p); } } $series = json_encode($series); if (!empty($arrayBenchs_pred)) { $colors = "['#7cb5ec','#9cd5fc','#434348','#636368','#90ed7d','#b0fd9d','#f7a35c','#f7c37c','#8085e9','#a0a5f9','#f15c80','#f17ca0','#e4d354','#f4f374','#8085e8','#a0a5f8','#8d4653','#ad6673','#91e8e1','#b1f8f1']"; } else { $colors = "['#7cb5ec','#434348','#90ed7d','#f7a35c','#8085e9','#f15c80','#e4d354','#8085e8','#8d4653','#91e8e1']"; } } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $series = $jsonHeader = $colors = '[]'; $must_wait = 'NO'; } $return_params = array('title' => 'Improvement of Hadoop Execution by SW and HW Configurations', 'categories' => $categories, 'series' => $series, 'paramEval' => $paramEval, 'instance' => $instance, 'models' => '<li>' . implode('</li><li>', $possible_models) . '</li>', 'models_id' => $possible_models_id, 'current_model' => $current_model, 'gammacolors' => $colors, 'model_info' => $model_info, 'slice_info' => $slice_info, 'must_wait' => $must_wait); $this->filters->setCurrentChoices('current_model', array_merge($possible_models_id, array('---Other models---'), $other_models)); return $this->render('mltemplate/mlparameval.html.twig', $return_params); }
public function mlcrossvar3dfaAction() { $jsonData = array(); $message = $instance = $possible_models_id = ''; $maxx = $minx = $maxy = $miny = $maxz = $minz = 0; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain_ml'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $where_configs = ''; $preset = null; if (count($_GET) <= 1 || count($_GET) == 2 && array_key_exists('current_model', $_GET) || count($_GET) == 3 && array_key_exists('variable1', $_GET) && array_key_exists('variable2', $_GET) || count($_GET) == 4 && array_key_exists('current_model', $_GET) && array_key_exists('variable1', $_GET) && array_key_exists('variable2', $_GET)) { $preset = Utils::setDefaultPreset($db, 'mlcrossvar3dfa'); } $selPreset = isset($_GET['presets']) ? $_GET['presets'] : "none"; $params = array(); $param_names = array('benchs', 'nets', 'disks', 'mapss', 'iosfs', 'replications', 'iofilebufs', 'comps', 'blk_sizes', 'id_clusters', 'datanodess', 'bench_types', 'vm_sizes', 'vm_coress', 'vm_RAMs', 'types'); // Order is important foreach ($param_names as $p) { $params[$p] = Utils::read_params($p, $where_configs, FALSE); sort($params[$p]); } $cross_var1 = array_key_exists('variable1', $_GET) ? $_GET['variable1'] : 'maps'; $cross_var2 = array_key_exists('variable2', $_GET) ? $_GET['variable2'] : 'net'; $unseen = array_key_exists('unseen', $_GET) && $_GET['unseen'] == 1; $where_configs = str_replace("AND .", "AND ", $where_configs); $cross_var1 = str_replace("id_cluster", "e.id_cluster", $cross_var1); $cross_var2 = str_replace("id_cluster", "e.id_cluster", $cross_var2); // compose instance $instance = MLUtils::generateSimpleInstance($param_names, $params, $unseen, $db); $model_info = MLUtils::generateModelInfo($param_names, $params, $unseen, $db); // Model for filling MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = ""; if (array_key_exists('current_model', $_GET) && in_array($_GET['current_model'], $possible_models_id)) { $current_model = $_GET['current_model']; } // Call to MLFindAttributes, to fetch data $_GET['pass'] = 1; $_GET['unseen'] = $unseen; $mlfa1 = new MLFindAttributesController(); $mlfa1->container = $this->container; $ret_data = $mlfa1->mlfindattributesAction(); $rows = null; if ($ret_data == 1) { $must_wait = "YES"; $jsonData = '[]'; $categories1 = $categories2 = "''"; } else { if ($ret_data == -1) { $must_wait = "NO"; $jsonData = '[]'; $categories1 = $categories2 = "''"; $message = "There are no prediction models trained for such parameters. Train at least one model in 'ML Prediction' section. [" . $instance . "]"; } else { // Get stuff from the DB $query = "SELECT " . $cross_var1 . " AS V1, " . $cross_var2 . " AS V2, AVG(p.pred_time) as V3, p.instance\n\t\t\t\t\tFROM predictions as p\n\t\t\t\t\tWHERE p.id_learner " . ($current_model != '' ? "='" . $current_model . "'" : "IN (SELECT id_learner FROM trees WHERE model='" . $model_info . "')") . $where_configs . "\n\t\t\t\t\tGROUP BY p.instance\n\t\t\t\t\tORDER BY RAND() LIMIT 5000;"; // FIXME - CLUMPSY PATCH FOR BYPASS THE BUG FROM HIGHCHARTS... REMEMBER TO ERASE THIS LINE WHEN THE BUG IS SOLVED $rows = $dbml->query($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } } } if ($must_wait == "NO") { $map_var1 = $map_var2 = array(); $count_var1 = $count_var2 = 0; $categories1 = $categories2 = ''; $var1_categorical = in_array($cross_var1, array("net", "disk", "bench", "vm_OS", "provider", "vm_size", "type", "bench_type")); $var2_categorical = in_array($cross_var2, array("net", "disk", "bench", "vm_OS", "provider", "vm_size", "type", "bench_type")); foreach ($rows as $row) { $entry = array(); if ($var1_categorical) { if (!array_key_exists($row['V1'], $map_var1)) { $map_var1[$row['V1']] = $count_var1++; $categories1 = $categories1 . ($categories1 != '' ? "," : "") . "\"" . $row['V1'] . "\""; } $entry['y'] = $map_var1[$row['V1']] * (rand(990, 1010) / 1000); } else { $entry['y'] = (int) $row['V1'] * (rand(990, 1010) / 1000); } if ($entry['y'] > $maxy) { $maxy = $entry['y']; } if ($entry['y'] < $miny) { $miny = $entry['y']; } if ($var2_categorical) { if (!array_key_exists($row['V2'], $map_var2)) { $map_var2[$row['V2']] = $count_var2++; $categories2 = $categories2 . ($categories2 != '' ? "," : "") . "\"" . $row['V2'] . "\""; } $entry['x'] = $map_var2[$row['V2']] * (rand(990, 1010) / 1000); } else { $entry['x'] = (int) $row['V2'] * (rand(990, 1010) / 1000); } if ($entry['x'] > $maxx) { $maxx = $entry['x']; } if ($entry['x'] < $minx) { $minx = $entry['x']; } $entry['z'] = -1 * (int) $row['V3'] * (rand(990, 1010) / 1000); if ($entry['z'] > $maxz) { $maxz = $entry['z']; } if ($entry['z'] < $minz) { $minz = $entry['z']; } $entry['name'] = $row['instance']; //$row['V1']." - ".$row['V2']." - ".max(100,(int)$row['V3']); $jsonData[] = $entry; } $jsonData = json_encode($jsonData); if ($categories1 != '') { $categories1 = "[" . $categories1 . "]"; } else { $categories1 = "''"; } if ($categories2 != '') { $categories2 = "[" . $categories2 . "]"; } else { $categories2 = "''"; } } $dbml = null; $cross_var1 = str_replace("e.id_cluster", "id_cluster", $cross_var1); $cross_var2 = str_replace("e.id_cluster", "id_cluster", $cross_var2); } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $jsonData = '[]'; $cross_var1 = $cross_var2 = ''; $categories1 = $categories2 = ''; $maxx = $minx = $maxy = $miny = $maxz = $minz = 0; $must_wait = "NO"; $dbml = null; $possible_models = $possible_models_id = array(); } echo $this->container->getTwig()->render('mltemplate/mlcrossvar3dfa.html.twig', array('selected' => 'mlcrossvar3dfa', 'jsonData' => $jsonData, 'variable1' => $cross_var1, 'variable2' => $cross_var2, 'categories1' => $categories1, 'categories2' => $categories2, 'maxx' => $maxx, 'minx' => $minx, 'maxy' => $maxy, 'miny' => $miny, 'maxz' => $maxz, 'minz' => $minz, 'benchs' => $params['benchs'], 'nets' => $params['nets'], 'disks' => $params['disks'], 'blk_sizes' => $params['blk_sizes'], 'comps' => $params['comps'], 'id_clusters' => $params['id_clusters'], 'mapss' => $params['mapss'], 'replications' => $params['replications'], 'iosfs' => $params['iosfs'], 'iofilebufs' => $params['iofilebufs'], 'datanodess' => $params['datanodess'], 'bench_types' => $params['bench_types'], 'vm_sizes' => $params['vm_sizes'], 'vm_coress' => $params['vm_coress'], 'vm_RAMs' => $params['vm_RAMs'], 'types' => $params['types'], 'message' => $message, 'instance' => $instance, 'model_info' => $model_info, 'current_model' => $current_model, 'unseen' => $unseen, 'models' => '<li>' . implode('</li><li>', $possible_models) . '</li>', 'models_id' => $possible_models_id, 'must_wait' => $must_wait, 'preset' => $preset, 'selPreset' => $selPreset, 'options' => Utils::getFilterOptions($db))); }
public function mloutliersAction() { $jsonData = $jsonWarns = $jsonOuts = array(); $message = $instance = $jsonHeader = $jsonTable = $model_html = $config = $model_info = ''; $possible_models = $possible_models_id = $other_models = array(); $jsonResolutions = $jsonResolutionsHeader = '[]'; $max_x = $max_y = 0; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); // FIXME - This must be counted BEFORE building filters, as filters inject rubbish in GET when there are no parameters... $instructions = count($_GET) <= 1; if (array_key_exists('dump', $_GET)) { $dump = $_GET["dump"]; unset($_GET["dump"]); } if (array_key_exists('register', $_GET)) { $register = $_GET["register"]; unset($_GET["register"]); } $this->buildFilters(array('current_model' => array('type' => 'selectOne', 'default' => null, 'label' => 'Model to use: ', 'generateChoices' => function () { return array(); }, 'parseFunction' => function () { $choice = isset($_GET['current_model']) ? $_GET['current_model'] : array(""); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'sigma' => array('type' => 'inputNumber', 'default' => 1, 'label' => 'Sigmas: ', 'parseFunction' => function () { $choice = isset($_GET['sigma']) ? $_GET['sigma'] : 1; return array('whereClause' => '', 'currentChoice' => $choice); }, 'max' => 3, 'min' => 1, 'filterGroup' => 'MLearning'), 'minexetime' => array('default' => 0), 'valid' => array('default' => 0), 'filter' => array('default' => 0), 'prepares' => array('default' => 1))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('current_model', 'sigma')))); $params = array(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); $param_variables = $this->filters->getFiltersSelectedChoices(array('current_model', 'sigma')); $param_current_model = $param_variables['current_model']; $sigma_param = $param_variables['sigma']; $where_configs = $this->filters->getWhereclause(); $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, true); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, true); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); // model for filling MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = ''; if (!is_null($possible_models_id) && in_array($param_current_model, $possible_models_id)) { $current_model = $param_current_model; } // Other models for filling $where_models = ''; if (!empty($possible_models_id)) { $where_models = " WHERE id_learner NOT IN ('" . implode("','", $possible_models_id) . "')"; } $result = $dbml->query("SELECT id_learner FROM aloja_ml.learners" . $where_models); foreach ($result as $row) { $other_models[] = $row['id_learner']; } if ($instructions) { $result = $dbml->query("SELECT id_learner, model, algorithm FROM aloja_ml.learners"); foreach ($result as $row) { $model_html = $model_html . "<li>" . $row['id_learner'] . " => " . $row['algorithm'] . " : " . $row['model'] . "</li>"; } MLUtils::getIndexOutExps($jsonResolutions, $jsonResolutionsHeader, $dbml); $this->filters->setCurrentChoices('current_model', array_merge($possible_models_id, array('---Other models---'), $other_models)); return $this->render('mltemplate/mloutliers.html.twig', array('outexps' => $jsonResolutions, 'header_outexps' => $jsonResolutionsHeader, 'jsonData' => '[]', 'jsonWarns' => '[]', 'jsonOuts' => '[]', 'jsonHeader' => '[]', 'jsonTable' => '[]', 'max_p' => 0, 'models' => $model_html, 'instructions' => 'YES')); } if (!empty($possible_models_id)) { $result = $dbml->query("SELECT id_learner, model, algorithm, CASE WHEN `id_learner` IN ('" . implode("','", $possible_models_id) . "') THEN 'COMPATIBLE' ELSE 'NOT MATCHED' END AS compatible FROM aloja_ml.learners"); foreach ($result as $row) { $model_html = $model_html . "<li>" . $row['id_learner'] . " => " . $row['algorithm'] . " : " . $row['compatible'] . " : " . $row['model'] . "</li>"; } if ($current_model == "") { $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, p.id_learner FROM aloja_ml.predictions p, aloja_ml.learners l WHERE l.id_learner = p.id_learner AND p.id_learner IN ('" . implode("','", $possible_models_id) . "') AND predict_code > 0 ORDER BY MAE LIMIT 1"; $result = $dbml->query($query); $row = $result->fetch(); $current_model = $row['id_learner']; } $config = $instance . '-' . $current_model . '-' . $sigma_param . ' ' . $slice_info . '-outliers'; $is_cached_mysql = $dbml->query("SELECT count(*) as total FROM aloja_ml.resolutions WHERE id_resolution = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; $cache_ds = getcwd() . '/cache/query/' . md5($config) . '-cache.csv'; $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv'); if (!$is_cached && !$in_process && !$finished_process) { // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'net' => 'Net', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'datanodes' => 'Datanodes', 'vm_OS' => 'VM.OS', 'vm_cores' => 'VM.Cores', 'vm_RAM' => 'VM.RAM', 'provider' => 'Provider', 'vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type', 'hadoop_version' => 'Hadoop.Version'); $headers = array_keys($header_names); $names = array_values($header_names); // dump the result to csv $query = "SELECT " . implode(",", $headers) . " FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, $names, ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // Retrieve file model from DB $query = "SELECT file FROM aloja_ml.model_storage WHERE id_hash='" . $current_model . "' AND type='learner';"; $result = $dbml->query($query); $row = $result->fetch(); $content = $row['file']; $filemodel = getcwd() . '/cache/query/' . $current_model . '-object.rds'; $fp = fopen($filemodel, 'w'); fwrite($fp, $content); fclose($fp); // launch query exec('cd ' . getcwd() . '/cache/query ; touch ' . md5($config) . '.lock'); exec(getcwd() . '/resources/queue -c "cd ' . getcwd() . '/cache/query ; ' . getcwd() . '/resources/aloja_cli.r -m aloja_outlier_dataset -d ' . $cache_ds . ' -l ' . $current_model . ' -p sigma=' . $sigma_param . ':hdistance=3:saveall=' . md5($config) . ' > /dev/null 2>&1 ; rm -f ' . md5($config) . '.lock" > /dev/null 2>&1 &'); } $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv'); if ($finished_process && !$is_cached) { if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv', 'r')) !== FALSE) { $header = fgetcsv($handle, 1000, ","); $token = 0; $query = "REPLACE INTO aloja_ml.resolutions (id_resolution,id_learner,id_exec,instance,model,dataslice,sigma,outlier_code,predicted,observed) VALUES "; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $resolution = $data[0]; $pred_value = (int) $data[1] >= 100 ? (int) $data[1] : 100; $exec_value = (int) $data[2]; $selected_instance_pre = preg_replace('/\\s+/', '', $data[3]); $selected_instance_pre = str_replace(':', ',', $selected_instance_pre); $specific_id = $data[4]; if ($token > 0) { $query = $query . ","; } $token = 1; $query = $query . "('" . md5($config) . "','" . $current_model . "','" . $specific_id . "','" . $selected_instance_pre . "','" . $model_info . "','" . $slice_info . "','" . $sigma_param . "','" . $resolution . "','" . $pred_value . "','" . $exec_value . "') "; } if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving tree into DB'); } } // Store file model to DB $filemodel = getcwd() . '/cache/query/' . md5($config) . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO aloja_ml.model_storage (id_hash,type,file) VALUES ('" . md5($config) . "','resolution','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file resolution into DB'); } // Remove temporary files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '-*.csv'); $is_cached = true; } if (!$is_cached) { $jsonData = $jsonOuts = $jsonWarns = $jsonHeader = $jsonTable = '[]'; $must_wait = 'YES'; if (isset($dump)) { echo "1"; exit(0); } } else { $must_wait = 'NO'; $query = "SELECT predicted, observed, outlier_code, id_exec, instance FROM aloja_ml.resolutions WHERE id_resolution = '" . md5($config) . "' LIMIT 5000"; // FIXME - CLUMSY PATCH FOR BYPASS THE BUG FROM HIGHCHARTS... REMEMBER TO ERASE THIS LINE WHEN THE BUG IS SOLVED $result = $dbml->query($query); foreach ($result as $row) { $entry = array('x' => (int) $row['predicted'], 'y' => (int) $row['observed'], 'name' => $row['instance'], 'id' => (int) $row['id_exec']); if ($row['outlier_code'] == 0) { $jsonData[] = $entry; } if ($row['outlier_code'] == 1) { $jsonWarns[] = $entry; } if ($row['outlier_code'] == 2) { $jsonOuts[] = $entry; } $jsonTable .= ($jsonTable == '' ? '' : ',') . '["' . ($row['outlier_code'] == 0 ? 'Legitimate' : ($row['outlier_code'] == 1 ? 'Warning' : 'Outlier')) . '","' . $row['predicted'] . '","' . $row['observed'] . '","' . str_replace(",", "\",\"", $row['instance']) . '","' . $row['id_exec'] . '"]'; } $query_var = "SELECT MAX(predicted) as max_x, MAX(observed) as max_y FROM aloja_ml.resolutions WHERE id_resolution = '" . md5($config) . "' LIMIT 5000"; $result = $dbml->query($query_var); $row = $result->fetch(); $max_x = $row['max_x']; $max_y = $row['max_y']; $header = array('Prediction', 'Observed', 'Benchmark', 'Net', 'Disk', 'Maps', 'IO.SFS', 'Rep', 'IO.FBuf', 'Comp', 'Blk.Size', 'Cluster', 'Datanodes', 'VM.OS', 'VM.Cores', 'VM.RAM', 'Provider', 'VM.Size', 'Type', 'Bench.Type', 'Version', 'ID'); $jsonHeader = '[{title:""}'; foreach ($header as $title) { $jsonHeader = $jsonHeader . ',{title:"' . $title . '"}'; } $jsonHeader = $jsonHeader . ']'; $jsonData = json_encode($jsonData); $jsonWarns = json_encode($jsonWarns); $jsonOuts = json_encode($jsonOuts); $jsonTable = '[' . $jsonTable . ']'; // Dump case if (isset($dump)) { echo str_replace(array("[", "]", "{title:\"", "\"}"), array('', '', ''), $jsonHeader) . "\n"; echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonOuts); echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonWarns); echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonData); exit(0); } // Register case if (isset($register)) { // Update the predictions table $query_var = "UPDATE aloja_ml.predictions as p, aloja_ml.resolutions as r\n\t\t\t\t\t\t\t\tSET p.outlier = r.outlier_code\n\t\t\t\t\t\t\t\tWHERE r.id_exec = p.id_exec\n\t\t\t\t\t\t\t\t\tAND r.id_resolution = '" . md5($config) . "'\n\t\t\t\t\t\t\t\t\tAND p.id_learner = '" . $current_model . "'"; if ($dbml->query($query_var) === FALSE) { throw new \Exception('Error when updating aloja_ml.predictions in DB'); } } } } else { throw new \Exception('There are no prediction models trained for such parameters. Train at least one model in "ML Prediction" section.'); } $dbml = null; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage()); $jsonData = $jsonOuts = $jsonWarns = $jsonHeader = $jsonTable = '[]'; $must_wait = "NO"; $dbml = null; } $return_params = array('jsonData' => $jsonData, 'jsonWarns' => $jsonWarns, 'jsonOuts' => $jsonOuts, 'jsonHeader' => $jsonHeader, 'jsonTable' => $jsonTable, 'max_p' => min(array($max_x, $max_y)), 'outexps' => $jsonResolutions, 'header_outexps' => $jsonResolutionsHeader, 'must_wait' => $must_wait, 'models' => $model_html, 'models_id' => $possible_models_id, 'other_models_id' => $other_models, 'current_model' => $current_model, 'resolution_id' => md5($config), 'model_info' => $model_info, 'slice_info' => $slice_info, 'sigma' => $sigma_param, 'message' => $message, 'instance' => $instance); $this->filters->setCurrentChoices('current_model', array_merge($possible_models_id, array('---Other models---'), $other_models)); return $this->render('mltemplate/mloutliers.html.twig', $return_params); }
public function mlcrossvar3dfaAction() { $jsonData = $possible_models = $possible_models_id = $other_models = array(); $message = $instance = $possible_models_id = ''; $categories1 = $categories2 = "''"; $maxx = $minx = $maxy = $miny = $maxz = $minz = 0; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $this->buildFilters(array('variable2' => array('type' => 'selectOne', 'default' => array('net'), 'table' => 'execs', 'label' => 'Variable 2: ', 'generateChoices' => function () { return array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'bench_type', 'vm_size', 'vm_cores', 'vm_RAM', 'type', 'hadoop_version', 'provider', 'vm_OS', 'exe_time', 'pred_time', 'TOTAL_MAPS'); }, 'beautifier' => function ($value) { $labels = array('bench' => 'Benchmark', 'net' => 'Network', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'I/O Sort Factor', 'replication' => 'Replication', 'iofilebuf' => 'I/O File Buffer', 'comp' => 'Compression', 'blk_size' => 'Block size', 'id_cluster' => 'Cluster', 'datanodes' => 'Datanodes', 'bench_type' => 'Benchmark Suite', 'vm_size' => 'VM Size', 'vm_cores' => 'VM cores', 'vm_RAM' => 'VM RAM', 'type' => 'Cluster type', 'hadoop_version' => 'Hadoop Version', 'provider' => 'Provider', 'vm_OS' => 'VM OS', 'exe_time' => 'Exeuction time', 'pred_time' => 'Prediction time', 'TOTAL_MAPS' => 'Total execution maps'); return $labels[$value]; }, 'parseFunction' => function () { $value = isset($_GET['variable2']) ? $_GET['variable2'] : array('net'); return array('currentChoice' => $value, 'whereClause' => ""); }), 'variable1' => array('type' => 'selectOne', 'default' => array('maps'), 'table' => 'execs', 'label' => 'Variable 1: ', 'generateChoices' => function () { return array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'bench_type', 'vm_size', 'vm_cores', 'vm_RAM', 'type', 'hadoop_version', 'provider', 'vm_OS', 'exe_time', 'pred_time', 'TOTAL_MAPS'); }, 'beautifier' => function ($value) { $labels = array('bench' => 'Benchmark', 'net' => 'Network', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'I/O Sort Factor', 'replication' => 'Replication', 'iofilebuf' => 'I/O File Buffer', 'comp' => 'Compression', 'blk_size' => 'Block size', 'id_cluster' => 'Cluster', 'datanodes' => 'Datanodes', 'bench_type' => 'Benchmark Suite', 'vm_size' => 'VM Size', 'vm_cores' => 'VM cores', 'vm_RAM' => 'VM RAM', 'type' => 'Cluster type', 'hadoop_version' => 'Hadoop Version', 'provider' => 'Provider', 'vm_OS' => 'VM OS', 'exe_time' => 'Exeuction time', 'pred_time' => 'Prediction time', 'TOTAL_MAPS' => 'Total execution maps'); return $labels[$value]; }, 'parseFunction' => function () { $value = isset($_GET['variable1']) ? $_GET['variable1'] : array('maps'); return array('currentChoice' => $value, 'whereClause' => ""); }), 'current_model' => array('type' => 'selectOne', 'default' => null, 'label' => 'Model to use: ', 'generateChoices' => function () { return array(); }, 'parseFunction' => function () { $choice = isset($_GET['current_model']) ? $_GET['current_model'] : array(""); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'unseen' => array('type' => 'checkbox', 'default' => 1, 'label' => 'Predict with unseen atributes ⚠', 'parseFunction' => function () { $choice = isset($_GET['unseen']) && !isset($_GET['unseen']) ? 0 : 1; return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'minexetime' => array('default' => 0), 'valid' => array('default' => 0), 'filter' => array('default' => 0), 'prepares' => array('default' => 1))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('current_model', 'unseen')))); $where_configs = $this->filters->getWhereClause(); $model_html = ''; $model_info = $db->get_rows("SELECT id_learner, model, algorithm, dataslice FROM aloja_ml.learners"); foreach ($model_info as $row) { $model_html = $model_html . "<li><b>" . $row['id_learner'] . "</b> => " . $row['algorithm'] . " : " . $row['model'] . " : " . $row['dataslice'] . "</li>"; } $params = array(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); $variables = $this->filters->getFiltersSelectedChoices(array('variable1', 'variable2', 'current_model', 'unseen')); $cross_var1 = $variables['variable1']; $cross_var2 = $variables['variable2']; $param_current_model = $variables['current_model']; $unseen = $variables['unseen'] ? true : false; $where_configs = str_replace("AND .", "AND ", $where_configs); $cross_var1 = str_replace("id_cluster", "e.id_cluster", $cross_var1); $cross_var2 = str_replace("id_cluster", "e.id_cluster", $cross_var2); // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, true); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, true); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); // Model for filling MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = in_array($param_current_model, $possible_models_id) ? $param_current_model : ''; // Other models for filling $where_models = ''; if (!empty($possible_models_id)) { $where_models = " WHERE id_learner NOT IN ('" . implode("','", $possible_models_id) . "')"; } $result = $dbml->query("SELECT id_learner FROM aloja_ml.learners" . $where_models); foreach ($result as $row) { $other_models[] = $row['id_learner']; } // Call to MLPrediction, to create a model if (empty($possible_models_id)) { $_GET['pass'] = 1; $mltc1 = new MLPredictionController(); // FIXME - Choose the default modeling algorithm $mltc1->container = $this->container; $ret_learn = $mltc1->mlpredictionAction(); $rows = null; if ($ret_data == 1) { $must_wait = "YES"; throw new \Exception("WAIT"); } else { if ($ret_data == -1) { throw new \Exception("There was an error when creating a model for [" . $instance . "]"); } } } // Call to MLFindAttributes, to generate data if ($current_model != '') { $_GET['pass'] = 2; $mlfa1 = new MLFindAttributesController(); $mlfa1->container = $this->container; $ret_data = $mlfa1->mlfindattributesAction(); $rows = null; if ($ret_data == 1) { $must_wait = "YES"; throw new \Exception("WAIT"); } else { if ($ret_data == -1) { throw new \Exception("There was an error when creating predictions for [" . $instance . "]"); } } } // Get stuff from the DB $query = "SELECT " . $cross_var1 . " AS V1, " . $cross_var2 . " AS V2, AVG(e.pred_time) as V3, e.instance\n\t\t\t\tFROM aloja_ml.predictions as e\n\t\t\t\tWHERE e.id_learner " . ($current_model != '' ? "='" . $current_model . "'" : "IN (SELECT id_learner FROM aloja_ml.trees WHERE model='" . $model_info . "')") . $where_configs . "\n\t\t\t\tGROUP BY e.instance\n\t\t\t\tORDER BY RAND() LIMIT 5000;"; // FIXME - CLUMPSY PATCH FOR BYPASS THE BUG FROM HIGHCHARTS... REMEMBER TO ERASE THIS LINE WHEN THE BUG IS SOLVED $rows = $db->get_rows($query); if (empty($rows)) { if ($current_model == '') { throw new \Exception('No data matches with your critteria. Try to select a specific model to generate data.'); } else { throw new \Exception('No data matches with your critteria.'); } } // Show the results $map_var1 = $map_var2 = array(); $count_var1 = $count_var2 = 0; $categories1 = $categories2 = ''; $var1_categorical = in_array($cross_var1, array("net", "disk", "bench", "vm_OS", "provider", "vm_size", "type", "bench_type")); $var2_categorical = in_array($cross_var2, array("net", "disk", "bench", "vm_OS", "provider", "vm_size", "type", "bench_type")); foreach ($rows as $row) { $entry = array(); if ($var1_categorical) { if (!array_key_exists($row['V1'], $map_var1)) { $map_var1[$row['V1']] = $count_var1++; $categories1 = $categories1 . ($categories1 != '' ? "," : "") . "\"" . $row['V1'] . "\""; } $entry['y'] = $map_var1[$row['V1']] * (rand(990, 1010) / 1000); } else { $entry['y'] = (int) $row['V1'] * (rand(990, 1010) / 1000); } if ($entry['y'] > $maxy) { $maxy = $entry['y']; } if ($entry['y'] < $miny) { $miny = $entry['y']; } if ($var2_categorical) { if (!array_key_exists($row['V2'], $map_var2)) { $map_var2[$row['V2']] = $count_var2++; $categories2 = $categories2 . ($categories2 != '' ? "," : "") . "\"" . $row['V2'] . "\""; } $entry['x'] = $map_var2[$row['V2']] * (rand(990, 1010) / 1000); } else { $entry['x'] = (int) $row['V2'] * (rand(990, 1010) / 1000); } if ($entry['x'] > $maxx) { $maxx = $entry['x']; } if ($entry['x'] < $minx) { $minx = $entry['x']; } $entry['z'] = (int) $row['V3'] * (rand(990, 1010) / 1000); if ($entry['z'] > $maxz) { $maxz = $entry['z']; } if ($entry['z'] < $minz) { $minz = $entry['z']; } $entry['name'] = $row['instance']; //$row['V1']." - ".$row['V2']." - ".max(100,(int)$row['V3']); $jsonData[] = $entry; } $jsonData = json_encode($jsonData); if ($categories1 != '') { $categories1 = "[" . $categories1 . "]"; } else { $categories1 = "''"; } if ($categories2 != '') { $categories2 = "[" . $categories2 . "]"; } else { $categories2 = "''"; } } catch (\Exception $e) { if ($e->getMessage() != "WAIT") { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); } $jsonData = '[]'; } $dbml = null; $return_params = array('jsonData' => $jsonData, 'variable1' => str_replace("e.id_cluster", "id_cluster", $cross_var1), 'variable2' => str_replace("e.id_cluster", "id_cluster", $cross_var2), 'categories1' => $categories1, 'categories2' => $categories2, 'maxx' => $maxx, 'minx' => $minx, 'maxy' => $maxy, 'miny' => $miny, 'maxz' => $maxz, 'minz' => $minz, 'instance' => $instance, 'model_info' => $model_info, 'slice_info' => $slice_info, 'models' => '<li>' . implode('</li><li>', $possible_models) . '</li>', 'must_wait' => $must_wait, 'models' => $model_html, 'current_model' => $current_model); $this->filters->setCurrentChoices('current_model', array_merge(array("Aggregation of Models"), $possible_models_id, !empty($other_models) ? array('---Other models---') : array(), $other_models)); return $this->render('mltemplate/mlcrossvar3dfa.html.twig', $return_params); }
public function mldatacollapseAction() { try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $where_configs = ''; $preset = null; if (count($_GET) <= 1 || count($_GET) == 2 && array_key_exists("current_model", $_GET)) { $preset = Utils::initDefaultPreset($db, 'mldatacollapse'); } $selPreset = isset($_GET['presets']) ? $_GET['presets'] : "none"; $params = array(); $param_names = array('benchs', 'nets', 'disks', 'mapss', 'iosfs', 'replications', 'iofilebufs', 'comps', 'blk_sizes', 'id_clusters', 'datanodess', 'bench_types', 'vm_sizes', 'vm_coress', 'vm_RAMs', 'types', 'hadoop_versions'); // Order is important foreach ($param_names as $p) { $params[$p] = Utils::read_params($p, $where_configs, FALSE); sort($params[$p]); } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valids', 'filters'); // Order is important foreach ($param_names_additional as $p) { $params_additional[$p] = Utils::read_params($p, $where_configs, FALSE); } $unseen = array_key_exists('unseen', $_GET) && $_GET['unseen'] == 1; // FIXME PATCH FOR PARAM LIBRARIES WITHOUT LEGACY $where_configs = str_replace("AND .", "AND ", $where_configs); $dims1 = (empty($params['nets']) ? '' : 'Net,') . (empty($params['disks']) ? '' : 'Disk,') . (empty($params['blk_sizes']) ? '' : 'Blk.size,') . (empty($params['comps']) ? '' : 'Comp,'); $dims1 = $dims1 . (empty($params['id_clusters']) ? '' : 'Cluster,') . (empty($params['mapss']) ? '' : 'Maps,') . (empty($params['replications']) ? '' : 'Rep,') . (empty($params['iosfs']) ? '' : 'IO.SFac,') . (empty($params['iofilebufs']) ? '' : 'IO.FBuf,'); $dims1 = $dims1 . (empty($params['hadoop_versionss']) ? '' : 'Version,') . (empty($params['bench_types']) ? '' : 'Bench.Type'); if (substr($dims1, -1) == ',') { $dims1 = substr($dims1, 0, -1); } $dims2 = "Benchmark"; // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, true); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, true); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); // select model for filling $possible_models = $possible_models_id = array(); MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = ''; /* if (array_key_exists('current_model',$_GET) && in_array($_GET['current_model'],$possible_models_id)) $current_model = $_GET['current_model']; // FIXME - Needs re-think logic if ($current_model == '') { $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, p.id_learner FROM predictions p, learners l WHERE l.id_learner = p.id_learner AND p.id_learner IN ('".implode("','",$possible_models_id)."') AND predict_code > 0 ORDER BY MAE LIMIT 1"; $result = $dbml->query($query); $row = $result->fetch(); $current_model = $row['id_learner']; } */ $learning_model = ''; if ($current_model != '' && file_exists(getcwd() . '/cache/query/' . $current_model . '-object.rds')) { $learning_model = ':model_name=' . $current_model . ':inst_general="' . $instance . '"'; } $config = $dims1 . '-' . $dims2 . '-' . $current_model . '-' . $model_info . '-' . $slice_info; // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'net' => 'Net', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'name' => 'Cl.Name', 'datanodes' => 'Datanodes', 'headnodes' => 'Headnodes', 'vm_OS' => 'VM.OS', 'vm_cores' => 'VM.Cores', 'vm_RAM' => 'VM.RAM', 'provider' => 'Provider', 'vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type', 'hadoop_version' => 'Hadoop.Version'); $headers = array_keys($header_names); $names = array_values($header_names); $dims1_array = explode(",", $dims1); $dims1_query = ''; $dims1_title = $dims1_concat = ''; foreach ($dims1_array as $d1value) { $dims1_query = $dims1_query . ($dims1_query == '' ? '' : ',') . array_search($d1value, $header_names); $dims1_title = $dims1_title . ($dims1_title == '' ? '' : ':') . array_search($d1value, $header_names); $dims1_concat = $dims1_concat . ($dims1_concat == '' ? '' : ',":",') . array_search($d1value, $header_names); } $query = "SELECT distinct bench FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE hadoop_version IS NOT NULL" . $where_configs . " ORDER BY bench;"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $table = array(); $jsonHeader = '[{title:"' . $dims1_title . '"}'; foreach ($rows as $row) { $jsonHeader = $jsonHeader . ',{title:"' . $row['bench'] . '"}'; $table[$row['bench']] = array(); } $jsonHeader = $jsonHeader . ']'; $query = "SELECT CONCAT(" . $dims1_concat . ") as dim1, bench, avg(exe_time) as avg_exe_time FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE hadoop_version IS NOT NULL" . $where_configs . " GROUP BY bench," . $dims1_query . " ORDER BY dim1,bench;"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } foreach ($rows as $row) { $table[$row['bench']][$row['dim1']] = (int) $row['avg_exe_time']; } $row_ids = array(); foreach ($table as $bmk) { foreach ($bmk as $key => $value) { $row_ids[] = $key; } } $row_ids = array_unique($row_ids); $tableColor = array(); foreach ($table as $bmk => $values) { $tableColor[$bmk] = array(); foreach ($row_ids as $rid) { if (!array_key_exists($rid, $table[$bmk])) { $table[$bmk][$rid] = 0; $tableColor[$bmk][$rid] = 0; } else { $tableColor[$bmk][$rid] = 1; } } } $jsonData = '['; $jsonColor = '['; foreach ($row_ids as $rid) { $jsonData = $jsonData . ($jsonData == '[' ? '' : ',') . '[\'' . $rid . '\''; $jsonColor = $jsonColor . ($jsonColor == '[' ? '' : ',') . '[1'; foreach ($table as $bmk => $values) { $jsonData = $jsonData . ',' . $table[$bmk][$rid]; $jsonColor = $jsonColor . ',' . $tableColor[$bmk][$rid]; } $jsonData = $jsonData . ']'; $jsonColor = $jsonColor . ']'; } $jsonData = $jsonData . ']'; $jsonColor = $jsonColor . ']'; $jsonColumns = '['; for ($i = 1; $i <= count($table); $i++) { if ($jsonColumns != '[') { $jsonColumns = $jsonColumns . ','; } $jsonColumns = $jsonColumns . $i; } $jsonColumns = $jsonColumns . ']'; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $jsonData = $jsonHeader = $jsonColumns = $jsonColor = '[]'; } $return_params = array('selected' => 'mldatacollapse', 'jsonEncoded' => $jsonData, 'jsonHeader' => $jsonHeader, 'jsonColumns' => $jsonColumns, 'jsonColor' => $jsonColor, 'instance' => $instance, 'instance' => $instance, 'model_info' => $model_info, 'slice_info' => $slice_info, 'preset' => $preset, 'selPreset' => $selPreset, 'options' => Utils::getFilterOptions($db)); foreach ($param_names as $p) { $return_params[$p] = $params[$p]; } foreach ($param_names_additional as $p) { $return_params[$p] = $params_additional[$p]; } echo $this->container->getTwig()->render('mltemplate/mldatacollapse.html.twig', $return_params); }
public function mlclearcacheAction() { $cache_allow = 'localhost'; $jsonLearners = ''; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); if (isset($_GET['ccache'])) { $query = "DELETE FROM aloja_ml.summaries"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing summaries from DB'); } $query = "DELETE FROM aloja_ml.minconfigs_centers"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing centers from DB'); } $query = "DELETE FROM aloja_ml.minconfigs_props"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing props from DB'); } $query = "DELETE FROM aloja_ml.minconfigs"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing aloja_ml.minconfigs from DB'); } $query = "DELETE FROM aloja_ml.resolutions"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing resolutions from DB'); } $query = "DELETE FROM aloja_ml.trees"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing trees from DB'); } $query = "DELETE FROM aloja_ml.learners"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing learners from DB'); } $query = "DELETE FROM aloja_ml.model_storage"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing file models from DB'); } $query = "DELETE FROM aloja_ml.precisions"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing precisions from DB'); } $query = "DELETE FROM aloja_ml.observed_trees"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing observed trees from DB'); } $command = 'rm -f ' . getcwd() . '/cache/ml/*.{rds,lock,fin,dat,csv}'; $output[] = shell_exec($command); } if (isset($_GET['rml'])) { $query = "DELETE FROM aloja_ml.learners WHERE id_learner='" . $_GET['rml'] . "'"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing a learner from DB'); } $query = "DELETE FROM aloja_ml.model_storage WHERE id_hash='" . $_GET['rml'] . "' AND type='learner'"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing a model from DB'); } $command = 'rm -f ' . getcwd() . '/cache/ml/' . $_GET['rml'] . '*'; $output[] = shell_exec($command); } if (isset($_GET['rmm'])) { $query = "DELETE FROM aloja_ml.minconfigs WHERE id_minconfigs='" . $_GET['rmm'] . "'"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing a minconfig from DB'); } $query = "DELETE FROM aloja_ml.model_storage WHERE id_hash='" . $_GET['rmm'] . "' AND type='minconf'"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing a model from DB'); } $command = 'rm -f ' . getcwd() . '/cache/ml/' . $_GET['rmm'] . '*'; $output[] = shell_exec($command); } if (isset($_GET['rmr'])) { $query = "DELETE FROM aloja_ml.resolutions WHERE id_resolution='" . $_GET['rmr'] . "'"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing a resolution from DB'); } $command = 'rm -f ' . getcwd() . '/cache/ml/' . $_GET['rmr'] . '*'; $output[] = shell_exec($command); } if (isset($_GET['rms'])) { $query = "DELETE FROM aloja_ml.summaries WHERE id_summaries='" . $_GET['rms'] . "'"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing a summary from DB'); } $command = 'rm -f ' . getcwd() . '/cache/ml/' . $_GET['rms'] . '*'; $output[] = shell_exec($command); } if (isset($_GET['rmp'])) { $query = "DELETE FROM aloja_ml.precisions WHERE id_precision='" . $_GET['rmp'] . "'"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing a precision from DB'); } $command = 'rm -f ' . getcwd() . '/cache/ml/' . $_GET['rmp'] . '*'; $output[] = shell_exec($command); } if (isset($_GET['rmo'])) { $query = "DELETE FROM aloja_ml.observed_trees WHERE id_obstrees='" . $_GET['rmo'] . "'"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when removing an observed tree from DB'); } $command = 'rm -f ' . getcwd() . '/cache/ml/' . $_GET['rmo'] . '*'; $output[] = shell_exec($command); } // Compilation of Learners on Cache $query = "SELECT v.*, COUNT(t.id_findattrs) as num_trees\n\t\t\t\tFROM (\tSELECT s.*, COUNT(r.id_resolution) AS num_resolutions\n\t\t\t\t\tFROM (\tSELECT j.*, COUNT(m.id_minconfigs) AS num_minconfigs\n\t\t\t\t\t\tFROM (\tSELECT DISTINCT l.id_learner AS id_learner, l.algorithm AS algorithm,\n\t\t\t\t\t\t\t\tl.creation_time AS creation_time, l.model AS model, l.dataslice AS advanced,\n\t\t\t\t\t\t\t\tCOUNT(p.id_prediction) AS num_preds\n\t\t\t\t\t\t\tFROM aloja_ml.learners AS l LEFT JOIN aloja_ml.predictions AS p ON l.id_learner = p.id_learner\n\t\t\t\t\t\t\tGROUP BY l.id_learner\n\t\t\t\t\t\t) AS j LEFT JOIN aloja_ml.minconfigs AS m ON j.id_learner = m.id_learner\n\t\t\t\t\t\tGROUP BY j.id_learner\n\t\t\t\t\t) AS s LEFT JOIN aloja_ml.resolutions AS r ON s.id_learner = r.id_learner\n\t\t\t\t\tGROUP BY s.id_learner\n\t\t\t\t) AS v LEFT JOIN aloja_ml.trees AS t ON v.id_learner = t.id_learner\n\t\t\t\tGROUP BY v.id_learner\n\t\t\t\t"; $rows = $dbml->query($query); $jsonLearners = '['; foreach ($rows as $row) { if (strpos($row['model'], '*') !== false) { $umodel = 'umodel=umodel&'; } else { $umodel = ''; } $url = MLUtils::revertModelToURL($row['model'], $row['advanced'], 'presets=none&submit=&learner[]=' . $row['algorithm'] . '&' . $umodel); $jsonLearners = $jsonLearners . ($jsonLearners == '[' ? '' : ',') . "['" . $row['id_learner'] . "','" . $row['algorithm'] . "','" . $row['model'] . "','" . $row['advanced'] . "','" . $row['creation_time'] . "','" . $row['num_preds'] . "','" . $row['num_minconfigs'] . "','" . $row['num_resolutions'] . "','" . $row['num_trees'] . "','<a href=\\'/mlprediction?" . $url . "\\'>View</a> <a href=\\'/mlclearcache?rml=" . $row['id_learner'] . "\\'>Remove</a>']"; } $jsonLearners = $jsonLearners . ']'; $jsonLearningHeader = "[{'title':'ID'},{'title':'Algorithm'},{'title':'Model'},{'title':'Advanced'},{'title':'Creation'},{'title':'Predictions'},{'title':'MinConfigs'},{'title':'Resolutions'},{'title':'Trees'},{'title':'Actions'}]"; // Compilation of Minconfs on Cache $query = "SELECT mj.*, COUNT(mc.sid_minconfigs_centers) AS num_centers\n\t\t\t\tFROM (\tSELECT DISTINCT m.id_minconfigs AS id_minconfigs, m.model AS model, m.is_new as is_new,\n\t\t\t\t\t\tm.creation_time AS creation_time, COUNT(mp.sid_minconfigs_props) AS num_props, l.algorithm\n\t\t\t\t\tFROM aloja_ml.minconfigs AS m LEFT JOIN aloja_ml.minconfigs_props AS mp ON m.id_minconfigs = mp.id_minconfigs, aloja_ml.learners AS l\n\t\t\t\t\tWHERE l.id_learner = m.id_learner\n\t\t\t\t\tGROUP BY m.id_minconfigs\n\t\t\t\t) AS mj LEFT JOIN aloja_ml.minconfigs_centers AS mc ON mj.id_minconfigs = mc.id_minconfigs\n\t\t\t\tGROUP BY mj.id_minconfigs\n\t\t\t\t"; $rows = $dbml->query($query); $jsonMinconfs = '['; foreach ($rows as $row) { $jsonMinconfs = $jsonMinconfs . ($jsonMinconfs == '[' ? '' : ',') . "['" . $row['id_minconfigs'] . "','" . $row['algorithm'] . "','" . $row['model'] . "','" . $row['creation_time'] . "','" . $row['is_new'] . "','" . $row['num_props'] . "','" . $row['num_centers'] . "','<a href=\\'/mlclearcache?rmm=" . $row['id_minconfigs'] . "\\'>Remove</a>']"; } $jsonMinconfs = $jsonMinconfs . ']'; $jsonMinconfsHeader = "[{'title':'ID'},{'title':'Algorithm'},{'title':'Model'},{'title':'Creation'},{'title':'Is_New'},{'title':'Properties'},{'title':'Centers'},{'title':'Actions'}]"; // Compilation of Resolutions on Cache $query = "SELECT DISTINCT id_resolution, id_learner, model, dataslice, creation_time, sigma, count(*) AS instances\n\t\t\t\tFROM aloja_ml.resolutions\n\t\t\t\tGROUP BY id_resolution\n\t\t\t\t"; $rows = $dbml->query($query); $jsonResolutions = '['; foreach ($rows as $row) { $jsonResolutions = $jsonResolutions . ($jsonResolutions == '[' ? '' : ',') . "['" . $row['id_resolution'] . "','" . $row['id_learner'] . "','" . $row['model'] . "','" . $row['dataslice'] . "','" . $row['creation_time'] . "','" . $row['sigma'] . "','" . $row['instances'] . "','<a href=\\'/mlclearcache?rmr=" . $row['id_resolution'] . "\\'>Remove</a>']"; } $jsonResolutions = $jsonResolutions . ']'; $jsonResolutionsHeader = "[{'title':'ID'},{'title':'Learner'},{'title':'Model'},{'title':'Advanced'},{'title':'Creation'},{'title':'Sigma'},{'title':'Instances'},{'title':'Actions'}]"; // Compilation of Summaries on Cache $query = "SELECT DISTINCT id_summaries, model, dataslice, creation_time\n\t\t\t\tFROM aloja_ml.summaries\n\t\t\t\t"; $rows = $dbml->query($query); $jsonSummaries = '['; foreach ($rows as $row) { $jsonSummaries = $jsonSummaries . ($jsonSummaries == '[' ? '' : ',') . "['" . $row['id_summaries'] . "','" . $row['model'] . "','" . $row['dataslice'] . "','" . $row['creation_time'] . "','<a href=\\'/mlclearcache?rms=" . $row['id_summaries'] . "\\'>Remove</a>']"; } $jsonSummaries = $jsonSummaries . ']'; $jsonSummariesHeader = "[{'title':'ID'},{'title':'Model'},{'title':'Advanced'},{'title':'Creation'},{'title':'Actions'}]"; // Compilation of Precisions on Cache $query = "SELECT id_precision, model, dataslice, creation_time\n\t\t\t\tFROM aloja_ml.precisions\n\t\t\t\tGROUP BY id_precision\n\t\t\t\t"; $rows = $dbml->query($query); $jsonPrecisions = '['; foreach ($rows as $row) { $jsonPrecisions = $jsonPrecisions . ($jsonPrecisions == '[' ? '' : ',') . "['" . $row['id_precision'] . "','" . $row['model'] . "','" . $row['dataslice'] . "','" . $row['creation_time'] . "','<a href=\\'/mlclearcache?rmp=" . $row['id_precision'] . "\\'>Remove</a>']"; } $jsonPrecisions = $jsonPrecisions . ']'; $jsonPrecisionsHeader = "[{'title':'ID'},{'title':'Model'},{'title':'Advanced'},{'title':'Creation'},{'title':'Actions'}]"; // Compilation of Observed Trees on Cache $query = "SELECT id_obstrees, model, dataslice, creation_time\n\t\t\t\tFROM aloja_ml.observed_trees\n\t\t\t\t"; $rows = $dbml->query($query); $jsonObstrees = '['; foreach ($rows as $row) { $jsonObstrees = $jsonObstrees . ($jsonObstrees == '[' ? '' : ',') . "['" . $row['id_obstrees'] . "','" . $row['model'] . "','" . $row['dataslice'] . "','" . $row['creation_time'] . "','<a href=\\'/mlclearcache?rmo=" . $row['id_obstrees'] . "\\'>Remove</a>']"; } $jsonObstrees = $jsonObstrees . ']'; $jsonObstreesHeader = "[{'title':'ID'},{'title':'Model'},{'title':'Advanced'},{'title':'Creation'},{'title':'Actions'}]"; $dbml = null; } catch (Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $output = array(); } return $this->render('mltemplate/mlclearcache.html.twig', array('learners' => $jsonLearners, 'header_learners' => $jsonLearningHeader, 'minconfs' => $jsonMinconfs, 'header_minconfs' => $jsonMinconfsHeader, 'resolutions' => $jsonResolutions, 'header_resolutions' => $jsonResolutionsHeader, 'summaries' => $jsonSummaries, 'header_summaries' => $jsonSummariesHeader, 'precisions' => $jsonPrecisions, 'header_precisions' => $jsonPrecisionsHeader, 'obstrees' => $jsonObstrees, 'header_obstrees' => $jsonObstreesHeader)); }
public function mlsummariesAction() { $displaydata = $separate_feat = $instance = $model_info = $slice_info = ''; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $this->filters->removeFilters(array('upred', 'uobsr')); $this->filters->removeFIltersFromGroup("MLearning", array('upred', 'uobsr')); $this->buildFilters(array('feature' => array('type' => 'selectOne', 'default' => array('joined'), 'label' => 'Separate by: ', 'generateChoices' => function () { return array('joined', 'Benchmark', 'Net', 'Disk', 'Maps', 'IO.SFac', 'Rep', 'IO.FBuf', 'Comp', 'Blk.size', 'Cluster'); }, 'beautifier' => function ($value) { $labels = array('joined' => 'None', 'Benchmark' => 'Benchmarks', 'Net' => 'Networks', 'Disk' => 'Disks', 'Maps' => 'Maps', 'IO.SFac' => 'IO Sort Factor', 'Rep' => 'Replication', 'Rep' => 'Replication', 'IO.FBuf' => 'IO File Buffer', 'Comp' => 'Compressions', 'Blk.size' => 'Block sizes', 'Cluster' => 'Clusters'); return $labels[$value]; }, 'parseFunction' => function () { $choice = isset($_GET['feature']) ? $_GET['feature'] : array('joined'); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true))); $where_configs = $this->filters->getWhereClause(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'bench_type', 'vm_size', 'vm_cores', 'vm_RAM', 'type', 'hadoop_version', 'provider', 'vm_OS'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); $feature = $this->filters->getFiltersSelectedChoices(array('feature')); $separate_feat = $feature['feature']; // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, true); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, true); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); $config = $model_info . ' ' . $separate_feat . ' ' . $slice_info . ' SUMMARY'; $cache_ds = getcwd() . '/cache/query/' . md5($config) . '-cache.csv'; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM aloja_ml.summaries WHERE id_summaries = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['num'] > 0; if (!$is_cached) { // get headers for csv $header_names = array('e.id_exec' => 'ID', 'e.bench' => 'Benchmark', 'e.exe_time' => 'Exe.Time', 'e.net' => 'Net', 'e.disk' => 'Disk', 'e.maps' => 'Maps', 'e.iosf' => 'IO.SFac', 'e.replication' => 'Rep', 'e.iofilebuf' => 'IO.FBuf', 'e.comp' => 'Comp', 'e.blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'c.name' => 'Cl.Name', 'c.datanodes' => 'Datanodes', 'c.headnodes' => 'Headnodes', 'c.vm_OS' => 'VM.OS', 'c.vm_cores' => 'VM.Cores', 'c.vm_RAM' => 'VM.RAM', 'c.provider' => 'Provider', 'c.vm_size' => 'VM.Size', 'c.type' => 'Type', 'e.bench_type' => 'Bench.Type', 'e.hadoop_version' => 'Hadoop.Version'); $headers = array_keys($header_names); $names = array_values($header_names); // dump the result to csv $query = "SELECT " . implode(",", $headers) . " FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster LEFT JOIN aloja_ml.predictions p USING (id_exec) WHERE e.hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, $names, ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // launch query $command = 'cd ' . getcwd() . '/cache/query; ../../resources/aloja_cli.r -m aloja_print_summaries -d ' . $cache_ds . ' -p ' . ($separate_feat != 'joined' ? 'sname=' . $separate_feat . ':' : '') . 'fprint=' . md5($config) . ':fwidth=1000:html=1'; #fwidth=135 $output = shell_exec($command); // Save to DB if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-summary.data', 'r')) !== FALSE) { $displaydata = ""; while (($data = fgets($handle)) !== FALSE) { $displaydata = $displaydata . $data; } fclose($handle); $displaydata = str_replace('\'', '\\\'', $displaydata); // register model to DB $query = "INSERT INTO aloja_ml.summaries (id_summaries,instance,model,dataslice,summary)"; $query = $query . " VALUES ('" . md5($config) . "','" . $instance . "','" . substr($model_info, 1) . "','" . $slice_info . "','" . $displaydata . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving model into DB'); } } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '-summary.data'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '-cache.csv'); } // Read results of the DB $is_cached_mysql = $dbml->query("SELECT summary FROM aloja_ml.summaries WHERE id_summaries = '" . md5($config) . "' LIMIT 1"); $tmp_result = $is_cached_mysql->fetch(); $displaydata = $tmp_result['summary']; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); } $return_params = array('displaydata' => $displaydata, 'feature' => $separate_feat, 'instance' => $instance, 'model_info' => $model_info, 'slice_info' => $slice_info); foreach ($param_names as $p) { $return_params[$p] = $params[$p]; } foreach ($param_names_additional as $p) { $return_params[$p] = $params_additional[$p]; } return $this->render('mltemplate/mlsummaries.html.twig', $return_params); }
public function mlminconfigsAction() { $jsonData = $jsonHeader = $configs = $jsonMinconfs = $jsonMinconfsHeader = '[]'; $model_info = $slice_info = $message = $instance = $config = ''; $max_x = $max_y = 0; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); // FIXME - This must be counted BEFORE building filters, as filters inject rubbish in GET when there are no parameters... $instructions = count($_GET) <= 1; $this->buildFilters(array('learn' => array('type' => 'selectOne', 'default' => array('regtree'), 'label' => 'Learning method: ', 'generateChoices' => function () { return array('regtree', 'nneighbours', 'nnet', 'polyreg'); }, 'beautifier' => function ($value) { $labels = array('regtree' => 'Regression Tree', 'nneighbours' => 'k-NN', 'nnet' => 'NNets', 'polyreg' => 'PolyReg-3'); return $labels[$value]; }, 'parseFunction' => function () { $choice = isset($_GET['learn']) ? $_GET['learn'] : array('regtree'); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'umodel' => array('type' => 'checkbox', 'default' => 1, 'label' => 'Unrestricted to new values', 'parseFunction' => function () { $choice = isset($_GET['submit']) && !isset($_GET['umodel']) ? 0 : 1; return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('learn', 'umodel')))); if ($instructions) { MLUtils::getIndexMinconfs($jsonMinconfs, $jsonMinconfsHeader, $dbml); return $this->render('mltemplate/mlminconfigs.html.twig', array('jsonData' => $jsonData, 'jsonHeader' => $jsonHeader, 'configs' => $configs, 'minconfs' => $jsonMinconfs, 'header_minconfs' => $jsonMinconfsHeader, 'instructions' => 'YES')); } $params = array(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version', 'datasize', 'scale_factor'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); $learnParams = $this->filters->getFiltersSelectedChoices(array('learn', 'umodel')); $learn_param = $learnParams['learn']; $unrestricted = $learnParams['umodel'] ? true : false; $where_configs = $this->filters->getWhereClause(); $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, $unrestricted, true); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, $unrestricted, true); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); $config = $model_info . ' ' . $learn_param . ' ' . ($unrestricted ? 'U' : 'R') . ' ' . $slice_info . ' minconfs'; $learn_options = 'saveall=' . md5($config); if ($learn_param == 'regtree') { $learn_method = 'aloja_regtree'; $learn_options .= ':prange=0,20000'; } else { if ($learn_param == 'nneighbours') { $learn_method = 'aloja_nneighbors'; $learn_options .= ':kparam=3'; } else { if ($learn_param == 'nnet') { $learn_method = 'aloja_nnet'; $learn_options .= ':prange=0,20000'; } else { if ($learn_param == 'polyreg') { $learn_method = 'aloja_linreg'; $learn_options .= ':ppoly=3:prange=0,20000'; } } } } $cache_ds = getcwd() . '/cache/ml/' . md5($config) . '-cache.csv'; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM aloja_ml.learners WHERE id_learner = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['num'] > 0; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM aloja_ml.minconfigs WHERE id_minconfigs = '" . md5($config . 'R') . "' AND id_learner = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $is_cached && $tmp_result['num'] > 0; $in_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.fin'); // Create Models and Predictions if (!$is_cached && !$in_process && !$finished_process) { // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'e.net' => 'Net', 'e.disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'datanodes' => 'Datanodes', 'c.vm_OS' => 'VM.OS', 'c.vm_cores' => 'VM.Cores', 'c.vm_RAM' => 'VM.RAM', 'c.provider' => 'Provider', 'c.vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type', 'hadoop_version' => 'Hadoop.Version', 'IFNULL(datasize,0)' => 'Datasize', 'scale_factor' => 'Scale.Factor'); $added_names = array('maxtxkbs' => 'Net.maxtxKB.s', 'maxrxkbs' => 'Net.maxrxKB.s', 'maxtxpcks' => 'Net.maxtxPck.s', 'maxrxpcks' => 'Net.maxrxPck.s', 'maxtxcmps' => 'Net.maxtxCmp.s', 'maxrxcmps' => 'Net.maxrxCmp.s', 'maxrxmscts' => 'Net.maxrxmsct.s', 'maxtps' => 'Disk.maxtps', 'maxsvctm' => 'Disk.maxsvctm', 'maxrds' => 'Disk.maxrd.s', 'maxwrs' => 'Disk.maxwr.s', 'maxrqsz' => 'Disk.maxrqsz', 'maxqusz' => 'Disk.maxqusz', 'maxawait' => 'Disk.maxawait', 'maxutil' => 'Disk.maxutil'); // dump the result to csv $query = "SELECT " . implode(",", array_keys($header_names)) . ",\n\t\t\t\t\tn.maxtxkbs, n.maxrxkbs, n.maxtxpcks, n.maxrxpcks, n.maxtxcmps, n.maxrxcmps, n.maxrxmscts,\n\t\t\t\t\td.maxtps, d.maxsvctm, d.maxrds, d.maxwrs, d.maxrqsz, d.maxqusz, d.maxawait, d.maxutil\n\t\t\t\t\tFROM aloja2.execs AS e LEFT JOIN aloja2.clusters AS c ON e.id_cluster = c.id_cluster,\n\t\t\t\t\t(\n\t\t\t\t\t SELECT MAX(n1.`maxtxkB/s`) AS maxtxkbs, MAX(n1.`maxrxkB/s`) AS maxrxkbs,\n\t\t\t\t\t MAX(n1.`maxtxpck/s`) AS maxtxpcks, MAX(n1.`maxrxpck/s`) AS maxrxpcks,\n\t\t\t\t\t MAX(n1.`maxtxcmp/s`) AS maxtxcmps, MAX(n1.`maxrxcmp/s`) AS maxrxcmps,\n\t\t\t\t\t MAX(n1.`maxrxmcst/s`) AS maxrxmscts,\n\t\t\t\t\t e1.net AS net, c1.vm_cores, c1.vm_RAM, c1.vm_size, c1.vm_OS, c1.provider\n\t\t\t\t\t FROM aloja2.precal_network_metrics AS n1,\n\t\t\t\t\t aloja2.execs AS e1 LEFT JOIN aloja2.clusters AS c1 ON e1.id_cluster = c1.id_cluster\n\t\t\t\t\t WHERE e1.id_exec = n1.id_exec\n\t\t\t\t\t GROUP BY e1.net, c1.vm_cores, c1.vm_RAM, c1.vm_size, c1.vm_OS, c1.provider\n\t\t\t\t\t) AS n,\n\t\t\t\t\t(\n\t\t\t\t\t SELECT MAX(d1.maxtps) AS maxtps, MAX(d1.maxsvctm) as maxsvctm,\n\t\t\t\t\t MAX(d1.`maxrd_sec/s`) as maxrds, MAX(d1.`maxwr_sec/s`) as maxwrs,\n\t\t\t\t\t MAX(d1.maxrq_sz) as maxrqsz, MAX(d1.maxqu_sz) as maxqusz,\n\t\t\t\t\t MAX(d1.maxawait) as maxawait, MAX(d1.`max%util`) as maxutil,\n\t\t\t\t\t e2.disk AS disk, c1.vm_cores, c1.vm_RAM, c1.vm_size, c1.vm_OS, c1.provider\n\t\t\t\t\t FROM aloja2.precal_disk_metrics AS d1,\n\t\t\t\t\t aloja2.execs AS e2 LEFT JOIN aloja2.clusters AS c1 ON e2.id_cluster = c1.id_cluster\n\t\t\t\t\t WHERE e2.id_exec = d1.id_exec\n\t\t\t\t\t GROUP BY e2.disk, c1.vm_cores, c1.vm_RAM, c1.vm_size, c1.vm_OS, c1.provider\n\t\t\t\t\t) AS d\n\t\t\t\t\tWHERE e.net = n.net AND c.vm_cores = n.vm_cores AND c.vm_RAM = n.vm_RAM AND c.vm_size = n.vm_size\n\t\t\t\t\tAND c.vm_OS = n.vm_OS AND c.provider = n.provider AND e.disk = d.disk AND c.vm_cores = d.vm_cores\n\t\t\t\t\tAND c.vm_RAM = d.vm_RAM AND c.vm_size = d.vm_size AND c.vm_OS = d.vm_OS AND c.provider = d.provider\n\t\t\t\t\tAND hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, array_values(array_merge($header_names, $added_names)), ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // run the R processor exec('cd ' . getcwd() . '/cache/ml; touch ' . md5($config) . '.lock'); $command = getcwd() . '/resources/queue -c "cd ' . getcwd() . '/cache/ml; ../../resources/aloja_cli.r -d ' . $cache_ds . ' -m ' . $learn_method . ' -p ' . $learn_options . ' >/dev/null 2>&1 && '; $command = $command . '../../resources/aloja_cli.r -m aloja_minimal_instances -l ' . md5($config) . ' -p saveall=' . md5($config . 'R') . ':kmax=200 >/dev/null 2>&1; rm -f ' . md5($config) . '.lock; touch ' . md5($config) . '.fin" >/dev/null 2>&1 &'; exec($command); } $in_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.lock'); if ($in_process) { $must_wait = "YES"; throw new \Exception('WAIT'); } // Save learning model to DB, with predictions $is_cached_mysql = $dbml->query("SELECT id_learner FROM aloja_ml.learners WHERE id_learner = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); if ($tmp_result['id_learner'] != md5($config)) { // register model to DB $query = "INSERT INTO aloja_ml.learners (id_learner,instance,model,algorithm,dataslice)"; $query = $query . " VALUES ('" . md5($config) . "','" . $instance . "','" . substr($model_info, 1) . "','" . $learn_param . "','" . $slice_info . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving model into DB'); } // read results of the CSV and dump to DB foreach (array("tt", "tv", "tr") as $value) { if (($handle = fopen(getcwd() . '/cache/ml/' . md5($config) . '-' . $value . '.csv', 'r')) !== FALSE) { $header = fgetcsv($handle, 1000, ","); $token = 0; $query = "INSERT IGNORE INTO aloja_ml.predictions (\n\t\t\t\t\t\t\tid_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,\n\t\t\t\t\t\t\tid_cluster,datanodes,vm_OS,vm_cores,vm_RAM,provider,vm_size,type,bench_type,hadoop_version,\n\t\t\t\t\t\t\tdatasize,scale_factor,\n\t\t\t\t\t\t\tnet_maxtxkbs,net_maxrxkbs,net_maxtxpcks,net_maxrxpcks,net_maxtxcmps,net_maxrxcmps,net_maxrxmscts,\n\t\t\t\t\t\t\tdisk_maxtps,disk_maxsvctm,disk_maxrds,disk_maxwrs,disk_maxrqsz,disk_maxqusz,disk_maxawait, disk_maxutil,\n\t\t\t\t\t\t\tpred_time,id_learner,instance,predict_code) VALUES "; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $specific_instance = implode(",", array_slice($data, 2, 36)); $specific_data = implode(",", $data); $specific_data = preg_replace('/,Cmp(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+),/', ',${1},', $specific_data); $specific_data = str_replace(",", "','", $specific_data); $query_var = "SELECT count(*) as num FROM aloja_ml.predictions WHERE instance = '" . $specific_instance . "' AND id_learner = '" . md5($config) . "'"; $result = $dbml->query($query_var); $row = $result->fetch(); // Insert instance values if ($row['num'] == 0) { if ($token != 0) { $query = $query . ","; } $token = 1; $query = $query . "('" . $specific_data . "','" . md5($config) . "','" . $specific_instance . "','" . ($value == 'tt' ? 3 : ($value == 'tv' ? 2 : 1)) . "') "; } } if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving into DB'); } fclose($handle); } else { throw new \Exception('Error on R processing. Result file ' . md5($config) . '-' . $value . '.csv not present'); } } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '*.csv'); $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '*.dat'); } // Save minconfigs to DB, with props and centers $is_cached_mysql = $dbml->query("SELECT id_minconfigs FROM aloja_ml.minconfigs WHERE id_minconfigs = '" . md5($config . 'R') . "'"); $tmp_result = $is_cached_mysql->fetch(); if ($tmp_result['id_minconfigs'] != md5($config . 'R')) { // register minconfigs to DB $query = "INSERT INTO aloja_ml.minconfigs (id_minconfigs,id_learner,instance,model,dataslice)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . md5($config) . "','" . $instance . "','" . substr($model_info, 1) . "','" . $slice_info . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving minconfis into DB'); } $clusters = array(); // Save results of the CSV - MAE or RAE if (file_exists(getcwd() . '/cache/ml/' . md5($config . 'R') . '-raes.csv')) { $error_file = 'raes.csv'; } else { $error_file = 'maes.csv'; } $handle = fopen(getcwd() . '/cache/ml/' . md5($config . 'R') . '-' . $error_file, 'r'); while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $cluster = (int) $data[0]; if ($error_file == 'raes.csv') { $error_mae = 'NULL'; $error_rae = (double) $data[1]; } if ($error_file == 'maes.csv') { $error_mae = (double) $data[1]; $error_rae = 'NULL'; } // register minconfigs_props to DB $query = "INSERT INTO aloja_ml.minconfigs_props (id_minconfigs,cluster,MAE,RAE)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . $cluster . "','" . $error_mae . "','" . $error_rae . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving minconfis into DB'); } $clusters[] = $cluster; } fclose($handle); // Save results of the CSV - Configs $handle_sizes = fopen(getcwd() . '/cache/ml/' . md5($config . 'R') . '-sizes.csv', 'r'); foreach ($clusters as $cluster) { // Get supports from sizes $sizes = fgetcsv($handle_sizes, 1000, ","); // Get clusters $handle = fopen(getcwd() . '/cache/ml/' . md5($config . 'R') . '-dsk' . $cluster . '.csv', 'r'); $header = fgetcsv($handle, 1000, ","); $i = 0; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $subdata1 = array_slice($data, 0, 12); $subdata2 = array_slice($data, 19, 4); $specific_data = implode(',', array_merge($subdata1, $subdata2)); $specific_data = preg_replace('/,Cmp(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+)/', ',${1}', $specific_data); $specific_data = str_replace(",", "','", $specific_data); // register minconfigs_props to DB $query = "INSERT INTO aloja_ml.minconfigs_centers (id_minconfigs,cluster,id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,id_cluster,bench_type,hadoop_version,datasize,scale_factor,support)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . $cluster . "','" . $specific_data . "','" . $sizes[$i++] . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving centers into DB'); } } fclose($handle); } fclose($handle_sizes); // Store file model to DB $filemodel = getcwd() . '/cache/ml/' . md5($config) . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO aloja_ml.model_storage (id_hash,type,file) VALUES ('" . md5($config) . "','learner','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file model into DB'); } $filemodel = getcwd() . '/cache/ml/' . md5($config . 'R') . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO aloja_ml.model_storage (id_hash,type,file) VALUES ('" . md5($config . 'R') . "','minconf','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file minconf into DB'); } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'R') . '*.csv'); $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'R') . '*.rds'); $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '*.rds'); $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '.fin'); } // Retrieve minconfig progression results from DB $header = "id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,bench_type,hadoop_version,datasize,scale_factor,id_cluster,support"; $header_array = explode(",", $header); $last_y = 9000000000000000.0; $configs = '['; $jsonData = array(); $query = "SELECT cluster, MAE, RAE FROM aloja_ml.minconfigs_props WHERE id_minconfigs='" . md5($config . 'R') . "'"; $result = $dbml->query($query); foreach ($result as $row) { // Retrieve minconfig progression results from DB if ((double) $row['MAE'] > 0) { $error = (double) $row['MAE']; } else { $error = (double) $row['RAE']; } $cluster = (int) $row['cluster']; $new_val = array(); $new_val['x'] = $cluster; if ($error > $last_y) { $new_val['y'] = $last_y; } else { $last_y = $new_val['y'] = $error; } $jsonData[] = $new_val; // Retrieve minconfig centers from DB $query_2 = "SELECT " . $header . " FROM aloja_ml.minconfigs_centers WHERE id_minconfigs='" . md5($config . 'R') . "' AND cluster='" . $cluster . "'"; $result_2 = $dbml->query($query_2); $jsonConfig = '['; foreach ($result_2 as $row_2) { $values = ''; foreach ($header_array as $ha) { $values = $values . ($values != '' ? ',' : '') . '\'' . $row_2[$ha] . '\''; } $jsonConfig = $jsonConfig . ($jsonConfig != '[' ? ',' : '') . '[' . $values . ']'; } $jsonConfig = $jsonConfig . ']'; $configs = $configs . ($configs != '[' ? ',' : '') . $jsonConfig; } $configs = $configs . ']'; $jsonData = json_encode($jsonData); $jsonHeader = '[{title:""},{title:"Est.Time"},{title:"Benchmark"},{title:"Network"},{title:"Disk"},{title:"Maps"},{title:"IO.SF"},{title:"Replicas"},{title:"IO.FBuf"},{title:"Compression"},{title:"Blk.Size"},{title:"Bench.Type"},{title:"Hadoop.Ver"},{title:"Data.Size"},{title:"Scale.Factor"},{title:"Main Ref. Cluster"},{title:"Support"}]'; $is_cached_mysql = $dbml->query("SELECT MAX(cluster) as mcluster, MAX(MAE) as mmae, MAX(RAE) as mrae FROM aloja_ml.minconfigs_props WHERE id_minconfigs='" . md5($config . 'R') . "'"); $tmp_result = $is_cached_mysql->fetch(); $max_x = (double) $tmp_result['mmae'] > 0 ? (double) $tmp_result['mmae'] : (double) $tmp_result['mrae']; $max_y = (double) $tmp_result['mcluster']; } catch (\Exception $e) { if ($e->getMessage() != "WAIT") { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); } $jsonData = $jsonHeader = $configs = '[]'; } $dbml = null; $return_params = array('jsonData' => $jsonData, 'jsonHeader' => $jsonHeader, 'minconfs' => $jsonMinconfs, 'header_minconfs' => $jsonMinconfsHeader, 'configs' => $configs, 'max_p' => min(array($max_x, $max_y)), 'instance' => $instance, 'id_learner' => md5($config), 'id_minconf' => md5($config . 'R'), 'model_info' => $model_info, 'slice_info' => $slice_info, 'must_wait' => $must_wait); return $this->render('mltemplate/mlminconfigs.html.twig', $return_params); }
public function mlminconfigsAction() { $jsonData = array(); $message = $instance = ''; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain_ml'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $where_configs = ''; $preset = null; if (count($_GET) <= 1 || count($_GET) == 2 && array_key_exists('learn', $_GET)) { $preset = Utils::setDefaultPreset($db, 'mlminconfigs'); } $selPreset = isset($_GET['presets']) ? $_GET['presets'] : "none"; $params = array(); $param_names = array('benchs', 'nets', 'disks', 'mapss', 'iosfs', 'replications', 'iofilebufs', 'comps', 'blk_sizes', 'id_clusters', 'datanodess', 'bench_types', 'vm_sizes', 'vm_coress', 'vm_RAMs', 'types'); // Order is important foreach ($param_names as $p) { $params[$p] = Utils::read_params($p, $where_configs, FALSE); sort($params[$p]); } $learn_param = array_key_exists('learn', $_GET) ? $_GET['learn'] : 'regtree'; $unrestricted = array_key_exists('umodel', $_GET) && $_GET['umodel'] == 1; // FIXME PATCH FOR PARAM LIBRARIES WITHOUT LEGACY $where_configs = str_replace("`id_cluster`", "e.`id_cluster`", $where_configs); $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($param_names, $params, $unrestricted, $db); // Used only as indicator in the WEB $model_info = MLUtils::generateModelInfo($param_names, $params, $unrestricted, $db); $config = $model_info . ' ' . $learn_param . ' ' . ($unrestricted ? 'U' : 'R') . ' minconfs'; $learn_options = 'saveall=' . md5($config); if ($learn_param == 'regtree') { $learn_method = 'aloja_regtree'; $learn_options .= ':prange=0,20000'; } else { if ($learn_param == 'nneighbours') { $learn_method = 'aloja_nneighbors'; $learn_options .= ':kparam=3'; } else { if ($learn_param == 'nnet') { $learn_method = 'aloja_nnet'; $learn_options .= ':prange=0,20000'; } else { if ($learn_param == 'polyreg') { $learn_method = 'aloja_linreg'; $learn_options .= ':ppoly=3:prange=0,20000'; } } } } $cache_ds = getcwd() . '/cache/query/' . md5($config) . '-cache.csv'; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM learners WHERE id_learner = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['num'] > 0; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM minconfigs WHERE id_minconfigs = '" . md5($config . 'R') . "' AND id_learner = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $is_cached && $tmp_result['num'] > 0; $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.fin'); // Create Models and Predictions if (!$is_cached && !$in_process && !$finished_process) { // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'net' => 'Net', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'name' => 'Cl.Name', 'datanodes' => 'Datanodes', 'headnodes' => 'Headnodes', 'vm_OS' => 'VM.OS', 'vm_cores' => 'VM.Cores', 'vm_RAM' => 'VM.RAM', 'provider' => 'Provider', 'vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type'); $headers = array_keys($header_names); $names = array_values($header_names); // dump the result to csv $query = "SELECT " . implode(",", $headers) . " FROM execs e LEFT JOIN clusters c ON e.id_cluster = c.id_cluster WHERE e.valid = TRUE AND bench_type = 'HiBench' AND bench NOT LIKE 'prep_%' AND e.exe_time > 100" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, $names, ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // run the R processor exec('cd ' . getcwd() . '/cache/query; touch ' . md5($config) . '.lock'); $command = getcwd() . '/resources/queue -c "cd ' . getcwd() . '/cache/query; ../../resources/aloja_cli.r -d ' . $cache_ds . ' -m ' . $learn_method . ' -p ' . $learn_options . ' >/dev/null 2>&1 && '; $command = $command . '../../resources/aloja_cli.r -m aloja_minimal_instances -l ' . md5($config) . ' -p saveall=' . md5($config . 'R') . ':kmax=200 >/dev/null 2>&1; rm -f ' . md5($config) . '.lock; touch ' . md5($config) . '.fin" >/dev/null 2>&1 &'; exec($command); } $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); if ($in_process) { $jsonData = $jsonHeader = $configs = '[]'; $must_wait = "YES"; $max_x = $max_y = 0; } else { $must_wait = "NO"; // Save learning model to DB, with predictions $is_cached_mysql = $dbml->query("SELECT id_learner FROM learners WHERE id_learner = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); if ($tmp_result['id_learner'] != md5($config)) { // register model to DB $query = "INSERT INTO learners (id_learner,instance,model,algorithm)"; $query = $query . " VALUES ('" . md5($config) . "','" . $instance . "','" . substr($model_info, 1) . "','" . $learn_param . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving model into DB'); } // read results of the CSV and dump to DB foreach (array("tt", "tv", "tr") as $value) { if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-' . $value . '.csv', 'r')) !== FALSE) { $header = fgetcsv($handle, 1000, ","); $token = 0; $query = "INSERT INTO predictions (id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,id_cluster,name,datanodes,headnodes,vm_OS,vm_cores,vm_RAM,provider,vm_size,type,bench_type,pred_time,id_learner,instance,predict_code) VALUES "; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $specific_instance = implode(",", array_slice($data, 2, 19)); $specific_data = implode(",", $data); $specific_data = preg_replace('/,Cmp(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+),/', ',${1},', $specific_data); $specific_data = str_replace(",", "','", $specific_data); $query_var = "SELECT count(*) as num FROM predictions WHERE instance = '" . $specific_instance . "' AND id_learner = '" . md5($config) . "'"; $result = $dbml->query($query_var); $row = $result->fetch(); // Insert instance values if ($row['num'] == 0) { if ($token != 0) { $query = $query . ","; } $token = 1; $query = $query . "('" . $specific_data . "','" . md5($config) . "','" . $specific_instance . "','" . ($value == 'tt' ? 3 : ($value == 'tv' ? 2 : 1)) . "') "; } } if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving into DB'); } fclose($handle); } } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.csv'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.dat'); } // Save minconfigs to DB, with props and centers $is_cached_mysql = $dbml->query("SELECT id_minconfigs FROM minconfigs WHERE id_minconfigs = '" . md5($config . 'R') . "'"); $tmp_result = $is_cached_mysql->fetch(); if ($tmp_result['id_minconfigs'] != md5($config . 'R')) { // register minconfigs to DB $query = "INSERT INTO minconfigs (id_minconfigs,id_learner,instance,model)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . md5($config) . "','" . $instance . "','" . substr($model_info, 1) . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving minconfis into DB'); } $clusters = array(); // Save results of the CSV - MAE or RAE if (file_exists(getcwd() . '/cache/query/' . md5($config . 'R') . '-raes.csv')) { $error_file = 'raes.csv'; } else { $error_file = 'maes.csv'; } $handle = fopen(getcwd() . '/cache/query/' . md5($config . 'R') . '-' . $error_file, 'r'); while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $cluster = (int) $data[0]; if ($error_file == 'raes.csv') { $error_mae = 'NULL'; $error_rae = (double) $data[1]; } if ($error_file == 'maes.csv') { $error_mae = (double) $data[1]; $error_rae = 'NULL'; } // register minconfigs_props to DB $query = "INSERT INTO minconfigs_props (id_minconfigs,cluster,MAE,RAE)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . $cluster . "','" . $error_mae . "','" . $error_rae . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving minconfis into DB'); } $clusters[] = $cluster; } fclose($handle); // Save results of the CSV - Configs $handle_sizes = fopen(getcwd() . '/cache/query/' . md5($config . 'R') . '-sizes.csv', 'r'); foreach ($clusters as $cluster) { // Get supports from sizes $sizes = fgetcsv($handle_sizes, 1000, ","); // Get clusters $handle = fopen(getcwd() . '/cache/query/' . md5($config . 'R') . '-dsk' . $cluster . '.csv', 'r'); $header = fgetcsv($handle, 1000, ","); $i = 0; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $subdata = array_slice($data, 0, 12); $specific_data = implode(',', $subdata); $specific_data = preg_replace('/,Cmp(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+)/', ',${1}', $specific_data); $specific_data = str_replace(",", "','", $specific_data); // register minconfigs_props to DB $query = "INSERT INTO minconfigs_centers (id_minconfigs,cluster,id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,id_cluster,support)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . $cluster . "','" . $specific_data . "','" . $sizes[$i++] . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving centers into DB'); } } fclose($handle); } fclose($handle_sizes); // Store file model to DB $filemodel = getcwd() . '/cache/query/' . md5($config) . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO model_storage (id_hash,type,file) VALUES ('" . md5($config) . "','learner','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file model into DB'); } $filemodel = getcwd() . '/cache/query/' . md5($config . 'R') . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO model_storage (id_hash,type,file) VALUES ('" . md5($config . 'R') . "','minconf','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file minconf into DB'); } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config . 'R') . '*.csv'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config . 'R') . '*.rds'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.rds'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '.fin'); } // Retrieve minconfig progression results from DB $header = "id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,id_cluster,support"; $header_array = explode(",", $header); $last_y = 9000000000000000.0; $configs = '['; $query = "SELECT cluster, MAE, RAE FROM minconfigs_props WHERE id_minconfigs='" . md5($config . 'R') . "'"; $result = $dbml->query($query); foreach ($result as $row) { // Retrieve minconfig progression results from DB if ((double) $row['MAE'] > 0) { $error = (double) $row['MAE']; } else { $error = (double) $row['RAE']; } $cluster = (int) $row['cluster']; $new_val = array(); $new_val['x'] = $cluster; if ($error > $last_y) { $new_val['y'] = $last_y; } else { $last_y = $new_val['y'] = $error; } $jsonData[] = $new_val; // Retrieve minconfig centers from DB $query_2 = "SELECT " . $header . " FROM minconfigs_centers WHERE id_minconfigs='" . md5($config . 'R') . "' AND cluster='" . $cluster . "'"; $result_2 = $dbml->query($query_2); $jsonConfig = '['; foreach ($result_2 as $row_2) { $values = ''; foreach ($header_array as $ha) { $values = $values . ($values != '' ? ',' : '') . '\'' . $row_2[$ha] . '\''; } $jsonConfig = $jsonConfig . ($jsonConfig != '[' ? ',' : '') . '[' . $values . ']'; } $jsonConfig = $jsonConfig . ']'; $configs = $configs . ($configs != '[' ? ',' : '') . $jsonConfig; } $configs = $configs . ']'; $jsonData = json_encode($jsonData); $jsonHeader = '[{title:""},{title:"Est.Time"},{title:"Benchmark"},{title:"Network"},{title:"Disk"},{title:"Maps"},{title:"IO.SF"},{title:"Replicas"},{title:"IO.FBuf"},{title:"Compression"},{title:"Blk.Size"},{title:"Main Ref. Cluster"},{title:"Support"}]'; $is_cached_mysql = $dbml->query("SELECT MAX(cluster) as mcluster, MAX(MAE) as mmae, MAX(RAE) as mrae FROM minconfigs_props WHERE id_minconfigs='" . md5($config . 'R') . "'"); $tmp_result = $is_cached_mysql->fetch(); $max_x = (double) $tmp_result['mmae'] > 0 ? (double) $tmp_result['mmae'] : (double) $tmp_result['mrae']; $max_y = (double) $tmp_result['mcluster']; } } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $jsonData = $jsonHeader = $configs = '[]'; $max_x = $max_y = 0; $must_wait = 'NO'; } echo $this->container->getTwig()->render('mltemplate/mlminconfigs.html.twig', array('selected' => 'mlminconfigs', 'jsonData' => $jsonData, 'jsonHeader' => $jsonHeader, 'configs' => $configs, 'max_p' => min(array($max_x, $max_y)), 'benchs' => $params['benchs'], 'nets' => $params['nets'], 'disks' => $params['disks'], 'blk_sizes' => $params['blk_sizes'], 'comps' => $params['comps'], 'id_clusters' => $params['id_clusters'], 'mapss' => $params['mapss'], 'replications' => $params['replications'], 'iosfs' => $params['iosfs'], 'iofilebufs' => $params['iofilebufs'], 'datanodess' => $params['datanodess'], 'bench_types' => $params['bench_types'], 'vm_sizes' => $params['vm_sizes'], 'vm_coress' => $params['vm_coress'], 'vm_RAMs' => $params['vm_RAMs'], 'types' => $params['types'], 'message' => $message, 'instance' => $instance, 'id_learner' => md5($config), 'id_minconf' => md5($config . 'R'), 'model_info' => $model_info, 'unrestricted' => $unrestricted, 'learn' => $learn_param, 'must_wait' => $must_wait, 'preset' => $preset, 'selPreset' => $selPreset, 'options' => Utils::getFilterOptions($db))); }
public function mlprecisionAction() { $jsonDiversity = $jsonPrecisions = $jsonDiscvars = $jsonHeaderDiv = $jsonPrecisionHeader = '[]'; $instance = $error_stats = ''; $jsonPrecexps = $jsonPrecexpsHeader = '[]'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); // FIXME - This must be counted BEFORE building filters, as filters inject rubbish in GET when there are no parameters... $instructions = count($_GET) <= 1; $this->buildFilters(); if ($instructions) { MLUtils::getIndexPrecExps($jsonPrecexps, $jsonPrecexpsHeader, $dbml); return $this->render('mltemplate/mlprecision.html.twig', array('precexps' => $jsonPrecexps, 'header_precexps' => $jsonPrecexpsHeader, 'discvars' => '[]', 'diversity' => '[]', 'precisions' => '[]', 'diversityHeader' => '[]', 'precisionHeader' => '[]', 'instructions' => 'YES')); } $where_configs = $this->filters->getWhereClause(); $where_configs = str_replace("AND .", "AND ", $where_configs); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'bench_type', 'vm_size', 'vm_cores', 'vm_RAM', 'type', 'hadoop_version', 'provider', 'vm_OS'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, true); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, true); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); $config = $model_info . ' ' . $slice_info . "-precision"; $cache_ds = getcwd() . '/cache/query/' . md5($config) . '-cache.csv'; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM aloja_ml.precisions WHERE id_precision = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['num'] > 0; $eval_names = array('Cl.Name', 'Datanodes', 'Headnodes', 'VM.OS', 'VM.Cores', 'VM.RAM', 'Provider', 'VM.Size', 'Type', 'Bench.Type', 'Hadoop.Version'); $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.fin'); if (!$is_cached && !$in_process && !$finished_process) { // get headers for csv $header_names = array('e.id_exec' => 'ID', 'e.bench' => 'Benchmark', 'e.exe_time' => 'Exe.Time', 'e.net' => 'Net', 'e.disk' => 'Disk', 'e.maps' => 'Maps', 'e.iosf' => 'IO.SFac', 'e.replication' => 'Rep', 'e.iofilebuf' => 'IO.FBuf', 'e.comp' => 'Comp', 'e.blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'c.name' => 'Cl.Name', 'c.datanodes' => 'Datanodes', 'c.headnodes' => 'Headnodes', 'c.vm_OS' => 'VM.OS', 'c.vm_cores' => 'VM.Cores', 'c.vm_RAM' => 'VM.RAM', 'c.provider' => 'Provider', 'c.vm_size' => 'VM.Size', 'c.type' => 'Type', 'e.bench_type' => 'Bench.Type', 'e.hadoop_version' => 'Hadoop.Version'); $headers = array_keys($header_names); $names = array_values($header_names); // dump the result to csv $query = "SELECT " . implode(",", $headers) . " FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster LEFT JOIN aloja_ml.predictions p USING (id_exec) WHERE e.hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, $names, ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // run the R processor exec('cd ' . getcwd() . '/cache/query ; touch ' . getcwd() . '/cache/query/' . md5($config) . '.lock'); $count = 1; foreach ($eval_names as $name) { exec(getcwd() . '/resources/queue -d -c "cd ' . getcwd() . '/cache/query ; ../../resources/aloja_cli.r -d ' . md5($config) . '-cache.csv -m aloja_diversity -p vdisc="' . $name . '":noout=1:json=1 -v > ' . md5($config) . '-D-' . $name . '.tmp 2>/dev/null; touch ' . md5($config) . '-' . $count++ . '.lock" >/dev/null 2>&1 &'); exec(getcwd() . '/resources/queue -d -c "cd ' . getcwd() . '/cache/query ; ../../resources/aloja_cli.r -d ' . md5($config) . '-cache.csv -m aloja_precision_split -p vdisc="' . $name . '":noout=1:json=1 -v > ' . md5($config) . '-P-' . $name . '.tmp 2>/dev/null; touch ' . md5($config) . '-' . $count++ . '.lock" >/dev/null 2>&1 &'); } } $finished_process = (int) shell_exec('ls ' . getcwd() . '/cache/query/' . md5($config) . '-*.lock | wc -w ') == 2 * count($eval_names); if ($finished_process && !$is_cached) { $token = 0; $token_i = 0; $query = "INSERT IGNORE INTO aloja_ml.precisions (id_precision,model,instance,dataslice,diversity,precisions,discvar) VALUES "; foreach ($eval_names as $name) { $treated_line_d = ""; $treated_line_p = ""; if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-D-' . $name . '.tmp', "r")) !== FALSE) { $line = fgets($handle, 1000000); $treated_line_d = substr($line, 5); $treated_line_d = substr($treated_line_d, 0, -2); $treated_line_d = preg_replace('/,Cmp(\\d+),/', ',${1},', $treated_line_d); $treated_line_d = preg_replace('/,Cl(\\d+),/', ',${1},', $treated_line_d); $treated_line_d = str_replace("'", "\"", $treated_line_d); } fclose($handle); if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-P-' . $name . '.tmp', "r")) !== FALSE) { $line = fgets($handle, 1000000); $treated_line_p = substr($line, 5); $treated_line_p = substr($treated_line_p, 0, -2); $treated_line_p = preg_replace('/,Cmp(\\d+),/', ',${1},', $treated_line_p); $treated_line_p = preg_replace('/,Cl(\\d+),/', ',${1},', $treated_line_p); $treated_line_p = str_replace("'", "\"", $treated_line_p); } fclose($handle); if ($token > 0) { $query = $query . ","; } $token = 1; $query = $query . "('" . md5($config) . "','" . substr($model_info, 1) . "','" . $instance . "','" . $slice_info . "','" . $treated_line_d . "','" . $treated_line_p . "','" . $name . "') "; $token_i = 1; } if ($token_i > 0) { if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving into DB'); } } // remove remaining locks shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.lock'); // Remove temporal files shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.tmp'); shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '*.csv'); $is_cached = true; } if (!$is_cached) { $jsonDiversity = $jsonPrecisions = $jsonDiscvars = $jsonHeaderDiv = $jsonPrecisionHeader = '[]'; $must_wait = 'YES'; } else { $must_wait = 'NO'; $discvars = array(); $diversity = array(); $precisions = array(); $query = "SELECT id_precision,model,instance,dataslice,diversity,precisions,discvar FROM aloja_ml.precisions WHERE id_precision = '" . md5($config) . "'"; $result = $dbml->query($query); foreach ($result as $row) { $discvars[] = $row['discvar']; $diversity[] = $row['diversity']; $precisions[] = $row['precisions']; } $jsonDiscvars = "['" . implode("','", $discvars) . "']"; $jsonDiversity = "[" . implode(",", $diversity) . "]"; $jsonPrecisions = "[" . implode(",", $precisions) . "]"; $header = array('Benchmark', 'Net', 'Disk', 'Maps', 'IO.SFS', 'Rep', 'IO.FBuf', 'Comp', 'Blk.Size', 'Target', 'Exe.Time', 'Support'); $jsonHeaderDiv = '['; foreach ($header as $title) { if ($jsonHeaderDiv != '[') { $jsonHeaderDiv = $jsonHeaderDiv . ','; } $jsonHeaderDiv = $jsonHeaderDiv . '{"title":"' . $title . '"}'; } $jsonHeaderDiv = $jsonHeaderDiv . ']'; $jsonPrecisionHeader = '[{"title":"Target"},{"title":"Diversity"},{"title":"# Executions"},{"title":"Deviation (UnPrecision)"},{"title":"Mean [Stats]"},{"title":"StDev [Stats]"},{"title":"Max [Stats]"},{"title":"Min [Stats]"}]'; } $dbml = null; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage()); $jsonDiversity = $jsonPrecisions = $jsonDiscvars = $jsonHeaderDiv = $jsonPrecisionHeader = '[]'; $must_wait = 'NO'; $dbml = null; } $return_params = array('discvars' => $jsonDiscvars, 'diversity' => $jsonDiversity, 'precisions' => $jsonPrecisions, 'diversityHeader' => $jsonHeaderDiv, 'precisionHeader' => $jsonPrecisionHeader, 'precexps' => $jsonPrecexps, 'header_precexps' => $jsonPrecexpsHeader, 'must_wait' => $must_wait, 'instance' => $instance, 'model_info' => $model_info, 'slice_info' => $slice_info, 'id_precision' => md5($config), 'error_stats' => $error_stats); return $this->render('mltemplate/mlprecision.html.twig', $return_params); }
public function mlnewconfigsAction() { $jsonData = $jsonHeader = $configs = $jsonNewconfs = $jsonNewconfsHeader = '[]'; $message = $instance = $config = $model_info = $slice_info = ''; $max_x = $max_y = 0; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $where_configs = ''; // FIXME - This must be counted BEFORE building filters, as filters inject rubbish in GET when there are no parameters... $instructions = count($_GET) <= 1; // Where_Configs and Manual Presets if (count($_GET) <= 1 || count($_GET) == 2 && array_key_exists('learn', $_GET)) { $_GET['id_cluster'] = $params['id_cluster'] = array('3', '5', '8'); $where_configs .= ' AND id_cluster IN (3,5,8)'; //$_GET['bench'] = $params['bench'] = array('terasort'); $where_configs .= ' AND bench IN ("terasort")'; //$_GET['disk'] = $params['disk'] = array('HDD','SSD'); $where_configs .= ' AND disk IN ("HDD","SSD")'; $_GET['blk_size'] = $params['blk_size'] = array('64', '128', '256'); $where_configs .= ' AND blk_size IN ("64","128","256")'; $_GET['iofilebuf'] = $params['iofilebuf'] = array('32768', '65536', '131072'); $where_configs .= ' AND iofilebuf IN ("32768","65536","131072")'; $_GET['comp'] = $params['comp'] = array('0'); $where_configs .= ' AND comp IN ("0")'; $_GET['replication'] = $params['replication'] = array('1'); $where_configs .= ' AND replication IN ("1")'; //$_GET['hadoop_version'] = $params['hadoop_version'] = array('1','1.03','2'); $where_configs .= ' AND hadoop_version IN ("1","1.03","2")'; //$_GET['bench_type'] = $params['bench_type'] = array('HiBench'); $where_configs .= ' AND bench_type IN ("HiBench")'; $_GET['datanodes'] = $params['datanodes'] = array('3'); // $where_configs .= ' AND datanodes = 3'; $_GET['vm_OS'] = $params['vm_OS'] = array('linux'); // $where_configs .= ' AND vm_OS = "linux"'; $_GET['vm_size'] = $params['vm_size'] = array('SYS-6027R-72RF'); // $where_configs .= ' AND vm_size = "SYS-6027R-72RF"'; $_GET['vm_cores'] = $params['vm_cores'] = array('12'); // $where_configs .= ' AND vm_cores = 12'; $_GET['vm_RAM'] = $params['vm_RAM'] = array('128'); // $where_configs .= ' AND vm_RAM = 128'; $_GET['type'] = $params['type'] = array('On-premise'); // $where_configs .= ' AND type = "On-premise"'; $_GET['provider'] = $params['provider'] = array('on-premise'); // $where_configs .= ' AND provider = "on-premise"'; $_GET['datefrom'] = $params['datefrom'] = ''; $_GET['dateto'] = $params['dateto'] = ''; $_GET['maxexetime'] = $params['maxexetime'] = 20000; $_GET['minexetime'] = $params['minexetime'] = 1; } else { $param_names_whereconfig = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'bench_type', 'hadoop_version', 'datasize', 'scale_factor'); foreach ($param_names_whereconfig as $p) { MLNewconfigsController::add_where_configs($p, $where_configs); } $where_configs .= (isset($_GET['datefrom']) && $_GET['datefrom'] != '' ? ' AND start_time >= ' . $_GET['datefrom'] : '') . (isset($_GET['dateto']) && $_GET['dateto'] != '' ? ' AND end_time <= ' . $_GET['dateto'] : '') . (isset($_GET['minexetime']) && $_GET['minexetime'] != '' ? ' AND exe_time >= ' . $_GET['minexetime'] : '') . (isset($_GET['maxexetime']) && $_GET['maxexetime'] != '' ? ' AND exe_time <= ' . $_GET['maxexetime'] : '') . (isset($_GET['valid']) ? ' AND valid = ' . $_GET['valid'] : '') . (isset($_GET['filter']) ? ' AND filter = ' . $_GET['filter'] : ''); } // Real fetching of parameters $params = array(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version', 'datasize', 'scale_factor'); // Order is important foreach ($param_names as $p) { $params[$p] = MLNewconfigsController::read_params($p); sort($params[$p]); } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important foreach ($param_names_additional as $p) { $params_additional[$p] = MLNewconfigsController::read_params($p); } $learn_param = array_key_exists('learn', $_GET) ? $_GET['learn'] : 'regtree'; $param_id_cluster = $params['id_cluster']; unset($params['id_cluster']); // Exclude the param from now on $where_configs = str_replace("AND .", "AND ", $where_configs); // Semi-Dummy Filters (For ModelInfo and SimpleInstance) $this->buildFilters(array('learn' => array('type' => 'selectOne', 'default' => array('regtree'), 'label' => 'Learning method: ', 'generateChoices' => function () { return array('regtree', 'nneighbours', 'nnet', 'polyreg'); }, 'beautifier' => function ($value) { $labels = array('regtree' => 'Regression Tree', 'nneighbours' => 'k-NN', 'nnet' => 'NNets', 'polyreg' => 'PolyReg-3'); return $labels[$value]; }, 'parseFunction' => function () { $choice = isset($_GET['learn']) ? $_GET['learn'] : array('regtree'); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'minexetime' => array('default' => 1), 'maxexetime' => array('default' => 20000), 'datefrom' => array('default' => ''), 'dateto' => array('default' => ''), 'valid' => array('default' => 1), 'filter' => array('default' => 1), 'prepares' => array('default' => 0))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('learn')))); if ($instructions) { MLUtils::getIndexNewconfs($jsonNewconfs, $jsonNewconfsHeader, $dbml); $params['id_cluster'] = $param_id_cluster; $return_params = array('selected' => 'mlnewconfigs', 'instructions' => 'YES', 'jsonData' => $jsonData, 'jsonHeader' => $jsonHeader, 'configs' => $configs, 'newconfs' => $jsonNewconfs, 'header_newconfs' => $jsonNewconfsHeader, 'must_wait' => $must_wait, 'options' => MLNewconfigsController::getFilterOptions($db)); foreach ($param_names as $p) { $return_params[$p] = $params[$p]; } foreach ($param_names_additional as $p) { $return_params[$p] = $params_additional[$p]; } echo $this->container->getTwig()->render('mltemplate/mlnewconfigs.html.twig', $return_params); return; } // compose instance $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, true, true); $param_names_aux = array_diff($param_names, array('id_cluster')); $instance = MLUtils::generateSimpleInstance($this->filters, $param_names_aux, $params, true, true); $instances = MLUtils::completeInstances($this->filters, array($instance), $param_names, $params, $db); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); $config = $model_info . ' ' . $learn_param . ' ' . $slice_info . ' newminconfs'; if ($learn_param == 'regtree') { $learn_method = 'aloja_regtree'; $learn_options = 'prange=0,20000'; } else { if ($learn_param == 'nneighbours') { $learn_method = 'aloja_nneighbors'; $learn_options = 'kparam=3'; } else { if ($learn_param == 'nnet') { $learn_method = 'aloja_nnet'; $learn_options = 'prange=0,20000'; } else { if ($learn_param == 'polyreg') { $learn_method = 'aloja_linreg'; $learn_options = 'ppoly=3:prange=0,20000'; } } } } $cache_ds = getcwd() . '/cache/ml/' . md5($config) . '-cache.csv'; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM aloja_ml.learners WHERE id_learner = '" . md5($config . "M") . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['num'] > 0; $is_cached_mysql = $dbml->query("SELECT count(*) as num FROM aloja_ml.minconfigs WHERE id_minconfigs = '" . md5($config . 'R') . "' AND id_learner = '" . md5($config . "M") . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $is_cached && $tmp_result['num'] > 0; $in_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.fin'); // Create Models and Predictions if (!$is_cached && !$in_process && !$finished_process) { // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'e.net' => 'Net', 'e.disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'datanodes' => 'Datanodes', 'c.vm_OS' => 'VM.OS', 'c.vm_cores' => 'VM.Cores', 'c.vm_RAM' => 'VM.RAM', 'c.provider' => 'Provider', 'c.vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type', 'hadoop_version' => 'Hadoop.Version', 'IFNULL(datasize,0)' => 'Datasize', 'scale_factor' => 'Scale.Factor'); $added_names = array('maxtxkbs' => 'Net.maxtxKB.s', 'maxrxkbs' => 'Net.maxrxKB.s', 'maxtxpcks' => 'Net.maxtxPck.s', 'maxrxpcks' => 'Net.maxrxPck.s', 'maxtxcmps' => 'Net.maxtxCmp.s', 'maxrxcmps' => 'Net.maxrxCmp.s', 'maxrxmscts' => 'Net.maxrxmsct.s', 'maxtps' => 'Disk.maxtps', 'maxsvctm' => 'Disk.maxsvctm', 'maxrds' => 'Disk.maxrd.s', 'maxwrs' => 'Disk.maxwr.s', 'maxrqsz' => 'Disk.maxrqsz', 'maxqusz' => 'Disk.maxqusz', 'maxawait' => 'Disk.maxawait', 'maxutil' => 'Disk.maxutil'); // dump the result to csv $query = "SELECT " . implode(",", array_keys($header_names)) . ",\n\t\t\t\t\tn.maxtxkbs, n.maxrxkbs, n.maxtxpcks, n.maxrxpcks, n.maxtxcmps, n.maxrxcmps, n.maxrxmscts,\n\t\t\t\t\td.maxtps, d.maxsvctm, d.maxrds, d.maxwrs, d.maxrqsz, d.maxqusz, d.maxawait, d.maxutil\n\t\t\t\t\tFROM aloja2.execs AS e LEFT JOIN aloja2.clusters AS c ON e.id_cluster = c.id_cluster,\n\t\t\t\t\t(\n\t\t\t\t\t SELECT MAX(n1.`maxtxkB/s`) AS maxtxkbs, MAX(n1.`maxrxkB/s`) AS maxrxkbs,\n\t\t\t\t\t MAX(n1.`maxtxpck/s`) AS maxtxpcks, MAX(n1.`maxrxpck/s`) AS maxrxpcks,\n\t\t\t\t\t MAX(n1.`maxtxcmp/s`) AS maxtxcmps, MAX(n1.`maxrxcmp/s`) AS maxrxcmps,\n\t\t\t\t\t MAX(n1.`maxrxmcst/s`) AS maxrxmscts,\n\t\t\t\t\t e1.net AS net, c1.vm_cores, c1.vm_RAM, c1.vm_size, c1.vm_OS, c1.provider\n\t\t\t\t\t FROM aloja2.precal_network_metrics AS n1,\n\t\t\t\t\t aloja2.execs AS e1 LEFT JOIN aloja2.clusters AS c1 ON e1.id_cluster = c1.id_cluster\n\t\t\t\t\t WHERE e1.id_exec = n1.id_exec\n\t\t\t\t\t GROUP BY e1.net, c1.vm_cores, c1.vm_RAM, c1.vm_size, c1.vm_OS, c1.provider\n\t\t\t\t\t) AS n,\n\t\t\t\t\t(\n\t\t\t\t\t SELECT MAX(d1.maxtps) AS maxtps, MAX(d1.maxsvctm) as maxsvctm,\n\t\t\t\t\t MAX(d1.`maxrd_sec/s`) as maxrds, MAX(d1.`maxwr_sec/s`) as maxwrs,\n\t\t\t\t\t MAX(d1.maxrq_sz) as maxrqsz, MAX(d1.maxqu_sz) as maxqusz,\n\t\t\t\t\t MAX(d1.maxawait) as maxawait, MAX(d1.`max%util`) as maxutil,\n\t\t\t\t\t e2.disk AS disk, c1.vm_cores, c1.vm_RAM, c1.vm_size, c1.vm_OS, c1.provider\n\t\t\t\t\t FROM aloja2.precal_disk_metrics AS d1,\n\t\t\t\t\t aloja2.execs AS e2 LEFT JOIN aloja2.clusters AS c1 ON e2.id_cluster = c1.id_cluster\n\t\t\t\t\t WHERE e2.id_exec = d1.id_exec\n\t\t\t\t\t GROUP BY e2.disk, c1.vm_cores, c1.vm_RAM, c1.vm_size, c1.vm_OS, c1.provider\n\t\t\t\t\t) AS d\n\t\t\t\t\tWHERE e.net = n.net AND c.vm_cores = n.vm_cores AND c.vm_RAM = n.vm_RAM AND c.vm_size = n.vm_size\n\t\t\t\t\tAND c.vm_OS = n.vm_OS AND c.provider = n.provider AND e.disk = d.disk AND c.vm_cores = d.vm_cores\n\t\t\t\t\tAND c.vm_RAM = d.vm_RAM AND c.vm_size = d.vm_size AND c.vm_OS = d.vm_OS AND c.provider = d.provider\n\t\t\t\t\tAND hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, array_values(array_merge($header_names, $added_names)), ',', '"'); foreach ($rows as $row) { //$row['id_cluster'] = "Cl".$row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // run the R processor $vin = "Benchmark,Net,Disk,Maps,IO.SFac,Rep,IO.FBuf,Comp,Blk.size,Datanodes,VM.OS,VM.Cores,VM.RAM,Provider,VM.Size,Type,Bench.Type,Hadoop.Version,Datasize,Scale.Factor,Net.maxtxKB.s,Net.maxrxKB.s,Net.maxtxPck.s,Net.maxrxPck.s,Net.maxtxCmp.s,Net.maxrxCmp.s,Net.maxrxmsct.s,Disk.maxtps,Disk.maxsvctm,Disk.maxrd.s,Disk.maxwr.s,Disk.maxrqsz,Disk.maxqusz,Disk.maxawait,Disk.maxutil"; exec('cd ' . getcwd() . '/cache/ml; touch ' . md5($config) . '.lock'); $command = getcwd() . '/resources/queue -c "cd ' . getcwd() . '/cache/ml; ../../resources/aloja_cli.r -d ' . $cache_ds . ' -m ' . $learn_method . ' -p ' . $learn_options . ':saveall=' . md5($config . "F") . ':vin=\'' . $vin . '\' >debug1.txt 2>&1 && '; $count = 1; foreach ($instances as $inst) { $command = $command . '../../resources/aloja_cli.r -m aloja_predict_instance -l ' . md5($config . "F") . ' -p inst_predict=\'' . $inst . '\':saveall=' . md5($config . "D") . '-' . $count++ . ':vin=\'' . $vin . '\' >>debug2.txt 2>&1 && '; } $command = $command . ' head -1 ' . md5($config . "D") . '-1-dataset.data >' . md5($config . "D") . '-dataset.data 2>>debug2-1.txt && '; $command = $command . ' cat ' . md5($config . "D") . '*-dataset.data >' . md5($config . "D") . '-aux.data 2>>debug2-1.txt && '; $command = $command . ' grep -v "ID" ' . md5($config . "D") . '*-aux.data >>' . md5($config . "D") . '-dataset.data 2>>debug2-1.txt && '; $command = $command . '../../resources/aloja_cli.r -d ' . md5($config . "D") . '-dataset.data -m ' . $learn_method . ' -p ' . $learn_options . ':saveall=' . md5($config . "M") . ':vin=\'' . $vin . '\' >debug3.txt 2>&1 && '; $command = $command . '../../resources/aloja_cli.r -m aloja_minimal_instances -l ' . md5($config . "M") . ' -p saveall=' . md5($config . 'R') . ':kmax=200 >debug4.txt 2>&1; rm -f ' . md5($config) . '.lock; touch ' . md5($config) . '.fin" >debug4.tmp 2>&1 &'; exec($command); sleep(2); } $in_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.lock'); if ($in_process) { $must_wait = "YES"; throw new \Exception('WAIT'); } $learners = array(); $learners[] = md5($config . "F"); $learners[] = md5($config . "M"); foreach ($learners as $learner_1) { // Save learning model to DB, with predictions $is_cached_mysql = $dbml->query("SELECT id_learner FROM aloja_ml.learners WHERE id_learner = '" . $learner_1 . "'"); $tmp_result = $is_cached_mysql->fetch(); if ($tmp_result['id_learner'] != $learner_1) { // register model to DB $query = "INSERT IGNORE INTO aloja_ml.learners (id_learner,instance,model,algorithm,dataslice)"; $query = $query . " VALUES ('" . $learner_1 . "','" . $instance . "','" . substr($model_info, 1) . "','" . $learn_param . "','" . $slice_info . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving model into DB'); } // read results of the CSV and dump to DB foreach (array("tt", "tv", "tr") as $value) { if (($handle = fopen(getcwd() . '/cache/ml/' . $learner_1 . '-' . $value . '.csv', 'r')) !== FALSE) { $header = fgetcsv($handle, 1000, ","); $token = 0; $query = "INSERT IGNORE INTO aloja_ml.predictions (\n\t\t\t\t\t\t\t\tid_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,\n\t\t\t\t\t\t\t\tdatanodes,vm_OS,vm_cores,vm_RAM,provider,vm_size,type,bench_type,hadoop_version,\n\t\t\t\t\t\t\t\tdatasize,scale_factor,\n\t\t\t\t\t\t\t\tnet_maxtxkbs,net_maxrxkbs,net_maxtxpcks,net_maxrxpcks,net_maxtxcmps,net_maxrxcmps,net_maxrxmscts,\n\t\t\t\t\t\t\t\tdisk_maxtps,disk_maxsvctm,disk_maxrds,disk_maxwrs,disk_maxrqsz,disk_maxqusz,disk_maxawait, disk_maxutil,\n\t\t\t\t\t\t\t\tpred_time,id_learner,instance,predict_code) VALUES "; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $specific_instance = implode(",", array_slice($data, 2, 35)); $specific_data = implode(",", $data); $specific_data = preg_replace('/,Cmp(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+),/', ',${1},', $specific_data); $specific_data = str_replace(",", "','", $specific_data); $query_var = "SELECT count(*) as num FROM aloja_ml.predictions WHERE instance = '" . $specific_instance . "' AND id_learner = '" . $learner_1 . "'"; $result = $dbml->query($query_var); $row = $result->fetch(); // Insert instance values if ($row['num'] == 0) { if ($token != 0) { $query = $query . ","; } $token = 1; $query = $query . "('" . $specific_data . "','" . $learner_1 . "','" . $specific_instance . "','" . ($value == 'tt' ? 3 : ($value == 'tv' ? 2 : 1)) . "') "; } } if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving into DB'); } fclose($handle); } } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . $learner_1 . '*.{dat,csv}'); } } // Save minconfigs to DB, with props and centers $is_cached_mysql = $dbml->query("SELECT id_minconfigs FROM aloja_ml.minconfigs WHERE id_minconfigs = '" . md5($config . 'R') . "'"); $tmp_result = $is_cached_mysql->fetch(); if ($tmp_result['id_minconfigs'] != md5($config . 'R')) { // register minconfigs to DB $query = "INSERT IGNORE INTO aloja_ml.minconfigs (id_minconfigs,id_learner,instance,model,is_new)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . md5($config . 'M') . "','" . $instance . "','" . substr($model_info, 1) . "','1');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving minconfis into DB'); } $clusters = array(); // Save results of the CSV - MAE or RAE if (file_exists(getcwd() . '/cache/ml/' . md5($config . 'R') . '-raes.csv')) { $error_file = 'raes.csv'; } else { $error_file = 'maes.csv'; } $handle = fopen(getcwd() . '/cache/ml/' . md5($config . 'R') . '-' . $error_file, 'r'); while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $cluster = (int) $data[0]; if ($error_file == 'raes.csv') { $error_mae = 'NULL'; $error_rae = (double) $data[1]; } if ($error_file == 'maes.csv') { $error_mae = (double) $data[1]; $error_rae = 'NULL'; } // register minconfigs_props to DB $query = "INSERT INTO aloja_ml.minconfigs_props (id_minconfigs,cluster,MAE,RAE)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . $cluster . "','" . $error_mae . "','" . $error_rae . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving minconfis into DB'); } $clusters[] = $cluster; } fclose($handle); // Save results of the CSV - Configs $handle_sizes = fopen(getcwd() . '/cache/ml/' . md5($config . 'R') . '-sizes.csv', 'r'); foreach ($clusters as $cluster) { // Get supports from sizes $sizes = fgetcsv($handle_sizes, 1000, ","); // Get clusters $handle = fopen(getcwd() . '/cache/ml/' . md5($config . 'R') . '-dsk' . $cluster . '.csv', 'r'); $header = fgetcsv($handle, 1000, ","); $i = 0; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $subdata1 = array_slice($data, 0, 11); $subdata2 = array_slice($data, 18, 4); $specific_data = implode(',', array_merge($subdata1, $subdata2)); $specific_data = preg_replace('/,Cmp(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+),/', ',${1},', $specific_data); $specific_data = preg_replace('/,Cl(\\d+)/', ',${1}', $specific_data); $specific_data = str_replace(",", "','", $specific_data); // register minconfigs_props to DB $query = "INSERT INTO aloja_ml.minconfigs_centers (id_minconfigs,cluster,id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,bench_type,hadoop_version,datasize,scale_factor,support)"; $query = $query . " VALUES ('" . md5($config . 'R') . "','" . $cluster . "','" . $specific_data . "','" . $sizes[$i++] . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving centers into DB'); } } fclose($handle); } fclose($handle_sizes); // Store file model to DB $filemodel = getcwd() . '/cache/ml/' . md5($config . 'F') . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO aloja_ml.model_storage (id_hash,type,file) VALUES ('" . md5($config . 'F') . "','learner','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file model into DB'); } $filemodel = getcwd() . '/cache/ml/' . md5($config . 'M') . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO aloja_ml.model_storage (id_hash,type,file) VALUES ('" . md5($config . 'M') . "','learner','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file model into DB'); } $filemodel = getcwd() . '/cache/ml/' . md5($config . 'R') . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO aloja_ml.model_storage (id_hash,type,file) VALUES ('" . md5($config . 'R') . "','minconf','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file minconf into DB'); } // Remove temporal files exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'R') . '*.rds'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'R') . '*.dat'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'R') . '*.csv'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'D') . '*.csv'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'D') . '*.dat'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'D') . '*.data'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'F') . '*.rds'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'F') . '*.csv'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'F') . '*.dat'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'M') . '*.rds'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'M') . '*.csv'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config . 'M') . '*.dat'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '*.csv'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '*.dat'); exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '*.fin'); } // Retrieve minconfig progression results from DB $header = "id_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,bench_type,hadoop_version,datasize,scale_factor,support"; $header_array = explode(",", $header); $last_y = 9000000000000000.0; $configs = '['; $jsonData = array(); $query = "SELECT cluster, MAE, RAE FROM aloja_ml.minconfigs_props WHERE id_minconfigs='" . md5($config . 'R') . "'"; $result = $dbml->query($query); foreach ($result as $row) { // Retrieve minconfig progression results from DB if ((double) $row['MAE'] > 0) { $error = (double) $row['MAE']; } else { $error = (double) $row['RAE']; } $cluster = (int) $row['cluster']; $new_val = array(); $new_val['x'] = $cluster; if ($error > $last_y) { $new_val['y'] = $last_y; } else { $last_y = $new_val['y'] = $error; } $jsonData[] = $new_val; // Retrieve minconfig centers from DB $query_2 = "SELECT " . $header . " FROM aloja_ml.minconfigs_centers WHERE id_minconfigs='" . md5($config . 'R') . "' AND cluster='" . $cluster . "'"; $result_2 = $dbml->query($query_2); $jsonConfig = '['; foreach ($result_2 as $row_2) { $values = ''; foreach ($header_array as $ha) { $values = $values . ($values != '' ? ',' : '') . '\'' . $row_2[$ha] . '\''; } $jsonConfig = $jsonConfig . ($jsonConfig != '[' ? ',' : '') . '[' . $values . ']'; } $jsonConfig = $jsonConfig . ']'; $configs = $configs . ($configs != '[' ? ',' : '') . $jsonConfig; } $configs = $configs . ']'; $jsonData = json_encode($jsonData); $jsonHeader = '[{title:""},{title:"Est.Time"},{title:"Benchmark"},{title:"Network"},{title:"Disk"},{title:"Maps"},{title:"IO.SF"},{title:"Replicas"},{title:"IO.FBuf"},{title:"Compression"},{title:"Blk.Size"},{title:"Bench.Type"},{title:"Hadoop.Ver"},{title:"Data.Size"},{title:"Scale.Factor"},{title:"Support"}]'; $query = "SELECT MAX(cluster) as mcluster, MAX(MAE) as mmae, MAX(RAE) as mrae FROM aloja_ml.minconfigs_props WHERE id_minconfigs='" . md5($config . 'R') . "'"; $is_cached_mysql = $dbml->query($query); $tmp_result = $is_cached_mysql->fetch(); $max_x = (double) $tmp_result['mmae'] > 0 ? (double) $tmp_result['mmae'] : (double) $tmp_result['mrae']; $max_y = (double) $tmp_result['mcluster']; } catch (\Exception $e) { if ($e->getMessage() != "WAIT") { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); } $jsonData = $jsonHeader = $configs = '[]'; } $dbml = null; $params['id_cluster'] = $param_id_cluster; $return_params = array('selected' => 'mlnewconfigs', 'jsonData' => $jsonData, 'jsonHeader' => $jsonHeader, 'configs' => $configs, 'newconfs' => $jsonNewconfs, 'header_newconfs' => $jsonNewconfsHeader, 'max_p' => min(array($max_x, $max_y)), 'instance' => $instance, 'id_newconf' => md5($config), 'id_newconf_first' => md5($config . 'F'), 'id_newconf_dataset' => md5($config . 'D'), 'id_newconf_model' => md5($config . 'M'), 'id_newconf_result' => md5($config . 'R'), 'model_info' => $model_info, 'slice_info' => $slice_info, 'learn' => $learn_param, 'must_wait' => $must_wait, 'options' => MLNewconfigsController::getFilterOptions($db)); foreach ($param_names as $p) { $return_params[$p] = $params[$p]; } foreach ($param_names_additional as $p) { $return_params[$p] = $params_additional[$p]; } echo $this->container->getTwig()->render('mltemplate/mlnewconfigs.html.twig', $return_params); }
public function mlobservedtreesAction() { $model_info = $instance = $slice_info = $message = $config = $tree_descriptor_ordered = $tree_descriptor_gini = ''; $jsonData = $jsonHeader = '[]'; $jsonObstrees = $jsonObstreesHeader = '[]'; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); // FIXME - This must be counted BEFORE building filters, as filters inject rubbish in GET when there are no parameters... $instructions = count($_GET) <= 1; $this->buildFilters(array('minexetime' => array('default' => 0), 'valid' => array('default' => 0), 'filter' => array('default' => 0), 'prepares' => array('default' => 1))); if ($instructions) { MLUtils::getIndexObsTrees($jsonObstrees, $jsonObstreesHeader, $dbml); return $this->render('mltemplate/mlobstrees.html.twig', array('obstrees' => $jsonObstrees, 'header_obstrees' => $jsonObstreesHeader, 'jsonData' => '[]', 'jsonHeader' => '[]', 'instructions' => 'YES')); } $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version', 'datasize', 'scale_factor'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); $where_configs = $this->filters->getWhereClause(); $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, TRUE); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, TRUE); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); $config = $instance . '-' . $slice_info . '-obstree'; $is_cached_mysql = $dbml->query("SELECT count(*) as total FROM aloja_ml.observed_trees WHERE id_obstrees = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; $in_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.fin'); $tmp_file = getcwd() . '/cache/ml/' . md5($config) . '.tmp'; // get headers for csv $header_names = array('bench' => 'Benchmark', 'net' => 'Net', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'datanodes' => 'Datanodes', 'vm_OS' => 'VM.OS', 'vm_cores' => 'VM.Cores', 'vm_RAM' => 'VM.RAM', 'provider' => 'Provider', 'vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type', 'hadoop_version' => 'Hadoop.Version', 'IFNULL(datasize,0)' => 'Datasize', 'scale_factor' => 'Scale.Factor'); $special_header_names = array('id_exec' => 'ID', 'exe_time' => 'Exe.Time'); $headers = array_keys($header_names); $special_headers = array_keys($special_header_names); if (!$in_process && !$finished_process && !$is_cached) { // Dump the DB slice to csv $query = "SELECT " . implode(",", $headers) . ", " . implode(",", $special_headers) . " FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } if (($key = array_search('e.id_cluster', $headers)) !== false) { $headers[$key] = 'id_cluster'; } $fp = fopen($tmp_file, 'w'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... $line = ''; foreach ($headers as $hn) { $line = $line . ($line != '' ? ',' : '') . $row[$hn]; } $line = $row['id_exec'] . ' ' . $line . ' ' . $row['exe_time'] . "\n"; fputs($fp, $line); } fclose($fp); if (($key = array_search('id_cluster', $headers)) !== false) { $headers[$key] = 'e.id_cluster'; } // Execute R Engine $exe_query = 'cd ' . getcwd() . '/cache/ml;'; $exe_query = $exe_query . ' touch ' . md5($config) . '.lock;'; $exe_query = $exe_query . ' ../../resources/aloja_cli.r -m aloja_representative_tree -p method=ordered:dump_file=' . $tmp_file . ':output=nodejson -v >' . md5($config) . '-split.dat 2>/dev/null;'; $exe_query = $exe_query . ' ../../resources/aloja_cli.r -m aloja_representative_tree -p method=gini:dump_file=' . $tmp_file . ':output=nodejson -v >' . md5($config) . '-gini.dat 2>/dev/null;'; $exe_query = $exe_query . ' rm -f ' . md5($config) . '.lock; rm -f ' . $tmp_file . '; touch ' . md5($config) . '.fin'; exec(getcwd() . '/resources/queue -d -c "' . $exe_query . '" >/dev/null 2>&1 &'); } if (!$is_cached) { $finished_process = file_exists(getcwd() . '/cache/ml/' . md5($config) . '.fin'); if ($finished_process) { // Read results and dump to DB $tree_descriptor_ordered = ''; try { $file = fopen(getcwd() . '/cache/ml/' . md5($config) . '-split.dat', "r"); $tree_descriptor_ordered = fgets($file); $tree_descriptor_ordered = substr($tree_descriptor_ordered, 5, -2); $tree_descriptor_ordered = str_replace("\\\"", "\"", $tree_descriptor_ordered); $tree_descriptor_ordered = str_replace("desc:\"\"", "desc:\"---\"", $tree_descriptor_ordered); fclose($file); } catch (\Exception $e) { throw new \Exception("Error on retrieving result file. Check that R is working properly."); } $tree_descriptor_gini = ''; /* try { $file = fopen(getcwd().'/cache/ml/'.md5($config).'-gini.dat', "r"); $tree_descriptor_gini = fgets($file); $tree_descriptor_gini = substr($tree_descriptor_gini, 5, -2); $tree_descriptor_gini = str_replace("\\\"","\"",$tree_descriptor_gini); fclose($file); } catch (\Exception $e) { throw new \Exception ("Error on retrieving result file. Check that R is working properly."); } */ $query = "INSERT INTO aloja_ml.observed_trees (id_obstrees,instance,model,dataslice,tree_code_split,tree_code_gain) VALUES ('" . md5($config) . "','" . $instance . "','" . $model_info . "','" . $slice_info . "','" . $tree_descriptor_ordered . "','" . $tree_descriptor_gini . "')"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving tree into DB'); } // Remove temporal files $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '-*.dat'); $output = shell_exec('rm -f ' . getcwd() . '/cache/ml/' . md5($config) . '.fin'); } else { $must_wait = 'YES'; throw new \Exception('WAIT'); } } // Fetch results and compose JSON $header = array('Benchmark', 'Net', 'Disk', 'Maps', 'IO.SFS', 'Rep', 'IO.FBuf', 'Comp', 'Blk.Size', 'Cluster', 'Datanodes', 'VM.OS', 'VM.Cores', 'VM.RAM', 'Provider', 'VM.Size', 'Type', 'Bench.Type', 'Version', 'Datasize', 'Scale.Factor', 'Observed'); $jsonHeader = '[{title:""}'; foreach ($header as $title) { $jsonHeader = $jsonHeader . ',{title:"' . $title . '"}'; } $jsonHeader = $jsonHeader . ']'; // Fetch observed values $query = "SELECT " . implode(",", $headers) . ", " . implode(",", $special_headers) . " FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } if (($key = array_search('e.id_cluster', $headers)) !== false) { $headers[$key] = 'id_cluster'; } $jsonData = '['; foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... $line = ''; foreach ($headers as $hn) { $line = $line . ($line != '' ? ',' : '') . $row[$hn]; } $line = $row['id_exec'] . ',' . $line . ',' . $row['exe_time']; if ($jsonData != '[') { $jsonData = $jsonData . ','; } $jsonData = $jsonData . "['" . str_replace(",", "','", $line) . "']"; } $jsonData = $jsonData . ']'; foreach (range(1, 32) as $value) { $jsonData = str_replace('Cmp' . $value, Utils::getCompressionName($value), $jsonData); } if ($tree_descriptor_ordered == '') { // Display Descriptive Tree, if not processed yet $query = "SELECT tree_code_split, tree_code_gain FROM aloja_ml.observed_trees WHERE id_obstrees = '" . md5($config) . "'"; $result = $dbml->query($query); $row = $result->fetch(); $tree_descriptor_ordered = $row['tree_code_split']; $tree_descriptor_gini = $row['tree_code_gain']; } } catch (\Exception $e) { if ($e->getMessage() != "WAIT") { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); } $jsonData = $jsonHeader = '[]'; } $dbml = null; $return_params = array('jsonData' => $jsonData, 'jsonHeader' => $jsonHeader, 'obstrees' => $jsonObstrees, 'header_obstrees' => $jsonObstreesHeader, 'message' => $message, 'must_wait' => $must_wait, 'instance' => $instance, 'model_info' => $model_info, 'slice_info' => $slice_info, 'id_obstrees' => md5($config), 'tree_descriptor_ordered' => $tree_descriptor_ordered, 'tree_descriptor_gini' => $tree_descriptor_gini); return $this->render('mltemplate/mlobstrees.html.twig', $return_params); }
public static function getIndexNewconfs(&$jsonNewconfs, &$jsonNewconfsHeader, $dbml) { $query = "SELECT mj.*, COUNT(mc.sid_minconfigs_centers) AS num_centers\n\t\t\tFROM (\tSELECT DISTINCT m.id_minconfigs AS id_minconfigs, m.model AS model, m.is_new as is_new, m.dataslice AS advanced,\n\t\t\t\t\tm.creation_time AS creation_time, COUNT(mp.sid_minconfigs_props) AS num_props, l.algorithm\n\t\t\t\tFROM aloja_ml.minconfigs AS m LEFT JOIN aloja_ml.minconfigs_props AS mp ON m.id_minconfigs = mp.id_minconfigs, aloja_ml.learners AS l\n\t\t\t\tWHERE l.id_learner = m.id_learner\n\t\t\t\tGROUP BY m.id_minconfigs\n\t\t\t) AS mj LEFT JOIN aloja_ml.minconfigs_centers AS mc ON mj.id_minconfigs = mc.id_minconfigs\n\t\t\tWHERE mj.is_new = 1\n\t\t\tGROUP BY mj.id_minconfigs\n\t\t\t"; $rows = $dbml->query($query); $jsonNewconfs = '['; foreach ($rows as $row) { $url = MLUtils::revertModelToURL($row['model'], $row['advanced'], 'presets=none&submit=&learner[]=' . $row['algorithm']); $model_display = MLUtils::display_models_noasts($row['model']); $slice_display = MLUtils::display_models_noasts($row['advanced']); $jsonNewconfs = $jsonNewconfs . ($jsonNewconfs == '[' ? '' : ',') . "['" . $row['id_minconfigs'] . "','" . $row['algorithm'] . "','" . $model_display . "','" . $slice_display . "','" . $row['creation_time'] . "','" . $row['num_props'] . "','" . $row['num_centers'] . "',\n\t\t\t'<a href=\\'/mlnewconfigs?" . $url . "\\'>View</a> <a href=\\'/mlclearcache?rmm=" . $row['id_minconfigs'] . "\\'>Remove</a>']"; } $jsonNewconfs = $jsonNewconfs . ']'; $jsonNewconfsHeader = "[{'title':'ID'},{'title':'Algorithm'},{'title':'Attribute Selection'},{'title':'Advanced Filters'},{'title':'Creation'},{'title':'Properties'},{'title':'Centers'},{'title':'Actions'}]"; }
public function mloutliersAction() { $jsonData = $jsonWarns = $jsonOuts = array(); $message = $instance = $jsonHeader = $jsonTable = ''; $max_x = $max_y = 0; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain_ml'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); $where_configs = ''; $preset = null; if (count($_GET) <= 1 || count($_GET) == 2 && array_key_exists('current_model', $_GET) || count($_GET) == 2 && array_key_exists('dump', $_GET) || count($_GET) == 2 && array_key_exists('register', $_GET) || count($_GET) == 3 && array_key_exists('dump', $_GET) && array_key_exists('current_model', $_GET) || count($_GET) == 3 && array_key_exists('register', $_GET) && array_key_exists('current_model', $_GET)) { $preset = Utils::setDefaultPreset($db, 'mloutliers'); } $selPreset = isset($_GET['presets']) ? $_GET['presets'] : "none"; $params = array(); $param_names = array('benchs', 'nets', 'disks', 'mapss', 'iosfs', 'replications', 'iofilebufs', 'comps', 'blk_sizes', 'id_clusters', 'datanodess', 'bench_types', 'vm_sizes', 'vm_coress', 'vm_RAMs', 'types'); // Order is important foreach ($param_names as $p) { $params[$p] = Utils::read_params($p, $where_configs, FALSE); sort($params[$p]); } $sigma_param = array_key_exists('sigma', $_GET) ? (int) $_GET['sigma'] : 1; // FIXME PATCH FOR PARAM LIBRARIES WITHOUT LEGACY $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($param_names, $params, true, $db); // Used only as indicator for WEB $model_info = MLUtils::generateModelInfo($param_names, $params, true, $db); // model for filling MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = ""; if (array_key_exists('current_model', $_GET) && in_array($_GET['current_model'], $possible_models_id)) { $current_model = $_GET['current_model']; } if (!empty($possible_models_id)) { if ($current_model == "") { $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, p.id_learner FROM predictions p, learners l WHERE l.id_learner = p.id_learner AND p.id_learner IN ('" . implode("','", $possible_models_id) . "') AND predict_code > 0 ORDER BY MAE LIMIT 1"; $result = $dbml->query($query); $row = $result->fetch(); $current_model = $row['id_learner']; } $config = $instance . '-' . $current_model . '-' . $sigma_param . '-outliers'; $is_cached_mysql = $dbml->query("SELECT count(*) as total FROM resolutions WHERE id_resolution = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; $cache_ds = getcwd() . '/cache/query/' . md5($config) . '-cache.csv'; $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv'); if (!$is_cached && !$in_process && !$finished_process) { // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'net' => 'Net', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'name' => 'Cl.Name', 'datanodes' => 'Datanodes', 'headnodes' => 'Headnodes', 'vm_OS' => 'VM.OS', 'vm_cores' => 'VM.Cores', 'vm_RAM' => 'VM.RAM', 'provider' => 'Provider', 'vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type'); $headers = array_keys($header_names); $names = array_values($header_names); // dump the result to csv $query = "SELECT " . implode(",", $headers) . " FROM execs e LEFT JOIN clusters c ON e.id_cluster = c.id_cluster WHERE e.valid = TRUE AND e.exe_time > 100" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, $names, ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // Retrieve file model from DB $query = "SELECT file FROM model_storage WHERE id_hash='" . $current_model . "' AND type='learner';"; $result = $dbml->query($query); $row = $result->fetch(); $content = $row['file']; $filemodel = getcwd() . '/cache/query/' . $current_model . '-object.rds'; $fp = fopen($filemodel, 'w'); fwrite($fp, $content); fclose($fp); // launch query exec('cd ' . getcwd() . '/cache/query ; touch ' . md5($config) . '.lock'); exec(getcwd() . '/resources/queue -c "cd ' . getcwd() . '/cache/query ; ' . getcwd() . '/resources/aloja_cli.r -m aloja_outlier_dataset -d ' . $cache_ds . ' -l ' . $current_model . ' -p sigma=' . $sigma_param . ':hdistance=3:saveall=' . md5($config) . ' > /dev/null 2>&1 ; rm -f ' . md5($config) . '.lock" > /dev/null 2>&1 &'); } $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv'); if ($finished_process && !$is_cached) { if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv', 'r')) !== FALSE) { $header = fgetcsv($handle, 1000, ","); $token = 0; $query = "REPLACE INTO resolutions (id_resolution,id_learner,id_exec,instance,model,sigma,outlier_code,predicted,observed) VALUES "; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $resolution = $data[0]; $pred_value = (int) $data[1] >= 100 ? (int) $data[1] : 100; $exec_value = (int) $data[2]; $selected_instance_pre = preg_replace('/\\s+/', '', $data[3]); $selected_instance_pre = str_replace(':', ',', $selected_instance_pre); $specific_id = $data[4]; if ($token > 0) { $query = $query . ","; } $token = 1; $query = $query . "('" . md5($config) . "','" . $current_model . "','" . $specific_id . "','" . $selected_instance_pre . "','" . $model_info . "','" . $sigma_param . "','" . $resolution . "','" . $pred_value . "','" . $exec_value . "') "; } if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving tree into DB'); } } // Store file model to DB $filemodel = getcwd() . '/cache/query/' . md5($config) . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO model_storage (id_hash,type,file) VALUES ('" . md5($config) . "','resolution','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file resolution into DB'); } // Remove temporary files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '-*.csv'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . $current_model . '-object.rds'); $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '-object.rds'); $is_cached = true; } if (!$is_cached) { $jsonData = $jsonOuts = $jsonWarns = $jsonHeader = $jsonTable = '[]'; $must_wait = 'YES'; if (isset($_GET['dump'])) { echo "1"; exit(0); } } else { $must_wait = 'NO'; $query = "SELECT predicted, observed, outlier_code, id_exec, instance FROM resolutions WHERE id_resolution = '" . md5($config) . "' LIMIT 5000"; // FIXME - CLUMSY PATCH FOR BYPASS THE BUG FROM HIGHCHARTS... REMEMBER TO ERASE THIS LINE WHEN THE BUG IS SOLVED $result = $dbml->query($query); foreach ($result as $row) { $entry = array('x' => (int) $row['predicted'], 'y' => (int) $row['observed'], 'name' => $row['instance'], 'id' => (int) $row['id_exec']); if ($row['outlier_code'] == 0) { $jsonData[] = $entry; } if ($row['outlier_code'] == 1) { $jsonWarns[] = $entry; } if ($row['outlier_code'] == 2) { $jsonOuts[] = $entry; } $jsonTable .= ($jsonTable == '' ? '' : ',') . '["' . ($row['outlier_code'] == 0 ? 'Legitimate' : ($row['outlier_code'] == 1 ? 'Warning' : 'Outlier')) . '","' . $row['predicted'] . '","' . $row['observed'] . '","' . str_replace(",", "\",\"", $row['instance']) . '","' . $row['id_exec'] . '"]'; } $query_var = "SELECT MAX(predicted) as max_x, MAX(observed) as max_y FROM resolutions WHERE id_resolution = '" . md5($config) . "' LIMIT 5000"; $result = $dbml->query($query_var); $row = $result->fetch(); $max_x = $row['max_x']; $max_y = $row['max_y']; $header = array('Prediction', 'Observed', 'Benchmark', 'Net', 'Disk', 'Maps', 'IO.SFS', 'Rep', 'IO.FBuf', 'Comp', 'Blk.Size', 'Cluster', 'Cl.Name', 'Datanodes', 'Headnodes', 'VM.OS', 'VM.Cores', 'VM.RAM', 'Provider', 'VM.Size', 'Type', 'Bench.Type', 'ID'); $jsonHeader = '[{title:""}'; foreach ($header as $title) { $jsonHeader = $jsonHeader . ',{title:"' . $title . '"}'; } $jsonHeader = $jsonHeader . ']'; $jsonData = json_encode($jsonData); $jsonWarns = json_encode($jsonWarns); $jsonOuts = json_encode($jsonOuts); $jsonTable = '[' . $jsonTable . ']'; // Dump case if (isset($_GET['dump'])) { echo str_replace(array("[", "]", "{title:\"", "\"}"), array('', '', ''), $jsonHeader) . "\n"; echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonOuts); echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonWarns); echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonData); exit(0); } // Register case if (isset($_GET['register'])) { // Update the predictions table $query_var = "UPDATE predictions as p, resolutions as r\n\t\t\t\t\t\t\t\tSET p.outlier = r.outlier_code\n\t\t\t\t\t\t\t\tWHERE r.id_exec = p.id_exec\n\t\t\t\t\t\t\t\t\tAND r.id_resolution = '" . md5($config) . "'\n\t\t\t\t\t\t\t\t\tAND p.id_learner = '" . $current_model . "'"; if ($dbml->query($query_var) === FALSE) { throw new \Exception('Error when updating predictions in DB'); } } } } else { $message = "There are no prediction models trained for such parameters. Train at least one model in 'ML Prediction' section."; $must_wait = "NO"; } $dbml = null; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n"); $jsonData = $jsonOuts = $jsonWarns = $jsonHeader = $jsonTable = '[]'; $model = ''; $possible_models_id = $possible_models = array(); $dbml = null; } echo $this->container->getTwig()->render('mltemplate/mloutliers.html.twig', array('selected' => 'mloutliers', 'jsonData' => $jsonData, 'jsonWarns' => $jsonWarns, 'jsonOuts' => $jsonOuts, 'jsonHeader' => $jsonHeader, 'jsonTable' => $jsonTable, 'max_p' => min(array($max_x, $max_y)), 'benchs' => $params['benchs'], 'nets' => $params['nets'], 'disks' => $params['disks'], 'blk_sizes' => $params['blk_sizes'], 'comps' => $params['comps'], 'id_clusters' => $params['id_clusters'], 'mapss' => $params['mapss'], 'replications' => $params['replications'], 'iosfs' => $params['iosfs'], 'iofilebufs' => $params['iofilebufs'], 'datanodess' => $params['datanodess'], 'bench_types' => $params['bench_types'], 'vm_sizes' => $params['vm_sizes'], 'vm_coress' => $params['vm_coress'], 'vm_RAMs' => $params['vm_RAMs'], 'types' => $params['types'], 'must_wait' => $must_wait, 'models' => '<li>' . implode('</li><li>', $possible_models) . '</li>', 'models_id' => $possible_models_id, 'current_model' => $current_model, 'resolution_id' => md5($config), 'sigma' => $sigma_param, 'message' => $message, 'instance' => $instance, 'preset' => $preset, 'selPreset' => $selPreset, 'options' => Utils::getFilterOptions($db))); }