public function mloutliersAction() { $jsonData = $jsonWarns = $jsonOuts = array(); $message = $instance = $jsonHeader = $jsonTable = $model_html = $config = $model_info = ''; $possible_models = $possible_models_id = $other_models = array(); $jsonResolutions = $jsonResolutionsHeader = '[]'; $max_x = $max_y = 0; $must_wait = 'NO'; try { $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']); $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false); $db = $this->container->getDBUtils(); // FIXME - This must be counted BEFORE building filters, as filters inject rubbish in GET when there are no parameters... $instructions = count($_GET) <= 1; if (array_key_exists('dump', $_GET)) { $dump = $_GET["dump"]; unset($_GET["dump"]); } if (array_key_exists('register', $_GET)) { $register = $_GET["register"]; unset($_GET["register"]); } $this->buildFilters(array('current_model' => array('type' => 'selectOne', 'default' => null, 'label' => 'Model to use: ', 'generateChoices' => function () { return array(); }, 'parseFunction' => function () { $choice = isset($_GET['current_model']) ? $_GET['current_model'] : array(""); return array('whereClause' => '', 'currentChoice' => $choice); }, 'filterGroup' => 'MLearning'), 'sigma' => array('type' => 'inputNumber', 'default' => 1, 'label' => 'Sigmas: ', 'parseFunction' => function () { $choice = isset($_GET['sigma']) ? $_GET['sigma'] : 1; return array('whereClause' => '', 'currentChoice' => $choice); }, 'max' => 3, 'min' => 1, 'filterGroup' => 'MLearning'), 'minexetime' => array('default' => 0), 'valid' => array('default' => 0), 'filter' => array('default' => 0), 'prepares' => array('default' => 1))); $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('current_model', 'sigma')))); $params = array(); $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version'); // Order is important $params = $this->filters->getFiltersSelectedChoices($param_names); foreach ($param_names as $p) { if (!is_null($params[$p]) && is_array($params[$p])) { sort($params[$p]); } } $params_additional = array(); $param_names_additional = array('datefrom', 'dateto', 'minexetime', 'maxexetime', 'valid', 'filter'); // Order is important $params_additional = $this->filters->getFiltersSelectedChoices($param_names_additional); $param_variables = $this->filters->getFiltersSelectedChoices(array('current_model', 'sigma')); $param_current_model = $param_variables['current_model']; $sigma_param = $param_variables['sigma']; $where_configs = $this->filters->getWhereclause(); $where_configs = str_replace("AND .", "AND ", $where_configs); // compose instance $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, true); $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, true); $slice_info = MLUtils::generateDatasliceInfo($this->filters, $param_names_additional, $params_additional); // model for filling MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml); $current_model = ''; if (!is_null($possible_models_id) && in_array($param_current_model, $possible_models_id)) { $current_model = $param_current_model; } // Other models for filling $where_models = ''; if (!empty($possible_models_id)) { $where_models = " WHERE id_learner NOT IN ('" . implode("','", $possible_models_id) . "')"; } $result = $dbml->query("SELECT id_learner FROM aloja_ml.learners" . $where_models); foreach ($result as $row) { $other_models[] = $row['id_learner']; } if ($instructions) { $result = $dbml->query("SELECT id_learner, model, algorithm FROM aloja_ml.learners"); foreach ($result as $row) { $model_html = $model_html . "<li>" . $row['id_learner'] . " => " . $row['algorithm'] . " : " . $row['model'] . "</li>"; } MLUtils::getIndexOutExps($jsonResolutions, $jsonResolutionsHeader, $dbml); $this->filters->setCurrentChoices('current_model', array_merge($possible_models_id, array('---Other models---'), $other_models)); return $this->render('mltemplate/mloutliers.html.twig', array('outexps' => $jsonResolutions, 'header_outexps' => $jsonResolutionsHeader, 'jsonData' => '[]', 'jsonWarns' => '[]', 'jsonOuts' => '[]', 'jsonHeader' => '[]', 'jsonTable' => '[]', 'max_p' => 0, 'models' => $model_html, 'instructions' => 'YES')); } if (!empty($possible_models_id)) { $result = $dbml->query("SELECT id_learner, model, algorithm, CASE WHEN `id_learner` IN ('" . implode("','", $possible_models_id) . "') THEN 'COMPATIBLE' ELSE 'NOT MATCHED' END AS compatible FROM aloja_ml.learners"); foreach ($result as $row) { $model_html = $model_html . "<li>" . $row['id_learner'] . " => " . $row['algorithm'] . " : " . $row['compatible'] . " : " . $row['model'] . "</li>"; } if ($current_model == "") { $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, p.id_learner FROM aloja_ml.predictions p, aloja_ml.learners l WHERE l.id_learner = p.id_learner AND p.id_learner IN ('" . implode("','", $possible_models_id) . "') AND predict_code > 0 ORDER BY MAE LIMIT 1"; $result = $dbml->query($query); $row = $result->fetch(); $current_model = $row['id_learner']; } $config = $instance . '-' . $current_model . '-' . $sigma_param . ' ' . $slice_info . '-outliers'; $is_cached_mysql = $dbml->query("SELECT count(*) as total FROM aloja_ml.resolutions WHERE id_resolution = '" . md5($config) . "'"); $tmp_result = $is_cached_mysql->fetch(); $is_cached = $tmp_result['total'] > 0; $cache_ds = getcwd() . '/cache/query/' . md5($config) . '-cache.csv'; $in_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '.lock'); $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv'); if (!$is_cached && !$in_process && !$finished_process) { // get headers for csv $header_names = array('id_exec' => 'ID', 'bench' => 'Benchmark', 'exe_time' => 'Exe.Time', 'net' => 'Net', 'disk' => 'Disk', 'maps' => 'Maps', 'iosf' => 'IO.SFac', 'replication' => 'Rep', 'iofilebuf' => 'IO.FBuf', 'comp' => 'Comp', 'blk_size' => 'Blk.size', 'e.id_cluster' => 'Cluster', 'datanodes' => 'Datanodes', 'vm_OS' => 'VM.OS', 'vm_cores' => 'VM.Cores', 'vm_RAM' => 'VM.RAM', 'provider' => 'Provider', 'vm_size' => 'VM.Size', 'type' => 'Type', 'bench_type' => 'Bench.Type', 'hadoop_version' => 'Hadoop.Version'); $headers = array_keys($header_names); $names = array_values($header_names); // dump the result to csv $query = "SELECT " . implode(",", $headers) . " FROM aloja2.execs e LEFT JOIN aloja2.clusters c ON e.id_cluster = c.id_cluster WHERE hadoop_version IS NOT NULL" . $where_configs . ";"; $rows = $db->get_rows($query); if (empty($rows)) { throw new \Exception('No data matches with your critteria.'); } $fp = fopen($cache_ds, 'w'); fputcsv($fp, $names, ',', '"'); foreach ($rows as $row) { $row['id_cluster'] = "Cl" . $row['id_cluster']; // Cluster is numerically codified... $row['comp'] = "Cmp" . $row['comp']; // Compression is numerically codified... fputcsv($fp, array_values($row), ',', '"'); } // Retrieve file model from DB $query = "SELECT file FROM aloja_ml.model_storage WHERE id_hash='" . $current_model . "' AND type='learner';"; $result = $dbml->query($query); $row = $result->fetch(); $content = $row['file']; $filemodel = getcwd() . '/cache/query/' . $current_model . '-object.rds'; $fp = fopen($filemodel, 'w'); fwrite($fp, $content); fclose($fp); // launch query exec('cd ' . getcwd() . '/cache/query ; touch ' . md5($config) . '.lock'); exec(getcwd() . '/resources/queue -c "cd ' . getcwd() . '/cache/query ; ' . getcwd() . '/resources/aloja_cli.r -m aloja_outlier_dataset -d ' . $cache_ds . ' -l ' . $current_model . ' -p sigma=' . $sigma_param . ':hdistance=3:saveall=' . md5($config) . ' > /dev/null 2>&1 ; rm -f ' . md5($config) . '.lock" > /dev/null 2>&1 &'); } $finished_process = file_exists(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv'); if ($finished_process && !$is_cached) { if (($handle = fopen(getcwd() . '/cache/query/' . md5($config) . '-resolutions.csv', 'r')) !== FALSE) { $header = fgetcsv($handle, 1000, ","); $token = 0; $query = "REPLACE INTO aloja_ml.resolutions (id_resolution,id_learner,id_exec,instance,model,dataslice,sigma,outlier_code,predicted,observed) VALUES "; while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $resolution = $data[0]; $pred_value = (int) $data[1] >= 100 ? (int) $data[1] : 100; $exec_value = (int) $data[2]; $selected_instance_pre = preg_replace('/\\s+/', '', $data[3]); $selected_instance_pre = str_replace(':', ',', $selected_instance_pre); $specific_id = $data[4]; if ($token > 0) { $query = $query . ","; } $token = 1; $query = $query . "('" . md5($config) . "','" . $current_model . "','" . $specific_id . "','" . $selected_instance_pre . "','" . $model_info . "','" . $slice_info . "','" . $sigma_param . "','" . $resolution . "','" . $pred_value . "','" . $exec_value . "') "; } if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving tree into DB'); } } // Store file model to DB $filemodel = getcwd() . '/cache/query/' . md5($config) . '-object.rds'; $fp = fopen($filemodel, 'r'); $content = fread($fp, filesize($filemodel)); $content = addslashes($content); fclose($fp); $query = "INSERT INTO aloja_ml.model_storage (id_hash,type,file) VALUES ('" . md5($config) . "','resolution','" . $content . "');"; if ($dbml->query($query) === FALSE) { throw new \Exception('Error when saving file resolution into DB'); } // Remove temporary files $output = shell_exec('rm -f ' . getcwd() . '/cache/query/' . md5($config) . '-*.csv'); $is_cached = true; } if (!$is_cached) { $jsonData = $jsonOuts = $jsonWarns = $jsonHeader = $jsonTable = '[]'; $must_wait = 'YES'; if (isset($dump)) { echo "1"; exit(0); } } else { $must_wait = 'NO'; $query = "SELECT predicted, observed, outlier_code, id_exec, instance FROM aloja_ml.resolutions WHERE id_resolution = '" . md5($config) . "' LIMIT 5000"; // FIXME - CLUMSY PATCH FOR BYPASS THE BUG FROM HIGHCHARTS... REMEMBER TO ERASE THIS LINE WHEN THE BUG IS SOLVED $result = $dbml->query($query); foreach ($result as $row) { $entry = array('x' => (int) $row['predicted'], 'y' => (int) $row['observed'], 'name' => $row['instance'], 'id' => (int) $row['id_exec']); if ($row['outlier_code'] == 0) { $jsonData[] = $entry; } if ($row['outlier_code'] == 1) { $jsonWarns[] = $entry; } if ($row['outlier_code'] == 2) { $jsonOuts[] = $entry; } $jsonTable .= ($jsonTable == '' ? '' : ',') . '["' . ($row['outlier_code'] == 0 ? 'Legitimate' : ($row['outlier_code'] == 1 ? 'Warning' : 'Outlier')) . '","' . $row['predicted'] . '","' . $row['observed'] . '","' . str_replace(",", "\",\"", $row['instance']) . '","' . $row['id_exec'] . '"]'; } $query_var = "SELECT MAX(predicted) as max_x, MAX(observed) as max_y FROM aloja_ml.resolutions WHERE id_resolution = '" . md5($config) . "' LIMIT 5000"; $result = $dbml->query($query_var); $row = $result->fetch(); $max_x = $row['max_x']; $max_y = $row['max_y']; $header = array('Prediction', 'Observed', 'Benchmark', 'Net', 'Disk', 'Maps', 'IO.SFS', 'Rep', 'IO.FBuf', 'Comp', 'Blk.Size', 'Cluster', 'Datanodes', 'VM.OS', 'VM.Cores', 'VM.RAM', 'Provider', 'VM.Size', 'Type', 'Bench.Type', 'Version', 'ID'); $jsonHeader = '[{title:""}'; foreach ($header as $title) { $jsonHeader = $jsonHeader . ',{title:"' . $title . '"}'; } $jsonHeader = $jsonHeader . ']'; $jsonData = json_encode($jsonData); $jsonWarns = json_encode($jsonWarns); $jsonOuts = json_encode($jsonOuts); $jsonTable = '[' . $jsonTable . ']'; // Dump case if (isset($dump)) { echo str_replace(array("[", "]", "{title:\"", "\"}"), array('', '', ''), $jsonHeader) . "\n"; echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonOuts); echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonWarns); echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonData); exit(0); } // Register case if (isset($register)) { // Update the predictions table $query_var = "UPDATE aloja_ml.predictions as p, aloja_ml.resolutions as r\n\t\t\t\t\t\t\t\tSET p.outlier = r.outlier_code\n\t\t\t\t\t\t\t\tWHERE r.id_exec = p.id_exec\n\t\t\t\t\t\t\t\t\tAND r.id_resolution = '" . md5($config) . "'\n\t\t\t\t\t\t\t\t\tAND p.id_learner = '" . $current_model . "'"; if ($dbml->query($query_var) === FALSE) { throw new \Exception('Error when updating aloja_ml.predictions in DB'); } } } } else { throw new \Exception('There are no prediction models trained for such parameters. Train at least one model in "ML Prediction" section.'); } $dbml = null; } catch (\Exception $e) { $this->container->getTwig()->addGlobal('message', $e->getMessage()); $jsonData = $jsonOuts = $jsonWarns = $jsonHeader = $jsonTable = '[]'; $must_wait = "NO"; $dbml = null; } $return_params = array('jsonData' => $jsonData, 'jsonWarns' => $jsonWarns, 'jsonOuts' => $jsonOuts, 'jsonHeader' => $jsonHeader, 'jsonTable' => $jsonTable, 'max_p' => min(array($max_x, $max_y)), 'outexps' => $jsonResolutions, 'header_outexps' => $jsonResolutionsHeader, 'must_wait' => $must_wait, 'models' => $model_html, 'models_id' => $possible_models_id, 'other_models_id' => $other_models, 'current_model' => $current_model, 'resolution_id' => md5($config), 'model_info' => $model_info, 'slice_info' => $slice_info, 'sigma' => $sigma_param, 'message' => $message, 'instance' => $instance); $this->filters->setCurrentChoices('current_model', array_merge($possible_models_id, array('---Other models---'), $other_models)); return $this->render('mltemplate/mloutliers.html.twig', $return_params); }