Ejemplo n.º 1
0
 /**
  * Controller action for the "find attributes" ML page.
  *
  * Flow, as implemented below:
  *  1. Builds the ML filters and composes the model info / instance(s) through MLUtils.
  *  2. Looks up the learners matching the selection; when none is explicitly chosen,
  *     picks the compatible learner with the lowest MAE over its stored predictions.
  *  3. If the result for this configuration is not cached in aloja_ml.trees, queues one
  *     background prediction job per instance (tracked through lock files in cache/ml),
  *     and, once all jobs have finished, stores the predictions and the descriptive tree.
  *  4. Renders the template, or answers in the short protocol used by the 'dump' and
  *     'pass' GET parameters.
  *
  * GET parameters handled here:
  *  - dump: echo the result (or "1" = still computing, "-1" = no model) and exit.
  *  - pass: return the result as a string instead of rendering
  *          ("1" = still computing, "2" = ready when pass == 2, "-1" = no model,
  *          "-2" = an exception occurred; the data dump when pass == 1).
  *
  * @return mixed The rendered template, or one of the string answers listed above.
  */
 public function mlfindattributesAction()
 {
     $current_model = $model_info = $instance = $tree_descriptor = $model_html = $config = '';
     $instances = array();
     $possible_models = $possible_models_id = $other_models = array();
     $jsonData = $jsonHeader = '[]';
     $jsonFAttrs = $jsonFAttrsHeader = '[]';
     $mae = $rae = 0;
     $must_wait = 'NO';
     try {
         $dbml = new \PDO($this->container->get('config')['db_conn_chain'], $this->container->get('config')['mysql_user'], $this->container->get('config')['mysql_pwd']);
         $dbml->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);
         $dbml->setAttribute(\PDO::ATTR_EMULATE_PREPARES, false);
         $db = $this->container->getDBUtils();

         // Quotes a scalar as an SQL string literal; used where the number of values
         // is dynamic (IN lists, bulk inserts) and placeholders would be unwieldy.
         $quote = function ($value) use ($dbml) {
             return $dbml->quote((string) $value);
         };

         // FIXME - This must be counted BEFORE building filters, as filters inject rubbish in GET when there are no parameters...
         $instructions = count($_GET) <= 1;
         if (array_key_exists('dump', $_GET)) {
             $dump = $_GET["dump"];
             unset($_GET["dump"]);
         }
         if (array_key_exists('pass', $_GET)) {
             $pass = $_GET["pass"];
             unset($_GET["pass"]);
         }

         $this->buildFilters(array(
             'current_model' => array(
                 'type' => 'selectOne',
                 'default' => null,
                 'label' => 'Model to use: ',
                 'generateChoices' => function () {
                     return array();
                 },
                 'parseFunction' => function () {
                     $choice = isset($_GET['current_model']) ? $_GET['current_model'] : array("");
                     return array('whereClause' => '', 'currentChoice' => $choice);
                 },
                 'filterGroup' => 'MLearning',
             ),
             'unseen' => array(
                 'type' => 'checkbox',
                 'default' => 1,
                 'label' => 'Predict with unseen atributes &#9888;',
                 'parseFunction' => function () {
                     // NOTE(review): this condition is a contradiction (isset && !isset), so the
                     // choice is always 1 and the checkbox can never be switched off. The intended
                     // test (probably "form submitted without 'unseen'") cannot be determined from
                     // this method alone; behaviour is deliberately left unchanged - TODO confirm.
                     $choice = isset($_GET['unseen']) && !isset($_GET['unseen']) ? 0 : 1;
                     return array('whereClause' => '', 'currentChoice' => $choice);
                 },
                 'filterGroup' => 'MLearning',
             ),
             'minexetime' => array('default' => 0),
             'valid' => array('default' => 0),
             'filter' => array('default' => 0),
             'prepares' => array('default' => 1),
         ));
         $this->buildFilterGroups(array('MLearning' => array('label' => 'Machine Learning', 'tabOpenDefault' => true, 'filters' => array('current_model', 'unseen'))));

         // Order is important
         $param_names = array('bench', 'net', 'disk', 'maps', 'iosf', 'replication', 'iofilebuf', 'comp', 'blk_size', 'id_cluster', 'datanodes', 'vm_OS', 'vm_cores', 'vm_RAM', 'provider', 'vm_size', 'type', 'bench_type', 'hadoop_version', 'datasize', 'scale_factor');
         $params = $this->filters->getFiltersSelectedChoices($param_names);
         foreach ($param_names as $p) {
             if (!is_null($params[$p]) && is_array($params[$p])) {
                 sort($params[$p]);
             }
         }
         $learnParams = $this->filters->getFiltersSelectedChoices(array('current_model', 'unseen'));
         $param_current_model = $learnParams['current_model'];
         $unseen = $learnParams['unseen'] ? true : false;
         $where_configs = $this->filters->getWhereClause();
         $where_configs = str_replace("AND .", "AND ", $where_configs);

         // compose instance
         $model_info = MLUtils::generateModelInfo($this->filters, $param_names, $params, $unseen);
         $instance = MLUtils::generateSimpleInstance($this->filters, $param_names, $params, $unseen);
         $instances = MLUtils::generateInstances($this->filters, $param_names, $params, $unseen, $db);

         // Model for filling
         MLUtils::findMatchingModels($model_info, $possible_models, $possible_models_id, $dbml);
         $current_model = '';
         if (!is_null($possible_models_id) && in_array($param_current_model, $possible_models_id)) {
             $current_model = $param_current_model;
         }

         // Quoted, comma-separated list of compatible learner ids for IN (...) clauses.
         // An empty list degrades to IN (''), exactly what the plain implode() used to produce.
         $compatible_in = empty($possible_models_id) ? "''" : implode(',', array_map($quote, $possible_models_id));

         // Other models for filling
         $where_models = '';
         if (!empty($possible_models_id)) {
             $where_models = " WHERE id_learner NOT IN (" . $compatible_in . ")";
         }
         $result = $dbml->query("SELECT id_learner FROM aloja_ml.learners" . $where_models);
         foreach ($result as $row) {
             $other_models[] = $row['id_learner'];
         }

         if ($instructions) {
             $result = $dbml->query("SELECT id_learner, model, algorithm FROM aloja_ml.learners");
             foreach ($result as $row) {
                 $model_html = $model_html . "<li>" . $row['id_learner'] . " => " . $row['algorithm'] . " : " . $row['model'] . "</li>";
             }
             MLUtils::getIndexFAttrs($jsonFAttrs, $jsonFAttrsHeader, $dbml);
             $this->filters->setCurrentChoices('current_model', array_merge($possible_models_id, array('---Other models---'), $other_models));
             return $this->render('mltemplate/mlfindattributes.html.twig', array('fattrs' => $jsonFAttrs, 'header_fattrs' => $jsonFAttrsHeader, 'models' => $model_html, 'instructions' => 'YES'));
         }

         if (!empty($possible_models_id) || $current_model != "") {
             $result = $dbml->query("SELECT id_learner, model, algorithm, CASE WHEN `id_learner` IN (" . $compatible_in . ") THEN 'COMPATIBLE' ELSE 'NOT MATCHED' END AS compatible FROM aloja_ml.learners");
             foreach ($result as $row) {
                 $model_html = $model_html . "<li>" . $row['id_learner'] . " => " . $row['algorithm'] . " : " . $row['compatible'] . " : " . $row['model'] . "</li>";
             }

             if ($current_model == "") {
                 // Pick the compatible learner with the lowest MAE. The GROUP BY is required:
                 // without it the aggregate collapses every learner into a single row and the
                 // returned id_learner is arbitrary, making ORDER BY ... LIMIT 1 meaningless.
                 $query = "SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE, p.id_learner FROM aloja_ml.predictions p, aloja_ml.learners l WHERE l.id_learner = p.id_learner AND p.id_learner IN (" . $compatible_in . ") AND predict_code > 0 GROUP BY p.id_learner ORDER BY MAE LIMIT 1";
                 $row = $dbml->query($query)->fetch();
                 if ($row !== false && !is_null($row['id_learner'])) {
                     $current_model = $row['id_learner'];
                 } else {
                     // No stored predictions to rank by: fall back to the first compatible model.
                     $current_model = reset($possible_models_id);
                 }
             }

             $config = $instance . '-' . $current_model . '-' . ($unseen ? 'U' : 'R');
             $config_hash = md5($config);
             $cache_dir = getcwd() . '/cache/ml';
             $tmp_file = $config_hash . '.tmp';
             $tmp_path = $cache_dir . '/' . $tmp_file;

             $is_cached_mysql = $dbml->query("SELECT count(*) as total FROM aloja_ml.trees WHERE id_findattrs = '" . $config_hash . "'");
             $tmp_result = $is_cached_mysql->fetch();
             $is_cached = $tmp_result['total'] > 0;
             $in_process = file_exists($cache_dir . '/' . $config_hash . '.lock');

             if (!$in_process && !$is_cached) {
                 // Retrieve file model from DB
                 $stmt = $dbml->prepare("SELECT file FROM aloja_ml.model_storage WHERE id_hash = ? AND type='learner'");
                 $stmt->execute(array($current_model));
                 $row = $stmt->fetch();
                 if ($row === false) {
                     throw new \Exception('Model file not found in storage');
                 }
                 $filemodel = $cache_dir . '/' . $current_model . '-object.rds';
                 if (file_put_contents($filemodel, $row['file']) === false) {
                     throw new \Exception('Could not write model file into cache');
                 }

                 // Mark the configuration as "in process" and discard any stale output
                 touch($cache_dir . '/' . $config_hash . '.lock');
                 if (is_file($tmp_path)) {
                     unlink($tmp_path);
                 }

                 // Run the predictor: one queued background job per instance. Each job appends
                 // its prediction to the temporary file and drops a numbered lock when done.
                 $count = 1;
                 foreach ($instances as $inst) {
                     $job = 'cd ' . escapeshellarg($cache_dir)
                         . ' ; ../../resources/aloja_cli.r -m aloja_predict_instance -l ' . escapeshellarg((string) $current_model)
                         . ' -p inst_predict=' . escapeshellarg((string) $inst)
                         . ' -v | grep -v \'Prediction\' >>' . escapeshellarg($tmp_file)
                         . ' 2>/dev/null; touch ' . escapeshellarg($config_hash . '-' . $count++ . '.lock');
                     exec(escapeshellarg(getcwd() . '/resources/queue') . ' -d -c ' . escapeshellarg($job) . ' >/dev/null 2>&1 &');
                 }
             }

             // All jobs are done when there is one numbered lock per instance
             $done_locks = glob($cache_dir . '/' . $config_hash . '-*.lock');
             $finished_process = count($done_locks === false ? array() : $done_locks) == count($instances);

             if ($finished_process && !$is_cached) {
                 // Read results and dump to DB
                 $query_pattern = "INSERT IGNORE INTO aloja_ml.predictions (\n\t\t\t\t\t\tid_exec,exe_time,bench,net,disk,maps,iosf,replication,iofilebuf,comp,blk_size,\n\t\t\t\t\t\tid_cluster,datanodes,vm_OS,vm_cores,vm_RAM,provider,vm_size,type,bench_type,hadoop_version,\n\t\t\t\t\t\tdatasize,scale_factor,\n\t\t\t\t\t\tnet_maxtxkbs,net_maxrxkbs,net_maxtxpcks,net_maxrxpcks,net_maxtxcmps,net_maxrxcmps,net_maxrxmscts,\n\t\t\t\t\t\tdisk_maxtps,disk_maxsvctm,disk_maxrds,disk_maxwrs,disk_maxrqsz,disk_maxqusz,disk_maxawait, disk_maxutil,\n\t\t\t\t\t\tpred_time,id_learner,instance,predict_code) VALUES ";

                 // Sends the pending value tuples as one multi-row INSERT
                 $flush_rows = function (array $rows) use ($dbml, $query_pattern) {
                     if (!empty($rows) && $dbml->query($query_pattern . implode(',', $rows)) === false) {
                         throw new \Exception('Error when saving into DB');
                     }
                 };

                 $handle = fopen($tmp_path, "r");
                 if ($handle !== false) {
                     // Prepared once, executed per line of predictor output
                     $real_stmt = $dbml->prepare("SELECT AVG(exe_time) as AVG, id_exec, outlier FROM aloja_ml.predictions WHERE instance = ? AND predict_code > 0");
                     $seen_stmt = $dbml->prepare("SELECT count(*) as num FROM aloja_ml.predictions WHERE instance = ? AND id_learner = ?");

                     $i = 0;
                     $pending_rows = array();
                     while (($line = fgets($handle, 1000)) !== false && $i < 1000) {
                         if ($line == '') {
                             break;
                         }
                         // Fields used below: [1] = instance, [2] = predicted time
                         $inst_aux = preg_split("/\\s+/", $line);
                         if (isset($inst_aux[1], $inst_aux[2])) {
                             // Fetch Real Value
                             $real_stmt->execute(array($inst_aux[1]));
                             $row = $real_stmt->fetch();
                             $real_stmt->closeCursor();
                             $realexecval = is_null($row['AVG']) || $row['outlier'] == 2 ? 0 : $row['AVG'];
                             $realid_exec = is_null($row['id_exec']) || $row['outlier'] == 2 ? 0 : $row['id_exec'];

                             $seen_stmt->execute(array($inst_aux[1], $current_model));
                             $row = $seen_stmt->fetch();
                             $seen_stmt->closeCursor();

                             // Insert instance values (only if not yet stored for this learner)
                             if ($row['num'] == 0) {
                                 // Strip the Cmp/Cl prefixes so the columns receive the bare numbers
                                 $selected_instance = preg_replace('/,Cmp(\\d+),/', ',${1},', $inst_aux[1]);
                                 $selected_instance = preg_replace('/,Cl(\\d+),/', ',${1},', $selected_instance);
                                 $fields = array_merge(
                                     array($realid_exec, $realexecval),
                                     explode(',', $selected_instance),
                                     array($inst_aux[2], $current_model, $inst_aux[1], '0')
                                 );
                                 $pending_rows[] = '(' . implode(',', array_map($quote, $fields)) . ')';
                             }
                         }
                         $i++;
                         // Flush in batches so a single INSERT statement never grows unbounded
                         if ($i % 100 == 0 && !empty($pending_rows)) {
                             $flush_rows($pending_rows);
                             $pending_rows = array();
                         }
                     }
                     $flush_rows($pending_rows);
                     fclose($handle);

                     // Descriptive Tree
                     $tree_cmd = escapeshellarg(getcwd() . '/resources/aloja_cli.r')
                         . ' -m aloja_representative_tree -p '
                         . escapeshellarg('method=ordered:dump_file=' . $tmp_path . ':output=nodejson')
                         . ' -v 2> /dev/null';
                     $tree_descriptor = (string) shell_exec($tree_cmd);
                     // Drop the 5 leading and 2 trailing characters that wrap the tool's output
                     $tree_descriptor = (string) substr($tree_descriptor, 5, -2);
                     $tree_descriptor = str_replace("\\\"", "\"", $tree_descriptor);
                     $tree_descriptor = str_replace("desc:\"\"", "desc:\"---\"", $tree_descriptor);

                     $stmt = $dbml->prepare("INSERT INTO aloja_ml.trees (id_findattrs,id_learner,instance,model,tree_code) VALUES (?,?,?,?,?)");
                     if ($stmt->execute(array($config_hash, (string) $current_model, (string) $instance, (string) $model_info, $tree_descriptor)) === false) {
                         throw new \Exception('Error when saving tree into DB');
                     }

                     // remove remaining locks
                     $stale_locks = glob($cache_dir . '/' . $config_hash . '*.lock');
                     foreach ($stale_locks === false ? array() : $stale_locks as $lock_file) {
                         if (is_file($lock_file)) {
                             unlink($lock_file);
                         }
                     }
                     // Remove temporal files
                     if (is_file($tmp_path)) {
                         unlink($tmp_path);
                     }
                     $is_cached = true;
                 }
             }

             if (!$is_cached) {
                 $must_wait = 'YES';
                 if (isset($dump)) {
                     $dbml = null;
                     echo "1";
                     exit(0);
                 }
                 if (isset($pass)) {
                     $dbml = null;
                     return "1";
                 }
                 // Sentinel handled by the catch block below (no message is shown for it)
                 throw new \Exception('WAIT');
             }
             if (isset($pass) && $pass == 2) {
                 $dbml = null;
                 return "2";
             }

             // Fetch results and compose JSON
             $header = array('Benchmark', 'Net', 'Disk', 'Maps', 'IO.SFS', 'Rep', 'IO.FBuf', 'Comp', 'Blk.Size', 'Cluster', 'Datanodes', 'VM.OS', 'VM.Cores', 'VM.RAM', 'Provider', 'VM.Size', 'Type', 'Bench.Type', 'Version', 'Datasize', 'Scale.Factor', 'Prediction', 'Observed');
             $jsonHeader = '[{title:""}';
             foreach ($header as $title) {
                 $jsonHeader = $jsonHeader . ',{title:"' . $title . '"}';
             }
             $jsonHeader = $jsonHeader . ']';

             $query = "SELECT @i:=@i+1 as num, instance, AVG(pred_time) as pred_time, AVG(exe_time) as exe_time FROM aloja_ml.predictions AS e, (SELECT @i:=0) d WHERE id_learner=" . $quote($current_model) . " " . $where_configs . " GROUP BY instance";
             $result = $dbml->query($query);
             $json_rows = array();
             foreach ($result as $row) {
                 $json_rows[] = "['" . $row['num'] . "','" . implode("','", array_slice(explode(",", $row['instance']), 0, 21)) . "','" . $row['pred_time'] . "','" . $row['exe_time'] . "']";
             }
             $jsonData = '[' . implode(',', $json_rows) . ']';
             // Replace from the highest code down: going upwards, 'Cmp1' would also hit the
             // prefix of 'Cmp10'..'Cmp19' and corrupt them before their own turn comes.
             foreach (range(33, 1) as $value) {
                 $jsonData = str_replace('Cmp' . $value, Utils::getCompressionName($value), $jsonData);
             }

             // Fetch MAE & RAE values (predictions are stored under the learner id, not the config hash)
             $stmt = $dbml->prepare("SELECT AVG(ABS(exe_time - pred_time)) AS MAE, AVG(ABS(exe_time - pred_time)/exe_time) AS RAE FROM aloja_ml.predictions AS e WHERE id_learner = ? AND predict_code > 0");
             $stmt->execute(array($current_model));
             $row = $stmt->fetch();
             $mae = $row['MAE'];
             $rae = $row['RAE'];

             // Dump case
             if (isset($dump)) {
                 echo "ID" . str_replace(array("[", "]", "{title:\"", "\"}"), array('', '', ''), $jsonHeader) . "\n";
                 echo str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonData);
                 $dbml = null;
                 exit(0);
             }
             if (isset($pass) && $pass == 1) {
                 $retval = "ID" . str_replace(array("[", "]", "{title:\"", "\"}"), array('', '', ''), $jsonHeader) . "\n";
                 $retval .= str_replace(array('],[', '[[', ']]'), array("\n", '', ''), $jsonData);
                 $dbml = null;
                 return $retval;
             }

             // Display Descriptive Tree
             $query = "SELECT tree_code FROM aloja_ml.trees WHERE id_findattrs = '" . $config_hash . "'";
             $result = $dbml->query($query);
             $row = $result->fetch();
             $tree_descriptor = $row !== false ? $row['tree_code'] : '';
         } else {
             if (isset($dump)) {
                 echo "-1";
                 exit(0);
             }
             if (isset($pass)) {
                 return "-1";
             }
             throw new \Exception("There are no prediction models trained for such parameters. Train at least one model in 'ML Prediction' section.");
         }
     } catch (\Exception $e) {
         // 'WAIT' only signals "still computing"; every other message is shown to the user
         if ($e->getMessage() != "WAIT") {
             $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n");
         }
         $jsonData = $jsonHeader = '[]';
         $instances = array();
         if (isset($pass)) {
             return "-2";
         }
     }
     $dbml = null;
     $return_params = array(
         'instance' => $instance,
         'jsonData' => $jsonData,
         'jsonHeader' => $jsonHeader,
         'fattrs' => $jsonFAttrs,
         'header_fattrs' => $jsonFAttrsHeader,
         'models' => $model_html,
         'models_id' => $possible_models_id,
         'other_models_id' => $other_models,
         'current_model' => $current_model,
         'mae' => $mae,
         'rae' => $rae,
         'must_wait' => $must_wait,
         'instances' => implode("<br/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;", $instances),
         'model_info' => $model_info,
         'id_findattr' => md5($config),
         'tree_descriptor' => $tree_descriptor,
     );
     $this->filters->setCurrentChoices('current_model', array_merge($possible_models_id, array('---Other models---'), $other_models));
     return $this->render('mltemplate/mlfindattributes.html.twig', $return_params);
 }