/**
 * Renders the "Best Run Configuration" page.
 *
 * Builds the page filters, lists the available ML learners, queries the
 * observed executions and/or the stored predictions (depending on the
 * 'uobsr' / 'upred' filter choices) and selects the best row, either by
 * execution time or by cost.
 *
 * Any exception raised while querying is caught and exposed to the template
 * through the Twig global 'message'; the page is still rendered.
 *
 * @return mixed Whatever $this->render() returns for the bestconfig template.
 */
public function bestConfigAction()
{
    $db = $this->container->getDBUtils();

    $this->buildFilters(array(
        'bench' => array(
            'default' => array('terasort'),
            'type' => 'selectOne',
            'label' => 'Benchmark:',
        ),
        'ordertype' => array(
            'default' => array('cost'),
            'type' => 'selectOne',
            'label' => 'Best config by:',
            'generateChoices' => function () {
                return array('exe_time', 'cost');
            },
            'parseFunction' => function () {
                $ordertype = isset($_GET['ordertype']) ? $_GET['ordertype'] : 'cost';
                return array('currentChoice' => $ordertype, 'whereClause' => "");
            },
            'beautifier' => function ($value) {
                if ($value == 'exe_time') {
                    return 'Execution time';
                }
                return 'Cost-effectiveness';
            },
            'filterGroup' => 'basic',
        ),
    ));
    $whereClause = $this->filters->getWhereClause();

    // HTML list describing every learner stored in aloja_ml.learners.
    $model_html = '';
    $model_info = $db->get_rows("SELECT id_learner, model, algorithm, dataslice FROM aloja_ml.learners");
    if ($model_info) {
        foreach ($model_info as $row) {
            $model_html = $model_html . "<li><b>" . $row['id_learner'] . "</b> => " . $row['algorithm'] . " : " . $row['model'] . " : " . $row['dataslice'] . "</li>";
        }
    }

    $clusterCosts = Utils::generateCostsFilters($db);
    $bestexec = '';
    $cluster = '';

    // Defaults so the code after the try block never reads an undefined
    // variable if something throws before they are assigned.
    // NOTE(review): the 'ordertype' filter above defaults to 'cost' while this
    // action falls back to 'exe_time' — confirm which default is intended.
    $order_type = 'exe_time';
    $filterExecs = '';

    try {
        // The order type ends up inside an ORDER BY clause, so only the two
        // choices offered by the filter are accepted; anything else (including
        // an empty value) falls back to 'exe_time'.
        $requestedOrder = Utils::get_GET_string('ordertype');
        if (in_array($requestedOrder, array('exe_time', 'cost'), true)) {
            $order_type = $requestedOrder;
        }

        $filterExecs = DBUtils::getFilterExecs();
        $params = $this->filters->getFiltersSelectedChoices(array('prediction_model', 'upred', 'uobsr'));

        // The predictions table uses pred_time / creation_time where the execs
        // table uses exe_time / start_time.
        $whereClauseML = str_replace("exe_time", "pred_time", $whereClause);
        $whereClauseML = str_replace("start_time", "creation_time", $whereClauseML);

        $groupBy = 'e.net,e.disk,e.bench_type,e.maps,e.iosf,e.replication,e.iofilebuf,e.comp,e.blk_size,e.hadoop_version';

        $queryObserved = "\n\t\t\t\t\tSELECT (e.exe_time/3600)*c.cost_hour as cost, e.id_exec,e.exec,e.bench,e.exe_time,e.net,e.disk,e.bench_type,e.maps,e.iosf,e.replication,e.iofilebuf,e.comp,e.blk_size,e.hadoop_version, c.*"
            . "\n\t\t\t\t\tFROM aloja2.execs e JOIN aloja2.clusters c USING (id_cluster)"
            . "\n\t\t\t\t\tLEFT JOIN aloja_ml.predictions AS p USING (id_exec)"
            . "\n\t\t\t\t\tWHERE 1 {$filterExecs} {$whereClause}"
            . "\n\t\t\t\t\tGROUP BY {$groupBy}"
            . "\n\t\t\t\t\tORDER BY {$order_type} ASC\n\t\t\t\t";

        // NOTE(review): $params['prediction_model'] is concatenated into the
        // SQL text unescaped. It should be bound/escaped through the DB layer
        // or validated against the known id_learner values.
        $queryPredictions = "SELECT (e.exe_time/3600)*c.cost_hour AS cost,e.id_exec,e.exec,CONCAT('Predicted ',e.bench) as bench,e.pred_time AS exe_time,e.net,e.disk,e.bench_type,e.maps,e.iosf,e.replication,e.iofilebuf,e.comp,e.blk_size,e.hadoop_version,c.*"
            . "\n\t\t\t\t\tFROM aloja_ml.predictions AS e JOIN aloja2.clusters AS c USING (id_cluster)"
            . "\n\t\t\t\t\tWHERE 1 {$filterExecs} " . str_replace("p.", "e.", $whereClauseML) . " AND id_learner = '" . $params['prediction_model'] . "'"
            . "\n\t\t\t\t\tGROUP BY {$groupBy}";

        // Observed executions are the default data source.
        $query = $queryObserved;
        if ($params['uobsr'] == 1 && $params['upred'] == 1) {
            $query = "\n\t\t\t\t\t({$queryObserved})\n\t\t\t\t\tUNION\n\t\t\t\t\t({$queryPredictions})\n\t\t\t\t\tORDER BY {$order_type} ASC\n\t\t\t\t";
        } elseif ($params['uobsr'] == 0 && $params['upred'] == 1) {
            $query = $queryPredictions;
        } elseif ($params['uobsr'] == 0 && $params['upred'] == 0) {
            $this->container->getTwig()->addGlobal('message', "Warning: No data selected (Predictions|Observations) from the ML Filters. Adding the Observed executions to the figure by default.\n");
        }

        // $this->getContainer ()->getLog ()->addInfo ( 'BestConfig query: ' . $query );
        $rows = $db->get_rows($query);
        if (!$rows) {
            throw new \Exception("No results for query!");
        }

        $bestexec = $rows[0];
        if ($order_type == 'cost') {
            // The SQL "cost" column only accounts for cost_hour; recompute the
            // cost of every row with Utils::getExecutionCost and keep the
            // first row with the lowest value.
            $minCost = -1;
            $minCostIdx = 0;
            foreach (array_keys($rows) as $key) {
                $cost = Utils::getExecutionCost($rows[$key], $clusterCosts);
                if ($cost < $minCost || $minCost == -1) {
                    $minCost = $cost;
                    $minCostIdx = $key;
                }
                $rows[$key]['cost'] = $cost;
            }
            $bestexec = $rows[$minCostIdx];
        } else {
            // Rows are already ordered by exe_time; only the winner needs its cost.
            $bestexec['cost'] = Utils::getExecutionCost($bestexec, $clusterCosts);
        }

        $cluster = $bestexec['name'];
        Utils::makeExecInfoBeauty($bestexec);
    } catch (\Exception $e) {
        $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n");
    }

    $clusters = $db->get_rows("SELECT * FROM aloja2.clusters WHERE id_cluster IN (SELECT DISTINCT id_cluster FROM aloja2.execs e WHERE 1 {$filterExecs});");

    return $this->render('configEvaluationViews/bestconfig.html.twig', array(
        'title' => 'Best Run Configuration',
        'bestexec' => $bestexec,
        'cluster' => $cluster,
        'order_type' => $order_type,
        'clusters' => $clusters,
        'clusterCosts' => $clusterCosts,
        'models' => $model_html,
    ));
}
/**
 * Renders the "best performance by cost" chart, one point per cluster.
 *
 * Steps:
 *  1. Query the fastest execution for each (cluster, net, disk) combination
 *     together with the number of executions in that combination.
 *  2. Compute the cost of each returned row with Utils::getExecutionCost().
 *  3. For each cluster keep the cheapest row and the total execution count.
 *  4. Normalise execution time and cost of the kept rows to [0.01, 1].
 *  5. Emit one Highcharts series entry per cluster and render the template.
 *
 * Any exception raised while querying is caught and exposed to the template
 * through the Twig global 'message'; the page is then rendered with whatever
 * was collected so far (possibly nothing).
 *
 * @return mixed Whatever $this->render() returns for the template.
 */
public function BestCostPerfClusterEvaluationAction()
{
    $filter_execs = DBUtils::getFilterExecs();
    $dbUtils = $this->container->getDBUtils();

    // Defined before the try block so rendering still works when the query
    // fails or returns nothing.
    $bestExecs = array();

    try {
        // The sub-query needs its own table aliases.
        $innerQueryWhere = str_replace("e.", "e2.", $this->whereClause);
        $innerQueryWhere = str_replace("c.", "c2.", $innerQueryWhere);
        $innerQueryWhere = str_replace("p.", "p2.", $innerQueryWhere);

        $query = "SELECT t.scount as count, e.exe_time,e.net,e.disk,e.bench,e.bench_type,e.maps,e.iosf,e.replication,e.iofilebuf,e.comp,e.blk_size,e.hadoop_version,e.exec, c.name as clustername,c.* \n"
            . " \t\t FROM aloja2.execs e JOIN aloja2.clusters c USING (id_cluster)\n"
            . " \t\t LEFT JOIN aloja_ml.predictions p USING (id_exec)\n"
            . " \t\t INNER JOIN (SELECT count(*) as scount, MIN(e2.exe_time) minexe FROM aloja2.execs e2 JOIN aloja2.clusters c2 USING(id_cluster)\n"
            . " \t\t\t\t\t LEFT JOIN aloja_ml.predictions p2 USING (id_exec) WHERE 1 {$filter_execs} {$innerQueryWhere} GROUP BY c2.name,e2.net,e2.disk ORDER BY c2.name ASC)\n"
            . " \t\tt ON e.exe_time = t.minexe WHERE 1 {$filter_execs} {$this->whereClause}\n"
            . " \t\t GROUP BY c.name,e.net,e.disk ORDER BY c.name ASC;";

        $execs = $dbUtils->get_rows($query);
        if (!$execs) {
            throw new \Exception("No results for query!");
        }

        // Rows arrive ordered by cluster name, so every run of rows with the
        // same name is one cluster. Keep the first cheapest row of each run
        // and add up the per-row execution counts.
        $previousCluster = null;
        $current = -1; // index in $bestExecs of the cluster being accumulated
        foreach ($execs as $row) {
            $row['cost_std'] = Utils::getExecutionCost($row, $this->clusterCosts);

            if ($current < 0 || $previousCluster != $row['name']) {
                $row['countexecs'] = 0;
                $bestExecs[] = $row;
                $current = count($bestExecs) - 1;
                $previousCluster = $row['name'];
            } elseif ($row['cost_std'] < $bestExecs[$current]['cost_std']) {
                $row['countexecs'] = $bestExecs[$current]['countexecs'];
                $bestExecs[$current] = $row;
            }

            $bestExecs[$current]['countexecs'] += $row['count'];
        }
    } catch (\Exception $e) {
        $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n");
    }

    // Value ranges used for normalisation:
    //   (exe_time - min_exe_time) / (max_exe_time - min_exe_time)
    //   (cost - min_cost) / (max_cost - min_cost)
    // A range of 0 (single cluster, or all clusters tied) leaves the value at
    // the 0.01 floor instead of dividing by zero.
    $minExeTime = 0;
    $minCost = 0;
    $exeTimeRange = 0;
    $costRange = 0;
    if (count($bestExecs) > 1) {
        $exeTimes = array();
        $costs = array();
        foreach ($bestExecs as $best) {
            $exeTimes[] = (float) $best['exe_time'];
            $costs[] = (float) $best['cost_std'];
        }
        $minExeTime = min($exeTimes);
        $minCost = min($costs);
        $exeTimeRange = max($exeTimes) - $minExeTime;
        $costRange = max($costs) - $minCost;
    }

    $seriesData = '';
    foreach ($bestExecs as $best) {
        $exeTimeStd = 0.01;
        $costTimeStd = 0.01;
        if ($exeTimeRange > 0) {
            $exeTimeStd = max(0.01, ($best['exe_time'] - $minExeTime) / $exeTimeRange);
        }
        if ($costRange > 0) {
            $costTimeStd = max(0.01, ($best['cost_std'] - $minCost) / $costRange);
        }

        $clusterDesc = "{$best['datanodes']} datanodes, " . round($best['vm_RAM'], 0) . " GB memory, {$best['vm_OS']}, {$best['provider']} {$best['type']}";
        $seriesData .= "{\n name: '" . $best['name'] . "',\n data: [[" . round($exeTimeStd, 3) . ", " . round($costTimeStd, 3) . ", " . $best['countexecs'] . "]],\n clusterdesc: '{$clusterDesc}', countExecs: '{$best['countexecs']}'\n },";
    }

    $clusters = $dbUtils->get_rows("SELECT * FROM aloja2.clusters c WHERE id_cluster IN (SELECT DISTINCT(id_cluster) FROM aloja2.execs e WHERE 1 {$filter_execs});");

    // Sort the per-cluster winners by ascending cost. The callback returns a
    // proper negative/zero/positive integer as usort() requires.
    usort($bestExecs, function ($a, $b) {
        if ($a['cost_std'] == $b['cost_std']) {
            return 0;
        }
        return $a['cost_std'] < $b['cost_std'] ? -1 : 1;
    });

    return $this->render('costPerfEvaluationViews/best_perf_by_cost_cluster.html.twig', array(
        'highcharts_js' => HighCharts::getHeader(),
        'clusterCosts' => $this->clusterCosts,
        'seriesData' => $seriesData,
        'bestExecs' => $bestExecs,
        'clusters' => $clusters,
    ));
}
/**
 * Renders the "Best Clusters Cost Evaluation" chart, one point per cluster.
 *
 * Steps:
 *  1. Read the benchmark and the remaining filters from the request.
 *  2. Query the fastest execution for each (cluster, net, disk) combination
 *     together with the number of executions in that combination.
 *  3. Compute the cost of each row with Utils::getExecutionCost(), using the
 *     per-cluster cost values from $_GET when present and the row's own
 *     values otherwise (the effective values are written back into $_GET so
 *     the template can show them).
 *  4. For each cluster keep the cheapest row and the total execution count.
 *  5. Normalise execution time and cost of the kept rows to [0.01, 1].
 *  6. Echo the rendered template.
 *
 * Any exception raised while querying is caught and exposed to the template
 * through the Twig global 'message'.
 *
 * @return void The rendered page is echoed.
 */
public function BestCostPerfClusterEvaluationAction()
{
    $filter_execs = DBUtils::getFilterExecs();
    $dbUtils = $this->container->getDBUtils();

    $preset = null;
    if (count($_GET) <= 1) {
        $preset = Utils::setDefaultPreset($dbUtils, 'Best Clusters Cost Evaluation');
    }
    $selPreset = isset($_GET['presets']) ? $_GET['presets'] : "none";

    // Defined before the try block so rendering still works when the query
    // fails or returns nothing.
    $bestExecs = array();

    try {
        // Only one benchmark is supported here. The parameter may arrive as
        // an array (benchs[]=x) or as a plain string; only arrays are indexed,
        // otherwise a string would be cut down to its first character.
        $bench = 'terasort';
        if (isset($_GET['benchs'])) {
            $requested = $_GET['benchs'];
            if (is_array($requested)) {
                $requested = isset($requested[0]) ? $requested[0] : '';
            }
            $_GET['benchs'] = $requested;

            // The value is embedded in the SQL text, so only plain benchmark
            // names are accepted; anything else falls back to the default.
            // NOTE(review): widen the character class if legitimate benchmark
            // names use other characters, or bind the value through the DB layer.
            if (is_string($requested) && preg_match('/^[A-Za-z0-9_ .+\-]+\z/', $requested)) {
                $bench = $requested;
            }
        }
        $bench_where = " AND bench = '{$bench}'";

        $where_configs = '';
        // $benchs = $dbUtils->read_params('benchs',$where_configs);
        $datefrom = Utils::read_params('datefrom', $where_configs);
        $dateto = Utils::read_params('dateto', $where_configs);
        $nets = Utils::read_params('nets', $where_configs);
        $disks = Utils::read_params('disks', $where_configs);
        $blk_sizes = Utils::read_params('blk_sizes', $where_configs);
        $comps = Utils::read_params('comps', $where_configs);
        $id_clusters = Utils::read_params('id_clusters', $where_configs);
        $mapss = Utils::read_params('mapss', $where_configs);
        $replications = Utils::read_params('replications', $where_configs);
        $iosfs = Utils::read_params('iosfs', $where_configs);
        $iofilebufs = Utils::read_params('iofilebufs', $where_configs);
        $datanodes = Utils::read_params('datanodess', $where_configs, false);
        $benchtype = Utils::read_params('bench_types', $where_configs);
        $vm_sizes = Utils::read_params('vm_sizes', $where_configs, false);
        $vm_coress = Utils::read_params('vm_coress', $where_configs, false);
        $vm_RAMs = Utils::read_params('vm_RAMs', $where_configs, false);
        $hadoop_versions = Utils::read_params('hadoop_versions', $where_configs, false);
        $types = Utils::read_params('types', $where_configs, false);
        $filters = Utils::read_params('filters', $where_configs, false);
        $allunchecked = isset($_GET['allunchecked']) ? $_GET['allunchecked'] : '';
        $minexetime = Utils::read_params('minexetime', $where_configs, false);
        $maxexetime = Utils::read_params('maxexetime', $where_configs, false);
        $provider = Utils::read_params('providers', $where_configs, false);
        $vm_OS = Utils::read_params('vm_OSs', $where_configs, false);

        $query = "SELECT t.scount as count, e.exe_time,e.net,e.disk,e.bench,e.bench_type,e.maps,e.iosf,e.replication,e.iofilebuf,e.comp,e.blk_size,e.hadoop_version,e.exec, c.name as clustername,c.* \n"
            . " \t\t FROM execs e JOIN clusters c USING (id_cluster)\n"
            . " \t\t INNER JOIN (SELECT count(*) as scount, MIN(exe_time) minexe FROM execs e JOIN clusters c USING(id_cluster)\n"
            . " \t\t\t\t\t WHERE 1 {$filter_execs} {$bench_where} {$where_configs} GROUP BY name,net,disk ORDER BY name ASC)\n"
            . " \t\tt ON e.exe_time = t.minexe WHERE 1 {$filter_execs} {$bench_where} {$where_configs} \n"
            . " \t\t GROUP BY c.name,e.net,e.disk ORDER BY c.name ASC;";

        $execs = $dbUtils->get_rows($query);
        if (!$execs) {
            throw new \Exception("No results for query!");
        }

        // Rows arrive ordered by cluster name, so every run of rows with the
        // same name is one cluster. Keep the first cheapest row of each run
        // and add up the per-row execution counts.
        $previousCluster = null;
        $current = -1; // index in $bestExecs of the cluster being accumulated
        foreach ($execs as $row) {
            $idCluster = $row['id_cluster'];

            // Request overrides win over the values stored with the cluster;
            // the effective value is written back for the template.
            $effective = array();
            foreach (array('cost_hour', 'cost_remote', 'cost_SSD', 'cost_IB') as $costKey) {
                $effective[$costKey] = isset($_GET[$costKey][$idCluster]) ? $_GET[$costKey][$idCluster] : $row[$costKey];
                $_GET[$costKey][$idCluster] = $effective[$costKey];
            }

            $row['cost_std'] = Utils::getExecutionCost(
                $row,
                $effective['cost_hour'],
                $effective['cost_remote'],
                $effective['cost_SSD'],
                $effective['cost_IB']
            );

            if ($current < 0 || $previousCluster != $row['name']) {
                $row['countexecs'] = 0;
                $bestExecs[] = $row;
                $current = count($bestExecs) - 1;
                $previousCluster = $row['name'];
            } elseif ($row['cost_std'] < $bestExecs[$current]['cost_std']) {
                $row['countexecs'] = $bestExecs[$current]['countexecs'];
                $bestExecs[$current] = $row;
            }

            $bestExecs[$current]['countexecs'] += $row['count'];
        }
    } catch (\Exception $e) {
        $this->container->getTwig()->addGlobal('message', $e->getMessage() . "\n");
    }

    // Value ranges used for normalisation:
    //   (exe_time - min_exe_time) / (max_exe_time - min_exe_time)
    //   (cost - min_cost) / (max_cost - min_cost)
    // A range of 0 (single cluster, or all clusters tied) leaves the value at
    // the 0.01 floor instead of dividing by zero.
    $minExeTime = 0;
    $minCost = 0;
    $exeTimeRange = 0;
    $costRange = 0;
    if (count($bestExecs) > 1) {
        $exeTimes = array();
        $costs = array();
        foreach ($bestExecs as $best) {
            $exeTimes[] = (float) $best['exe_time'];
            $costs[] = (float) $best['cost_std'];
        }
        $minExeTime = min($exeTimes);
        $minCost = min($costs);
        $exeTimeRange = max($exeTimes) - $minExeTime;
        $costRange = max($costs) - $minCost;
    }

    $seriesData = '';
    foreach ($bestExecs as $best) {
        $exeTimeStd = 0.01;
        $costTimeStd = 0.01;
        if ($exeTimeRange > 0) {
            $exeTimeStd = max(0.01, ($best['exe_time'] - $minExeTime) / $exeTimeRange);
        }
        if ($costRange > 0) {
            $costTimeStd = max(0.01, ($best['cost_std'] - $minCost) / $costRange);
        }

        $clusterDesc = "{$best['datanodes']} datanodes, " . round($best['vm_RAM'], 0) . " GB memory, {$best['vm_OS']}, {$best['provider']} {$best['type']}";
        $seriesData .= "{\n name: '" . $best['name'] . "',\n data: [[" . round($exeTimeStd, 3) . ", " . round($costTimeStd, 3) . ", " . $best['countexecs'] . "]],\n clusterdesc: '{$clusterDesc}', countExecs: '{$best['countexecs']}'\n },";
    }

    $clusters = $dbUtils->get_rows("SELECT * FROM clusters c WHERE id_cluster IN (SELECT DISTINCT(id_cluster) FROM execs e WHERE 1 {$filter_execs});");

    // Sort the per-cluster winners by ascending cost. The callback returns a
    // proper negative/zero/positive integer as usort() requires.
    usort($bestExecs, function ($a, $b) {
        if ($a['cost_std'] == $b['cost_std']) {
            return 0;
        }
        return $a['cost_std'] < $b['cost_std'] ? -1 : 1;
    });

    echo $this->container->getTwig()->render('perf_by_cost/best_perf_by_cost_cluster.html.twig', array(
        'selected' => 'Best Clusters Cost Evaluation',
        'highcharts_js' => HighCharts::getHeader(),
        'cost_hour' => isset($_GET['cost_hour']) ? $_GET['cost_hour'] : null,
        'cost_remote' => isset($_GET['cost_remote']) ? $_GET['cost_remote'] : null,
        'cost_SSD' => isset($_GET['cost_SSD']) ? $_GET['cost_SSD'] : null,
        'cost_IB' => isset($_GET['cost_IB']) ? $_GET['cost_IB'] : null,
        'seriesData' => $seriesData,
        'datefrom' => $datefrom,
        'dateto' => $dateto,
        'benchs' => array($bench),
        'select_multiple_benchs' => false,
        'nets' => $nets,
        'disks' => $disks,
        'blk_sizes' => $blk_sizes,
        'comps' => $comps,
        'id_clusters' => $id_clusters,
        'mapss' => $mapss,
        'replications' => $replications,
        'iosfs' => $iosfs,
        'iofilebufs' => $iofilebufs,
        'datanodess' => $datanodes,
        'bench_types' => $benchtype,
        'vm_sizes' => $vm_sizes,
        'vm_coress' => $vm_coress,
        'vm_RAMs' => $vm_RAMs,
        'vm_OS' => $vm_OS,
        'hadoop_versions' => $hadoop_versions,
        'types' => $types,
        'providers' => $provider,
        'filters' => $filters,
        'allunchecked' => $allunchecked,
        'minexetime' => $minexetime,
        'maxexetime' => $maxexetime,
        'bestExecs' => $bestExecs,
        'options' => Utils::getFilterOptions($dbUtils),
        'clusters' => $clusters,
        'preset' => $preset,
        'selPreset' => $selPreset,
    ));
}