/**
 * @brief Gathers the select-list nodes that can be evaluated against one table (given by name or alias)
 * @param sqlSelect parsed query whose 'SELECT' entry is scanned; an array without that key is scanned directly as a node list
 * @param tblDb database name (only handed on to the recursive calls)
 * @param tblName table name
 * @param tblAlias table alias name
 * @param returnArray receives, by reference, every node accepted for this table
 * @param tableList the sorted table list, indexed by recursion level; entries are read through their 'node' key
 * @param recLevel current recursion level
 * @param startBranch true for the call on the select list itself, false while descending into a sub_tree
 * @return number of column references that name a different table which is not matched by any tableList
 *         entry at or after recLevel; 0 if there are none
 *
 * Column references, operators and reserved words are handled directly. Every other node (functions,
 * expressions, ...) is descended into through its sub_tree and is only appended to returnArray once none
 * of the columns below it belongs to a different table. When a sub_tree does depend on other tables, the
 * columns of it that were accepted for this table are appended on their own (with a generated alias)
 * unless an equal column is already present in returnArray.
 */
function PHPSQLcollectColumns($sqlSelect, $tblDb, $tblName, $tblAlias, &$returnArray, &$tableList, $recLevel, $startBranch = true)
{
    $foreignColumns = 0;
    $nodes = array_key_exists('SELECT', $sqlSelect) ? $sqlSelect['SELECT'] : $sqlSelect;

    foreach ($nodes as $node) {
        $nodeIsColref = isColref($node);

        // ---- leaf nodes: column references, operators and reserved words ----
        if ($nodeIsColref || isOperator($node) || isReserved($node)) {
            if ($nodeIsColref || ($node['base_expr'] === '*' && $startBranch === true)) {
                // the column name itself is not needed below; the lookup is kept from the original call sequence
                $columnName = extractColumnName($node);
                $columnTable = extractTableName($node);
                $columnDb = extractDbName($node);

                $belongsHere = $columnTable === $tblAlias
                    || $columnTable === false
                    || $columnTable === $tblName
                    || $tblAlias === $columnDb . "." . $columnTable;

                if ($belongsHere) {
                    array_push($returnArray, $node);
                    continue;
                }

                // a column of a table that is handled at this level or later can be processed right away,
                // so it does not count as a foreign column
                $handledLater = false;
                foreach ($tableList as $level => $tableEntry) {
                    if ($level < $recLevel) {
                        // entries in front of the current level are skipped
                        continue;
                    }
                    if ($columnTable === extractTableAlias($tableEntry['node'])) {
                        $handledLater = true;
                        break;
                    }
                }
                if (!$handledLater) {
                    $foreignColumns++;
                }
            } elseif (isReserved($node)) {
                // reserved keywords are always added to each select statement we issue
                array_push($returnArray, $node);
            }
            continue;
        }

        // ---- compound nodes: anything that is neither colref, operator nor reserved ----
        // false means "undecided": no sub_tree and not at the outermost level
        $subForeign = false;
        $subColumns = array();
        if (hasSubtree($node)) {
            $subForeign = PHPSQLcollectColumns($node['sub_tree'], $tblDb, $tblName, $tblAlias, $subColumns, $tableList, $recLevel, false);
            $foreignColumns += $subForeign;
        } elseif ($recLevel === 0) {
            // nodes without a subtree (such as parameterless functions like foo()) are added at the
            // outermost possible place, i.e. recLevel 0
            $subForeign = 0;
        }

        if ($startBranch === true && $subForeign === 0) {
            if (!hasAlias($node)) {
                $node['alias'] = createAliasNode(array(buildEscapedString(array($node))));
            }

            // point the columns inside the subtree at the table of the next level, unless this is the last level
            if (hasSubtree($node) && count($tableList) > 0 && $recLevel !== max(array_keys($tableList))) {
                $nextTableNode = $tableList[$recLevel + 1]['node'];
                $targetName = extractTableAlias($nextTableNode);
                if ($targetName === false) {
                    $targetName = extractTableName($nextTableNode);
                }

                if (hasAlias($tableList[$recLevel])) {
                    $currentName = extractTableAlias($tableList[$recLevel]['node']);
                } else {
                    $currentName = extractTableName($tableList[$recLevel]['node']);
                }
                rewriteTableNameInSubqueries($node['sub_tree'], $targetName, $currentName);

                if ($node['expr_type'] !== "aggregate_function") {
                    $node['base_expr'] = getBaseExpr($node);
                }
            }

            // a function/aggregate applied to a '*' "column" may only be added at the outermost level
            // (recLevel 0) and not before (see Test40 for a case where this applies)
            $addNode = true;
            foreach ($subColumns as $subColumn) {
                if ($subColumn['base_expr'] === "*" && $recLevel != 0) {
                    $addNode = false;
                    break;
                }
            }
            if ($addNode) {
                array_push($returnArray, $node);
            }
        }

        // the subtree depends on other tables: add the columns found for this table individually,
        // skipping those that are already selected
        if ($subForeign !== false && $subForeign > 0) {
            foreach ($subColumns as $subColumn) {
                $alreadySelected = false;
                foreach ($returnArray as $selected) {
                    if (columnIsEqual($selected, $subColumn)) {
                        $alreadySelected = true;
                        break;
                    }
                }
                if (!$alreadySelected) {
                    $subColumn['alias'] = createAliasNode(array(implodeNoQuotes($subColumn['no_quotes'])));
                    array_push($returnArray, $subColumn);
                }
            }
        }
    }

    return $foreignColumns;
}
 /**
  * Translates a parsed SELECT list into the column lists of the per-shard query and of the
  * coordination query that merges the shard results.
  *
  * @param select list of parsed select clauses; the keys read from each clause are 'expr_type',
  *        'base_expr', 'alias' (with its 'name'), 'sub_tree' and the marker keys 'group_clause',
  *        'where_col' and 'order_clause'. A leading reserved DISTINCT clause is honoured.
  * @param recLevel current recursion level; a 'where_col' column is only kept in the coordination
  *        query when this is >= 0
  * @param straight_join when truthy, STRAIGHT_JOIN is added to the shard query
  * @param whereSubquery when not false, columns marked 'where_col' or 'order_clause' are left out
  *        of the coordination query
  * @return array with the keys
  *         'error'         list of array('error_clause' => ..., 'error_reason' => ...),
  *         'shard_sql'     SELECT statement head to send to each shard,
  *         'coord_sql'     SELECT statement head to run on the coordination node,
  *         'coord_odku'    list of "col=expr" assignments (intended for ON DUPLICATE KEY UPDATE),
  *         'shard_group'   comma separated select positions to group by on the shards,
  *         'coord_group'   comma separated select positions to group by on the coordination node,
  *         'group_aliases' comma separated aliases of the grouping columns
  *
  * Clauses without an 'expr_type' are skipped; unsupported expression types and aggregate functions
  * are reported through 'error' and contribute nothing to either query.
  */
 function process_select($select, $recLevel, $straight_join = false, $whereSubquery = false)
 {
     $error = array();
     #select-list fragments of the query sent to each shard / run on the coordination node.
     #They are joined once at the end, so a skipped or hidden column never leaves a stray ','
     $shard_cols = array();
     $coord_cols = array();
     #list of positions which contain non-aggregate functions
     $group = array();
     #positions to group by on the shards only; necessary for non-distributable aggregate functions
     $push_group = array();
     #list of group aliases which will be indexed on the aggregation temp table
     $group_aliases = array();
     $coord_odku = array();
     $used_agg_func = false;

     #aggregates that don't need special treatment on the coordination side
     $plain_aggregates = array('MIN', 'MAX', 'SUM');
     #aggregates that have to be recomputed from partial results
     $special_aggregates = array('AVG', 'STDDEV', 'STD', 'STDDEV_POP', 'STDDEV_SAMP', 'VARIANCE', 'VAR_POP', 'VAR_SAMP', 'GROUP_CONCAT');

     //check for distinct keyword (should be the first in the list)
     $distinct = false;
     if (isset($select[0]) && isReserved($select[0]) && strtolower($select[0]['base_expr']) === "distinct") {
         $distinct = true;
         unset($select[0]);
         $select = array_values($select);
     }

     foreach ($select as $pos => $clause) {
         //the new parser does not add anything helpful to equations - nodes without any expr_type are skipped
         if (!isset($clause['expr_type'])) {
             continue;
         }
         $expr_type = $clause['expr_type'];
         $alias_given = isset($clause['alias']['name']);

         if ($alias_given && !empty($clause['alias']['name'])) {
             $alias = "`" . trim($clause['alias']['name'], "`") . "`";
         } else {
             $alias = "`" . $clause['base_expr'] . "`";
         }
         if (strpos($alias, '.')) {
             $alias = trim($alias, '`');
             $tmp = explode('.', $alias);
             if (count($tmp) > 2) {
                 #this is a more complicated expression, use the alias as it is (might be a formula or something)
                 #without a given alias this stays empty, exactly as before, but no longer touches a missing key
                 $alias = "`" . ($alias_given ? substr(trim($clause['alias']['name'], "`"), 0, 50) : "") . "`";
             } else {
                 $alias = "`" . $tmp[0] . "." . $tmp[1] . "`";
             }
         }
         //further escape any function name that might end up in the alias
         if ($expr_type === "aggregate_function" || $expr_type === "function" || $expr_type === "bracket_expression") {
             if (trim($alias, "`") === $clause['base_expr'] || $alias === "``") {
                 $alias = "`" . buildEscapedString(array($clause)) . "`";
             }
         }

         $base_expr = $clause['base_expr'];

         switch ($expr_type) {
             case 'aggregate_function':
                 $function = strtoupper($clause['base_expr']);
                 $is_special = in_array($function, $special_aggregates, true);
                 if (!$is_special && $function !== 'COUNT' && !in_array($function, $plain_aggregates, true)) {
                     $error[] = array('error_clause' => $clause['base_expr'], 'error_reason' => 'Unsupported aggregate function');
                     break;
                 }

                 //base_expr only holds the function name: rebuild the argument list from the sub tree
                 if (strpos($base_expr, "(") === false) {
                     $base_expr = "";
                     if (!empty($clause['sub_tree'])) {
                         foreach ($clause['sub_tree'] as $node) {
                             $base_expr .= getBaseExpr($node);
                         }
                     }
                 }

                 //aggregates use the given alias (cut to 50 characters); if there is none, fall back to the
                 //escaped alias built above instead of reading a missing key and ending up with an empty alias
                 if ($alias_given) {
                     $alias = "`" . substr(trim($clause['alias']['name'], "`"), 0, 50) . "`";
                 } else {
                     $alias = "`" . substr(trim($alias, "`"), 0, 50) . "`";
                 }

                 $used_agg_func = true;
                 $base_expr = fix_trunc_parenth(trim($base_expr, ' ()'));
                 $expr_info = explode(" ", $base_expr);
                 $is_distinct = !empty($expr_info[0]) && strtolower($expr_info[0]) === 'distinct';

                 if ($is_distinct) {
                     if ($this->verbose) {
                         echo "Detected a {$function} [DISTINCT] expression!\n";
                     }
                     #the shards return the raw expression and group by it; the DISTINCT aggregate is
                     #evaluated on the coordination node
                     unset($expr_info[0]);
                     $new_expr = join(" ", $expr_info);
                     $shard_cols[] = "{$new_expr} AS {$alias}";
                     $coord_cols[] = "{$function}(distinct {$alias}) as {$alias}";
                     $push_group[] = $pos + 1;
                 } else {
                     switch ($function) {
                         case 'SUM':
                             $coord_odku[] = "{$alias}={$alias} +  VALUES({$alias})";
                             $shard_cols[] = "{$function}({$base_expr}) AS {$alias}";
                             $coord_cols[] = "{$function}({$alias}) AS {$alias}";
                             break;
                         case 'MIN':
                             #keep the stored value unless the incoming one is smaller
                             $coord_odku[] = "{$alias}=IF({$alias} > VALUES({$alias}), VALUES({$alias}), {$alias})";
                             $shard_cols[] = "{$function}({$base_expr}) AS {$alias}";
                             $coord_cols[] = "{$function}({$alias}) AS {$alias}";
                             break;
                         case 'MAX':
                             #keep the stored value unless the incoming one is larger
                             $coord_odku[] = "{$alias}=IF({$alias} < VALUES({$alias}), VALUES({$alias}), {$alias})";
                             $shard_cols[] = "{$function}({$base_expr}) AS {$alias}";
                             $coord_cols[] = "{$function}({$alias}) AS {$alias}";
                             break;
                         case 'COUNT':
                             #partial counts are summed up on the coordination node
                             $shard_cols[] = "COUNT({$base_expr}) AS {$alias}";
                             $coord_cols[] = "SUM({$alias}) AS {$alias}";
                             $coord_odku[] = "{$alias}={$alias} +  VALUES({$alias})";
                             break;
                         case 'AVG':
                             $alias = trim($alias, '`');
                             $shard_cols[] = " COUNT({$base_expr}) AS `cnt_{$alias}`, SUM({$base_expr}) AS `sum_{$alias}`";
                             $coord_cols[] = " (SUM(`sum_{$alias}`) / SUM(`cnt_{$alias}`)) AS `{$alias}`";
                             break;
                         case 'STDDEV':
                         case 'STD':
                         case 'STDDEV_POP':
                         case 'STDDEV_SAMP':
                         case 'VARIANCE':
                         case 'VAR_POP':
                         case 'VAR_SAMP':
                             #NOTE(review): the divisors below are taken over unchanged, but they look inconsistent:
                             #STDDEV_SAMP divides by N while VAR_SAMP divides by N - 1, and the *_POP variants
                             #divide by N - 1. Confirm against what partitAdd_sum_of_squares returns.
                             $alias = trim($alias, '`');
                             $shard_cols[] = " sum_of_squares({$base_expr}) AS `ssqr_{$alias}`, AVG({$base_expr}) as `avg_{$alias}`, COUNT({$base_expr}) AS `cnt_{$alias}`";
                             $merged_ssqr = "partitAdd_sum_of_squares(`ssqr_{$alias}`, `avg_{$alias}`, `cnt_{$alias}`)";
                             $row_count = "SUM(`cnt_{$alias}`)";
                             if ($function === 'STDDEV_SAMP') {
                                 $coord_cols[] = " SQRT({$merged_ssqr} / {$row_count}) AS `{$alias}`";
                             } elseif ($function === 'VARIANCE' || $function === 'VAR_POP' || $function === 'VAR_SAMP') {
                                 $coord_cols[] = " {$merged_ssqr} / ({$row_count} - 1) AS `{$alias}`";
                             } else {
                                 $coord_cols[] = " SQRT({$merged_ssqr} / ({$row_count} - 1)) AS `{$alias}`";
                             }
                             break;
                         default:
                             #GROUP_CONCAT: same function on both sides
                             $shard_cols[] = "{$function}({$base_expr}) AS {$alias}";
                             $coord_cols[] = "{$function}({$alias}) AS {$alias}";
                             break;
                     }
                 }

                 if ($is_special) {
                     #these are always grouped on the shards (the DISTINCT branch has pushed the position already)
                     if (!$is_distinct) {
                         $push_group[] = $pos + 1;
                     }
                     $group_aliases[] = $alias;
                 }
                 break;
             case 'expression':
             case 'operator':
             case 'const':
             case 'colref':
             case 'bracket_expression':
             case 'reserved':
             case 'function':
                 $group[] = $pos + 1;
                 $group_aliases[] = $alias;
                 //if this is a function without arguments, add the parenthesis
                 if ($expr_type === 'function' && strpos($base_expr, "(") === false) {
                     $base_expr = getBaseExpr($clause);
                 }
                 $shard_cols[] = $base_expr . ' AS ' . $alias;

                 #decide whether the column is also selected in the coordination query
                 #TODO: CHECK IF THIS IS STILL NEEDED!
                 $in_coord = false;
                 #exclude certain aggregation expressions and temporary columns used for grouping
                 if (strpos($alias, 'agr_stddev') === false && !array_key_exists('group_clause', $clause)) {
                     #if this is a temporary column from WHERE, don't select it in the coordination
                     if (!array_key_exists('where_col', $clause) || $recLevel >= 0) {
                         if ($whereSubquery === false) {
                             $in_coord = true;
                         } else {
                             #don't select WHERE / ORDER BY columns that are implicit
                             $in_coord = !array_key_exists('where_col', $clause) && !array_key_exists('order_clause', $clause);
                         }
                     }
                 }
                 if ($in_coord) {
                     $coord_cols[] = $alias;
                     $coord_odku[] = "{$alias}=VALUES({$alias})";
                 }
                 break;
             default:
                 $error[] = array('error_clause' => $clause['base_expr'], 'error_reason' => 'Unsupported expression type (did you forget an alias on an aggregate expression?)');
                 break;
         }
     }

     $distinct_prefix = $distinct ? "DISTINCT " : "";

     $sql = "SELECT ";
     if ($straight_join) {
         $sql .= "STRAIGHT_JOIN ";
     }
     $sql .= $distinct_prefix . join(',', $shard_cols);

     #merge pushed and provided group-by; without any aggregate nothing is grouped at all
     $shard_group = array();
     if ($used_agg_func) {
         #they shouldn't conflict, but array_unique ensures so
         $shard_group = array_unique(array_merge($group, $push_group));
     } else {
         $group = array();
     }

     #we can't send pushed group by to the coord shard, so send the expression based
     return array(
         'error' => $error,
         'shard_sql' => $sql,
         'coord_odku' => $coord_odku,
         'coord_sql' => 'SELECT ' . $distinct_prefix . join(',', $coord_cols),
         'shard_group' => join(',', $shard_group),
         'coord_group' => join(',', $group),
         'group_aliases' => join(',', $group_aliases)
     );
 }