/** * @brief Generates the parallel query plan from a given SQL query * @param query SQL query that will be parallelised * @param headNodeTables list of tables that are completely located on the head node and are not sharded * @return the raw sharded queries to be run on the coordination and shard nodes * * These function will create a parallel query plan from an input SQL statement * using shard-query-parallel. It will identify any implicit joins, properly separate * them into distinct queries and assign them to temporary tables. Further any aggregate * function is properly transfered into sharded versions. This function will eventually * produce a query plan that can be further processed by the query plan to SQL parser. */ function PHPSQLprepareQuery($query, $headNodeTables = array()) { $shard_query = new ShardQuery(); $shard_query->verbose = false; $shard_query->headNodeTables = $headNodeTables; $shard_query->process_sql($query); if (!empty($shard_query->errors)) { echo "An Error occured:\n\n"; foreach ($shard_query->errors as $key => $error) { if (array_key_exists("error_clause", $error)) { echo $key . ": clause: " . $error["error_clause"] . " error: " . $error["error_reason"] . "\n"; } else { echo $key . ": clause: " . $error[0] . " error: " . $error[1] . "\n"; } } throw new Exception('SQL Parse Error'); } #determine if this is a serial or parallel query: #parallel only if there are other tables than temporary (i.e. dependent) ones involved $parallel = false; if (!empty($shard_query->parsedCopy['FROM'])) { foreach ($shard_query->parsedCopy['FROM'] as $fromNode) { if ($fromNode['table'] != 'DEPENDENT-SUBQUERY') { $parallel = true; //check if this table is only available on the head node //if yes, donot execute the query in parallel $found = false; $tableName = str_replace("`", "", $fromNode['table']); $posDot = strpos($tableName, "."); foreach ($headNodeTables as $headNodeTable) { $posTable = strpos($tableName, $headNodeTable); if ($posTable !== false) { if ($posDot === false) { if (strlen($tableName) === strlen($headNodeTable)) { $found = true; } } else { if (strlen($tableName) - $posDot - 1 === strlen($headNodeTable)) { $found = true; } else { if ($posTable == 0 && strlen($headNodeTable) == $posDot) { $found = true; } } } if ($found === true) { $parallel = false; break; } } } } } } $shard_query->subqueries[$shard_query->table_name] = $shard_query->shard_sql; $shard_query->subqueries[$shard_query->table_name]['parallel'] = $parallel; return $shard_query; }
function fetch() { $schema_id = $this->params['schema_id']; $job_id = $this->params['job_id']; //lookup schema name $schema = Doo::db()->getOne('Schemata', array('where' => 'id = ' . $schema_id)); //update jobs before we check! $shard_query = new ShardQuery($schema->schema_name); $shard_query->state->mapper->sweep_jobs(); //get job $j = new Jobs(); $j->id = $job_id; $j = $j->getOne(); //check if job exist if (!$j) { $this->res->success = false; $this->res->message = "Job_ID: {$job_id} doesnt exist!"; return null; } //check if job compeleted if ($j->job_status != 'completed') { $this->res->success = false; $this->res->message = "Job_ID: {$job_id} not completed!"; } else { //get result from job_id $stmt = $shard_query->get_async_result($job_id); //build data if ($stmt) { $this->res->success = true; $this->res->data = $this->json_format($shard_query, $stmt); $shard_query->DAL->my_free_result($stmt); } else { //error $this->res->success = false; $this->res->data = $stmt; $this->res->message = $shard_query->DAL->my_error(); } } }
protected function process_sql($sql, &$state) { //only useful for the fetch worker for debugging $state->sql = $sql; $parser = null; $straight_join = false; $state->shard_sql = ""; //identical SQL which will be broadcast to all shards $state->coord_sql = ""; //the summary table sql $state->in_lists = array(); $error = array(); $select = null; if (!is_array($sql)) { #//we need to check the syntax $sql = preg_replace(array("%-- [^\\n](?:\\n|\$)%", "%/\\*.*\\*/%"), '', trim($sql)); if (!$sql) { $this->errors[] = "SQL is empty\n"; return false; } if (strtolower(substr($sql, 0, 4)) == 'with') { // handle the WITH clauses and rewrite the SQL to use the new table names $sql = $this->process_subquery_factoring($sql); if (!$sql) { return false; } } if (strtolower(substr($sql, 0, 4)) == 'load') { $result = $this->process_load_data($sql); if (!$result) { return false; } } $blacklist = '/(?:create|drop|alter)\\s+database|last_insert_id|sql_calc_found_rows|row_count|:=|connection_id|' . 'current_user|session_user|user\\(\\)|mysql\\.|system_user|found_rows|get_lock|free_lock|' . 'is_free_lock|is_used_lock|load_file|infile|master_pos_wait|name_const|release_lock|^show\\s+(?:master|slave)|' . '^show\\s+bin|^show\\s+create\\s+database|^show\\s+databases|^show\\s+logs|^show\\s+mutex|' . '^show\\s+engine|^show\\s+innodb|^show\\s+open|^show\\s+privileges|^show\\s+(?:status|variables)|^lock\\s+table|' . '^check\\s+table|^checksum\\s+table|^backup|^repair|^restore|^call|^handler|^begin|^start\\s+transaction|^commit|' . '^rollback|^set transaction|savepoint|^xa\\s+(?:start|begin|end|prepare|commit|rollback|recover)/i'; if (preg_match($blacklist, $sql, $matches)) { $this->errors[] = 'Unsupported SQL: ' . print_r($matches, true); return false; } #syntax check is broken on ICE due to LIMIT 0 issue $version_check_sql = 'select @@version_comment c'; $stmt = $state->DAL->my_query($version_check_sql); $comment = ""; while ($row = $state->DAL->my_fetch_assoc($stmt)) { $comment = $row['c']; } if (strpos($comment, '(ice)') === false) { if (preg_match('/^select/i', $sql)) { $check_sql = trim($sql, "; \n"); //custom functions have to pass the syntax check. concat takes a variable number of //parameters, so it can be substituted for any custom function for syntax purposes foreach (ShardQueryCustomFunctions::$list as $key => $val) { $check_sql = preg_replace('/' . $key . '/i', 'concat', $check_sql); } //we need to remove the virtual schema name from any tables in the FROM clause //The parser could be used here, but a simple regex will suffice for this simple //syntax check $schema_tokens = array('"' . $this->current_schema . '"' . ".", "`{$this->current_schema}`.", $this->current_schema . "."); $check_sql = str_replace($schema_tokens, "", $check_sql); $check_sql = preg_replace('/\\s+limit\\s+\\d+,*\\s*\\d*/i', '', $check_sql) . ' LIMIT 0'; $check_sql = "select count(*) from (" . trim($check_sql, ";\r\n ") . ") check_sql where 0=1"; /* if(!$state->DAL->my_query($check_sql)) { $this->errors[] = $state->DAL->my_error(); return false; }*/ } } $sql = preg_replace("/\\s+FROM\\s+(LAST|FIRST)\\s+/i", "FROM_\$1 ", $sql); $state->parsed = $this->parser->parse($sql); } else { $state->parsed = $sql; } if (!empty($state->parsed['UNION ALL'])) { $queries = array(); $table_name = "aggregation_tmp_" . mt_rand(1, 100000000); $state->table_name = $table_name; $coord_sql = ""; foreach ($state->parsed['UNION ALL'] as $sub_tree) { $sub_state = ShardQuery::new_state(); //initialize the new state with a call to set_schema $this->set_schema($this->schema_name, $sub_state); $sub_state->tmp_shard = $state->tmp_shard; $sub_table_name = "aggregation_tmp_" . mt_rand(1, 100000000); $sub_state->table_name = $sub_table_name; $sub_state->DAL = SimpleDAL::factory($sub_state->tmp_shard); $this->query($sub_tree, false, $sub_state, true, false); $state->extra_tables[] = $sub_table_name; $state->extra_tables = array_merge($state->extra_tables, $sub_state->extra_tables); if ($coord_sql) { $coord_sql .= " UNION ALL "; } $coord_sql .= "( " . $sub_state->coord_sql . " )"; $state->extra_tables[] = $sub_table_name; } $select['coord_sql'] = $coord_sql; unset($coord_sql); } elseif (!empty($state->parsed['UNION'])) { $queries = array(); $table_name = "aggregation_tmp_" . mt_rand(1, 100000000); $state->table_name = $table_name; $coord_sql = ""; foreach ($state->parsed['UNION'] as $sub_tree) { $sub_state = shardquery::new_state(); //initialize the new state with a call to set_schema $this->set_schema($this->schema_name, $sub_state); $sub_state->tmp_shard = $state->tmp_shard; $sub_table_name = "aggregation_tmp_" . mt_rand(1, 100000000); $sub_state->table_name = $sub_table_name; $sub_state->dal = simpledal::factory($sub_state->tmp_shard); $this->query($sub_tree, false, $sub_state, true, false); $state->extra_tables[] = $sub_table_name; $state->extra_tables = array_merge($state->extra_tables, $sub_state->extra_tables); if ($coord_sql) { $coord_sql .= " UNION "; } $coord_sql .= "( " . $sub_state->coord_sql . " )"; } //UNION operation requires deduplication of the temporary table $select['coord_sql'] = $coord_sql; unset($coord_sql); } elseif (!empty($state->parsed['SELECT']) && empty($state->parsed['INSERT']) && empty($state->parsed['CREATE']) && empty($state->parsed['REPLACE'])) { //reset the important variables $select = $from = $where = $group = $order_by = ""; $straight_join = $distinct = false; $this->errors = array(); /* Attempt star schema optimization */ if ($state->star_opt) { $star_result = $this->star_optimize($state); if ($star_result !== false) { $state->parsed = $star_result['parsed']; $state->push_where = $star_result['push_where']; } else { #critical error occured during star optimization if (!empty($this->errors)) { return false; } } } //The SELECT clause is processed first. if (!empty($state->parsed['OPTIONS']) && in_array('STRAIGHT_JOIN', $state->parsed['OPTIONS'])) { $straight_join = true; } if (strtoupper($state->parsed['SELECT'][0]['base_expr']) == 'DISTINCT') { $distinct = true; } if (!empty($state->parsed['OPTIONS']) && in_array('SQL_CALC_FOUND_ROWS', $state->parsed['OPTIONS'])) { $this->errors[] = array('Unsupported query', 'SQL_CALC_FOUND_ROWS is not supported'); return false; } if (!empty($state->parsed['OPTIONS']) && in_array('FOR UPDATE', $state->parsed['OPTIONS'])) { $this->errors[] = array('Unsupported query', 'FOR UPDATE is not supported'); return false; } if (!empty($state->parsed['OPTIONS']) && in_array('LOCK IN SHARE MODE', $state->parsed['OPTIONS'])) { $this->errors[] = array('Unsupported query', 'LOCK IN SHARE MODE is not supported'); return false; } $with_rollup = ""; if (!empty($state->parsed['OPTIONS'])) { foreach ($state->parsed['OPTIONS'] as $option) { if (strtoupper($option['base_expr']) === 'WITH ROLLUP') { $with_rollup = ' WITH ROLLUP'; } } } //ignore any other options unset($state->parsed['OPTIONS']); $procedure = ""; if (!empty($state->parsed['PROCEDURE'])) { $procedure = 'PROCEDURE ' . join('', $state->parsed['PROCEDURE']); unset($state->parsed['PROCEDURE']); } if (!empty($state->parsed['INTO'])) { $this->errors[] = array('Unsupported query', 'INTO is not supported'); return false; } /*This will process the SELECT clause, doing any SQL rewrites needed for aggregation at the shard level The $this->non_distrib flag will be set if the query contains any non-distributable aggregate functions (none currently) If non-distributable aggregate functions are detected, we will throw this work away and instead use $this->process_undistributable_select() [below] */ $select = $this->process_select($state->parsed['SELECT'], $straight_join, $distinct, $state); /* Now some special window function columns are needed */ if (!empty($state->windows)) { $used_cols = array(); for ($i = 0; $i < count($state->windows); ++$i) { $win = $state->windows[$i]; $shard_hash = ""; foreach ($win['partition'] as $part) { if (empty($used_cols[$part['base_expr']])) { $select['shard_sql'] .= "," . $part['base_expr']; $used_cols[$part['base_expr']] = 1; } else { continue; } if ($shard_hash) { $shard_hash .= ","; } $shard_hash .= $part['base_expr']; } if ($shard_hash == "") { $shard_hash = "'ONE_PARTITION'"; } $select['shard_sql'] .= ",SHA1(CONCAT_WS('#'," . $shard_hash . ")) as wf{$i}_hash"; $obclause = ""; $obhash = ""; foreach ($win['order'] as $ob) { if (empty($used_cols[$ob['base_expr']])) { $select['shard_sql'] .= "," . $ob['base_expr']; $used_cols[$ob['base_expr']] = 1; } if ($obclause) { $obclause .= ","; } if ($obhash) { $obclause .= ","; } $obclause .= $ob['base_expr'] . " " . $ob['direction']; $obhash .= $ob['base_expr']; } if ($obhash) { $select['shard_sql'] .= ",SHA1(CONCAT_WS('#'," . $obhash . ")) as wf{$i}_obhash"; } $state->windows[$i]['order_by'] = $obclause; } } if (trim($select['shard_sql']) == 'SELECT') { $select['shard_sql'] = 'SELECT 1'; } if (!empty($select['error'])) { $this->errors[] = $select['error']; return false; } if (!$state->non_distrib) { unset($state->parsed['SELECT']); } else { $select = $this->process_undistributable_select($state->parsed['SELECT'], $straight_join, $distinct); unset($state->parsed['SELECT']); } if (!empty($select['error'])) { $this->errors[] = $select['error']; return false; } $coord_group = ""; $shard_group = ""; if (!empty($state->parsed['GROUP'])) { $group = $state->parsed['GROUP']; foreach ($group as $item) { if ($coord_group) { $coord_group .= ","; } $coord_group .= $this->process_group_item($item, $state->used_colrefs, 0, $select); } } if ($shard_group !== "") { if ($select['group_aliases'] !== "") { $select['group_aliases'] .= ","; } $select['group_aliases'] .= $shard_group; } $select['coord_group'] = $coord_group; $select['shard_group'] = $select['group_aliases']; if (empty($state->pushed) && empty($state->parsed['GROUP'])) { $select['coord_group'] = ""; $select['shard_group'] = ""; } unset($state->parsed['GROUP']); if ($having_info = $this->process_having($state)) { if (trim($having_info['select'])) { $select['shard_sql'] .= "," . $having_info['select']; } } else { $having_info = array('having' => " "); } $order_by = ""; if (!empty($state->windows)) { $order_by = "wf0_hash"; if (!empty($state->windows[0]['order_by'])) { $order_by .= ","; $order_by .= $state->windows[0]['order_by']; } } if (!empty($state->parsed['ORDER'])) { foreach ($state->parsed['ORDER'] as $o) { if ($order_by) { $order_by .= ','; } switch ($o['expr_type']) { case 'subquery': $this->errors[] = "Subqueries are not supported in the ORDER BY clause.\n"; return false; break; case 'custom_function': $this->errors[] = "Custom functions are not supported in the ORDER BY clause.\n"; return false; break; case 'function': case 'expression': case 'aggregate_function': $item = ""; $expr_alias = 'expr_' . mt_rand(1, 10000000000); $this->concat_all_subtrees($o, $item); $select['shard_sql'] .= ", {$item} as {$expr_alias}"; $order_by .= $o['base_expr'] . '(' . $expr_alias . ')'; if ($o['expr_type'] === 'aggregate_function') { switch (strtoupper($o['base_expr'])) { case 'SUM': $select['coord_odku'][] = "{$expr_alias}={$expr_alias} + VALUES({$expr_alias})"; break; case 'MIN': $select['coord_odku'][] = "{$expr_alias}=IF({$expr_alias} < VALUES({$expr_alias}), VALUES({$expr_alias}),{$expr_alias})"; break; case 'MAX': $select['coord_odku'][] = "{$expr_alias}=IF({$expr_alias} > VALUES({$expr_alias}), VALUES({$expr_alias}), {$expr_alias})"; break; case 'COUNT': $select['coord_odku'][] = "{$expr_alias}={$expr_alias} + VALUES({$expr_alias})"; break; default: $this->errors[] = "Unsupported aggregate function in ORDER BY"; return false; } } break; default: if (strpos($o['base_expr'], '.') !== false) { $expr_alias = 'expr_' . mt_rand(1, 10000000000); $select['shard_sql'] .= ", {$o['base_expr']} as {$expr_alias}"; $order_by .= $expr_alias; } else { $order_by .= $o['base_expr']; } break; } $order_by .= " " . $o['direction']; } } if ($order_by) { $order_by = " ORDER BY {$order_by}"; } unset($state->parsed['ORDER']); if (empty($state->parsed['FROM'])) { $this->errors = array('Unsupported query', 'Missing FROM clause'); return false; } else { $select['shard_sql'] .= "\n" . $this->process_from($state->parsed['FROM'], $state); $in_tables = ""; $is_virtual = false; foreach ($state->tables as $val => $attr) { if ($attr['is_virtual']) { $is_virtual = true; } if ($in_tables !== "") { $in_tables .= ","; } $in_tables .= "'{$val}'"; } /* * If no real tables were used (only subquery in from clause for example) * then direct the query to the coordinator shard, as it will read from * the output of the subquery in the from clause or other construct. */ if ($in_tables === "") { $state->force_shard = array($state->tmp_shard); } elseif ($in_tables !== "" && !$is_virtual) { /* * Otherwise, if this isn't a virtual table (v_ view) then we have to inspect * the tables to see if any of them contain the shard key. If any v_ tables are * used then we assume it is sharded (what is point of virtual table otherwise?) */ $sql = "select count(*) cnt from information_schema.columns where table_schema='" . $state->tmp_shard['db'] . "' and column_name='" . $this->shard_column . "' and table_name IN({$in_tables});"; if (!($stmt = $state->DAL->my_query($sql))) { $this->errors[] = 'Could not access information schema: ' . $state->DAL->my_error(); return false; } $row = $state->DAL->my_fetch_assoc(); /* If none of the tables contain the sharding key, the query goes to all shards */ if ($row['cnt'] === '0') { if (isset($state->tmp_shard)) { $state->force_shard = array($state->tmp_shard); } else { $state->force_shard = array($state->shards[array_rand($state->shards)]); } } } else { $state->force_shard = array(); } if (!isset($state->table_name) || $state->table_name == "") { $state->table_name = "aggregation_tmp_" . mt_rand(1, 100000000); } $select['coord_sql'] .= "\nFROM `{$state->table_name}`"; $select['winfunc_sql'] .= "\nFROM `{$state->table_name}`"; unset($state->parsed['FROM']); } //note that this will extract inlists and store them in $this->in_lists (if inlist optimization is on) $where_clauses = $this->process_where(empty($state->parsed['WHERE']) ? array() : $state->parsed['WHERE'], $state); if (!empty($state->parsed['WHERE'])) { unset($state->parsed['WHERE']); } if (empty($state->force_shard)) { $state->force_shard = $state->shards; } /* Handle dependent subqueries if the subquery has to be on one or more shards that are different from tmp_shard.*/ foreach ($state->dependent_subqueries as $sub_table) { if (empty($state->tables)) { continue; } //don't do this work if there are no real tables $sql = "SHOW CREATE TABLE {$sub_table}"; $stmt = $state->DAL->my_query($sql); $row = $state->DAL->my_fetch_array(); $create_sql = $row[1]; if (!$row) { $this->errors[] = 'Could not get DDL for intermediate table: ' . $sub_table . "(" . $state->DAL->my_error() . ")"; $this->cleanup_tables($state); return false; } $export_sql = "SELECT * FROM {$sub_table}"; $stmt = $state->DAL->my_unbuffered_query($export_sql); if (!$stmt) { $this->errors[] = 'Could not open intermediate result from: ' . $sub_table . "(" . $state->DAL->my_error() . ")"; $this->cleanup_tables($state); return false; } $fname = tempnam('/tmp', 'SQ'); $fh = fopen($fname, 'wb'); if (!$fh) { $this->errors[] = 'Could not open temporary file for spooling dependent subquery to shards: ' . $fname; return false; } while ($row = $state->DAL->my_fetch_array()) { $outrow = ""; foreach ($row as $col) { $col = str_replace('"', '\\"', $col); if (!is_numeric($col)) { $col = '"' . $col . '"'; } if ($outrow) { $outrow .= ","; } $outrow .= $col; } if (!fputs($fh, $outrow . "\n")) { $this->errors[] = 'Could not write to temporary file for spooling dependent subquery to shards: ' . $fname; return false; } } fclose($fh); foreach ($state->force_shard as $shard) { if ($shard === $state->tmp_shard) { continue; } $dal = SimpleDAL::factory($shard); $dal->my_select_db($shard['db']); $stmt = $dal->my_query($create_sql); if (!$stmt) { $this->errors[] = "Failed to create destination table: {$sub_table} during intermediate copy" . "(" . $dal->my_error() . ")"; $this->cleanup_tables($state); return false; } $load_sql = "LOAD DATA LOCAL INFILE '{$fname}' INTO TABLE `{$shard['db']}`.{$sub_table} FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\\\\'"; $stmt = $dal->my_query($load_sql); if (!$stmt) { $this->errors[] = 'Failed to LOAD DATA LOCAL INFILE to table: $sub_table during intermediate copy' . "(" . $dal->my_error() . ")"; $this->cleanup_tables($state); return false; } $dal->my_close(); } unlink($fname); } /* semijoin optimization */ if (!empty($state->push_join)) { $select['shard_sql'] .= join('', $state->push_join); } $used_limit = 0; if (!empty($state->parsed['LIMIT'])) { $used_limit = 1; if ($state->parsed['LIMIT']['offset'] == "") { $state->parsed['LIMIT']['offset'] = 0; } $order_by .= " LIMIT {$state->parsed['LIMIT']['offset']},{$state->parsed['LIMIT']['rowcount']}"; unset($state->parsed['LIMIT']); } foreach ($state->parsed as $key => $clauses) { $this->errors[] = array('Unsupported query', $key . ' clause is not supported'); } if ($this->errors) { return false; } //process_select only provides a list of positions if ($select['coord_group']) { $select['coord_group'] = ' GROUP BY ' . $select['coord_group'] . $having_info['having'] . $with_rollup . $procedure; } if ($select['shard_group']) { $select['shard_group'] = ' GROUP BY ' . $select['shard_group']; } $queries = array(); $where_base = " 1=1 "; if (!empty($state->push_where)) { $where_base .= ' AND ' . join(' ', $state->push_where); } if ($state->mysql_version['supports_partition_hint'] == 1) { #if table is not partitioned, then don't inject any partition hints #have to do all removals in a first pass foreach ($state->tables as $table_name => $table_info) { if (empty($table_info['alias']) || $table_info['alias'] === '') { $alias = $table_name; } else { $alias = $table_info['alias']; } $alias = trim($alias, '`'); if ($table_info['partition_info']['partition_names'][0] == "") { $select['shard_sql'] = str_replace('%p' . $alias, "", $select['shard_sql']); } } #now build out queries for each partition for each table foreach ($state->tables as $table_name => $table_info) { if (empty($table_info['alias']) || $table_info['alias'] === '') { $alias = $table_name; } else { $alias = $table_info['alias']; } $alias = trim($alias, '`'); if ($table_info['partition_info']['partition_names'][0] != "") { if (empty($queries)) { foreach ($table_info['partition_info']['partition_names'] as $x => $partition_name) { $queries[] = str_replace('%p' . $alias, " PARTITION(" . $partition_name . ") ", $select['shard_sql']); } } else { $old_queries = $queries; $queries = array(); foreach ($old_queries as $query) { foreach ($table_info['partition_info']['partition_names'] as $x => $partition_name) { $query = str_replace('%p' . $alias, " PARTITION(" . $partition_name . ") ", $query); $queries[] = $query; } } } #remove the token so that it doesn't mess up further tables in the query $select['shard_sql'] = str_replace('%p' . $alias, "", $select['shard_sql']); } } } else { $explain_query = "EXPLAIN PARTITIONS " . $select['shard_sql'] . ' ' . $where; $stmt = $state->DAL->my_query($explain_query); $state->added_where = 0; if ($where == "") { $where = " WHERE 1=1"; } while ($row = $state->DAL->my_fetch_assoc($stmt)) { $table_or_alias = $row['table']; $partitions = $row['partitions']; $partitions = explode(',', $partitions); foreach ($partitions as $partition_name) { foreach ($state->tables as $table_name => $table_info) { if ($table_or_alias === $table_name || $table_or_alias === $table_info['alias']) { if (!empty($table_info['partition_info']['where_clauses'][$partition_name])) { $expr = $table_info['partition_info']['where_clauses'][$partition_name]; if ($table_info['alias'] !== '') { $expr = preg_replace("/" . $table_info['partition_info']['partition_expression'] . "/", $table_info['alias'] . '.' . $table_info['partition_info']['partition_expression'], $expr); } $queries[] = $select['shard_sql'] . ' ' . $where . ' AND ' . $expr; $state->added_where = 1; } } } } } } $where_base = $state->added_where != 1 ? " WHERE {$where_base} " : " AND 1=1 "; if (empty($where_clauses)) { $where_clauses = array($where_base); } else { $old_clauses = $where_clauses; $where_clauses = array(); foreach ($old_clauses as $new_where_clause) { $where_clauses[] = $where_base . ' AND ' . $new_where_clause; } } #queries is empty if no partition parallelism was added #parallelism may still have been added from BETWEEN clauses ($where_clauses may be an array of clauses) if (empty($queries)) { foreach ($where_clauses as $where_clause) { if (!$state->no_pushdown_limit) { $queries[] = $select['shard_sql'] . $where_clause . $select['shard_group'] . ($used_limit ? $order_by : ''); } else { $queries[] = $select['shard_sql'] . $where_clause . $select['shard_group']; } } } else { $old_queries = $queries; $queries = array(); foreach ($old_queries as $query) { foreach ($where_clauses as $where_clause) { if (!$state->no_pushdown_limit) { $nq = $query . ' ' . $where_clause . ' ' . $select['shard_group'] . ($used_limit ? $order_by : ''); } else { $nq = $query . ' ' . $where_clause . ' ' . $select['shard_group']; } $queries[] = $nq; } } } } elseif (!empty($state->parsed['INSERT']) || !empty($state->parsed['REPLACE'])) { if (!empty($state->parsed['INSERT'])) { $replace = ""; $to_table = $state->parsed['INSERT'][1]['table']; } else { $replace = " REPLACE "; $to_table = $state->parsed['REPLACE'][1]['table']; } $result = $this->get_insert_cols($state); if ($result === false) { return false; } $cols = $result[0]; $col_list = $result[1]; $shard_column_pos = false; foreach ($cols as $pos => $col) { if (trim($col, '` ') == $this->shard_column) { $shard_column_pos = $pos; break; } } if (!empty($state->parsed['SELECT'])) { $parsed = $state->parsed; $ignore = ""; if (!empty($parsed['OPTIONS'])) { foreach ($parsed['OPTIONS'] as $option) { if (strtolower($option) == 'ignore') { $ignore = 'ignore'; break; } } } unset($parsed['INSERT']); unset($parsed['REPLACE']); unset($parsed['OPTIONS']); $sub_state = shardquery::new_state(); $this->set_schema($this->schema_name, $sub_state); $sub_state->tmp_shard = $state->tmp_shard; $sub_table_name = "aggregation_tmp_" . mt_rand(1, 100000000); $sub_state->table_name = $sub_table_name; $sub_state->dal = simpledal::factory($sub_state->tmp_shard); $this->query($parsed, false, $sub_state, true, false); $state->extra_tables[] = $sub_table_name; $state->extra_tables = array_merge($state->extra_tables, $sub_state->extra_tables); $result = $this->load_from_table($sub_state, $to_table, $col_list, $shard_column_pos, $ignore, $replace); if ($result) { $this->state->coord_sql = "select 'INSERT .. SELECT completed ok' as message from dual"; return true; } else { return false; } } else { if ($replace == "") { $sql = "INSERT INTO `" . trim($state->parsed['INSERT'][1]['table'], '`') . "` {$col_list} VALUES "; $replace = "INSERT"; } else { $sql = "REPLACE INTO `" . trim($state->parsed['REPLACE'][1]['table'], '`') . "` {$col_list} VALUES "; } $values = array(); $val_count = 0; $column_count = count($cols); foreach ($state->parsed['VALUES'] as $record) { $vals = trim($record['base_expr'], ','); //this row is going to go to a single shard if ($shard_column_pos !== false) { if (count($record['data']) !== $column_count) { $this->errors[] = 'Value count does not match column count!'; return false; } $shard_id = $this->map_shard($this->shard_column, $record['data'][$shard_column_pos]['base_expr'], $this->state->schema_name, '=', true); $shard_id = @array_pop(array_keys(array_pop($shard_id))); if (empty($values[$shard_id])) { $values[$shard_id] = $vals; } else { $values[$shard_id] .= ",{$vals}"; } $val_count++; //the row is going to all shards } else { foreach ($this->shards as $shard_id => $shard) { if (empty($values[$shard_id])) { $values[$shard_id] = $vals; } else { $values[$shard_id] .= ",{$vals}"; } $val_count++; } } } $state->insert_sql = $sql; $state->insert_values = $values; } return true; } else { if (!empty($state->parsed['CREATE']) && !empty($state->parsed['TABLE']) && !empty($state->parsed['SELECT'])) { /* CTAS */ $table_name = $state->parsed['TABLE']['base_expr']; $schema_tokens = array(';', '"' . $this->current_schema . '"' . ".", "`{$this->current_schema}`.", $this->current_schema . "."); $create_sql = $state->orig_sql; $create_sql = str_replace($schema_tokens, "", $create_sql); $create_sql = preg_replace('/\\s+limit\\s+\\d+/i', '', $create_sql) . ' LIMIT 0'; foreach ($this->shards as $name => $shard) { $dal = SimpleDAL::factory($shard); $dal->my_select_db($shard['db']); $dal->my_query($create_sql); if ($dal->my_error()) { $this->errors[] = "on shard: " . $name . " errorno:" . $dal->my_errno() . ",message:" . $dal->my_error(); } $dal->my_close(); } unset($state->parsed['CREATE']); unset($state->parsed['TABLE']); $result = $this->get_insert_cols($state, $table_name); if ($result === false) { return false; } $cols = $result[0]; $col_list = $result[1]; $shard_column_pos = false; foreach ($cols as $pos => $col) { if (trim($col, '` ') == $this->shard_column) { $shard_column_pos = $pos; break; } } $ignore = ""; $replace = ""; $sub_state = shardquery::new_state(); $this->set_schema($this->schema_name, $sub_state); $sub_state->tmp_shard = $state->tmp_shard; $sub_table_name = "aggregation_tmp_" . mt_rand(1, 100000000); $sub_state->table_name = $sub_table_name; $sub_state->dal = simpledal::factory($sub_state->tmp_shard); $this->query($state->parsed, false, $sub_state, true, false); $state->extra_tables[] = $sub_table_name; $state->extra_tables = array_merge($state->extra_tables, $sub_state->extra_tables); $result = $this->load_from_table($sub_state, $table_name, "", $shard_column_pos, $ignore, $replace); if ($result) { $this->state->coord_sql = "select 'CREATE .. SELECT completed ok' as message from dual"; return true; } else { return false; } } /*else*/ //This query should be broadcast to all nodes #$state->broadcast_query = $state->orig_sql; foreach ($this->shards as $shard_name => $shard) { $dal = SimpleDAL::factory($shard); $dal->my_select_db($shard['db']); if ($dal->my_error()) { $this->errors[] = $dal->my_error(); continue; } $dal->my_query($state->orig_sql); if ($dal->my_error()) { $this->errors[] = $dal->my_error(); } } $this->state->coord_sql = "select 'Sent query to all shards' as message from dual"; return true; } //if(!isset($state->table_name)) $state->table_name = "agg_ if (empty($order_by)) { $order_by = ""; } if (empty($select['coord_group'])) { $select['coord_group'] = ""; } if (empty($select['coord_odku'])) { $select['coord_odku'] = ""; } if (empty($select['group_aliases'])) { $select['group_aliases'] = ""; } $state->coord_sql = $select['coord_sql'] . ' ' . $select['coord_group'] . ' ' . $order_by; $state->winfunc_sql = $select['winfunc_sql']; $state->winfunc_group = $select['coord_group']; if (!isset($state->coord_odku) || empty($state->coord_odku)) { $state->coord_odku = $select['coord_odku']; } $state->shard_sql = $queries; $state->agg_key_cols = $select['group_aliases']; if (!empty($select['custom_functions'])) { $state->custom_functions = $select['custom_functions']; } else { $state->custom_functions = array(); } $explain = "Shard-Query optimizer messages:"; if ($state->agg_key_cols) { $explain .= "\n * The following projections may be selected for a UNIQUE CHECK on the storage node operation:\n {$state->agg_key_cols}\n"; if ($state->coord_odku) { $explain .= "\n * storage node result set merge optimization enabled:\n ON DUPLICATE KEY UPDATE\n\t" . join(",\n\t", $state->coord_odku); } } if (isset($state->messages)) { foreach ($state->messages as $msg) { $explain .= "\n * {$msg}"; } } if ($state->non_distrib) { $explain .= "\n * Aggregation pushdown disabled due to use of non-distributable and/or custom functions."; } $explain .= "\n\nSQL TO SEND TO SHARDS:\n"; $explain .= print_r($state->shard_sql, true); $explain .= "SQL TO SEND TO COORDINATOR NODE:\n{$state->coord_sql}\n"; $state->explain = $explain; return true; }
public function run($arg) { require 'shard-query-config.php'; if (isset($cache_servers) && !empty($cache_servers)) { $this->cache = new SaltedCache($cache_servers); } if (isset($cache_rules) && !empty($cache_rules)) { $this->cache_rules = $cache_rules; } if (!$arg) { return false; } $arg = (object) $arg; $fields = false; $errors = false; $rows = false; $sql = ""; $has_rows = false; $resultset = null; if (!isset($arg->schema_name)) { $arg->schema_name = null; } $SQ = new ShardQuery($arg->schema_name); print_r($arg); if (empty($arg->sql) || !trim($arg->sql)) { return false; } if (stripos($arg->sql, 'select database()') !== false) { $fields = array(array('type' => 250, 'name' => 'DATABASE()')); $rows = array(array($SQ->state->schema_name)); $resultset = array('fields' => &$fields, 'rows' => &$rows); } elseif (preg_match('/^SHOW/i', $arg->sql)) { $shard_id = array_rand($SQ->state->shards); $DAL = SimpleDAL::factory($SQ->state->shards[$shard_id]); $DAL->my_select_db($SQ->state->shards[$shard_id]['db']); if (preg_match('/show\\s+databases/i', $arg->sql)) { $databases = $SQ->state->mapper->get_schemata(); $fields = array(array('type' => 250, 'name' => 'Database')); /* $rows = array(); foreach($databases as $schema_info) { $rows[] = array($schema_info['schema_name']); } */ $rows = array(array($SQ->state->schema_name)); $resultset = array('fields' => &$fields, 'rows' => &$rows); } elseif ($stmt = $DAL->my_query($arg->sql)) { $row = $DAL->my_fetch_assoc(); foreach ($row as $field => $val) { $rows[0][] = $val; $fields[] = array('type' => 250, 'name' => $field); } while ($row = $DAL->my_fetch_array($stmt, MYSQL_NUM)) { $rows[] = $row; } print_r($rows); $resultset = array('fields' => &$fields, 'rows' => &$rows); } $DAL->my_close(); unset($DAL); } elseif (preg_match('/select\\s+.*\\sfrom\\s.*/i', $arg->sql) || preg_match('/(create|drop|alter)\\s+.*/i', $arg->sql)) { $cache_ttl = null; if (isset($this->cache)) { $patterns = $this->cache_rules; foreach ($patterns as $pattern => $ttl) { if (preg_match($pattern, $arg->sql)) { $cache_ttl = $ttl; $resultset = $this->cache->get($arg->sql, $arg->schema_name); break; } } } if (!$resultset) { $stmt = $SQ->query($arg->sql); if (!empty($SQ->errors)) { $errors = trim(str_replace(array("\n", "Array", "(", ")", " "), "", print_r($SQ->errors, true))); } if ($stmt) { $has_rows = true; $rows = array(array()); # get the first row and use it to construct the list of fields + collect row data # in this first fetch we process data one output column at a time $row = $SQ->DAL->my_fetch_assoc($stmt); foreach ($row as $field => $val) { $rows[0][] = $val; $fields[] = array('type' => 250, 'name' => $field); } # fetch the rest of the rows numerically and stuff into $rows, a row at a time while ($row = $SQ->DAL->my_fetch_array($stmt, MYSQL_NUM)) { $rows[] = $row; } $resultset = array('fields' => &$fields, 'rows' => &$rows); } if (isset($cache_ttl)) { $this->cache->set($arg->sql, $resultset, $cache_ttl, $arg->schema_name); } # return the actual object so that the proxy can finish aggregation and drop the table } } else { $sql = $arg->sql; } return json_encode(array('resultset' => $resultset, 'errors' => $errors, 'sql' => $sql, 'has_rows' => $has_rows)); }