Exemplo n.º 1
0
/**
 * @brief Generates the parallel query plan from a given SQL query
 * @param query SQL query that will be parallelised
 * @param headNodeTables list of tables that are completely located on the head node and are not sharded
 * @return the raw sharded queries to be run on the coordination and shard nodes
 *
 * These function will create a parallel query plan from an input SQL statement
 * using shard-query-parallel. It will identify any implicit joins, properly separate
 * them into distinct queries and assign them to temporary tables. Further any aggregate
 * function is properly transfered into sharded versions. This function will eventually
 * produce a query plan that can be further processed by the query plan to SQL parser.
 */
function PHPSQLprepareQuery($query, $headNodeTables = array())
{
    $shard_query = new ShardQuery();
    $shard_query->verbose = false;
    $shard_query->headNodeTables = $headNodeTables;
    $shard_query->process_sql($query);
    if (!empty($shard_query->errors)) {
        echo "An Error occured:\n\n";
        foreach ($shard_query->errors as $key => $error) {
            if (array_key_exists("error_clause", $error)) {
                echo $key . ": clause: " . $error["error_clause"] . " error: " . $error["error_reason"] . "\n";
            } else {
                echo $key . ": clause: " . $error[0] . " error: " . $error[1] . "\n";
            }
        }
        throw new Exception('SQL Parse Error');
    }
    #determine if this is a serial or parallel query:
    #parallel only if there are other tables than temporary (i.e. dependent) ones involved
    $parallel = false;
    if (!empty($shard_query->parsedCopy['FROM'])) {
        foreach ($shard_query->parsedCopy['FROM'] as $fromNode) {
            if ($fromNode['table'] != 'DEPENDENT-SUBQUERY') {
                $parallel = true;
                //check if this table is only available on the head node
                //if yes, donot execute the query in parallel
                $found = false;
                $tableName = str_replace("`", "", $fromNode['table']);
                $posDot = strpos($tableName, ".");
                foreach ($headNodeTables as $headNodeTable) {
                    $posTable = strpos($tableName, $headNodeTable);
                    if ($posTable !== false) {
                        if ($posDot === false) {
                            if (strlen($tableName) === strlen($headNodeTable)) {
                                $found = true;
                            }
                        } else {
                            if (strlen($tableName) - $posDot - 1 === strlen($headNodeTable)) {
                                $found = true;
                            } else {
                                if ($posTable == 0 && strlen($headNodeTable) == $posDot) {
                                    $found = true;
                                }
                            }
                        }
                        if ($found === true) {
                            $parallel = false;
                            break;
                        }
                    }
                }
            }
        }
    }
    $shard_query->subqueries[$shard_query->table_name] = $shard_query->shard_sql;
    $shard_query->subqueries[$shard_query->table_name]['parallel'] = $parallel;
    return $shard_query;
}
 function fetch()
 {
     $schema_id = $this->params['schema_id'];
     $job_id = $this->params['job_id'];
     //lookup schema name
     $schema = Doo::db()->getOne('Schemata', array('where' => 'id = ' . $schema_id));
     //update jobs before we check!
     $shard_query = new ShardQuery($schema->schema_name);
     $shard_query->state->mapper->sweep_jobs();
     //get job
     $j = new Jobs();
     $j->id = $job_id;
     $j = $j->getOne();
     //check if job exist
     if (!$j) {
         $this->res->success = false;
         $this->res->message = "Job_ID: {$job_id} doesnt exist!";
         return null;
     }
     //check if job compeleted
     if ($j->job_status != 'completed') {
         $this->res->success = false;
         $this->res->message = "Job_ID: {$job_id} not completed!";
     } else {
         //get result from job_id
         $stmt = $shard_query->get_async_result($job_id);
         //build data
         if ($stmt) {
             $this->res->success = true;
             $this->res->data = $this->json_format($shard_query, $stmt);
             $shard_query->DAL->my_free_result($stmt);
         } else {
             //error
             $this->res->success = false;
             $this->res->data = $stmt;
             $this->res->message = $shard_query->DAL->my_error();
         }
     }
 }
Exemplo n.º 3
0
 protected function process_sql($sql, &$state)
 {
     //only useful for the fetch worker for debugging
     $state->sql = $sql;
     $parser = null;
     $straight_join = false;
     $state->shard_sql = "";
     //identical SQL which will be broadcast to all shards
     $state->coord_sql = "";
     //the summary table sql
     $state->in_lists = array();
     $error = array();
     $select = null;
     if (!is_array($sql)) {
         #//we need to check the syntax
         $sql = preg_replace(array("%-- [^\\n](?:\\n|\$)%", "%/\\*.*\\*/%"), '', trim($sql));
         if (!$sql) {
             $this->errors[] = "SQL is empty\n";
             return false;
         }
         if (strtolower(substr($sql, 0, 4)) == 'with') {
             // handle the WITH clauses and rewrite the SQL to use the new table names
             $sql = $this->process_subquery_factoring($sql);
             if (!$sql) {
                 return false;
             }
         }
         if (strtolower(substr($sql, 0, 4)) == 'load') {
             $result = $this->process_load_data($sql);
             if (!$result) {
                 return false;
             }
         }
         $blacklist = '/(?:create|drop|alter)\\s+database|last_insert_id|sql_calc_found_rows|row_count|:=|connection_id|' . 'current_user|session_user|user\\(\\)|mysql\\.|system_user|found_rows|get_lock|free_lock|' . 'is_free_lock|is_used_lock|load_file|infile|master_pos_wait|name_const|release_lock|^show\\s+(?:master|slave)|' . '^show\\s+bin|^show\\s+create\\s+database|^show\\s+databases|^show\\s+logs|^show\\s+mutex|' . '^show\\s+engine|^show\\s+innodb|^show\\s+open|^show\\s+privileges|^show\\s+(?:status|variables)|^lock\\s+table|' . '^check\\s+table|^checksum\\s+table|^backup|^repair|^restore|^call|^handler|^begin|^start\\s+transaction|^commit|' . '^rollback|^set transaction|savepoint|^xa\\s+(?:start|begin|end|prepare|commit|rollback|recover)/i';
         if (preg_match($blacklist, $sql, $matches)) {
             $this->errors[] = 'Unsupported SQL: ' . print_r($matches, true);
             return false;
         }
         #syntax check is broken on ICE due to LIMIT 0 issue
         $version_check_sql = 'select @@version_comment c';
         $stmt = $state->DAL->my_query($version_check_sql);
         $comment = "";
         while ($row = $state->DAL->my_fetch_assoc($stmt)) {
             $comment = $row['c'];
         }
         if (strpos($comment, '(ice)') === false) {
             if (preg_match('/^select/i', $sql)) {
                 $check_sql = trim($sql, "; \n");
                 //custom functions have to pass the syntax check.  concat takes a variable number of
                 //parameters, so it can be substituted for any custom function for syntax purposes
                 foreach (ShardQueryCustomFunctions::$list as $key => $val) {
                     $check_sql = preg_replace('/' . $key . '/i', 'concat', $check_sql);
                 }
                 //we need to remove the virtual schema name from any tables in the FROM clause
                 //The parser could be used here, but a simple regex will suffice for this simple
                 //syntax check
                 $schema_tokens = array('"' . $this->current_schema . '"' . ".", "`{$this->current_schema}`.", $this->current_schema . ".");
                 $check_sql = str_replace($schema_tokens, "", $check_sql);
                 $check_sql = preg_replace('/\\s+limit\\s+\\d+,*\\s*\\d*/i', '', $check_sql) . ' LIMIT 0';
                 $check_sql = "select count(*) from (" . trim($check_sql, ";\r\n ") . ") check_sql where 0=1";
                 /* if(!$state->DAL->my_query($check_sql)) {
                        $this->errors[] = $state->DAL->my_error();
                        return false;
                    }*/
             }
         }
         $sql = preg_replace("/\\s+FROM\\s+(LAST|FIRST)\\s+/i", "FROM_\$1 ", $sql);
         $state->parsed = $this->parser->parse($sql);
     } else {
         $state->parsed = $sql;
     }
     if (!empty($state->parsed['UNION ALL'])) {
         $queries = array();
         $table_name = "aggregation_tmp_" . mt_rand(1, 100000000);
         $state->table_name = $table_name;
         $coord_sql = "";
         foreach ($state->parsed['UNION ALL'] as $sub_tree) {
             $sub_state = ShardQuery::new_state();
             //initialize the new state with a call to set_schema
             $this->set_schema($this->schema_name, $sub_state);
             $sub_state->tmp_shard = $state->tmp_shard;
             $sub_table_name = "aggregation_tmp_" . mt_rand(1, 100000000);
             $sub_state->table_name = $sub_table_name;
             $sub_state->DAL = SimpleDAL::factory($sub_state->tmp_shard);
             $this->query($sub_tree, false, $sub_state, true, false);
             $state->extra_tables[] = $sub_table_name;
             $state->extra_tables = array_merge($state->extra_tables, $sub_state->extra_tables);
             if ($coord_sql) {
                 $coord_sql .= " UNION ALL ";
             }
             $coord_sql .= "( " . $sub_state->coord_sql . " )";
             $state->extra_tables[] = $sub_table_name;
         }
         $select['coord_sql'] = $coord_sql;
         unset($coord_sql);
     } elseif (!empty($state->parsed['UNION'])) {
         $queries = array();
         $table_name = "aggregation_tmp_" . mt_rand(1, 100000000);
         $state->table_name = $table_name;
         $coord_sql = "";
         foreach ($state->parsed['UNION'] as $sub_tree) {
             $sub_state = shardquery::new_state();
             //initialize the new state with a call to set_schema
             $this->set_schema($this->schema_name, $sub_state);
             $sub_state->tmp_shard = $state->tmp_shard;
             $sub_table_name = "aggregation_tmp_" . mt_rand(1, 100000000);
             $sub_state->table_name = $sub_table_name;
             $sub_state->dal = simpledal::factory($sub_state->tmp_shard);
             $this->query($sub_tree, false, $sub_state, true, false);
             $state->extra_tables[] = $sub_table_name;
             $state->extra_tables = array_merge($state->extra_tables, $sub_state->extra_tables);
             if ($coord_sql) {
                 $coord_sql .= " UNION ";
             }
             $coord_sql .= "( " . $sub_state->coord_sql . " )";
         }
         //UNION operation requires deduplication of the temporary table
         $select['coord_sql'] = $coord_sql;
         unset($coord_sql);
     } elseif (!empty($state->parsed['SELECT']) && empty($state->parsed['INSERT']) && empty($state->parsed['CREATE']) && empty($state->parsed['REPLACE'])) {
         //reset the important variables
         $select = $from = $where = $group = $order_by = "";
         $straight_join = $distinct = false;
         $this->errors = array();
         /* Attempt star schema optimization */
         if ($state->star_opt) {
             $star_result = $this->star_optimize($state);
             if ($star_result !== false) {
                 $state->parsed = $star_result['parsed'];
                 $state->push_where = $star_result['push_where'];
             } else {
                 #critical error occured during star optimization
                 if (!empty($this->errors)) {
                     return false;
                 }
             }
         }
         //The SELECT clause is processed first.
         if (!empty($state->parsed['OPTIONS']) && in_array('STRAIGHT_JOIN', $state->parsed['OPTIONS'])) {
             $straight_join = true;
         }
         if (strtoupper($state->parsed['SELECT'][0]['base_expr']) == 'DISTINCT') {
             $distinct = true;
         }
         if (!empty($state->parsed['OPTIONS']) && in_array('SQL_CALC_FOUND_ROWS', $state->parsed['OPTIONS'])) {
             $this->errors[] = array('Unsupported query', 'SQL_CALC_FOUND_ROWS is not supported');
             return false;
         }
         if (!empty($state->parsed['OPTIONS']) && in_array('FOR UPDATE', $state->parsed['OPTIONS'])) {
             $this->errors[] = array('Unsupported query', 'FOR UPDATE is not supported');
             return false;
         }
         if (!empty($state->parsed['OPTIONS']) && in_array('LOCK IN SHARE MODE', $state->parsed['OPTIONS'])) {
             $this->errors[] = array('Unsupported query', 'LOCK IN SHARE MODE is not supported');
             return false;
         }
         $with_rollup = "";
         if (!empty($state->parsed['OPTIONS'])) {
             foreach ($state->parsed['OPTIONS'] as $option) {
                 if (strtoupper($option['base_expr']) === 'WITH ROLLUP') {
                     $with_rollup = ' WITH ROLLUP';
                 }
             }
         }
         //ignore any other options
         unset($state->parsed['OPTIONS']);
         $procedure = "";
         if (!empty($state->parsed['PROCEDURE'])) {
             $procedure = 'PROCEDURE ' . join('', $state->parsed['PROCEDURE']);
             unset($state->parsed['PROCEDURE']);
         }
         if (!empty($state->parsed['INTO'])) {
             $this->errors[] = array('Unsupported query', 'INTO is not supported');
             return false;
         }
         /*This will process the SELECT clause, doing any SQL rewrites needed for aggregation at the shard level
           The $this->non_distrib flag will be set if the query contains any non-distributable aggregate functions (none currently)
           
           If non-distributable aggregate functions are detected, we will throw this work away and instead use
           $this->process_undistributable_select() [below]
           */
         $select = $this->process_select($state->parsed['SELECT'], $straight_join, $distinct, $state);
         /* Now some special window function columns are needed */
         if (!empty($state->windows)) {
             $used_cols = array();
             for ($i = 0; $i < count($state->windows); ++$i) {
                 $win = $state->windows[$i];
                 $shard_hash = "";
                 foreach ($win['partition'] as $part) {
                     if (empty($used_cols[$part['base_expr']])) {
                         $select['shard_sql'] .= "," . $part['base_expr'];
                         $used_cols[$part['base_expr']] = 1;
                     } else {
                         continue;
                     }
                     if ($shard_hash) {
                         $shard_hash .= ",";
                     }
                     $shard_hash .= $part['base_expr'];
                 }
                 if ($shard_hash == "") {
                     $shard_hash = "'ONE_PARTITION'";
                 }
                 $select['shard_sql'] .= ",SHA1(CONCAT_WS('#'," . $shard_hash . ")) as wf{$i}_hash";
                 $obclause = "";
                 $obhash = "";
                 foreach ($win['order'] as $ob) {
                     if (empty($used_cols[$ob['base_expr']])) {
                         $select['shard_sql'] .= "," . $ob['base_expr'];
                         $used_cols[$ob['base_expr']] = 1;
                     }
                     if ($obclause) {
                         $obclause .= ",";
                     }
                     if ($obhash) {
                         $obclause .= ",";
                     }
                     $obclause .= $ob['base_expr'] . " " . $ob['direction'];
                     $obhash .= $ob['base_expr'];
                 }
                 if ($obhash) {
                     $select['shard_sql'] .= ",SHA1(CONCAT_WS('#'," . $obhash . ")) as wf{$i}_obhash";
                 }
                 $state->windows[$i]['order_by'] = $obclause;
             }
         }
         if (trim($select['shard_sql']) == 'SELECT') {
             $select['shard_sql'] = 'SELECT 1';
         }
         if (!empty($select['error'])) {
             $this->errors[] = $select['error'];
             return false;
         }
         if (!$state->non_distrib) {
             unset($state->parsed['SELECT']);
         } else {
             $select = $this->process_undistributable_select($state->parsed['SELECT'], $straight_join, $distinct);
             unset($state->parsed['SELECT']);
         }
         if (!empty($select['error'])) {
             $this->errors[] = $select['error'];
             return false;
         }
         $coord_group = "";
         $shard_group = "";
         if (!empty($state->parsed['GROUP'])) {
             $group = $state->parsed['GROUP'];
             foreach ($group as $item) {
                 if ($coord_group) {
                     $coord_group .= ",";
                 }
                 $coord_group .= $this->process_group_item($item, $state->used_colrefs, 0, $select);
             }
         }
         if ($shard_group !== "") {
             if ($select['group_aliases'] !== "") {
                 $select['group_aliases'] .= ",";
             }
             $select['group_aliases'] .= $shard_group;
         }
         $select['coord_group'] = $coord_group;
         $select['shard_group'] = $select['group_aliases'];
         if (empty($state->pushed) && empty($state->parsed['GROUP'])) {
             $select['coord_group'] = "";
             $select['shard_group'] = "";
         }
         unset($state->parsed['GROUP']);
         if ($having_info = $this->process_having($state)) {
             if (trim($having_info['select'])) {
                 $select['shard_sql'] .= "," . $having_info['select'];
             }
         } else {
             $having_info = array('having' => " ");
         }
         $order_by = "";
         if (!empty($state->windows)) {
             $order_by = "wf0_hash";
             if (!empty($state->windows[0]['order_by'])) {
                 $order_by .= ",";
                 $order_by .= $state->windows[0]['order_by'];
             }
         }
         if (!empty($state->parsed['ORDER'])) {
             foreach ($state->parsed['ORDER'] as $o) {
                 if ($order_by) {
                     $order_by .= ',';
                 }
                 switch ($o['expr_type']) {
                     case 'subquery':
                         $this->errors[] = "Subqueries are not supported in the ORDER BY clause.\n";
                         return false;
                         break;
                     case 'custom_function':
                         $this->errors[] = "Custom functions are not supported in the ORDER BY clause.\n";
                         return false;
                         break;
                     case 'function':
                     case 'expression':
                     case 'aggregate_function':
                         $item = "";
                         $expr_alias = 'expr_' . mt_rand(1, 10000000000);
                         $this->concat_all_subtrees($o, $item);
                         $select['shard_sql'] .= ", {$item} as {$expr_alias}";
                         $order_by .= $o['base_expr'] . '(' . $expr_alias . ')';
                         if ($o['expr_type'] === 'aggregate_function') {
                             switch (strtoupper($o['base_expr'])) {
                                 case 'SUM':
                                     $select['coord_odku'][] = "{$expr_alias}={$expr_alias} +  VALUES({$expr_alias})";
                                     break;
                                 case 'MIN':
                                     $select['coord_odku'][] = "{$expr_alias}=IF({$expr_alias} < VALUES({$expr_alias}), VALUES({$expr_alias}),{$expr_alias})";
                                     break;
                                 case 'MAX':
                                     $select['coord_odku'][] = "{$expr_alias}=IF({$expr_alias} > VALUES({$expr_alias}), VALUES({$expr_alias}), {$expr_alias})";
                                     break;
                                 case 'COUNT':
                                     $select['coord_odku'][] = "{$expr_alias}={$expr_alias} +  VALUES({$expr_alias})";
                                     break;
                                 default:
                                     $this->errors[] = "Unsupported aggregate function in ORDER BY";
                                     return false;
                             }
                         }
                         break;
                     default:
                         if (strpos($o['base_expr'], '.') !== false) {
                             $expr_alias = 'expr_' . mt_rand(1, 10000000000);
                             $select['shard_sql'] .= ", {$o['base_expr']} as {$expr_alias}";
                             $order_by .= $expr_alias;
                         } else {
                             $order_by .= $o['base_expr'];
                         }
                         break;
                 }
                 $order_by .= " " . $o['direction'];
             }
         }
         if ($order_by) {
             $order_by = " ORDER BY {$order_by}";
         }
         unset($state->parsed['ORDER']);
         if (empty($state->parsed['FROM'])) {
             $this->errors = array('Unsupported query', 'Missing FROM clause');
             return false;
         } else {
             $select['shard_sql'] .= "\n" . $this->process_from($state->parsed['FROM'], $state);
             $in_tables = "";
             $is_virtual = false;
             foreach ($state->tables as $val => $attr) {
                 if ($attr['is_virtual']) {
                     $is_virtual = true;
                 }
                 if ($in_tables !== "") {
                     $in_tables .= ",";
                 }
                 $in_tables .= "'{$val}'";
             }
             /*
              * If no real tables were used (only subquery in from clause for example)
              * then direct the query to the coordinator shard, as it will read from
              * the output of the subquery in the from clause or other construct.
              */
             if ($in_tables === "") {
                 $state->force_shard = array($state->tmp_shard);
             } elseif ($in_tables !== "" && !$is_virtual) {
                 /* 
                  * Otherwise, if this isn't a virtual table (v_ view) then we have to inspect
                  * the tables to see if any of them contain the shard key.  If any v_ tables are
                  * used then we assume it is sharded (what is point of virtual table otherwise?)
                  */
                 $sql = "select count(*) cnt from information_schema.columns where table_schema='" . $state->tmp_shard['db'] . "' and column_name='" . $this->shard_column . "' and table_name IN({$in_tables});";
                 if (!($stmt = $state->DAL->my_query($sql))) {
                     $this->errors[] = 'Could not access information schema: ' . $state->DAL->my_error();
                     return false;
                 }
                 $row = $state->DAL->my_fetch_assoc();
                 /* If none of the tables contain the sharding key, the query goes to all shards */
                 if ($row['cnt'] === '0') {
                     if (isset($state->tmp_shard)) {
                         $state->force_shard = array($state->tmp_shard);
                     } else {
                         $state->force_shard = array($state->shards[array_rand($state->shards)]);
                     }
                 }
             } else {
                 $state->force_shard = array();
             }
             if (!isset($state->table_name) || $state->table_name == "") {
                 $state->table_name = "aggregation_tmp_" . mt_rand(1, 100000000);
             }
             $select['coord_sql'] .= "\nFROM `{$state->table_name}`";
             $select['winfunc_sql'] .= "\nFROM `{$state->table_name}`";
             unset($state->parsed['FROM']);
         }
         //note that this will extract inlists and store them in $this->in_lists (if inlist optimization is on)
         $where_clauses = $this->process_where(empty($state->parsed['WHERE']) ? array() : $state->parsed['WHERE'], $state);
         if (!empty($state->parsed['WHERE'])) {
             unset($state->parsed['WHERE']);
         }
         if (empty($state->force_shard)) {
             $state->force_shard = $state->shards;
         }
         /* Handle dependent subqueries if the subquery has to be on one or more shards that are different from tmp_shard.*/
         foreach ($state->dependent_subqueries as $sub_table) {
             if (empty($state->tables)) {
                 continue;
             }
             //don't do this work if there are no real tables
             $sql = "SHOW CREATE TABLE {$sub_table}";
             $stmt = $state->DAL->my_query($sql);
             $row = $state->DAL->my_fetch_array();
             $create_sql = $row[1];
             if (!$row) {
                 $this->errors[] = 'Could not get DDL for intermediate table: ' . $sub_table . "(" . $state->DAL->my_error() . ")";
                 $this->cleanup_tables($state);
                 return false;
             }
             $export_sql = "SELECT * FROM {$sub_table}";
             $stmt = $state->DAL->my_unbuffered_query($export_sql);
             if (!$stmt) {
                 $this->errors[] = 'Could not open intermediate result from: ' . $sub_table . "(" . $state->DAL->my_error() . ")";
                 $this->cleanup_tables($state);
                 return false;
             }
             $fname = tempnam('/tmp', 'SQ');
             $fh = fopen($fname, 'wb');
             if (!$fh) {
                 $this->errors[] = 'Could not open temporary file for spooling dependent subquery to shards: ' . $fname;
                 return false;
             }
             while ($row = $state->DAL->my_fetch_array()) {
                 $outrow = "";
                 foreach ($row as $col) {
                     $col = str_replace('"', '\\"', $col);
                     if (!is_numeric($col)) {
                         $col = '"' . $col . '"';
                     }
                     if ($outrow) {
                         $outrow .= ",";
                     }
                     $outrow .= $col;
                 }
                 if (!fputs($fh, $outrow . "\n")) {
                     $this->errors[] = 'Could not write to temporary file for spooling dependent subquery to shards: ' . $fname;
                     return false;
                 }
             }
             fclose($fh);
             foreach ($state->force_shard as $shard) {
                 if ($shard === $state->tmp_shard) {
                     continue;
                 }
                 $dal = SimpleDAL::factory($shard);
                 $dal->my_select_db($shard['db']);
                 $stmt = $dal->my_query($create_sql);
                 if (!$stmt) {
                     $this->errors[] = "Failed to create destination table: {$sub_table} during intermediate copy" . "(" . $dal->my_error() . ")";
                     $this->cleanup_tables($state);
                     return false;
                 }
                 $load_sql = "LOAD DATA LOCAL INFILE '{$fname}' INTO TABLE `{$shard['db']}`.{$sub_table} FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\\\\'";
                 $stmt = $dal->my_query($load_sql);
                 if (!$stmt) {
                     $this->errors[] = 'Failed to LOAD DATA LOCAL INFILE to table: $sub_table during intermediate copy' . "(" . $dal->my_error() . ")";
                     $this->cleanup_tables($state);
                     return false;
                 }
                 $dal->my_close();
             }
             unlink($fname);
         }
         /* semijoin optimization */
         if (!empty($state->push_join)) {
             $select['shard_sql'] .= join('', $state->push_join);
         }
         $used_limit = 0;
         if (!empty($state->parsed['LIMIT'])) {
             $used_limit = 1;
             if ($state->parsed['LIMIT']['offset'] == "") {
                 $state->parsed['LIMIT']['offset'] = 0;
             }
             $order_by .= " LIMIT {$state->parsed['LIMIT']['offset']},{$state->parsed['LIMIT']['rowcount']}";
             unset($state->parsed['LIMIT']);
         }
         foreach ($state->parsed as $key => $clauses) {
             $this->errors[] = array('Unsupported query', $key . ' clause is not supported');
         }
         if ($this->errors) {
             return false;
         }
         //process_select only provides a list of positions
         if ($select['coord_group']) {
             $select['coord_group'] = ' GROUP BY ' . $select['coord_group'] . $having_info['having'] . $with_rollup . $procedure;
         }
         if ($select['shard_group']) {
             $select['shard_group'] = ' GROUP BY ' . $select['shard_group'];
         }
         $queries = array();
         $where_base = " 1=1 ";
         if (!empty($state->push_where)) {
             $where_base .= ' AND ' . join(' ', $state->push_where);
         }
         if ($state->mysql_version['supports_partition_hint'] == 1) {
             #if table is not partitioned, then don't inject any partition hints
             #have to do all removals in a first pass
             foreach ($state->tables as $table_name => $table_info) {
                 if (empty($table_info['alias']) || $table_info['alias'] === '') {
                     $alias = $table_name;
                 } else {
                     $alias = $table_info['alias'];
                 }
                 $alias = trim($alias, '`');
                 if ($table_info['partition_info']['partition_names'][0] == "") {
                     $select['shard_sql'] = str_replace('%p' . $alias, "", $select['shard_sql']);
                 }
             }
             #now build out queries for each partition for each table
             foreach ($state->tables as $table_name => $table_info) {
                 if (empty($table_info['alias']) || $table_info['alias'] === '') {
                     $alias = $table_name;
                 } else {
                     $alias = $table_info['alias'];
                 }
                 $alias = trim($alias, '`');
                 if ($table_info['partition_info']['partition_names'][0] != "") {
                     if (empty($queries)) {
                         foreach ($table_info['partition_info']['partition_names'] as $x => $partition_name) {
                             $queries[] = str_replace('%p' . $alias, " PARTITION(" . $partition_name . ") ", $select['shard_sql']);
                         }
                     } else {
                         $old_queries = $queries;
                         $queries = array();
                         foreach ($old_queries as $query) {
                             foreach ($table_info['partition_info']['partition_names'] as $x => $partition_name) {
                                 $query = str_replace('%p' . $alias, " PARTITION(" . $partition_name . ") ", $query);
                                 $queries[] = $query;
                             }
                         }
                     }
                     #remove the token so that it doesn't mess up further tables in the query
                     $select['shard_sql'] = str_replace('%p' . $alias, "", $select['shard_sql']);
                 }
             }
         } else {
             $explain_query = "EXPLAIN PARTITIONS " . $select['shard_sql'] . ' ' . $where;
             $stmt = $state->DAL->my_query($explain_query);
             $state->added_where = 0;
             if ($where == "") {
                 $where = " WHERE 1=1";
             }
             while ($row = $state->DAL->my_fetch_assoc($stmt)) {
                 $table_or_alias = $row['table'];
                 $partitions = $row['partitions'];
                 $partitions = explode(',', $partitions);
                 foreach ($partitions as $partition_name) {
                     foreach ($state->tables as $table_name => $table_info) {
                         if ($table_or_alias === $table_name || $table_or_alias === $table_info['alias']) {
                             if (!empty($table_info['partition_info']['where_clauses'][$partition_name])) {
                                 $expr = $table_info['partition_info']['where_clauses'][$partition_name];
                                 if ($table_info['alias'] !== '') {
                                     $expr = preg_replace("/" . $table_info['partition_info']['partition_expression'] . "/", $table_info['alias'] . '.' . $table_info['partition_info']['partition_expression'], $expr);
                                 }
                                 $queries[] = $select['shard_sql'] . ' ' . $where . ' AND ' . $expr;
                                 $state->added_where = 1;
                             }
                         }
                     }
                 }
             }
         }
         $where_base = $state->added_where != 1 ? " WHERE {$where_base} " : " AND 1=1 ";
         if (empty($where_clauses)) {
             $where_clauses = array($where_base);
         } else {
             $old_clauses = $where_clauses;
             $where_clauses = array();
             foreach ($old_clauses as $new_where_clause) {
                 $where_clauses[] = $where_base . ' AND ' . $new_where_clause;
             }
         }
         #queries is empty if no partition parallelism was added
         #parallelism may still have been added from BETWEEN clauses ($where_clauses may be an array of clauses)
         if (empty($queries)) {
             foreach ($where_clauses as $where_clause) {
                 if (!$state->no_pushdown_limit) {
                     $queries[] = $select['shard_sql'] . $where_clause . $select['shard_group'] . ($used_limit ? $order_by : '');
                 } else {
                     $queries[] = $select['shard_sql'] . $where_clause . $select['shard_group'];
                 }
             }
         } else {
             $old_queries = $queries;
             $queries = array();
             foreach ($old_queries as $query) {
                 foreach ($where_clauses as $where_clause) {
                     if (!$state->no_pushdown_limit) {
                         $nq = $query . ' ' . $where_clause . ' ' . $select['shard_group'] . ($used_limit ? $order_by : '');
                     } else {
                         $nq = $query . ' ' . $where_clause . ' ' . $select['shard_group'];
                     }
                     $queries[] = $nq;
                 }
             }
         }
     } elseif (!empty($state->parsed['INSERT']) || !empty($state->parsed['REPLACE'])) {
         if (!empty($state->parsed['INSERT'])) {
             $replace = "";
             $to_table = $state->parsed['INSERT'][1]['table'];
         } else {
             $replace = " REPLACE ";
             $to_table = $state->parsed['REPLACE'][1]['table'];
         }
         $result = $this->get_insert_cols($state);
         if ($result === false) {
             return false;
         }
         $cols = $result[0];
         $col_list = $result[1];
         $shard_column_pos = false;
         foreach ($cols as $pos => $col) {
             if (trim($col, '` ') == $this->shard_column) {
                 $shard_column_pos = $pos;
                 break;
             }
         }
         if (!empty($state->parsed['SELECT'])) {
             $parsed = $state->parsed;
             $ignore = "";
             if (!empty($parsed['OPTIONS'])) {
                 foreach ($parsed['OPTIONS'] as $option) {
                     if (strtolower($option) == 'ignore') {
                         $ignore = 'ignore';
                         break;
                     }
                 }
             }
             unset($parsed['INSERT']);
             unset($parsed['REPLACE']);
             unset($parsed['OPTIONS']);
             $sub_state = shardquery::new_state();
             $this->set_schema($this->schema_name, $sub_state);
             $sub_state->tmp_shard = $state->tmp_shard;
             $sub_table_name = "aggregation_tmp_" . mt_rand(1, 100000000);
             $sub_state->table_name = $sub_table_name;
             $sub_state->dal = simpledal::factory($sub_state->tmp_shard);
             $this->query($parsed, false, $sub_state, true, false);
             $state->extra_tables[] = $sub_table_name;
             $state->extra_tables = array_merge($state->extra_tables, $sub_state->extra_tables);
             $result = $this->load_from_table($sub_state, $to_table, $col_list, $shard_column_pos, $ignore, $replace);
             if ($result) {
                 $this->state->coord_sql = "select 'INSERT .. SELECT completed ok' as message from dual";
                 return true;
             } else {
                 return false;
             }
         } else {
             if ($replace == "") {
                 $sql = "INSERT INTO `" . trim($state->parsed['INSERT'][1]['table'], '`') . "` {$col_list} VALUES ";
                 $replace = "INSERT";
             } else {
                 $sql = "REPLACE INTO `" . trim($state->parsed['REPLACE'][1]['table'], '`') . "` {$col_list} VALUES ";
             }
             $values = array();
             $val_count = 0;
             $column_count = count($cols);
             foreach ($state->parsed['VALUES'] as $record) {
                 $vals = trim($record['base_expr'], ',');
                 //this row is going to go to a single shard
                 if ($shard_column_pos !== false) {
                     if (count($record['data']) !== $column_count) {
                         $this->errors[] = 'Value count does not match column count!';
                         return false;
                     }
                     $shard_id = $this->map_shard($this->shard_column, $record['data'][$shard_column_pos]['base_expr'], $this->state->schema_name, '=', true);
                     $shard_id = @array_pop(array_keys(array_pop($shard_id)));
                     if (empty($values[$shard_id])) {
                         $values[$shard_id] = $vals;
                     } else {
                         $values[$shard_id] .= ",{$vals}";
                     }
                     $val_count++;
                     //the row is going to all shards
                 } else {
                     foreach ($this->shards as $shard_id => $shard) {
                         if (empty($values[$shard_id])) {
                             $values[$shard_id] = $vals;
                         } else {
                             $values[$shard_id] .= ",{$vals}";
                         }
                         $val_count++;
                     }
                 }
             }
             $state->insert_sql = $sql;
             $state->insert_values = $values;
         }
         return true;
     } else {
         if (!empty($state->parsed['CREATE']) && !empty($state->parsed['TABLE']) && !empty($state->parsed['SELECT'])) {
             /* CTAS */
             $table_name = $state->parsed['TABLE']['base_expr'];
             $schema_tokens = array(';', '"' . $this->current_schema . '"' . ".", "`{$this->current_schema}`.", $this->current_schema . ".");
             $create_sql = $state->orig_sql;
             $create_sql = str_replace($schema_tokens, "", $create_sql);
             $create_sql = preg_replace('/\\s+limit\\s+\\d+/i', '', $create_sql) . ' LIMIT 0';
             foreach ($this->shards as $name => $shard) {
                 $dal = SimpleDAL::factory($shard);
                 $dal->my_select_db($shard['db']);
                 $dal->my_query($create_sql);
                 if ($dal->my_error()) {
                     $this->errors[] = "on shard: " . $name . " errorno:" . $dal->my_errno() . ",message:" . $dal->my_error();
                 }
                 $dal->my_close();
             }
             unset($state->parsed['CREATE']);
             unset($state->parsed['TABLE']);
             $result = $this->get_insert_cols($state, $table_name);
             if ($result === false) {
                 return false;
             }
             $cols = $result[0];
             $col_list = $result[1];
             $shard_column_pos = false;
             foreach ($cols as $pos => $col) {
                 if (trim($col, '` ') == $this->shard_column) {
                     $shard_column_pos = $pos;
                     break;
                 }
             }
             $ignore = "";
             $replace = "";
             $sub_state = shardquery::new_state();
             $this->set_schema($this->schema_name, $sub_state);
             $sub_state->tmp_shard = $state->tmp_shard;
             $sub_table_name = "aggregation_tmp_" . mt_rand(1, 100000000);
             $sub_state->table_name = $sub_table_name;
             $sub_state->dal = simpledal::factory($sub_state->tmp_shard);
             $this->query($state->parsed, false, $sub_state, true, false);
             $state->extra_tables[] = $sub_table_name;
             $state->extra_tables = array_merge($state->extra_tables, $sub_state->extra_tables);
             $result = $this->load_from_table($sub_state, $table_name, "", $shard_column_pos, $ignore, $replace);
             if ($result) {
                 $this->state->coord_sql = "select 'CREATE .. SELECT completed ok' as message from dual";
                 return true;
             } else {
                 return false;
             }
         }
         /*else*/
         //This query should be broadcast to all nodes
         #$state->broadcast_query = $state->orig_sql;
         foreach ($this->shards as $shard_name => $shard) {
             $dal = SimpleDAL::factory($shard);
             $dal->my_select_db($shard['db']);
             if ($dal->my_error()) {
                 $this->errors[] = $dal->my_error();
                 continue;
             }
             $dal->my_query($state->orig_sql);
             if ($dal->my_error()) {
                 $this->errors[] = $dal->my_error();
             }
         }
         $this->state->coord_sql = "select 'Sent query to all shards' as message from dual";
         return true;
     }
     //if(!isset($state->table_name)) $state->table_name = "agg_
     if (empty($order_by)) {
         $order_by = "";
     }
     if (empty($select['coord_group'])) {
         $select['coord_group'] = "";
     }
     if (empty($select['coord_odku'])) {
         $select['coord_odku'] = "";
     }
     if (empty($select['group_aliases'])) {
         $select['group_aliases'] = "";
     }
     $state->coord_sql = $select['coord_sql'] . ' ' . $select['coord_group'] . ' ' . $order_by;
     $state->winfunc_sql = $select['winfunc_sql'];
     $state->winfunc_group = $select['coord_group'];
     if (!isset($state->coord_odku) || empty($state->coord_odku)) {
         $state->coord_odku = $select['coord_odku'];
     }
     $state->shard_sql = $queries;
     $state->agg_key_cols = $select['group_aliases'];
     if (!empty($select['custom_functions'])) {
         $state->custom_functions = $select['custom_functions'];
     } else {
         $state->custom_functions = array();
     }
     $explain = "Shard-Query optimizer messages:";
     if ($state->agg_key_cols) {
         $explain .= "\n  * The following projections may be selected for a UNIQUE CHECK on the storage node operation:\n  {$state->agg_key_cols}\n";
         if ($state->coord_odku) {
             $explain .= "\n  * storage node result set merge optimization enabled:\n  ON DUPLICATE KEY UPDATE\n\t" . join(",\n\t", $state->coord_odku);
         }
     }
     if (isset($state->messages)) {
         foreach ($state->messages as $msg) {
             $explain .= "\n  * {$msg}";
         }
     }
     if ($state->non_distrib) {
         $explain .= "\n  * Aggregation pushdown disabled due to use of non-distributable and/or custom functions.";
     }
     $explain .= "\n\nSQL TO SEND TO SHARDS:\n";
     $explain .= print_r($state->shard_sql, true);
     $explain .= "SQL TO SEND TO COORDINATOR NODE:\n{$state->coord_sql}\n";
     $state->explain = $explain;
     return true;
 }
Exemplo n.º 4
0
 public function run($arg)
 {
     require 'shard-query-config.php';
     if (isset($cache_servers) && !empty($cache_servers)) {
         $this->cache = new SaltedCache($cache_servers);
     }
     if (isset($cache_rules) && !empty($cache_rules)) {
         $this->cache_rules = $cache_rules;
     }
     if (!$arg) {
         return false;
     }
     $arg = (object) $arg;
     $fields = false;
     $errors = false;
     $rows = false;
     $sql = "";
     $has_rows = false;
     $resultset = null;
     if (!isset($arg->schema_name)) {
         $arg->schema_name = null;
     }
     $SQ = new ShardQuery($arg->schema_name);
     print_r($arg);
     if (empty($arg->sql) || !trim($arg->sql)) {
         return false;
     }
     if (stripos($arg->sql, 'select database()') !== false) {
         $fields = array(array('type' => 250, 'name' => 'DATABASE()'));
         $rows = array(array($SQ->state->schema_name));
         $resultset = array('fields' => &$fields, 'rows' => &$rows);
     } elseif (preg_match('/^SHOW/i', $arg->sql)) {
         $shard_id = array_rand($SQ->state->shards);
         $DAL = SimpleDAL::factory($SQ->state->shards[$shard_id]);
         $DAL->my_select_db($SQ->state->shards[$shard_id]['db']);
         if (preg_match('/show\\s+databases/i', $arg->sql)) {
             $databases = $SQ->state->mapper->get_schemata();
             $fields = array(array('type' => 250, 'name' => 'Database'));
             /*	$rows = array();
             				foreach($databases as $schema_info) {
             					$rows[] = array($schema_info['schema_name']);
             				}		
             			*/
             $rows = array(array($SQ->state->schema_name));
             $resultset = array('fields' => &$fields, 'rows' => &$rows);
         } elseif ($stmt = $DAL->my_query($arg->sql)) {
             $row = $DAL->my_fetch_assoc();
             foreach ($row as $field => $val) {
                 $rows[0][] = $val;
                 $fields[] = array('type' => 250, 'name' => $field);
             }
             while ($row = $DAL->my_fetch_array($stmt, MYSQL_NUM)) {
                 $rows[] = $row;
             }
             print_r($rows);
             $resultset = array('fields' => &$fields, 'rows' => &$rows);
         }
         $DAL->my_close();
         unset($DAL);
     } elseif (preg_match('/select\\s+.*\\sfrom\\s.*/i', $arg->sql) || preg_match('/(create|drop|alter)\\s+.*/i', $arg->sql)) {
         $cache_ttl = null;
         if (isset($this->cache)) {
             $patterns = $this->cache_rules;
             foreach ($patterns as $pattern => $ttl) {
                 if (preg_match($pattern, $arg->sql)) {
                     $cache_ttl = $ttl;
                     $resultset = $this->cache->get($arg->sql, $arg->schema_name);
                     break;
                 }
             }
         }
         if (!$resultset) {
             $stmt = $SQ->query($arg->sql);
             if (!empty($SQ->errors)) {
                 $errors = trim(str_replace(array("\n", "Array", "(", ")", "  "), "", print_r($SQ->errors, true)));
             }
             if ($stmt) {
                 $has_rows = true;
                 $rows = array(array());
                 # get the first row and use it to construct the list of fields + collect row data
                 # in this first fetch we process data one output column at a time
                 $row = $SQ->DAL->my_fetch_assoc($stmt);
                 foreach ($row as $field => $val) {
                     $rows[0][] = $val;
                     $fields[] = array('type' => 250, 'name' => $field);
                 }
                 # fetch the rest of the rows numerically and stuff into $rows, a row at a time
                 while ($row = $SQ->DAL->my_fetch_array($stmt, MYSQL_NUM)) {
                     $rows[] = $row;
                 }
                 $resultset = array('fields' => &$fields, 'rows' => &$rows);
             }
             if (isset($cache_ttl)) {
                 $this->cache->set($arg->sql, $resultset, $cache_ttl, $arg->schema_name);
             }
             # return the actual object so that the proxy can finish aggregation and drop the table
         }
     } else {
         $sql = $arg->sql;
     }
     return json_encode(array('resultset' => $resultset, 'errors' => $errors, 'sql' => $sql, 'has_rows' => $has_rows));
 }