function remove_agg_tmp_tables() {
  require 'instrumentation.php';
  require 'simple-dal.php';
  $schema_id = $this->params['schema_id'];

  // find all shards for this schema
  $shards = Doo::db()->find('Shards', array('where' => "schema_id = {$schema_id}"));

  $drop_count = 0;
  foreach ($shards as $s) {
    $server = get_object_vars($s);
    $server['user'] = $s->username;
    $conn = SimpleDAL::factory($server);
    $conn->my_select_db($server['db']);

    // get the tables and drop any leftover aggregation temp tables
    $stmt = $conn->my_query("SHOW TABLES");
    while ($row = $conn->my_fetch_assoc($stmt)) {
      $table = $row['Tables_in_' . $server['db']];
      if (preg_match('/(aggregation_tmp_|agg_tmp_)/', $table)) {
        $conn->my_query("DROP TABLE " . $table);
        $drop_count++;
      }
    }
    $conn->my_close();
  }

  $this->res->message = "Dropped {$drop_count} Tables";
  $this->res->success = true;
}
public function run($arg) {
  static $created_table = array();
  $func_arg = $arg['function_info']['arg'];

  switch (strtolower($arg['function_info']['function'])) {
    case 'percentile':
      // validate arguments: PERCENTILE(column, constant)
      if ($func_arg[0]['expr_type'] != 'colref') {
        return return_error('PERCENTILE: Only column references are allowed as the first argument to this function', $arg['tmp_shard'], 'ERR_SQ_INVALID_FUNC_CALL');
      }
      if ($func_arg[1]['expr_type'] != 'const') {
        return return_error('PERCENTILE: Only constants are allowed as the second argument to this function', $arg['tmp_shard'], 'ERR_SQ_INVALID_FUNC_CALL');
      }
      if (!empty($func_arg[2])) {
        return return_error('PERCENTILE: Wrong number of arguments to this function', $arg['tmp_shard'], 'ERR_SQ_INVALID_FUNC_CALL');
      }

      $colname = $arg['function_info']['colref_map'][$func_arg[0]['base_expr']];
      $conn = SimpleDAL::factory($arg['tmp_shard']);
      if ($conn->my_error()) {
        return return_error('Failed to connect to storage node', $arg['tmp_shard'], $conn->my_error());
      }
      $conn->my_select_db($arg['tmp_shard']['db']);

      // create the result table for this function call once per worker
      if (empty($created_table[$arg['func_call_id']])) {
        $sql = "CREATE TABLE IF NOT EXISTS `" . $arg['func_call_id'] . "` (gb_hash char(40) primary key, retval double) ";
        $result = $conn->my_query($sql);
        if (!$result || $conn->my_error()) {
          return return_error('SQL error:', $arg['tmp_shard'], $conn->my_error());
        }
        $created_table[$arg['func_call_id']] = 1;
      }

      // count the distinct values in this group
      $sql = "select count(distinct {$colname}) cnt from `" . $arg['table'] . "` where gb_hash = '" . $arg['gb_hash'] . "'";
      $result = $conn->my_query($sql);
      if (!$result || $conn->my_error()) {
        return return_error('SQL error:', $arg['tmp_shard'], $conn->my_error());
      }
      $row = $conn->my_fetch_assoc();
      if (!$row) {
        return return_error('No row found for given gb_hash:' . $arg['gb_hash'], $arg['tmp_shard'], 'ERR_SQ_NO_ROW_FOUND');
      }

      // convert the percentile into a row offset into the ordered distinct values
      $percentile_at = $func_arg[1]['base_expr'];
      $limit = floor(0.01 * $percentile_at * $row['cnt']);
      if ($limit < 1) {
        $limit = 0;
      }

      $sql = "insert into `" . $arg['func_call_id'] . "` select distinct '" . $arg['gb_hash'] . "', {$colname} from `" . $arg['table'] . "` where gb_hash ='" . $arg['gb_hash'] . "' order by {$colname} limit {$limit},1";
      $result = $conn->my_query($sql);
      if (!$result || $conn->my_error()) {
        return return_error('SQL error:', $arg['tmp_shard'], $conn->my_error());
      }

      return true;
  }
}
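/*
 * How the PERCENTILE math above works, as a standalone sketch (not part of
 * the original source; the table and column names are illustrative). The
 * worker turns the requested percentile into a row offset into the ordered
 * distinct values of the group:
 *
 *   $limit = floor(0.01 * $percentile_at * $row['cnt']);
 *
 * e.g. PERCENTILE(col, 90) over a group with 200 distinct values gives
 * floor(0.01 * 90 * 200) = 180, so the INSERT ... SELECT above reduces to:
 *
 *   SELECT DISTINCT col FROM tbl WHERE gb_hash = '...'
 *   ORDER BY col LIMIT 180,1
 *
 * i.e. the single value sitting 180 rows into the sorted distinct set.
 */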
function register_completion($query_id, $job_type, $completion_type, $completion_message = null) {
  require 'shard-query-config.php'; // provides $config_database

  if (!$completion_message) {
    $completion_message = "NULL";
  } else {
    $completion_message = "'" . str_replace("'", "''", $completion_message) . "'";
  }

  $sql = "INSERT INTO job_worker_status ( job_id, worker_type, completion_type, completion_message, complete_time )";
  $sql .= " VALUES ( {$query_id}, '{$job_type}', '{$completion_type}', {$completion_message}, NOW()) ";

  $conn = SimpleDAL::factory($config_database);
  $conn->my_select_db($config_database['db']);
  $conn->my_query($sql);
}
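/*
 * Illustrative call sites (not from the original source; the argument
 * values are made up):
 *
 *   // record a successful completion with a message
 *   register_completion(42, 'loader', 'success', "Loaded 10000 rows");
 *
 *   // omitting the message stores SQL NULL in completion_message
 *   register_completion(42, 'loader', 'error');
 */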
public function refresh_information_schema($schema_name = null, $shard_name = "%") {
  if (isset($schema_name)) {
    $subquery = "(select id from schemata where schema_name='" . $this->escape($schema_name) . "' limit 1)";
  } else {
    $subquery = "(select id from schemata where is_default_schema = true limit 1)";
  }
  $shard_name = $this->escape($shard_name);

  $sql = "select * from shards where schema_id = {$subquery} and shard_name like '{$shard_name}';";
  $stmt = $this->execute($sql);

  while ($row = $this->conn->my_fetch_assoc($stmt)) {
    #these calls block and setting some kind of timeout would be nice but oh well
    $conn = SimpleDAL::factory($row);
    if (!$conn) {
      continue;
    }
    $shard_id = $row['shard_id'];

    $sql = "replace into is_tables select *, {$shard_id} from information_schema.tables;";
    $conn->my_query($sql);
    $sql = "replace into is_columns select *, {$shard_id} from information_schema.columns;";
    $conn->my_query($sql);
    $sql = "replace into is_partitions select *, {$shard_id} from information_schema.partitions;";
    $conn->my_query($sql);
    $conn->my_query('FLUSH TABLES');
  }
}
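/*
 * Illustrative call sites (not from the original source; the object and
 * schema names are made up). $shard_name is matched with LIKE, so
 * wildcards select a subset of shards:
 *
 *   $mapper->refresh_information_schema();                  // default schema, all shards
 *   $mapper->refresh_information_schema('mydb', 'shard_%'); // one schema, matching shards
 */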
$sql = "insert into schemata_config(schema_id, var_name, var_value) values({$schema_id}, '" . $mapper->conn->my_escape_string($var_name) . "','" . $mapper->conn->my_escape_string($var_value) . "') ON DUPLICATE KEY UPDATE var_value = '" . $mapper->conn->my_escape_string($var_value) . "'"; if ($var_name != 'mapper') { unset($config[$var_name]); } $mapper->conn->my_query($sql) or die("SETUP FAILED: " . $mapper->conn->my_error()); } echo "* Populating/Updating shard list\n"; $mapper->conn->my_query('DELETE IGNORE shards.* from shards join schemata on shards.schema_id = schemata.id where schema_name = "' . $config['schema_name'] . '"') or die($mapper->conn->my_error()); foreach ($shards as $shard_name => $shard) { if (!isset($coord_shard_name) || isset($coord_shard_name) && $shard_name == $coord_shard_name) { $coord_shard = 1; } else { $coord_shard = 0; } $mapper->add_shard($shard_name, $shard, $coord_shard, $accepts_new_rows = true, null, $config['schema_name']); $dal = SimpleDAL::factory($shard); $dal->my_select_db($shard['db']); if ($dal->my_error()) { die("SETUP FAILED: Could not verify write access to {$shard_name}\n" . $dal->my_error() . "\n"); } $sql = "CREATE TABLE IF NOT EXISTS shard_write_ok(c1 int);"; $dal->my_query($sql); if ($dal->my_error()) { die("SETUP FAILED: Could not verify write access to {$shard_name}\n" . $dal->my_error() . "\n"); } $dal->my_query('DROP TABLE IF EXISTS shard_write_ok;'); if ($dal->my_error()) { die("SETUP FAILED: Could not verify write access to {$shard_name}\n" . $dal->my_error() . "\n"); } } echo "* Populating gearman servers\n";
function enumerate_partitions($schema_name, $table_name, $all_partition_types, $conn = null) {
  switch ($this->server['dsn-prefix']) {
    case 'mysql':
      $schema_name = trim($schema_name, '`');
      $table_name = trim($table_name, '`');
      $sql = SimpleDAL::get_mysql_partition_sql($schema_name, $table_name, $all_partition_types);
      break;

    case 'pgsql':
      #FIXME: probably broken
      $sql = "SELECT chk.consrc as expression
                FROM pg_catalog.pg_inherits i
                JOIN pg_catalog.pg_class c1 ON i.inhparent = c1.oid
                JOIN pg_catalog.pg_class c2 ON c2.oid = i.inhrelid
                JOIN pg_catalog.pg_constraint chk ON chk.contype = 'c' AND chk.conrelid = c2.oid
                JOIN pg_catalog.pg_namespace n ON n.oid = c1.relnamespace
               WHERE c1.relname = '{$table_name}'
                 AND n.nspname = '{$schema_name}'";
      break;

    default:
      return false; // unsupported DSN prefix
  }

  $stmt = $this->my_query($sql, $conn);
  if (!$stmt) {
    return false;
  }

  $rows = null;
  $partition_names = array();
  $partition_expression = null;
  while ($row = $this->my_fetch_assoc()) {
    if ($all_partition_types) {
      $partition_names[] = $row['partition_name'];
    } else {
      $rows[$row['partition_name']] = $row['where_expression'];
      $partition_expression = $row['partition_expression'];
    }
  }

  // an unpartitioned table comes back as a single empty partition name
  if (isset($partition_names[0]) && $partition_names[0] == "") {
    $partition_names = array();
  }

  if (isset($rows) || count($partition_names) > 0) {
    $return = array('where_clauses' => $rows, 'partition_expression' => $partition_expression, 'partition_names' => $partition_names);
  } else {
    $return = false;
  }

  return $return;
}
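/*
 * Illustrative return value (not from the original source) for a MySQL
 * RANGE-partitioned table when $all_partition_types is false; the partition
 * names and expressions here are made up:
 *
 *   array(
 *     'where_clauses' => array(
 *       'p0' => "`o_orderdate` < '1995-01-01'",
 *       'p1' => "`o_orderdate` >= '1995-01-01'",
 *     ),
 *     'partition_expression' => 'to_days(`o_orderdate`)',
 *     'partition_names' => array(),
 *   )
 *
 * With $all_partition_types = true, only 'partition_names' is populated.
 */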
protected function load_from_table(&$state, $to_table, $columns_str, $shard_col_pos = false, $ignore = "", $replace = "") {
  $dal =& $state->dal;
  $errors = array();

  $from_table = "`" . trim($state->table_name, '`') . "`";
  $to_table = "`" . trim($to_table, '`') . "`";
  $sql = "select * from {$from_table}";

  $dal->my_select_db($state->tmp_shard['db']);
  $this->check_error($dal);

  $insert_sql = "INSERT INTO {$to_table} (" . trim($columns_str, '()') . ") VALUES ";
  $values = "";

  if ($shard_col_pos === false) {
    /* load goes to all shards */
    foreach ($this->shards as $shard_name => $shard) {
      $stmt = $dal->my_query($sql);
      $dal2 = SimpleDAL::factory($shard);
      if ($this->check_error($dal2)) {
        return false;
      }

      // leave 16KB of headroom below max_allowed_packet for statement overhead
      $dal2->my_query("select @@max_allowed_packet as map");
      if ($this->check_error($dal2)) {
        return false;
      }
      $row = $dal2->my_fetch_assoc();
      $max_allowed_packet = $row['map'] - 16384;
      $row = false;

      $dal2->my_select_db($shard['db']);
      if ($this->check_error($dal2)) {
        return false;
      }

      while ($row = $dal->my_fetch_assoc($stmt)) {
        // flush the batch before it exceeds the packet limit
        if (strlen($values) >= $max_allowed_packet) {
          $dal2->my_query($insert_sql . $values);
          if ($this->check_error($dal2)) {
            return false;
          }
          $values = "";
        }
        if ($values) {
          $values .= ",";
        }
        $rowvals = "";
        foreach ($row as $key => $val) {
          if (!is_numeric($val)) {
            $val = "'" . $dal2->my_real_escape_string($val) . "'";
          }
          if ($rowvals) {
            $rowvals .= ",";
          }
          $rowvals .= $val;
        }
        $values .= "({$rowvals})";
      }

      if ($values != "") {
        $dal2->my_query($insert_sql . $values);
        if ($this->check_error($dal2)) {
          return false;
        }
      }
    }
    return true;

  } else {
    /* load goes to specific shards */
    $stmt = $dal->my_query($sql);
    if ($this->check_error($dal)) {
      return false;
    }

    $out_data = array(); // buffer the data for each shard in here
    $line_start = null;  // file position is not tracked for table-to-table loads
    while ($row = $dal->my_fetch_assoc($stmt)) {
      $row = array_values($row);

      // look up which shard this row's shard-key value maps to
      $info = $this->map_shard($this->shard_column, $row[$shard_col_pos], $this->state->current_schema, '=', true);
      if (!$info) {
        $err = "Discarded row because the partition mapper did not return a valid result.\n";
        $errors[] = array('error' => $err, 'file_pos' => $line_start);
        continue;
      }
      if (count($info) > 1) {
        $err = "Discarded row because the partition mapper returned more than one shard for this row.\n";
        $errors[] = array('error' => $err, 'file_pos' => $line_start);
        continue;
      }
      $keys = array_keys(array_pop($info));
      $shard_name = array_pop($keys);
      if (empty($out_data[$shard_name])) {
        $out_data[$shard_name] = array();
      }

      // escape on the source connection; the destination is not open yet
      $rowvals = "";
      foreach ($row as $key => $val) {
        if (!is_numeric($val)) {
          $val = "'" . $dal->my_real_escape_string($val) . "'";
        }
        if ($rowvals) {
          $rowvals .= ",";
        }
        $rowvals .= $val;
      }
      $out_data[$shard_name][] = "({$rowvals})";
    }

    $dal2 = null;
    foreach ($out_data as $shard_name => $lines) {
      if (isset($dal2)) {
        $dal2->my_close();
      }
      $dal2 = SimpleDAL::factory($this->shards[$shard_name]);
      if ($this->check_error($dal2)) {
        return false;
      }
      $dal2->my_select_db($this->shards[$shard_name]['db']);
      if ($this->check_error($dal2)) {
        return false;
      }
      $dal2->my_query("select @@max_allowed_packet as map");
      if ($this->check_error($dal2)) {
        return false;
      }
      $row = $dal2->my_fetch_assoc();
      if ($this->check_error($dal2)) {
        return false;
      }
      $max_allowed_packet = $row['map'] - 16384;
      $row = false;

      $values = "";
      foreach ($lines as $line) {
        if (strlen($values) >= $max_allowed_packet) {
          $dal2->my_query($insert_sql . $values);
          if ($this->check_error($dal2)) {
            return false;
          }
          $values = "";
        }
        if ($values) {
          $values .= ",";
        }
        $values .= $line;
      }
      if ($values != "") {
        $dal2->my_query($insert_sql . $values);
        if ($this->check_error($dal2)) {
          return false;
        }
      }
    }

    if (!empty($errors)) {
      $this->errors = $errors;
      return false;
    }
    /* ALL OK */
    return true;
  }
}
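/*
 * The batching idiom used in both branches above, condensed (not part of
 * the original source; $dal2, $insert_sql and $rows are illustrative):
 * read @@max_allowed_packet from the destination, leave 16KB of headroom,
 * and flush the multi-row INSERT whenever the VALUES buffer gets too large.
 *
 *   $dal2->my_query("select @@max_allowed_packet as map");
 *   $row = $dal2->my_fetch_assoc();
 *   $threshold = $row['map'] - 16384;
 *
 *   $values = "";
 *   foreach ($rows as $rowvals) {                // $rowvals like "(1,'abc')"
 *     if (strlen($values) >= $threshold) {
 *       $dal2->my_query($insert_sql . $values);  // flush the current batch
 *       $values = "";
 *     }
 *     if ($values) { $values .= ","; }
 *     $values .= $rowvals;
 *   }
 *   if ($values != "") {
 *     $dal2->my_query($insert_sql . $values);    // final partial batch
 *   }
 */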
public function run($arg) {
  require 'shard-query-config.php';
  if (isset($cache_servers) && !empty($cache_servers)) {
    $this->cache = new SaltedCache($cache_servers);
  }
  if (isset($cache_rules) && !empty($cache_rules)) {
    $this->cache_rules = $cache_rules;
  }

  if (!$arg) {
    return false;
  }
  $arg = (object) $arg;

  $fields = false;
  $errors = false;
  $rows = false;
  $sql = "";
  $has_rows = false;
  $resultset = null;

  if (!isset($arg->schema_name)) {
    $arg->schema_name = null;
  }
  $SQ = new ShardQuery($arg->schema_name);

  if (empty($arg->sql) || !trim($arg->sql)) {
    return false;
  }

  if (stripos($arg->sql, 'select database()') !== false) {
    // answer SELECT DATABASE() locally with the current schema name
    $fields = array(array('type' => 250, 'name' => 'DATABASE()'));
    $rows = array(array($SQ->state->schema_name));
    $resultset = array('fields' => &$fields, 'rows' => &$rows);
  } elseif (preg_match('/^SHOW/i', $arg->sql)) {
    // send SHOW statements to a random shard
    $shard_id = array_rand($SQ->state->shards);
    $DAL = SimpleDAL::factory($SQ->state->shards[$shard_id]);
    $DAL->my_select_db($SQ->state->shards[$shard_id]['db']);
    if (preg_match('/show\s+databases/i', $arg->sql)) {
      $databases = $SQ->state->mapper->get_schemata();
      $fields = array(array('type' => 250, 'name' => 'Database'));
      /*
      $rows = array();
      foreach($databases as $schema_info) {
        $rows[] = array($schema_info['schema_name']);
      }
      */
      $rows = array(array($SQ->state->schema_name));
      $resultset = array('fields' => &$fields, 'rows' => &$rows);
    } elseif ($stmt = $DAL->my_query($arg->sql)) {
      $row = $DAL->my_fetch_assoc();
      foreach ($row as $field => $val) {
        $rows[0][] = $val;
        $fields[] = array('type' => 250, 'name' => $field);
      }
      while ($row = $DAL->my_fetch_array($stmt, MYSQL_NUM)) {
        $rows[] = $row;
      }
      $resultset = array('fields' => &$fields, 'rows' => &$rows);
    }
    $DAL->my_close();
    unset($DAL);
  } elseif (preg_match('/select\s+.*\sfrom\s.*/i', $arg->sql) || preg_match('/(create|drop|alter)\s+.*/i', $arg->sql)) {
    // check the cache rules for a matching pattern before running the query
    $cache_ttl = null;
    if (isset($this->cache)) {
      $patterns = $this->cache_rules;
      foreach ($patterns as $pattern => $ttl) {
        if (preg_match($pattern, $arg->sql)) {
          $cache_ttl = $ttl;
          $resultset = $this->cache->get($arg->sql, $arg->schema_name);
          break;
        }
      }
    }

    if (!$resultset) {
      $stmt = $SQ->query($arg->sql);
      if (!empty($SQ->errors)) {
        $errors = trim(str_replace(array("\n", "Array", "(", ")", " "), "", print_r($SQ->errors, true)));
      }
      if ($stmt) {
        $has_rows = true;
        $rows = array(array());
        # get the first row and use it to construct the list of fields + collect row data
        # in this first fetch we process data one output column at a time
        $row = $SQ->DAL->my_fetch_assoc($stmt);
        foreach ($row as $field => $val) {
          $rows[0][] = $val;
          $fields[] = array('type' => 250, 'name' => $field);
        }
        # fetch the rest of the rows numerically and stuff into $rows, a row at a time
        while ($row = $SQ->DAL->my_fetch_array($stmt, MYSQL_NUM)) {
          $rows[] = $row;
        }
        $resultset = array('fields' => &$fields, 'rows' => &$rows);
      }
      if (isset($cache_ttl)) {
        $this->cache->set($arg->sql, $resultset, $cache_ttl, $arg->schema_name);
      }
      # return the actual object so that the proxy can finish aggregation and drop the table
    }
  } else {
    $sql = $arg->sql;
  }

  return json_encode(array('resultset' => $resultset, 'errors' => $errors, 'sql' => $sql, 'has_rows' => $has_rows));
}
private function broadcast($arg) {
  if (!$arg) {
    return;
  }
  $arg = (object) $arg;

  $conn = SimpleDAL::factory($arg->shard);
  if ($conn->my_error()) {
    return return_message($conn->my_error(), $arg->shard, $arg->query_id);
  }
  if (!$conn->my_select_db($arg->shard['db'])) {
    return return_message($conn->my_error(), $arg->shard, $arg->query_id);
  }
  $stmt = $conn->my_query($arg->sql);
  if (!$stmt) {
    return return_message($conn->my_error(), $arg->shard, $arg->query_id);
  }

  $last_insert_id = 0;
  // assign first, then compare; normalize zero to the string "0"
  $affected_row_count = $conn->my_affected_rows();
  if ($affected_row_count == 0) {
    $affected_row_count = "0";
  }
  if (preg_match('/insert |last_insert_id/', $arg->sql)) {
    $last_insert_id = $conn->my_insert_id();
  }
  unset($conn);
  return return_message('success', $arg->shard, 'info', $affected_row_count, $last_insert_id, $arg->query_id);
}
public function load_segment($path, $table, $start_pos, $end_pos, $columns_str = null, $set_str = null, $ignore = "", $replace = "") {
  $SQ = $this->SQ;
  $shard_col_pos = null;
  $errors = array();
  $loader_handles = array();
  $loader_fifos = array();
  $bucket = null;
  $file = null;
  $line_start = $start_pos; // file position of the line currently being processed

  if (!is_array($path)) {
    if (!trim($path)) {
      throw new Exception('Empty path not supported');
    }
  } else {
    $bucket = $path[1];
    $file = $path[2];
  }

  $delimiter = $this->delimiter;
  $delimiter = str_replace('|', '\\|', $delimiter);
  if ($this->enclosure != "") {
    $regex = "/{$this->enclosure}([^{$this->enclosure}]*){$this->enclosure}|{$delimiter}/ix";
  } else {
    $regex = "/{$delimiter}/";
  }

  $all_shards = $SQ->state->shards;
  $exit_error = false;

  /* Get the column list for the given table. */
  if (!$columns_str) {
    $db = $SQ->tmp_shard['db'];
    $dal = SimpleDAL::factory($SQ->tmp_shard);
    if ($dal->my_error()) {
      echo $dal->my_error();
      $errors[] = array('error' => "Could not get list of columns for table", 'file_pos' => $start_pos);
      return $errors;
    }
    $table = $dal->my_real_escape_string($table);
    $db = $dal->my_real_escape_string($db);

    $sql = "set group_concat_max_len=1024*1024*4;";
    $stmt = $dal->my_query($sql);
    if (!$stmt) {
      throw new Exception($dal->my_error());
    }
    $sql = "select group_concat(column_name order by ordinal_position) columns_str from information_schema.columns where table_schema='{$db}' and table_name = '{$table}'";
    $stmt = $dal->my_query($sql);
    if (!$stmt) {
      throw new Exception($dal->my_error());
    }
    $row = $dal->my_fetch_assoc($stmt);
    $columns_str = $row['columns_str'];
    $loader_handles[$SQ->tmp_shard['shard_name']] = $dal;
    $dal = null;
  }

  /* Try to find the shard column in the list of columns.
     (if it isn't found don't do anything) */
  if (isset($SQ->shard_column)) {
    $columns = explode(",", $columns_str);
    foreach ($columns as $key => $column) {
      if ($column == $SQ->shard_column) {
        $shard_col_pos = $key;
        break;
      }
    }
  }

  #handle s3
  if ($bucket != null) {
    $fname = tempnam("/tmp", mt_rand(1, 999999999));
    unlink($fname);
    echo "Fetching a chunk from S3 for loading (tempname: {$fname})\n";
    $s3 = new S3($SQ->state->aws_access_key, $SQ->state->aws_secret_key);
    @$s3->getObject($bucket, $file, $fname, array($start_pos, $end_pos));
    #because the chunks are in individual new files, reset the offsets for the small file
    $start_pos = 0;
    $end_pos = filesize($fname);
    if (!($fh = fopen($fname, 'rb'))) {
      $errors[] = "could not open input stream or S3 failure";
      return $errors;
    }
    unlink($fname); // the open handle keeps the data alive until fclose()
  } elseif (strstr($path, 'http://') || strstr($path, 'https://')) {
    #handle http(s): fetch only the byte range for this segment
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $path);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Shard-Query/loader');
    curl_setopt($curl, CURLOPT_RANGE, $start_pos . "-" . $end_pos);
    curl_setopt($curl, CURLOPT_BINARYTRANSFER, 1);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    $result = curl_exec($curl);
    curl_close($curl);
    if (!$result) {
      $errors[] = "could not open input stream or HTTP failure";
      return $errors;
    }
    $fname = tempnam("/tmp", mt_rand(1, 999999999));
    if (!($fh = fopen($fname, 'wb'))) {
      $errors[] = "could not open output stream";
      return $errors;
    }
    if (!fputs($fh, $result)) {
      $errors[] = "Could not put contents into file";
      return $errors;
    }
    unset($result);
    $start_pos = 0;
    $end_pos = ftell($fh); // the "chunk" on disk starts at 0
    fclose($fh);
    $fh = fopen($fname, 'rb');
    unlink($fname); // the open handle keeps the data alive until fclose()
  } else {
    if (!($fh = fopen($path, 'rb'))) {
      $errors[] = "could not open input stream or S3 failure";
      return $errors;
    }
  }

  /* Since the table does not contain the shard key, LOAD DATA INFILE
     should just be used to serially load the table onto each shard. */
  if (!isset($shard_col_pos)) {
    foreach ($all_shards as $shard_name => $shard) {
      $fifo = $this->start_fifo($table, $shard, $columns_str, $set_str, $ignore, $replace);
      if (!$fifo) {
        $err = "Could not start a FIFO to a destination database. This will result in too many errors, so failing completely.\n";
        $errors[] = array('error' => $err, 'file_pos' => $line_start);
        return $errors;
      }
      if (fseek($fh, $start_pos, SEEK_SET) === -1) {
        throw new Exception('could not seek to start pos');
      }
      while (!feof($fh) && ftell($fh) < $end_pos) {
        $line_start = ftell($fh);
        $line = fgets($fh);
        $result = fwrite($fifo['fh'], $line);
        if ($result === false) {
          $err = "Could not write to a destination FIFO. This will result in too many errors, so failing completely.\n";
          $errors[] = array('error' => $err, 'file_pos' => $line_start);
          return $errors;
        }
      }
      fclose($fifo['fh']);
      sleep(1);
      foreach ($fifo['ph']['pipes'] as $pipe) {
        fclose($pipe);
      }
      proc_close($fifo['ph']['ph']);
    }
    fclose($fh); // the temp file was already unlinked, so closing the handle discards it

    if (!empty($errors)) {
      return $errors;
    } else {
      return true;
    }
  }

  /* Figure out on which shard each row belongs. A buffer for each
     shard is created. Each buffer is loaded serially. */
  $out_data = array();
  if (fseek($fh, $start_pos, SEEK_SET) === -1) {
    throw new Exception('could not seek to start pos');
  }
  while (!feof($fh) && ftell($fh) < $end_pos) {
    $line_start = ftell($fh);
    $line = fgets($fh);
    $values = preg_split($regex, $line, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
    if (count($values) == 0) {
      $errors[] = "could not split line: {$line}\n";
      return $errors;
    }
    $values[count($values) - 1] = trim($values[count($values) - 1], $this->line_terminator);

    #lookup the value to see which shard it goes to
    $info = $SQ->map_shard($SQ->shard_column, $values[$shard_col_pos], $SQ->state->current_schema, '=', true);
    if (!$info) {
      $err = "Discarded row because the partition mapper did not return a valid result.\n";
      $errors[] = array('error' => $err, 'file_pos' => $line_start);
      continue;
    }
    if (count($info) > 1) {
      $err = "Discarded row because the partition mapper returned more than one shard for this row.\n";
      $errors[] = array('error' => $err, 'file_pos' => $line_start);
      continue;
    }
    $keys = array_keys(array_pop($info));
    $shard_name = array_pop($keys);
    if (empty($out_data[$shard_name])) {
      $out_data[$shard_name] = array();
    }
    $out_data[$shard_name][] = $line;
  }

  foreach ($out_data as $shard_name => $lines) {
    $fifo = $this->start_fifo($table, $SQ->state->shards[$shard_name], $columns_str, $set_str, $ignore, $replace);
    if (!$fifo) {
      $err = "Could not start a FIFO to a destination database.\n";
      $errors[] = array('error' => $err, 'file_pos' => $line_start);
      return $errors;
    }
    foreach ($lines as $line) {
      $result = fwrite($fifo['fh'], $line);
      if ($result === false) {
        $err = "Could not write to a destination FIFO.\n";
        $errors[] = array('error' => $err, 'file_pos' => $line_start);
        return $errors;
      }
    }
    fclose($fifo['fh']);
    sleep(1);
    foreach ($fifo['ph']['pipes'] as $pipe) {
      fclose($pipe);
    }
    proc_close($fifo['ph']['ph']);
  }

  if (!empty($errors)) {
    return $errors;
  }
  /* ALL OK */
  return true;
}
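/*
 * The per-row routing decision above, condensed (not part of the original
 * source; the result shape follows the map_shard() usage in this file):
 *
 *   $info = $SQ->map_shard($SQ->shard_column, $value, $schema, '=', true);
 *   if (!$info)           { ... discard: mapper had no answer ... }
 *   if (count($info) > 1) { ... discard: row maps to more than one shard ... }
 *   $inner = array_pop($info);        // mapper returns a nested array
 *   $keys = array_keys($inner);
 *   $shard_name = array_pop($keys);   // the single shard for this row
 *
 * Rows are then buffered per shard and streamed through one FIFO per shard
 * (see start_fifo above) rather than issuing a statement per row.
 */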