/**
 * Entry point: works out the requested resource/action from either the Web
 * request or the command line, validates the parameters, and dispatches to
 * the matching do_* handler.
 *
 * Web invocation:
 *   - the resource comes from PATH_INFO ("", "/bin" or "/bin/tweets");
 *   - the action comes from the "action" GET/POST parameter, or defaults
 *     per resource (list, bin-info, tweet-info);
 *   - optional "startdate", "enddate" and "format" (csv|tsv, default csv).
 *
 * Command line invocation:
 *   - the action comes from -l/-b/-t/-p (or their long forms), or is implied
 *     by the presence of a query bin name argument;
 *   - optional -s/--start and -e/--end times.
 *
 * Errors are reported through abort_with_error() (Web and shared checks) or
 * by writing a usage error to STDERR and exiting with status 2 (command line).
 * On success the function does not return: it ends with exit(0).
 */
function main() {
    global $argv;
    global $datasets; // from require_once "../analysis/common/functions.php"
    global $api_timezone; // from require_once "./lib/common.php"

    //----------------
    // Obtain parameters

    if (PHP_SAPI != 'cli') {
        // Invoked by Web server

        expected_query_parameters(['action', 'startdate', 'enddate', 'format']);

        // Determine resource ($querybin_name will be set or not)

        // PATH_INFO may be absent when the script is requested without any
        // trailing path (e.g. /api/querybin.php); treat that as an empty path
        // instead of reading an undefined index.
        $path_info = isset($_SERVER['PATH_INFO']) ? $_SERVER['PATH_INFO'] : '';

        $components = explode('/', $path_info);
        assert($components[0] === ''); // since PATH_INFO started with a "/"
        array_shift($components); // remove empty component from first "/"

        switch (count($components)) {
            case 0:
                // No query bin specified: list all (e.g. /api/querybin.php)
                $resource = 'all';
                $querybin_name = NULL;
                break;
            case 1:
                // Query bin (e.g. /api/querybin.php/foobar)
                $resource = 'querybin';
                $querybin_name = $components[0];
                break;
            case 2:
                // Query bin's tweets (e.g. /api/querybin.php/foobar/tweets)
                if ($components[1] != 'tweets') {
                    abort_with_error(404, "Not found: " . $path_info);
                }
                $resource = 'querybin/tweets';
                $querybin_name = $components[0];
                break;
            default:
                $resource = NULL;
                abort_with_error(404, "Not found: " . $path_info);
        }

        // Determine action

        if ($_SERVER['REQUEST_METHOD'] === 'GET' && array_key_exists('action', $_GET)) {
            $action = $_GET['action']; // explicit action
        } else {
            if ($_SERVER['REQUEST_METHOD'] === 'POST' && array_key_exists('action', $_POST)) {
                $action = $_POST['action']; // explicit action
            } else {
                // Use default action for resource
                switch ($resource) {
                    case 'all':
                        $action = 'list';
                        break;
                    case 'querybin':
                        $action = 'bin-info';
                        break;
                    case 'querybin/tweets':
                        $action = 'tweet-info';
                        break;
                    default:
                        $action = NULL;
                        abort_with_error(500, "Internal error: bad rsrc: {$resource}");
                }
            }
        }

        // Check combination of resource, action and method

        $method = $_SERVER['REQUEST_METHOD'];

        if ($method != 'GET' && $method != 'POST' && $method != 'DELETE') {
            abort_with_error(405, "Method not allowed: {$method}");
        }

        $bad_combination = false;

        switch ($resource) {
            case 'all':
                if (!($action === 'list' && $method === 'GET')) {
                    $bad_combination = true;
                }
                break;
            case 'querybin':
                if (!($action === 'bin-info' && $method === 'GET')) {
                    $bad_combination = true;
                }
                break;
            case 'querybin/tweets':
                // Purging is accepted via DELETE, or via POST for clients
                // that cannot issue DELETE requests.
                if (!($action === 'tweet-info' && $method === 'GET'
                    || $action === 'export-tweets' && $method === 'GET'
                    || $action === 'purge-tweets' && $method === 'DELETE'
                    || $action === 'purge-tweets' && $method === 'POST')) {
                    $bad_combination = true;
                }
                break;
            default:
                abort_with_error(500, "Internal error: bad rsrc: {$resource}");
        }

        if ($bad_combination) {
            abort_with_error(400, "Invalid: {$resource}, {$action}, {$method}");
        }

        // Get parameters

        // Both times are optional: leave them NULL when not supplied rather
        // than reading an undefined array key.
        $str_start = isset($_REQUEST['startdate']) ? $_REQUEST['startdate'] : NULL;
        $str_end = isset($_REQUEST['enddate']) ? $_REQUEST['enddate'] : NULL;

        if (isset($_REQUEST['format']) && $_REQUEST['format'] !== '') {
            $format = $_REQUEST['format'];
            if ($format != 'csv' && $format != 'tsv') {
                abort_with_error(400, "Invalid format: {$format}");
            }
        } else {
            $format = 'csv'; // default
        }

    } else {
        // Invoked from command line

        // PHP's getopt is terrible, but it is always available.

        // $skip_num estimates how many leading $argv elements getopt consumed:
        // the script name, one per option, plus one more per option value.
        // NOTE(review): this overcounts when a value is attached to its option
        // in a single argument (e.g. "-sVALUE" or "--start=VALUE") and does not
        // account for repeated options -- confirm whether that matters here.
        $skip_num = 1;

        $options = getopt("hlbtps:e:", ['help', 'list', 'bin-info', 'tweet-info', 'purge-tweets', 'start:', 'end:']);

        if ($options != false) {
            foreach ($options as $opt => $optarg) {
                $skip_num++;
                if ($optarg != NULL) {
                    $skip_num++;
                }

                switch ($opt) {
                    case 'h':
                    case 'help':
                        $script_name = basename($argv[0]);
                        $dtz = $api_timezone != NULL ? $api_timezone : "UTC";
                        echo <<<END
Usage: php {$script_name} [options] [queryBinName]
Options:
  -l | --list          list names of all query bins (default without name)
  -b | --bin-info      show information about query bin (default with name)
  -t | --tweet-info    show information about tweets in named query bin
  -p | --purge-tweets  purge tweets in named query bin

  -s | --start tm      start time for tweet viewing/purging
  -e | --end tm        end time for tweet viewing/purging

  -h | --help          show this help message

Format for tm: yyyy-mm-ddThh:mm:ss[tz]
  tz: 'Z', 'UTC' or [+|-]HH:MM (default: {$dtz})

END;
                        exit(0);

                    case 'l':
                    case 'list':
                        if (isset($action)) {
                            fwrite(STDERR, "Usage error: multiple actions\n");
                            exit(2);
                        }
                        $action = 'list';
                        break;

                    case 'b':
                    case 'bin-info':
                        if (isset($action)) {
                            fwrite(STDERR, "Usage error: multiple actions\n");
                            exit(2);
                        }
                        $action = 'bin-info';
                        break;

                    case 't':
                    case 'tweet-info':
                        if (isset($action)) {
                            fwrite(STDERR, "Usage error: multiple actions\n");
                            exit(2);
                        }
                        $action = 'tweet-info';
                        break;

                    case 'p':
                    case 'purge-tweets':
                        if (isset($action)) {
                            fwrite(STDERR, "Usage error: multiple actions\n");
                            exit(2);
                        }
                        $action = 'purge-tweets';
                        break;

                    case 's':
                    case 'start':
                        $str_start = $optarg;
                        break;

                    case 'e':
                    case 'end':
                        $str_end = $optarg;
                        break;
                }
            }
        }

        $args = array_slice($argv, $skip_num);

        if (0 < count($args)) {
            // Compensate for PHP's getopt's incomplete processing
            // Check for options after getopt stops processing

            if ($args[0] === '--') {
                array_shift($args);
            }

            foreach ($args as $arg) {
                // Guard against an empty argument before indexing into it
                if ($arg !== '' && $arg[0] === '-') {
                    fwrite(STDERR, "Usage error: options not allowed after arguments: {$arg}\n");
                    exit(2);
                }
            }
        }

        // Arguments

        if (count($args) == 0) {
            $querybin_name = NULL;
        } else {
            if (count($args) == 1) {
                $querybin_name = $args[0];
            } else {
                fwrite(STDERR, "Usage error: too many arguments\n");
                exit(2);
            }
        }

        // Derive implied action if no explicit action was provided

        if (!isset($action)) {
            if (isset($querybin_name)) {
                $action = 'bin-info';
            } else {
                $action = 'list';
            }
        }

        // Check for invalid option/argument combinations

        if ($action !== 'list') {
            // All these actions require a query bin to be specified
            if (!isset($querybin_name)) {
                fwrite(STDERR, "Usage error: missing query bin name\n");
                exit(2);
            }
        } else {
            if (isset($querybin_name)) {
                fwrite(STDERR, "Usage error: query bin name not required\n");
                exit(2);
            }
        }

        if ($action === 'list' || $action === 'bin-info') {
            // These actions do not use start/end time
            if (isset($str_start) || isset($str_end)) {
                fwrite(STDERR, "Usage error: start/end time not required\n");
                exit(2);
            }
        }
    }

    //----------------
    // Check/process parameters

    // Check and set $querybin (if needed)

    if (isset($querybin_name)) {
        if (!isset($datasets[$querybin_name])) {
            abort_with_error(404, "Unknown query bin: {$querybin_name}");
        }
        $querybin = $datasets[$querybin_name];
    } else {
        $querybin = NULL;
    }

    // Check start and end time
    //
    // Code convention:
    //   $str_start and $str_end are string values (or null).
    //   $dt_start and $dt_end are DateTime objects (or null).

    $dt_start = NULL;
    if (isset($str_start) && $str_start !== '') {
        try {
            $dt_start = dt_parse($str_start, false, $api_timezone);
        } catch (DtException $e) {
            $m = $e->getMessage();
            abort_with_error(400, "Bad start time ({$m}): {$e->dt_str}");
        }
    }

    $dt_end = NULL;
    if (isset($str_end) && $str_end !== '') {
        try {
            $dt_end = dt_parse($str_end, true, $api_timezone);
        } catch (DtException $e) {
            $m = $e->getMessage();
            abort_with_error(400, "Bad end time ({$m}): {$e->dt_str}");
        }
    }

    if (isset($dt_start) && isset($dt_end)) {
        if ($dt_end < $dt_start) {
            abort_with_error(400, "End time is before start time.");
        }
    }

    //----------------
    // Perform action and produce response

    switch ($action) {
        case 'list':
            do_list_bins();
            break;
        case 'bin-info':
            do_bin_info($querybin);
            break;
        case 'tweet-info':
            // A NULL format selects viewing rather than exporting
            do_view_or_export_tweets($querybin, $dt_start, $dt_end, NULL);
            break;
        case 'export-tweets':
            // Only reachable from the Web branch, which always sets $format
            assert(isset($format));
            do_view_or_export_tweets($querybin, $dt_start, $dt_end, $format);
            break;
        case 'purge-tweets':
            do_purge_tweets($querybin, $dt_start, $dt_end);
            break;
        default:
            abort_with_error(500, "Internal error: unexpected action: {$action}");
    }

    exit(0);
}
/**
 * Rejects the request if the query string contains an unexpected parameter.
 *
 * Every key of $_GET must appear in $expected_params; the first one that does
 * not is reported through abort_with_error() with status 400.
 *
 * @param array $expected_params names of the query parameters that are allowed
 */
function expected_query_parameters(array $expected_params) {
    foreach (array_keys($_GET) as $param) {
        // PHP converts numeric-looking array keys to integers, and a loose
        // in_array() would let such a key match an unrelated name (e.g. 0
        // against any non-numeric string on PHP < 8). Compare as strings,
        // strictly, so only exact parameter names are accepted.
        if (!in_array((string) $param, $expected_params, true)) {
            abort_with_error(400, "Unexpected query parameter: {$param}");
        }
    }
}