/**
 * Run mawk code over one or more data sources.
 *
 * Opts are read under the 'process_data' prefix: debug, join_type, sources,
 * create_indexes, primary_key, mawk and update_frequency. When no sources opt
 * is given, a single source is built from the opts via build_source_data().
 * When there is more than one source, every source must define the primary
 * key field and a primary index is added for it.
 *
 * @param mixed  $opts       command opts (merged with $pipe by merge_opts())
 * @param mixed  $pipe       piped input handed to merge_opts()
 * @param string $cmd        command name used for error / debug messages
 * @param mixed  $opt_prefix handed to adjust_opt_prefix()
 *
 * @return mixed the value returned by mawk_process_data(); false when an opt
 *               fails validation or the primary index cannot be added; the
 *               value of error() for every other failure
 */
function process_data($opts, $pipe, $cmd = __FUNCTION__, $opt_prefix = false) {
    # set prefix
    $prefix = 'process_data';

    # merge opts
    $opts = merge_opts($opts, $pipe);

    # adjust opt prefix
    $cmd = adjust_opt_prefix($cmd, $opts, $opt_prefix, 'edit');

    # get debug opt
    $debug = get_opt_config_value($prefix, $opts, 'debug', false);
    if (!check_opt_set_type($cmd, $debug, 'debug', 'boolean')) {
        return false;
    }

    # get join type opt (validated under its real name, like every other opt)
    $join_type = get_opt_config_value($prefix, $opts, 'join_type', 'outer');
    if (!check_opt_set_type($cmd, $join_type, 'join_type', 'data_join_type')) {
        return false;
    }

    # get sources opt
    $sources = get_opt($prefix, $opts, 'sources');
    if (!check_opt_if_set_type($cmd, $sources, 'sources', 'array')) {
        return false;
    }
    if (!$sources) {
        # no explicit sources : fall back to a single source built from the opts
        $source = build_source_data($prefix, $opts, $cmd);
        if (!$source) {
            return error($cmd, "sources opt is not set and the data and fields opts are not set either, so can't be used to create one");
        }
        $sources = array($source);
    }

    # get sources values
    foreach ($sources as $i => $val) {
        $sources[$i] = value($val);
    }

    # get create_indexes opt
    # NOTE: the opt is validated here but not used any further by this function
    $create_indexes = get_opt($prefix, $opts, 'create_indexes');
    if (!check_opt_if_set_type($cmd, $create_indexes, 'create_indexes', 'array_of_strings')) {
        return false;
    }

    # get join field opt
    $primary_key = get_opt_config_value($prefix, $opts, 'primary_key', 'guid');
    if (!check_opt_set_type($cmd, $primary_key, 'primary_key', 'string')) {
        return false;
    }

    # get mawk rules opt
    $mawk = get_opt_config_value($prefix, $opts, 'mawk');
    if (!check_opt_set_type($cmd, $mawk, 'mawk', 'string')) {
        return false;
    }

    # get update frequency opt
    $update_frequency = get_opt_config_value($prefix, $opts, 'update_frequency', 0);
    if (!check_opt_set_type($cmd, $update_frequency, 'update_frequency', 'integer')) {
        return false;
    }

    # create fields and data arrays
    $fields_arr = array();
    $data_arr = array();
    $sources_count = count($sources);
    for ($i = 0; $i < $sources_count; $i++) {
        $source_no = $i + 1;

        # add fields
        $fields = $sources[$i]['fields'];
        if (!$fields) {
            return error($cmd, "no fields defined for source {$source_no}");
        }
        # a join needs the primary key in every source
        if ($sources_count > 1 && !in_array($primary_key, $fields)) {
            return error($cmd, "primary key '{$primary_key}' not defined for source {$source_no}");
        }
        $fields_arr[] = $fields;

        # add data
        $data = $sources[$i]['data'];
        if (!$data) {
            return error($cmd, "no data defined for source {$source_no}");
        }
        $data_arr[] = $data;
    }

    # display info about the join, if there is one
    $is_join = count($fields_arr) > 1;
    if ($is_join) {
        debug_echo($cmd, "performing {$join_type} join on primary key / field '{$primary_key}'");
    }
    foreach ($fields_arr as $idx => $source_fields) {
        $source_no = $idx + 1;
        debug_echo($cmd, "fields for data source {$source_no} :");
        debug_dump_list($source_fields, true);
    }

    # parse the line rules
    $parsed_mawk = mawk_parse_code($mawk, $fields_arr, $cmd);
    if (is_int($parsed_mawk)) {
        # an integer result signals a syntax error; it is presumably the
        # offending code line number (TODO: confirm against mawk_parse_code)
        return error($cmd, "code line {$parsed_mawk} of the mawk has an invalid syntax");
    }
    $parsed_mawk_code = $parsed_mawk['code'];

    # set up indexes
    $indexes_arr = $parsed_mawk['indexes'];
    if ($is_join) {
        $r = mawk_add_primary_index($primary_key, $indexes_arr, $fields_arr, $cmd);
        if ($r === false) {
            return false;
        }
    }

    # display info about the line rules
    debug_echo($cmd, "the following mawk code will be used :");
    debug_echo_txt(mawk_clean_code($mawk), true);
    if ($debug) {
        debug_echo($cmd, "which translates to the following parsed code :");
        debug_dump_yaml($parsed_mawk_code, true);
        if ($indexes_arr) {
            debug_echo($cmd, "and includes the following indexes :");
            debug_dump_yaml($indexes_arr, true);
        }
    }

    # set up discard
    $inner_join = ($join_type === 'inner');

    # run mawk code
    try {
        debug_echo($cmd, "processing the data (this can take some time depending on the inputs) ...");
        $mawk_res = mawk_process_data($parsed_mawk_code, $fields_arr, $data_arr, $indexes_arr, $update_frequency, $inner_join, $cmd, $debug);
    } catch (MawkError $e) {
        return error($cmd, $e->getMessage());
    }

    return $mawk_res;
}
/**
 * Load tabular data from a JSON or YAML document file.
 *
 * Opts are read under the $cmd prefix: file, defined_fields, save_fields,
 * doc_type (default 'json'), data_structure (default 'list_of_objects'),
 * limit (default 0 = no limit) and offset (default 0).
 *
 * - 'list_of_columns' : every row is a positional list. The field names come
 *   from the defined_fields opt or, failing that, from the first row of the
 *   file. save_fields selects which columns are kept.
 * - 'list_of_objects' : every row is a key => value map. With save_fields only
 *   those keys are kept; otherwise fields are collected in order of first
 *   appearance and earlier rows are padded with '' for fields found later.
 *
 * @param mixed  $opts command opts (merged with $pipe by merge_opts())
 * @param mixed  $pipe piped input handed to merge_opts()
 * @param string $cmd  command name, also used as the opt prefix
 *
 * @return mixed the value of build_result_data($fields, $rows); false when an
 *               opt fails validation or the field indexes cannot be built; the
 *               value of error() for file / parse failures
 */
function load_data_from_doc_file($opts, $pipe, $cmd = __FUNCTION__) {
    # set prefix
    $prefix = $cmd;

    # merge opts
    # NOTE(review): 'doc_type' is handed to merge_opts() as its third argument
    # here - confirm that this is intended rather than 'file'
    $opts = merge_opts($opts, $pipe, 'doc_type');

    # get file opt
    $file = get_opt($prefix, $opts, 'file');
    if (!check_opt_set_type($cmd, $file, 'file', 'string')) {
        return false;
    }

    # get defined fields opt
    $defined_fields = get_opt($prefix, $opts, 'defined_fields');
    if (!check_opt_if_set_type($cmd, $defined_fields, 'defined_fields', 'array_of_strings')) {
        return false;
    }

    # get save fields
    $save_fields = get_opt($prefix, $opts, 'save_fields');
    if (!check_opt_if_set_type($cmd, $save_fields, 'save_fields', 'array_of_strings')) {
        return false;
    }

    # get doc type opt
    $doc_type = get_opt($prefix, $opts, 'doc_type', 'json');
    if (!check_opt_set_type($cmd, $doc_type, 'doc_type', 'document_file_type')) {
        return false;
    }
    $doc_type_upper = strtoupper($doc_type);

    # get data structure opt
    $data_structure = get_opt($prefix, $opts, 'data_structure', 'list_of_objects');
    if (!check_opt_set_type($cmd, $data_structure, 'data_structure', 'data_structure_type')) {
        return false;
    }

    # get limit opt
    $limit = get_opt($prefix, $opts, 'limit', 0);
    if (!check_opt_set_type($cmd, $limit, 'limit', 'positive_integer')) {
        return false;
    }

    # get offset opt
    $offset = get_opt($prefix, $opts, 'offset', 0);
    if (!check_opt_set_type($cmd, $offset, 'offset', 'positive_integer')) {
        return false;
    }

    # check the file exists
    if (!file_exists($file)) {
        return error($cmd, "file does not exist : {$file}");
    }

    # display generic info about the data
    $data_structure_str = str_replace('_', ' ', $data_structure);
    debug_echo($cmd, "creating data from {$doc_type_upper} file : {$file}");
    debug_echo($cmd, "source data of type '{$data_structure_str}'");

    # read the file into memory
    $data_str = @file_get_contents($file);
    if (!is_string($data_str)) {
        return error($cmd, "could not read file : {$file}");
    }

    # load data depending on type
    $data = null;
    switch ($doc_type) {
        case 'json':
            $data = @json_decode($data_str, true);
            break;
        case 'yaml':
            $data = @yaml_decode($data_str);
            break;
    }

    # a document that decodes to a scalar (or to nothing) has no rows to load
    if (!is_array($data) || !$data) {
        return error($cmd, "invalid {$doc_type_upper} in file : {$file}");
    }

    # set up end point, never reading past the last available row
    $data_count = count($data);
    if ($limit == 0) {
        $end = $data_count;
    } else {
        $end = min($offset + $limit, $data_count);
    }

    # set up the fields and build data
    $res_fields = array();
    $res_data = array();
    switch ($data_structure) {
        case 'list_of_columns':
            # set up fields
            if ($defined_fields) {
                $defined_fields_source = '(from the config)';
            } else {
                # the first row holds the field names; taking it out shortens
                # the data by one row, so the end point is clamped again
                $defined_fields = array_shift($data);
                $defined_fields_source = '(from the source file)';
                $end = min($end, count($data));
            }

            # set up response field indexes
            $res_field_count = 0;
            $res_field_indexes = array();
            if ($save_fields) {
                $res_fields = $save_fields;
                $res_field_count = count($res_fields);
                $res_field_indexes = build_field_indexes($defined_fields, $res_fields, $cmd);
                if ($res_field_indexes === false) {
                    return false;
                }
            } else {
                $res_fields = $defined_fields;
            }

            # display info about the fields
            debug_echo($cmd, "defined fields {$defined_fields_source} : ");
            debug_dump_list($defined_fields, true);
            if ($save_fields) {
                debug_echo($cmd, "saved fields :");
                debug_dump_list($save_fields, true);
            }

            # gather the data
            if ($save_fields) {
                for ($i = $offset; $i < $end; $i++) {
                    $line = $data[$i];
                    $new_line = array();
                    for ($j = 0; $j < $res_field_count; $j++) {
                        $new_line[] = $line[$res_field_indexes[$j]];
                    }
                    $res_data[] = $new_line;
                }
            } elseif ($offset == 0 && $limit == 0) {
                $res_data = $data;
            } else {
                for ($i = $offset; $i < $end; $i++) {
                    $res_data[] = $data[$i];
                }
            }
            break;

        case 'list_of_objects':
            if ($save_fields) {
                # set up result fields
                $res_fields = $save_fields;
                $res_fields_count = count($res_fields);

                # display info about what fields will be saved
                debug_echo($cmd, "saved fields :");
                debug_dump_list($res_fields, true);

                # build the data (keys missing from an object are saved as null)
                for ($i = $offset; $i < $end; $i++) {
                    $obj = $data[$i];
                    $new_line = array();
                    for ($j = 0; $j < $res_fields_count; $j++) {
                        $key = $res_fields[$j];
                        $new_line[] = isset($obj[$key]) ? $obj[$key] : null;
                    }
                    $res_data[] = $new_line;
                }
            } else {
                # display info about saved files
                debug_echo($cmd, "saving all fields - they will be listed as they are added");

                # build the data
                $res_fields_count = 0;
                for ($i = $offset; $i < $end; $i++) {
                    $obj = $data[$i];
                    $new_line = array();

                    # save all the existing data in order
                    for ($j = 0; $j < $res_fields_count; $j++) {
                        $key = $res_fields[$j];
                        $new_line[] = isset($obj[$key]) ? $obj[$key] : null;
                    }

                    # index and save any new fields
                    $res_fields_added = array();
                    foreach ($obj as $key => $value) {
                        if (in_array($key, $res_fields, true)) {
                            continue;
                        }
                        $res_fields_added[] = $key;
                        $res_fields[] = $key;
                        $res_fields_count++;
                        $new_line[] = $value;
                    }

                    # rows are appended so that the result is always indexed
                    # from zero, whatever the offset is
                    $res_data[] = $new_line;

                    # display list of fields added (if any)
                    if ($res_fields_added) {
                        $line_no = $i + 1;
                        debug_echo($cmd, "the following fields were added on row {$line_no} :");
                        debug_dump_list($res_fields_added, true);
                    }
                }

                # add back in any empty fields
                # (fields are only ever added, so once a row is full width
                # every row after it is full width as well)
                $row_count = count($res_data);
                for ($k = 0; $k < $row_count; $k++) {
                    $line_fields_count = count($res_data[$k]);
                    if ($line_fields_count == $res_fields_count) {
                        break;
                    }
                    for ($j = $line_fields_count; $j < $res_fields_count; $j++) {
                        $res_data[$k][] = '';
                    }
                }
            }
            break;
    }

    # detail results
    $line_count = count($res_data);
    debug_echo($cmd, "creation of data from {$doc_type_upper} file complete ({$line_count} lines processed)");

    # create result
    return build_result_data($res_fields, $res_data);
}
/**
 * Dump a block of text through debug_dump_list(), one entry per line.
 *
 * @param string $txt    text to dump; it is split on "\n"
 * @param mixed  $indent handed to debug_dump_list() unchanged
 *
 * @return mixed whatever debug_dump_list() returns
 */
function debug_echo_txt($txt, $indent) {
    # one list entry per newline-separated line
    $txt_lines = explode("\n", $txt);

    return debug_dump_list($txt_lines, $indent);
}
/**
 * List the files of a directory on an FTP server.
 *
 * Opts are read under the 'ftp' prefix: execute (default true), dir, search
 * (default ''), chdir_retries (default 3) and scandir_retries (default 3).
 * The connection comes from ftp_get_conn(). Both the chdir and the listing are
 * retried up to the configured number of attempts. An empty listing is
 * reported as a failure, exactly like a failed listing.
 *
 * @param mixed  $opts command opts (merged with $pipe by merge_opts())
 * @param mixed  $pipe piped input; false when the function is called directly
 * @param string $cmd  command name used for error / debug messages
 *
 * @return mixed true when the execute opt is false; false when an opt fails
 *               validation or no connection is available; the value of error()
 *               when the chdir or the listing fails; otherwise the list of
 *               names with the directory part stripped - as a plain array when
 *               $pipe === false, else wrapped as array('files' => $list)
 */
function ftp_scandir($opts, $pipe = false, $cmd = __FUNCTION__) {
    # set prefix
    $prefix = 'ftp';

    # merge opts
    $opts = merge_opts($opts, $pipe, 'dir');

    # get execute opt
    $execute = get_opt_config_value($prefix, $opts, 'execute', true);
    if (!check_opt_set_type($cmd, $execute, 'execute', 'boolean')) {
        return false;
    }

    # check if we should execute or not
    if (!$execute) {
        return true;
    }

    # get remote dir opt
    $dir = get_opt($prefix, $opts, 'dir');
    if (!check_opt_set_type($cmd, $dir, 'dir', 'string')) {
        return false;
    }

    # get search opt
    $search = get_opt($prefix, $opts, 'search', '');
    if (!check_opt_set_type($cmd, $search, 'search', 'string')) {
        return false;
    }

    # get chdir retries
    $chdir_retries = get_opt_config_value($prefix, $opts, 'chdir_retries', 3);
    if (!check_opt_set_type($cmd, $chdir_retries, 'chdir_retries', 'integer')) {
        return false;
    }

    # get scandir retries opt
    $scandir_retries = get_opt_config_value($prefix, $opts, 'scandir_retries', 3);
    if (!check_opt_set_type($cmd, $scandir_retries, 'scandir_retries', 'integer')) {
        return false;
    }

    # setup connection
    $conn = ftp_get_conn($opts, $cmd);
    if (!$conn) {
        return false;
    }

    # get connection values
    $ftp_handle = $conn['handle'];

    # change to dir (note: we chdir because some server do not allow rawlist except in current working directory)
    debug_echo($cmd, "scanning dir on FTP server : {$dir} ...");
    $success = false;
    for ($i = 0; $i < $chdir_retries; $i++) {
        if (@ftp_chdir($ftp_handle, $dir)) {
            $success = true;
            break;
        }
    }
    if (!$success) {
        return error($cmd, "could not chdir to dir on FTP server : {$dir} ({$chdir_retries} attempts)");
    }

    # build full search
    if ($dir == '/') {
        $full_search = "/{$search}";
        $path_offset = 1;
    } else {
        $full_search = "{$dir}/{$search}";
        $path_offset = strlen($dir) + 1;
    }

    # try to get list : stop at the first attempt that returns a listing and
    # only retry when ftp_nlist() fails
    # (note: if rawlist is used then it's because a server does not accept nlist)
    $list = false;
    for ($i = 0; $i < $scandir_retries; $i++) {
        $list = @ftp_nlist($ftp_handle, $full_search);
        if (is_array($list)) {
            break;
        }
    }
    if (!$list) {
        return error($cmd, "could not scan dir on FTP server : {$dir} ({$scandir_retries} attempts)");
    }

    # remove directory name
    $list = array_strip_offset_util($list, $path_offset);

    # display list of files
    $count = count($list);
    debug_echo($cmd, "{$count} files found in dir on FTP server : {$dir}");
    debug_dump_list($list, true);

    # return
    if ($pipe === false) {
        return $list;
    }

    return array('files' => $list);
}
/**
 * Move the given fields to the front of a data set, in place.
 *
 * The fields named in $reorder_fields come first, in the order given; all the
 * other fields follow in their original order. Both $fields and every row of
 * $data are rewritten to the new order. A name repeated in $reorder_fields is
 * only placed once. Nothing is modified when an unknown field is requested.
 *
 * @param array  $data           rows of positional values, modified in place
 * @param array  $fields         field names, one per column, modified in place
 * @param array  $reorder_fields names of the fields to move to the front
 * @param string $cmd            command name used for error / debug messages
 *
 * @return mixed true on success; the value of error() when a field named in
 *               $reorder_fields does not exist in $fields
 */
function reorder_data_fields(&$data, &$fields, $reorder_fields, $cmd) {
    # map every field name to its current column index
    $fields_flipped = array_flip($fields);

    # check all the fields to re-order
    $new_fields = array();
    $placed = array();
    foreach ($reorder_fields as $field) {
        if (!isset($fields_flipped[$field])) {
            return error($cmd, "field '{$field}' cannot be reordered from the data because it does not exist");
        }
        # a repeated name would make the new field list longer than the
        # original one and push the trailing columns out of the result
        if (isset($placed[$field])) {
            continue;
        }
        $placed[$field] = true;
        $new_fields[] = $field;
    }

    # include all the original fields that are not re-ordered
    foreach ($fields as $field) {
        if (isset($placed[$field])) {
            continue;
        }
        $placed[$field] = true;
        $new_fields[] = $field;
    }

    # display message
    debug_echo($cmd, "new order for fields :");
    debug_dump_list($new_fields, true);

    # set up field indexes (old column index for every new position)
    $new_fields_indexes = array();
    foreach ($new_fields as $field) {
        $new_fields_indexes[] = $fields_flipped[$field];
    }

    # reset the fields
    $field_count = count($fields);
    for ($i = 0; $i < $field_count; $i++) {
        $fields[$i] = $new_fields[$i];
    }

    # loop through the data
    $data_count = count($data);
    for ($i = 0; $i < $data_count; $i++) {
        $line = $data[$i];
        $new_line = array();
        for ($j = 0; $j < $field_count; $j++) {
            $new_line[] = $line[$new_fields_indexes[$j]];
        }
        $data[$i] = $new_line;
    }

    return true;
}