/**
 * Build lookup indexes over the rows of a data set.
 *
 * Options read from $opts (after merging with $pipe when $pipe is not null):
 *   - 'fields'  : array of strings, handed to build_field_indexes() together
 *                 with 'indexes' to find the position of each indexed field
 *                 within a row
 *   - 'data'    : array of rows
 *   - 'indexes' : array of strings, the names of the indexes to build
 *
 * For every index name a map of (row value => row) is built and sorted by
 * key. Rows are stored by reference, and when several rows share the same
 * value the last one wins.
 *
 * @param array       $opts       options (see above)
 * @param mixed       $pipe       merged into $opts via merge_opts() when not null
 * @param string      $cmd        name passed to the option checks and to build_field_indexes()
 * @param string|null $msg_suffix passed through to build_field_indexes()
 *
 * @return array|false $opts with 'indexes' replaced by a map of
 *                     index name => (key => row), or false when an option
 *                     check or build_field_indexes() fails
 */
function build_indexes($opts, $pipe, $cmd = __FUNCTION__, $msg_suffix = null) {

    # set prefix
    $prefix = 'build_indexes';

    # merge opts
    if ($pipe !== null) {
        $opts = merge_opts($opts, $pipe, 'indexes');
    }

    # get fields opt
    $fields = get_opt($prefix, $opts, 'fields');
    if (!check_opt_set_type($cmd, $fields, 'fields', 'array_of_strings')) {
        return false;
    }

    # get data opt
    $data =& get_opt($prefix, $opts, 'data');
    if (!check_opt_set_type($cmd, $data, 'data', 'array')) {
        return false;
    }

    # get indexes opt
    $indexes =& get_opt($prefix, $opts, 'indexes');
    if (!check_opt_set_type($cmd, $indexes, 'indexes', 'array_of_strings')) {
        return false;
    }

    # set up one independent, empty index per name
    # (the names are kept as a plain list so position $j below lines up with
    # position $j of the field indexes)
    $index_names = array_values($indexes);
    $index_count = count($index_names);
    $res_indexes = array();
    foreach ($index_names as $index_name) {
        $res_indexes[$index_name] = array();
    }

    # set up field indexes
    $field_indexes = build_field_indexes($fields, $indexes, $cmd, $msg_suffix);
    if ($field_indexes === false) {
        return false;
    }

    # loop over data to build indexes
    foreach ($data as &$line) {
        for ($j = 0; $j < $index_count; $j++) {
            $key = $line[$field_indexes[$j]];
            $res_indexes[$index_names[$j]][$key] =& $line;
        }
    }
    # drop the loop reference so later code cannot write through it
    unset($line);

    # sort the indexes (addressed by name so the stored arrays themselves are
    # sorted, not a copy)
    foreach ($index_names as $index_name) {
        ksort($res_indexes[$index_name]);
    }

    # save the indexes
    $opts['indexes'] = $res_indexes;

    return $opts;
}
/**
 * Load tabular data from a JSON or YAML document file.
 *
 * Options read from $opts (after merging with $pipe):
 *   - 'file'           : string, path of the file to read (required)
 *   - 'defined_fields' : optional array of strings; for 'list_of_columns'
 *                        data, the names of the columns. When not given, the
 *                        first row of the file is taken as the field names.
 *   - 'save_fields'    : optional array of strings; when given, only these
 *                        fields are kept, in this order
 *   - 'doc_type'       : 'json' (default) or 'yaml'
 *   - 'data_structure' : 'list_of_objects' (default) or 'list_of_columns'
 *   - 'limit'          : maximum number of rows to keep, 0 (default) = no limit
 *   - 'offset'         : index of the first row to keep (default 0); for
 *                        'list_of_columns' with a header row taken from the
 *                        file, rows are counted after that header
 *
 * @param array  $opts options (see above)
 * @param mixed  $pipe merged into $opts via merge_opts()
 * @param string $cmd  name used as the option prefix and for error / debug output
 *
 * @return mixed the value of build_result_data($fields, $rows) on success;
 *               false when an option check or build_field_indexes() fails;
 *               the value returned by error() for file / parse problems
 */
function load_data_from_doc_file($opts, $pipe, $cmd = __FUNCTION__) {

    # set prefix
    $prefix = $cmd;

    # merge opts
    $opts = merge_opts($opts, $pipe, 'doc_type');

    # get file opt
    $file = get_opt($prefix, $opts, 'file');
    if (!check_opt_set_type($cmd, $file, 'file', 'string')) {
        return false;
    }

    # get defined fields opt
    $defined_fields = get_opt($prefix, $opts, 'defined_fields');
    if (!check_opt_if_set_type($cmd, $defined_fields, 'defined_fields', 'array_of_strings')) {
        return false;
    }

    # get save fields
    $save_fields = get_opt($prefix, $opts, 'save_fields');
    if (!check_opt_if_set_type($cmd, $save_fields, 'save_fields', 'array_of_strings')) {
        return false;
    }

    # get doc type opt
    $doc_type = get_opt($prefix, $opts, 'doc_type', 'json');
    if (!check_opt_set_type($cmd, $doc_type, 'doc_type', 'document_file_type')) {
        return false;
    }
    $doc_type_upper = strtoupper($doc_type);

    # get data structure opt
    $data_structure = get_opt($prefix, $opts, 'data_structure', 'list_of_objects');
    if (!check_opt_set_type($cmd, $data_structure, 'data_structure', 'data_structure_type')) {
        return false;
    }

    # get limit opt
    $limit = get_opt($prefix, $opts, 'limit', 0);
    if (!check_opt_set_type($cmd, $limit, 'limit', 'positive_integer')) {
        return false;
    }

    # get offset opt
    $offset = get_opt($prefix, $opts, 'offset', 0);
    if (!check_opt_set_type($cmd, $offset, 'offset', 'positive_integer')) {
        return false;
    }

    # check the file exists
    if (!file_exists($file)) {
        return error($cmd, "file does not exist : {$file}");
    }

    # display generic info about the data
    $data_structure_str = str_replace('_', ' ', $data_structure);
    debug_echo($cmd, "creating data from {$doc_type_upper} file : {$file}");
    debug_echo($cmd, "source data of type '{$data_structure_str}'");

    # read the file into memory
    # (warnings are suppressed because the failure is reported just below)
    $data_str = @file_get_contents($file);
    if (!is_string($data_str)) {
        return error($cmd, "could not read file : {$file}");
    }

    # load data depending on type
    $data = null;
    switch ($doc_type) {
        case 'json':
            $data = @json_decode($data_str, true);
            break;
        case 'yaml':
            # NOTE(review): yaml_decode() is not a PHP built-in (the yaml
            # extension provides yaml_parse()); presumably it is defined
            # elsewhere in the project - confirm
            $data = @yaml_decode($data_str);
            break;
    }
    # anything that is not a non-empty array cannot be used as a list of rows
    if (!is_array($data) || !$data) {
        return error($cmd, "invalid {$doc_type_upper} in file : {$file}");
    }

    # set up the fields and build data
    $res_fields = array();
    $res_data = array();
    switch ($data_structure) {

        case 'list_of_columns':

            # set up fields
            if ($defined_fields) {
                $defined_fields_source = '(from the config)';
            } else {
                $defined_fields = array_shift($data);
                $defined_fields_source = '(from the source file)';
            }

            # set up end point - done after the header row (if any) has been
            # removed, and never past the last row actually present
            $data_count = count($data);
            $end = ($limit == 0) ? $data_count : min($offset + $limit, $data_count);

            # set up response field indexes
            if ($save_fields) {
                $res_fields = $save_fields;
                $res_field_count = count($res_fields);
                $res_field_indexes = build_field_indexes($defined_fields, $res_fields, $cmd);
                if ($res_field_indexes === false) {
                    return false;
                }
            } else {
                $res_fields = $defined_fields;
            }

            # display info about the fields
            debug_echo($cmd, "defined fields {$defined_fields_source} : ");
            debug_dump_list($defined_fields, true);
            if ($save_fields) {
                debug_echo($cmd, "saved fields :");
                debug_dump_list($save_fields, true);
            }

            # gather the data
            if ($save_fields) {
                # keep only the requested columns, in the requested order
                for ($i = $offset; $i < $end; $i++) {
                    $line = $data[$i];
                    $new_line = array();
                    for ($j = 0; $j < $res_field_count; $j++) {
                        $new_line[] = $line[$res_field_indexes[$j]];
                    }
                    $res_data[] = $new_line;
                }
            } elseif ($offset == 0 && $limit == 0) {
                # nothing to cut - take the rows as they are
                $res_data = $data;
            } else {
                for ($i = $offset; $i < $end; $i++) {
                    $res_data[] = $data[$i];
                }
            }
            break;

        case 'list_of_objects':

            # set up end point, never past the last row actually present
            $data_count = count($data);
            $end = ($limit == 0) ? $data_count : min($offset + $limit, $data_count);

            if ($save_fields) {

                # set up result fields
                $res_fields = $save_fields;
                $res_fields_count = count($res_fields);

                # display info about what fields will be saved
                debug_echo($cmd, "saved fields :");
                debug_dump_list($res_fields, true);

                # build the data (fields missing from an object come out as null)
                for ($i = $offset; $i < $end; $i++) {
                    $obj = $data[$i];
                    $new_line = array();
                    for ($j = 0; $j < $res_fields_count; $j++) {
                        $new_line[] = @$obj[$res_fields[$j]];
                    }
                    $res_data[] = $new_line;
                }

            } else {

                # display info about saved files
                debug_echo($cmd, "saving all fields - they will be listed as they are added");

                # build the data
                $res_fields = array();
                $res_fields_count = 0;
                for ($i = $offset; $i < $end; $i++) {
                    $obj = $data[$i];
                    $new_line = array();

                    # save all the existing data in order
                    for ($j = 0; $j < $res_fields_count; $j++) {
                        $new_line[] = @$obj[$res_fields[$j]];
                    }

                    # index and save any new fields
                    $res_fields_added = array();
                    foreach ($obj as $key => $value) {
                        if (in_array($key, $res_fields, true)) {
                            continue;
                        }
                        $res_fields_added[] = $key;
                        $res_fields[] = $key;
                        $res_fields_count++;
                        $new_line[] = $value;
                    }

                    # appended (not keyed by $i) so the result is a plain
                    # 0-based list whatever the offset is
                    $res_data[] = $new_line;

                    # display list of fields added (if any)
                    if ($res_fields_added) {
                        $line_no = $i + 1;
                        debug_echo($cmd, "the following fields were added on row {$line_no} :");
                        debug_dump_list($res_fields_added, true);
                    }
                }

                # add back in any empty fields
                # (the field list only ever grows, so once one row has every
                # field all the rows after it do too)
                foreach ($res_data as $row_no => $row) {
                    if (count($row) == $res_fields_count) {
                        break;
                    }
                    $res_data[$row_no] = array_pad($row, $res_fields_count, '');
                }
            }
            break;
    }

    # detail results
    $line_count = count($res_data);
    debug_echo($cmd, "creation of data from {$doc_type_upper} file complete ({$line_count} lines processed)");

    # create result
    return build_result_data($res_fields, $res_data);
}
/**
 * Rename and reorder the columns of a data set in place, using a field map.
 *
 * $map_fields maps an original field name to its new name. Entries whose new
 * name is empty (falsy) are dropped from the map and reported in the debug
 * output. Every row of $data is rebuilt to hold only the mapped fields, in
 * map order, and $fields is replaced by the list of new names.
 *
 * @param array  $data       rows of data; rewritten in place
 * @param array  $fields     current field names; replaced by the new names
 * @param array  $map_fields map of original field name => new field name
 * @param string $cmd        name used for debug output and passed to build_field_indexes()
 *
 * @return bool true on success; false when build_field_indexes() fails
 *              (in which case $data and $fields are left untouched)
 */
function map_data_fields(&$data, &$fields, $map_fields, $cmd) {

    # sort fields to map / not
    $kept_map_fields = array();
    $ignore_fields = array();
    foreach ($map_fields as $orig => $new) {
        if ($new) {
            $kept_map_fields[$orig] = $new;
        } else {
            $ignore_fields[] = $orig;
        }
    }
    $map_fields = $kept_map_fields;

    # set up new field indexes
    $index_fields = array_keys($map_fields);
    $field_indexes = build_field_indexes($fields, $index_fields, $cmd);
    if ($field_indexes === false) {
        return false;
    }

    # display info about the map
    debug_echo($cmd, "mapping data fields using the following map :");
    debug_dump_yaml($map_fields, true);
    if ($ignore_fields) {
        debug_echo($cmd, "ignoring the following empty result fields in the map :");
        debug_dump_yaml($ignore_fields, true);
    }

    # set up new fields
    $fields = array_values($map_fields);
    $field_count = count($field_indexes);

    # build the data
    $data_count = count($data);
    for ($i = 0; $i < $data_count; $i++) {
        $line = $data[$i];
        $new_line = array();
        for ($j = 0; $j < $field_count; $j++) {
            $new_line[] = $line[$field_indexes[$j]];
        }
        $data[$i] = $new_line;
    }

    return true;
}