Beispiel #1
0
function build_indexes($opts, $pipe, $cmd = __FUNCTION__, $msg_suffix = null)
{
    # set prefix
    $prefix = 'build_indexes';
    # merge opts
    if ($pipe !== null) {
        $opts = merge_opts($opts, $pipe, 'indexes');
    }
    # get fields opt
    $fields = get_opt($prefix, $opts, 'fields');
    if (!check_opt_set_type($cmd, $fields, 'fields', 'array_of_strings')) {
        return false;
    }
    # get data opt
    $data =& get_opt($prefix, $opts, 'data');
    if (!check_opt_set_type($cmd, $data, 'data', 'array')) {
        return false;
    }
    # get indexes opt
    $indexes =& get_opt($prefix, $opts, 'indexes');
    if (!check_opt_set_type($cmd, $indexes, 'indexes', 'array_of_strings')) {
        return false;
    }
    # set up indexes
    $res_indexes_assoc = array();
    $res_indexes_list = array();
    foreach ($indexes as $index_name) {
        $index = array();
        $res_indexes_assoc[$index_name] =& $index;
        $res_indexes_list[] =& $index;
    }
    # set up field indexes
    $field_indexes = build_field_indexes($fields, $indexes, $cmd, $msg_suffix);
    if ($field_indexes === false) {
        return false;
    }
    $index_count = count($indexes);
    # loop over data to build indexes
    for ($i = 0; $i < count($data); $i++) {
        unset($line);
        $line =& $data[$i];
        for ($j = 0; $j < $index_count; $j++) {
            $idx = $field_indexes[$j];
            $key = $line[$idx];
            $res_indexes_list[$j][$key] =& $line;
        }
    }
    # sort the indexes
    foreach ($res_indexes_assoc as $name => $index) {
        ksort($index);
    }
    # save the indxes
    $opts['indexes'] = $res_indexes_assoc;
    return $opts;
}
Beispiel #2
0
function load_data_from_doc_file($opts, $pipe, $cmd = __FUNCTION__)
{
    # set prefix
    $prefix = $cmd;
    # merge opts
    $opts = merge_opts($opts, $pipe, 'doc_type');
    # get file opt
    $file = get_opt($prefix, $opts, 'file');
    if (!check_opt_set_type($cmd, $file, 'file', 'string')) {
        return false;
    }
    # get defined fields opt
    $defined_fields = get_opt($prefix, $opts, 'defined_fields');
    if (!check_opt_if_set_type($cmd, $defined_fields, 'defined_fields', 'array_of_strings')) {
        return false;
    }
    # get save fields
    $save_fields = get_opt($prefix, $opts, 'save_fields');
    if (!check_opt_if_set_type($cmd, $save_fields, 'save_fields', 'array_of_strings')) {
        return false;
    }
    # get doc type opt
    $doc_type = get_opt($prefix, $opts, 'doc_type', 'json');
    if (!check_opt_set_type($cmd, $doc_type, 'doc_type', 'document_file_type')) {
        return false;
    }
    $doc_type_upper = strtoupper($doc_type);
    # get data structure opt
    $data_structure = get_opt($prefix, $opts, 'data_structure', 'list_of_objects');
    if (!check_opt_set_type($cmd, $data_structure, 'data_structure', 'data_structure_type')) {
        return false;
    }
    # get limit opt
    $limit = get_opt($prefix, $opts, 'limit', 0);
    if (!check_opt_set_type($cmd, $limit, 'limit', 'positive_integer')) {
        return false;
    }
    # get offset opt
    $offset = get_opt($prefix, $opts, 'offset', 0);
    if (!check_opt_set_type($cmd, $offset, 'offset', 'positive_integer')) {
        return false;
    }
    # check the file exists
    if (!file_exists($file)) {
        return error($cmd, "file does not exist : {$file}");
    }
    # display generic info about the data
    $data_structure_str = str_replace('_', ' ', $data_structure);
    debug_echo($cmd, "creating data from {$doc_type_upper} file : {$file}");
    debug_echo($cmd, "source data of type '{$data_structure_str}'");
    # read the file into memory
    $data_str = @file_get_contents($file);
    if (!is_string($data_str)) {
        return error($cmd, "could not read file : {$file}");
    }
    # load data depending on type
    switch ($doc_type) {
        case 'json':
            $data = @json_decode($data_str, true);
            break;
        case 'yaml':
            $data = @yaml_decode($data_str);
            break;
    }
    if (!$data) {
        return error($cmd, "invalid {$doc_type_upper} in file : {$file}");
    }
    $data_count = count($data);
    # set up end point
    if ($limit == 0) {
        $end = $data_count;
    } else {
        $end = $offset + $limit;
    }
    # set up the fields and build data
    $res_data = array();
    switch ($data_structure) {
        case 'list_of_columns':
            # set up fields
            if ($defined_fields) {
                $defined_fields_source = '(from the config)';
            } else {
                $defined_fields = array_shift($data);
                $defined_fields_source = '(from the source file)';
            }
            # set up response field indexes
            if ($save_fields) {
                $res_fields = $save_fields;
                $res_field_count = count($res_fields);
                $res_field_indexes = build_field_indexes($defined_fields, $res_fields, $cmd);
                if ($res_field_indexes === false) {
                    return;
                }
            } else {
                $res_fields = $defined_fields;
            }
            # display info about the fields
            debug_echo($cmd, "defined fields {$defined_fields_source} : ");
            debug_dump_list($defined_fields, true);
            if ($save_fields) {
                debug_echo($cmd, "saved fields :");
                debug_dump_list($save_fields, true);
            }
            # gather the data
            # TODO: change for having offset / limit
            $res_data = array();
            if ($save_fields) {
                $res_data = array();
                for ($i = $offset; $i < $end; $i++) {
                    $line = $data[$i];
                    $new_line = array();
                    for ($j = 0; $j < $res_field_count; $j++) {
                        $new_line[] = $line[$res_field_indexes[$j]];
                    }
                    $res_data[] = $new_line;
                }
            } elseif ($offset == 0 && $limit == 0) {
                $res_data = $data;
            } else {
                $res_data = array();
                for ($i = $offset; $i < $end; $i++) {
                    $res_data[] = $data[$i];
                }
            }
            break;
        case 'list_of_objects':
            $res_data = array();
            if ($save_fields) {
                # set up result fields
                $res_fields = $save_fields;
                $res_fields_count = count($res_fields);
                # display info about what fields will be saved
                debug_echo($cmd, "saved fields :");
                debug_dump_list($res_fields, true);
                # build the data
                for ($i = $offset; $i < $end; $i++) {
                    $obj = $data[$i];
                    $new_line = array();
                    for ($j = 0; $j < $res_fields_count; $j++) {
                        $new_line[] = @$obj[$res_fields[$j]];
                    }
                    $res_data[] = $new_line;
                }
                $res_fields = $save_fields;
            } else {
                # display info about saved files
                debug_echo($cmd, "saving all fields - they will be listed as they are added");
                # build the data
                $res_fields = array();
                $res_fields_count = 0;
                for ($i = $offset; $i < $end; $i++) {
                    $obj = $data[$i];
                    $new_line = array();
                    # save all the existing data in order
                    for ($j = 0; $j < $res_fields_count; $j++) {
                        $new_line[] = @$obj[$res_fields[$j]];
                    }
                    # index and save any new fields
                    $res_fields_added = array();
                    foreach ($obj as $key => $value) {
                        if (in_array($key, $res_fields)) {
                            continue;
                        }
                        $res_fields_added[] = $key;
                        $res_fields[] = $key;
                        $res_fields_count++;
                        $new_line[] = $value;
                    }
                    $res_data[$i] = $new_line;
                    # display list of fields added (if any)
                    if ($res_fields_added) {
                        $line_no = $i + 1;
                        debug_echo($cmd, "the following fields were added on row {$line_no} :");
                        debug_dump_list($res_fields_added, true);
                    }
                }
                # add back in any empty fields
                for ($i = 0; $i < count($res_data); $i++) {
                    $line =& $res_data[$i];
                    $line_fields_count = count($line);
                    if ($line_fields_count == $res_fields_count) {
                        break;
                    }
                    for ($j = $line_fields_count; $j < $res_fields_count; $j++) {
                        $line[] = '';
                    }
                }
            }
            break;
    }
    # detail results
    $line_count = count($res_data);
    debug_echo($cmd, "creation of data from JSON file complete ({$line_count} lines processed)");
    # create result
    return build_result_data($res_fields, $res_data);
}
Beispiel #3
0
function map_data_fields(&$data, &$fields, $map_fields, $cmd)
{
    # sort fields to map / not
    $new_map_fields = array();
    $ignore_fields = array();
    foreach ($map_fields as $orig => $new) {
        if ($new) {
            $new_map_fields[$orig] = $new;
        } else {
            $ignore_fileds[] = $orig;
        }
    }
    $map_fields = $new_map_fields;
    # set up new field indexes
    $index_fields = array_keys($map_fields);
    $field_indexes = build_field_indexes($fields, $index_fields, $cmd);
    if ($field_indexes === false) {
        return false;
    }
    # display info about the map
    debug_echo($cmd, "mapping data fields using the following map :");
    debug_dump_yaml($map_fields, true);
    if ($ignore_fields) {
        debug_echo($cmd, "ignoring the following empty result fields in the map :");
        debug_dump_yaml($ignore_fields, true);
    }
    # set up new fields
    $fields = array_values($map_fields);
    $field_count = count($field_indexes);
    # build the data
    $data_count = count($data);
    for ($i = 0; $i < $data_count; $i++) {
        $line = $data[$i];
        $new_line = array();
        for ($j = 0; $j < $field_count; $j++) {
            $new_line[] = $line[$field_indexes[$j]];
        }
        $data[$i] = $new_line;
    }
    return true;
}