Exemplo n.º 1
0
function process_data($opts, $pipe, $cmd = __FUNCTION__, $opt_prefix = false)
{
    # set prefix
    $prefix = 'process_data';
    # merge opts
    $opts = merge_opts($opts, $pipe);
    # adjust opt prefix
    $cmd = adjust_opt_prefix($cmd, $opts, $opt_prefix, 'edit');
    # get debug opt
    $debug = get_opt_config_value($prefix, $opts, 'debug', false);
    if (!check_opt_set_type($cmd, $debug, 'debug', 'boolean')) {
        return false;
    }
    # get type opt
    $join_type = get_opt_config_value($prefix, $opts, 'join_type', 'outer');
    if (!check_opt_set_type($cmd, $join_type, 'type', 'data_join_type')) {
        return false;
    }
    # get sources opt
    $sources = get_opt($prefix, $opts, 'sources');
    if (!check_opt_if_set_type($cmd, $sources, 'sources', 'array')) {
        return false;
    }
    if (!$sources) {
        $source = build_source_data($prefix, $opts, $cmd);
        if (!$source) {
            return error($cmd, "sources opt is not set and the data and fields opts are not set either, so can't be used to create one");
        }
        $sources = array($source);
    }
    # get sources values
    foreach ($sources as $i => $val) {
        $sources[$i] = value($val);
    }
    # get create_indexes opt
    $create_indexes = get_opt($prefix, $opts, 'create_indexes');
    if (!check_opt_if_set_type($cmd, $create_indexes, 'create_indexes', 'array_of_strings')) {
        return false;
    }
    # get join field opt
    $primary_key = get_opt_config_value($prefix, $opts, 'primary_key', 'guid');
    if (!check_opt_set_type($cmd, $primary_key, 'primary_key', 'string')) {
        return false;
    }
    # get mawk rules opt
    $mawk = get_opt_config_value($prefix, $opts, 'mawk');
    if (!check_opt_set_type($cmd, $mawk, 'mawk', 'string')) {
        return false;
    }
    # get update frequency opt
    $update_frequency = get_opt_config_value($prefix, $opts, 'update_frequency', 0);
    if (!check_opt_set_type($cmd, $update_frequency, 'update_frequency', 'integer')) {
        return false;
    }
    # create fields and data arrays
    $fields_arr = array();
    $data_arr = array();
    /*
      for ($i=0; $i<count ($sources); $i++) {
      
        $source_no = $i+1;
      
        $fields = $sources[$i]['fields'];
        
        if (!$fields)
          return  error ($cmd, "no fields defined for source $source_no");
      
        $fields_arr[] = $fields;
        
        if ($i == 0) {
        
          $data_arr[] = $sources[$i]['data'];
        
        } else {
        
          $data = $sources[$i]['indexes'][$primary_key];
          
          if ($data === null) {
          
            return  error ($cmd, "index on primary key '$primary_key' does not exis for source $source_no");   # TODO: auto-create
          }
          
          $data_arr[] = $data;
        }
      }*/
    $sources_count = count($sources);
    for ($i = 0; $i < $sources_count; $i++) {
        $source_no = $i + 1;
        # add fields
        $fields = $sources[$i]['fields'];
        if (!$fields) {
            return error($cmd, "no fields defined for source {$source_no}");
        }
        if ($sources_count > 1) {
            if (!in_array($primary_key, $fields)) {
                return error($cmd, "primary key '{$primary_key}' not defined for source {$source_no}");
            }
        }
        $fields_arr[] = $fields;
        # add data
        $data = $sources[$i]['data'];
        if (!$data) {
            return error($cmd, "no data defined for source {$source_no}");
        }
        $data_arr[] = $data;
    }
    # display info about the join, if there is one
    if (count($fields_arr) > 1) {
        debug_echo($cmd, "performing {$join_type} join on primary key / field '{$primary_key}'");
    }
    for ($i = 1; $i <= count($fields_arr); $i++) {
        debug_echo($cmd, "fields for data source {$i} :");
        debug_dump_list($fields_arr[$i - 1], true);
    }
    # parse the line rules
    $parsed_mawk = mawk_parse_code($mawk, $fields_arr, $cmd);
    if (is_int($parsed_mawk)) {
        return error($cmd, "code line {$i} of the mawk has an invalid syntax");
    }
    $parsed_mawk_code = $parsed_mawk['code'];
    # set up indexes
    $indexes_arr = $parsed_mawk['indexes'];
    if (count($fields_arr) > 1) {
        $r = mawk_add_primary_index($primary_key, $indexes_arr, $fields_arr, $cmd);
        if ($r === false) {
            return false;
        }
    }
    # display info about the line rules
    debug_echo($cmd, "the following mawk code will be used :");
    debug_echo_txt(mawk_clean_code($mawk), true);
    if ($debug) {
        debug_echo($cmd, "which translates to the following parsed code :");
        debug_dump_yaml($parsed_mawk_code, true);
        if ($indexes_arr) {
            debug_echo($cmd, "and includes the following indexes :");
            debug_dump_yaml($indexes_arr, true);
        }
    }
    /*
    if (count ($fields_arr) > 1) {
      $primary_indexes = mawk_get_indexes ($primary_key, $fields_arr, $cmd);
        
        if ($primary_indexes === false)
          return  false;
          
      } else {
        $primary_indexes = array ();
      }
    
      # set up create indexes
         
      $new_indexes = mawk_get_indexes ($create_indexes, $fields_arr, $cmd);
    if ($new_indexes === false)
        return  false;
    */
    # set up discard
    if ($join_type == 'inner') {
        $inner_join = true;
    } else {
        $inner_join = false;
    }
    # run mawk code
    try {
        debug_echo($cmd, "processing the data (this can take some time depending on the inputs) ...");
        $mawk_res = mawk_process_data($parsed_mawk_code, $fields_arr, $data_arr, $indexes_arr, $update_frequency, $inner_join, $cmd, $debug);
    } catch (MawkError $e) {
        $msg = $e->getMessage();
        return error($cmd, $msg);
    }
    return $mawk_res;
}
Exemplo n.º 2
0
function load_data_from_doc_file($opts, $pipe, $cmd = __FUNCTION__)
{
    # set prefix
    $prefix = $cmd;
    # merge opts
    $opts = merge_opts($opts, $pipe, 'doc_type');
    # get file opt
    $file = get_opt($prefix, $opts, 'file');
    if (!check_opt_set_type($cmd, $file, 'file', 'string')) {
        return false;
    }
    # get defined fields opt
    $defined_fields = get_opt($prefix, $opts, 'defined_fields');
    if (!check_opt_if_set_type($cmd, $defined_fields, 'defined_fields', 'array_of_strings')) {
        return false;
    }
    # get save fields
    $save_fields = get_opt($prefix, $opts, 'save_fields');
    if (!check_opt_if_set_type($cmd, $save_fields, 'save_fields', 'array_of_strings')) {
        return false;
    }
    # get doc type opt
    $doc_type = get_opt($prefix, $opts, 'doc_type', 'json');
    if (!check_opt_set_type($cmd, $doc_type, 'doc_type', 'document_file_type')) {
        return false;
    }
    $doc_type_upper = strtoupper($doc_type);
    # get data structure opt
    $data_structure = get_opt($prefix, $opts, 'data_structure', 'list_of_objects');
    if (!check_opt_set_type($cmd, $data_structure, 'data_structure', 'data_structure_type')) {
        return false;
    }
    # get limit opt
    $limit = get_opt($prefix, $opts, 'limit', 0);
    if (!check_opt_set_type($cmd, $limit, 'limit', 'positive_integer')) {
        return false;
    }
    # get offset opt
    $offset = get_opt($prefix, $opts, 'offset', 0);
    if (!check_opt_set_type($cmd, $offset, 'offset', 'positive_integer')) {
        return false;
    }
    # check the file exists
    if (!file_exists($file)) {
        return error($cmd, "file does not exist : {$file}");
    }
    # display generic info about the data
    $data_structure_str = str_replace('_', ' ', $data_structure);
    debug_echo($cmd, "creating data from {$doc_type_upper} file : {$file}");
    debug_echo($cmd, "source data of type '{$data_structure_str}'");
    # read the file into memory
    $data_str = @file_get_contents($file);
    if (!is_string($data_str)) {
        return error($cmd, "could not read file : {$file}");
    }
    # load data depending on type
    switch ($doc_type) {
        case 'json':
            $data = @json_decode($data_str, true);
            break;
        case 'yaml':
            $data = @yaml_decode($data_str);
            break;
    }
    if (!$data) {
        return error($cmd, "invalid {$doc_type_upper} in file : {$file}");
    }
    $data_count = count($data);
    # set up end point
    if ($limit == 0) {
        $end = $data_count;
    } else {
        $end = $offset + $limit;
    }
    # set up the fields and build data
    $res_data = array();
    switch ($data_structure) {
        case 'list_of_columns':
            # set up fields
            if ($defined_fields) {
                $defined_fields_source = '(from the config)';
            } else {
                $defined_fields = array_shift($data);
                $defined_fields_source = '(from the source file)';
            }
            # set up response field indexes
            if ($save_fields) {
                $res_fields = $save_fields;
                $res_field_count = count($res_fields);
                $res_field_indexes = build_field_indexes($defined_fields, $res_fields, $cmd);
                if ($res_field_indexes === false) {
                    return;
                }
            } else {
                $res_fields = $defined_fields;
            }
            # display info about the fields
            debug_echo($cmd, "defined fields {$defined_fields_source} : ");
            debug_dump_list($defined_fields, true);
            if ($save_fields) {
                debug_echo($cmd, "saved fields :");
                debug_dump_list($save_fields, true);
            }
            # gather the data
            # TODO: change for having offset / limit
            $res_data = array();
            if ($save_fields) {
                $res_data = array();
                for ($i = $offset; $i < $end; $i++) {
                    $line = $data[$i];
                    $new_line = array();
                    for ($j = 0; $j < $res_field_count; $j++) {
                        $new_line[] = $line[$res_field_indexes[$j]];
                    }
                    $res_data[] = $new_line;
                }
            } elseif ($offset == 0 && $limit == 0) {
                $res_data = $data;
            } else {
                $res_data = array();
                for ($i = $offset; $i < $end; $i++) {
                    $res_data[] = $data[$i];
                }
            }
            break;
        case 'list_of_objects':
            $res_data = array();
            if ($save_fields) {
                # set up result fields
                $res_fields = $save_fields;
                $res_fields_count = count($res_fields);
                # display info about what fields will be saved
                debug_echo($cmd, "saved fields :");
                debug_dump_list($res_fields, true);
                # build the data
                for ($i = $offset; $i < $end; $i++) {
                    $obj = $data[$i];
                    $new_line = array();
                    for ($j = 0; $j < $res_fields_count; $j++) {
                        $new_line[] = @$obj[$res_fields[$j]];
                    }
                    $res_data[] = $new_line;
                }
                $res_fields = $save_fields;
            } else {
                # display info about saved files
                debug_echo($cmd, "saving all fields - they will be listed as they are added");
                # build the data
                $res_fields = array();
                $res_fields_count = 0;
                for ($i = $offset; $i < $end; $i++) {
                    $obj = $data[$i];
                    $new_line = array();
                    # save all the existing data in order
                    for ($j = 0; $j < $res_fields_count; $j++) {
                        $new_line[] = @$obj[$res_fields[$j]];
                    }
                    # index and save any new fields
                    $res_fields_added = array();
                    foreach ($obj as $key => $value) {
                        if (in_array($key, $res_fields)) {
                            continue;
                        }
                        $res_fields_added[] = $key;
                        $res_fields[] = $key;
                        $res_fields_count++;
                        $new_line[] = $value;
                    }
                    $res_data[$i] = $new_line;
                    # display list of fields added (if any)
                    if ($res_fields_added) {
                        $line_no = $i + 1;
                        debug_echo($cmd, "the following fields were added on row {$line_no} :");
                        debug_dump_list($res_fields_added, true);
                    }
                }
                # add back in any empty fields
                for ($i = 0; $i < count($res_data); $i++) {
                    $line =& $res_data[$i];
                    $line_fields_count = count($line);
                    if ($line_fields_count == $res_fields_count) {
                        break;
                    }
                    for ($j = $line_fields_count; $j < $res_fields_count; $j++) {
                        $line[] = '';
                    }
                }
            }
            break;
    }
    # detail results
    $line_count = count($res_data);
    debug_echo($cmd, "creation of data from JSON file complete ({$line_count} lines processed)");
    # create result
    return build_result_data($res_fields, $res_data);
}
Exemplo n.º 3
0
function debug_echo_txt($txt, $indent)
{
    $lines = explode("\n", $txt);
    return debug_dump_list($lines, $indent);
}
Exemplo n.º 4
0
function ftp_scandir($opts, $pipe = false, $cmd = __FUNCTION__)
{
    # set prefix
    $prefix = 'ftp';
    # merge opts
    $opts = merge_opts($opts, $pipe, 'dir');
    # get execute opt
    $execute = get_opt_config_value($prefix, $opts, 'execute', true);
    if (!check_opt_set_type($cmd, $execute, 'execute', 'boolean')) {
        return false;
    }
    # check if we should execute or not
    if (!$execute) {
        return true;
    }
    # get remote dir opt
    $dir = get_opt($prefix, $opts, 'dir');
    if (!check_opt_set_type($cmd, $dir, 'dir', 'string')) {
        return false;
    }
    # get search opt
    $search = get_opt($prefix, $opts, 'search', '');
    if (!check_opt_set_type($cmd, $search, 'search', 'string')) {
        return false;
    }
    # get chdir retries
    $chdir_retries = get_opt_config_value($prefix, $opts, 'chdir_retries', 3);
    if (!check_opt_set_type($cmd, $chdir_retries, 'chdir_retries', 'integer')) {
        return false;
    }
    # get scandir retries opt
    $scandir_retries = get_opt_config_value($prefix, $opts, 'scandir_retries', 3);
    if (!check_opt_set_type($cmd, $scandir_retries, 'scandir_retries', 'integer')) {
        return false;
    }
    # setup connection
    $conn = ftp_get_conn($opts, $cmd);
    if (!$conn) {
        return false;
    }
    # get connection values
    $ftp_handle = $conn['handle'];
    $svr = $conn['svr'];
    # change to dir (note: we chdir because some server do not allow rawlist except in current working directory)
    debug_echo($cmd, "scanning dir on FTP server : {$dir} ...");
    $success = false;
    for ($i = 0; $i < $chdir_retries; $i++) {
        if (@ftp_chdir($ftp_handle, $dir)) {
            $success = true;
            break;
        }
    }
    if (!$success) {
        return error($cmd, "could not chdir to dir on FTP server : {$dir} ({$chdir_retries} attempts)");
    }
    # build full search
    if ($dir == '/') {
        $full_search = "/{$search}";
        $path_offset = 1;
    } else {
        $full_search = "{$dir}/{$search}";
        $path_offset = strlen($dir) + 1;
    }
    # try to get list
    for ($i = 0; $i < $scandir_retries; $i++) {
        // Note: if rawlist is used then it's because a server does not accept nlist
        $list = @ftp_nlist($ftp_handle, $full_search);
        if (!is_array($list)) {
            break;
        }
    }
    if (!$list) {
        return error($cmd, "could not scan dir on FTP server : {$dir} ({$scandir_retries} attempts)");
    }
    # convert rawlist
    //$list = ftp_rawlist_2_nlist ($list);
    # remove directory name
    $list = array_strip_offset_util($list, $path_offset);
    # display list of files
    $count = count($list);
    debug_echo($cmd, "{$count} files found in dir on FTP server : {$dir}");
    debug_dump_list($list, true);
    # return
    if ($pipe === false) {
        return $list;
    }
    return array('files' => $list);
}
Exemplo n.º 5
0
function reorder_data_fields(&$data, &$fields, $reorder_fields, $cmd)
{
    $new_fields = array();
    # check all the fields to re-order
    foreach ($reorder_fields as $field) {
        if (!in_array($field, $fields)) {
            return error($cmd, "field '{$field}' cannot be reordered from the data because it does not exist");
        }
        $new_fields[] = $field;
    }
    # include all the original fields that are not re-ordered
    foreach ($fields as $field) {
        if (!in_array($field, $new_fields)) {
            $new_fields[] = $field;
        }
    }
    # display message
    debug_echo($cmd, "new order for fields :");
    debug_dump_list($new_fields, true);
    # set up field indexes
    $fields_flipped = array_flip($fields);
    $new_fields_indexes = array();
    foreach ($new_fields as $field) {
        $new_fields_indexes[] = $fields_flipped[$field];
    }
    # reset the fields
    $field_count = count($fields);
    for ($i = 0; $i < $field_count; $i++) {
        $fields[$i] = $new_fields[$i];
    }
    # loop through the data
    $data_count = count($data);
    for ($i = 0; $i < $data_count; $i++) {
        $line = $data[$i];
        $new_line = array();
        for ($j = 0; $j < $field_count; $j++) {
            $new_line[] = $line[$new_fields_indexes[$j]];
        }
        $data[$i] = $new_line;
    }
    return true;
}