コード例 #1
0
ファイル: xml.php プロジェクト: simpl/datapipe
/**
 * Parses a document string with the supplied parse logic and returns the
 * elements found, optionally merged into a single associative array.
 *
 * Opts read (after merging $opts with $pipe):
 *   - document      (string, required)        the markup to parse
 *   - doc_type      (string, default 'xml')   only used in messages here
 *   - logic         (string|array, required)  parse logic; a string is decoded as YAML
 *   - result_type   (string, default 'elements') only used in messages here
 *   - merge_results (boolean, default false)  merge all results into one array
 *
 * @param mixed  $opts options for this command
 * @param mixed  $pipe piped options, merged into $opts by merge_opts()
 * @param string $cmd  command name used in error / debug messages
 *
 * @return mixed the parse results (merged when merge_results is set), or the
 *               result of error() / false when an opt is invalid or parsing fails
 */
function xml_parse_document($opts, $pipe, $cmd = __FUNCTION__)
{
    # set prefix
    $prefix = 'xml_parse_document';

    # merge opts
    $opts = merge_opts($opts, $pipe, 'document');

    # get document opt
    $document = get_opt($prefix, $opts, 'document');
    if (!check_opt_set_type($cmd, $document, 'document', 'string')) {
        return false;
    }

    # get doc type opt
    $doc_type = get_opt_config_value($prefix, $opts, 'doc_type', 'xml');
    if (!check_opt_set_type($cmd, $doc_type, 'doc_type', 'string')) {
        return false;
    }

    # get logic opt (a string is treated as YAML and decoded)
    $logic = get_opt($prefix, $opts, 'logic');
    if (!check_opt_set_type($cmd, $logic, 'logic', 'string,array')) {
        return false;
    }
    if (is_string($logic)) {
        $logic = @yaml_decode($logic);
        if (!$logic) {
            return error($cmd, "parse logic not valid YAML");
        }
    }

    # get result type opt
    $result_type = get_opt_config_value($prefix, $opts, 'result_type', 'elements');
    if (!check_opt_set_type($cmd, $result_type, 'result_type', 'string')) {
        return false;
    }

    # get merge results opt
    $merge_results = get_opt($prefix, $opts, 'merge_results', false);
    if (!check_opt_set_type($cmd, $merge_results, 'merge_results', 'boolean')) {
        return false;
    }

    # load the document
    # NOTE(review): the document is always loaded with the HTML parser,
    # whatever doc_type says - confirm this is intended for real XML input
    $dom = new DOMDocument('1.0');
    @$dom->loadHTML($document);

    # parse the document and check that it was parsed correctly
    $res = xml_parse_element($dom, $logic);
    if ($res === false) {
        # the message previously interpolated an undefined variable
        return error($cmd, "could not parse {$doc_type} document");
    }

    # display info about the parsing
    debug_echo($cmd, "{$doc_type} document parsed successfully for {$result_type}");
    if (empty($res)) {
        debug_echo($cmd, "no {$result_type} found while parsing the {$doc_type} document");
    } else {
        debug_echo($cmd, "the following {$result_type} were found in the {$doc_type} document");
        debug_dump_yaml($res, true);
    }

    # return if we are not merging the results
    if (!$merge_results) {
        return $res;
    }

    # merge the results : array values with the same key are concatenated,
    # any other value is overwritten by the last result that defines it
    $merged = array();
    foreach ($res as $elt) {
        foreach ($elt as $key => $value) {
            if (is_array($value) && !empty($merged[$key]) && is_array($merged[$key])) {
                $value = array_merge($merged[$key], $value);
            }
            $merged[$key] = $value;
        }
    }

    return $merged;
}
コード例 #2
0
ファイル: process_data.php プロジェクト: simpl/datapipe
/**
 * Runs mawk code over one or more data sources, joining them on a primary
 * key when there is more than one source.
 *
 * Opts read (after merging $opts with $pipe):
 *   - debug            (boolean, default false)
 *   - join_type        (data_join_type, default 'outer'); 'inner' enables the inner join flag
 *   - sources          (array, optional); when not set a single source is
 *                      built with build_source_data()
 *   - create_indexes   (array_of_strings, optional); validated but not used below
 *   - primary_key      (string, default 'guid')
 *   - mawk             (string, required) the mawk code to run
 *   - update_frequency (integer, default 0)
 *
 * @param mixed  $opts       options for this command
 * @param mixed  $pipe       piped options, merged into $opts by merge_opts()
 * @param string $cmd        command name used in error / debug messages
 * @param mixed  $opt_prefix passed through to adjust_opt_prefix()
 *
 * @return mixed the result of mawk_process_data(), or the result of error() /
 *               false when an opt is invalid or processing fails
 */
function process_data($opts, $pipe, $cmd = __FUNCTION__, $opt_prefix = false)
{
    # set prefix
    $prefix = 'process_data';

    # merge opts
    $opts = merge_opts($opts, $pipe);

    # adjust opt prefix
    $cmd = adjust_opt_prefix($cmd, $opts, $opt_prefix, 'edit');

    # get debug opt
    $debug = get_opt_config_value($prefix, $opts, 'debug', false);
    if (!check_opt_set_type($cmd, $debug, 'debug', 'boolean')) {
        return false;
    }

    # get join type opt (reported under its real name, it was 'type' before)
    $join_type = get_opt_config_value($prefix, $opts, 'join_type', 'outer');
    if (!check_opt_set_type($cmd, $join_type, 'join_type', 'data_join_type')) {
        return false;
    }

    # get sources opt, falling back to a single source built from the opts
    $sources = get_opt($prefix, $opts, 'sources');
    if (!check_opt_if_set_type($cmd, $sources, 'sources', 'array')) {
        return false;
    }
    if (!$sources) {
        $source = build_source_data($prefix, $opts, $cmd);
        if (!$source) {
            return error($cmd, "sources opt is not set and the data and fields opts are not set either, so can't be used to create one");
        }
        $sources = array($source);
    }

    # get sources values
    foreach ($sources as $i => $val) {
        $sources[$i] = value($val);
    }

    # get create_indexes opt
    # NOTE: this opt is only validated here, nothing below reads it
    $create_indexes = get_opt($prefix, $opts, 'create_indexes');
    if (!check_opt_if_set_type($cmd, $create_indexes, 'create_indexes', 'array_of_strings')) {
        return false;
    }

    # get primary key (join field) opt
    $primary_key = get_opt_config_value($prefix, $opts, 'primary_key', 'guid');
    if (!check_opt_set_type($cmd, $primary_key, 'primary_key', 'string')) {
        return false;
    }

    # get mawk rules opt
    $mawk = get_opt_config_value($prefix, $opts, 'mawk');
    if (!check_opt_set_type($cmd, $mawk, 'mawk', 'string')) {
        return false;
    }

    # get update frequency opt
    $update_frequency = get_opt_config_value($prefix, $opts, 'update_frequency', 0);
    if (!check_opt_set_type($cmd, $update_frequency, 'update_frequency', 'integer')) {
        return false;
    }

    # create fields and data arrays, one entry per source
    $fields_arr = array();
    $data_arr = array();
    $sources_count = count($sources);
    $source_no = 0;
    foreach (array_values($sources) as $source) {
        $source_no++;

        # add fields
        $fields = $source['fields'];
        if (!$fields) {
            return error($cmd, "no fields defined for source {$source_no}");
        }

        # the primary key is only needed when there is something to join
        if ($sources_count > 1 && !in_array($primary_key, $fields)) {
            return error($cmd, "primary key '{$primary_key}' not defined for source {$source_no}");
        }
        $fields_arr[] = $fields;

        # add data
        $data = $source['data'];
        if (!$data) {
            return error($cmd, "no data defined for source {$source_no}");
        }
        $data_arr[] = $data;
    }

    # display info about the join, if there is one
    $is_join = count($fields_arr) > 1;
    if ($is_join) {
        debug_echo($cmd, "performing {$join_type} join on primary key / field '{$primary_key}'");
    }
    foreach ($fields_arr as $idx => $source_fields) {
        $source_no = $idx + 1;
        debug_echo($cmd, "fields for data source {$source_no} :");
        debug_dump_list($source_fields, true);
    }

    # parse the line rules
    $parsed_mawk = mawk_parse_code($mawk, $fields_arr, $cmd);
    if (is_int($parsed_mawk)) {
        # an integer result signals a syntax error; it is reported as the
        # failing code line (presumably what mawk_parse_code returns - the
        # message used to print an unrelated loop counter instead)
        return error($cmd, "code line {$parsed_mawk} of the mawk has an invalid syntax");
    }
    $parsed_mawk_code = $parsed_mawk['code'];

    # set up indexes
    $indexes_arr = $parsed_mawk['indexes'];
    if ($is_join) {
        $r = mawk_add_primary_index($primary_key, $indexes_arr, $fields_arr, $cmd);
        if ($r === false) {
            return false;
        }
    }

    # display info about the line rules
    debug_echo($cmd, "the following mawk code will be used :");
    debug_echo_txt(mawk_clean_code($mawk), true);
    if ($debug) {
        debug_echo($cmd, "which translates to the following parsed code :");
        debug_dump_yaml($parsed_mawk_code, true);
        if ($indexes_arr) {
            debug_echo($cmd, "and includes the following indexes :");
            debug_dump_yaml($indexes_arr, true);
        }
    }

    # set up discard : only an inner join sets the inner join flag
    $inner_join = $join_type == 'inner';

    # run mawk code
    try {
        debug_echo($cmd, "processing the data (this can take some time depending on the inputs) ...");
        $mawk_res = mawk_process_data($parsed_mawk_code, $fields_arr, $data_arr, $indexes_arr, $update_frequency, $inner_join, $cmd, $debug);
    } catch (MawkError $e) {
        return error($cmd, $e->getMessage());
    }

    return $mawk_res;
}
コード例 #3
0
ファイル: http.php プロジェクト: simpl/datapipe
/**
 * Parses the forms in an HTTP response body and stores the form action,
 * method and hidden inputs / buttons back into the opts.
 *
 * Opts read (after merging $opts with $pipe):
 *   - request_url   (string, required) used to resolve a relative form action
 *   - response_body (string, optional) nothing is parsed when it is empty
 *   - form_name     (string, default '') restricts parsing to the named form
 *   - {method}_args (array, optional)  existing args the form inputs are merged into
 *
 * Opts written: form_method, form_action, form_inputs, {method}_args and,
 * when $response_form is set, the same keys prefixed with 'response_'.
 *
 * @param mixed  $opts          options, passed by reference and replaced by the merged opts
 * @param mixed  $pipe          piped options, merged into $opts by merge_opts()
 * @param string $cmd           command name used in error / debug messages
 * @param bool   $response_form also store the results under the 'response_' keys
 *
 * @return mixed the updated opts, or the result of error() / false when an opt is invalid
 */
function http_parse_form(&$opts, $pipe, $cmd = __FUNCTION__, $response_form = false)
{
    # set prefix
    $prefix = 'http';

    # merge opts
    $opts = merge_opts($opts, $pipe, 'form_name');

    # get request_url opt
    $request_url = get_opt($prefix, $opts, 'request_url');
    if (!check_opt_set_type($cmd, $request_url, 'request_url', 'string')) {
        return false;
    }

    # get response body opt
    $response_body = get_opt($prefix, $opts, 'response_body');
    if (!check_opt_if_set_type($cmd, $response_body, 'response_body', 'string')) {
        return false;
    }

    # return if no response body is found
    if (!$response_body) {
        # $cmd is passed first, like the other warning() call in this function
        warning($cmd, "no response body to parse for hidden post args");
        return $opts;
    }

    # get post form opt
    $form_name = get_opt($prefix, $opts, 'form_name', '');
    if (!check_opt_set_type($cmd, $form_name, 'form_name', 'string')) {
        return false;
    }

    # display message
    if ($form_name) {
        debug_echo($cmd, "parsing response form '{$form_name}' for action, method and inputs");
    } else {
        debug_echo($cmd, "parsing all forms (if any) for action, method and inputs");
    }

    # parse the response body for forms
    $form_parse_logic = "\n\n    elements:\n      - form:\n          attributes:\n            name: {$form_name}\n            action:\n            method:\n              \n          elements:\n            - input:\n                label: inputs\n                attributes:\n                  !type: hidden\n                  name:\n                  value:\n\n            - button:\n                label: inputs\n                attributes:\n                  name:\n                  value:\n  ";
    $form_parse = array('document' => $response_body, 'doc_type' => 'HTML', 'result_type' => 'forms', 'logic' => $form_parse_logic, 'merge_results' => true);
    $form = xml_parse_document($form_parse, null, $cmd);
    if (!$form) {
        if ($form_name) {
            warning($cmd, "form '{$form_name}' not found in response body");
        } else {
            debug_echo($cmd, "no forms found in the response body");
        }
        return $opts;
    }

    # set the variables (a missing key reads as null, without a notice)
    $form_action = isset($form['action']) ? $form['action'] : null;
    $form_method = isset($form['method']) ? $form['method'] : null;
    $form_name = isset($form['name']) ? $form['name'] : null;
    $form_inputs = isset($form['inputs']) ? $form['inputs'] : null;

    # set the form method
    $opts['form_method'] = $form_method;
    if ($response_form) {
        $opts['response_form_method'] = $form_method;
    }

    # build the full form action
    if ($form_action) {
        # anything that is not already a full URL is resolved against the request url
        if (substr($form_action, 0, 7) != 'http://' && substr($form_action, 0, 8) != 'https://') {
            # drop the query string of the request url
            $query_pos = strpos($request_url, '?');
            $url = $query_pos === false ? $request_url : substr($request_url, 0, $query_pos);

            # split the url into scheme + host and path
            $form_action_base_url = '';
            if (preg_match('/^(https?:\/\/[^\/]*)(\/.*)?$/', $url, $matches)) {
                $host = $matches[1];
                if ($form_action[0] == '/') {
                    # host-relative action : only scheme + host are kept
                    $form_action_base_url = $host;
                } else {
                    # path-relative action : keep the directory of the request
                    # url including its trailing slash ('/' when there is no path)
                    $path = isset($matches[2]) ? $matches[2] : '';
                    $last_slash = strrpos($path, '/');
                    $dir = $last_slash === false ? '/' : substr($path, 0, $last_slash + 1);
                    $form_action_base_url = $host . $dir;
                }
            }
            $form_action = $form_action_base_url . $form_action;
        }

        # save the form action
        debug_echo($cmd, "form action url found : {$form_action}");
        if ($response_form) {
            $opts['response_form_action'] = $form_action;
        }
        $opts['form_action'] = $form_action;
    }

    # build string for displaying info about hidden post args
    if ($form_name) {
        $form_name_str = "in form '{$form_name}'";
    } else {
        $form_name_str = "in all forms";
    }

    # nothing more to do when the form has no inputs
    if (empty($form_inputs)) {
        debug_echo($cmd, "no hidden form inputs were found {$form_name_str}");
        return $opts;
    }

    # build the hidden inputs as a name => value map
    $inputs = array();
    foreach ($form_inputs as $form_input) {
        $name = $form_input['name'];
        # a missing value becomes '', but a real value such as "0" is kept
        $inputs[$name] = isset($form_input['value']) ? $form_input['value'] : '';
    }
    $form_inputs = $inputs;

    # display info about hidden args found
    debug_echo($cmd, "the following hidden form inputs were found {$form_name_str} :");
    debug_dump_yaml($form_inputs, true);

    # save the hidden inputs
    $opts['form_inputs'] = $form_inputs;
    if ($response_form) {
        $opts['response_form_inputs'] = $form_inputs;
    }

    # check for args
    $args_key = "{$form_method}_args";

    # get appropriate args opt
    # NOTE(review): every other get_opt() call here passes $prefix as the
    # first argument, this one passes $cmd - confirm which one is intended
    $args = get_opt($cmd, $opts, $args_key);
    if (!check_opt_if_set_type($cmd, $args, $args_key, 'array')) {
        return false;
    }

    # merge the args if necessary
    # NOTE(review): the string branch looks unreachable if the 'array' type
    # check above rejects strings - confirm against check_opt_if_set_type()
    if ($args) {
        # check to see if is a string or an array
        if (is_string($args)) {
            if (strlen($args) == 0) {
                $args = $form_inputs;
            } else {
                $args .= '&' . http_build_query($form_inputs);
            }
        } elseif (is_array($args)) {
            # explicitly given args win over the values found in the form
            $args = array_merge($form_inputs, $args);
        } else {
            return error($cmd, "option '{$form_method}_args' is not a string or array");
        }
    } else {
        $args = $form_inputs;
    }

    # save the args
    $opts[$args_key] = $args;
    if ($response_form) {
        $opts["response_{$args_key}"] = $args;
    }

    # sort response and return
    ksort($opts);
    return $opts;
}
コード例 #4
0
/**
 * Pivots key / value column pairs into dedicated columns.
 *
 * For every (check_field => value_field) pair in the search opt, the value
 * found in check_field is looked up in the replace opt (checked_value =>
 * replacement_field); on a hit, the content of value_field is written to
 * the replacement field of the new line. The check and value fields are
 * dropped from the result, all other fields are copied over unchanged.
 *
 * Opts read (after merging $opts with $pipe):
 *   - data    (array, required)            the data lines
 *   - fields  (array_of_strings, required) the field name of each column
 *   - search  (array|yaml_array, required) check_field => value_field
 *   - replace (array|yaml_array, required) checked_value => replacement_field
 *
 * @param mixed  $opts options for this command
 * @param mixed  $pipe piped options, merged into $opts by merge_opts()
 * @param string $cmd  command name used in error / debug messages
 *
 * @return mixed array('fields' => ..., 'data' => ...), or the result of
 *               error() / false when an opt is invalid
 */
function search_replace_data_by_key_value($opts, $pipe, $cmd = __FUNCTION__)
{
    # set prefix
    $prefix = 'search_replace_data';

    # merge opts
    $opts = merge_opts($opts, $pipe);

    # get data opt
    $data = get_opt_config_value($prefix, $opts, 'data');
    if (!check_opt_set_type($cmd, $data, 'data', 'array')) {
        return false;
    }

    # get fields opt
    $fields = get_opt_config_value($prefix, $opts, 'fields');
    if (!check_opt_set_type($cmd, $fields, 'fields', 'array_of_strings')) {
        return false;
    }

    # get search opt
    $search = get_opt_config_value($prefix, $opts, 'search');
    if (!check_opt_set_type($cmd, $search, 'search', 'array,yaml_array')) {
        return false;
    }
    if (is_string($search)) {
        $search = yaml_decode($search);
    }

    # get replace opt
    $replace = get_opt_config_value($prefix, $opts, 'replace');
    if (!check_opt_set_type($cmd, $replace, 'replace', 'array,yaml_array')) {
        return false;
    }
    if (is_string($replace)) {
        $replace = yaml_decode($replace);
    }

    # set up search field indexes, making sure every field really exists
    # (an unknown field used to end up as an undefined column index)
    $fields_flipped = array_flip($fields);
    $searches = array();
    foreach ($search as $check_field => $value_field) {
        if (!isset($fields_flipped[$check_field])) {
            return error($cmd, "search check field '{$check_field}' is not defined in the fields");
        }
        if (!isset($fields_flipped[$value_field])) {
            return error($cmd, "search value field '{$value_field}' is not defined in the fields");
        }
        $searches[$fields_flipped[$check_field]] = $fields_flipped[$value_field];
    }

    # set all the fields
    $search_fields = array_keys($search);
    $value_fields = array_values($search);
    $replace_fields = array_values($replace);

    # set up new fields : the untouched fields first, then the replacement fields
    $new_fields = array();
    foreach ($fields as $field) {
        if (in_array($field, $search_fields) || in_array($field, $value_fields)) {
            continue;
        }
        $new_fields[] = $field;
    }
    foreach ($replace_fields as $field) {
        $new_fields[] = $field;
    }
    $new_fields_count = count($new_fields);

    # set up copy fields (old column index => new column index)
    $new_fields_flipped = array_flip($new_fields);
    $copy_fields = array();
    foreach ($fields as $old_index => $field) {
        if (!in_array($field, $new_fields) || in_array($field, $replace_fields)) {
            continue;
        }
        $copy_fields[$old_index] = $new_fields_flipped[$field];
    }

    # set up replaces (checked value => new column index)
    $replaces = array();
    foreach ($replace as $value => $field) {
        $replaces[$value] = $new_fields_flipped[$field];
    }

    # display messages
    debug_echo($cmd, "doing a search-replace by key value using the following pairs (check_field: value_field):");
    debug_dump_yaml($search, true);
    debug_echo($cmd, "the following found replacements were made (checked_value: replacement_field):");
    debug_dump_yaml($replace, true);

    # every new line starts out with all of its columns set to null
    $blank_line = $new_fields_count > 0 ? array_fill(0, $new_fields_count, null) : array();

    # loop through data
    $new_data = array();
    foreach ($data as $line) {
        # create new line
        $new_line = $blank_line;

        # copy data
        foreach ($copy_fields as $old_index => $new_index) {
            $new_line[$new_index] = $line[$old_index];
        }

        # search for values
        foreach ($searches as $check_field_index => $value_field_index) {
            $check = $line[$check_field_index];
            if (!isset($replaces[$check])) {
                # no replacement field is configured for this value
                continue;
            }
            $new_line[$replaces[$check]] = $line[$value_field_index];
        }

        # add the new line to the data
        $new_data[] = $new_line;
    }

    # build res
    return array('fields' => $new_fields, 'data' => $new_data);
}
コード例 #5
0
ファイル: modify_fields.php プロジェクト: simpl/datapipe
/**
 * Renames and reorders the columns of a data set in place according to a
 * field map.
 *
 * Map entries with an empty new name are ignored; the remaining entries
 * (original field => new field) define the columns of the result. On
 * success both $data and $fields are replaced by their mapped versions.
 *
 * @param array  $data       the data lines, passed by reference and rewritten
 * @param array  $fields     the field names, passed by reference and replaced
 *                           by the new field names
 * @param array  $map_fields original field name => new field name
 * @param string $cmd        command name used in error / debug messages
 *
 * @return bool true on success, false when build_field_indexes() fails
 */
function map_data_fields(&$data, &$fields, $map_fields, $cmd)
{
    # sort fields to map / not
    $new_map_fields = array();
    $ignore_fields = array();
    foreach ($map_fields as $orig => $new) {
        if ($new) {
            $new_map_fields[$orig] = $new;
        } else {
            # a misspelled variable name used to leave this list always empty
            $ignore_fields[] = $orig;
        }
    }
    $map_fields = $new_map_fields;

    # set up new field indexes
    # assumes build_field_indexes() returns one source column index per
    # mapped field, in map order - TODO confirm
    $index_fields = array_keys($map_fields);
    $field_indexes = build_field_indexes($fields, $index_fields, $cmd);
    if ($field_indexes === false) {
        return false;
    }

    # display info about the map
    debug_echo($cmd, "mapping data fields using the following map :");
    debug_dump_yaml($map_fields, true);
    if ($ignore_fields) {
        debug_echo($cmd, "ignoring the following empty result fields in the map :");
        debug_dump_yaml($ignore_fields, true);
    }

    # set up new fields
    $fields = array_values($map_fields);

    # build the data : each line keeps only the mapped columns, in map order
    foreach ($data as $i => $line) {
        $new_line = array();
        foreach ($field_indexes as $field_index) {
            $new_line[] = $line[$field_index];
        }
        $data[$i] = $new_line;
    }

    return true;
}