# Parses a document and extracts elements from it according to the given parse logic.
#
# Options (read from $opts after merging with $pipe):
#   document      : string, the markup to parse
#   doc_type      : string, only used in messages here (fallback value 'xml')
#   logic         : array, or a YAML string that is decoded into the parse logic
#   result_type   : string, only used in messages here (fallback value 'elements')
#   merge_results : boolean, when true the per-element results are merged into one array
#
# Returns the result of xml_parse_element() (or the merged array when merge_results
# is set). Returns false when an option check fails, and the result of error() when
# the logic is not valid YAML or the document could not be parsed.
#
# NOTE(review): the document is always loaded with DOMDocument::loadHTML(), whatever
# doc_type says -- confirm this is intended for real XML input.
function xml_parse_document($opts, $pipe, $cmd = __FUNCTION__) {
    # set prefix
    $prefix = 'xml_parse_document';

    # merge opts
    $opts = merge_opts($opts, $pipe, 'document');

    # get document opt
    $document = get_opt($prefix, $opts, 'document');
    if (!check_opt_set_type($cmd, $document, 'document', 'string')) {
        return false;
    }

    # get doc type opt
    $doc_type = get_opt_config_value($prefix, $opts, 'doc_type', 'xml');
    if (!check_opt_set_type($cmd, $doc_type, 'doc_type', 'string')) {
        return false;
    }

    # get logic opt (a string is treated as YAML and decoded)
    $logic = get_opt($prefix, $opts, 'logic');
    if (!check_opt_set_type($cmd, $logic, 'logic', 'string,array')) {
        return false;
    }
    if (is_string($logic)) {
        $logic = @yaml_decode($logic);
        if (!$logic) {
            return error($cmd, "parse logic not valid YAML");
        }
    }

    # get result type opt
    $result_type = get_opt_config_value($prefix, $opts, 'result_type', 'elements');
    if (!check_opt_set_type($cmd, $result_type, 'result_type', 'string')) {
        return false;
    }

    # get merge results opt
    $merge_results = get_opt($prefix, $opts, 'merge_results', false);
    if (!check_opt_set_type($cmd, $merge_results, 'merge_results', 'boolean')) {
        return false;
    }

    # load the document (warnings from malformed markup are suppressed on purpose)
    $dom = new DOMDocument('1.0');
    @$dom->loadHTML($document);
    #$dom->preserveWhiteSpace = false;

    # check that the document was parsed correctly
    $res = xml_parse_element($dom, $logic);
    if ($res === false) {
        # the message previously interpolated an undefined variable
        return error($cmd, "could not parse {$doc_type} document");
    }

    # display info about the parsing
    debug_echo($cmd, "{$doc_type} document parsed successfully for {$result_type}");
    if (empty($res)) {
        debug_echo($cmd, "no {$result_type} found while parsing the {$doc_type} document");
    } else {
        debug_echo($cmd, "the following {$result_type} were found in the {$doc_type} document");
        debug_dump_yaml($res, true);
    }

    # return if we are not merging the results
    if (!$merge_results) {
        return $res;
    }

    # merge the results: array values under the same key are concatenated with
    # array_merge(), any other value simply overwrites the previous one
    $new_res = array();
    $res_count = count($res);
    for ($i = 0; $i < $res_count; $i++) {
        $elt = $res[$i];
        foreach ($elt as $key => $value) {
            if (is_array($value)) {
                $cur_value = isset($new_res[$key]) ? $new_res[$key] : null;
                if ($cur_value) {
                    $value = array_merge($cur_value, $value);
                }
            }
            $new_res[$key] = $value;
        }
    }

    return $new_res;
}
# Runs mawk code over one or more data sources and returns the processed result.
#
# Options (read from $opts after merging with $pipe):
#   debug            : boolean, dump the parsed mawk code and indexes
#   join_type        : join type, 'inner' switches on inner-join behaviour
#   sources          : array of sources, each providing 'fields' and 'data'; when not
#                      set a single source is built with build_source_data()
#   create_indexes   : array of strings (validated here but not otherwise used in
#                      this function)
#   primary_key      : string, field every source must define when there is more
#                      than one source
#   mawk             : string, the mawk code to run
#   update_frequency : integer, passed through to mawk_process_data()
#
# Returns the result of mawk_process_data(). Returns false when an option check or
# the primary index set-up fails, and the result of error() for invalid sources,
# invalid mawk syntax or a MawkError raised while processing.
function process_data($opts, $pipe, $cmd = __FUNCTION__, $opt_prefix = false) {
    # set prefix
    $prefix = 'process_data';

    # merge opts
    $opts = merge_opts($opts, $pipe);

    # adjust opt prefix
    $cmd = adjust_opt_prefix($cmd, $opts, $opt_prefix, 'edit');

    # get debug opt
    $debug = get_opt_config_value($prefix, $opts, 'debug', false);
    if (!check_opt_set_type($cmd, $debug, 'debug', 'boolean')) {
        return false;
    }

    # get join type opt (validated under the same name it is read with)
    $join_type = get_opt_config_value($prefix, $opts, 'join_type', 'outer');
    if (!check_opt_set_type($cmd, $join_type, 'join_type', 'data_join_type')) {
        return false;
    }

    # get sources opt, falling back to a single source built from the other opts
    $sources = get_opt($prefix, $opts, 'sources');
    if (!check_opt_if_set_type($cmd, $sources, 'sources', 'array')) {
        return false;
    }
    if (!$sources) {
        $source = build_source_data($prefix, $opts, $cmd);
        if (!$source) {
            return error($cmd, "sources opt is not set and the data and fields opts are not set either, so can't be used to create one");
        }
        $sources = array($source);
    }

    # get sources values
    foreach ($sources as $i => $val) {
        $sources[$i] = value($val);
    }

    # get create_indexes opt
    $create_indexes = get_opt($prefix, $opts, 'create_indexes');
    if (!check_opt_if_set_type($cmd, $create_indexes, 'create_indexes', 'array_of_strings')) {
        return false;
    }

    # get join field opt
    $primary_key = get_opt_config_value($prefix, $opts, 'primary_key', 'guid');
    if (!check_opt_set_type($cmd, $primary_key, 'primary_key', 'string')) {
        return false;
    }

    # get mawk rules opt
    $mawk = get_opt_config_value($prefix, $opts, 'mawk');
    if (!check_opt_set_type($cmd, $mawk, 'mawk', 'string')) {
        return false;
    }

    # get update frequency opt
    $update_frequency = get_opt_config_value($prefix, $opts, 'update_frequency', 0);
    if (!check_opt_set_type($cmd, $update_frequency, 'update_frequency', 'integer')) {
        return false;
    }

    # create fields and data arrays, one entry per source
    $fields_arr = array();
    $data_arr = array();
    $sources_count = count($sources);
    for ($i = 0; $i < $sources_count; $i++) {
        $source_no = $i + 1;

        # add fields
        $fields = $sources[$i]['fields'];
        if (!$fields) {
            return error($cmd, "no fields defined for source {$source_no}");
        }
        # the primary key is only needed when there is something to join
        if ($sources_count > 1) {
            if (!in_array($primary_key, $fields)) {
                return error($cmd, "primary key '{$primary_key}' not defined for source {$source_no}");
            }
        }
        $fields_arr[] = $fields;

        # add data
        $data = $sources[$i]['data'];
        if (!$data) {
            return error($cmd, "no data defined for source {$source_no}");
        }
        $data_arr[] = $data;
    }

    # display info about the join, if there is one
    $fields_arr_count = count($fields_arr);
    if ($fields_arr_count > 1) {
        debug_echo($cmd, "performing {$join_type} join on primary key / field '{$primary_key}'");
    }
    for ($i = 1; $i <= $fields_arr_count; $i++) {
        debug_echo($cmd, "fields for data source {$i} :");
        debug_dump_list($fields_arr[$i - 1], true);
    }

    # parse the line rules
    $parsed_mawk = mawk_parse_code($mawk, $fields_arr, $cmd);
    if (is_int($parsed_mawk)) {
        # an integer result signals a syntax error -- presumably the offending line
        # number (TODO confirm against mawk_parse_code); report that value rather
        # than the stale counter of the loop above
        return error($cmd, "code line {$parsed_mawk} of the mawk has an invalid syntax");
    }
    $parsed_mawk_code = $parsed_mawk['code'];

    # set up indexes
    $indexes_arr = $parsed_mawk['indexes'];
    if ($fields_arr_count > 1) {
        $r = mawk_add_primary_index($primary_key, $indexes_arr, $fields_arr, $cmd);
        if ($r === false) {
            return false;
        }
    }

    # display info about the line rules
    debug_echo($cmd, "the following mawk code will be used :");
    debug_echo_txt(mawk_clean_code($mawk), true);
    if ($debug) {
        debug_echo($cmd, "which translates to the following parsed code :");
        debug_dump_yaml($parsed_mawk_code, true);
        if ($indexes_arr) {
            debug_echo($cmd, "and includes the following indexes :");
            debug_dump_yaml($indexes_arr, true);
        }
    }

    # set up discard
    $inner_join = ($join_type == 'inner');

    # run mawk code
    try {
        debug_echo($cmd, "processing the data (this can take some time depending on the inputs) ...");
        $mawk_res = mawk_process_data($parsed_mawk_code, $fields_arr, $data_arr, $indexes_arr, $update_frequency, $inner_join, $cmd, $debug);
    } catch (MawkError $e) {
        return error($cmd, $e->getMessage());
    }

    return $mawk_res;
}
# Parses the forms found in an HTTP response body and stores the form action,
# method and inputs in $opts.
#
# Options (read from $opts after merging with $pipe):
#   request_url   : string, url of the request that produced the response; used to
#                   resolve a relative form action
#   response_body : string, the markup to search for forms
#   form_name     : string, name of the form to parse ('' parses all forms)
#
# Keys written to $opts: form_method, form_action, form_inputs and
# "{form_method}_args"; when $response_form is true the same values are also saved
# under response_form_method, response_form_action, response_form_inputs and
# "response_{form_method}_args".
#
# $opts is taken by reference AND returned. Returns $opts (possibly unchanged when
# there is no body, no form or no inputs), false when an option check fails, or the
# result of error() when the args option has an unexpected type.
function http_parse_form(&$opts, $pipe, $cmd = __FUNCTION__, $response_form = false) {
    # set prefix
    $prefix = 'http';

    # merge opts
    $opts = merge_opts($opts, $pipe, 'form_name');

    # get request_url opt
    $request_url = get_opt($prefix, $opts, 'request_url');
    if (!check_opt_set_type($cmd, $request_url, 'request_url', 'string')) {
        return false;
    }

    # get response body opt
    $response_body = get_opt($prefix, $opts, 'response_body');
    if (!check_opt_if_set_type($cmd, $response_body, 'response_body', 'string')) {
        return false;
    }

    # return if no response body is found
    if (!$response_body) {
        warning($cmd, "no response body to parse for hidden post args");
        return $opts;
    }

    # get post form opt
    $form_name = get_opt($prefix, $opts, 'form_name', '');
    if (!check_opt_set_type($cmd, $form_name, 'form_name', 'string')) {
        return false;
    }

    # display message
    if ($form_name) {
        debug_echo($cmd, "parsing response form '{$form_name}' for action, method and inputs");
    } else {
        debug_echo($cmd, "parsing all forms (if any) for action, method and inputs");
    }

    # parse the response body for forms
    $form_parse_logic = "\n\n elements:\n - form:\n attributes:\n name: {$form_name}\n action:\n method:\n \n elements:\n - input:\n label: inputs\n attributes:\n !type: hidden\n name:\n value:\n\n - button:\n label: inputs\n attributes:\n name:\n value:\n ";
    $form_parse = array(
        'document' => $response_body,
        'doc_type' => 'HTML',
        'result_type' => 'forms',
        'logic' => $form_parse_logic,
        'merge_results' => true,
    );
    $form = xml_parse_document($form_parse, null, $cmd);
    if (!$form) {
        if ($form_name) {
            warning($cmd, "form '{$form_name}' not found in response body");
        } else {
            debug_echo($cmd, "no forms found in the response body");
        }
        return $opts;
    }

    # set the variables (any of these keys may be missing from the parse result)
    $form_action = isset($form['action']) ? $form['action'] : null;
    $form_method = isset($form['method']) ? $form['method'] : null;
    $form_name = isset($form['name']) ? $form['name'] : null;
    $form_inputs = isset($form['inputs']) ? $form['inputs'] : array();

    # set the form method
    $opts['form_method'] = $form_method;
    if ($response_form) {
        $opts['response_form_method'] = $form_method;
    }

    # build the full form action
    if ($form_action) {
        # check for a full URL
        if (substr($form_action, 0, 7) != 'http://' && substr($form_action, 0, 8) != 'https://') {
            # drop the query string of the request url
            preg_match('/^([^?]*)(.*)/', $request_url, $matches);
            $url = $matches[1];

            # work out what the action is relative to; when the request url is not
            # an absolute http(s) url the action is left as it is
            $form_action_base_url = '';
            if ($form_action[0] == '/') {
                # root-relative action: scheme and host only
                if (preg_match('/^(http[s]?:\\/\\/[^\\/]*).*/', $url, $matches)) {
                    $form_action_base_url = $matches[1];
                }
            } elseif ($form_action[0] == '?') {
                # query-only action: same document as the request
                $form_action_base_url = $url;
            } elseif (preg_match('/^(http[s]?:\\/\\/[^\\/]*)(\\/.*)?$/', $url, $matches)) {
                # document-relative action: scheme, host and the directory part of
                # the request path (up to and including its last '/')
                $path = isset($matches[2]) ? $matches[2] : '';
                $last_slash = strrpos($path, '/');
                $dir = ($last_slash === false) ? '/' : substr($path, 0, $last_slash + 1);
                $form_action_base_url = $matches[1] . $dir;
            }
            $form_action = $form_action_base_url . $form_action;
        }

        # save the form action
        debug_echo($cmd, "form action url found : {$form_action}");
        if ($response_form) {
            $opts['response_form_action'] = $form_action;
        }
        $opts['form_action'] = $form_action;
    }

    # build string for displaying info about hidden post args
    if ($form_name) {
        $form_name_str = "in form '{$form_name}'";
    } else {
        $form_name_str = "in all forms";
    }

    # add the hidden arguments to the hidden inputs and piped post args
    if (count($form_inputs) == 0) {
        debug_echo($cmd, "no hidden form inputs were found {$form_name_str}");
        return $opts;
    }

    # build the hidden inputs as a name => value map
    $inputs = array();
    foreach ($form_inputs as $form_input) {
        # a control without a name is never submitted with a form, so skip it
        if (!isset($form_input['name'])) {
            continue;
        }
        # only a missing value becomes '', so a value such as "0" is kept
        $inputs[$form_input['name']] = isset($form_input['value']) ? $form_input['value'] : '';
    }
    $form_inputs = $inputs;

    # display info about hidden args found
    debug_echo($cmd, "the following hidden form inputs were found {$form_name_str} :");
    debug_dump_yaml($form_inputs, true);

    # save the hidden inputs
    $opts['form_inputs'] = $form_inputs;
    if ($response_form) {
        $opts['response_form_inputs'] = $form_inputs;
    }

    # check for args
    $args_key = "{$form_method}_args";

    # get appropriate args opt
    # NOTE(review): every other get_opt() call in this function passes $prefix as
    # the first argument; this one passes $cmd -- confirm which is intended.
    $args = get_opt($cmd, $opts, $args_key);
    if (!check_opt_if_set_type($cmd, $args, $args_key, 'array')) {
        return false;
    }

    # merge the args if necessary (explicitly given args win over form inputs)
    if ($args) {
        # check to see if is a string or an array
        if (is_string($args)) {
            if (strlen($args) == 0) {
                $args = $form_inputs;
            } else {
                $args .= '&' . http_build_query($form_inputs);
            }
        } elseif (is_array($args)) {
            $args = array_merge($form_inputs, $args);
        } else {
            return error($cmd, "option '{$form_method}_args' is not a string or array");
        }
    } else {
        $args = $form_inputs;
    }

    # save the args
    $opts[$args_key] = $args;
    if ($response_form) {
        $opts["response_{$args_key}"] = $args;
    }

    # sort response and return
    ksort($opts);

    return $opts;
}
# Pivots key/value style columns of a data set into dedicated columns.
#
# Options (read from $opts after merging with $pipe):
#   data    : array of rows, each row indexed in the same order as 'fields'
#   fields  : array of strings, the names of the columns of each row
#   search  : array (or YAML string) mapping check_field => value_field
#   replace : array (or YAML string) mapping checked_value => replacement_field
#
# For every row and every (check_field, value_field) pair, the content of
# check_field is looked up in 'replace'; when it is found, the content of
# value_field is stored in the matching replacement field. The check and value
# fields are dropped from the output, all other fields are copied over.
#
# Returns array('fields' => ..., 'data' => ...), or false when an option check fails.
function search_replace_data_by_key_value($opts, $pipe, $cmd = __FUNCTION__) {
    $prefix = 'search_replace_data';
    $opts = merge_opts($opts, $pipe);

    # rows to process
    $data = get_opt_config_value($prefix, $opts, 'data');
    if (!check_opt_set_type($cmd, $data, 'data', 'array')) {
        return false;
    }

    # column names of the rows
    $fields = get_opt_config_value($prefix, $opts, 'fields');
    if (!check_opt_set_type($cmd, $fields, 'fields', 'array_of_strings')) {
        return false;
    }

    # check_field => value_field pairs
    $search = get_opt_config_value($prefix, $opts, 'search');
    if (!check_opt_set_type($cmd, $search, 'search', 'array,yaml_array')) {
        return false;
    }
    if (is_string($search)) {
        $search = yaml_decode($search);
    }

    # checked_value => replacement_field pairs
    $replace = get_opt_config_value($prefix, $opts, 'replace');
    if (!check_opt_set_type($cmd, $replace, 'replace', 'array,yaml_array')) {
        return false;
    }
    if (is_string($replace)) {
        $replace = yaml_decode($replace);
    }

    $check_names = array_keys($search);
    $value_names = array_values($search);
    $target_names = array_values($replace);

    # output columns: every input column that is neither a check nor a value
    # field, followed by the replacement fields
    $kept_names = array();
    foreach ($fields as $name) {
        if (!in_array($name, $check_names) && !in_array($name, $value_names)) {
            $kept_names[] = $name;
        }
    }
    $out_fields = array_merge($kept_names, $target_names);
    $out_width = count($out_fields);
    $out_positions = array_flip($out_fields);

    # input position => output position for the columns that are copied as they are
    $copy_map = array();
    foreach ($fields as $in_position => $name) {
        if (in_array($name, $out_fields) && !in_array($name, $target_names)) {
            $copy_map[$in_position] = $out_positions[$name];
        }
    }

    # position of each check field => position of its value field
    $in_positions = array_flip($fields);
    $lookups = array();
    foreach ($search as $check_name => $value_name) {
        $check_position = $in_positions[$check_name];
        $value_position = $in_positions[$value_name];
        $lookups[$check_position] = $value_position;
    }

    # checked value => output position of its replacement field
    $targets = array();
    foreach ($replace as $checked_value => $target_name) {
        $targets[$checked_value] = $out_positions[$target_name];
    }

    # display messages
    debug_echo($cmd, "doing a search-replace by key value using the following pairs (check_field: value_field):");
    debug_dump_yaml($search, true);
    debug_echo($cmd, "the following found replacements were made (checked_value: replacement_field):");
    debug_dump_yaml($replace, true);

    # every output row starts out as a row of nulls
    $blank_row = array_pad(array(), $out_width, null);

    $out_rows = array();
    foreach ($data as $row) {
        $out_row = $blank_row;

        # carry over the untouched columns
        foreach ($copy_map as $from => $to) {
            $out_row[$to] = $row[$from];
        }

        # move each value into the column selected by its check field
        foreach ($lookups as $check_position => $value_position) {
            $checked = $row[$check_position];
            $target_position = @$targets[$checked];
            if ($target_position !== null) {
                $out_row[$target_position] = $row[$value_position];
            }
        }

        $out_rows[] = $out_row;
    }

    return array('fields' => $out_fields, 'data' => $out_rows);
}
# Renames and reorders the columns of a data set in place according to a map.
#
# $data       : rows, modified in place; every row is rebuilt so that it only
#               holds the mapped columns, in the order of the map
# $fields     : field names, replaced in place by the new names of the map
# $map_fields : original field name => new field name; entries with an empty new
#               name are left out of the map and only reported in the debug output
# $cmd        : command name used for messages
#
# Returns true on success, false when build_field_indexes() fails.
function map_data_fields(&$data, &$fields, $map_fields, $cmd) {
    # sort fields to map / not
    $new_map_fields = array();
    $ignore_fields = array();
    foreach ($map_fields as $orig => $new) {
        if ($new) {
            $new_map_fields[$orig] = $new;
        } else {
            # a misspelt variable name previously left this list always empty
            $ignore_fields[] = $orig;
        }
    }
    $map_fields = $new_map_fields;

    # set up new field indexes
    $index_fields = array_keys($map_fields);
    $field_indexes = build_field_indexes($fields, $index_fields, $cmd);
    if ($field_indexes === false) {
        return false;
    }

    # display info about the map
    debug_echo($cmd, "mapping data fields using the following map :");
    debug_dump_yaml($map_fields, true);
    if ($ignore_fields) {
        debug_echo($cmd, "ignoring the following empty result fields in the map :");
        debug_dump_yaml($ignore_fields, true);
    }

    # set up new fields
    $fields = array_values($map_fields);
    $field_count = count($field_indexes);

    # build the data: each new row picks the mapped columns out of the old row
    $data_count = count($data);
    for ($i = 0; $i < $data_count; $i++) {
        $line = $data[$i];
        $new_line = array();
        for ($j = 0; $j < $field_count; $j++) {
            $new_line[] = $line[$field_indexes[$j]];
        }
        $data[$i] = $new_line;
    }

    return true;
}