# temp input path for Genome File e.g. /username/hpcGDB002/SCFDIR/GDB002gdna.fa
# Temp input path for the protein FASTA, e.g. /username/hpcGDB002/Protein/GDB002prot.fa
$input_p = "{$user_input_base}Protein/{$DBid}prot.fa";

## Build the validation list for each input type (calls kept in their original order).
$valid_est_array  = create_input_list($gsq_input_path, "transcript", $dbpass); // generic; covers est, cdna, tsa
$valid_prot_array = create_input_list($gsq_input_path, "protein", $dbpass);
$valid_gdna_array = create_input_list($gsq_input_path, "gdna", $dbpass);
$valid_out_array  = create_input_list($output_data_path, "gsq." . $DBid, $dbpass); # e.g. gsq.GDB001cdna, test for type in function.

## Only element 0 of each list is carried forward under the short name.
$valid_est  = $valid_est_array[0];
$valid_prot = $valid_prot_array[0];
$valid_gdna = $valid_gdna_array[0];
$valid_out  = $valid_out_array[0];

## Job estimates based on the size distribution of gdna (calculate_scaffolds: see jobs_functions.inc.php).
## Per the original note, the function ends with:
##   return array($large_scaffold_count, $small_scaffold_count, $chunks, $remainder_size, $gsq_split, $gth_split,
##                $scaffold_size_display, $gsq_split_display, $gth_split_display, $gsq_time_display, $gth_time_display);
$scaffolds = calculate_scaffolds($gsq_input_path, $gsq_proc, $gth_proc);

# Unpack positions 0..10 in one step; the local names differ slightly from the callee's
# (index 2 is stored as $break_point_count, index 6 as $scaffold_sizes_display).
list(
    $large_scaffold_count,
    $small_scaffold_count,
    $break_point_count,
    $remainder_size,
    $gsq_split,
    $gth_split,
    $scaffold_sizes_display,
    $gsq_split_display,
    $gth_split_display,
    $gsq_time_display,
    $gth_time_display
) = $scaffolds;

$total_scaffold_count = $large_scaffold_count + $small_scaffold_count;
}
####### Part IIC. Set data paths and filenames for Curl ########

## Specify TEMPORARY user input data paths with user's home directory as base - the pipeline creates this directory and deposits data there
$user_input_prot_path = "/{$username}/{$inputTopDir}tmp/{$DBid}_hpc/Protein/"; // e.g. "/username/xgdbvm/tmp/GDB001_hpc/Protein/"
$user_input_scaff_path = "/{$username}/{$inputTopDir}tmp/{$DBid}_hpc/SCFDIR/"; // e.g. "/username/xgdbvm/tmp/GDB001_hpc/SCFDIR/" (note: this dataset is already size-sorted by commands in xGDB_Procedure.sh)

## Construct input path and output name variables for Curl statement.
## NOTE: these are used by TACC so they are relative to iPlant Data Store user home page ($username)
$inputProtein = "{$user_input_prot_path}{$DBid}prot.fa"; // user's DataStore path, for json; Protein File, e.g. /username/xgdbvm/tmp/GDB001_hpc/Protein/GDB001prot.fa
$inputGenomic = "{$user_input_scaff_path}{$DBid}gdna.fa"; // user's DataStore path, for json; Genome File, e.g. /username/xgdbvm/tmp/GDB001_hpc/SCFDIR/GDB001gdna.fa
$outputName = "{$DBid}prot.gth"; // for json; e.g. GDB001prot.gth
// deprecated $outPutPath="$user_output_data_path"; //for json; e.g. /username/GDB001_hpc/GTHOUT/

## Calculate split count and scaffold number for display
$scaffolds = calculate_scaffolds($user_input_scaff_path, $proc_per_node, $proc_per_node); # jobs_functions.inc.php
$gth_split = $scaffolds[5]; # this is for information purposes only, it is the split of segments we predict will be used
$large_scaffold_count = $scaffolds[0];
$small_scaffold_count = $scaffolds[1];
$total_scaffold_count = $large_scaffold_count + $small_scaffold_count;

############## Part III. Refresh access_token for Agave API #############

// A. First, get the OAuth App credentials for this user, VM:
// The auth file is scanned line by line for an entry of the form "<username>:<field1>:<field2>".
$handle = fopen("/xGDBvm/admin/auth", "r");
$auth_error = "";
if ($handle) {
    while (($line = fgets($handle)) !== false) {
        // preg_quote() makes the username match literally: without it, regex metacharacters in the
        // name (".", "+", "/" ...) could match another user's line or yield an invalid pattern.
        $pattern = "/^" . preg_quote($username, "/") . ":([A-Za-z0-9\\_]+?):([A-Za-z0-9\\_]+?)\$/"; # e.g. newuser:hZ_z3f4Hf3CcgvGoMix0aksN4BOD6:UH758djfDF8sdmsi004wER
        if (preg_match($pattern, $line, $matches)) {