// Top leve input directory path, e.g. /xGDBvm/input/xgdbvm/tmp/. From xGDBvm perspective, relative to root. This is where this script will create a new directory structure RemoteDIR for HPC to grab data from. Update 1-26-16 to include /tmp $TempDIRinputs = "{$TempDIRbase}{$DBid}_hpcs/"; # e.g. /xGDBvm/input/tmp/GDB001_hpcs/ (s is for standalone) The base directory structure for this GDB's remote data. Where remote HPC will look for data (NOTE: from HPC perspective, this is /username/hpcGDB001) $TempDIRscaff = "{$TempDIRinputs}SCFDIR/"; # e.g. /xGDBvm/input/tmp/GDB001_hpcs/SCFDIR/ genome inputs go here $TempDIRtranscript = "{$TempDIRinputs}MRNADIR/"; # e.g. /xGDBvm/input/tmp/GDB001_hpcs/MRNADIR/ EST, cDNA, TSA inputs go here # $TempDIRoutput="${TempDIR}GSQOUT/"; # e.g. /xGDBvm/input/tmp/GDB001_hpcs/GSQOUT/ Output files come back to here. NOT REALLY. DEPRECATED. OUTPUT GOES TO /xGDBvm/input/archive/jobs // Obtain fasta header types $file_path_transcript = `ls -1 {$input_data_path}/*{$transcript_type}.fa | head -1`; # read line 1 of first matching file $fasta_header_array = fasta_header_type($file_path_transcript, $transcript_type); $EstFormat = $fasta_header_array[1]; $file_path_gdna = `ls -1 {$input_data_path}/*gdna.fa | head -1`; # read line 1 of first matching file $fasta_header_array = fasta_header_type($file_path_gdna, "gdna"); $genomeFormat = $fasta_header_array[1]; mkdir($TempDIRinputs, 0777); mkdir($TempDIRscaff, 0777); mkdir($TempDIRtranscript, 0777); # mkdir($TempDIRoutput,0777); // Copy required input data to DataStore ($InputDataDIR). Since this is not a pipeline job, there is no scratch directory involved -- just the user's input dir. $cat_transcript = "cat {$InputDataDIR}/*{$transcript_type}.fa >{$TempDIRtranscript}{$DBid}{$transcript_type}.fa"; // error fixed 8/28; cat together input and copy to hpc input dir. 
exec($cat_transcript); //should be uncommented unless debug mode $cat_gdna = "cat {$InputDataDIR}/*gdna.fa >{$TempDIRscaff}/{$DBid}gdna.fa"; // cat together input and copy to hpc input dir. exec($cat_gdna); if ($genomeFormat == "l") { $input_format = "-sformat1 ncbi";
/**
 * Build an HTML list of the files in $input_dir that match data class $class.
 *
 * The directory is listed with `ls -l --time-style=long-iso`. Every regular
 * file for which validate_file_type($filename, $class) returns a non-empty
 * type gets one <li> entry showing its type, name, a validation icon, the
 * fasta header type, its size and (when known) its entry count. Validation
 * status and entry count are looked up in Genomes.Datafiles by FileStamp.
 *
 * No entries are produced when the `ls` output has 100 or more lines.
 * On a failed database connection the function prints an error and exits.
 *
 * @param string $input_dir User-specified input data path.
 * @param string $class     Data class passed through to validate_file_type()
 *                          (gdna, transcript or protein per the original notes).
 * @param string $dbpass    Password for MySQL user "gdbuser".
 *
 * @return array array(
 *                 0 => formatted HTML (header + <ul> list),
 *                 1 => number of files listed,
 *                 2 => cumulative size of the listed files, in bytes,
 *                 3 => raw `ls -l` output
 *               )
 */
function create_input_list($input_dir, $class, $dbpass) {
    $db = mysql_connect("localhost", "gdbuser", $dbpass);
    if (!$db) {
        echo "Error: Could not connect to database!";
        exit;
    }
    mysql_select_db("Genomes", $db);

    // These are interpolated into the markup below but are never populated in
    // this function; define them so the output stays the same without notices.
    $valid_count_display = "";
    $invalid_count_display = "";
    $noteval_count_display = "";
    $list_header_line_styled = "";

    $unit_labels = array("bytes", "KB", "MB", "GB");
    $max_unit = count($unit_labels) - 1;

    $file_list2 = "";
    $total_size = 0;
    $n = 0; // number of files listed

    // $input_dir is user-specified: quote it, and use "--" so that a path
    // starting with "-" cannot be read as an option by ls.
    $file_list = shell_exec("ls -l --time-style=long-iso -- " . escapeshellarg($input_dir));
    $list = explode("\n", (string) $file_list);

    // Forward slashes are backslash-escaped because the path becomes part of
    // an HTML id attribute (used to open a jQuery dialog).
    $escaped_path = str_replace("/", "\\/", $input_dir);

    // Charset ISO-8859-1 makes htmlspecialchars() byte-safe: only the ASCII
    // characters & < > " ' are rewritten, so UTF-8 (or malformed) file names
    // pass through instead of yielding an empty string.
    $input_dir_html = htmlspecialchars($input_dir, ENT_QUOTES, 'ISO-8859-1');

    if (count($list) < 100) {
        foreach ($list as $item) {
            $item = preg_replace("/\\s+/", " ", $item);

            // Only regular files ("-rw-r--r-- ..."); skips the "total" line,
            // directories, links and the trailing empty line.
            if (substr($item, 0, 1) !== "-") {
                continue;
            }

            // NOTE(review): fields are taken from the end of the line, so a
            // file name containing spaces is reduced to its last token.
            $vals = explode(" ", $item);
            $field_count = count($vals);
            $filename = $vals[$field_count - 1];

            $filename_type = validate_file_type($filename, $class);
            if ($filename_type == "") {
                continue;
            }

            $time = $vals[$field_count - 2];
            $year_month_day = $vals[$field_count - 3];
            $size = $vals[$field_count - 4];

            # IMPORTANT: This format MUST be synchronized with FileStamp in /xGDBvm/scripts/xGDB_ValidateFiles.sh
            $filestamp = "{$filename}:{$size}:{$year_month_day}-{$time}";

            ##### Look up validation status and entry count for this exact file stamp #####
            $valid = "";   # T, F or NULL
            $entries = ""; # number of entries
            $get_entry = "SELECT Valid, EntryCount FROM Datafiles where FileStamp='"
                . mysql_real_escape_string($filestamp, $db) . "'";
            $mysql_get_entry = mysql_query($get_entry, $db);
            if ($mysql_get_entry !== false) {
                while ($result_get_entry = mysql_fetch_array($mysql_get_entry)) {
                    $valid = $result_get_entry[0];
                    $entries = $result_get_entry[1];
                }
            }

            // The icon communicates validation status (by color) and is the
            // click target for opening the validation dialog box.
            if ($valid == "T") {
                $file_info_icon = "information_green.png";
                $valid_style = "filevalid";
            } elseif ($valid == "F") {
                $file_info_icon = "information_red.png";
                $valid_style = "filenotvalid";
            } else {
                $file_info_icon = "information.png";
                $valid_style = "filenoteval"; # default; blue
            }

            ##### Build markup including escaped filepath and validation icon #####
            $filename_html = htmlspecialchars($filename, ENT_QUOTES, 'ISO-8859-1');
            $filestamp_html = htmlspecialchars($filestamp, ENT_QUOTES, 'ISO-8859-1');
            // Unique ID tag (with escaped slashes) for opening a jQuery dialog.
            $filepath_html = htmlspecialchars($escaped_path . "\\/" . $filename, ENT_QUOTES, 'ISO-8859-1');
            $info_icon_styled = "\n\t\t\t\t\t\t <span id=\"{$filepath_html}\" class=\"validatefile-button\" title=\"{$filestamp_html}\">\n\t\t\t\t\t\t <img class=\"nudge3\" src=\"/XGDB/images/{$file_info_icon}\" />\n\t\t\t\t\t\t </span>\n\t\t\t\t\t\t ";

            ##### Fasta header (defline) type, shown next to the file name #####
            // NOTE(review): the second argument is the CSS class, not a data
            // type as at other call sites of fasta_header_type() -- confirm
            // this is intended. Left unchanged here.
            $fasta_header_type = fasta_header_type("{$input_dir}/{$filename}", $valid_style);
            $fasta_type = $fasta_header_type[0];

            $n = $n + 1;

            $filename_type_styled = "<span class=\"{$valid_style}\">{$filename_type}:</span>";
            $filename_styled = "<span class=\"{$valid_style} italic\">{$filename_html}</span>";
            $filename_display = $filename_type_styled . " " . $filename_styled . $info_icon_styled . " (" . $fasta_type . ")" . $list_header_line_styled;

            ##### Accumulate the raw byte count, then scale the size for display #####
            $total_size = $total_size + $size;
            $display_size = $size;
            $unit = 0; // index into $unit_labels
            // Stop at the largest labelled unit so very large files are
            // reported in GB rather than losing their unit label.
            while ($display_size > 1024 && $unit < $max_unit) {
                $display_size = round($display_size / 1024, 1);
                $unit++;
            }

            ##### If the file has been validated, the entry count is appended #####
            $entries_styled = empty($entries) ? "" : "<span style=\"color:#00A592\"> \n\t\t\t\t\t\t {$entries} entries\n\t\t\t\t\t\t</span>";

            ##### List item: filename, validation icon / size and unit / entries #####
            $file_list2 .= "\n\t\t\t\t \t<li class='smallerfont'>\n\t\t\t\t \t {$filename_display} / {$display_size}";
            $file_list2 .= " " . $unit_labels[$unit];
            $file_list2 .= " / " . $entries_styled . "</li>";
        }
    }

    ##### Assemble the pieces of the header and list #####
    $file_list1 = "\n\t\t<span class=\"plaintext largerfont bold\">\n\t\t\t{$input_dir_html} \n\t\t</span>\n\t\t<span class=\"normalfont\">\n\t\t\t{$valid_count_display} {$invalid_count_display} {$noteval_count_display}\n\t\t</span>";
    $file_list1 .= "\n\t<ul class='bullet1 indent2'>\t";
    $file_list3 = "</ul>";
    $file_list_formatted = $file_list1 . $file_list2 . $file_list3;

    // $total_size is in bytes; $n is returned in case callers want to
    // validate based on the number of valid files.
    return array($file_list_formatted, $n, $total_size, $file_list);
}