/**
 * Compare entry counts between Input files that should hold the same number
 * of records and build an HTML warning list for the pairs that disagree.
 *
 * Steps:
 *  1. List $path with `ls -l --time-style=long-iso`, keep the regular files
 *     accepted by validate_file_type(), and rebuild each file's filestamp
 *     ("name:size:date-time") plus a string of file codes from get_file_codes().
 *  2. For each track ("annot", "cpgat") whose annotation file code is present,
 *     sum EntryCount from the Datafiles table (restricted to this path and to
 *     the filestamps found in step 1) for the annotation file and for each
 *     companion mrna / pep / desc file whose code is present.
 *  3. Emit one <li> warning per companion whose total differs from the
 *     annotation total.
 *
 * Relies on an already-open MySQL connection (legacy mysql_* API).
 *
 * @param string $path Input directory to inspect; also matched against Datafiles.Path.
 * @return string Concatenated <li>...</li> warnings, or "" when nothing mismatches.
 */
function input_warnings($path) {
    ###### 1. Generate filestamp array and file-code string ######
    # Initialised up front so the SQL below is well-formed even when the
    # directory listing is skipped (100 or more lines).
    $filestamps = array();
    $file_codes = ""; # string representing each valid file type found in $path

    # escapeshellarg() keeps spaces / shell metacharacters in $path from being
    # interpreted by the shell.
    $file_list = shell_exec('ls -l --time-style=long-iso ' . escapeshellarg($path));
    $list = explode("\n", (string) $file_list); # file list array

    if (count($list) < 100) {
        foreach ($list as $item) {
            $item = preg_replace("/\\s+/", " ", $item); # collapse runs of whitespace

            # Only regular-file listings start with "-" (directories, links, totals are skipped)
            if (substr($item, 0, 1) != "-") {
                continue;
            }

            $vals = explode(" ", $item);
            $field_count = count($vals);
            $filename = $vals[$field_count - 1]; # e.g. MyFile_est.fa

            ##### 2. For valid files, parse metadata and recreate the unique filestamp #####
            if (validate_file_type($filename, "") == "") {
                continue;
            }
            $time = $vals[$field_count - 2];
            $year_month_day = $vals[$field_count - 3];
            $size = $vals[$field_count - 4];

            # IMPORTANT: This format MUST be synchronized with FileStamp in /xGDBvm/scripts/xGDB_ValidateFiles.sh
            $filestamps[] = "{$filename}:{$size}:{$year_month_day}-{$time}";
            $file_codes .= get_file_codes($filename); # encodes all valid file types in the Input directory
        }
    }

    ##### 3. Query the Datafiles table for entry sums that should match each other,
    #####    using only rows that match our filestamp 'fingerprint' array.
    mysql_select_db("Genomes");

    # Escape everything that is spliced into the SQL text.
    $sql_path = mysql_real_escape_string($path);
    $sql_stamps = '"' . implode('","', array_map('mysql_real_escape_string', $filestamps)) . '"';

    # Track names are current jargon; the table leaves room to generalize and extend them.
    # Each check is: file code, SeqType, text appended to each count, message tag.
    $tracks = array(
        array(
            'name' => "annot",
            'annot_code' => "a",
            'checks' => array(
                array("m", "mrna", " entries", "N-1"),
                array("i", "pep", " entries", "N-2"),
                array("s", "desc", "", "N-3"),
            ),
        ),
        array(
            'name' => "cpgat",
            'annot_code' => "A",
            'checks' => array(
                array("M", "mrna", "", "N-4"),
                array("I", "pep", "", "N-5"),
                array("S", "desc", "", "N-6"),
            ),
        ),
    );

    $warning = "";
    foreach ($tracks as $track) {
        # Companion files are only compared when the track's annotation file is present.
        if (strpos($file_codes, $track['annot_code']) === false) {
            continue;
        }
        # EntryCount for annotation files holds Transcripts (not Genes); see xGDB_validatefile.php
        $annot = _input_warnings_totals($track['name'], "annot", $sql_path, $sql_stamps);

        foreach ($track['checks'] as $check) {
            list($code, $seq_type, $unit, $tag) = $check;
            if (strpos($file_codes, $code) === false) {
                continue;
            }
            $other = _input_warnings_totals($track['name'], $seq_type, $sql_path, $sql_stamps);

            # Loose comparison on purpose: totals arrive as strings (or "" / NULL when no rows matched).
            if ($annot['count'] == $other['count']) {
                continue;
            }
            $warning .= "<li><span class=\"caution\">NOTE:</span> "
                . "{$other['description']} (~{$other['suffix']}; {$other['count']}{$unit})"
                . " does not match "
                . "{$annot['description']} (~{$annot['suffix']}; {$annot['count']}{$unit})"
                . " ({$tag})</li>";
        }
    }

    return $warning;
}

/**
 * Helper for input_warnings(): sum EntryCount for one Track / SeqType pair.
 *
 * @param string $track      Track name ("annot" or "cpgat").
 * @param string $seq_type   SeqType value ("annot", "mrna", "pep" or "desc").
 * @param string $sql_path   Already-escaped Path value.
 * @param string $sql_stamps Already-escaped, double-quoted, comma-separated FileStamp list.
 * @return array 'count' (SUM of EntryCount), 'suffix' ("Track.SeqType.Format") and
 *               'description' (validate_file_type() output for that suffix); all ""
 *               when the query fails or returns no rows.
 */
function _input_warnings_totals($track, $seq_type, $sql_path, $sql_stamps) {
    $totals = array('count' => "", 'suffix' => "", 'description' => "");

    $query = "SELECT SUM(EntryCount), Track, SeqType, Format FROM Datafiles"
        . " WHERE SeqType=\"{$seq_type}\" AND Track=\"{$track}\" AND Path=\"{$sql_path}\""
        . " AND FileStamp IN ({$sql_stamps}) GROUP BY SeqType";

    $result = mysql_query($query);
    if (!$result) {
        return $totals; # query failed; treat as "no data" rather than fetching from false
    }

    while ($row = mysql_fetch_array($result)) {
        $totals['count'] = $row[0];
        $totals['suffix'] = $row[1] . "." . $row[2] . "." . $row[3];
        $totals['description'] = validate_file_type($totals['suffix'], "bold");
    }

    return $totals;
}
/**
 * Build a formatted HTML list of the valid input files of one data class
 * found in a user-specified directory.
 *
 * Each regular file in $input_dir that validate_file_type() accepts for
 * $class becomes one <li> showing its type, name, a validation-status icon,
 * FASTA header type, human-readable size and (when the Datafiles table has a
 * row for the file's filestamp) its entry count.
 *
 * Opens a MySQL connection (legacy mysql_* API) as "gdbuser"; on connection
 * failure it echoes an error message and terminates the script.
 *
 * @param string $input_dir User-specified input data path.
 * @param string $class     Data class passed to validate_file_type()
 *                          (e.g. gdna, transcript, protein).
 * @param string $dbpass    Password for the "gdbuser" MySQL account.
 * @return array [0] formatted HTML header + list, [1] number of valid files,
 *               [2] total size of those files in bytes, [3] raw `ls -l` output.
 */
function create_input_list($input_dir, $class, $dbpass) {
    $db = mysql_connect("localhost", "gdbuser", $dbpass);
    if (!$db) {
        echo "Error: Could not connect to database!";
        exit;
    }
    mysql_select_db("Genomes");

    # escapeshellarg() keeps spaces / shell metacharacters in $input_dir from
    # being interpreted by the shell.
    $file_list = shell_exec('ls -l --time-style=long-iso ' . escapeshellarg($input_dir));
    $list = explode("\n", (string) $file_list); # file list array

    $file_list2 = ""; # accumulated <li> items
    $total_size = 0;  # cumulative size in bytes
    $n = 0;           # valid file count

    # Placeholders interpolated into the markup below; nothing in this function
    # populates them, so they are defined as empty strings.
    $valid_count_display = "";
    $invalid_count_display = "";
    $noteval_count_display = "";
    $list_header_line_styled = "";

    # Version of the path with escaped forward slashes, used to build an html id tag.
    $escaped_path = str_replace("/", "\\/", $input_dir);

    $unit_labels = array("bytes", "KB", "MB", "GB");

    if (count($list) < 100) {
        foreach ($list as $item) {
            $item = preg_replace("/\\s+/", " ", $item); # collapse runs of whitespace

            # Only regular-file listings start with "-".
            if (substr($item, 0, 1) != "-") {
                continue;
            }

            $vals = explode(" ", $item);
            $field_count = count($vals);
            $filename = $vals[$field_count - 1];

            $filename_type = validate_file_type($filename, $class);
            if ($filename_type == "") {
                continue; # To list all files in directory, not just valid ones, drop this guard.
            }

            $time = $vals[$field_count - 2];
            $year_month_day = $vals[$field_count - 3];
            $size = $vals[$field_count - 4];

            # IMPORTANT: This format MUST be synchronized with FileStamp in /xGDBvm/scripts/xGDB_ValidateFiles.sh
            $filestamp = "{$filename}:{$size}:{$year_month_day}-{$time}";

            ##### Look up validation status and entry count for this filestamp #####
            $valid = "";   # T, F or NULL
            $entries = ""; # number of entries
            $sql_stamp = mysql_real_escape_string($filestamp, $db);
            $mysql_get_entry = mysql_query("SELECT Valid, EntryCount FROM Datafiles where FileStamp='{$sql_stamp}'");
            if ($mysql_get_entry) {
                while ($result_get_entry = mysql_fetch_array($mysql_get_entry)) {
                    $valid = $result_get_entry[0];
                    $entries = $result_get_entry[1];
                }
            }

            # The icon communicates validation status (by color) and is a click
            # target for opening the validation dialog box.
            if ($valid === "T") {
                $file_info_icon = "information_green.png";
                $valid_style = "filevalid";
            } elseif ($valid === "F") {
                $file_info_icon = "information_red.png";
                $valid_style = "filenotvalid";
            } else {
                $file_info_icon = "information.png";
                $valid_style = "filenoteval"; # default; blue
            }

            ##### Build more markup including escaped filepath and validation icons. #####
            # NOTE(review): $filename / $filestamp are written into HTML unescaped — confirm
            # validate_file_type() restricts filenames to markup-safe characters.
            $filepath = $escaped_path . "\\/" . $filename; # unique ID tag (with escaped slashes) for opening a Jquery dialog
            $info_icon_styled = "\n\t\t\t\t\t\t <span id=\"{$filepath}\" class=\"validatefile-button\" title=\"{$filestamp}\">\n\t\t\t\t\t\t <img class=\"nudge3\" src=\"/XGDB/images/{$file_info_icon}\" />\n\t\t\t\t\t\t </span>\n\t\t\t\t\t\t ";

            ##### For GeneSeqer jobs we need to know the Fasta header (defline) type #####
            $file_path_name = "{$input_dir}/{$filename}";
            $fasta_header_type = fasta_header_type($file_path_name, $valid_style);
            $fasta_type = $fasta_header_type[0];

            $n = $n + 1;

            $filename_type_styled = "<span class=\"{$valid_style}\">{$filename_type}:</span>";
            $filename_styled = "<span class=\"{$valid_style} italic\">{$filename}</span>";
            $filename_display = $filename_type_styled . " " . $filename_styled . $info_icon_styled . " (" . $fasta_type . ")" . $list_header_line_styled;

            ##### Accumulate the byte total, then scale the size into a readable range #####
            $total_size = $total_size + $size;
            $unit = 0; # index into $unit_labels
            # Stop at the largest labelled unit so every size is printed with a unit.
            while ($size > 1024 && $unit < count($unit_labels) - 1) {
                $size = round($size / 1024, 1);
                $unit++;
            }

            ##### If file has been validated, # of entries is available from the MySQL query. #####
            $entries_styled = empty($entries) ? "" : "<span style=\"color:#00A592\"> \n\t\t\t\t\t\t {$entries} entries\n\t\t\t\t\t\t</span>";

            ##### The file 'list item': filename, validation icon / size and units / entries #####
            $file_list2 .= "\n\t\t\t\t \t<li class='smallerfont'>\n\t\t\t\t \t {$filename_display} / {$size}"
                . " " . $unit_labels[$unit]
                . " / " . $entries_styled . "</li>";
        }
    }

    ##### Assemble the pieces of the header and list #####
    $file_list1 = "\n\t\t<span class=\"plaintext largerfont bold\">\n\t\t\t{$input_dir} \n\t\t</span>\n\t\t<span class=\"normalfont\">\n\t\t\t{$valid_count_display} {$invalid_count_display} {$noteval_count_display}\n\t\t</span>";
    $file_list1 .= "\n\t<ul class='bullet1 indent2'>\t";
    $file_list3 = "</ul>";
    $file_list_formatted = $file_list1 . $file_list2 . $file_list3;

    # $total_size is in bytes; $n is returned in case callers want to validate based on valid files.
    return array($file_list_formatted, $n, $total_size, $file_list);
}