/**
 * Build HTML warning list items for Input files whose entry counts should agree but do not.
 *
 * Steps:
 *   1. List $path with `ls -l`, keep regular files accepted by validate_file_type(), and
 *      rebuild each file's FileStamp ("name:size:date-time") plus a string of file codes
 *      (from get_file_codes()) describing which file types are present.
 *   2. For each gene-model track, sum EntryCount in Genomes.Datafiles for the annotation
 *      rows matching those filestamps, then do the same for mrna / pep / desc rows and
 *      emit a <li> warning whenever a total differs from the annotation total.
 *
 * Directories whose listing has 100 lines or more are not scanned, so no warnings are produced.
 *
 * @param string $path Input directory to inspect.
 * @return string Concatenated <li>...</li> warnings; empty string when everything matches.
 */
function input_warnings($path)
{
    ###### 1. Generate filestamp array and file-code string
    $filestamps = array();
    $file_codes = "";
    # string representing each file type present in the directory.
    # escapeshellarg + "--" keep an unusual path from being interpreted as shell syntax or as an ls option.
    $file_list = shell_exec("ls -l --time-style=long-iso -- " . escapeshellarg($path));
    $list = explode("\n", (string) $file_list);
    if (count($list) < 100) {
        foreach ($list as $item) {
            $item = preg_replace("/\\s+/", " ", $item);
            # collapse runs of whitespace so the line can be split on single spaces
            if (substr($item, 0, 1) != "-") {
                continue;
                # not a regular-file listing (directory, symlink, "total" line, blank)
            }
            $vals = explode(" ", $item);
            $field_count = count($vals);
            if ($field_count < 4) {
                continue;
                # too short to hold size, date, time and name
            }
            $filename = $vals[$field_count - 1];
            //e.g. the filename, MyFile_est.fa
            if (validate_file_type($filename, "") == "") {
                continue;
            }
            $time = $vals[$field_count - 2];
            $year_month_day = $vals[$field_count - 3];
            $size = $vals[$field_count - 4];
            # IMPORTANT: This format MUST be synchronized with FileStamp in /xGDBvm/scripts/xGDB_ValidateFiles.sh
            $filestamps[] = "{$filename}:{$size}:{$year_month_day}-{$time}";
            $file_codes .= get_file_codes($filename);
        }
    }

    ###### 2. Compare entry totals per track.
    # 'track' is the Track column value (current jargon; leave open the possibility to extend these names).
    # 'annot_code' / 'code' are the file-code letters that must be present before a query is run.
    # 'unit' reproduces the historical wording: only N-1 and N-2 say "entries" after the counts.
    $comparisons = array(
        array(
            'track' => "annot",
            'annot_code' => "a",
            'checks' => array(
                array('seqtype' => "mrna", 'code' => "m", 'label' => "N-1", 'unit' => " entries"),
                array('seqtype' => "pep", 'code' => "i", 'label' => "N-2", 'unit' => " entries"),
                array('seqtype' => "desc", 'code' => "s", 'label' => "N-3", 'unit' => ""),
            ),
        ),
        array(
            'track' => "cpgat",
            'annot_code' => "A",
            'checks' => array(
                array('seqtype' => "mrna", 'code' => "M", 'label' => "N-4", 'unit' => ""),
                array('seqtype' => "pep", 'code' => "I", 'label' => "N-5", 'unit' => ""),
                array('seqtype' => "desc", 'code' => "S", 'label' => "N-6", 'unit' => ""),
            ),
        ),
    );

    $warning = "";
    mysql_select_db("Genomes");
    foreach ($comparisons as $group) {
        # Without an annotation file for this track there is nothing to compare against.
        if (strpos($file_codes, $group['annot_code']) === false) {
            continue;
        }
        # Transcripts (not Genes) was assigned to EntryCount in xGDB_validatefile.php
        $annot = input_warnings_fetch_total($path, $filestamps, $group['track'], "annot");
        foreach ($group['checks'] as $check) {
            if (strpos($file_codes, $check['code']) === false) {
                continue;
            }
            $other = input_warnings_fetch_total($path, $filestamps, $group['track'], $check['seqtype']);
            # Loose comparison on purpose: totals arrive as strings and a missing total is "".
            if ($annot['total'] == $other['total']) {
                continue;
            }
            $warning .= "<li><span class=\"caution\">NOTE:</span> {$other['description']} (~{$other['suffix']}; {$other['total']}{$check['unit']}) does not match {$annot['description']} (~{$annot['suffix']}; {$annot['total']}{$check['unit']}) ({$check['label']})</li>";
        }
    }
    return $warning;
}

/**
 * Helper for input_warnings(): sum EntryCount for one Track/SeqType among the given filestamps.
 *
 * @param string $path       Input directory (matched against Datafiles.Path).
 * @param array  $filestamps FileStamp strings that restrict which rows are summed.
 * @param string $track      Datafiles.Track value.
 * @param string $seqtype    Datafiles.SeqType value.
 * @return array Keys 'total' (SUM(EntryCount)), 'suffix' ("Track.SeqType.Format") and
 *               'description' (validate_file_type($suffix, "bold")); all "" when the
 *               query fails or returns no row.
 */
function input_warnings_fetch_total($path, $filestamps, $track, $seqtype)
{
    $totals = array('total' => "", 'suffix' => "", 'description' => "");

    # Escape every interpolated value; filenames come straight from a directory listing.
    $quoted_stamps = array();
    foreach ($filestamps as $stamp) {
        $quoted_stamps[] = "\"" . mysql_real_escape_string($stamp) . "\"";
    }
    if (count($quoted_stamps) == 0) {
        $quoted_stamps[] = "\"\"";
        # keeps the IN (...) list syntactically valid; matches nothing
    }
    $query = "SELECT SUM(EntryCount), Track, SeqType, Format FROM Datafiles"
        . " WHERE SeqType=\"" . mysql_real_escape_string($seqtype) . "\""
        . " AND Track=\"" . mysql_real_escape_string($track) . "\""
        . " AND Path=\"" . mysql_real_escape_string($path) . "\""
        . " AND FileStamp IN (" . implode(",", $quoted_stamps) . ")"
        . " GROUP BY SeqType";

    $result = mysql_query($query);
    if (!$result) {
        return $totals;
        # query failed; report as "no total" rather than fetching from a non-resource
    }
    while ($row = mysql_fetch_array($result)) {
        $totals['total'] = $row[0];
        $totals['suffix'] = $row[1] . "." . $row[2] . "." . $row[3];
        $totals['description'] = validate_file_type($totals['suffix'], "bold");
    }
    return $totals;
}
Example #2
0
/**
 * Build a formatted HTML list of the valid input files of a given data class in a directory.
 *
 * Each regular file reported by `ls -l` for $input_dir is checked with
 * validate_file_type($filename, $class); accepted files are looked up in Genomes.Datafiles
 * by FileStamp to get their validation status (drives icon and CSS class) and entry count.
 * Directories whose listing has 100 lines or more are not scanned.
 *
 * Echoes an error and terminates the script if the database connection cannot be opened.
 *
 * @param string $input_dir User-specified input data path.
 * @param string $class     Data class passed through to validate_file_type()
 *                          (gdna, transcript, protein, ...).
 * @param string $dbpass    Password for the "gdbuser" MySQL account.
 * @return array array(formatted HTML list, number of valid files,
 *                     total size in bytes of those files, raw `ls` output).
 */
function create_input_list($input_dir, $class, $dbpass)
{
    $db = mysql_connect("localhost", "gdbuser", $dbpass);
    if (!$db) {
        echo "Error: Could not connect to database!";
        exit;
    }
    mysql_select_db("Genomes", $db);

    # escapeshellarg + "--" keep an unusual path from being interpreted as shell syntax or as an ls option.
    $file_list = shell_exec("ls -l --time-style=long-iso -- " . escapeshellarg($input_dir));
    $list = explode("\n", (string) $file_list);

    $file_list2 = "";
    # accumulated <li> items
    $total_size = 0;
    # cumulative size in bytes
    $n = 0;
    # valid file count
    $unit_labels = array("bytes", "KB", "MB", "GB");
    # Version of the path with forward slashes escaped; combined with the filename it becomes an html id.
    $escaped_path = str_replace("/", "\\/", $input_dir);

    if (count($list) < 100) {
        foreach ($list as $item) {
            $item = preg_replace("/\\s+/", " ", $item);
            if (substr($item, 0, 1) != "-") {
                continue;
                # not a regular-file listing
            }
            $vals = explode(" ", $item);
            $field_count = count($vals);
            if ($field_count < 4) {
                continue;
                # too short to hold size, date, time and name
            }
            $filename = $vals[$field_count - 1];
            $filename_type = validate_file_type($filename, $class);
            if ($filename_type == "") {
                continue;
                # To list all files in the directory, not just valid ones, drop this check.
            }
            $time = $vals[$field_count - 2];
            $year_month_day = $vals[$field_count - 3];
            $size = $vals[$field_count - 4];
            # IMPORTANT: This format MUST be synchronized with FileStamp in /xGDBvm/scripts/xGDB_ValidateFiles.sh
            $filestamp = "{$filename}:{$size}:{$year_month_day}-{$time}";

            ##### Look up validation status and entry count for this exact file version #####
            $valid = "";
            # T, F or NULL/empty when not evaluated
            $entries = "";
            # number of entries, when validated
            $get_entry = "SELECT Valid, EntryCount FROM Datafiles where FileStamp='"
                . mysql_real_escape_string($filestamp, $db) . "'";
            $mysql_get_entry = mysql_query($get_entry, $db);
            if ($mysql_get_entry) {
                while ($result_get_entry = mysql_fetch_array($mysql_get_entry)) {
                    $valid = $result_get_entry[0];
                    $entries = $result_get_entry[1];
                }
            }

            # The icon communicates validation status by color and is the click target for the validation dialog.
            if ($valid === "T") {
                $file_info_icon = "information_green.png";
                $valid_style = "filevalid";
            } elseif ($valid === "F") {
                $file_info_icon = "information_red.png";
                $valid_style = "filenotvalid";
            } else {
                $file_info_icon = "information.png";
                $valid_style = "filenoteval";
                # default; blue
            }

            ##### Build markup; everything derived from the directory listing is HTML-escaped #####
            $filename_html = htmlspecialchars($filename, ENT_QUOTES);
            $filestamp_html = htmlspecialchars($filestamp, ENT_QUOTES);
            # Unique ID tag (with escaped slashes) used for opening a jQuery dialog.
            $filepath = htmlspecialchars($escaped_path . "\\/" . $filename, ENT_QUOTES);
            $info_icon_styled = "\n\t\t\t\t\t\t      <span id=\"{$filepath}\" class=\"validatefile-button\" title=\"{$filestamp_html}\">\n\t\t\t\t\t\t         <img class=\"nudge3\" src=\"/XGDB/images/{$file_info_icon}\" />\n\t\t\t\t\t\t      </span>\n\t\t\t\t\t\t   ";

            ##### Fasta header (defline) type, shown in parentheses after the icon #####
            $fasta_header_type = fasta_header_type("{$input_dir}/{$filename}", $valid_style);
            $fasta_type = $fasta_header_type[0];

            $n = $n + 1;
            $filename_type_styled = "<span class=\"{$valid_style}\">{$filename_type}:</span>";
            $filename_styled = "<span class=\"{$valid_style} italic\">{$filename_html}</span>";
            $filename_display = $filename_type_styled . "  " . $filename_styled . $info_icon_styled . " (" . $fasta_type . ")";

            ##### Accumulate bytes, then scale the per-file size into a readable unit #####
            $total_size = $total_size + $size;
            $unit = 0;
            # index into $unit_labels; capped at GB so a label always exists
            while ($size > 1024 && $unit < count($unit_labels) - 1) {
                $size = round($size / 1024, 1);
                $unit++;
            }

            ##### If the file has been validated, the number of entries is appended at the end of the line #####
            $entries_styled = empty($entries) ? "" : "<span style=\"color:#00A592\"> \n\t\t\t\t\t\t    {$entries} entries\n\t\t\t\t\t\t</span>";

            $file_list2 .= "\n\t\t\t\t     \t<li class='smallerfont'>\n\t\t\t\t     \t   {$filename_display} / {$size}";
            $file_list2 .= " " . $unit_labels[$unit];
            $file_list2 .= " / " . $entries_styled . "</li>";
        }
    }

    # The header reserves slots for valid / invalid / not-evaluated counts, but nothing in this
    # function ever populated them; they are kept empty so the emitted markup is unchanged.
    $valid_count_display = "";
    $invalid_count_display = "";
    $noteval_count_display = "";

    ##### Assemble the pieces of the header and list #####
    $input_dir_html = htmlspecialchars($input_dir, ENT_QUOTES);
    $file_list1 = "\n\t\t<span class=\"plaintext largerfont bold\">\n\t\t\t{$input_dir_html} \n\t\t</span>\n\t\t<span class=\"normalfont\">\n\t\t\t{$valid_count_display} {$invalid_count_display} {$noteval_count_display}\n\t\t</span>";
    $file_list1 .= "\n\t<ul class='bullet1 indent2'>\t";
    $file_list3 = "</ul>";
    $file_list_formatted = $file_list1 . $file_list2 . $file_list3;

    // $total_size is in bytes; $n is returned in case callers want to validate based on valid files
    return array($file_list_formatted, $n, $total_size, $file_list);
}