// Clean the template sequence: drop every non-word character and every digit
// (whitespace, numbering, punctuation), leaving only letters and underscores.
$sequence = preg_replace("/\\W|\\d/", "", $sequence);

// maximum length of amplicons (handed to Amplify() exactly as posted;
// null when the form field is missing, without raising an undefined-key warning)
$maxlength = isset($_POST["length"]) ? $_POST["length"] : null;

// SET PATTERNS FROM PRIMERS
// Change N to point in primers, so that N matches any base in the regular expression
$pattern1 = str_replace("N", ".", $primer1);
$pattern2 = str_replace("N", ".", $primer2);

// If one mismatch is allowed, create new pattern
// example: pattern="ACGT"; to allow one mismatch pattern=".CGT|A.GT|AC.T|ACG."
// NOTE(review): includeN() receives the raw primer, so the N -> "." replacement
// above is discarded in this branch; confirm includeN() handles N itself.
if (isset($_POST["allowmissmatch"]) && $_POST["allowmissmatch"] == 1) {
    $pattern1 = includeN($primer1);
    $pattern2 = includeN($primer2);
}

// SET PATTERN
// Either primer may start an amplicon; the end pattern is whatever RevComp()
// returns for the combined start pattern.
$start_pattern = "{$pattern1}|{$pattern2}";
$end_pattern = RevComp($start_pattern);

// CALL Amplify FUNCTION
$results_array = Amplify($start_pattern, $end_pattern, $sequence, $maxlength);

// PRINT RESULTS
// Primers are escaped before being echoed into the page, and real newlines are
// emitted ("\n", not "\\n") so the <pre> block shows line breaks instead of a
// literal backslash-n.
print "<pre>Primer 1: " . htmlspecialchars($primer1, ENT_QUOTES) . "\n";
print "Primer 2: " . htmlspecialchars($primer2, ENT_QUOTES) . "\n\n";
if (count($results_array) > 0) {
    print "List of amplicons: position in sequence, length and sequence\n\n";
    // Each entry is used as start position => length within $sequence.
    foreach ($results_array as $key => $val) {
        print "{$key}\t{$val}\t" . substr($sequence, $key, $val) . "\n";
    }
} else {
    print "No amplification\n\n";
}

// ##############################################################
// FUNCTIONS
/**
 * Compute z-scores for the 256 tetranucleotides of a sequence,
 * as described by Teeling et al. BMC Bioinformatics 2004, 5:163.
 *
 * The sequence and the result of RevComp() on it are analysed together.
 * For every tetranucleotide abcd (in the order AAAA, AAAC, AAAG, ... TTTT):
 *   expected = N(abc) * N(bcd) / N(bc)
 *   variance = expected * (N(bc) - N(abc)) * (N(bc) - N(bcd)) / N(bc)^2
 *   z        = (N(abcd) - expected) / sqrt(variance)
 *
 * When N(bc) is zero or the variance is zero the z-score is mathematically
 * undefined; 0.0 is returned for that tetranucleotide instead of triggering
 * a division by zero (which is a fatal DivisionByZeroError on PHP 8).
 *
 * @param string $theseq nucleotide sequence; only A, C, G and T words are looked up
 * @return array list of 256 z-scores indexed 0..255
 */
function compute_zscores_for_tetranucleotides($theseq)
{
    // Append the second strand. The space separator means words that straddle
    // the junction contain a space and are therefore never looked up below.
    $theseq .= " " . RevComp($theseq);
    $total_len = strlen($theseq);

    // Count overlapping words of length 2, 3 and 4.
    $counts = array();
    foreach (array(2, 3, 4) as $word_len) {
        $counts[$word_len] = array();
        $last_start = $total_len - $word_len;
        for ($pos = 0; $pos <= $last_start; $pos++) {
            $word = substr($theseq, $pos, $word_len);
            if (isset($counts[$word_len][$word])) {
                $counts[$word_len][$word]++;
            } else {
                $counts[$word_len][$word] = 1;
            }
        }
    }

    $bases = array("A", "C", "G", "T");

    // COMPUTE Z-SCORES FOR TETRANUCLEOTIDES
    $zscore = array();
    foreach ($bases as $a) {
        foreach ($bases as $b) {
            foreach ($bases as $c) {
                foreach ($bases as $d) {
                    // Words that never occur count as 0 rather than being undefined.
                    $n_bc = isset($counts[2][$b . $c]) ? $counts[2][$b . $c] : 0;
                    $n_abc = isset($counts[3][$a . $b . $c]) ? $counts[3][$a . $b . $c] : 0;
                    $n_bcd = isset($counts[3][$b . $c . $d]) ? $counts[3][$b . $c . $d] : 0;
                    $n_abcd = isset($counts[4][$a . $b . $c . $d]) ? $counts[4][$a . $b . $c . $d] : 0;

                    if ($n_bc == 0) {
                        // No central dinucleotide: expectation cannot be computed.
                        $zscore[] = 0.0;
                        continue;
                    }

                    $expected = $n_abc * $n_bcd / $n_bc;
                    $variance = $expected * (($n_bc - $n_abc) * ($n_bc - $n_bcd) / pow($n_bc, 2));

                    if ($variance > 0) {
                        $zscore[] = ($n_abcd - $expected) / sqrt($variance);
                    } else {
                        // Zero variance: avoid dividing by sqrt(0).
                        $zscore[] = 0.0;
                    }
                }
            }
        }
    }

    return $zscore;
}
/**
 * Handle a submitted FCGR (frequency chaos game representation) request:
 * clean and validate the posted sequence, count nucleotides and
 * oligonucleotides, call create_FCGR_image() and print the resulting HTML.
 *
 * Reads the $_POST keys "seq_name", "seq", "len", "s", "map" and "freq".
 * Missing keys are treated as "not provided" instead of raising
 * undefined-key warnings. The script is terminated with die() when the
 * cleaned sequence length is outside [$input_min, $input_max].
 *
 * @param int $input_min minimum accepted sequence length (bp)
 * @param int $input_max maximum accepted sequence length (bp)
 * @return void
 */
function FCGR_compute($input_min, $input_max)
{
    // PHP_SELF can carry client-supplied path info, so it is escaped and the
    // attribute is quoted before being echoed into the page.
    print "<p align=right><a href=\"" . htmlspecialchars($_SERVER["PHP_SELF"], ENT_QUOTES) . "\">Home</a></p>\n";
    print "Computing...(time depends on sequence length and power of the server)<center><hr>";
    // Push the progress message to the client before the long computation starts.
    flush();

    // GET DATA
    $seq_name = !empty($_POST["seq_name"]) ? $_POST["seq_name"] : "No name";

    // Upper-case the sequence and strip non-word characters and digits.
    $seq = (isset($_POST["seq"]) && is_string($_POST["seq"])) ? strtoupper($_POST["seq"]) : "";
    $seq = preg_replace("/\\W|\\d/", "", $seq);
    $seq_len = strlen($seq);

    // limits for length of sequence
    if ($seq_len > $input_max) {
        die("<p>Sequence is longer than {$input_max} bp.<p>At this moment we can not provide this service to such a long sequences.");
    }
    if ($seq_len < $input_min) {
        die("<p>Minumum sequence length: {$input_min} bp");
    }

    // Posted values are forwarded unchanged to the helpers (null when absent).
    $oligo_len = isset($_POST["len"]) ? $_POST["len"] : null;
    $strands = isset($_POST["s"]) ? $_POST["s"] : null;

    // If double strand is requested to be computed, append the second strand.
    // $seq_len above intentionally keeps the single-strand length.
    if ($strands == 2) {
        $seq .= " " . RevComp($seq);
    }

    // compute nucleotide frequencies
    $A = substr_count($seq, "A");
    $C = substr_count($seq, "C");
    $G = substr_count($seq, "G");
    $T = substr_count($seq, "T");

    // COMPUTE OLIGONUCLEOTIDE FREQUENCIES
    // frequencies are saved to an array named $oligos
    $oligos = find_oligos($seq, $oligo_len);

    // CREATE CHAOS GAME REPRESENTATION OF FREQUENCIES IMAGE
    // check the function for more info on parameters
    // $for_map receives the string that is printed inside the <MAP> element below
    $for_map = create_FCGR_image($oligos, $seq_name, $A, $C, $G, $T, $seq_len, $strands, $oligo_len);

    // PRINT THE IMAGE, WHICH WILL BE A IMAGE MAP WHEN REQUESTED
    // to avoid submission of a huge amount of data through the net.
    // date("U") is appended as a query string so the browser does not reuse a cached FCGR.png.
    // NOTE(review): "hight" looks like a typo for "height"; left unchanged because
    // the real image dimensions are not visible here - confirm before fixing.
    if (isset($_POST["map"]) && $_POST["map"] == 1) {
        // image map is requested
        print "<br><MAP NAME=Kaixo>\n{$for_map}\n</MAP>\n<img USEMAP=\\#Kaixo src=FCGR.png?" . date("U") . " width=552 hight=700 border=0>\n";
    } else {
        print "<br><img src=FCGR.png?" . date("U") . " width=552 hight=700 border=0>";
    }

    // PRINT TEXTAREA WITH OLIGONUCLEOTIDE FREQUENCIES WHEN REQUESTED
    if (isset($_POST["freq"]) && $_POST["freq"] == 1) {
        // oligonucleotide frequencies are requested
        print "<p><p>Raw data used to generate images above: <BR><textarea cols=80 rows=10>Sequence\tOccurences\n";
        foreach ($oligos as $key => $val) {
            print "\n{$key}\t{$val}";
        }
        print "</textarea>";
    }
}
$sequence = preg_replace("/\\W|\\d/", "", $sequence); // removed // when length of query sequence is 0 => error if (strlen($sequence) == 0) { die("Error: query sequence not provided. Plase go back andtry again."); } if (strlen($sequence) > 1000000) { die("Error: sequence is too long. Download the script from biophp.org and used it localy."); } // when length of query sequence is bellow 4^oligo_len => error (to avoid a lot of 0 frequencies); if (strlen($sequence) < pow(4, $oligo_len)) { die("Error: query sequence must be at least 4^(length of oligo) to proceed."); } // when frequencies at both strands are requested, place sequence and reverse complement of sequence in one line if ($strands == 2) { $sequence .= " " . RevComp($sequence); } // compute request and save data in an array $result = find_oligos($sequence, $oligo_len); // print the form print_form($sequence); //print out results print "<p>Frequencie of oligos with length {$oligo_len}<br><textarea cols=60 rows=50>"; foreach ($result as $oligo => $frequency) { print "{$oligo}\t{$frequency}\n"; } print "\n</textarea>\n"; } // ###################################################################################################### // ##################################### FUNCTIONS ################################### // ######################################################################################################