/**
 * Writes two Coot scripts (Scheme and Python) that open a "multi-criterion
 * chart": a window of buttons, one group per cluster of validation outliers,
 * each button recentring the view on a cluster or on one outlier.
 *
 * $infile      is passed to computeResCenters() to get residue centers; its
 *              basename is also written into the header of both scripts.
 * $outfile     will be overwritten with a Scheme script for loading in Coot.
 * $outfile_py  will be overwritten with the equivalent Python script.
 * $clash       is the data structure from loadClashlist()
 * $rama        is the data structure from loadRamachandran()
 * $rota        is the data structure from loadRotamer()
 * $cbdev       is the data structure from loadCbetaDev()
 * $pperp       is the data structure from loadBasePhosPerp()
 * Any of the last five can be set to null if the data is unavailable.
 *
 * Outliers on residues for which computeResCenters() returned no center are
 * left out, because they have no coordinates to jump to.
 *
 * Progress messages are echoed while clustering. Nothing is returned; if an
 * output file cannot be opened a warning is raised and nothing is written.
 */
function makeCootClusteredChart($infile, $outfile, $outfile_py, $clash, $rama, $rota, $cbdev, $pperp)
{
    //$startTime1 = time();
    //{{{ 0. A lovely Scheme script written for us by Paul Emsley
    // The heredoc bodies and their terminators start in column 0 on purpose:
    // the text is emitted verbatim (the Python indentation is significant) and
    // PHP before 7.3 requires the closing identifier at the start of a line.
    $schemeScript = <<<HEREDOC
; -*-scheme-*-
;;(molprobity-fascinating-clusters-things-gui
;; dialog-name
;; sorting-options
;; list-of-clusters)
;;
;; where a cluster is:
;; (list
;; cluster-name-string
;; cluster-center-go-button-label-string
;; ccgb-x ccgb-y ccgb-z
;; ; a list of specific items:
;; (list
;; (list specific-button-label-string button-red button-green button-blue
;; specific-x specific-y specific-z))))
;;
;;
;;(molprobity-fascinating-clusters-things-gui
;; gui-name-string
;; (list
;; (list "Active Site" (list 0 1 2 3 4))
;; (list "Worst First" (list 3 4 2 1 0)))
;; ; now a list of clusters:
;; (list
;; (list cluster-name-string
;; cluster-center-go-button-label-string
;; ccgb-x ccgb-y ccgb-z
;; ; now a list of specific items
;; (list
;; (list specific-button-label-string button-red button-green button-blue
;; specific-x specific-y specific-z)
;; (list specific-button-label-string button-red button-green button-blue
;; specific-x specific-y specific-z)))
;; (list cluster-name-string
;; cluster-center-go-button-label-string
;; ccgb-x ccgb-y ccgb-z
;; ; now a list of specific items
;; (list
;; (list specific-button-label-string button-red button-green button-blue
;; specific-x specific-y specific-z)
;; (list specific-button-label-string button-red button-green button-blue
;; specific-x specific-y specific-z)))))
;;
(define (molprobity-fascinating-clusters-things-gui window-name sorting-options cluster-list)

  (define ncluster-max 75)

  ;; utility function
  (define (add-feature-buttons feature-list cluster-vbox)
    (let ((frame (gtk-frame-new "Cluster Features"))
          (vbox (gtk-vbox-new #f 0)))
      (gtk-box-pack-start cluster-vbox frame #f #f 2)
      (gtk-container-add frame vbox)

      ;; add buttons to vbox for each feature
      ;;
      (map (lambda (feature)
             ; (format #t "feature: ~s~%" feature)
             (let ((button (gtk-button-new-with-label (car feature))))
               (gtk-signal-connect button "clicked"
                                   (lambda ()
                                     (set-rotation-centre
                                      (list-ref feature 4)
                                      (list-ref feature 5)
                                      (list-ref feature 6))))
               (gtk-box-pack-start vbox button #f #f 1)))
           feature-list)))

  ;; main body
  (let* ((window (gtk-window-new 'toplevel))
         (scrolled-win (gtk-scrolled-window-new))
         (outside-vbox (gtk-vbox-new #f 2))
         (inside-vbox (gtk-vbox-new #f 0)))

    (format #t "Maxiumum number of clusters displayed: ~s~%" ncluster-max)

    (gtk-window-set-default-size window 300 200)
    (gtk-window-set-title window window-name)
    (gtk-container-border-width inside-vbox 2)
    (gtk-container-add window outside-vbox)
    (gtk-box-pack-start outside-vbox scrolled-win #t #t 0) ; expand fill padding
    (gtk-scrolled-window-add-with-viewport scrolled-win inside-vbox)
    (gtk-scrolled-window-set-policy scrolled-win 'automatic 'always)

    (let loop ((cluster-list cluster-list)
               (count 0))
      (cond
       ((null? cluster-list) 'done)
       ((= ncluster-max count) 'done)
       (else
        (let ((cluster-info (car cluster-list)))
          (let* ((frame (gtk-frame-new #f))
                 (vbox (gtk-vbox-new #f 2)))
            (gtk-container-border-width frame 6)
            (gtk-container-add frame vbox)
            (gtk-box-pack-start inside-vbox frame #f #f 10)
            (let ((go-to-cluster-button (gtk-button-new-with-label
                                         (car cluster-info))))
              (gtk-signal-connect go-to-cluster-button "clicked"
                                  (lambda ()
                                    (set-rotation-centre
                                     (list-ref cluster-info 1)
                                     (list-ref cluster-info 2)
                                     (list-ref cluster-info 3))))
              (gtk-box-pack-start vbox go-to-cluster-button #f #f 2)
              ;; now we have a list of individual features:
              (let ((features (list-ref cluster-info 4)))
                (if (> (length features) 0)
                    (add-feature-buttons features vbox)))
              (loop (cdr cluster-list) (+ count 1))))))))

    (gtk-container-border-width outside-vbox 2)
    (let ((ok-button (gtk-button-new-with-label " Close ")))
      (gtk-box-pack-end outside-vbox ok-button #f #f 0)
      (gtk-signal-connect ok-button "clicked"
                          (lambda args
                            (gtk-widget-destroy window))))
    (gtk-widget-show-all window)))

;;
;;(molprobity-fascinating-clusters-things-gui
;; "Testing the GUI"
;; (list
;; (list "Active Site" (list 0 1 2 3 4))
;; (list "Worst First" (list 3 4 1 2 0)))
;; (list
;; (list "The first cluster"
;;  11 12 15
;;  (list
;;   (list "A bad thing" 0.4 0.6 0.7 10 13 16)
;;   (list "Another bad thing" 0.4 0.6 0.7 12 15 16)))
;; (list "Another cluster of baddies"
;;  -11 12 15
;;  (list
;;   (list "A quite bad thing" 0.4 0.6 0.7 -10 -13 16)
;;   (list "A not so bad thing" 0.4 0.6 0.7 -12 -15 16)))
;; (list "A third cluster of baddies"
;;  11 12 -15
;;  (list
;;   (list "A quite bad rotamer" 0.4 0.6 0.7 10 13 -16)
;;   (list "A hydrogen clash" 0.4 0.6 0.7 12 15 -16)
;;   (list "A not so bad H-H clash" 0.4 0.6 0.7 12 15 -16)))))
HEREDOC;

    // NOTE(review): in the Python script below `count` is compared with
    // ncluster_max but never incremented, so unlike the Scheme version the
    // cluster limit is not actually enforced. Left as-is; confirm intent.
    $schemeScript_py = <<<HEREDOC
def molprobity_fascinating_clusters_things_gui(window_name, sorting_option, cluster_list):

    ncluster_max = 75

    # a callback function
    def callback_recentre(widget, x, y, z):
        set_rotation_centre(x, y, z)

    # utility function
    def add_feature_buttons(feature_list, cluster_vbox):
        frame = gtk.Frame("Cluster Features")
        vbox = gtk.VBox(False, 0)
        cluster_vbox.pack_start(frame, False, False, 2)
        frame.add(vbox)

        # add buttons to vbox for each feature
        #
        for feature in feature_list:
            # print "feature: ", feature
            button = gtk.Button(feature[0])
            button.connect("clicked", callback_recentre, feature[4], feature[5], feature[6])
            vbox.pack_start(button, False, False, 1)

    # main body
    window = gtk.Window(gtk.WINDOW_TOPLEVEL)
    scrolled_win = gtk.ScrolledWindow()
    outside_vbox = gtk.VBox(False, 2)
    inside_vbox = gtk.VBox(False, 0)

    print "Maximum number of clusters displayed: ", ncluster_max

    window.set_default_size(300, 200)
    window.set_title(window_name)
    inside_vbox.set_border_width(2)
    window.add(outside_vbox)
    outside_vbox.pack_start(scrolled_win, True, True, 0) # expand fill padding
    scrolled_win.add_with_viewport(inside_vbox)
    scrolled_win.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_ALWAYS)

    count = 0

    for cluster_info in cluster_list:
        if (count == ncluster_max):
            break
        else:
            frame = gtk.Frame()
            vbox = gtk.VBox(False, 2)
            frame.set_border_width(6)
            frame.add(vbox)
            inside_vbox.pack_start(frame, False, False, 10)
            go_to_cluster_button = gtk.Button(cluster_info[0])
            go_to_cluster_button.connect("clicked", callback_recentre, cluster_info[1], cluster_info[2], cluster_info[3])
            vbox.pack_start(go_to_cluster_button, False, False, 2)

            # now we have a list of individual features:
            features = cluster_info[4]
            if (len(features) > 0):
                add_feature_buttons(features, vbox)

    outside_vbox.set_border_width(2)
    ok_button = gtk.Button(" Close ")
    outside_vbox.pack_end(ok_button, False, False, 0)
    ok_button.connect("clicked", lambda x: window.destroy())
    window.show_all()
HEREDOC;
    //}}} 0. A lovely Scheme script written for us by Paul Emsley

    //{{{ 1. For each outlier, create an array(cnit, description, r, g, b, x, y, z)
    //$res_xyz = computeResCenters($infile, true);
    $res_xyz = computeResCenters($infile);
    $self_bads = array();

    if (is_array($clash) && isset($clash['clashes']) && is_array($clash['clashes'])) {
        foreach ($clash['clashes'] as $cnit => $worst) {
            if (!isset($res_xyz[$cnit])) {
                continue; // no center => nothing to recentre on
            }
            $ctr = $res_xyz[$cnit];
            $self_bads[] = array($cnit, "Clash at {$cnit} ({$worst} A)", 1, 0, 0.5, $ctr['x'], $ctr['y'], $ctr['z']);
        }
    }

    if (is_array($rama)) {
        foreach ($rama as $item) {
            if ($item['eval'] == "OUTLIER") {
                $cnit = $item['resName'];
                if (!isset($res_xyz[$cnit])) {
                    continue;
                }
                $ctr = $res_xyz[$cnit];
                $self_bads[] = array($cnit, "Ramachandran outlier {$cnit} ({$item['type']} {$item['scorePct']}%)", 0, 1, 0, $ctr['x'], $ctr['y'], $ctr['z']);
            }
        }
    }

    if (is_array($rota)) {
        foreach ($rota as $item) {
            if ($item['scorePct'] <= 1.0) {
                $cnit = $item['resName'];
                if (!isset($res_xyz[$cnit])) {
                    continue;
                }
                $ctr = $res_xyz[$cnit];
                $self_bads[] = array($cnit, "Bad rotamer {$cnit} ({$item['scorePct']}%)", 1, 0.7, 0, $ctr['x'], $ctr['y'], $ctr['z']);
            }
        }
    }

    if (is_array($cbdev)) {
        foreach ($cbdev as $item) {
            if ($item['dev'] >= 0.25) {
                $cnit = $item['resName'];
                if (!isset($res_xyz[$cnit])) {
                    continue;
                }
                $ctr = $res_xyz[$cnit];
                $self_bads[] = array($cnit, "C-beta deviation {$cnit} ({$item['dev']} A)", 0.5, 0, 1, $ctr['x'], $ctr['y'], $ctr['z']);
            }
        }
    }

    if (is_array($pperp)) {
        foreach ($pperp as $item) {
            if ($item['outlier']) {
                $cnit = $item['resName'];
                if (!isset($res_xyz[$cnit])) {
                    continue;
                }
                $ctr = $res_xyz[$cnit];
                $reasons = array();
                if ($item['deltaOut']) {
                    $reasons[] = "base-phosphate distance";
                }
                if ($item['epsilonOut']) {
                    $reasons[] = "bad epsilon angle";
                }
                $self_bads[] = array($cnit, "Wrong sugar pucker {$cnit} (" . implode(', ', $reasons) . ")", 0.5, 0, 1, $ctr['x'], $ctr['y'], $ctr['z']);
            }
        }
    }
    //}}} 1. For each outlier, create an array(cnit, description, r, g, b, x, y, z)

    //{{{ 2. Cluster the outliers, somehow
    //echo "self_bads has ".count($self_bads)." elements\n";
    $range = 12; // a fairly arbitrary value, in Angstroms.
    $range2 = $range * $range;
    $water_re = '/(HOH|DOD|H20|D20|WAT|SOL|TIP|TP3|MTO|HOD|DOH)/';
    $worst_res = array();  // cnit => array( bad1, bad2, ... )
    $local_bads = array(); // cnit => array( idx => bad ) for every bad within $range
    $res_bads = array();   // cnit => number of bads sitting on that residue
    $cycles = 0;

    // Per-outlier values that do not depend on the residue being examined.
    // The CNIT is quoted so that characters such as '+', '*' or '/' in a
    // residue name are matched literally instead of being read as regex syntax.
    $bad_patterns = array();
    $bad_is_water = array();
    foreach ($self_bads as $idx => $a_bad) {
        $bad_patterns[$idx] = '/' . preg_quote($a_bad[0], '/') . '/';
        $bad_is_water[$idx] = (preg_match($water_re, $a_bad[0]) == 1);
    }

    $startTime = time();
    echo "starting cootclusteredchart\n";
    foreach ($res_xyz as $cnit => $xyz) {
        #$local_bads[$cnit] = array();
        $own_bads = 0;
        foreach ($bad_patterns as $pattern) {
            if (preg_match($pattern, $cnit)) {
                $own_bads++;
            }
        }
        // Only residues that carry an outlier themselves may act as a cluster center.
        if ($own_bads == 0) {
            continue;
        }
        $res_bads[$cnit] = $own_bads;

        $center_is_water = (preg_match($water_re, $cnit) == 1);
        foreach ($self_bads as $idx => $a_bad) {
            $dx = $xyz['x'] - $a_bad[5];
            $dy = $xyz['y'] - $a_bad[6];
            $dz = $xyz['z'] - $a_bad[7];
            if ($dx * $dx + $dy * $dy + $dz * $dz > $range2) {
                continue;
            }
            // A water may only collect outliers that belong to waters.
            if ($center_is_water && !$bad_is_water[$idx]) {
                continue;
            }
            $local_bads[$cnit][$idx] = $a_bad;
        }
    }
    echo "first foreach loop took " . (time() - $startTime) . " seconds\nstarting whiletrue loop\ncycles:";

    // Greedy clustering: repeatedly take the residue with the most outliers
    // around it, record that group, and remove its outliers from further play.
    // Starting from the count check means no cluster is invented when there
    // are no outliers at all.
    while (count($self_bads) > 0) {
        // Get worst residue from list and its bads
        uasort($local_bads, 'makeCootClusteredChart_cmp'); // put worst residue last
        end($local_bads); // go to last element
        $worst_cnit = key($local_bads);
        $worst_bads = ($worst_cnit === null) ? array() : current($local_bads);

        if (count($worst_bads) == 0) {
            // Safety net: nothing left can be grouped, so the loop could never
            // make progress. List whatever remains under its own residue.
            foreach ($self_bads as $idx => $a_bad) {
                $worst_res[$a_bad[0]][$idx] = $a_bad;
            }
            break;
        }

        #echo "\nRemoving $worst_cnit with $bad_count bads...\n==========\n";
        $worst_res[$worst_cnit] = $worst_bads; // record it as the worst one this pass

        // Discard all bads that went to making the worst, the worst;
        // then re-run the algorithm to find the next worst, until no bads left.
        foreach ($worst_bads as $idx => $a_bad) {
            // assure that once used, a residue can't be a new center
            if (isset($local_bads[$a_bad[0]])) {
                $local_bads[$a_bad[0]] = array();
            }
            unset($self_bads[$idx]);
        }
        foreach (array_keys($local_bads) as $cnit2) {
            $local_bads[$cnit2] = array_diff_key($local_bads[$cnit2], $worst_bads);
        }

        if (count($self_bads) == 0) {
            break;
        }
        $cycles++;
        echo $cycles . " ";
        //echo "end of while loop took ".(time() - $startTime)." seconds\n";
        #if($cycles > 100) break;
    }
    //echo "number of cycles: ".$cycles."\n";
    #var_export($worst_res); echo "\n==========\n";
    //}}}

    $out = fopen($outfile, 'wb');
    $out_py = fopen($outfile_py, 'wb');
    if ($out === false || $out_py === false) {
        if ($out !== false) {
            fclose($out);
        }
        if ($out_py !== false) {
            fclose($out_py);
        }
        trigger_error("makeCootClusteredChart: could not open output file(s) for writing", E_USER_WARNING);
        return;
    }

    //scheme file
    fwrite($out, ";\n; Multicriterion chart for " . basename($infile) . ", generated by MolProbity.\n");
    fwrite($out, "; Open this in Coot using Calculate | Run Script...\n;\n");
    fwrite($out, "\n\n" . $schemeScript . "\n\n");
    fwrite($out, "(molprobity-fascinating-clusters-things-gui\n \"MolProbity Multi-Chart\"\n (list\n");
    // this is where we write possible sort orders
    //fwrite($out, " (list \"Worst First\" (list 0 1 2 3 4 5))\n");
    fwrite($out, " )\n (list\n");

    //python file
    fwrite($out_py, "#\n# Multicriterion chart for " . basename($infile) . ", generated by MolProbity.\n");
    fwrite($out_py, "# Open this in Coot using Calculate | Run Script...\n#\n");
    fwrite($out_py, "\n\n" . $schemeScript_py . "\n\n");
    fwrite($out_py, "molprobity_fascinating_clusters_things_gui(\n \"MolProbity Multi-Chart\",\n [],\n [");

    // This is where we write clusters of outliers
    $loop_ctr = 0;
    $cluster_total = count($worst_res);
    foreach ($worst_res as $cnit => $bads) {
        // identify which residue has the most outliers in this group, make header name
        // (waters are never used; fall back to the cluster center itself)
        $max = 0;
        $max_header = $cnit;
        foreach ($bads as $b) {
            if (isset($res_bads[$b[0]]) && $res_bads[$b[0]] > $max && !preg_match($water_re, $b[0])) {
                $max = $res_bads[$b[0]];
                $max_header = $b[0];
            }
        }

        $xyz = $res_xyz[$cnit];
        if (count($bads) > 1) {
            fwrite($out, " (list\n \"problems near {$max_header}\"\n {$xyz['x']} {$xyz['y']} {$xyz['z']}\n (list\n");
            fwrite($out_py, "[\"problems near {$max_header}\",\n {$xyz['x']}, {$xyz['y']}, {$xyz['z']},\n [\n");
            foreach ($bads as $b) {
                fwrite($out, " (list \"{$b[1]}\" {$b[2]} {$b[3]} {$b[4]} {$b[5]} {$b[6]} {$b[7]})\n");
                fwrite($out_py, " [\"{$b[1]}\", {$b[2]}, {$b[3]}, {$b[4]}, {$b[5]}, {$b[6]}, {$b[7]}]");
                fwrite($out_py, ",\n"); // trailing commas are legal inside a Python list
            }
            fwrite($out, " )\n )\n");
            fwrite($out_py, " ]\n ],\n ");
        } else {
            // A lone outlier becomes a cluster button with no feature list.
            $b = reset($bads);
            fwrite($out, " (list\n \"{$b[1]}\"\n {$xyz['x']} {$xyz['y']} {$xyz['z']}\n (list\n");
            fwrite($out, " )\n )\n");
            fwrite($out_py, "[\"{$b[1]}\", {$xyz['x']}, {$xyz['y']}, {$xyz['z']}, []]");
            if ($loop_ctr < $cluster_total - 1) {
                fwrite($out_py, ",\n ");
            } else {
                fwrite($out_py, "\n ");
            }
        }
        $loop_ctr++;
    }

    fwrite($out, " )\n)\n");
    fwrite($out_py, "])");
    fclose($out);
    fclose($out_py);
    //echo "Making coot clustered chart took ".(time() - $startTime1)." seconds\n";
}
/**
 * Builds a kinemage comparing a model before and after SSWING refitting.
 *
 * The file starts with a @text section listing the statistics returned by
 * describePdbStats(pdbstat($pdb1), false), followed by one view per refit
 * residue (centered on that residue's center from computeResCenters($pdb1)),
 * and then the output of prekin and phenix.probe for each of the two models.
 *
 * $pdb1    is the path to the first model; it supplies the stats and the
 *          view centers and is drawn with "peach" sidechains.
 * $pdb2    is the path to the second model, drawn with "sky" sidechains.
 * $outfile will be overwritten.
 * $cnit    is an array of CNIT codes for the residues that were processed.
 *
 * Residues in $cnit with no known center get no view. Nothing is returned;
 * if $outfile cannot be opened a warning is raised and nothing is written.
 */
function makeSswingKin($pdb1, $pdb2, $outfile, $cnit)
{
    if (file_exists($outfile)) {
        unlink($outfile);
    }

    $stats = describePdbStats(pdbstat($pdb1), false);
    $h = fopen($outfile, 'a');
    if ($h === false) {
        trigger_error("makeSswingKin: could not open {$outfile} for writing", E_USER_WARNING);
        return;
    }

    fwrite($h, "@text\n");
    fwrite($h, "Sidechains have been refit by SSWING. Details of the input file:\n\n");
    foreach ($stats as $stat) {
        fwrite($h, "[+] {$stat}\n");
    }
    fwrite($h, "@kinemage 1\n");

    // Calculate views for each residue in CNIT
    $ctr = computeResCenters($pdb1);
    $i = 0; // view number; counted only for views actually written
    foreach ($cnit as $res) {
        if (!isset($ctr[$res])) {
            continue; // a view without coordinates would be a malformed @center line
        }
        $i++;
        $c = $ctr[$res];
        fwrite($h, "@{$i}viewid {{$res}}\n@{$i}span 12\n@{$i}zslab 100\n@{$i}center {$c['x']} {$c['y']} {$c['z']}\n");
    }
    fclose($h);

    // Quote the paths so that spaces or shell metacharacters in a file name
    // cannot split or alter the command lines below.
    $arg_pdb1 = escapeshellarg($pdb1);
    $arg_pdb2 = escapeshellarg($pdb2);
    $arg_out = escapeshellarg($outfile);

    exec("prekin -quiet -append -animate -onegroup -show 'mc,sc(peach),ca,hy,ht,wa' {$arg_pdb1} >> {$arg_out}");
    exec("phenix.probe -quiet -noticks -nogroup -self 'alta' {$arg_pdb1} >> {$arg_out}");
    //exec("probe -quiet -noticks -nogroup -self 'alta' $pdb1 >> $outfile");
    exec("prekin -quiet -append -animate -onegroup -show 'mc,sc(sky),ca,hy,ht,wa' {$arg_pdb2} >> {$arg_out}");
    exec("phenix.probe -quiet -noticks -nogroup -self 'alta' {$arg_pdb2} >> {$arg_out}");
    //exec("probe -quiet -noticks -nogroup -self 'alta' $pdb2 >> $outfile");
}
/**
 * Count the number of outliers that occur for this residue and other
 * residues whose centroid is within some number of Angstroms of this one.
 *
 * Clusters are picked greedily: each pass takes the non-water residue with
 * the largest local outlier total, records it, removes every outlier that
 * contributed to it, and repeats. Once the best remaining total is 1 or
 * less, every residue still holding outliers is recorded under its own
 * name. The loop also stops after 26 passes.
 *
 * $infile is passed to computeResCenters() to get the residue centroids
 * $range is the max distance between two residue centroids
 * $clash is the data structure from loadClashlist()
 * $rama is the data structure from loadRamachandran()
 * $rota is the data structure from loadRotamer()
 * $cbdev is the data structure from loadCbetaDev()
 * $pperp is the data structure from loadBasePhosPerp()
 * Any of the last five can be set to null if the data is unavailable.
 *
 * Returns an array of CNIT code => outlier count for the chosen residues.
 */
function calcLocalBadness($infile, $range, $clash, $rama, $rota, $cbdev, $pperp)
{
    $res_xyz = computeResCenters($infile);
    // presumably cnit => number of outliers on that residue (it is summed and
    // compared numerically below) -- confirm against findAllOutliers()
    $self_bads = findAllOutliers($clash, $rama, $rota, $cbdev, $pperp);
    #var_export($self_bads); echo "\n==========\n";

    $range2 = $range * $range;
    $water_re = '/(HOH|DOD|H20|D20|WAT|SOL|TIP|TP3|MTO|HOD|DOH)/';
    $worst_res = array();
    $local_mat = array(); // center cnit => array(neighbouring cnit with outliers => 1)
    $cycles = 0;

    // calculate all distances and build association matrix
    foreach ($res_xyz as $cnit => $xyz) {
        // Only residues that have outliers of their own can be centers...
        if (!isset($self_bads[$cnit]) || $self_bads[$cnit] == 0) {
            continue;
        }
        // ...and waters are never used as centers, so their rows are not needed.
        if (preg_match($water_re, $cnit)) {
            continue;
        }
        foreach ($self_bads as $cnit2 => $bads2) {
            if (!isset($res_xyz[$cnit2])) {
                continue; // no centroid: do not pretend the residue sits at the origin
            }
            $xyz2 = $res_xyz[$cnit2];
            $dx = $xyz['x'] - $xyz2['x'];
            $dy = $xyz['y'] - $xyz2['y'];
            $dz = $xyz['z'] - $xyz2['z'];
            if ($dx * $dx + $dy * $dy + $dz * $dz <= $range2) {
                $local_mat[$cnit][$cnit2] = 1;
            }
        }
    }

    while (true) {
        // at each iteration count of how bad each case is
        $local_bads = array();
        foreach ($local_mat as $cnit => $neighbours) {
            foreach ($neighbours as $cnit2 => $flag) {
                if (!array_key_exists($cnit2, $self_bads)) {
                    continue; // already claimed by an earlier cluster
                }
                if (!isset($local_bads[$cnit])) {
                    $local_bads[$cnit] = 0;
                }
                $local_bads[$cnit] += $self_bads[$cnit2];
            }
        }

        // Get worst residue from list and its count of bads
        asort($local_bads); // put worst residue last
        #var_export($local_bads); echo "\n==========\n";
        end($local_bads); // go to last element
        $worst_cnit = key($local_bads); // null when the list is empty
        $bad_count = ($worst_cnit === null) ? 0 : current($local_bads);

        // Only singletons left (for efficiency)
        // Also ensures that singletons are listed under their "owner"
        if ($bad_count <= 1) {
            foreach ($self_bads as $cnit => $bads) {
                if ($bads > 0) {
                    $worst_res[$cnit] = $bads;
                }
            }
            break;
        }

        // else ...
        #echo "\nRemoving $worst_cnit with $bad_count bads...\n==========\n";
        $worst_res[$worst_cnit] = $bad_count; // record it as the worst one this pass

        // Discard all bads that went to making the worst, the worst;
        // then re-run the algorithm to find the next worst, until no bads left.
        foreach (array_keys($local_mat[$worst_cnit]) as $cnit2) {
            unset($self_bads[$cnit2]); // faster than 0 -- won't traverse again
        }

        if (count($self_bads) == 0) {
            break;
        }

        // limit the number of cycles (the loop gives up once more than 25 are done)
        $cycles++;
        if ($cycles > 25) {
            break;
        }
    }
    #var_export($worst_res); echo "\n==========\n";
    return $worst_res;
}