diff tools/rdock/bin/ht_protocol_finder.pl @ 3:b02d74d22d05 draft default tip

planemo upload
author marpiech
date Mon, 29 Aug 2016 08:23:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/ht_protocol_finder.pl	Mon Aug 29 08:23:52 2016 -0400
@@ -0,0 +1,232 @@
+#!/usr/bin/perl -w
+
+# Script that simulates the result of a high throughput protocol.
+
+if (@ARGV < 4)
+{
+	print "Script that simulates the result of a high throughput protocol.\n";
+	print "1st) exhaustive docking of a small representative part of the\n";
+	print "     whole library.\n";
+	print "2nd) Store the result of sdreport -t over that exhaustive dock.\n";
+	print "     in file <sdreport_file> that will be the input of this\n";
+	print "     script.\n";
+	print "3rd) ht_protocol_finder.pl <sdreport_file> <output_file> <thr1max>";
+	print " <thr1min> <ns1> <ns2>\n";
+	print "     <ns1> and <ns2> are the number of steps in stage 1 and in\n";
+	print "     stage 2. If not present, the default values are 5 and 15\n";
+	print "     <thrmax> and <thrmin> setup the range of thresholds that will\n";
+	print "     be simulated in stage 1. The threshold of stage 2 depends\n";
+	print "     on the value of the threshold of stage 1.\n";
+	print "     An input of -22 -24 will try protocols:\n";
+	print "            5	-22	15	-27\n";
+	print "            5	-22	15	-28\n";
+	print "            5	-22	15	-29\n";
+	print "            5	-23	15	-28\n";
+	print "            5	-23	15	-29\n";
+	print "            5	-23	15	-30\n";
+	print "            5	-24	15	-29\n";
+	print "            5	-24	15	-30\n";
+	print "            5	-24	15	-31\n";
+	print "     Output of the program is a 7 column values. First column\n";
+	print "     represents the time. This is a percentage of the time it\n";
+	print "     would take to do the docking in exhaustive mode, i.e. \n";
+    print "     docking each ligand 100 times. Anything\n";
+	print "     above 12 is too long.\n";
+	print "     Second column is the first percentage. Percentage of\n";
+	print "     ligands that pass the first stage.\n";
+	print "     Third column is the second percentage. Percentage of\n";
+	print "     ligands that pass the second stage.\n";
+	print "     The four last columns represent the protocol.\n";
+	print "     All the protocols tried are written at the end.\n";
+	print "     The ones for which time is less than 12%, perc1 is\n";
+	print "     less than 30% and perc2 is less than 5% but bigger than 1%\n";
+	print "     will have a series of *** after, to indicate they are good choices\n";
+	print "     WARNING! This is a simulation based in a small set.\n";
+	print "     The numbers are an indication, not factual values.\n";
+    exit (0);
+}
+open (IFILE, $ARGV[0]) || die "cannot open $ARGV[0] for reading: $!";
+open (OFILE, ">$ARGV[1]") || die "cannot open $ARGV[1] for writing: $!";
+<IFILE>;   # read first line
+$line = <IFILE>;
+$line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)/;
+$prevname = $1;
+$inter = $2;
+push @tmp, $inter; 
+$i = 0;
+while ($line = <IFILE>)
+{
+  $line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)/;
+  $name = $1;
+  $inter = $2;
+  if ($name ne $prevname)
+  {
+      $prevname = $name;
+      push @inters, [ @tmp ];
+      $runsperligand[$i++] = @tmp;
+      $#tmp = -1;
+  }
+  push @tmp, $inter;
+}
+push @inters, [ @tmp ];
+$runsperligand[$i++] = @tmp;
+$totLigands = @inters;
+$ta = $ARGV[2];
+$tb = $ARGV[3];
+if (@ARGV > 4)
+{
+	$n1 = $ARGV[4];
+}
+else
+{
+	$n1 = 5;
+}
+if (@ARGV > 5)
+{
+	$n2 = $ARGV[5];
+}
+else
+{
+	$n2 = 15;
+}
+printf OFILE "Command line args:\n\t";
+foreach $arg (@ARGV)
+{
+	printf OFILE "%s\t", $arg;
+}
+printf OFILE "\n\n  TIME   PERC1   PERC2    N1  THR1    N2  THR2\n"; 
+for ($t1 = $ta ; $t1 >= $tb ; $t1--)
+{
+    $tc = $t1 - 5;
+    for ($t2 = $tc ; $t2 >= ($tc - 2) ; $t2--)
+    {
+        ($cref, $dref) = 
+          getTables($t1, $t2);
+        @ligBelowThr1 = @$cref;
+        @ligBelowThr2 = @$dref;
+        ($time,$p1,$p2) = simulation($n1,$t1,$n2,$t2);
+        if (($time < 12.0) && ($p1 < 30.0) && ($p2 < 5.0) && ($p2 > 1.0))
+        {
+            printf OFILE "%6.3f, %6.3f, %6.3f, %4i, %4i, %4i, %4i ***\n", 
+                         $time, $p1, $p2, $n1,$t1,$n2,$t2;
+        }
+        else
+        {
+            printf OFILE "%6.3f, %6.3f, %6.3f, %4i, %4i, %4i, %4i\n", 
+                         $time, $p1, $p2, $n1,$t1,$n2,$t2;
+        }
+    }
+}
+
+
+sub simulation
+{
+  my(@params);
+  my(@lt0, @lt1);
+  my($thr1, $thr2,$ns1,$ns2,$tottime,$totnumHits,$total,$totnruns);
+  @params = @_;
+  $ns1 = $params[0];
+  $thr1 = $params[1];
+  $ns2 = $params[2];
+  $thr2 = $params[3];
+  $ntrials = 100;
+  $tottime = 0;
+  $totnumHitss1 = 0;
+  $totnumHitss2 = 0;
+  $total = $totLigands;
+  $totnruns = 0;
+  for ($i = 0 ; $i < $ntrials ; $i++)
+  {
+    $numHitss1 = 0;
+    $numHitss2 = 0;
+    $totnruns = 0;
+    for ($j = 0 ; $j < $totLigands ; $j++)
+    {
+      ($passStage1, $nruns1) = stage($ns1, $ligBelowThr1[$j]);
+      if ($passStage1)
+      {
+        $numHitss1++;
+        ($passStage2, $nruns2) = stage($ns2, $ligBelowThr2[$j]);
+        if ($passStage2)
+        {
+          $numHitss2++;
+        }
+        $totnruns += $nruns2;
+      }
+      $totnruns += $nruns1;
+    }
+    $time = $totnruns / $total;
+    $tottime += $time;
+    $totnumHitss1 += $numHitss1;
+    $totnumHitss2 += $numHitss2;
+  }
+  $tottime /= $ntrials;
+  $totnumHitss1 /= $ntrials;
+  $totnumHitss2 /= $ntrials;
+  $p1 = $totnumHitss1 * 100.0 / $totLigands;
+  $p2 = $totnumHitss2 * 100.0 / $totLigands;
+  return ($tottime, $p1, $p2);
+}
+
+sub getTables
+{
+  my(@params);
+  my($totLigands, @lt1, @lt2);
+  my($row,$inter, $belowThr1, $belowThr2);
+  my($thr0, $thr1);
+  @params = @_;
+  $thr1 = $params[0];
+  $thr2 = $params[1];
+  $totLigands = 0;
+  $belowThr1 = 0;
+  $belowThr2 = 0;
+  $totLigands = 0;
+  for $i (0 .. $#inters)
+  {
+    $row = $inters[$i];
+    for $j (0 .. $#{$row})
+    {
+      $inter = $row->[$j];
+      if ($inter <= $thr1)
+      {
+        $belowThr1++;
+      }
+      if ($inter <= $thr2)
+      {
+        $belowThr2++;
+      }
+    }
+    $lt1[$totLigands] = 
+      $belowThr1 / $runsperligand[$i];
+    $lt2[$totLigands] = 
+      $belowThr2 / $runsperligand[$i];
+    $totLigands++;
+    $belowThr1 = 0;
+    $belowThr2 = 0;
+  }
+  return (\@lt1, \@lt2);
+}
+
+  
+
+sub stage
+{
+  my($nruns, $p, $i, $r);
+  $nruns = $_[0];
+  $p = $_[1];
+  if ($nruns == 0)
+  {
+    return (1, 0);
+  }
+  for ($i = 0 ; $i < $nruns ; $i++)
+  {
+    $r = rand;
+    if ($r < $p)
+    {
+      return (1, $i);
+    }
+  }
+  return (0, $nruns);
+}
+
+