1
|
1 #!/usr/bin/perl -w
|
|
2
|
|
3 # Script that simulates the result of a high throughput protocol.
|
|
4
|
|
5 if (@ARGV < 4)
|
|
6 {
|
|
7 print "Script that simulates the result of a high throughput protocol.\n";
|
|
8 print "1st) exhaustive docking of a small representative part of the\n";
|
|
9 print " whole library.\n";
|
|
10 print "2nd) Store the result of sdreport -t over that exhaustive dock.\n";
|
|
11 print " in file <sdreport_file> that will be the input of this\n";
|
|
12 print " script.\n";
|
|
13 print "3rd) ht_protocol_finder.pl <sdreport_file> <output_file> <thr1max>";
|
|
14 print " <thr1min> <ns1> <ns2>\n";
|
|
15 print " <ns1> and <ns2> are the number of steps in stage 1 and in\n";
|
|
16 print " stage 2. If not present, the default values are 5 and 15\n";
|
|
17 print " <thrmax> and <thrmin> setup the range of thresholds that will\n";
|
|
18 print " be simulated in stage 1. The threshold of stage 2 depends\n";
|
|
19 print " on the value of the threshold of stage 1.\n";
|
|
20 print " An input of -22 -24 will try protocols:\n";
|
|
21 print " 5 -22 15 -27\n";
|
|
22 print " 5 -22 15 -28\n";
|
|
23 print " 5 -22 15 -29\n";
|
|
24 print " 5 -23 15 -28\n";
|
|
25 print " 5 -23 15 -29\n";
|
|
26 print " 5 -23 15 -30\n";
|
|
27 print " 5 -24 15 -29\n";
|
|
28 print " 5 -24 15 -30\n";
|
|
29 print " 5 -24 15 -31\n";
|
|
30 print " Output of the program is a 7 column values. First column\n";
|
|
31 print " represents the time. This is a percentage of the time it\n";
|
|
32 print " would take to do the docking in exhaustive mode, i.e. \n";
|
|
33 print " docking each ligand 100 times. Anything\n";
|
|
34 print " above 12 is too long.\n";
|
|
35 print " Second column is the first percentage. Percentage of\n";
|
|
36 print " ligands that pass the first stage.\n";
|
|
37 print " Third column is the second percentage. Percentage of\n";
|
|
38 print " ligands that pass the second stage.\n";
|
|
39 print " The four last columns represent the protocol.\n";
|
|
40 print " All the protocols tried are written at the end.\n";
|
|
41 print " The ones for which time is less than 12%, perc1 is\n";
|
|
42 print " less than 30% and perc2 is less than 5% but bigger than 1%\n";
|
|
43 print " will have a series of *** after, to indicate they are good choices\n";
|
|
44 print " WARNING! This is a simulation based in a small set.\n";
|
|
45 print " The numbers are an indication, not factual values.\n";
|
|
46 exit (0);
|
|
47 }
|
|
48 open (IFILE, $ARGV[0]) || die "cannot open $ARGV[0] for reading: $!";
|
|
49 open (OFILE, ">$ARGV[1]") || die "cannot open $ARGV[1] for writing: $!";
|
|
50 <IFILE>; # read first line
|
|
51 $line = <IFILE>;
|
|
52 $line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)/;
|
|
53 $prevname = $1;
|
|
54 $inter = $2;
|
|
55 push @tmp, $inter;
|
|
56 $i = 0;
|
|
57 while ($line = <IFILE>)
|
|
58 {
|
|
59 $line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)/;
|
|
60 $name = $1;
|
|
61 $inter = $2;
|
|
62 if ($name ne $prevname)
|
|
63 {
|
|
64 $prevname = $name;
|
|
65 push @inters, [ @tmp ];
|
|
66 $runsperligand[$i++] = @tmp;
|
|
67 $#tmp = -1;
|
|
68 }
|
|
69 push @tmp, $inter;
|
|
70 }
|
|
71 push @inters, [ @tmp ];
|
|
72 $runsperligand[$i++] = @tmp;
|
|
73 $totLigands = @inters;
|
|
74 $ta = $ARGV[2];
|
|
75 $tb = $ARGV[3];
|
|
76 if (@ARGV > 4)
|
|
77 {
|
|
78 $n1 = $ARGV[4];
|
|
79 }
|
|
80 else
|
|
81 {
|
|
82 $n1 = 5;
|
|
83 }
|
|
84 if (@ARGV > 5)
|
|
85 {
|
|
86 $n2 = $ARGV[5];
|
|
87 }
|
|
88 else
|
|
89 {
|
|
90 $n2 = 15;
|
|
91 }
|
|
92 printf OFILE "Command line args:\n\t";
|
|
93 foreach $arg (@ARGV)
|
|
94 {
|
|
95 printf OFILE "%s\t", $arg;
|
|
96 }
|
|
97 printf OFILE "\n\n TIME PERC1 PERC2 N1 THR1 N2 THR2\n";
|
|
98 for ($t1 = $ta ; $t1 >= $tb ; $t1--)
|
|
99 {
|
|
100 $tc = $t1 - 5;
|
|
101 for ($t2 = $tc ; $t2 >= ($tc - 2) ; $t2--)
|
|
102 {
|
|
103 ($cref, $dref) =
|
|
104 getTables($t1, $t2);
|
|
105 @ligBelowThr1 = @$cref;
|
|
106 @ligBelowThr2 = @$dref;
|
|
107 ($time,$p1,$p2) = simulation($n1,$t1,$n2,$t2);
|
|
108 if (($time < 12.0) && ($p1 < 30.0) && ($p2 < 5.0) && ($p2 > 1.0))
|
|
109 {
|
|
110 printf OFILE "%6.3f, %6.3f, %6.3f, %4i, %4i, %4i, %4i ***\n",
|
|
111 $time, $p1, $p2, $n1,$t1,$n2,$t2;
|
|
112 }
|
|
113 else
|
|
114 {
|
|
115 printf OFILE "%6.3f, %6.3f, %6.3f, %4i, %4i, %4i, %4i\n",
|
|
116 $time, $p1, $p2, $n1,$t1,$n2,$t2;
|
|
117 }
|
|
118 }
|
|
119 }
|
|
120
|
|
121
|
|
122 sub simulation
|
|
123 {
|
|
124 my(@params);
|
|
125 my(@lt0, @lt1);
|
|
126 my($thr1, $thr2,$ns1,$ns2,$tottime,$totnumHits,$total,$totnruns);
|
|
127 @params = @_;
|
|
128 $ns1 = $params[0];
|
|
129 $thr1 = $params[1];
|
|
130 $ns2 = $params[2];
|
|
131 $thr2 = $params[3];
|
|
132 $ntrials = 100;
|
|
133 $tottime = 0;
|
|
134 $totnumHitss1 = 0;
|
|
135 $totnumHitss2 = 0;
|
|
136 $total = $totLigands;
|
|
137 $totnruns = 0;
|
|
138 for ($i = 0 ; $i < $ntrials ; $i++)
|
|
139 {
|
|
140 $numHitss1 = 0;
|
|
141 $numHitss2 = 0;
|
|
142 $totnruns = 0;
|
|
143 for ($j = 0 ; $j < $totLigands ; $j++)
|
|
144 {
|
|
145 ($passStage1, $nruns1) = stage($ns1, $ligBelowThr1[$j]);
|
|
146 if ($passStage1)
|
|
147 {
|
|
148 $numHitss1++;
|
|
149 ($passStage2, $nruns2) = stage($ns2, $ligBelowThr2[$j]);
|
|
150 if ($passStage2)
|
|
151 {
|
|
152 $numHitss2++;
|
|
153 }
|
|
154 $totnruns += $nruns2;
|
|
155 }
|
|
156 $totnruns += $nruns1;
|
|
157 }
|
|
158 $time = $totnruns / $total;
|
|
159 $tottime += $time;
|
|
160 $totnumHitss1 += $numHitss1;
|
|
161 $totnumHitss2 += $numHitss2;
|
|
162 }
|
|
163 $tottime /= $ntrials;
|
|
164 $totnumHitss1 /= $ntrials;
|
|
165 $totnumHitss2 /= $ntrials;
|
|
166 $p1 = $totnumHitss1 * 100.0 / $totLigands;
|
|
167 $p2 = $totnumHitss2 * 100.0 / $totLigands;
|
|
168 return ($tottime, $p1, $p2);
|
|
169 }
|
|
170
|
|
171 sub getTables
|
|
172 {
|
|
173 my(@params);
|
|
174 my($totLigands, @lt1, @lt2);
|
|
175 my($row,$inter, $belowThr1, $belowThr2);
|
|
176 my($thr0, $thr1);
|
|
177 @params = @_;
|
|
178 $thr1 = $params[0];
|
|
179 $thr2 = $params[1];
|
|
180 $totLigands = 0;
|
|
181 $belowThr1 = 0;
|
|
182 $belowThr2 = 0;
|
|
183 $totLigands = 0;
|
|
184 for $i (0 .. $#inters)
|
|
185 {
|
|
186 $row = $inters[$i];
|
|
187 for $j (0 .. $#{$row})
|
|
188 {
|
|
189 $inter = $row->[$j];
|
|
190 if ($inter <= $thr1)
|
|
191 {
|
|
192 $belowThr1++;
|
|
193 }
|
|
194 if ($inter <= $thr2)
|
|
195 {
|
|
196 $belowThr2++;
|
|
197 }
|
|
198 }
|
|
199 $lt1[$totLigands] =
|
|
200 $belowThr1 / $runsperligand[$i];
|
|
201 $lt2[$totLigands] =
|
|
202 $belowThr2 / $runsperligand[$i];
|
|
203 $totLigands++;
|
|
204 $belowThr1 = 0;
|
|
205 $belowThr2 = 0;
|
|
206 }
|
|
207 return (\@lt1, \@lt2);
|
|
208 }
|
|
209
|
|
210
|
|
211
|
|
212 sub stage
|
|
213 {
|
|
214 my($nruns, $p, $i, $r);
|
|
215 $nruns = $_[0];
|
|
216 $p = $_[1];
|
|
217 if ($nruns == 0)
|
|
218 {
|
|
219 return (1, 0);
|
|
220 }
|
|
221 for ($i = 0 ; $i < $nruns ; $i++)
|
|
222 {
|
|
223 $r = rand;
|
|
224 if ($r < $p)
|
|
225 {
|
|
226 return (1, $i);
|
|
227 }
|
|
228 }
|
|
229 return (0, $nruns);
|
|
230 }
|
|
231
|
|
232
|