Mercurial > repos > marpiech > norwich_tools
comparison tools/rdock/bin/run_rbscreen.pl @ 0:bc03dbb6eb37 draft
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 03:38:13 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bc03dbb6eb37 |
---|---|
1 #!/usr/bin/perl | |
2 use File::stat; | |
3 use lib "$ENV{'RBT_ROOT'}/lib"; | |
4 use strict; | |
5 require "run_rbfuncs.pl"; | |
6 | |
7 my $rbtroot = $ENV{'RBT_ROOT'}; | |
8 my $rbthome = $ENV{'RBT_HOME'}; | |
9 my $rbtligdb = $ENV{'RBT_LIGDB'}; | |
10 my $recepDir = "$rbtroot/data/receptors"; | |
11 my $scriptDir = "$rbtroot/data/scripts"; | |
12 my $filterDir = "$rbtroot/data/filters"; | |
13 my $libraryDir = "$rbtligdb/dock"; | |
14 my $binDir = "$rbtroot/bin"; | |
15 | |
16 #override default directories | |
17 #Parse command line arguments | |
18 my $nArgs = scalar(@ARGV); | |
19 | |
20 while (scalar(@ARGV)) { | |
21 my $arg = shift @ARGV; | |
22 if (index($arg,'-r')==0) { | |
23 $recepDir = substr($arg,2); | |
24 } | |
25 elsif (index($arg,'-s')==0) { | |
26 $scriptDir = substr($arg,2); | |
27 } | |
28 elsif (index($arg,'-l')==0) { | |
29 $libraryDir = substr($arg,2); | |
30 } | |
31 } | |
32 | |
33 | |
34 # Get the receptor, library and script tables | |
35 #my %receptor_table = get_prm_table($recepDir); | |
36 my %receptor_table = get_prm_table($rbthome); | |
37 my %script_table = get_prm_table($scriptDir); | |
38 my %library_table = get_dock_table($libraryDir); | |
39 | |
40 #GET ALL THE INPUTS | |
41 #my $library = get_selection(\%library_table,"library"); | |
42 my $isOK; | |
43 my @libraries; | |
44 my $receptor; | |
45 my $script; | |
46 | |
47 do { | |
48 @libraries = get_multiple_selection(\%library_table,"library"); | |
49 $isOK = get_input("\nIs this OK (Y/N)","Y"); | |
50 } until ($isOK eq "Y"); | |
51 | |
52 do { | |
53 $receptor = get_selection(\%receptor_table,"receptor"); | |
54 $isOK = get_input("\nIs this OK (Y/N)","Y"); | |
55 } until ($isOK eq "Y"); | |
56 | |
57 # (SJ) check whether all the required files are in the dir | |
58 # DM 7 Feb 2002 - get PSF/CRD filenames from PRM file | |
59 print "\nLooking for all required target files in $rbthome...\n\n"; | |
60 my $prm_file = "$rbthome/".$receptor; | |
61 my $prm_root = substr($prm_file, 0, rindex($prm_file,".")); | |
62 my $as_file = "${prm_root}.as"; | |
63 my $vdw1_file = "${prm_root}_vdw1.grd"; | |
64 my $vdw5_file = "${prm_root}_vdw5.grd"; | |
65 | |
66 #Set the default file names equal to their parameter names so that | |
67 #error messages make sense if the parameters are missing completely from | |
68 #the .prm file. | |
69 my $psf_file = "RECEPTOR_TOPOL_FILE"; | |
70 my $crd_file = "RECEPTOR_COORD_FILE"; | |
71 my $mol2_file; | |
72 | |
73 # DM 12 May 2003 | |
74 # Optional target files for experimental restraints | |
75 # | |
76 # These default restraint suffixes and filenames should be synchronised | |
77 # with the default parameter values in the relevant C++ class source files ($RBT_ROOT/fw/src/.cxx) | |
78 # They are used if the user does not explicitly define the parameter in the | |
79 # receptor .prm file | |
80 # | |
81 # Note: this code searches for specific SECTION names in the receptor .prm file | |
82 # thus forcing a naming convention for the restraint terms that is not mandatory in the C++ code itself | |
83 # | |
84 # Pharmacophore restraint file | |
85 # SECTION PHARMA | |
86 # CONSTRAINTS_FILE = explicit filename | |
87 # OPTIONAL_FILE = explicit filename | |
88 my $ph4_filename = ".const";#synchronise with RbtPharmaSF::_CONSTRAINTS_FILE | |
89 my $ph4_optfilename = ""; | |
90 my $has_ph4 = 0; | |
91 # Tether restraint file | |
92 # SECTION TETHER | |
93 # REFERENCE_FILE = suffix to the root receptor.prm filename | |
94 my $tether_suffix = "_reference.sd";#synchronise with RbtTetherSF::_REFERENCE_FILE | |
95 my $has_tether = 0; | |
96 # Nmr restraint file | |
97 # SECTION NMR | |
98 # FILENAME = explicit filename (not suffix) | |
99 my $nmr_filename = "default.noe";#synchronise with RbtTetherSF::_FILENAME | |
100 my $has_nmr = 0; | |
101 | |
102 open PRMHANDLE,$prm_file || die "Cannot read $prm_file"; | |
103 while (<PRMHANDLE>) { | |
104 my @fields = split(' ',$_); | |
105 my $nfields = scalar(@fields); | |
106 $psf_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_TOPOL_FILE') && ($nfields > 1)); | |
107 $crd_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_COORD_FILE') && ($nfields > 1)); | |
108 $mol2_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_FILE') && ($nfields > 1)); | |
109 chomp $psf_file; | |
110 chomp $crd_file; | |
111 $has_ph4 = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'PHARMA') ); | |
112 $ph4_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'CONSTRAINTS_FILE') && $has_ph4); | |
113 $ph4_optfilename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'OPTIONAL_FILE') && $has_ph4); | |
114 $has_tether = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'TETHER') ); | |
115 $tether_suffix = $fields[1] if (($nfields > 1) && ($fields[0] eq 'REFERENCE_FILE') && $has_tether); | |
116 $has_nmr = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'NMR') ); | |
117 $nmr_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'FILENAME') && $has_nmr); | |
118 } | |
119 close PRMHANDLE; | |
120 | |
121 my @src_files = ($prm_file,$as_file, $vdw1_file, $vdw5_file); | |
122 # check for either MOL2 or PSF/CRD | |
123 if ($mol2_file ne "") { | |
124 push (@src_files,"$mol2_file"); | |
125 } | |
126 else { | |
127 push (@src_files,"$psf_file"); | |
128 push (@src_files,"$crd_file"); | |
129 } | |
130 push (@src_files,"${ph4_filename}") if ($has_ph4); | |
131 push (@src_files,"${ph4_optfilename}") if (($has_ph4) && ($ph4_optfilename ne "")); | |
132 push (@src_files,"${prm_root}${tether_suffix}") if ($has_tether); | |
133 push (@src_files,"${nmr_filename}") if ($has_nmr); | |
134 | |
135 my $is_file_missing = 0; | |
136 foreach my $srcfile ( @src_files) { # search through all the target files | |
137 if(-e $srcfile) { | |
138 print "Found: $srcfile"; | |
139 my $sb = stat($srcfile); | |
140 #if ($sb->size < 1024) {print " WARNING size is suspiciously small ",$sb->size}; | |
141 print "\n"; | |
142 } | |
143 else { | |
144 print "ERROR: $srcfile is missing\n"; | |
145 $is_file_missing = 1; | |
146 } | |
147 } | |
148 | |
149 if ($is_file_missing) { | |
150 print "\nCannot continue - please generate all required target files and place in $rbthome\n"; | |
151 exit; | |
152 } | |
153 | |
154 do { | |
155 $script = get_selection(\%script_table,"docking script"); | |
156 $isOK = get_input("\nIs this OK (Y/N)","Y"); | |
157 } until ($isOK eq "Y"); | |
158 | |
159 my $flags = get_input("Enter optional rbdock flags","-ap -an"); | |
160 my $mode; | |
161 do | |
162 { | |
163 $mode = get_input("High Throughput (HT) or Exhaustive Docking (ED)", "HT"); | |
164 }until (($mode eq "HT") || ($mode eq "ED")); | |
165 | |
166 my $HT; | |
167 my $filter; | |
168 my $FILTERFILE; | |
169 my $nRedock; | |
170 my $nStages; | |
171 my @targets; | |
172 my @runs; | |
173 my $redockTarget; | |
174 my $redockTargetDef = -20.0;#Default score threshold for redocking phase | |
175 | |
176 if ($mode eq "ED") | |
177 { | |
178 $HT = 0; | |
179 my $run = get_input("Enter number of runs/ligand",50); | |
180 $flags = $flags . " -n" . $run; | |
181 $run--; | |
182 $filter = "1\nif - SCORE.NRUNS $run 0.0 -1.0,\n0\n"; | |
183 } | |
184 else | |
185 { | |
186 $HT = 1; | |
187 $nStages = get_input("Number of stages (excluding redocking, enter 0 for redocking only)",2); | |
188 | |
189 #GET AND STORE THE TARGET SCORES AND NUMBER OF RUNS FOR EACH STAGE | |
190 | |
191 if ($nStages > 0) { | |
192 for (my $n = 0; $n < $nStages; $n++) { | |
193 print "\nDOCKING STAGE $n\n"; | |
194 my $target = get_input("Enter target score",-20.0); | |
195 my $run = get_input("Enter number of runs/ligand",20); | |
196 if ($run > 0) | |
197 { | |
198 push @targets,$target; | |
199 push @runs,$run; | |
200 } | |
201 } | |
202 $nStages = @targets; | |
203 print "\n"; | |
204 #Default score threshold for redocking is equal to target score for last docking stage | |
205 $redockTargetDef = $targets[$nStages-1]; | |
206 } | |
207 | |
208 $nRedock = get_input("Enter number of redocking runs/ligand",0); | |
209 if ($nRedock > 0) { | |
210 $redockTarget = get_input("Enter target score for redocking",$redockTargetDef); | |
211 } | |
212 else | |
213 { | |
214 $redockTarget = $redockTargetDef; | |
215 } | |
216 my $nPhases = $nStages; | |
217 if ($nRedock > 0) | |
218 { | |
219 $nPhases++; | |
220 } | |
221 $filter = "$nPhases\n"; | |
222 my $nr; | |
223 for (my $n = 0; $n < $nStages ; $n++) | |
224 { | |
225 $nr = $runs[$n] - 1; | |
226 $filter .= "if - $targets[$n] SCORE.INTER 1.0 if - SCORE.NRUNS $nr 0.0 -1.0,\n"; | |
227 } | |
228 if ($nRedock > 0) | |
229 { | |
230 $nr = $nRedock - 1; | |
231 $filter .= "if - SCORE.NRUNS $nr 0.0 -1.0,\n"; | |
232 } | |
233 my %filter_table = get_filter_table($filterDir, $redockTarget); | |
234 my @wfilters; | |
235 do { | |
236 print "\nSelect filters. Only the conformations that pass all the\n", | |
237 "filters chosen AND have SCORE.INTER lower than ", $redockTarget, | |
238 "\nAND SCORE.RESTR.CAVITY < 1.0 will be written down\n"; | |
239 @wfilters = get_multiple_selection(\%filter_table,"filters"); | |
240 $isOK = get_input("\nIs this OK (Y/N)","Y"); | |
241 } until ($isOK eq "Y"); | |
242 | |
243 for (my $i = 0 ; $i < @wfilters ; $i++) # remove the no filters | |
244 { | |
245 if ($wfilters[$i] eq "1no_other_filters") | |
246 { | |
247 splice(@wfilters, $i, 1); | |
248 } | |
249 } | |
250 | |
251 my $nwfilters = @wfilters + 2; | |
252 $filter .= "$nwfilters\n"; | |
253 $filter .= "- SCORE.INTER $redockTarget,\n"; | |
254 $filter .= "- SCORE.RESTR.CAVITY 1.0,\n"; | |
255 foreach my $wfilter (@wfilters) { | |
256 open(FF, "$filterDir/$wfilter") || open(FF, "./$wfilter") || | |
257 die "cannot open $wfilter for reading: $!"; | |
258 my $line; | |
259 while ($line = <FF>){ $filter .= $line;} | |
260 $filter .= ",\n"; | |
261 } | |
262 } | |
263 | |
264 | |
265 | |
266 my $runRoot = get_input("Enter run name","rbdock"); | |
267 | |
268 my $pwd = `pwd`; | |
269 chomp $pwd; | |
270 | |
271 #Make a subdirectory for each docking stage | |
272 #plus a tmp directory | |
273 mkdir($runRoot,0755); | |
274 mkdir("$runRoot/tmp",0755); | |
275 mkdir("$runRoot/tmp/condor_logs",0755); | |
276 mkdir("$runRoot/log",0755); | |
277 mkdir("$runRoot/sd",0755); | |
278 mkdir("$runRoot/errors",0755); | |
279 chdir "$runRoot/tmp"; | |
280 | |
281 my $filterFile = "${runRoot}_filter.txt"; | |
282 print "Writing $filterFile...\n"; | |
283 open FILTERHANDLE,">$filterFile"; | |
284 print FILTERHANDLE $filter; | |
285 #WRITE THE CUSTOM DOCKING SCRIPT FOR USE BY CONDOR | |
286 #This will be the Condor "Executable" | |
287 #gzipped SD file to dock is passed as $1 argument | |
288 #Run name is passed as $2 argument | |
289 my $nqsFile = "run_${runRoot}.csh"; | |
290 print "Writing $nqsFile...\n"; | |
291 open NQSHANDLE,">$nqsFile"; | |
292 print NQSHANDLE "#!/bin/tcsh -f\n"; | |
293 print NQSHANDLE "#CONDOR EXECUTABLE SCRIPT WRITTEN BY run_rbscreen.pl\n"; | |
294 | |
295 # DM 14 Mar 2003 | |
296 #Create a tmp runtime directory on the local disk for rDock SD and log files | |
297 #These will be compressed before copying back to the submission directory | |
298 #on the file server | |
299 #There are now two tmp directories: | |
300 # /tmp/ribodock/<version> - stores the grid files etc that are common between runs | |
301 # /tmp/${PROG}.XXXXXX - stores the I/O SD and log files unique to each run | |
302 | |
303 print NQSHANDLE "set PROG = \${0:t}\n"; | |
304 print NQSHANDLE "set TMPDIR = `mktemp -d /tmp/\${PROG}.XXXXXX`\n"; | |
305 | |
306 # synchronizing target files | |
307 my $versionidx = index $rbtroot,"ribodock"; | |
308 my $targetdir = "/tmp/".substr($rbtroot,$versionidx); | |
309 print NQSHANDLE "mkdir -pv -m0775 /tmp/ribodock\n"; | |
310 print NQSHANDLE "mkdir -pv -m0775 $targetdir\n"; | |
311 print NQSHANDLE "setenv RBT_HOME $targetdir\n"; | |
312 print NQSHANDLE "rsync -avz ".join(" ",@src_files)." $targetdir\n"; | |
313 | |
314 # Copy the compressed sd file to the tmp directory and uncompress | |
315 # DM 28 Feb 2002 - use the /tmp directory on the local disk instead | |
316 # DM 14 Mar 2003 - use TMPDIR = /tmp/${PROG}.XXXXXX instead | |
317 print NQSHANDLE "set gzfile = \$1\n"; | |
318 print NQSHANDLE "set runName = \$2\n"; | |
319 print NQSHANDLE "set copied_gzfile = \${TMPDIR}/\${gzfile:t}\n"; | |
320 print NQSHANDLE "set sdfile = \${copied_gzfile:r}\n"; | |
321 print NQSHANDLE "echo Ligand_DB : \$gzfile\n"; | |
322 print NQSHANDLE "echo Local copy: \$copied_gzfile\n"; | |
323 print NQSHANDLE "echo Unzipped : \$sdfile\n"; | |
324 print NQSHANDLE "cp -f \$gzfile \$copied_gzfile\n"; | |
325 print NQSHANDLE "gunzip -f \$copied_gzfile\n"; | |
326 | |
327 my $target = $redockTarget; | |
328 print NQSHANDLE "#\n"; | |
329 print NQSHANDLE "set outRoot = \${TMPDIR}/\${runName}\n"; | |
330 print NQSHANDLE "set logFile = \${outRoot}.log\n"; | |
331 print NQSHANDLE "if (-e \$logFile) rm -f \$logFile\n"; | |
332 print NQSHANDLE "\$RBT_ROOT/bin/rbdock -i\$sdfile -o\${outRoot} -r$receptor -p$script -t$filterFile $flags > \$logFile\n"; | |
333 | |
334 # Compress the SD and log files and copy back to submission directory | |
335 # The ..._errors.sd files are generated by the pharmacophore restraints term for ligands that | |
336 # do not fulfill the feature requirements | |
337 print NQSHANDLE "if (-e \${outRoot}.sd) then\n"; | |
338 print NQSHANDLE " gzip -9vf \${outRoot}.sd\n"; | |
339 print NQSHANDLE " cp \${outRoot}.sd.gz ../sd\n"; | |
340 print NQSHANDLE "endif\n"; | |
341 print NQSHANDLE "if (-e \${logFile}) then\n"; | |
342 print NQSHANDLE " gzip -9vf \${logFile}\n"; | |
343 print NQSHANDLE " cp \${logFile}.gz ../log\n"; | |
344 print NQSHANDLE "endif\n"; | |
345 print NQSHANDLE "if (-e \${outRoot}_errors.sd) then\n"; | |
346 print NQSHANDLE " gzip -9vf \${outRoot}_errors.sd\n"; | |
347 print NQSHANDLE " cp \${outRoot}_errors.sd.gz ../errors\n"; | |
348 print NQSHANDLE "endif\n"; | |
349 | |
350 print NQSHANDLE "rm -rf \${TMPDIR}\n"; | |
351 close NQSHANDLE; | |
352 chmod 0755,$nqsFile; | |
353 | |
354 #PREPARE THE CONDOR JOB FILE | |
355 my $condorFile = "run_${runRoot}.cmd"; | |
356 print "Writing $condorFile...\n"; | |
357 open CMDHANDLE,">$condorFile"; | |
358 print CMDHANDLE "#CONDOR COMMAND FILE WRITTEN BY run_rbscreen.pl\n"; | |
359 print CMDHANDLE "universe = vanilla\n"; | |
360 print CMDHANDLE "initialdir = $pwd/$runRoot/tmp\n"; | |
361 print CMDHANDLE "environment = RBT_ROOT=$rbtroot;RBT_LIGDB=$rbtligdb;RBT_HOME=$rbthome;LD_LIBRARY_PATH=/usr/local/lib:$rbtroot/lib;\n"; | |
362 print CMDHANDLE "executable = ${nqsFile}\n"; | |
363 #Stop Condor filling your mailbox | |
364 print CMDHANDLE "Notification = Error\n"; | |
365 print CMDHANDLE "log = run_${runRoot}.log\n"; | |
366 # Added by DM, Enspiral Discovery, 28 Feb 2005 | |
367 # Prevent rDock jobs running on dedicated Catalyst nodes | |
368 print CMDHANDLE "Requirements = CATALYST_NODE =!= True\n"; | |
369 | |
370 #GET THE LIST OF COMPRESSED SD FILES TO DOCK | |
371 my $iFile=0; | |
372 foreach my $library (@libraries) { | |
373 my @libFiles = glob "$library_table{$library}"; | |
374 foreach my $libFile (@libFiles) { | |
375 $iFile++; | |
376 my $runName = "${runRoot}_$iFile"; | |
377 print CMDHANDLE "output = condor_logs/run_${runName}.out\n"; | |
378 print CMDHANDLE "error = condor_logs/run_${runName}.err\n"; | |
379 print CMDHANDLE "arguments = $libFile $runName\n"; | |
380 print CMDHANDLE "queue\n"; | |
381 } | |
382 } | |
383 close CMDHANDLE; | |
384 | |
385 #WRITE THE CUSTOM CLEANUP SCRIPT | |
386 #This will be run by the user from the command line | |
387 #after all the jobs have finished | |
388 my $cleanFile = "clean_${runRoot}.csh"; | |
389 print "Writing $cleanFile...\n"; | |
390 open CLEANHANDLE,">$cleanFile"; | |
391 print CLEANHANDLE "#!/bin/tcsh -f\n"; | |
392 print CLEANHANDLE "#CLEANUP SCRIPT WRITTEN BY run_rbscreen.pl\n"; | |
393 print CLEANHANDLE "set sdout = ../${runRoot}_raw_hits.sd\n"; | |
394 print CLEANHANDLE "if (-e \${sdout}) then\n"; | |
395 print CLEANHANDLE " echo ARE YOU SURE YOU WANT TO DO THIS\n"; | |
396 print CLEANHANDLE " echo \${sdout} already exists\n"; | |
397 print CLEANHANDLE " echo To rerun the script remove \${sdout} manually first\n"; | |
398 print CLEANHANDLE " exit\n"; | |
399 print CLEANHANDLE "endif\n"; | |
400 print CLEANHANDLE "echo All compressed SD files in ../sd/ will be concatenated into a single compressed file \${sdout}.gz\n"; | |
401 print CLEANHANDLE "touch \${sdout}\n"; | |
402 print CLEANHANDLE "foreach file (../sd/*.sd.gz)\n"; | |
403 print CLEANHANDLE " echo Sorting and concatenating \${file}...\n"; | |
404 print CLEANHANDLE " zcat \${file} | \${RBT_ROOT}/bin/sdsort -n -fSCORE -s >> \${sdout}\n"; | |
405 print CLEANHANDLE "end\n"; | |
406 print CLEANHANDLE "echo Each block of consecutive records per ligand have been presorted by SCORE "; | |
407 print CLEANHANDLE "ready for filtering\n"; | |
408 print CLEANHANDLE "echo Compressing \${sdout}...\n"; | |
409 print CLEANHANDLE "gzip -9vf \${sdout}\n"; | |
410 print CLEANHANDLE "echo DONE\n"; | |
411 print CLEANHANDLE "echo You may now safely remove the ../sd/ directory if you wish to save disk space\n"; | |
412 close CLEANHANDLE; | |
413 chmod 0755,$cleanFile; | |
414 | |
415 print "To submit, cd $runRoot/tmp; condor_submit $condorFile\n"; | |
416 print "When all jobs have completed, run $runRoot/tmp/$cleanFile to concatenate the output files\n"; |