comparison tools/rdock/bin/run_rbscreen.pl @ 2:0faa03a92843 draft default tip

Uploaded
author dzesikah
date Fri, 26 Aug 2016 10:19:49 -0400
parents
children
comparison
equal deleted inserted replaced
1:ad4bc82457e5 2:0faa03a92843
1 #!/usr/bin/perl
2 use File::stat;
3 use lib "$ENV{'RBT_ROOT'}/lib";
4 use strict;
5 require "run_rbfuncs.pl";
6
7 my $rbtroot = $ENV{'RBT_ROOT'};
8 my $rbthome = $ENV{'RBT_HOME'};
9 my $rbtligdb = $ENV{'RBT_LIGDB'};
10 my $recepDir = "$rbtroot/data/receptors";
11 my $scriptDir = "$rbtroot/data/scripts";
12 my $filterDir = "$rbtroot/data/filters";
13 my $libraryDir = "$rbtligdb/dock";
14 my $binDir = "$rbtroot/bin";
15
16 #override default directories
17 #Parse command line arguments
18 my $nArgs = scalar(@ARGV);
19
20 while (scalar(@ARGV)) {
21 my $arg = shift @ARGV;
22 if (index($arg,'-r')==0) {
23 $recepDir = substr($arg,2);
24 }
25 elsif (index($arg,'-s')==0) {
26 $scriptDir = substr($arg,2);
27 }
28 elsif (index($arg,'-l')==0) {
29 $libraryDir = substr($arg,2);
30 }
31 }
32
33
34 # Get the receptor, library and script tables
35 #my %receptor_table = get_prm_table($recepDir);
36 my %receptor_table = get_prm_table($rbthome);
37 my %script_table = get_prm_table($scriptDir);
38 my %library_table = get_dock_table($libraryDir);
39
40 #GET ALL THE INPUTS
41 #my $library = get_selection(\%library_table,"library");
42 my $isOK;
43 my @libraries;
44 my $receptor;
45 my $script;
46
47 do {
48 @libraries = get_multiple_selection(\%library_table,"library");
49 $isOK = get_input("\nIs this OK (Y/N)","Y");
50 } until ($isOK eq "Y");
51
52 do {
53 $receptor = get_selection(\%receptor_table,"receptor");
54 $isOK = get_input("\nIs this OK (Y/N)","Y");
55 } until ($isOK eq "Y");
56
57 # (SJ) check whether all the required files are in the dir
58 # DM 7 Feb 2002 - get PSF/CRD filenames from PRM file
59 print "\nLooking for all required target files in $rbthome...\n\n";
60 my $prm_file = "$rbthome/".$receptor;
61 my $prm_root = substr($prm_file, 0, rindex($prm_file,"."));
62 my $as_file = "${prm_root}.as";
63 my $vdw1_file = "${prm_root}_vdw1.grd";
64 my $vdw5_file = "${prm_root}_vdw5.grd";
65
66 #Set the default file names equal to their parameter names so that
67 #error messages make sense if the parameters are missing completely from
68 #the .prm file.
69 my $psf_file = "RECEPTOR_TOPOL_FILE";
70 my $crd_file = "RECEPTOR_COORD_FILE";
71 my $mol2_file;
72
73 # DM 12 May 2003
74 # Optional target files for experimental restraints
75 #
76 # These default restraint suffixes and filenames should be synchronised
77 # with the default parameter values in the relevant C++ class source files ($RBT_ROOT/fw/src/.cxx)
78 # They are used if the user does not explicitly define the parameter in the
79 # receptor .prm file
80 #
81 # Note: this code searches for specific SECTION names in the receptor .prm file
82 # thus forcing a naming convention for the restraint terms that is not mandatory in the C++ code itself
83 #
84 # Pharmacophore restraint file
85 # SECTION PHARMA
86 # CONSTRAINTS_FILE = explicit filename
87 # OPTIONAL_FILE = explicit filename
88 my $ph4_filename = ".const";#synchronise with RbtPharmaSF::_CONSTRAINTS_FILE
89 my $ph4_optfilename = "";
90 my $has_ph4 = 0;
91 # Tether restraint file
92 # SECTION TETHER
93 # REFERENCE_FILE = suffix to the root receptor.prm filename
94 my $tether_suffix = "_reference.sd";#synchronise with RbtTetherSF::_REFERENCE_FILE
95 my $has_tether = 0;
96 # Nmr restraint file
97 # SECTION NMR
98 # FILENAME = explicit filename (not suffix)
99 my $nmr_filename = "default.noe";#synchronise with RbtTetherSF::_FILENAME
100 my $has_nmr = 0;
101
102 open PRMHANDLE,$prm_file || die "Cannot read $prm_file";
103 while (<PRMHANDLE>) {
104 my @fields = split(' ',$_);
105 my $nfields = scalar(@fields);
106 $psf_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_TOPOL_FILE') && ($nfields > 1));
107 $crd_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_COORD_FILE') && ($nfields > 1));
108 $mol2_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_FILE') && ($nfields > 1));
109 chomp $psf_file;
110 chomp $crd_file;
111 $has_ph4 = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'PHARMA') );
112 $ph4_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'CONSTRAINTS_FILE') && $has_ph4);
113 $ph4_optfilename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'OPTIONAL_FILE') && $has_ph4);
114 $has_tether = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'TETHER') );
115 $tether_suffix = $fields[1] if (($nfields > 1) && ($fields[0] eq 'REFERENCE_FILE') && $has_tether);
116 $has_nmr = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'NMR') );
117 $nmr_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'FILENAME') && $has_nmr);
118 }
119 close PRMHANDLE;
120
121 my @src_files = ($prm_file,$as_file, $vdw1_file, $vdw5_file);
122 # check for either MOL2 or PSF/CRD
123 if ($mol2_file ne "") {
124 push (@src_files,"$mol2_file");
125 }
126 else {
127 push (@src_files,"$psf_file");
128 push (@src_files,"$crd_file");
129 }
130 push (@src_files,"${ph4_filename}") if ($has_ph4);
131 push (@src_files,"${ph4_optfilename}") if (($has_ph4) && ($ph4_optfilename ne ""));
132 push (@src_files,"${prm_root}${tether_suffix}") if ($has_tether);
133 push (@src_files,"${nmr_filename}") if ($has_nmr);
134
135 my $is_file_missing = 0;
136 foreach my $srcfile ( @src_files) { # search through all the target files
137 if(-e $srcfile) {
138 print "Found: $srcfile";
139 my $sb = stat($srcfile);
140 #if ($sb->size < 1024) {print " WARNING size is suspiciously small ",$sb->size};
141 print "\n";
142 }
143 else {
144 print "ERROR: $srcfile is missing\n";
145 $is_file_missing = 1;
146 }
147 }
148
149 if ($is_file_missing) {
150 print "\nCannot continue - please generate all required target files and place in $rbthome\n";
151 exit;
152 }
153
154 do {
155 $script = get_selection(\%script_table,"docking script");
156 $isOK = get_input("\nIs this OK (Y/N)","Y");
157 } until ($isOK eq "Y");
158
159 my $flags = get_input("Enter optional rbdock flags","-ap -an");
160 my $mode;
161 do
162 {
163 $mode = get_input("High Throughput (HT) or Exhaustive Docking (ED)", "HT");
164 }until (($mode eq "HT") || ($mode eq "ED"));
165
166 my $HT;
167 my $filter;
168 my $FILTERFILE;
169 my $nRedock;
170 my $nStages;
171 my @targets;
172 my @runs;
173 my $redockTarget;
174 my $redockTargetDef = -20.0;#Default score threshold for redocking phase
175
176 if ($mode eq "ED")
177 {
178 $HT = 0;
179 my $run = get_input("Enter number of runs/ligand",50);
180 $flags = $flags . " -n" . $run;
181 $run--;
182 $filter = "1\nif - SCORE.NRUNS $run 0.0 -1.0,\n0\n";
183 }
184 else
185 {
186 $HT = 1;
187 $nStages = get_input("Number of stages (excluding redocking, enter 0 for redocking only)",2);
188
189 #GET AND STORE THE TARGET SCORES AND NUMBER OF RUNS FOR EACH STAGE
190
191 if ($nStages > 0) {
192 for (my $n = 0; $n < $nStages; $n++) {
193 print "\nDOCKING STAGE $n\n";
194 my $target = get_input("Enter target score",-20.0);
195 my $run = get_input("Enter number of runs/ligand",20);
196 if ($run > 0)
197 {
198 push @targets,$target;
199 push @runs,$run;
200 }
201 }
202 $nStages = @targets;
203 print "\n";
204 #Default score threshold for redocking is equal to target score for last docking stage
205 $redockTargetDef = $targets[$nStages-1];
206 }
207
208 $nRedock = get_input("Enter number of redocking runs/ligand",0);
209 if ($nRedock > 0) {
210 $redockTarget = get_input("Enter target score for redocking",$redockTargetDef);
211 }
212 else
213 {
214 $redockTarget = $redockTargetDef;
215 }
216 my $nPhases = $nStages;
217 if ($nRedock > 0)
218 {
219 $nPhases++;
220 }
221 $filter = "$nPhases\n";
222 my $nr;
223 for (my $n = 0; $n < $nStages ; $n++)
224 {
225 $nr = $runs[$n] - 1;
226 $filter .= "if - $targets[$n] SCORE.INTER 1.0 if - SCORE.NRUNS $nr 0.0 -1.0,\n";
227 }
228 if ($nRedock > 0)
229 {
230 $nr = $nRedock - 1;
231 $filter .= "if - SCORE.NRUNS $nr 0.0 -1.0,\n";
232 }
233 my %filter_table = get_filter_table($filterDir, $redockTarget);
234 my @wfilters;
235 do {
236 print "\nSelect filters. Only the conformations that pass all the\n",
237 "filters chosen AND have SCORE.INTER lower than ", $redockTarget,
238 "\nAND SCORE.RESTR.CAVITY < 1.0 will be written down\n";
239 @wfilters = get_multiple_selection(\%filter_table,"filters");
240 $isOK = get_input("\nIs this OK (Y/N)","Y");
241 } until ($isOK eq "Y");
242
243 for (my $i = 0 ; $i < @wfilters ; $i++) # remove the no filters
244 {
245 if ($wfilters[$i] eq "1no_other_filters")
246 {
247 splice(@wfilters, $i, 1);
248 }
249 }
250
251 my $nwfilters = @wfilters + 2;
252 $filter .= "$nwfilters\n";
253 $filter .= "- SCORE.INTER $redockTarget,\n";
254 $filter .= "- SCORE.RESTR.CAVITY 1.0,\n";
255 foreach my $wfilter (@wfilters) {
256 open(FF, "$filterDir/$wfilter") || open(FF, "./$wfilter") ||
257 die "cannot open $wfilter for reading: $!";
258 my $line;
259 while ($line = <FF>){ $filter .= $line;}
260 $filter .= ",\n";
261 }
262 }
263
264
265
266 my $runRoot = get_input("Enter run name","rbdock");
267
268 my $pwd = `pwd`;
269 chomp $pwd;
270
271 #Make a subdirectory for each docking stage
272 #plus a tmp directory
273 mkdir($runRoot,0755);
274 mkdir("$runRoot/tmp",0755);
275 mkdir("$runRoot/tmp/condor_logs",0755);
276 mkdir("$runRoot/log",0755);
277 mkdir("$runRoot/sd",0755);
278 mkdir("$runRoot/errors",0755);
279 chdir "$runRoot/tmp";
280
281 my $filterFile = "${runRoot}_filter.txt";
282 print "Writing $filterFile...\n";
283 open FILTERHANDLE,">$filterFile";
284 print FILTERHANDLE $filter;
285 #WRITE THE CUSTOM DOCKING SCRIPT FOR USE BY CONDOR
286 #This will be the Condor "Executable"
287 #gzipped SD file to dock is passed as $1 argument
288 #Run name is passed as $2 argument
289 my $nqsFile = "run_${runRoot}.csh";
290 print "Writing $nqsFile...\n";
291 open NQSHANDLE,">$nqsFile";
292 print NQSHANDLE "#!/bin/tcsh -f\n";
293 print NQSHANDLE "#CONDOR EXECUTABLE SCRIPT WRITTEN BY run_rbscreen.pl\n";
294
295 # DM 14 Mar 2003
296 #Create a tmp runtime directory on the local disk for rDock SD and log files
297 #These will be compressed before copying back to the submission directory
298 #on the file server
299 #There are now two tmp directories:
300 # /tmp/ribodock/<version> - stores the grid files etc that are common between runs
301 # /tmp/${PROG}.XXXXXX - stores the I/O SD and log files unique to each run
302
303 print NQSHANDLE "set PROG = \${0:t}\n";
304 print NQSHANDLE "set TMPDIR = `mktemp -d /tmp/\${PROG}.XXXXXX`\n";
305
306 # synchronizing target files
307 my $versionidx = index $rbtroot,"ribodock";
308 my $targetdir = "/tmp/".substr($rbtroot,$versionidx);
309 print NQSHANDLE "mkdir -pv -m0775 /tmp/ribodock\n";
310 print NQSHANDLE "mkdir -pv -m0775 $targetdir\n";
311 print NQSHANDLE "setenv RBT_HOME $targetdir\n";
312 print NQSHANDLE "rsync -avz ".join(" ",@src_files)." $targetdir\n";
313
314 # Copy the compressed sd file to the tmp directory and uncompress
315 # DM 28 Feb 2002 - use the /tmp directory on the local disk instead
316 # DM 14 Mar 2003 - use TMPDIR = /tmp/${PROG}.XXXXXX instead
317 print NQSHANDLE "set gzfile = \$1\n";
318 print NQSHANDLE "set runName = \$2\n";
319 print NQSHANDLE "set copied_gzfile = \${TMPDIR}/\${gzfile:t}\n";
320 print NQSHANDLE "set sdfile = \${copied_gzfile:r}\n";
321 print NQSHANDLE "echo Ligand_DB : \$gzfile\n";
322 print NQSHANDLE "echo Local copy: \$copied_gzfile\n";
323 print NQSHANDLE "echo Unzipped : \$sdfile\n";
324 print NQSHANDLE "cp -f \$gzfile \$copied_gzfile\n";
325 print NQSHANDLE "gunzip -f \$copied_gzfile\n";
326
327 my $target = $redockTarget;
328 print NQSHANDLE "#\n";
329 print NQSHANDLE "set outRoot = \${TMPDIR}/\${runName}\n";
330 print NQSHANDLE "set logFile = \${outRoot}.log\n";
331 print NQSHANDLE "if (-e \$logFile) rm -f \$logFile\n";
332 print NQSHANDLE "\$RBT_ROOT/bin/rbdock -i\$sdfile -o\${outRoot} -r$receptor -p$script -t$filterFile $flags > \$logFile\n";
333
334 # Compress the SD and log files and copy back to submission directory
335 # The ..._errors.sd files are generated by the pharmacophore restraints term for ligands that
336 # do not fulfill the feature requirements
337 print NQSHANDLE "if (-e \${outRoot}.sd) then\n";
338 print NQSHANDLE " gzip -9vf \${outRoot}.sd\n";
339 print NQSHANDLE " cp \${outRoot}.sd.gz ../sd\n";
340 print NQSHANDLE "endif\n";
341 print NQSHANDLE "if (-e \${logFile}) then\n";
342 print NQSHANDLE " gzip -9vf \${logFile}\n";
343 print NQSHANDLE " cp \${logFile}.gz ../log\n";
344 print NQSHANDLE "endif\n";
345 print NQSHANDLE "if (-e \${outRoot}_errors.sd) then\n";
346 print NQSHANDLE " gzip -9vf \${outRoot}_errors.sd\n";
347 print NQSHANDLE " cp \${outRoot}_errors.sd.gz ../errors\n";
348 print NQSHANDLE "endif\n";
349
350 print NQSHANDLE "rm -rf \${TMPDIR}\n";
351 close NQSHANDLE;
352 chmod 0755,$nqsFile;
353
354 #PREPARE THE CONDOR JOB FILE
355 my $condorFile = "run_${runRoot}.cmd";
356 print "Writing $condorFile...\n";
357 open CMDHANDLE,">$condorFile";
358 print CMDHANDLE "#CONDOR COMMAND FILE WRITTEN BY run_rbscreen.pl\n";
359 print CMDHANDLE "universe = vanilla\n";
360 print CMDHANDLE "initialdir = $pwd/$runRoot/tmp\n";
361 print CMDHANDLE "environment = RBT_ROOT=$rbtroot;RBT_LIGDB=$rbtligdb;RBT_HOME=$rbthome;LD_LIBRARY_PATH=/usr/local/lib:$rbtroot/lib;\n";
362 print CMDHANDLE "executable = ${nqsFile}\n";
363 #Stop Condor filling your mailbox
364 print CMDHANDLE "Notification = Error\n";
365 print CMDHANDLE "log = run_${runRoot}.log\n";
366 # Added by DM, Enspiral Discovery, 28 Feb 2005
367 # Prevent rDock jobs running on dedicated Catalyst nodes
368 print CMDHANDLE "Requirements = CATALYST_NODE =!= True\n";
369
370 #GET THE LIST OF COMPRESSED SD FILES TO DOCK
371 my $iFile=0;
372 foreach my $library (@libraries) {
373 my @libFiles = glob "$library_table{$library}";
374 foreach my $libFile (@libFiles) {
375 $iFile++;
376 my $runName = "${runRoot}_$iFile";
377 print CMDHANDLE "output = condor_logs/run_${runName}.out\n";
378 print CMDHANDLE "error = condor_logs/run_${runName}.err\n";
379 print CMDHANDLE "arguments = $libFile $runName\n";
380 print CMDHANDLE "queue\n";
381 }
382 }
383 close CMDHANDLE;
384
385 #WRITE THE CUSTOM CLEANUP SCRIPT
386 #This will be run by the user from the command line
387 #after all the jobs have finished
388 my $cleanFile = "clean_${runRoot}.csh";
389 print "Writing $cleanFile...\n";
390 open CLEANHANDLE,">$cleanFile";
391 print CLEANHANDLE "#!/bin/tcsh -f\n";
392 print CLEANHANDLE "#CLEANUP SCRIPT WRITTEN BY run_rbscreen.pl\n";
393 print CLEANHANDLE "set sdout = ../${runRoot}_raw_hits.sd\n";
394 print CLEANHANDLE "if (-e \${sdout}) then\n";
395 print CLEANHANDLE " echo ARE YOU SURE YOU WANT TO DO THIS\n";
396 print CLEANHANDLE " echo \${sdout} already exists\n";
397 print CLEANHANDLE " echo To rerun the script remove \${sdout} manually first\n";
398 print CLEANHANDLE " exit\n";
399 print CLEANHANDLE "endif\n";
400 print CLEANHANDLE "echo All compressed SD files in ../sd/ will be concatenated into a single compressed file \${sdout}.gz\n";
401 print CLEANHANDLE "touch \${sdout}\n";
402 print CLEANHANDLE "foreach file (../sd/*.sd.gz)\n";
403 print CLEANHANDLE " echo Sorting and concatenating \${file}...\n";
404 print CLEANHANDLE " zcat \${file} | \${RBT_ROOT}/bin/sdsort -n -fSCORE -s >> \${sdout}\n";
405 print CLEANHANDLE "end\n";
406 print CLEANHANDLE "echo Each block of consecutive records per ligand have been presorted by SCORE ";
407 print CLEANHANDLE "ready for filtering\n";
408 print CLEANHANDLE "echo Compressing \${sdout}...\n";
409 print CLEANHANDLE "gzip -9vf \${sdout}\n";
410 print CLEANHANDLE "echo DONE\n";
411 print CLEANHANDLE "echo You may now safely remove the ../sd/ directory if you wish to save disk space\n";
412 close CLEANHANDLE;
413 chmod 0755,$cleanFile;
414
415 print "To submit, cd $runRoot/tmp; condor_submit $condorFile\n";
416 print "When all jobs have completed, run $runRoot/tmp/$cleanFile to concatenate the output files\n";