3
|
1 #!/usr/bin/perl
|
|
2 use File::stat;
|
|
3 use lib "$ENV{'RBT_ROOT'}/lib";
|
|
4 use strict;
|
|
5 require "run_rbfuncs.pl";
|
|
6
|
|
7 my $rbtroot = $ENV{'RBT_ROOT'};
|
|
8 my $rbthome = $ENV{'RBT_HOME'};
|
|
9 my $rbtligdb = $ENV{'RBT_LIGDB'};
|
|
10 my $recepDir = "$rbtroot/data/receptors";
|
|
11 my $scriptDir = "$rbtroot/data/scripts";
|
|
12 my $filterDir = "$rbtroot/data/filters";
|
|
13 my $libraryDir = "$rbtligdb/dock";
|
|
14 my $binDir = "$rbtroot/bin";
|
|
15
|
|
16 #override default directories
|
|
17 #Parse command line arguments
|
|
18 my $nArgs = scalar(@ARGV);
|
|
19
|
|
20 while (scalar(@ARGV)) {
|
|
21 my $arg = shift @ARGV;
|
|
22 if (index($arg,'-r')==0) {
|
|
23 $recepDir = substr($arg,2);
|
|
24 }
|
|
25 elsif (index($arg,'-s')==0) {
|
|
26 $scriptDir = substr($arg,2);
|
|
27 }
|
|
28 elsif (index($arg,'-l')==0) {
|
|
29 $libraryDir = substr($arg,2);
|
|
30 }
|
|
31 }
|
|
32
|
|
33
|
|
34 # Get the receptor, library and script tables
|
|
35 #my %receptor_table = get_prm_table($recepDir);
|
|
36 my %receptor_table = get_prm_table($rbthome);
|
|
37 my %script_table = get_prm_table($scriptDir);
|
|
38 my %library_table = get_dock_table($libraryDir);
|
|
39
|
|
40 #GET ALL THE INPUTS
|
|
41 #my $library = get_selection(\%library_table,"library");
|
|
42 my $isOK;
|
|
43 my @libraries;
|
|
44 my $receptor;
|
|
45 my $script;
|
|
46
|
|
47 do {
|
|
48 @libraries = get_multiple_selection(\%library_table,"library");
|
|
49 $isOK = get_input("\nIs this OK (Y/N)","Y");
|
|
50 } until ($isOK eq "Y");
|
|
51
|
|
52 do {
|
|
53 $receptor = get_selection(\%receptor_table,"receptor");
|
|
54 $isOK = get_input("\nIs this OK (Y/N)","Y");
|
|
55 } until ($isOK eq "Y");
|
|
56
|
|
57 # (SJ) check whether all the required files are in the dir
|
|
58 # DM 7 Feb 2002 - get PSF/CRD filenames from PRM file
|
|
59 print "\nLooking for all required target files in $rbthome...\n\n";
|
|
60 my $prm_file = "$rbthome/".$receptor;
|
|
61 my $prm_root = substr($prm_file, 0, rindex($prm_file,"."));
|
|
62 my $as_file = "${prm_root}.as";
|
|
63 my $vdw1_file = "${prm_root}_vdw1.grd";
|
|
64 my $vdw5_file = "${prm_root}_vdw5.grd";
|
|
65
|
|
66 #Set the default file names equal to their parameter names so that
|
|
67 #error messages make sense if the parameters are missing completely from
|
|
68 #the .prm file.
|
|
69 my $psf_file = "RECEPTOR_TOPOL_FILE";
|
|
70 my $crd_file = "RECEPTOR_COORD_FILE";
|
|
71 my $mol2_file;
|
|
72
|
|
73 # DM 12 May 2003
|
|
74 # Optional target files for experimental restraints
|
|
75 #
|
|
76 # These default restraint suffixes and filenames should be synchronised
|
|
77 # with the default parameter values in the relevant C++ class source files ($RBT_ROOT/fw/src/.cxx)
|
|
78 # They are used if the user does not explicitly define the parameter in the
|
|
79 # receptor .prm file
|
|
80 #
|
|
81 # Note: this code searches for specific SECTION names in the receptor .prm file
|
|
82 # thus forcing a naming convention for the restraint terms that is not mandatory in the C++ code itself
|
|
83 #
|
|
84 # Pharmacophore restraint file
|
|
85 # SECTION PHARMA
|
|
86 # CONSTRAINTS_FILE = explicit filename
|
|
87 # OPTIONAL_FILE = explicit filename
|
|
88 my $ph4_filename = ".const";#synchronise with RbtPharmaSF::_CONSTRAINTS_FILE
|
|
89 my $ph4_optfilename = "";
|
|
90 my $has_ph4 = 0;
|
|
91 # Tether restraint file
|
|
92 # SECTION TETHER
|
|
93 # REFERENCE_FILE = suffix to the root receptor.prm filename
|
|
94 my $tether_suffix = "_reference.sd";#synchronise with RbtTetherSF::_REFERENCE_FILE
|
|
95 my $has_tether = 0;
|
|
96 # Nmr restraint file
|
|
97 # SECTION NMR
|
|
98 # FILENAME = explicit filename (not suffix)
|
|
99 my $nmr_filename = "default.noe";#synchronise with RbtTetherSF::_FILENAME
|
|
100 my $has_nmr = 0;
|
|
101
|
|
102 open PRMHANDLE,$prm_file || die "Cannot read $prm_file";
|
|
103 while (<PRMHANDLE>) {
|
|
104 my @fields = split(' ',$_);
|
|
105 my $nfields = scalar(@fields);
|
|
106 $psf_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_TOPOL_FILE') && ($nfields > 1));
|
|
107 $crd_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_COORD_FILE') && ($nfields > 1));
|
|
108 $mol2_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_FILE') && ($nfields > 1));
|
|
109 chomp $psf_file;
|
|
110 chomp $crd_file;
|
|
111 $has_ph4 = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'PHARMA') );
|
|
112 $ph4_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'CONSTRAINTS_FILE') && $has_ph4);
|
|
113 $ph4_optfilename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'OPTIONAL_FILE') && $has_ph4);
|
|
114 $has_tether = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'TETHER') );
|
|
115 $tether_suffix = $fields[1] if (($nfields > 1) && ($fields[0] eq 'REFERENCE_FILE') && $has_tether);
|
|
116 $has_nmr = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'NMR') );
|
|
117 $nmr_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'FILENAME') && $has_nmr);
|
|
118 }
|
|
119 close PRMHANDLE;
|
|
120
|
|
121 my @src_files = ($prm_file,$as_file, $vdw1_file, $vdw5_file);
|
|
122 # check for either MOL2 or PSF/CRD
|
|
123 if ($mol2_file ne "") {
|
|
124 push (@src_files,"$mol2_file");
|
|
125 }
|
|
126 else {
|
|
127 push (@src_files,"$psf_file");
|
|
128 push (@src_files,"$crd_file");
|
|
129 }
|
|
130 push (@src_files,"${ph4_filename}") if ($has_ph4);
|
|
131 push (@src_files,"${ph4_optfilename}") if (($has_ph4) && ($ph4_optfilename ne ""));
|
|
132 push (@src_files,"${prm_root}${tether_suffix}") if ($has_tether);
|
|
133 push (@src_files,"${nmr_filename}") if ($has_nmr);
|
|
134
|
|
135 my $is_file_missing = 0;
|
|
136 foreach my $srcfile ( @src_files) { # search through all the target files
|
|
137 if(-e $srcfile) {
|
|
138 print "Found: $srcfile";
|
|
139 my $sb = stat($srcfile);
|
|
140 #if ($sb->size < 1024) {print " WARNING size is suspiciously small ",$sb->size};
|
|
141 print "\n";
|
|
142 }
|
|
143 else {
|
|
144 print "ERROR: $srcfile is missing\n";
|
|
145 $is_file_missing = 1;
|
|
146 }
|
|
147 }
|
|
148
|
|
149 if ($is_file_missing) {
|
|
150 print "\nCannot continue - please generate all required target files and place in $rbthome\n";
|
|
151 exit;
|
|
152 }
|
|
153
|
|
154 do {
|
|
155 $script = get_selection(\%script_table,"docking script");
|
|
156 $isOK = get_input("\nIs this OK (Y/N)","Y");
|
|
157 } until ($isOK eq "Y");
|
|
158
|
|
159 my $flags = get_input("Enter optional rbdock flags","-ap -an");
|
|
160 my $mode;
|
|
161 do
|
|
162 {
|
|
163 $mode = get_input("High Throughput (HT) or Exhaustive Docking (ED)", "HT");
|
|
164 }until (($mode eq "HT") || ($mode eq "ED"));
|
|
165
|
|
166 my $HT;
|
|
167 my $filter;
|
|
168 my $FILTERFILE;
|
|
169 my $nRedock;
|
|
170 my $nStages;
|
|
171 my @targets;
|
|
172 my @runs;
|
|
173 my $redockTarget;
|
|
174 my $redockTargetDef = -20.0;#Default score threshold for redocking phase
|
|
175
|
|
176 if ($mode eq "ED")
|
|
177 {
|
|
178 $HT = 0;
|
|
179 my $run = get_input("Enter number of runs/ligand",50);
|
|
180 $flags = $flags . " -n" . $run;
|
|
181 $run--;
|
|
182 $filter = "1\nif - SCORE.NRUNS $run 0.0 -1.0,\n0\n";
|
|
183 }
|
|
184 else
|
|
185 {
|
|
186 $HT = 1;
|
|
187 $nStages = get_input("Number of stages (excluding redocking, enter 0 for redocking only)",2);
|
|
188
|
|
189 #GET AND STORE THE TARGET SCORES AND NUMBER OF RUNS FOR EACH STAGE
|
|
190
|
|
191 if ($nStages > 0) {
|
|
192 for (my $n = 0; $n < $nStages; $n++) {
|
|
193 print "\nDOCKING STAGE $n\n";
|
|
194 my $target = get_input("Enter target score",-20.0);
|
|
195 my $run = get_input("Enter number of runs/ligand",20);
|
|
196 if ($run > 0)
|
|
197 {
|
|
198 push @targets,$target;
|
|
199 push @runs,$run;
|
|
200 }
|
|
201 }
|
|
202 $nStages = @targets;
|
|
203 print "\n";
|
|
204 #Default score threshold for redocking is equal to target score for last docking stage
|
|
205 $redockTargetDef = $targets[$nStages-1];
|
|
206 }
|
|
207
|
|
208 $nRedock = get_input("Enter number of redocking runs/ligand",0);
|
|
209 if ($nRedock > 0) {
|
|
210 $redockTarget = get_input("Enter target score for redocking",$redockTargetDef);
|
|
211 }
|
|
212 else
|
|
213 {
|
|
214 $redockTarget = $redockTargetDef;
|
|
215 }
|
|
216 my $nPhases = $nStages;
|
|
217 if ($nRedock > 0)
|
|
218 {
|
|
219 $nPhases++;
|
|
220 }
|
|
221 $filter = "$nPhases\n";
|
|
222 my $nr;
|
|
223 for (my $n = 0; $n < $nStages ; $n++)
|
|
224 {
|
|
225 $nr = $runs[$n] - 1;
|
|
226 $filter .= "if - $targets[$n] SCORE.INTER 1.0 if - SCORE.NRUNS $nr 0.0 -1.0,\n";
|
|
227 }
|
|
228 if ($nRedock > 0)
|
|
229 {
|
|
230 $nr = $nRedock - 1;
|
|
231 $filter .= "if - SCORE.NRUNS $nr 0.0 -1.0,\n";
|
|
232 }
|
|
233 my %filter_table = get_filter_table($filterDir, $redockTarget);
|
|
234 my @wfilters;
|
|
235 do {
|
|
236 print "\nSelect filters. Only the conformations that pass all the\n",
|
|
237 "filters chosen AND have SCORE.INTER lower than ", $redockTarget,
|
|
238 "\nAND SCORE.RESTR.CAVITY < 1.0 will be written down\n";
|
|
239 @wfilters = get_multiple_selection(\%filter_table,"filters");
|
|
240 $isOK = get_input("\nIs this OK (Y/N)","Y");
|
|
241 } until ($isOK eq "Y");
|
|
242
|
|
243 for (my $i = 0 ; $i < @wfilters ; $i++) # remove the no filters
|
|
244 {
|
|
245 if ($wfilters[$i] eq "1no_other_filters")
|
|
246 {
|
|
247 splice(@wfilters, $i, 1);
|
|
248 }
|
|
249 }
|
|
250
|
|
251 my $nwfilters = @wfilters + 2;
|
|
252 $filter .= "$nwfilters\n";
|
|
253 $filter .= "- SCORE.INTER $redockTarget,\n";
|
|
254 $filter .= "- SCORE.RESTR.CAVITY 1.0,\n";
|
|
255 foreach my $wfilter (@wfilters) {
|
|
256 open(FF, "$filterDir/$wfilter") || open(FF, "./$wfilter") ||
|
|
257 die "cannot open $wfilter for reading: $!";
|
|
258 my $line;
|
|
259 while ($line = <FF>){ $filter .= $line;}
|
|
260 $filter .= ",\n";
|
|
261 }
|
|
262 }
|
|
263
|
|
264
|
|
265
|
|
266 my $runRoot = get_input("Enter run name","rbdock");
|
|
267
|
|
268 my $pwd = `pwd`;
|
|
269 chomp $pwd;
|
|
270
|
|
271 #Make a subdirectory for each docking stage
|
|
272 #plus a tmp directory
|
|
273 mkdir($runRoot,0755);
|
|
274 mkdir("$runRoot/tmp",0755);
|
|
275 mkdir("$runRoot/tmp/condor_logs",0755);
|
|
276 mkdir("$runRoot/log",0755);
|
|
277 mkdir("$runRoot/sd",0755);
|
|
278 mkdir("$runRoot/errors",0755);
|
|
279 chdir "$runRoot/tmp";
|
|
280
|
|
281 my $filterFile = "${runRoot}_filter.txt";
|
|
282 print "Writing $filterFile...\n";
|
|
283 open FILTERHANDLE,">$filterFile";
|
|
284 print FILTERHANDLE $filter;
|
|
285 #WRITE THE CUSTOM DOCKING SCRIPT FOR USE BY CONDOR
|
|
286 #This will be the Condor "Executable"
|
|
287 #gzipped SD file to dock is passed as $1 argument
|
|
288 #Run name is passed as $2 argument
|
|
289 my $nqsFile = "run_${runRoot}.csh";
|
|
290 print "Writing $nqsFile...\n";
|
|
291 open NQSHANDLE,">$nqsFile";
|
|
292 print NQSHANDLE "#!/bin/tcsh -f\n";
|
|
293 print NQSHANDLE "#CONDOR EXECUTABLE SCRIPT WRITTEN BY run_rbscreen.pl\n";
|
|
294
|
|
295 # DM 14 Mar 2003
|
|
296 #Create a tmp runtime directory on the local disk for rDock SD and log files
|
|
297 #These will be compressed before copying back to the submission directory
|
|
298 #on the file server
|
|
299 #There are now two tmp directories:
|
|
300 # /tmp/ribodock/<version> - stores the grid files etc that are common between runs
|
|
301 # /tmp/${PROG}.XXXXXX - stores the I/O SD and log files unique to each run
|
|
302
|
|
303 print NQSHANDLE "set PROG = \${0:t}\n";
|
|
304 print NQSHANDLE "set TMPDIR = `mktemp -d /tmp/\${PROG}.XXXXXX`\n";
|
|
305
|
|
306 # synchronizing target files
|
|
307 my $versionidx = index $rbtroot,"ribodock";
|
|
308 my $targetdir = "/tmp/".substr($rbtroot,$versionidx);
|
|
309 print NQSHANDLE "mkdir -pv -m0775 /tmp/ribodock\n";
|
|
310 print NQSHANDLE "mkdir -pv -m0775 $targetdir\n";
|
|
311 print NQSHANDLE "setenv RBT_HOME $targetdir\n";
|
|
312 print NQSHANDLE "rsync -avz ".join(" ",@src_files)." $targetdir\n";
|
|
313
|
|
314 # Copy the compressed sd file to the tmp directory and uncompress
|
|
315 # DM 28 Feb 2002 - use the /tmp directory on the local disk instead
|
|
316 # DM 14 Mar 2003 - use TMPDIR = /tmp/${PROG}.XXXXXX instead
|
|
317 print NQSHANDLE "set gzfile = \$1\n";
|
|
318 print NQSHANDLE "set runName = \$2\n";
|
|
319 print NQSHANDLE "set copied_gzfile = \${TMPDIR}/\${gzfile:t}\n";
|
|
320 print NQSHANDLE "set sdfile = \${copied_gzfile:r}\n";
|
|
321 print NQSHANDLE "echo Ligand_DB : \$gzfile\n";
|
|
322 print NQSHANDLE "echo Local copy: \$copied_gzfile\n";
|
|
323 print NQSHANDLE "echo Unzipped : \$sdfile\n";
|
|
324 print NQSHANDLE "cp -f \$gzfile \$copied_gzfile\n";
|
|
325 print NQSHANDLE "gunzip -f \$copied_gzfile\n";
|
|
326
|
|
327 my $target = $redockTarget;
|
|
328 print NQSHANDLE "#\n";
|
|
329 print NQSHANDLE "set outRoot = \${TMPDIR}/\${runName}\n";
|
|
330 print NQSHANDLE "set logFile = \${outRoot}.log\n";
|
|
331 print NQSHANDLE "if (-e \$logFile) rm -f \$logFile\n";
|
|
332 print NQSHANDLE "\$RBT_ROOT/bin/rbdock -i\$sdfile -o\${outRoot} -r$receptor -p$script -t$filterFile $flags > \$logFile\n";
|
|
333
|
|
334 # Compress the SD and log files and copy back to submission directory
|
|
335 # The ..._errors.sd files are generated by the pharmacophore restraints term for ligands that
|
|
336 # do not fulfill the feature requirements
|
|
337 print NQSHANDLE "if (-e \${outRoot}.sd) then\n";
|
|
338 print NQSHANDLE " gzip -9vf \${outRoot}.sd\n";
|
|
339 print NQSHANDLE " cp \${outRoot}.sd.gz ../sd\n";
|
|
340 print NQSHANDLE "endif\n";
|
|
341 print NQSHANDLE "if (-e \${logFile}) then\n";
|
|
342 print NQSHANDLE " gzip -9vf \${logFile}\n";
|
|
343 print NQSHANDLE " cp \${logFile}.gz ../log\n";
|
|
344 print NQSHANDLE "endif\n";
|
|
345 print NQSHANDLE "if (-e \${outRoot}_errors.sd) then\n";
|
|
346 print NQSHANDLE " gzip -9vf \${outRoot}_errors.sd\n";
|
|
347 print NQSHANDLE " cp \${outRoot}_errors.sd.gz ../errors\n";
|
|
348 print NQSHANDLE "endif\n";
|
|
349
|
|
350 print NQSHANDLE "rm -rf \${TMPDIR}\n";
|
|
351 close NQSHANDLE;
|
|
352 chmod 0755,$nqsFile;
|
|
353
|
|
354 #PREPARE THE CONDOR JOB FILE
|
|
355 my $condorFile = "run_${runRoot}.cmd";
|
|
356 print "Writing $condorFile...\n";
|
|
357 open CMDHANDLE,">$condorFile";
|
|
358 print CMDHANDLE "#CONDOR COMMAND FILE WRITTEN BY run_rbscreen.pl\n";
|
|
359 print CMDHANDLE "universe = vanilla\n";
|
|
360 print CMDHANDLE "initialdir = $pwd/$runRoot/tmp\n";
|
|
361 print CMDHANDLE "environment = RBT_ROOT=$rbtroot;RBT_LIGDB=$rbtligdb;RBT_HOME=$rbthome;LD_LIBRARY_PATH=/usr/local/lib:$rbtroot/lib;\n";
|
|
362 print CMDHANDLE "executable = ${nqsFile}\n";
|
|
363 #Stop Condor filling your mailbox
|
|
364 print CMDHANDLE "Notification = Error\n";
|
|
365 print CMDHANDLE "log = run_${runRoot}.log\n";
|
|
366 # Added by DM, Enspiral Discovery, 28 Feb 2005
|
|
367 # Prevent rDock jobs running on dedicated Catalyst nodes
|
|
368 print CMDHANDLE "Requirements = CATALYST_NODE =!= True\n";
|
|
369
|
|
370 #GET THE LIST OF COMPRESSED SD FILES TO DOCK
|
|
371 my $iFile=0;
|
|
372 foreach my $library (@libraries) {
|
|
373 my @libFiles = glob "$library_table{$library}";
|
|
374 foreach my $libFile (@libFiles) {
|
|
375 $iFile++;
|
|
376 my $runName = "${runRoot}_$iFile";
|
|
377 print CMDHANDLE "output = condor_logs/run_${runName}.out\n";
|
|
378 print CMDHANDLE "error = condor_logs/run_${runName}.err\n";
|
|
379 print CMDHANDLE "arguments = $libFile $runName\n";
|
|
380 print CMDHANDLE "queue\n";
|
|
381 }
|
|
382 }
|
|
383 close CMDHANDLE;
|
|
384
|
|
385 #WRITE THE CUSTOM CLEANUP SCRIPT
|
|
386 #This will be run by the user from the command line
|
|
387 #after all the jobs have finished
|
|
388 my $cleanFile = "clean_${runRoot}.csh";
|
|
389 print "Writing $cleanFile...\n";
|
|
390 open CLEANHANDLE,">$cleanFile";
|
|
391 print CLEANHANDLE "#!/bin/tcsh -f\n";
|
|
392 print CLEANHANDLE "#CLEANUP SCRIPT WRITTEN BY run_rbscreen.pl\n";
|
|
393 print CLEANHANDLE "set sdout = ../${runRoot}_raw_hits.sd\n";
|
|
394 print CLEANHANDLE "if (-e \${sdout}) then\n";
|
|
395 print CLEANHANDLE " echo ARE YOU SURE YOU WANT TO DO THIS\n";
|
|
396 print CLEANHANDLE " echo \${sdout} already exists\n";
|
|
397 print CLEANHANDLE " echo To rerun the script remove \${sdout} manually first\n";
|
|
398 print CLEANHANDLE " exit\n";
|
|
399 print CLEANHANDLE "endif\n";
|
|
400 print CLEANHANDLE "echo All compressed SD files in ../sd/ will be concatenated into a single compressed file \${sdout}.gz\n";
|
|
401 print CLEANHANDLE "touch \${sdout}\n";
|
|
402 print CLEANHANDLE "foreach file (../sd/*.sd.gz)\n";
|
|
403 print CLEANHANDLE " echo Sorting and concatenating \${file}...\n";
|
|
404 print CLEANHANDLE " zcat \${file} | \${RBT_ROOT}/bin/sdsort -n -fSCORE -s >> \${sdout}\n";
|
|
405 print CLEANHANDLE "end\n";
|
|
406 print CLEANHANDLE "echo Each block of consecutive records per ligand have been presorted by SCORE ";
|
|
407 print CLEANHANDLE "ready for filtering\n";
|
|
408 print CLEANHANDLE "echo Compressing \${sdout}...\n";
|
|
409 print CLEANHANDLE "gzip -9vf \${sdout}\n";
|
|
410 print CLEANHANDLE "echo DONE\n";
|
|
411 print CLEANHANDLE "echo You may now safely remove the ../sd/ directory if you wish to save disk space\n";
|
|
412 close CLEANHANDLE;
|
|
413 chmod 0755,$cleanFile;
|
|
414
|
|
415 print "To submit, cd $runRoot/tmp; condor_submit $condorFile\n";
|
|
416 print "When all jobs have completed, run $runRoot/tmp/$cleanFile to concatenate the output files\n";
|