Mercurial > repos > marpiech > norwich_tools_docking2
view tools/rdock/bin/run_rbscreen.pl @ 3:279ba0732f87 draft default tip
planemo upload
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 09:07:58 -0400 |
parents | 30e2440b2173 |
children |
line wrap: on
line source
#!/usr/bin/perl use File::stat; use lib "$ENV{'RBT_ROOT'}/lib"; use strict; require "run_rbfuncs.pl"; my $rbtroot = $ENV{'RBT_ROOT'}; my $rbthome = $ENV{'RBT_HOME'}; my $rbtligdb = $ENV{'RBT_LIGDB'}; my $recepDir = "$rbtroot/data/receptors"; my $scriptDir = "$rbtroot/data/scripts"; my $filterDir = "$rbtroot/data/filters"; my $libraryDir = "$rbtligdb/dock"; my $binDir = "$rbtroot/bin"; #override default directories #Parse command line arguments my $nArgs = scalar(@ARGV); while (scalar(@ARGV)) { my $arg = shift @ARGV; if (index($arg,'-r')==0) { $recepDir = substr($arg,2); } elsif (index($arg,'-s')==0) { $scriptDir = substr($arg,2); } elsif (index($arg,'-l')==0) { $libraryDir = substr($arg,2); } } # Get the receptor, library and script tables #my %receptor_table = get_prm_table($recepDir); my %receptor_table = get_prm_table($rbthome); my %script_table = get_prm_table($scriptDir); my %library_table = get_dock_table($libraryDir); #GET ALL THE INPUTS #my $library = get_selection(\%library_table,"library"); my $isOK; my @libraries; my $receptor; my $script; do { @libraries = get_multiple_selection(\%library_table,"library"); $isOK = get_input("\nIs this OK (Y/N)","Y"); } until ($isOK eq "Y"); do { $receptor = get_selection(\%receptor_table,"receptor"); $isOK = get_input("\nIs this OK (Y/N)","Y"); } until ($isOK eq "Y"); # (SJ) check whether all the required files are in the dir # DM 7 Feb 2002 - get PSF/CRD filenames from PRM file print "\nLooking for all required target files in $rbthome...\n\n"; my $prm_file = "$rbthome/".$receptor; my $prm_root = substr($prm_file, 0, rindex($prm_file,".")); my $as_file = "${prm_root}.as"; my $vdw1_file = "${prm_root}_vdw1.grd"; my $vdw5_file = "${prm_root}_vdw5.grd"; #Set the default file names equal to their parameter names so that #error messages make sense if the parameters are missing completely from #the .prm file. my $psf_file = "RECEPTOR_TOPOL_FILE"; my $crd_file = "RECEPTOR_COORD_FILE"; my $mol2_file; # DM 12 May 2003 # Optional target files for experimental restraints # # These default restraint suffixes and filenames should be synchronised # with the default parameter values in the relevant C++ class source files ($RBT_ROOT/fw/src/.cxx) # They are used if the user does not explicitly define the parameter in the # receptor .prm file # # Note: this code searches for specific SECTION names in the receptor .prm file # thus forcing a naming convention for the restraint terms that is not mandatory in the C++ code itself # # Pharmacophore restraint file # SECTION PHARMA # CONSTRAINTS_FILE = explicit filename # OPTIONAL_FILE = explicit filename my $ph4_filename = ".const";#synchronise with RbtPharmaSF::_CONSTRAINTS_FILE my $ph4_optfilename = ""; my $has_ph4 = 0; # Tether restraint file # SECTION TETHER # REFERENCE_FILE = suffix to the root receptor.prm filename my $tether_suffix = "_reference.sd";#synchronise with RbtTetherSF::_REFERENCE_FILE my $has_tether = 0; # Nmr restraint file # SECTION NMR # FILENAME = explicit filename (not suffix) my $nmr_filename = "default.noe";#synchronise with RbtTetherSF::_FILENAME my $has_nmr = 0; open PRMHANDLE,$prm_file || die "Cannot read $prm_file"; while (<PRMHANDLE>) { my @fields = split(' ',$_); my $nfields = scalar(@fields); $psf_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_TOPOL_FILE') && ($nfields > 1)); $crd_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_COORD_FILE') && ($nfields > 1)); $mol2_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_FILE') && ($nfields > 1)); chomp $psf_file; chomp $crd_file; $has_ph4 = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'PHARMA') ); $ph4_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'CONSTRAINTS_FILE') && $has_ph4); $ph4_optfilename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'OPTIONAL_FILE') && $has_ph4); $has_tether = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'TETHER') ); $tether_suffix = $fields[1] if (($nfields > 1) && ($fields[0] eq 'REFERENCE_FILE') && $has_tether); $has_nmr = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'NMR') ); $nmr_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'FILENAME') && $has_nmr); } close PRMHANDLE; my @src_files = ($prm_file,$as_file, $vdw1_file, $vdw5_file); # check for either MOL2 or PSF/CRD if ($mol2_file ne "") { push (@src_files,"$mol2_file"); } else { push (@src_files,"$psf_file"); push (@src_files,"$crd_file"); } push (@src_files,"${ph4_filename}") if ($has_ph4); push (@src_files,"${ph4_optfilename}") if (($has_ph4) && ($ph4_optfilename ne "")); push (@src_files,"${prm_root}${tether_suffix}") if ($has_tether); push (@src_files,"${nmr_filename}") if ($has_nmr); my $is_file_missing = 0; foreach my $srcfile ( @src_files) { # search through all the target files if(-e $srcfile) { print "Found: $srcfile"; my $sb = stat($srcfile); #if ($sb->size < 1024) {print " WARNING size is suspiciously small ",$sb->size}; print "\n"; } else { print "ERROR: $srcfile is missing\n"; $is_file_missing = 1; } } if ($is_file_missing) { print "\nCannot continue - please generate all required target files and place in $rbthome\n"; exit; } do { $script = get_selection(\%script_table,"docking script"); $isOK = get_input("\nIs this OK (Y/N)","Y"); } until ($isOK eq "Y"); my $flags = get_input("Enter optional rbdock flags","-ap -an"); my $mode; do { $mode = get_input("High Throughput (HT) or Exhaustive Docking (ED)", "HT"); }until (($mode eq "HT") || ($mode eq "ED")); my $HT; my $filter; my $FILTERFILE; my $nRedock; my $nStages; my @targets; my @runs; my $redockTarget; my $redockTargetDef = -20.0;#Default score threshold for redocking phase if ($mode eq "ED") { $HT = 0; my $run = get_input("Enter number of runs/ligand",50); $flags = $flags . " -n" . $run; $run--; $filter = "1\nif - SCORE.NRUNS $run 0.0 -1.0,\n0\n"; } else { $HT = 1; $nStages = get_input("Number of stages (excluding redocking, enter 0 for redocking only)",2); #GET AND STORE THE TARGET SCORES AND NUMBER OF RUNS FOR EACH STAGE if ($nStages > 0) { for (my $n = 0; $n < $nStages; $n++) { print "\nDOCKING STAGE $n\n"; my $target = get_input("Enter target score",-20.0); my $run = get_input("Enter number of runs/ligand",20); if ($run > 0) { push @targets,$target; push @runs,$run; } } $nStages = @targets; print "\n"; #Default score threshold for redocking is equal to target score for last docking stage $redockTargetDef = $targets[$nStages-1]; } $nRedock = get_input("Enter number of redocking runs/ligand",0); if ($nRedock > 0) { $redockTarget = get_input("Enter target score for redocking",$redockTargetDef); } else { $redockTarget = $redockTargetDef; } my $nPhases = $nStages; if ($nRedock > 0) { $nPhases++; } $filter = "$nPhases\n"; my $nr; for (my $n = 0; $n < $nStages ; $n++) { $nr = $runs[$n] - 1; $filter .= "if - $targets[$n] SCORE.INTER 1.0 if - SCORE.NRUNS $nr 0.0 -1.0,\n"; } if ($nRedock > 0) { $nr = $nRedock - 1; $filter .= "if - SCORE.NRUNS $nr 0.0 -1.0,\n"; } my %filter_table = get_filter_table($filterDir, $redockTarget); my @wfilters; do { print "\nSelect filters. Only the conformations that pass all the\n", "filters chosen AND have SCORE.INTER lower than ", $redockTarget, "\nAND SCORE.RESTR.CAVITY < 1.0 will be written down\n"; @wfilters = get_multiple_selection(\%filter_table,"filters"); $isOK = get_input("\nIs this OK (Y/N)","Y"); } until ($isOK eq "Y"); for (my $i = 0 ; $i < @wfilters ; $i++) # remove the no filters { if ($wfilters[$i] eq "1no_other_filters") { splice(@wfilters, $i, 1); } } my $nwfilters = @wfilters + 2; $filter .= "$nwfilters\n"; $filter .= "- SCORE.INTER $redockTarget,\n"; $filter .= "- SCORE.RESTR.CAVITY 1.0,\n"; foreach my $wfilter (@wfilters) { open(FF, "$filterDir/$wfilter") || open(FF, "./$wfilter") || die "cannot open $wfilter for reading: $!"; my $line; while ($line = <FF>){ $filter .= $line;} $filter .= ",\n"; } } my $runRoot = get_input("Enter run name","rbdock"); my $pwd = `pwd`; chomp $pwd; #Make a subdirectory for each docking stage #plus a tmp directory mkdir($runRoot,0755); mkdir("$runRoot/tmp",0755); mkdir("$runRoot/tmp/condor_logs",0755); mkdir("$runRoot/log",0755); mkdir("$runRoot/sd",0755); mkdir("$runRoot/errors",0755); chdir "$runRoot/tmp"; my $filterFile = "${runRoot}_filter.txt"; print "Writing $filterFile...\n"; open FILTERHANDLE,">$filterFile"; print FILTERHANDLE $filter; #WRITE THE CUSTOM DOCKING SCRIPT FOR USE BY CONDOR #This will be the Condor "Executable" #gzipped SD file to dock is passed as $1 argument #Run name is passed as $2 argument my $nqsFile = "run_${runRoot}.csh"; print "Writing $nqsFile...\n"; open NQSHANDLE,">$nqsFile"; print NQSHANDLE "#!/bin/tcsh -f\n"; print NQSHANDLE "#CONDOR EXECUTABLE SCRIPT WRITTEN BY run_rbscreen.pl\n"; # DM 14 Mar 2003 #Create a tmp runtime directory on the local disk for rDock SD and log files #These will be compressed before copying back to the submission directory #on the file server #There are now two tmp directories: # /tmp/ribodock/<version> - stores the grid files etc that are common between runs # /tmp/${PROG}.XXXXXX - stores the I/O SD and log files unique to each run print NQSHANDLE "set PROG = \${0:t}\n"; print NQSHANDLE "set TMPDIR = `mktemp -d /tmp/\${PROG}.XXXXXX`\n"; # synchronizing target files my $versionidx = index $rbtroot,"ribodock"; my $targetdir = "/tmp/".substr($rbtroot,$versionidx); print NQSHANDLE "mkdir -pv -m0775 /tmp/ribodock\n"; print NQSHANDLE "mkdir -pv -m0775 $targetdir\n"; print NQSHANDLE "setenv RBT_HOME $targetdir\n"; print NQSHANDLE "rsync -avz ".join(" ",@src_files)." $targetdir\n"; # Copy the compressed sd file to the tmp directory and uncompress # DM 28 Feb 2002 - use the /tmp directory on the local disk instead # DM 14 Mar 2003 - use TMPDIR = /tmp/${PROG}.XXXXXX instead print NQSHANDLE "set gzfile = \$1\n"; print NQSHANDLE "set runName = \$2\n"; print NQSHANDLE "set copied_gzfile = \${TMPDIR}/\${gzfile:t}\n"; print NQSHANDLE "set sdfile = \${copied_gzfile:r}\n"; print NQSHANDLE "echo Ligand_DB : \$gzfile\n"; print NQSHANDLE "echo Local copy: \$copied_gzfile\n"; print NQSHANDLE "echo Unzipped : \$sdfile\n"; print NQSHANDLE "cp -f \$gzfile \$copied_gzfile\n"; print NQSHANDLE "gunzip -f \$copied_gzfile\n"; my $target = $redockTarget; print NQSHANDLE "#\n"; print NQSHANDLE "set outRoot = \${TMPDIR}/\${runName}\n"; print NQSHANDLE "set logFile = \${outRoot}.log\n"; print NQSHANDLE "if (-e \$logFile) rm -f \$logFile\n"; print NQSHANDLE "\$RBT_ROOT/bin/rbdock -i\$sdfile -o\${outRoot} -r$receptor -p$script -t$filterFile $flags > \$logFile\n"; # Compress the SD and log files and copy back to submission directory # The ..._errors.sd files are generated by the pharmacophore restraints term for ligands that # do not fulfill the feature requirements print NQSHANDLE "if (-e \${outRoot}.sd) then\n"; print NQSHANDLE " gzip -9vf \${outRoot}.sd\n"; print NQSHANDLE " cp \${outRoot}.sd.gz ../sd\n"; print NQSHANDLE "endif\n"; print NQSHANDLE "if (-e \${logFile}) then\n"; print NQSHANDLE " gzip -9vf \${logFile}\n"; print NQSHANDLE " cp \${logFile}.gz ../log\n"; print NQSHANDLE "endif\n"; print NQSHANDLE "if (-e \${outRoot}_errors.sd) then\n"; print NQSHANDLE " gzip -9vf \${outRoot}_errors.sd\n"; print NQSHANDLE " cp \${outRoot}_errors.sd.gz ../errors\n"; print NQSHANDLE "endif\n"; print NQSHANDLE "rm -rf \${TMPDIR}\n"; close NQSHANDLE; chmod 0755,$nqsFile; #PREPARE THE CONDOR JOB FILE my $condorFile = "run_${runRoot}.cmd"; print "Writing $condorFile...\n"; open CMDHANDLE,">$condorFile"; print CMDHANDLE "#CONDOR COMMAND FILE WRITTEN BY run_rbscreen.pl\n"; print CMDHANDLE "universe = vanilla\n"; print CMDHANDLE "initialdir = $pwd/$runRoot/tmp\n"; print CMDHANDLE "environment = RBT_ROOT=$rbtroot;RBT_LIGDB=$rbtligdb;RBT_HOME=$rbthome;LD_LIBRARY_PATH=/usr/local/lib:$rbtroot/lib;\n"; print CMDHANDLE "executable = ${nqsFile}\n"; #Stop Condor filling your mailbox print CMDHANDLE "Notification = Error\n"; print CMDHANDLE "log = run_${runRoot}.log\n"; # Added by DM, Enspiral Discovery, 28 Feb 2005 # Prevent rDock jobs running on dedicated Catalyst nodes print CMDHANDLE "Requirements = CATALYST_NODE =!= True\n"; #GET THE LIST OF COMPRESSED SD FILES TO DOCK my $iFile=0; foreach my $library (@libraries) { my @libFiles = glob "$library_table{$library}"; foreach my $libFile (@libFiles) { $iFile++; my $runName = "${runRoot}_$iFile"; print CMDHANDLE "output = condor_logs/run_${runName}.out\n"; print CMDHANDLE "error = condor_logs/run_${runName}.err\n"; print CMDHANDLE "arguments = $libFile $runName\n"; print CMDHANDLE "queue\n"; } } close CMDHANDLE; #WRITE THE CUSTOM CLEANUP SCRIPT #This will be run by the user from the command line #after all the jobs have finished my $cleanFile = "clean_${runRoot}.csh"; print "Writing $cleanFile...\n"; open CLEANHANDLE,">$cleanFile"; print CLEANHANDLE "#!/bin/tcsh -f\n"; print CLEANHANDLE "#CLEANUP SCRIPT WRITTEN BY run_rbscreen.pl\n"; print CLEANHANDLE "set sdout = ../${runRoot}_raw_hits.sd\n"; print CLEANHANDLE "if (-e \${sdout}) then\n"; print CLEANHANDLE " echo ARE YOU SURE YOU WANT TO DO THIS\n"; print CLEANHANDLE " echo \${sdout} already exists\n"; print CLEANHANDLE " echo To rerun the script remove \${sdout} manually first\n"; print CLEANHANDLE " exit\n"; print CLEANHANDLE "endif\n"; print CLEANHANDLE "echo All compressed SD files in ../sd/ will be concatenated into a single compressed file \${sdout}.gz\n"; print CLEANHANDLE "touch \${sdout}\n"; print CLEANHANDLE "foreach file (../sd/*.sd.gz)\n"; print CLEANHANDLE " echo Sorting and concatenating \${file}...\n"; print CLEANHANDLE " zcat \${file} | \${RBT_ROOT}/bin/sdsort -n -fSCORE -s >> \${sdout}\n"; print CLEANHANDLE "end\n"; print CLEANHANDLE "echo Each block of consecutive records per ligand have been presorted by SCORE "; print CLEANHANDLE "ready for filtering\n"; print CLEANHANDLE "echo Compressing \${sdout}...\n"; print CLEANHANDLE "gzip -9vf \${sdout}\n"; print CLEANHANDLE "echo DONE\n"; print CLEANHANDLE "echo You may now safely remove the ../sd/ directory if you wish to save disk space\n"; close CLEANHANDLE; chmod 0755,$cleanFile; print "To submit, cd $runRoot/tmp; condor_submit $condorFile\n"; print "When all jobs have completed, run $runRoot/tmp/$cleanFile to concatenate the output files\n";