diff tools/rdock/bin/run_rbscreen.pl @ 3:b02d74d22d05 draft default tip

planemo upload
author marpiech
date Mon, 29 Aug 2016 08:23:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/run_rbscreen.pl	Mon Aug 29 08:23:52 2016 -0400
@@ -0,0 +1,416 @@
+#!/usr/bin/perl
+use File::stat;
+use lib "$ENV{'RBT_ROOT'}/lib";
+use strict;
+require "run_rbfuncs.pl";
+
+my $rbtroot = $ENV{'RBT_ROOT'};
+my $rbthome = $ENV{'RBT_HOME'};
+my $rbtligdb = $ENV{'RBT_LIGDB'};
+my $recepDir = "$rbtroot/data/receptors";
+my $scriptDir = "$rbtroot/data/scripts";
+my $filterDir = "$rbtroot/data/filters";
+my $libraryDir = "$rbtligdb/dock";
+my $binDir = "$rbtroot/bin";
+
+#override default directories
+#Parse command line arguments
+my $nArgs = scalar(@ARGV);
+
+while (scalar(@ARGV)) {
+  my $arg = shift @ARGV;
+  if (index($arg,'-r')==0) {
+      $recepDir = substr($arg,2);
+  }
+  elsif (index($arg,'-s')==0) {
+      $scriptDir = substr($arg,2);
+  }
+  elsif (index($arg,'-l')==0) {
+      $libraryDir = substr($arg,2);
+  }
+}
+
+
+# Get the receptor, library and script tables
+#my %receptor_table = get_prm_table($recepDir);
+my %receptor_table = get_prm_table($rbthome);
+my %script_table = get_prm_table($scriptDir);
+my %library_table = get_dock_table($libraryDir);
+
+#GET ALL THE INPUTS
+#my $library = get_selection(\%library_table,"library");
+my $isOK;
+my @libraries;
+my $receptor;
+my $script;
+
+do {
+  @libraries = get_multiple_selection(\%library_table,"library");
+  $isOK = get_input("\nIs this OK (Y/N)","Y");
+} until ($isOK eq "Y");
+
+do {
+  $receptor = get_selection(\%receptor_table,"receptor");
+  $isOK = get_input("\nIs this OK (Y/N)","Y");
+} until ($isOK eq "Y");
+
+# (SJ) check whether all the required files are in the dir
+# DM 7 Feb 2002 - get PSF/CRD filenames from PRM file
+print "\nLooking for all required target files in $rbthome...\n\n";
+my $prm_file = "$rbthome/".$receptor;
+my $prm_root = substr($prm_file, 0, rindex($prm_file,"."));
+my $as_file = "${prm_root}.as";
+my $vdw1_file = "${prm_root}_vdw1.grd";
+my $vdw5_file = "${prm_root}_vdw5.grd";
+
+#Set the default file names equal to their parameter names so that
+#error messages make sense if the parameters are missing completely from
+#the .prm file.
+my $psf_file = "RECEPTOR_TOPOL_FILE";
+my $crd_file = "RECEPTOR_COORD_FILE";
+my $mol2_file;
+
+# DM 12 May 2003
+# Optional target files for experimental restraints
+#
+# These default restraint suffixes and filenames should be synchronised
+# with the default parameter values in the relevant C++ class source files ($RBT_ROOT/fw/src/.cxx)
+# They are used if the user does not explicitly define the parameter in the
+# receptor .prm file
+#
+# Note: this code searches for specific SECTION names in the receptor .prm file
+# thus forcing a naming convention for the restraint terms that is not mandatory in the C++ code itself
+#
+# Pharmacophore restraint file
+# SECTION PHARMA
+# CONSTRAINTS_FILE = explicit filename
+# OPTIONAL_FILE = explicit filename
+my $ph4_filename = ".const";#synchronise with RbtPharmaSF::_CONSTRAINTS_FILE
+my $ph4_optfilename = "";
+my $has_ph4 = 0;
+# Tether restraint file
+# SECTION TETHER
+# REFERENCE_FILE = suffix to the root receptor.prm filename
+my $tether_suffix = "_reference.sd";#synchronise with RbtTetherSF::_REFERENCE_FILE
+my $has_tether = 0;
+# Nmr restraint file
+# SECTION NMR
+# FILENAME = explicit filename (not suffix)
+my $nmr_filename = "default.noe";#synchronise with RbtTetherSF::_FILENAME
+my $has_nmr = 0;
+
+open PRMHANDLE,$prm_file || die "Cannot read $prm_file";
+while (<PRMHANDLE>) {
+  my @fields = split(' ',$_);
+  my $nfields = scalar(@fields);
+  $psf_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_TOPOL_FILE') && ($nfields > 1));
+  $crd_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_COORD_FILE') && ($nfields > 1));
+  $mol2_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_FILE') && ($nfields > 1));
+  chomp $psf_file;
+  chomp $crd_file;
+  $has_ph4 = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'PHARMA') );
+  $ph4_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'CONSTRAINTS_FILE') && $has_ph4);
+  $ph4_optfilename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'OPTIONAL_FILE') && $has_ph4);
+  $has_tether = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'TETHER') );
+  $tether_suffix = $fields[1] if (($nfields > 1) && ($fields[0] eq 'REFERENCE_FILE') && $has_tether);
+  $has_nmr = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'NMR') );
+  $nmr_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'FILENAME') && $has_nmr);
+}
+close PRMHANDLE;
+
+my @src_files = ($prm_file,$as_file, $vdw1_file, $vdw5_file);
+# check for either MOL2 or PSF/CRD
+if ($mol2_file ne "") {
+  push (@src_files,"$mol2_file");
+}
+else {
+  push (@src_files,"$psf_file");
+  push (@src_files,"$crd_file");
+}
+push (@src_files,"${ph4_filename}") if ($has_ph4);
+push (@src_files,"${ph4_optfilename}") if (($has_ph4) && ($ph4_optfilename ne ""));
+push (@src_files,"${prm_root}${tether_suffix}") if ($has_tether);
+push (@src_files,"${nmr_filename}") if ($has_nmr);
+
+my $is_file_missing = 0;
+foreach my $srcfile ( @src_files) {            # search through all the target files
+    if(-e $srcfile) {
+        print "Found: $srcfile";
+        my $sb = stat($srcfile);
+        #if ($sb->size < 1024) {print " WARNING size is suspiciously small ",$sb->size};
+        print "\n";
+    }
+    else {
+        print "ERROR: $srcfile is missing\n";
+        $is_file_missing    = 1;
+    }
+}
+
+if ($is_file_missing) {
+  print "\nCannot continue - please generate all required target files and place in $rbthome\n";
+  exit;
+}
+
+do {
+  $script = get_selection(\%script_table,"docking script");
+  $isOK = get_input("\nIs this OK (Y/N)","Y");
+} until ($isOK eq "Y");
+
+my $flags = get_input("Enter optional rbdock flags","-ap -an");
+my $mode;
+do
+{
+  $mode = get_input("High Throughput (HT) or Exhaustive Docking (ED)", "HT");
+}until (($mode eq "HT") || ($mode eq "ED"));
+
+my $HT;
+my $filter;
+my $FILTERFILE;
+my $nRedock;
+my $nStages;
+my @targets;
+my @runs;
+my $redockTarget;
+my $redockTargetDef = -20.0;#Default score threshold for redocking phase
+
+if ($mode eq "ED")
+{
+  $HT = 0;
+  my $run = get_input("Enter number of runs/ligand",50);
+  $flags = $flags . " -n" . $run;
+  $run--;
+  $filter = "1\nif - SCORE.NRUNS $run 0.0 -1.0,\n0\n";
+}
+else
+{
+  $HT = 1;
+  $nStages = get_input("Number of stages (excluding redocking, enter 0 for redocking only)",2);
+
+  #GET AND STORE THE TARGET SCORES AND NUMBER OF RUNS FOR EACH STAGE
+
+  if ($nStages > 0) {
+    for (my $n = 0; $n < $nStages; $n++) {
+      print "\nDOCKING STAGE $n\n";
+      my $target = get_input("Enter target score",-20.0);
+      my $run = get_input("Enter number of runs/ligand",20);
+      if ($run > 0)
+      {
+        push @targets,$target;
+        push @runs,$run;
+      }
+    }
+    $nStages = @targets;
+    print "\n";
+    #Default score threshold for redocking is equal to target score for last docking stage
+    $redockTargetDef = $targets[$nStages-1];
+  }
+
+  $nRedock = get_input("Enter number of redocking runs/ligand",0);
+  if ($nRedock > 0) {
+    $redockTarget = get_input("Enter target score for redocking",$redockTargetDef);
+  }
+  else
+  {
+    $redockTarget = $redockTargetDef;
+  }
+  my $nPhases = $nStages;
+  if ($nRedock > 0)
+  {
+    $nPhases++;
+  }
+  $filter = "$nPhases\n";
+  my $nr;
+  for (my $n = 0; $n < $nStages ; $n++)
+  {
+    $nr = $runs[$n] - 1;
+    $filter .= "if - $targets[$n] SCORE.INTER 1.0 if - SCORE.NRUNS $nr 0.0 -1.0,\n";
+  }
+  if ($nRedock > 0)
+  {
+    $nr = $nRedock - 1;
+    $filter .= "if - SCORE.NRUNS $nr 0.0 -1.0,\n";
+  }
+  my %filter_table = get_filter_table($filterDir, $redockTarget);
+  my @wfilters;
+  do {
+    print "\nSelect filters. Only the conformations that pass all the\n",
+          "filters chosen AND have SCORE.INTER lower than ", $redockTarget,
+          "\nAND SCORE.RESTR.CAVITY < 1.0 will be written down\n";
+    @wfilters = get_multiple_selection(\%filter_table,"filters");
+    $isOK = get_input("\nIs this OK (Y/N)","Y");
+  } until ($isOK eq "Y");
+
+  for (my $i = 0 ; $i < @wfilters ; $i++) # remove the no filters
+  {
+    if ($wfilters[$i] eq "1no_other_filters")
+    {
+      splice(@wfilters, $i, 1);
+    }
+  }
+  
+  my $nwfilters = @wfilters + 2;
+  $filter .= "$nwfilters\n";
+  $filter .= "- SCORE.INTER $redockTarget,\n";
+  $filter .= "- SCORE.RESTR.CAVITY 1.0,\n";
+  foreach my $wfilter (@wfilters) {
+    open(FF, "$filterDir/$wfilter") || open(FF, "./$wfilter") ||
+           die "cannot open $wfilter for reading: $!";
+    my $line;
+    while ($line = <FF>){ $filter .= $line;}
+    $filter .= ",\n";
+  }
+}
+
+      
+    
+  my $runRoot = get_input("Enter run name","rbdock");
+    
+  my $pwd = `pwd`;
+  chomp $pwd;
+
+  #Make a subdirectory for each docking stage
+  #plus a tmp directory
+  mkdir($runRoot,0755);
+  mkdir("$runRoot/tmp",0755);
+  mkdir("$runRoot/tmp/condor_logs",0755);
+  mkdir("$runRoot/log",0755);
+  mkdir("$runRoot/sd",0755);
+  mkdir("$runRoot/errors",0755);
+  chdir "$runRoot/tmp";
+
+  my $filterFile = "${runRoot}_filter.txt";
+  print "Writing $filterFile...\n";
+open FILTERHANDLE,">$filterFile";
+print FILTERHANDLE $filter;
+#WRITE THE CUSTOM DOCKING SCRIPT FOR USE BY CONDOR
+#This will be the Condor "Executable"
+#gzipped SD file to dock is passed as $1 argument
+#Run name is passed as $2 argument
+my $nqsFile = "run_${runRoot}.csh";
+print "Writing $nqsFile...\n";
+open NQSHANDLE,">$nqsFile";
+print NQSHANDLE "#!/bin/tcsh -f\n";
+print NQSHANDLE "#CONDOR EXECUTABLE SCRIPT WRITTEN BY run_rbscreen.pl\n";
+
+# DM 14 Mar 2003
+#Create a tmp runtime directory on the local disk for rDock SD and log files
+#These will be compressed before copying back to the submission directory
+#on the file server
+#There are now two tmp directories:
+# /tmp/ribodock/<version> - stores the grid files etc that are common between runs
+# /tmp/${PROG}.XXXXXX - stores the I/O SD and log files unique to each run
+
+print NQSHANDLE "set PROG = \${0:t}\n";
+print NQSHANDLE "set TMPDIR = `mktemp -d /tmp/\${PROG}.XXXXXX`\n";
+
+# synchronizing target files
+my $versionidx  = index $rbtroot,"ribodock";
+my $targetdir   = "/tmp/".substr($rbtroot,$versionidx);
+print NQSHANDLE "mkdir -pv -m0775 /tmp/ribodock\n";
+print NQSHANDLE "mkdir -pv -m0775 $targetdir\n";
+print NQSHANDLE "setenv RBT_HOME $targetdir\n";
+print NQSHANDLE "rsync -avz ".join(" ",@src_files)." $targetdir\n";
+
+# Copy the compressed sd file to the tmp directory and uncompress
+# DM 28 Feb 2002 - use the /tmp directory on the local disk instead
+# DM 14 Mar 2003 - use TMPDIR = /tmp/${PROG}.XXXXXX instead
+print NQSHANDLE "set gzfile = \$1\n";
+print NQSHANDLE "set runName = \$2\n";
+print NQSHANDLE "set copied_gzfile = \${TMPDIR}/\${gzfile:t}\n";
+print NQSHANDLE "set sdfile = \${copied_gzfile:r}\n";
+print NQSHANDLE "echo Ligand_DB : \$gzfile\n";
+print NQSHANDLE "echo Local copy: \$copied_gzfile\n";
+print NQSHANDLE "echo Unzipped  : \$sdfile\n";
+print NQSHANDLE "cp -f \$gzfile \$copied_gzfile\n";
+print NQSHANDLE "gunzip -f \$copied_gzfile\n";
+
+my $target = $redockTarget;
+print NQSHANDLE "#\n";
+print NQSHANDLE "set outRoot = \${TMPDIR}/\${runName}\n";
+print NQSHANDLE "set logFile = \${outRoot}.log\n";
+print NQSHANDLE "if (-e \$logFile) rm -f \$logFile\n";
+print NQSHANDLE "\$RBT_ROOT/bin/rbdock -i\$sdfile -o\${outRoot} -r$receptor -p$script -t$filterFile $flags > \$logFile\n";
+
+# Compress the SD and log files and copy back to submission directory
+# The ..._errors.sd files are generated by the pharmacophore restraints term for ligands that
+# do not fulfill the feature requirements
+print NQSHANDLE "if (-e \${outRoot}.sd) then\n";
+print NQSHANDLE "  gzip -9vf \${outRoot}.sd\n";
+print NQSHANDLE "  cp \${outRoot}.sd.gz ../sd\n";
+print NQSHANDLE "endif\n";
+print NQSHANDLE "if (-e \${logFile}) then\n";
+print NQSHANDLE "  gzip -9vf \${logFile}\n";
+print NQSHANDLE "  cp \${logFile}.gz ../log\n";
+print NQSHANDLE "endif\n";
+print NQSHANDLE "if (-e \${outRoot}_errors.sd) then\n";
+print NQSHANDLE "  gzip -9vf \${outRoot}_errors.sd\n";
+print NQSHANDLE "  cp \${outRoot}_errors.sd.gz ../errors\n";
+print NQSHANDLE "endif\n";
+
+print NQSHANDLE "rm -rf \${TMPDIR}\n";
+close NQSHANDLE;
+chmod 0755,$nqsFile;
+
+#PREPARE THE CONDOR JOB FILE
+my $condorFile = "run_${runRoot}.cmd";
+print "Writing $condorFile...\n";
+open CMDHANDLE,">$condorFile";
+print CMDHANDLE "#CONDOR COMMAND FILE WRITTEN BY run_rbscreen.pl\n";
+print CMDHANDLE "universe        = vanilla\n";
+print CMDHANDLE "initialdir      = $pwd/$runRoot/tmp\n";
+print CMDHANDLE "environment     = RBT_ROOT=$rbtroot;RBT_LIGDB=$rbtligdb;RBT_HOME=$rbthome;LD_LIBRARY_PATH=/usr/local/lib:$rbtroot/lib;\n";
+print CMDHANDLE "executable      = ${nqsFile}\n";
+#Stop Condor filling your mailbox
+print CMDHANDLE "Notification    = Error\n";
+print CMDHANDLE "log             = run_${runRoot}.log\n";
+# Added by DM, Enspiral Discovery, 28 Feb 2005
+# Prevent rDock jobs running on dedicated Catalyst nodes
+print CMDHANDLE "Requirements    = CATALYST_NODE =!= True\n"; 
+
+#GET THE LIST OF COMPRESSED SD FILES TO DOCK
+my $iFile=0;
+foreach my $library (@libraries) {
+  my @libFiles = glob "$library_table{$library}";
+  foreach my $libFile (@libFiles) {
+    $iFile++;
+    my $runName = "${runRoot}_$iFile";
+    print CMDHANDLE "output          = condor_logs/run_${runName}.out\n";
+    print CMDHANDLE "error           = condor_logs/run_${runName}.err\n";
+    print CMDHANDLE "arguments       = $libFile $runName\n";
+    print CMDHANDLE "queue\n";
+  }
+}
+close CMDHANDLE;
+
+#WRITE THE CUSTOM CLEANUP SCRIPT
+#This will be run by the user from the command line
+#after all the jobs have finished
+my $cleanFile = "clean_${runRoot}.csh";
+print "Writing $cleanFile...\n";
+open CLEANHANDLE,">$cleanFile";
+print CLEANHANDLE "#!/bin/tcsh -f\n";
+print CLEANHANDLE "#CLEANUP SCRIPT WRITTEN BY run_rbscreen.pl\n";
+print CLEANHANDLE "set sdout = ../${runRoot}_raw_hits.sd\n";
+print CLEANHANDLE "if (-e \${sdout}) then\n";
+print CLEANHANDLE "  echo ARE YOU SURE YOU WANT TO DO THIS\n";
+print CLEANHANDLE "  echo \${sdout} already exists\n";
+print CLEANHANDLE "  echo To rerun the script remove \${sdout} manually first\n";
+print CLEANHANDLE "  exit\n";
+print CLEANHANDLE "endif\n";
+print CLEANHANDLE "echo All compressed SD files in ../sd/ will be concatenated into a single compressed file \${sdout}.gz\n";
+print CLEANHANDLE "touch \${sdout}\n";
+print CLEANHANDLE "foreach file (../sd/*.sd.gz)\n";
+print CLEANHANDLE "  echo Sorting and concatenating \${file}...\n";
+print CLEANHANDLE "  zcat \${file} | \${RBT_ROOT}/bin/sdsort -n -fSCORE -s >> \${sdout}\n";
+print CLEANHANDLE "end\n";
+print CLEANHANDLE "echo Each block of consecutive records per ligand have been presorted by SCORE ";
+print CLEANHANDLE "ready for filtering\n";
+print CLEANHANDLE "echo Compressing \${sdout}...\n";
+print CLEANHANDLE "gzip -9vf \${sdout}\n";
+print CLEANHANDLE "echo DONE\n";
+print CLEANHANDLE "echo You may now safely remove the ../sd/ directory if you wish to save disk space\n";
+close CLEANHANDLE;
+chmod 0755,$cleanFile;
+
+print "To submit, cd $runRoot/tmp; condor_submit $condorFile\n";
+print "When all jobs have completed, run $runRoot/tmp/$cleanFile to concatenate the output files\n";