Mercurial > repos > marpiech > norwich_tools_docking2

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/ht_protocol_finder.pl	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,232 @@
+#!/usr/bin/perl -w
+
+# Script that simulates the result of a high throughput protocol.
+
+if (@ARGV < 4)
+{
+	print "Script that simulates the result of a high throughput protocol.\n";
+	print "1st) exhaustive docking of a small representative part of the\n";
+	print "     whole library.\n";
+	print "2nd) Store the result of sdreport -t over that exhaustive dock.\n";
+	print "     in file <sdreport_file> that will be the input of this\n";
+	print "     script.\n";
+	print "3rd) ht_protocol_finder.pl <sdreport_file> <output_file> <thr1max>";
+	print " <thr1min> <ns1> <ns2>\n";
+	print "     <ns1> and <ns2> are the number of steps in stage 1 and in\n";
+	print "     stage 2. If not present, the default values are 5 and 15\n";
+	print "     <thrmax> and <thrmin> setup the range of thresholds that will\n";
+	print "     be simulated in stage 1. The threshold of stage 2 depends\n";
+	print "     on the value of the threshold of stage 1.\n";
+	print "     An input of -22 -24 will try protocols:\n";
+	print "            5	-22	15	-27\n";
+	print "            5	-22	15	-28\n";
+	print "            5	-22	15	-29\n";
+	print "            5	-23	15	-28\n";
+	print "            5	-23	15	-29\n";
+	print "            5	-23	15	-30\n";
+	print "            5	-24	15	-29\n";
+	print "            5	-24	15	-30\n";
+	print "            5	-24	15	-31\n";
+	print "     Output of the program is a 7 column values. First column\n";
+	print "     represents the time. This is a percentage of the time it\n";
+	print "     would take to do the docking in exhaustive mode, i.e. \n";
+    print "     docking each ligand 100 times. Anything\n";
+	print "     above 12 is too long.\n";
+	print "     Second column is the first percentage. Percentage of\n";
+	print "     ligands that pass the first stage.\n";
+	print "     Third column is the second percentage. Percentage of\n";
+	print "     ligands that pass the second stage.\n";
+	print "     The four last columns represent the protocol.\n";
+	print "     All the protocols tried are written at the end.\n";
+	print "     The ones for which time is less than 12%, perc1 is\n";
+	print "     less than 30% and perc2 is less than 5% but bigger than 1%\n";
+	print "     will have a series of *** after, to indicate they are good choices\n";
+	print "     WARNING! This is a simulation based in a small set.\n";
+	print "     The numbers are an indication, not factual values.\n";
+    exit (0);
+}
+open (IFILE, $ARGV[0]) || die "cannot open $ARGV[0] for reading: $!";
+open (OFILE, ">$ARGV[1]") || die "cannot open $ARGV[1] for writing: $!";
+<IFILE>;   # read first line
+$line = <IFILE>;
+$line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)/;
+$prevname = $1;
+$inter = $2;
+push @tmp, $inter;
+$i = 0;
+while ($line = <IFILE>)
+{
+  $line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)/;
+  $name = $1;
+  $inter = $2;
+  if ($name ne $prevname)
+  {
+      $prevname = $name;
+      push @inters, [ @tmp ];
+      $runsperligand[$i++] = @tmp;
+      $#tmp = -1;
+  }
+  push @tmp, $inter;
+}
+push @inters, [ @tmp ];
+$runsperligand[$i++] = @tmp;
+$totLigands = @inters;
+$ta = $ARGV[2];
+$tb = $ARGV[3];
+if (@ARGV > 4)
+{
+	$n1 = $ARGV[4];
+}
+else
+{
+	$n1 = 5;
+}
+if (@ARGV > 5)
+{
+	$n2 = $ARGV[5];
+}
+else
+{
+	$n2 = 15;
+}
+printf OFILE "Command line args:\n\t";
+foreach $arg (@ARGV)
+{
+	printf OFILE "%s\t", $arg;
+}
+printf OFILE "\n\n  TIME   PERC1   PERC2    N1  THR1    N2  THR2\n";
+for ($t1 = $ta ; $t1 >= $tb ; $t1--)
+{
+    $tc = $t1 - 5;
+    for ($t2 = $tc ; $t2 >= ($tc - 2) ; $t2--)
+    {
+        ($cref, $dref) =
+          getTables($t1, $t2);
+        @ligBelowThr1 = @$cref;
+        @ligBelowThr2 = @$dref;
+        ($time,$p1,$p2) = simulation($n1,$t1,$n2,$t2);
+        if (($time < 12.0) && ($p1 < 30.0) && ($p2 < 5.0) && ($p2 > 1.0))
+        {
+            printf OFILE "%6.3f, %6.3f, %6.3f, %4i, %4i, %4i, %4i ***\n",
+                         $time, $p1, $p2, $n1,$t1,$n2,$t2;
+        }
+        else
+        {
+            printf OFILE "%6.3f, %6.3f, %6.3f, %4i, %4i, %4i, %4i\n",
+                         $time, $p1, $p2, $n1,$t1,$n2,$t2;
+        }
+    }
+}
+
+
+sub simulation
+{
+  my(@params);
+  my(@lt0, @lt1);
+  my($thr1, $thr2,$ns1,$ns2,$tottime,$totnumHits,$total,$totnruns);
+  @params = @_;
+  $ns1 = $params[0];
+  $thr1 = $params[1];
+  $ns2 = $params[2];
+  $thr2 = $params[3];
+  $ntrials = 100;
+  $tottime = 0;
+  $totnumHitss1 = 0;
+  $totnumHitss2 = 0;
+  $total = $totLigands;
+  $totnruns = 0;
+  for ($i = 0 ; $i < $ntrials ; $i++)
+  {
+    $numHitss1 = 0;
+    $numHitss2 = 0;
+    $totnruns = 0;
+    for ($j = 0 ; $j < $totLigands ; $j++)
+    {
+      ($passStage1, $nruns1) = stage($ns1, $ligBelowThr1[$j]);
+      if ($passStage1)
+      {
+        $numHitss1++;
+        ($passStage2, $nruns2) = stage($ns2, $ligBelowThr2[$j]);
+        if ($passStage2)
+        {
+          $numHitss2++;
+        }
+        $totnruns += $nruns2;
+      }
+      $totnruns += $nruns1;
+    }
+    $time = $totnruns / $total;
+    $tottime += $time;
+    $totnumHitss1 += $numHitss1;
+    $totnumHitss2 += $numHitss2;
+  }
+  $tottime /= $ntrials;
+  $totnumHitss1 /= $ntrials;
+  $totnumHitss2 /= $ntrials;
+  $p1 = $totnumHitss1 * 100.0 / $totLigands;
+  $p2 = $totnumHitss2 * 100.0 / $totLigands;
+  return ($tottime, $p1, $p2);
+}
+
+sub getTables
+{
+  my(@params);
+  my($totLigands, @lt1, @lt2);
+  my($row,$inter, $belowThr1, $belowThr2);
+  my($thr0, $thr1);
+  @params = @_;
+  $thr1 = $params[0];
+  $thr2 = $params[1];
+  $totLigands = 0;
+  $belowThr1 = 0;
+  $belowThr2 = 0;
+  $totLigands = 0;
+  for $i (0 .. $#inters)
+  {
+    $row = $inters[$i];
+    for $j (0 .. $#{$row})
+    {
+      $inter = $row->[$j];
+      if ($inter <= $thr1)
+      {
+        $belowThr1++;
+      }
+      if ($inter <= $thr2)
+      {
+        $belowThr2++;
+      }
+    }
+    $lt1[$totLigands] =
+      $belowThr1 / $runsperligand[$i];
+    $lt2[$totLigands] =
+      $belowThr2 / $runsperligand[$i];
+    $totLigands++;
+    $belowThr1 = 0;
+    $belowThr2 = 0;
+  }
+  return (\@lt1, \@lt2);
+}
+
+
+
+sub stage
+{
+  my($nruns, $p, $i, $r);
+  $nruns = $_[0];
+  $p = $_[1];
+  if ($nruns == 0)
+  {
+    return (1, 0);
+  }
+  for ($i = 0 ; $i < $nruns ; $i++)
+  {
+    $r = rand;
+    if ($r < $p)
+    {
+      return (1, $i);
+    }
+  }
+  return (0, $nruns);
+}
+
+
Binary file tools/rdock/bin/libRbt.so has changed
Binary file tools/rdock/bin/libRbt.so.rDock_2013.1_src has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/make_grid.csh	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,11 @@
+#!/bin/csh -f
+# Creates vdW grids for all receptor prm files listed on command line
+#
+set gridstep = 0.3
+set border = 1.0
+
+foreach grid ('vdw1' 'vdw5')
+  foreach file ($argv)
+      $RBT_ROOT/bin/rbcalcgrid -r${file} -pcalcgrid_${grid}.prm -o_${grid}.grd -g${gridstep} -b${border}
+  end
+end
Binary file tools/rdock/bin/rbcalcgrid has changed
Binary file tools/rdock/bin/rbcavity has changed
Binary file tools/rdock/bin/rbdock has changed
Binary file tools/rdock/bin/rblist has changed
Binary file tools/rdock/bin/rbmoegrid has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/run_rbdock.pl	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,116 @@
+#!/usr/bin/perl
+use lib "$ENV{'RBT_ROOT'}/lib";
+require "run_rbfuncs.pl";
+
+my $rbtroot = $ENV{'RBT_ROOT'};
+my $rbthome = $ENV{'RBT_HOME'};
+my $recepDir = "$rbtroot/data/receptors";
+my $scriptDir = "$rbtroot/data/scripts";
+my $libraryDir = "$rbtroot/data/libraries";
+my $binDir = "$rbtroot/bin";
+
+#override default directories
+#Parse command line arguments
+my $nArgs = scalar(@ARGV);
+
+while (scalar(@ARGV)) {
+  my $arg = shift @ARGV;
+  if (index($arg,'-r')==0) {
+      $recepDir = substr($arg,2);
+  }
+  elsif (index($arg,'-s')==0) {
+      $scriptDir = substr($arg,2);
+  }
+  elsif (index($arg,'-l')==0) {
+      $libraryDir = substr($arg,2);
+  }
+}
+
+
+# Get the receptor, library and script tables
+my %receptor_table = get_prm_table($recepDir);
+my %script_table = get_prm_table($scriptDir);
+my %library_table = get_prm_table($libraryDir);
+
+#GET ALL THE INPUTS
+my $defFiles = "*.{sd,sdf,mol,mdl}";
+print join("\n",glob $defFiles);
+my $libFiles = get_input("\n\nEnter expression for SD files to dock",$defFiles);
+my $receptor = get_selection(\%receptor_table,"receptor");
+my $script = get_selection(\%script_table,"docking script");
+my $flags = get_input("Enter optional rbdock flags","-ap -an");
+my $runs = get_input("Enter number of runs/ligand",20);
+
+my $runRoot = get_input("Enter run name","rbdock");
+my $queueSys = get_input("Enter queuing system (NQS,CODINE,CONDOR,NONE)","CONDOR");
+my $queueName;
+$queueName = get_input("Enter NQS queue","QRBD") if ($queueSys eq "NQS");
+
+$pwd = `pwd`;
+chomp $pwd;
+
+#WRITE THE NQS SCRIPTS
+
+#GET THE LIST OF SD FILES TO DOCK
+my @libList = glob "${libFiles}";
+
+my $subFile = "submit_$runRoot.csh";
+open SUBHANDLE,">$subFile";
+print SUBHANDLE "#!/bin/csh -ef\n";
+
+my $iFile=0;
+foreach $libFile (@libList) {
+  $iFile++;
+  #Write the NQS script file
+  $runName = "${runRoot}_$iFile";
+  $nqsFile = "run_${runName}.csh";
+  my $logFile = "${runName}.log";
+
+  print "Writing $nqsFile...\n";
+
+  open NQSHANDLE,">$nqsFile";
+  print NQSHANDLE "#!/bin/csh -ef\n";
+  print NQSHANDLE "setenv RBT_ROOT $rbtroot\n";
+  print NQSHANDLE "setenv RBT_HOME $rbthome\n";
+  print NQSHANDLE "source \$RBT_ROOT/bin/setlibrbt.com\n";
+  print NQSHANDLE "cd $pwd\n";
+  print NQSHANDLE "if (-e $logFile) rm -f $logFile\n";
+  print NQSHANDLE "\$RBT_ROOT/bin/rbdock -i$libFile -o${runName} -r$receptor -p$script -n$runs $flags > $logFile\n";
+  close NQSHANDLE;
+
+  chmod 0755,$nqsFile;
+  print SUBHANDLE "qsub -q $queueName $nqsFile\n" if ($queueSys eq "NQS");
+  print SUBHANDLE "qsub -e $pwd/$runRoot/tmp/ -o $pwd/$runRoot/tmp/ $nqsFile\n" if ($queueSys eq "CODINE");
+  if ($queueSys eq "CONDOR") {
+    my $condorFile = "run_${runName}.cmd";
+    open CMDHANDLE,">$condorFile";
+    print CMDHANDLE "#CONDOR FILE WRITTEN BY run_rbscreen.pl\n";
+    print CMDHANDLE "universe        = vanilla\n";
+    print CMDHANDLE "executable      = ${nqsFile}\n";
+    print CMDHANDLE "Requirements    = CATALYST_NODE =!= True\n";
+    print CMDHANDLE "output          = ${nqsFile}.out\n";
+    print CMDHANDLE "error           = ${nqsFile}.err\n";
+    print CMDHANDLE "log             = ${nqsFile}.log\n";
+    print CMDHANDLE "queue\n";
+    close NQSHANDLE;
+    print SUBHANDLE "condor_submit $condorFile\n";
+  }
+}
+close SUBHANDLE;
+chmod 0755,$subFile;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/run_rbscreen.pl	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,416 @@
+#!/usr/bin/perl
+use File::stat;
+use lib "$ENV{'RBT_ROOT'}/lib";
+use strict;
+require "run_rbfuncs.pl";
+
+my $rbtroot = $ENV{'RBT_ROOT'};
+my $rbthome = $ENV{'RBT_HOME'};
+my $rbtligdb = $ENV{'RBT_LIGDB'};
+my $recepDir = "$rbtroot/data/receptors";
+my $scriptDir = "$rbtroot/data/scripts";
+my $filterDir = "$rbtroot/data/filters";
+my $libraryDir = "$rbtligdb/dock";
+my $binDir = "$rbtroot/bin";
+
+#override default directories
+#Parse command line arguments
+my $nArgs = scalar(@ARGV);
+
+while (scalar(@ARGV)) {
+  my $arg = shift @ARGV;
+  if (index($arg,'-r')==0) {
+      $recepDir = substr($arg,2);
+  }
+  elsif (index($arg,'-s')==0) {
+      $scriptDir = substr($arg,2);
+  }
+  elsif (index($arg,'-l')==0) {
+      $libraryDir = substr($arg,2);
+  }
+}
+
+
+# Get the receptor, library and script tables
+#my %receptor_table = get_prm_table($recepDir);
+my %receptor_table = get_prm_table($rbthome);
+my %script_table = get_prm_table($scriptDir);
+my %library_table = get_dock_table($libraryDir);
+
+#GET ALL THE INPUTS
+#my $library = get_selection(\%library_table,"library");
+my $isOK;
+my @libraries;
+my $receptor;
+my $script;
+
+do {
+  @libraries = get_multiple_selection(\%library_table,"library");
+  $isOK = get_input("\nIs this OK (Y/N)","Y");
+} until ($isOK eq "Y");
+
+do {
+  $receptor = get_selection(\%receptor_table,"receptor");
+  $isOK = get_input("\nIs this OK (Y/N)","Y");
+} until ($isOK eq "Y");
+
+# (SJ) check whether all the required files are in the dir
+# DM 7 Feb 2002 - get PSF/CRD filenames from PRM file
+print "\nLooking for all required target files in $rbthome...\n\n";
+my $prm_file = "$rbthome/".$receptor;
+my $prm_root = substr($prm_file, 0, rindex($prm_file,"."));
+my $as_file = "${prm_root}.as";
+my $vdw1_file = "${prm_root}_vdw1.grd";
+my $vdw5_file = "${prm_root}_vdw5.grd";
+
+#Set the default file names equal to their parameter names so that
+#error messages make sense if the parameters are missing completely from
+#the .prm file.
+my $psf_file = "RECEPTOR_TOPOL_FILE";
+my $crd_file = "RECEPTOR_COORD_FILE";
+my $mol2_file;
+
+# DM 12 May 2003
+# Optional target files for experimental restraints
+#
+# These default restraint suffixes and filenames should be synchronised
+# with the default parameter values in the relevant C++ class source files ($RBT_ROOT/fw/src/.cxx)
+# They are used if the user does not explicitly define the parameter in the
+# receptor .prm file
+#
+# Note: this code searches for specific SECTION names in the receptor .prm file
+# thus forcing a naming convention for the restraint terms that is not mandatory in the C++ code itself
+#
+# Pharmacophore restraint file
+# SECTION PHARMA
+# CONSTRAINTS_FILE = explicit filename
+# OPTIONAL_FILE = explicit filename
+my $ph4_filename = ".const";#synchronise with RbtPharmaSF::_CONSTRAINTS_FILE
+my $ph4_optfilename = "";
+my $has_ph4 = 0;
+# Tether restraint file
+# SECTION TETHER
+# REFERENCE_FILE = suffix to the root receptor.prm filename
+my $tether_suffix = "_reference.sd";#synchronise with RbtTetherSF::_REFERENCE_FILE
+my $has_tether = 0;
+# Nmr restraint file
+# SECTION NMR
+# FILENAME = explicit filename (not suffix)
+my $nmr_filename = "default.noe";#synchronise with RbtTetherSF::_FILENAME
+my $has_nmr = 0;
+
+open PRMHANDLE,$prm_file || die "Cannot read $prm_file";
+while (<PRMHANDLE>) {
+  my @fields = split(' ',$_);
+  my $nfields = scalar(@fields);
+  $psf_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_TOPOL_FILE') && ($nfields > 1));
+  $crd_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_COORD_FILE') && ($nfields > 1));
+  $mol2_file = "$rbthome/".$fields[1] if (($fields[0] eq 'RECEPTOR_FILE') && ($nfields > 1));
+  chomp $psf_file;
+  chomp $crd_file;
+  $has_ph4 = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'PHARMA') );
+  $ph4_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'CONSTRAINTS_FILE') && $has_ph4);
+  $ph4_optfilename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'OPTIONAL_FILE') && $has_ph4);
+  $has_tether = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'TETHER') );
+  $tether_suffix = $fields[1] if (($nfields > 1) && ($fields[0] eq 'REFERENCE_FILE') && $has_tether);
+  $has_nmr = 1 if (($nfields > 1) && ($fields[0] eq 'SECTION') && ($fields[1] eq 'NMR') );
+  $nmr_filename = "$rbthome/".$fields[1] if (($nfields > 1) && ($fields[0] eq 'FILENAME') && $has_nmr);
+}
+close PRMHANDLE;
+
+my @src_files = ($prm_file,$as_file, $vdw1_file, $vdw5_file);
+# check for either MOL2 or PSF/CRD
+if ($mol2_file ne "") {
+  push (@src_files,"$mol2_file");
+}
+else {
+  push (@src_files,"$psf_file");
+  push (@src_files,"$crd_file");
+}
+push (@src_files,"${ph4_filename}") if ($has_ph4);
+push (@src_files,"${ph4_optfilename}") if (($has_ph4) && ($ph4_optfilename ne ""));
+push (@src_files,"${prm_root}${tether_suffix}") if ($has_tether);
+push (@src_files,"${nmr_filename}") if ($has_nmr);
+
+my $is_file_missing = 0;
+foreach my $srcfile ( @src_files) {            # search through all the target files
+    if(-e $srcfile) {
+        print "Found: $srcfile";
+        my $sb = stat($srcfile);
+        #if ($sb->size < 1024) {print " WARNING size is suspiciously small ",$sb->size};
+        print "\n";
+    }
+    else {
+        print "ERROR: $srcfile is missing\n";
+        $is_file_missing    = 1;
+    }
+}
+
+if ($is_file_missing) {
+  print "\nCannot continue - please generate all required target files and place in $rbthome\n";
+  exit;
+}
+
+do {
+  $script = get_selection(\%script_table,"docking script");
+  $isOK = get_input("\nIs this OK (Y/N)","Y");
+} until ($isOK eq "Y");
+
+my $flags = get_input("Enter optional rbdock flags","-ap -an");
+my $mode;
+do
+{
+  $mode = get_input("High Throughput (HT) or Exhaustive Docking (ED)", "HT");
+}until (($mode eq "HT") || ($mode eq "ED"));
+
+my $HT;
+my $filter;
+my $FILTERFILE;
+my $nRedock;
+my $nStages;
+my @targets;
+my @runs;
+my $redockTarget;
+my $redockTargetDef = -20.0;#Default score threshold for redocking phase
+
+if ($mode eq "ED")
+{
+  $HT = 0;
+  my $run = get_input("Enter number of runs/ligand",50);
+  $flags = $flags . " -n" . $run;
+  $run--;
+  $filter = "1\nif - SCORE.NRUNS $run 0.0 -1.0,\n0\n";
+}
+else
+{
+  $HT = 1;
+  $nStages = get_input("Number of stages (excluding redocking, enter 0 for redocking only)",2);
+
+  #GET AND STORE THE TARGET SCORES AND NUMBER OF RUNS FOR EACH STAGE
+
+  if ($nStages > 0) {
+    for (my $n = 0; $n < $nStages; $n++) {
+      print "\nDOCKING STAGE $n\n";
+      my $target = get_input("Enter target score",-20.0);
+      my $run = get_input("Enter number of runs/ligand",20);
+      if ($run > 0)
+      {
+        push @targets,$target;
+        push @runs,$run;
+      }
+    }
+    $nStages = @targets;
+    print "\n";
+    #Default score threshold for redocking is equal to target score for last docking stage
+    $redockTargetDef = $targets[$nStages-1];
+  }
+
+  $nRedock = get_input("Enter number of redocking runs/ligand",0);
+  if ($nRedock > 0) {
+    $redockTarget = get_input("Enter target score for redocking",$redockTargetDef);
+  }
+  else
+  {
+    $redockTarget = $redockTargetDef;
+  }
+  my $nPhases = $nStages;
+  if ($nRedock > 0)
+  {
+    $nPhases++;
+  }
+  $filter = "$nPhases\n";
+  my $nr;
+  for (my $n = 0; $n < $nStages ; $n++)
+  {
+    $nr = $runs[$n] - 1;
+    $filter .= "if - $targets[$n] SCORE.INTER 1.0 if - SCORE.NRUNS $nr 0.0 -1.0,\n";
+  }
+  if ($nRedock > 0)
+  {
+    $nr = $nRedock - 1;
+    $filter .= "if - SCORE.NRUNS $nr 0.0 -1.0,\n";
+  }
+  my %filter_table = get_filter_table($filterDir, $redockTarget);
+  my @wfilters;
+  do {
+    print "\nSelect filters. Only the conformations that pass all the\n",
+          "filters chosen AND have SCORE.INTER lower than ", $redockTarget,
+          "\nAND SCORE.RESTR.CAVITY < 1.0 will be written down\n";
+    @wfilters = get_multiple_selection(\%filter_table,"filters");
+    $isOK = get_input("\nIs this OK (Y/N)","Y");
+  } until ($isOK eq "Y");
+
+  for (my $i = 0 ; $i < @wfilters ; $i++) # remove the no filters
+  {
+    if ($wfilters[$i] eq "1no_other_filters")
+    {
+      splice(@wfilters, $i, 1);
+    }
+  }
+
+  my $nwfilters = @wfilters + 2;
+  $filter .= "$nwfilters\n";
+  $filter .= "- SCORE.INTER $redockTarget,\n";
+  $filter .= "- SCORE.RESTR.CAVITY 1.0,\n";
+  foreach my $wfilter (@wfilters) {
+    open(FF, "$filterDir/$wfilter") || open(FF, "./$wfilter") ||
+           die "cannot open $wfilter for reading: $!";
+    my $line;
+    while ($line = <FF>){ $filter .= $line;}
+    $filter .= ",\n";
+  }
+}
+
+
+
+  my $runRoot = get_input("Enter run name","rbdock");
+
+  my $pwd = `pwd`;
+  chomp $pwd;
+
+  #Make a subdirectory for each docking stage
+  #plus a tmp directory
+  mkdir($runRoot,0755);
+  mkdir("$runRoot/tmp",0755);
+  mkdir("$runRoot/tmp/condor_logs",0755);
+  mkdir("$runRoot/log",0755);
+  mkdir("$runRoot/sd",0755);
+  mkdir("$runRoot/errors",0755);
+  chdir "$runRoot/tmp";
+
+  my $filterFile = "${runRoot}_filter.txt";
+  print "Writing $filterFile...\n";
+open FILTERHANDLE,">$filterFile";
+print FILTERHANDLE $filter;
+#WRITE THE CUSTOM DOCKING SCRIPT FOR USE BY CONDOR
+#This will be the Condor "Executable"
+#gzipped SD file to dock is passed as $1 argument
+#Run name is passed as $2 argument
+my $nqsFile = "run_${runRoot}.csh";
+print "Writing $nqsFile...\n";
+open NQSHANDLE,">$nqsFile";
+print NQSHANDLE "#!/bin/tcsh -f\n";
+print NQSHANDLE "#CONDOR EXECUTABLE SCRIPT WRITTEN BY run_rbscreen.pl\n";
+
+# DM 14 Mar 2003
+#Create a tmp runtime directory on the local disk for rDock SD and log files
+#These will be compressed before copying back to the submission directory
+#on the file server
+#There are now two tmp directories:
+# /tmp/ribodock/<version> - stores the grid files etc that are common between runs
+# /tmp/${PROG}.XXXXXX - stores the I/O SD and log files unique to each run
+
+print NQSHANDLE "set PROG = \${0:t}\n";
+print NQSHANDLE "set TMPDIR = `mktemp -d /tmp/\${PROG}.XXXXXX`\n";
+
+# synchronizing target files
+my $versionidx  = index $rbtroot,"ribodock";
+my $targetdir   = "/tmp/".substr($rbtroot,$versionidx);
+print NQSHANDLE "mkdir -pv -m0775 /tmp/ribodock\n";
+print NQSHANDLE "mkdir -pv -m0775 $targetdir\n";
+print NQSHANDLE "setenv RBT_HOME $targetdir\n";
+print NQSHANDLE "rsync -avz ".join(" ",@src_files)." $targetdir\n";
+
+# Copy the compressed sd file to the tmp directory and uncompress
+# DM 28 Feb 2002 - use the /tmp directory on the local disk instead
+# DM 14 Mar 2003 - use TMPDIR = /tmp/${PROG}.XXXXXX instead
+print NQSHANDLE "set gzfile = \$1\n";
+print NQSHANDLE "set runName = \$2\n";
+print NQSHANDLE "set copied_gzfile = \${TMPDIR}/\${gzfile:t}\n";
+print NQSHANDLE "set sdfile = \${copied_gzfile:r}\n";
+print NQSHANDLE "echo Ligand_DB : \$gzfile\n";
+print NQSHANDLE "echo Local copy: \$copied_gzfile\n";
+print NQSHANDLE "echo Unzipped  : \$sdfile\n";
+print NQSHANDLE "cp -f \$gzfile \$copied_gzfile\n";
+print NQSHANDLE "gunzip -f \$copied_gzfile\n";
+
+my $target = $redockTarget;
+print NQSHANDLE "#\n";
+print NQSHANDLE "set outRoot = \${TMPDIR}/\${runName}\n";
+print NQSHANDLE "set logFile = \${outRoot}.log\n";
+print NQSHANDLE "if (-e \$logFile) rm -f \$logFile\n";
+print NQSHANDLE "\$RBT_ROOT/bin/rbdock -i\$sdfile -o\${outRoot} -r$receptor -p$script -t$filterFile $flags > \$logFile\n";
+
+# Compress the SD and log files and copy back to submission directory
+# The ..._errors.sd files are generated by the pharmacophore restraints term for ligands that
+# do not fulfill the feature requirements
+print NQSHANDLE "if (-e \${outRoot}.sd) then\n";
+print NQSHANDLE "  gzip -9vf \${outRoot}.sd\n";
+print NQSHANDLE "  cp \${outRoot}.sd.gz ../sd\n";
+print NQSHANDLE "endif\n";
+print NQSHANDLE "if (-e \${logFile}) then\n";
+print NQSHANDLE "  gzip -9vf \${logFile}\n";
+print NQSHANDLE "  cp \${logFile}.gz ../log\n";
+print NQSHANDLE "endif\n";
+print NQSHANDLE "if (-e \${outRoot}_errors.sd) then\n";
+print NQSHANDLE "  gzip -9vf \${outRoot}_errors.sd\n";
+print NQSHANDLE "  cp \${outRoot}_errors.sd.gz ../errors\n";
+print NQSHANDLE "endif\n";
+
+print NQSHANDLE "rm -rf \${TMPDIR}\n";
+close NQSHANDLE;
+chmod 0755,$nqsFile;
+
+#PREPARE THE CONDOR JOB FILE
+my $condorFile = "run_${runRoot}.cmd";
+print "Writing $condorFile...\n";
+open CMDHANDLE,">$condorFile";
+print CMDHANDLE "#CONDOR COMMAND FILE WRITTEN BY run_rbscreen.pl\n";
+print CMDHANDLE "universe        = vanilla\n";
+print CMDHANDLE "initialdir      = $pwd/$runRoot/tmp\n";
+print CMDHANDLE "environment     = RBT_ROOT=$rbtroot;RBT_LIGDB=$rbtligdb;RBT_HOME=$rbthome;LD_LIBRARY_PATH=/usr/local/lib:$rbtroot/lib;\n";
+print CMDHANDLE "executable      = ${nqsFile}\n";
+#Stop Condor filling your mailbox
+print CMDHANDLE "Notification    = Error\n";
+print CMDHANDLE "log             = run_${runRoot}.log\n";
+# Added by DM, Enspiral Discovery, 28 Feb 2005
+# Prevent rDock jobs running on dedicated Catalyst nodes
+print CMDHANDLE "Requirements    = CATALYST_NODE =!= True\n";
+
+#GET THE LIST OF COMPRESSED SD FILES TO DOCK
+my $iFile=0;
+foreach my $library (@libraries) {
+  my @libFiles = glob "$library_table{$library}";
+  foreach my $libFile (@libFiles) {
+    $iFile++;
+    my $runName = "${runRoot}_$iFile";
+    print CMDHANDLE "output          = condor_logs/run_${runName}.out\n";
+    print CMDHANDLE "error           = condor_logs/run_${runName}.err\n";
+    print CMDHANDLE "arguments       = $libFile $runName\n";
+    print CMDHANDLE "queue\n";
+  }
+}
+close CMDHANDLE;
+
+#WRITE THE CUSTOM CLEANUP SCRIPT
+#This will be run by the user from the command line
+#after all the jobs have finished
+my $cleanFile = "clean_${runRoot}.csh";
+print "Writing $cleanFile...\n";
+open CLEANHANDLE,">$cleanFile";
+print CLEANHANDLE "#!/bin/tcsh -f\n";
+print CLEANHANDLE "#CLEANUP SCRIPT WRITTEN BY run_rbscreen.pl\n";
+print CLEANHANDLE "set sdout = ../${runRoot}_raw_hits.sd\n";
+print CLEANHANDLE "if (-e \${sdout}) then\n";
+print CLEANHANDLE "  echo ARE YOU SURE YOU WANT TO DO THIS\n";
+print CLEANHANDLE "  echo \${sdout} already exists\n";
+print CLEANHANDLE "  echo To rerun the script remove \${sdout} manually first\n";
+print CLEANHANDLE "  exit\n";
+print CLEANHANDLE "endif\n";
+print CLEANHANDLE "echo All compressed SD files in ../sd/ will be concatenated into a single compressed file \${sdout}.gz\n";
+print CLEANHANDLE "touch \${sdout}\n";
+print CLEANHANDLE "foreach file (../sd/*.sd.gz)\n";
+print CLEANHANDLE "  echo Sorting and concatenating \${file}...\n";
+print CLEANHANDLE "  zcat \${file} | \${RBT_ROOT}/bin/sdsort -n -fSCORE -s >> \${sdout}\n";
+print CLEANHANDLE "end\n";
+print CLEANHANDLE "echo Each block of consecutive records per ligand have been presorted by SCORE ";
+print CLEANHANDLE "ready for filtering\n";
+print CLEANHANDLE "echo Compressing \${sdout}...\n";
+print CLEANHANDLE "gzip -9vf \${sdout}\n";
+print CLEANHANDLE "echo DONE\n";
+print CLEANHANDLE "echo You may now safely remove the ../sd/ directory if you wish to save disk space\n";
+close CLEANHANDLE;
+chmod 0755,$cleanFile;
+
+print "To submit, cd $runRoot/tmp; condor_submit $condorFile\n";
+print "When all jobs have completed, run $runRoot/tmp/$cleanFile to concatenate the output files\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdfield	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+# add extra fields to SD records
+use lib "$ENV{'RBT_ROOT'}/lib";
+use SDRecord;
+
+printHelpAndExit() if (scalar(@ARGV) == 0);
+#Parse command line arguments
+my @files;
+my $arg = shift @ARGV;
+printHelpAndExit() if ($arg eq '-h');
+#read records
+my $sdRec = new SDRecord;
+my $nRec=0;
+# if not -h, then must be the FieldName
+my $FieldName = $arg;
+my $FieldVal  = shift @ARGV; # and this must be the filed value
+# rest are the filenames
+while ($sdRec->readRec('DATA'=>1,'LINES'=>1)) {
+	$sdRec->addData('_REC' => ++$nRec);#add record# as temp data field
+	$sdRec->addData('_FILE' => $ARGV);#add filename as temp data field
+	$sdRec->addDataAndLines($FieldName => $FieldVal);
+	$sdRec->writeRec();#write record
+}
+
+sub printHelpAndExit {
+	print "\nAdding fields to SD files\n";
+	print "Usage:\n";
+	print "\tsdfield <Fieldname> <value> <infile[s]>\n";
+	print "\t-h\tthis help\n";
+	print "\t\n";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdfilter	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,221 @@
+#!/usr/bin/perl
+# Filters SD records by given data field                                       *
+#                                                                              *
+# Usage:  sdfilter -f'$<DataField> <Operator> <Value>'  [sdFiles]              *
+#      or sdfilter -f<filename> [sdFiles]                                      *
+#                                                                              *
+#         where <filename> contains a list of <DataField> <Operator> <Value>   *
+#         filters, one per line.                                               *
+#         Multiple filters are OR'd together (for AND, pipe to a 2nd sdfilter) *
+#                                                                              *
+#         If sdFiles not given, reads from standard input                      *
+#         Output is to standard output                                         *
+#*******************************************************************************
+use lib "$ENV{'RBT_ROOT'}/lib";
+
+use SDRecord;
+
+#keep track of the number of occurrences of each value of this key
+my $summaryKey = "Name";
+
+#list of filter strings
+#all filters in the list are OR'd together
+my @SDFILTERS;
+
+# DM 5 Dec 2000 - new option to filter on diverse_molecules.list
+# as output by C2
+# Essentially just a list of record numbers
+my $cerius2filter = 0;
+my $cerius2file = "diverse_molecules.list";
+my @cerius2recs;
+
+#Print help if no command line arguments
+printHelpAndExit() if (scalar(@ARGV) == 0);
+
+#Parse command line arguments
+my @files;
+while (scalar(@ARGV)) {
+  my $arg = shift @ARGV;
+  printHelpAndExit() if ($arg eq '-h');
+  if (index($arg,'-f')==0) {
+    $arg = substr($arg,2);
+    #try opening the argument as a file
+    if (open FILTERFILE,$arg) {
+      #get the list of filter strings from the file
+      my @strList = <FILTERFILE>;
+      close FILTERFILE;
+      chomp @strList;
+      #build the filter strings into a proper Perl expression for eval
+      #and add to the list
+      push @SDFILTERS,buildFilterList(@strList);
+    }
+    else {
+      #build the filter string into a proper Perl expression for eval
+      #and add to the list
+      push @SDFILTERS, buildFilterList($arg);
+    }
+  }
+  elsif (index($arg,'-sup')==0) {
+    my $supplier = substr($arg,4);
+    my $supplierFilter = "scalar(grep /$supplier/, \@supplierList) > 0";
+    #print "Supplier filter = $supplierFilter\n";
+    push @SDFILTERS, $supplierFilter;
+  }
+  elsif (index($arg,'-s')==0) {
+    $summaryKey = substr($arg,2);
+  }
+  #DM 05 Dec 2000 - c2 option
+  elsif (index($arg,'-c2')==0) {
+    $cerius2filter = 1;
+    $arg = substr($arg,3);
+    $cerius2file = $arg if ($arg); #argument overrides default file name
+    if (open FILTERFILE,$cerius2file) {
+      #Skip first two lines
+      my $tmp = <FILTERFILE> || die "Invalid Cerius2 file $cerius2file";
+      $tmp = <FILTERFILE> || die "Invalid Cerius2 file $cerius2file";
+      while (<FILTERFILE>) {
+	chomp;
+	push @cerius2recs,split(" ",$_);
+      }
+      close FILTERFILE;
+    }
+    die "No records in $cerius2file\n" if (scalar(@cerius2recs)==0);
+    #Sort into reverse order so we can pop the record #'s off later
+    @cerius2recs = reverse (sort {$a<=>$b} @cerius2recs);
+    #foreach $rec (@cerius2recs) {
+    #  print "$rec\n";
+    #}
+  }
+  else {
+    push @files,$arg;#must be a filename
+  }
+}
+push @ARGV,@files;#put the filenames back in the arg list
+
+#read records
+my $sdRec = new SDRecord;
+my %count;#record the number of occurrences of each value of $summaryKey
+my $nRec=0;
+#In C2 mode, just need to check record numbers so we can read the SD file
+#without parsing the data fields
+if ($cerius2filter) {
+  my $c2nextRec = pop @cerius2recs;
+  while ($sdRec->readRec('LINES'=>1)) {
+    if ((++$nRec)==$c2nextRec) {
+      $sdRec->writeRec();
+      exit if (scalar(@cerius2recs)==0);
+      $c2nextRec=pop @cerius2recs;
+    }
+  }
+}
+#Normal mode, need to parse the data fields
+else {
+  while ($sdRec->readRec('DATA'=>1,'LINES'=>1)) {
+    $sdRec->addData('_REC' => ++$nRec);#add record# as temp data field
+    $sdRec->addData('_COUNT' => ++$count{$sdRec->{'DATA'}->{$summaryKey}});#add count as temp data field
+    my @supplierList = buildSupplierList($sdRec);
+    foreach $filter (@SDFILTERS) {
+      if (eval $filter) {
+	$sdRec->writeRec();#write record if one of the filters returns true
+	last;
+      }
+    }
+  }
+}
+
+#######################################################################
+# Compose a complete SD filter string for each string in the parameter list
+# Complete strings are of the form: $sdRec->{'DATA'}->{'Name'} eq RBT162"
+# Note: the SDRecord object name is hardcoded as $sdRec
+sub buildFilterList {
+  my @strList = @_;#input list of unexpanded filter strings
+  my @filterList;#output list of expanded filter strings
+
+  #loop over each input string
+  foreach $str (@strList) {
+    my $filter;
+    my $i1 = 0;
+    my $lenstr = length($str);
+    while ($i1 < $lenstr) {
+      #print "i1=$i1\n";
+      $i2 = index($str,"\$",$i1);
+      #print "i2=$i2\n";
+      if ($i2 == -1) {
+	$filter .= substr($str,$i1);
+	last;
+      }
+      $filter .= substr($str,$i1,$i2-$i1);
+      #DM 04 Sept 2000 - allow for spaces in field names
+      #by surrounding with {} brackets
+      #e.g. ${CDD id}
+      #Check for { first, if not found assume standard format (e.g. $Name)
+      my $i3 = index($str,"{",$i2+1);
+      if ($i3 == $i2+1) {
+	my $i4 = index($str,"}",$i3+1);
+	my $var;
+	if ($i4 == -1) {
+	  $var = substr($str,$i3+1);
+	  $filter .= "\$sdRec->{'DATA'}->{'$var'}";
+	  last;
+	}
+	else {
+	  $var = substr($str,$i3+1,$i4-$i3-1);
+	  $filter .= "\$sdRec->{'DATA'}->{'$var'}";
+	}
+	$i1 = $i4+1;
+      }
+      #Standard format
+      else {
+	$i3 = index($str," ",$i2+1);
+	#print "i3=$i3\n";
+	my $var;
+	if ($i3 == -1) {
+	  $var = substr($str,$i2+1);
+	  $filter .= "\$sdRec->{'DATA'}->{'$var'}";
+	  last;
+	}
+	else {
+	  $var = substr($str,$i2+1,$i3-$i2-1);
+	  $filter .= "\$sdRec->{'DATA'}->{'$var'}";
+	}
+	$i1 = $i3;
+      }
+    }
+    #print "$filter\n";
+    push @filterList,$filter;
+  }
+  return @filterList;
+}
+
+#######################################################################
+# Extract the list of suppliers from the Cat_coden multi-line data field
+sub buildSupplierList {
+  my $rec = shift;
+  my @suppliers;
+  my $suppBase = $rec->{'DATAREF'}->{'Cat_coden'}+1;
+  my $linesRef = $rec->{'LINES'};
+  if (defined $suppBase) {
+    while ($$linesRef[$suppBase] ne "") {
+      push @suppliers,$$linesRef[$suppBase++];
+    }
+  }
+  #print "Supplier list:\n";
+  #print join("\n",@suppliers);
+  return @suppliers;
+}
+
+#######################################################################
+sub printHelpAndExit {
+  print "\nFilters SD records by data fields\n";
+  print "\nUsage:\tsdfilter -f'\$<DataField> <Operator> <Value>' [-s<DataField>] [sdFiles]\n";
+  print "or\tsdfilter -f<filename> [-s<DataField>] [sdFiles]\n\n";
+  print "Note:\tMultiple filters are allowed and are OR'd together.\n";
+  print "\tFilters can be provided in a file, one per line.\n";
+  print "\n\tStandard Perl operators should be used. e.g.\n";
+  print "\teq ne lt gt le ge for strings\n\t== != < > <= >= for numeric\n";
+  print "\n\t_REC (record #) is provided as a pseudo-data field\n";
+  print "\tif -s option is used, _COUNT (#occurrences of DataField) is provided as a pseudo-data field\n";
+  print "\n\tIf SD file list not given, reads from standard input\n";
+  print "\tOutput is to standard output\n\n";
+  exit;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdmodify	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+# Script to set the first title line equal to a given data field
+#
+# Usage:  sdmodify -f<DataField> [sdFiles]
+#
+#         If sdFiles not given, reads from standard input
+#         Output is to standard output
+
+use lib "$ENV{'RBT_ROOT'}/lib";
+
+use SDRecord;
+
+my $SDMODKEY = "ID";
+
+#Print help if no command line arguments
+printHelpAndExit() if (scalar(@ARGV) == 0);
+
+#Parse command line arguments
+my @files;
+while (scalar(@ARGV)) {
+  my $arg = shift @ARGV;
+  printHelpAndExit() if ($arg eq '-h');
+  if (index($arg,'-f')==0) {
+    $SDMODKEY = substr($arg,2);#modification key
+  }
+  else {
+    push @files,$arg;#must be a filename
+  }
+}
+push @ARGV,@files;#put the filenames back in the arg list
+
+#read records
+my $sdRec = new SDRecord;
+my $nRec=0;
+while ($sdRec->readRec('DATA'=>1,'LINES'=>1)) {
+  $sdRec->addData('_REC' => ++$nRec);#add record# as temp data field
+  #set the first line equal to the given data field
+  my $nLines = scalar (@{$sdRec->{'LINES'}});
+  if ($nLines > 0) {
+    ${$sdRec->{'LINES'}}[0] = $sdRec->{'DATA'}->{$SDMODKEY};
+  }
+  $sdRec->writeRec();
+}
+
+#!/usr/bin/perl
+# Script to set the first title line equal to a given data field
+#
+# Usage:  sdmodify -f<DataField> [sdFiles]
+#
+#         If sdFiles not given, reads from standard input
+#         Output is to standard output
+
+sub printHelpAndExit {
+  print "\nScript to set the first title line equal to a given data field.\n";
+  print "\nUsage:\tsdmodify -f<DataField> [sdFiles]\n";
+  print "\n\tIf sdFiles not given, reads from standard input.\n";
+  print "\n\tOutput is to standard output.\n\n";
+  exit;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdreport	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,437 @@
+#!/usr/bin/perl
+# Produces text summaries of SD records                                        *
+#                                                                              *
+# Usage:  sdreport [-l] [-t] [-s<DataField>] [sdFiles]                         *
+#                                                                              *
+#         -l output data fields for each record as processed                   *
+#         -t tabulate Rbt.Score.* fields for each record as processed          *
+#         -s summarise data fields for all records                             *
+#         -s<DataField> summarise data fields for each unique value            *
+#                       of <DataField>                                         *
+#                                                                              *
+# Note:   If -l or -t are combined with -s, the listing/table is output        *
+#         within each ligand summary                                           *
+#         If sdFiles not given, reads from standard input                      *
+#         Output is to standard output                                         *
+#         Default is equivalent to sdfilter -l                                 *
+#*******************************************************************************
+use lib "$ENV{'RBT_ROOT'}/lib";
+
+use SDRecord;
+
+# Default field names and headings for rDock v4.00 scores
+my @defaultFields = ('SCORE','SCORE.INTER','SCORE.INTRA','SCORE.RESTR','SCORE.INTER.VDW');
+my @defaultHeadings = ('TOTAL','INTER','INTRA','RESTR','VDW');
+
+# Default field names and headings for normalised scores (score / #ligand heavy atoms)
+my @defaultNormFields = ('SCORE.norm','SCORE.INTER.norm','SCORE.INTRA.norm','SCORE.RESTR.norm','SCORE.heavy');
+my @defaultNormHeadings = ('TOTALn','INTERn','INTRAn','RESTRn','#heavy');
+
+# Default field names and headings for rDock v3.00 scores
+my @defaultOldFields = ('Rbt.Score.Corrected','Rbt.Score.Inter','Rbt.Score.Intra','Rbt.Score.IntraMin','Rbt.Score.Restraint');
+my @defaultOldHeadings = ('TOTAL','INTER','INTRA','INTRAMIN','RESTR');
+
+my $listFormat = 0;
+my $summaryFormat = 0;
+my $tableFormat = 0;
+my $supplierFormat = 0;
+my $csvFormat = 0;
+my $summaryKey = "_TITLE1";
+my $oldFields = 0;#If true, use old default field names for component scores
+my $headings = 1;#DM 21 Nov 2000, If false, don't output headings
+my @outputFields;
+my @outputHeadings;
+
+#Print help if no command line arguments
+printHelpAndExit() if (scalar(@ARGV) == 0);
+
+#Parse command line arguments
+my $nArgs = scalar(@ARGV);
+
+while (scalar(@ARGV)) {
+  my $arg = shift @ARGV;
+  printHelpAndExit() if ($arg eq '-h');
+  if (index($arg,'-l')==0) {
+    $listFormat = 1;
+  }
+  elsif (index($arg,'-o')==0) {
+    $oldFields = 1;
+  }
+  # 7 Feb 2005 (DM) Option to report normalised aggregate scores
+  elsif (index($arg,'-norm')==0) {
+    $oldFields = 2;
+  }
+  elsif (index($arg,'-sup')==0) {
+    $supplierFormat = 1;
+  }
+  elsif (index($arg,'-s')==0) {
+    $summaryFormat = 1;
+  }
+  elsif (index($arg,'-id')==0) {
+    $summaryKey = substr($arg,3);
+  }
+  elsif (index($arg,'-nh')==0) {
+    $headings = 0;
+  }
+  elsif (index($arg,'-t')==0) {
+    $tableFormat = 1;
+    push @outputFields, split(',',substr($arg,2));
+    push @outputHeadings, @outputFields;
+  }
+  elsif (index($arg,'-c')==0) {
+    $csvFormat = 1;
+    push @outputFields, split(',',substr($arg,2));
+    push @outputHeadings, @outputFields;
+  }
+  else {
+    push @files,$arg;#must be a filename
+  }
+}
+push @ARGV,@files;#put the filenames back in the arg list
+
+#use -l if neither table format is specified
+$listFormat = (!$tableFormat && !$csvFormat && !$supplierFormat);
+
+#If no output fields defined for -t or -c use the defaults (old or new)
+if (scalar(@outputFields)==0) {
+	if ($oldFields == 1) {
+		@outputFields = @defaultOldFields;
+		@outputHeadings = @defaultOldHeadings;
+	}
+	elsif ($oldFields == 2) {
+                @outputFields = @defaultNormFields;
+                @outputHeadings = @defaultNormHeadings;
+	}
+	else {
+		@outputFields = @defaultFields;
+		@outputHeadings = @defaultHeadings;
+	}
+}
+
+my $sdRec = new SDRecord;
+my %summary;#hash of SDRecord lists, indexed by user-defined summary key
+my %indexByName;
+my %indexByNum;
+my $idx = 0;
+my $nRec = 0;
+
+#Column headings for tab and csv format
+#DM 21 Nov 2000 - if $headings is false, then don't output the column headings
+if ($tableFormat && !$summaryFormat && $headings) {
+  tabHeadings($summaryKey,@outputHeadings);
+}
+if ($csvFormat && !$summaryFormat && $headings) {
+  csvHeadings($summaryKey,@outputHeadings);
+}
+
+#read records
+while ($sdRec->readRec('LINES'=>1,'DATA'=>1)) {
+  $sdRec->addData('_REC' => ++$nRec);#add record# as temp data field
+  if ($listFormat && !$summaryFormat) {
+    print "\n\nRECORD #$nRec\n";
+    $sdRec->writeData();
+  }
+  if ($tableFormat && !$summaryFormat) {
+    @recList = ($sdRec);
+    tabScores(\@recList,$summaryKey,@outputFields);
+  }
+  elsif ($csvFormat && !$summaryFormat) {
+    @recList = ($sdRec);
+    csvScores(\@recList,$summaryKey,@outputFields);
+  }
+  elsif ($supplierFormat && !$summaryFormat) {
+    @recList = ($sdRec);
+    tabulateSuppliers(\@recList,$summaryKey);
+  }
+  #add record to summary, indexed by user field value
+  #keep a separate index of unique values of user field values,
+  #indexed by number in the order the values first appear
+  #In this way we can maintain the sorted order of ligands
+  #when we come to print out the summary
+  if ($summaryFormat) {
+    my $summaryValue = $sdRec->{'DATA'}->{$summaryKey};
+    #New data field value encountered
+    if (!defined $indexByName{$summaryValue}) {
+      $idx++;#incr the number of unique values
+      #keep cross-referenced indexes (field value <-> number)
+      $indexByName{$summaryValue} = $idx;
+      $indexByNum{$idx} = $summaryValue;
+    }
+    push @{$summary{$summaryValue}},$sdRec->copy('DATA'=>1);
+  }
+}
+
+#Print summary if required
+if ($summaryFormat) {
+  print "\n===============================================================\n";
+  print "SUMMARY BY $summaryKey\n";
+  foreach $idx (sort {$a<=>$b} keys %indexByNum) {#numberic sort of index numbers
+    my $key = $indexByNum{$idx};#look up corresponding data field value
+    print "\n===============================================================\n";
+    print "$summaryKey = $key (#$idx)\n\n";
+    writeSummary($summary{$key});
+    if ($listFormat) {
+      print "\nIndividual records:\n";
+      foreach $rec (@{$summary{$key}}) {
+        print "\n";
+	      $rec->writeData();
+      }
+    }
+    if ($tableFormat) {
+      print "\nScores:\n";
+      tabHeadings($summaryKey,@outputHeadings);
+      tabScores($summary{$key},$summaryKey,@outputFields);
+    }
+    if ($csvFormat) {
+      print "\nScores:\n";
+      csvHeadings($summaryKey,@outputHeadings);
+      csvScores($summary{$key},$summaryKey,@outputFields);
+    }
+  }
+}
+
+##############################################################
+# writes a summary to STDOUT for a list of SDRecords
+# Input is a reference to an array of SDRecords
+sub writeSummary {
+  my $recListRef = shift;
+
+  #Extract the list of data values for each fieldname into a hash array
+  #(key=fieldname, value=array ref)
+  my %fields;
+  foreach $rec (@{$recListRef}) {
+    my ($key,$value);
+    while ( ($key,$value) = each %{$rec->{'DATA'}}) {
+      push @{$fields{$key}},$value;
+    }
+  }
+
+  #Look for constant fields and store separately
+  my %constFields;
+  foreach $key (keys %fields) {
+    my @values = sort  @{$fields{$key}};
+    my $nVal = scalar(@values);
+    if ($values[0] eq $values[$nVal -1]) {
+      $constFields{$key} = $values[0];#store the field name and the constant value
+      delete $fields{$key};#remove from (non-const) array
+    }
+  }
+
+  #Print constant fields
+  print "\nConstant fields:\n\n";
+  foreach $key (sort keys %constFields) {
+    printf "%-40s%s\n",$key,$constFields{$key};
+  }
+  #Print min and max value for non-const fields
+  print "\nVariable fields:\n\n";
+  foreach $key (sort keys %fields) {
+    my @values = @{$fields{$key}};
+    #Look at first value to decide whether to do text or numeric sort
+    if (isNaN($values[0])) {
+      @values = sort @values;#text sort
+    }
+    else {
+      @values = sort {$a <=> $b} @values;#numeric sort
+    }
+    my $nVal = scalar(@values);
+    printf "%-40s",$key;
+    print "Min = $values[0]\tMax = $values[$nVal-1]\t(N = $nVal)\n";
+  }
+}
+
+##############################################################
+# function isNaN equivalent to the C++, java, javascript isNaN
+# From P Vaglio, ~intranet/lib/rbt_func.pl
+sub isNaN {
+  local($_) = @_;
+  s/\s+//g;         # strip white space
+  # match +or- beginning of line 0 or 1 time
+  # then any numeric 0 or more
+  # then a decimal point
+  # then any numeric 0 or more after decimal point
+  # then possibly an e or E then + or - then any numreci at least once
+  if (/^([+-]?)(\d*)(\.(\d*))?([Ee]([+-]?\d+))?$/ && (defined $2 || defined $4)) {
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+##############################################################
+# output corresponding headings for use with tabScores
+sub tabHeadings {
+  my $summaryKey = shift;
+  my @fieldNames = @_;
+  printf("%-10s%-30s","REC",$summaryKey);
+  foreach $field (@fieldNames) {
+    printf("%10s",$field);
+  }
+  print "\n";
+}
+
+##############################################################
+# tab-delimited output of named data field values
+sub tabScores {
+  my $recListRef = shift;
+  my $summaryKey = shift;
+  my @fieldNames = @_;
+  foreach $rec (@{$recListRef}) {
+    printf("%03d\t%-30.30s",$rec->{'DATA'}->{'_REC'},$rec->{'DATA'}->{$summaryKey});
+    foreach $field (@fieldNames) {
+      my $val = $rec->{'DATA'}->{$field};
+      if (isNaN($val)) {
+        printf("%-10.12s",$val);
+      }
+      else {
+        printf("%10.3f",$val);
+      }
+    }
+    print "\n";
+  }
+}
+
+##############################################################
+# output corresponding headings for use with csvScores
+sub csvHeadings {
+  my $summaryKey = shift;
+  my @fieldNames = @_;
+  printf("%s,%s","REC",$summaryKey);
+  foreach $field (@fieldNames) {
+    printf(",%s",$field);
+  }
+  print "\n";
+}
+
+##############################################################
+# comma-delimited output of named data field values
+sub csvScores {
+  my $recListRef = shift;
+  my $summaryKey = shift;
+  my @fieldNames = @_;
+  foreach $rec (@{$recListRef}) {
+    printf("%d,%s",$rec->{'DATA'}->{'_REC'},$rec->{'DATA'}->{$summaryKey});
+    foreach $field (@fieldNames) {
+      my $val = $rec->{'DATA'}->{$field};
+      if (isNaN($val)) {
+        printf(",%s",$val);
+      }
+      else {
+        printf(",%.3f",$val);
+      }
+    }
+    print "\n";
+  }
+}
+
+
+##############################################################
+# standardised output of Catalyst supplier field
+# Input is a reference to an array of SDRecords
+# and a ligand identifier field to output in column 1 (def=Name)
+sub tabulateSuppliers {
+  my $recListRef = shift;
+  my $summaryKey = shift || 'Name';
+  foreach $rec (@{$recListRef}) {
+    my $suppBase = $rec->{'DATAREF'}->{'Supplier'}+1;
+    my $linesRef = $rec->{'LINES'};
+    my $name = $rec->{'DATA'}->{$summaryKey};
+
+    #Output some useful compound info
+    my $name = $rec->{'DATA'}->{$summaryKey};
+    my $molFormula = $rec->{'DATA'}->{'MolFormula'};
+    my $molWt = $rec->{'DATA'}->{'MolWt'};
+    my $casNum = $rec->{'DATA'}->{'CAS_num'};
+    my $mdlNum = $rec->{'DATA'}->{'MDLNUMBER'};
+    print "\n\n====================================================================================================\n";
+    printf("%-10.10s%s\n","Name:",$name);
+    printf("%-10.10s%s\n","Formula:",$molFormula);
+    printf("%-10.10s%s\n","Mol.wt:",$molWt);
+    printf("%-10.10s%s\n","CAS #:",$casNum);
+    printf("%-10.10s%s\n","MDL #:",$mdlNum);
+
+    #Get all the supplier record lines into a list
+    #Record is terminated by blank line
+    my @lines;
+    my $nLines = 0;
+    for (; $$linesRef[$suppBase+$nLines] ne ""; $nLines++) {
+      push @lines,$$linesRef[$suppBase+$nLines];
+    }
+
+    #Column headings
+    printf("\n%-20.20s%-40.40s%-40.40s\n",
+	   "Supplier",
+	   "Comment",
+	   "Price"
+	  );
+    print "----------------------------------------------------------------------------------------------------\n";
+
+    #Loop over each supplier
+    my $iLine = 0;
+    for (; $iLine < $nLines; $iLine++) {
+      #collect supplier info lines
+      my @supplierInfo = ();
+      for (; $lines[$iLine] ne "." && $iLine < $nLines; $iLine++) {
+	push @supplierInfo,$lines[$iLine];
+      }
+      #Check for incomplete record
+      if ($iLine == $nLines) {
+	print "** INCOMPLETE RECORD **\n";
+	last;
+      }
+      my $nSupplierInfo = scalar(@supplierInfo);
+      my $supplier = $supplierInfo[0];
+      #loop over each grade
+      for ($iLine++; ($lines[$iLine] ne "........................") && ($iLine < $nLines); $iLine++) {
+	#collect grade info lines
+	my @gradeInfo = ();
+	for (; index($lines[$iLine],"_") ne 0 && $iLine < $nLines; $iLine++) {
+	  push @gradeInfo,$lines[$iLine];
+	}
+	#Check for incomplete record
+	if ($iLine == $nLines) {
+	  print "** INCOMPLETE RECORD **\n";
+	  last;
+	}
+	my $grade = $gradeInfo[0];
+	#loop over each price info line
+	for (; index($lines[$iLine],".") ne 0 && $iLine < $nLines; $iLine++) {
+	  my @priceInfo = split(" ",$lines[$iLine]);
+	  my $price = join(" ",@priceInfo);
+	  printf("%-20.20s%-40.39s%-40.40s\n",
+		 $supplier,
+		 $grade,
+		 $price);
+	}
+	#Check for incomplete record
+	if ($iLine == $nLines) {
+	  print "** INCOMPLETE RECORD **\n";
+	  last;
+	}
+	last if $lines[$iLine] eq "........................";
+      }
+    }
+  }
+}
+
+
+#######################################################################
+sub printHelpAndExit {
+  print "\nProduces text summaries of SD records\n";
+  print "\nUsage:\tsdreport [-l] [-t[<FieldName,FieldName...>]] [-c<FieldName,FieldName...>] [-id<IDField>] [-nh] [-o] [-s] [-sup] [sdFiles]\n";
+  print "\n\t-l (list format) output all data fields for each record as processed\n";
+  print "\t-t (tab format) tabulate selected fields for each record as processed\n";
+  print "\t-c (csv format) comma delimited output of selected fields for each record as processed\n";
+  print "\t-s (summary format) output summary statistics for each unique value of ligand ID\n";
+  print "\t-sup (supplier format) tabulate supplier details (from Catalyst)\n";
+  print "\t-id<IDField> data field to use as ligand ID\n";
+  print "\t-nh don't output column headings in -t and -c formats\n";
+  print "\t-o use old (v3.00) score field names as default columns in -t and -c formats, else use v4.00 field names\n";
+  print "\t-norm use normalised score field names as default columns in -t and -c formats (normalised = score / #ligand heavy atoms)\n";
+  print "\nNote:\tIf -l, -t or -c are combined with -s, the listing/table is output within each ligand summary\n";
+  print "\t-sup should not be combined with other options\n";
+  print "\tDefault field names for -t and -c are rDock score field names\n";
+  print "\tDefault ID field name is Name\n";
+  print "\n\tIf sdFiles not given, reads from standard input\n";
+  print "\tOutput is to standard output\n\n";
+  exit;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdrmsd	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,272 @@
+#! /usr/bin/env python
+#
+# Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT)
+# Script distributed under GNU LGPL 3.0 along rDock software.
+#
+# This algorithm takes into account molecular automorphism. That is, it identifies
+# molecules which are the same but might have atom orders changed and still be able to
+# match the pairs and correctly calculate the RMSD.
+#
+# Author: Daniel Alvarez-Garcia
+# Date: 08-11-2013
+
+import math
+import pybel
+import numpy as npy
+import optparse
+
+def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False):
+    """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654
+    definition : superpose3D(ref, target, weights,refmask,targetmask)
+    @parameter 1 :  ref - xyz coordinates of the reference structure (the ligand for instance)
+    @type 1 :       float64 numpy array (nx3)
+    ---
+    @parameter 2 :  target - theoretical target positions to which we should move (does not need to be physically relevant.
+    @type 2 :       float 64 numpy array (nx3)
+    ---
+    @parameter 3:   weights - numpy array of atom weights (usuallly between 0 and 1)
+    @type 3 :       float 64 numpy array (n)
+    @parameter 4:   mask - a numpy boolean mask for designating atoms to include
+    Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms)
+    Returns a set of new coordinates, aligned to the target state as well as the rmsd
+    """
+    if weights == None :
+        weights=1.0
+    if refmask == None :
+        refmask=npy.ones(len(ref),"bool")
+    if targetmask == None :
+        targetmask=npy.ones(len(target),"bool")
+    #first get the centroid of both states
+    ref_centroid = npy.mean(ref[refmask]*weights,axis=0)
+    #print ref_centroid
+    refCenteredCoords=ref-ref_centroid
+    #print refCenteredCoords
+    target_centroid=npy.mean(target[targetmask]*weights,axis=0)
+    targetCenteredCoords=target-target_centroid
+    #print targetCenteredCoords
+    #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm
+    # Initial residual, see Kabsch.
+    E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0)
+    reftmp=npy.copy(refCenteredCoords[refmask])
+    targettmp=npy.copy(targetCenteredCoords[targetmask])
+    #print refCenteredCoords[refmask]
+    #single value decomposition of the dotProduct of both position vectors
+    try:
+        dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights)
+        V, S, Wt = npy.linalg.svd(dotProd )
+    except Exception:
+        try:
+            dotProd = npy.dot( npy.transpose(reftmp), targettmp)
+            V, S, Wt = npy.linalg.svd(dotProd )
+        except Exception:
+            print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment"
+        return ref, 0
+    # we already have our solution, in the results from SVD.
+    # we just need to check for reflections and then produce
+    # the rotation.  V and Wt are orthonormal, so their det's
+    # are +/-1.
+    reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt))))
+    if reflect == -1.0:
+        S[-1] = -S[-1]
+        V[:,-1] = -V[:,-1]
+    rmsd = E0 - (2.0 * sum(S))
+    rmsd = npy.sqrt(abs(rmsd / len(ref[refmask])))   #get the rmsd
+    #U is simply V*Wt
+    U = npy.dot(V, Wt)  #get the rotation matrix
+    # rotate and translate the molecule
+    new_coords = npy.dot((refCenteredCoords), U)+ target_centroid  #translate & rotate
+    #new_coords=(refCenteredCoords + target_centroid)
+    #print U
+    if returnRotMat :
+        return new_coords,rmsd, U
+    return new_coords,rmsd
+
+
+def squared_distance(coordsA, coordsB):
+    """Find the squared distance between two 3-tuples"""
+    sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) )
+    return sqrdist
+
+def rmsd(allcoordsA, allcoordsB):
+    """Find the RMSD between two lists of 3-tuples"""
+    deviation = sum(squared_distance(atomA, atomB) for
+                    (atomA, atomB) in zip(allcoordsA, allcoordsB))
+    return math.sqrt(deviation / float(len(allcoordsA)))
+
+def mapToCrystal(xtal, pose):
+    """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...)
+     this will mess up the rmsd calculation with OpenBabel"""
+    query = pybel.ob.CompileMoleculeQuery(xtal.OBMol)
+    mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query)
+    mappingpose = pybel.ob.vvpairUIntUInt()
+    exit=mapper.MapUnique(pose.OBMol,mappingpose)
+    return mappingpose[0]
+
+def parseArguments():
+	optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog
+	epilog = """Args:
+	reference.sdf		SDF file with the reference molecule.
+	input.sdf		SDF file with the molecules to be compared to reference.\n"""
+	parser = optparse.OptionParser("usage: %prog [options] reference.sdf input.sdf", epilog=epilog)
+	parser.add_option("-f", "--fit",dest="fit", action="store_true", default=False,
+                  help="Superpose molecules before RMSD calculation")
+	parser.add_option("--threshold","-t",dest="threshold", action="store", nargs=1,
+                  help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float)
+	parser.add_option("-o","--out", dest="outfilename", metavar="FILE", default=False,
+                  help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.")
+	(options, args) =  parser.parse_args()
+
+	#Check we have two arguments
+	if len(args) < 2:
+		parser.error("Incorrect number of arguments. Use -h or --help options to print help.")
+
+	return options, args
+
+def updateCoords(obmol, newcoords):
+    "Update OBMol coordinates. newcoords is a numpy array"
+    for i,atom in enumerate(obmol):
+        atom.OBAtom.SetVector(*newcoords[i])
+
+def getAutomorphRMSD(target, molec, fit=False):
+    """
+    Use Automorphism to reorder target coordinates to match ref coordinates atom order
+    for correct RMSD comparison. Only the lowest RMSD will be returned.
+
+    Returns:
+      If fit=False: 	bestRMSD	(float)					RMSD of the best matching mapping.
+      If fit=True:	(bestRMSD, molecCoordinates)	(float, npy.array)	RMSD of best match and its molecule fitted coordinates.
+    """
+    mappings = pybel.ob.vvpairUIntUInt()
+    bitvec = pybel.ob.OBBitVec()
+    lookup = []
+    for i, atom in enumerate(target):
+        lookup.append(i)
+    success = pybel.ob.FindAutomorphisms(target.OBMol, mappings)
+    targetcoords = [atom.coords for atom in target]
+    mappose = npy.array(mapToCrystal(target, molec))
+    mappose = mappose[npy.argsort(mappose[:,0])][:,1]
+    posecoords = npy.array([atom.coords for atom in molec])[mappose]
+    resultrmsd = 999999999999
+    for mapping in mappings:
+	automorph_coords = [None] * len(targetcoords)
+	for x, y in mapping:
+	    automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)]
+	mapping_rmsd = rmsd(posecoords, automorph_coords)
+	if mapping_rmsd < resultrmsd:
+	    resultrmsd = mapping_rmsd
+	    fitted_result = False
+	if fit:
+	    fitted_pose, fitted_rmsd = superpose3D(npy.array(automorph_coords), npy.array(posecoords))
+	    if fitted_rmsd < resultrmsd:
+		resultrmsd = fitted_rmsd
+		fitted_result = fitted_pose
+
+    if fit:
+      return (resultrmsd, fitted_pose)
+    else:
+      return resultrmsd
+
+def saveMolecWithRMSD(outsdf, molec, rmsd, population=False):
+    newData = pybel.ob.OBPairData()
+    newData.SetAttribute("RMSD")
+    newData.SetValue('%.3f'%rmsd)
+
+    if population:
+	popData = pybel.ob.OBPairData()
+	popData.SetAttribute("Population")
+	popData.SetValue('%i'%population)
+	molec.OBMol.CloneData(popData)
+
+    molec.OBMol.CloneData(newData)           # Add new data
+    outsdf.write(molec)
+
+if __name__ == "__main__":
+    import sys, os
+
+    (opts, args) = parseArguments()
+
+    xtal = args[0]
+    poses = args[1]
+
+    if not os.path.exists(xtal) or not os.path.exists(poses):
+	sys.exit("Input files not found. Please check the path given is correct.")
+
+    fit = opts.fit
+    outfname = opts.outfilename
+    threshold = opts.threshold
+
+    # Read crystal pose
+    crystal = next(pybel.readfile("sdf", xtal))
+    crystal.removeh()
+    crystalnumatoms = len(crystal.atoms)
+
+    #If outfname is defined, prepare an output SDF sink to write molecules
+    if outfname:
+	outsdf = pybel.Outputfile('sdf', outfname, overwrite=True)
+
+    # Find the RMSD between the crystal pose and each docked pose
+    dockedposes = pybel.readfile("sdf", poses)
+    if fit: print "POSE\tRMSD_FIT"
+    else: print "POSE\tRMSD_NOFIT"
+    skipped = []
+    moleclist = {}	# Save all poses with their dockid
+    population = {}	# Poses to be written
+    outlist = {}
+    for docki, dockedpose in enumerate(dockedposes):
+        dockedpose.removeh()
+	natoms = len(dockedpose.atoms)
+	if natoms != crystalnumatoms:
+		skipped.append(docki+1)
+		continue
+	if fit:
+	    resultrmsd, fitted_result = getAutomorphRMSD(crystal, dockedpose, fit=True)
+	    updateCoords(dockedpose, fitted_result)
+	else:
+	    resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False)
+
+	if threshold:
+	    # Calculate RMSD between all previous poses
+	    # Discard if rmsd < FILTER threshold
+	    if moleclist:
+		match = None
+		bestmatchrmsd = 999999
+		for did,prevmol in moleclist.iteritems():
+		    tmprmsd = getAutomorphRMSD(prevmol, dockedpose)
+		    if tmprmsd < threshold:
+			if tmprmsd < bestmatchrmsd:
+			    bestmatchrmsd = tmprmsd
+			    match = did
+
+		if match != None:
+		    # Do not write this one
+		    # sum one up to the matching previous molecule id
+		    print >> sys.stderr, "Pose %i matches pose %i with %.3f RMSD"%(docki+1, match+1, bestmatchrmsd)
+		    population[match] += 1
+		else:
+		    # There's no match. Print info for this one and write to outsdf if needed
+		    # Save this one!
+		    if outfname: outlist[docki] = (dockedpose, resultrmsd)
+		    print "%d\t%.2f"%((docki+1),resultrmsd)
+		    moleclist[docki] = dockedpose
+		    population[docki] = 1
+	    else:
+		# First molecule in list. Append for sure
+		moleclist[docki] = dockedpose
+		population[docki] = 1
+		if outfname: outlist[docki] = (dockedpose, resultrmsd)
+	else:
+	    # Just write best rmsd found and the molecule to outsdf if demanded
+	    if outfname: saveMolecWithRMSD(outsdf, dockedpose, resultrmsd)
+	    print "%d\t%.2f"%((docki+1),resultrmsd)
+
+    if outlist:
+	# Threshold applied and outlist need to be written
+	for docki in sorted(outlist.iterkeys()):
+	    molrmsd = outlist[docki]
+	    # Get number of matchs in thresholding operation
+	    pop = population.get(docki)
+	    if not pop: pop = 1
+	    # Save molecule
+	    saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop)
+
+    if skipped: print >> sys.stderr, "SKIPPED input molecules due to number of atom missmatch: %s"%skipped
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdsort	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,98 @@
+#!/usr/bin/perl
+# Sorts SD records by given data field
+use lib "$ENV{'RBT_ROOT'}/lib";
+
+use SDRecord;
+
+my $SDSORTKEY; # sort key
+my $SDSORTASCEND = 1;# 1 = ascending, 0 = descending
+my $SDSORTTEXT = 1;# 1 = text sort, 0 = numeric sort
+
+my $FASTFORMAT = 0;
+my $FASTKEY = "_TITLE1";
+
+#Print help if no command line arguments
+printHelpAndExit() if (scalar(@ARGV) == 0);
+
+#Parse command line arguments
+my @files;
+while (scalar(@ARGV)) {
+  $arg = shift @ARGV;
+  printHelpAndExit() if ($arg eq '-h');
+  if ($arg eq '-r') {
+    $SDSORTASCEND = 0;#descending sort requested
+  }
+  elsif ($arg eq '-n') {
+    $SDSORTTEXT = 0;;#numeric sort requested
+  }
+  elsif (index($arg,'-s')==0) {
+    $FASTFORMAT = 1;
+  }
+  elsif (index($arg,'-id')==0) {
+    $FASTKEY = substr($arg,3);
+  }
+  elsif (index($arg,'-f')==0) {
+    $SDSORTKEY = substr($arg,2);#sort key
+  }
+  else {
+    push @files,$arg;#must be a filename
+  }
+}
+push @ARGV,@files;#put the filenames back in the arg list
+
+#read records
+my $sdRec = new SDRecord;
+my @records;
+my $nRec=0;
+
+my $lastid="";
+while ($sdRec->readRec('DATA'=>1,'LINES'=>1)) {
+  $sdRec->addData('_REC' => ++$nRec);#add record# as temp data field
+  if ($FASTFORMAT) {
+    my $id = $sdRec->{'DATA'}->{$FASTKEY};
+    if (($lastid ne "") && ($lastid ne $id)) {
+      foreach $rec (sort sortSD @records) {
+	$rec->writeRec();
+      }
+      @records = ();#clear the list
+    }
+    $lastid = $id;
+  }
+  push(@records,$sdRec->copy('DATA'=>1,'LINES'=>1));
+}
+
+#write sorted records
+foreach $rec (sort sortSD @records) {
+  $rec->writeRec();
+}
+
+#######################################################
+# sort function to sort SD records by given field
+# handles text/numeric and ascending/descending sort
+sub sortSD {
+  if ($SDSORTTEXT) {
+    return $a->{'DATA'}->{$SDSORTKEY} cmp $b->{'DATA'}->{$SDSORTKEY} if ($SDSORTASCEND);
+    return $b->{'DATA'}->{$SDSORTKEY} cmp $a->{'DATA'}->{$SDSORTKEY};
+  }
+  else {
+    return $a->{'DATA'}->{$SDSORTKEY} <=> $b->{'DATA'}->{$SDSORTKEY} if ($SDSORTASCEND);
+    return $b->{'DATA'}->{$SDSORTKEY} <=> $a->{'DATA'}->{$SDSORTKEY};
+  }
+}
+
+#######################################################################
+sub printHelpAndExit {
+  print "\nSorts SD records by given data field\n";
+  print "\nUsage:\tsdsort [-n] [-r] [-f<DataField>] [sdFiles]\n\n";
+  print "\t-n\t\tnumeric sort (default is text sort)\n";
+  print "\t-r\t\tdescending sort (default is ascending sort)\n";
+  print "\t-f<DataField>\tspecifies sort field\n";
+  print "\t-s\t\tfast mode. Sorts the records for each named compound independently (must be consecutive)\n";
+  print "\t-id<NameField>\tspecifies compound name field (default = 1st title line)\n\n";
+  print "Note:\t_REC (record #) is provided as a pseudo-data field\n";
+  print "\n\tIf SD file list not given, reads from standard input\n";
+  print "\tOutput is to standard output\n";
+  print "\tFast mode can be safely used for partial sorting of huge SD files of raw docking hits\n";
+  print "\twithout running into memory problems.\n\n";
+  exit;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdsplit	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,72 @@
+#!/usr/bin/perl
+# Splits SD records into multiple files of equal no. of records
+use lib "$ENV{'RBT_ROOT'}/lib";
+
+use FileHandle;
+use SDRecord;
+
+# Record size to split into
+my $recSize = 1000;
+
+#output root
+my $outRoot = "tmp";
+
+#Print help if no command line arguments
+printHelpAndExit() if (scalar(@ARGV) == 0);
+
+#Parse command line arguments
+my @files;
+while (scalar(@ARGV)) {
+  my $arg = shift @ARGV;
+  printHelpAndExit() if ($arg eq '-h');
+  if (index($arg,'-o')==0) {
+    $outRoot = substr($arg,2);
+  }
+  elsif (index($arg,'-')==0) {
+    $recSize = substr($arg,1);
+  }
+  else {
+    push @files,$arg;#must be a filename
+  }
+}
+push @ARGV,@files;#put the filenames back in the arg list
+
+my $sdRec = new SDRecord;
+my $nRec=0;
+my $nFile=0;
+my $sdfh;
+
+#read records
+while ($sdRec->readRec('LINES'=>1)) {
+  #check if we need to start a new output file
+  if ($nRec % $recSize == 0) {
+    $nFile++;
+    my $outFile = $outRoot . $nFile . ".sd";
+    if (defined $sdfh) {
+      undef $sdfh;
+    }
+    $sdfh = new FileHandle ">$outFile";
+    if (!defined $sdfh) {
+      die "Can't open $outFile";
+    }
+    else {
+      print STDOUT "Opening $outFile\n";
+    }
+    $sdfh->autoflush(1);
+    select($sdfh);
+  }
+  $nRec++;
+  $sdRec->writeRec();
+}
+undef $sdfh;
+select(STDOUT);#reselect STDOUT as default
+
+#######################################################################
+sub printHelpAndExit {
+  print "\Splits SD records into multiple files of equal size\n";
+  print "\nUsage:\tsdsplit [-<RecSize>] [-o<OutputRoot>] [sdFiles]\n\n";
+  print "\t-<RecSize>\trecord size to split into (default = 1000 records)\n";
+  print "\t-o<OutputRoot>\tRoot name for output files (default = tmp)\n";
+  print "\n\tIf SD file list not given, reads from standard input\n";
+  exit;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/sdtether	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,263 @@
+#! /usr/bin/env python
+#
+# Substitute for rbtether of rDock. Will align input molecules to a reference fragment defined by a smarts string,
+# it will add a TETHERED ATOM property field to the output SDF that is correctly understood by rDock
+# rDock will restrain the matching atom positions to the reference molecule coordinates.
+#
+# Initially implemented with a conformational search algorithm to better match target coordinates.
+# But had problems with OBabel FF generating non-sense conformers. So in this version the conformer search is commented out.
+# Now if the input molecule do not have a good conformation, might not align well with the target. This effect will be
+# dimished or even vanish if the SMARTS string is defined for a rigid region (like a ring).
+# I'm still trying to incorporate somehow this conformational search.
+#
+# Script distributed under GNU LGPL 3.0 along rDock software.
+#
+# Author: Daniel Alvarez-Garcia
+# Date: 08-11-2013
+
+import math
+import pybel
+import numpy as npy
+
+def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False):
+    """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654
+    definition : superpose3D(ref, target, weights,refmask,targetmask)
+    @parameter 1 :  ref - xyz coordinates of the reference structure (the ligand for instance)
+    @type 1 :       float64 numpy array (nx3)
+    ---
+    @parameter 2 :  target - theoretical target positions to which we should move (does not need to be physically relevant.
+    @type 2 :       float 64 numpy array (nx3)
+    ---
+    @parameter 3:   weights - numpy array of atom weights (usuallly between 0 and 1)
+    @type 3 :       float 64 numpy array (n)
+    @parameter 4:   mask - a numpy boolean mask for designating atoms to include
+    Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms)
+    Returns a set of new coordinates, aligned to the target state as well as the rmsd
+    """
+    if weights == None :
+        weights=1.0
+    if refmask == None :
+        refmask=npy.ones(len(ref),"bool")
+    if targetmask == None :
+        targetmask=npy.ones(len(target),"bool")
+    #first get the centroid of both states
+    ref_centroid = npy.mean(ref[refmask]*weights,axis=0)
+    #print ref_centroid
+    refCenteredCoords=ref-ref_centroid
+    #print refCenteredCoords
+    target_centroid=npy.mean(target[targetmask]*weights,axis=0)
+    targetCenteredCoords=target-target_centroid
+    #print targetCenteredCoords
+    #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm
+    # Initial residual, see Kabsch.
+    E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0)
+    reftmp=npy.copy(refCenteredCoords[refmask])
+    targettmp=npy.copy(targetCenteredCoords[targetmask])
+    #print refCenteredCoords[refmask]
+    #single value decomposition of the dotProduct of both position vectors
+    try:
+        dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights)
+        V, S, Wt = npy.linalg.svd(dotProd )
+    except Exception:
+        try:
+            dotProd = npy.dot( npy.transpose(reftmp), targettmp)
+            V, S, Wt = npy.linalg.svd(dotProd )
+        except Exception:
+            print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment"
+        return ref, 0
+    # we already have our solution, in the results from SVD.
+    # we just need to check for reflections and then produce
+    # the rotation.  V and Wt are orthonormal, so their det's
+    # are +/-1.
+    reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt))))
+    if reflect == -1.0:
+        S[-1] = -S[-1]
+        V[:,-1] = -V[:,-1]
+    rmsd = E0 - (2.0 * sum(S))
+    rmsd = npy.sqrt(abs(rmsd / len(ref[refmask])))   #get the rmsd
+    #U is simply V*Wt
+    U = npy.dot(V, Wt)  #get the rotation matrix
+    # rotate and translate the molecule
+    new_coords = npy.dot((refCenteredCoords), U)+ target_centroid  #translate & rotate
+    #new_coords=(refCenteredCoords + target_centroid)
+    #print U
+    if returnRotMat :
+        return U, ref_centroid, target_centroid, rmsd
+    return new_coords,rmsd
+
+
+def squared_distance(coordsA, coordsB):
+    """Find the squared distance between two 3-tuples"""
+    sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) )
+    return sqrdist
+
+def rmsd(allcoordsA, allcoordsB):
+    """Find the RMSD between two lists of 3-tuples"""
+    deviation = sum(squared_distance(atomA, atomB) for
+                    (atomA, atomB) in zip(allcoordsA, allcoordsB))
+    return math.sqrt(deviation / float(len(allcoordsA)))
+
+def mapToCrystal(xtal, pose):
+    """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...)
+     this will mess up the rmsd calculation with OpenBabel"""
+    query = pybel.ob.CompileMoleculeQuery(xtal.OBMol)
+    mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query)
+    mappingpose = pybel.ob.vvpairUIntUInt()
+    exit=mapper.MapUnique(pose.OBMol,mappingpose)
+    return mappingpose[0]
+
+def takeCoords(obmol):
+    """Take coordinates of an OBMol as a npy array"""
+    return npy.array([atom.coords for atom in obmol])
+
+def updateCoords(obmol, newcoords):
+    "Update OBMol coordinates. newcoords is a numpy array"
+    for i,atom in enumerate(obmol):
+        atom.OBAtom.SetVector(*newcoords[i])
+
+def prepareAtomString(idlist):
+    s = ""
+    n = len(idlist)
+    for i, id in enumerate(idlist):
+        s += "%i"%id
+        if (i+1) == n: s+="\n"
+        elif (i+1)%35 == 0: s+=",\n"
+        else: s+=","
+    return s
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) != 5:
+        sys.exit("USAGE: %s reference.sdf input.sdf output.sdf 'SMARTS'"%sys.argv[0])
+
+    refsdf = sys.argv[1]
+    molsdf = sys.argv[2]
+    outsdf = sys.argv[3]
+    smarts = pybel.Smarts(sys.argv[4])
+
+    # Read reference pose and get atom list matching smarts query
+    # if more than 1 match, take the first one
+    ref = next(pybel.readfile("sdf", refsdf))
+    refMatchIds = smarts.findall(ref)
+    numRefMatchs = len(refMatchIds)
+
+    if not numRefMatchs:
+	sys.exit("No match found in the reference structure and the SMARTS string given. Please check it.")
+
+    if numRefMatchs > 1:
+	print "More than one match in the reference molecule for the SMARTS string given. Will tether each input molecule all possible ways."
+
+    refIndxPerMatch = [npy.array(rmi) - 1 for rmi in refMatchIds]
+
+    # Take coordinates for the reference matched atoms
+    refCoords = takeCoords(ref)
+    refMatchCoords = [npy.take(refCoords, refIndx, axis=0) for refIndx in refIndxPerMatch]
+
+    # Do the same for molecule in molsdf
+    out=pybel.Outputfile('sdf', outsdf, overwrite=True)
+    molSupp = pybel.readfile("sdf", molsdf)
+    ff = pybel.ob.OBForceField_FindForceField('MMFF94')
+    for i,mol in enumerate(molSupp):
+	print "## Molecule %i"%(i+1),
+	mol.OBMol.DeleteNonPolarHydrogens()
+   	molMatchAllIds = smarts.findall(mol)
+	numMatchs = len(molMatchAllIds)
+
+	if numMatchs == 0:
+		print "No_Match",
+		continue
+	elif numMatchs ==1:
+		print "Match",
+	elif numMatchs > 1:
+                print "Multiple_Match SMART Matches for this molecule (%d)"%numMatchs,
+
+	# If more than one match, write an output of the same molecule for each match
+	# Start a default bestcoord and rmsd for later looping for each pose
+	bestCoordPerMatch = [[0 for i in range(numMatchs)] for i in range(numRefMatchs)]
+	bestRMSPerMatch = [[999 for i in range(numMatchs)] for i in range(numRefMatchs)]
+
+        # Will do a randomrotorsearch to find conformer with the lower rmsd when superposing
+        # At least 20 when possible
+        #ff.Setup(mol.OBMol)
+	#numats = mol.OBMol.NumAtoms()
+	#numrot = mol.OBMol.NumRotors()
+	#print "Atoms: %i, Rotors: %i"%(numats, numrot)
+	#geomopt = 300
+	#genconf = 100
+	# increase iterations if bigger molecule or bigger number of rotatable bonds
+	# for allowing better sampling
+	#if numats > 40 and numrot > 5:
+	#	geomopt = 300
+	#	genconf = 150
+	#if numats > 55 and numrot > 7:
+	#	genconf = 100
+	#	geomopt = 500
+        #print "\tDoing conformational search with WeightedRotorSearch (%i, %i)..."%(genconf, geomopt),
+	#ff.SteepestDescent(500, 1.0e-4)
+        #ff.WeightedRotorSearch(genconf,geomopt)
+	#ff.ConjugateGradients(500, 1.0e-6)
+	#ff.GetConformers(mol.OBMol)
+	#numconf = mol.OBMol.NumConformers()
+	numconf = 1
+	#print "%i conformers generated"%numconf
+        if numconf > 1:
+            # Doing conf search
+            #for i in range(numconf):
+            #    mol.OBMol.SetConformer(i)
+            #    confCoords = takeCoords(mol)
+	    # 	print 'coord:',confCoords[0,:]
+            #
+            #    for imatch, molMatchIds in enumerate(molMatchAllIds):
+            #        molMatchIndx = npy.array(molMatchIds) - 1
+            #        confMatchCoords = npy.take(confCoords, molMatchIndx, axis=0)
+            #
+            #        # Align: Get rotation matrix between the two sets of coords
+            #        # Apply rotation to the whole target molecule
+            #        rotMat, targetCentroid, refCentroid, rmsd = superpose3D(confMatchCoords, refMatchCoords, returnRotMat=True)
+            #        if rmsd < bestRMSPerMatch[imatch]:
+            #            newcoords = npy.dot((confCoords - targetCentroid), rotMat) + refCentroid
+            #            bestRMSPerMatch[imatch] = rmsd
+            #            bestCoordPerMatch[imatch] = newcoords
+	    #	#if bestrms < 0.01: break
+       	    pass
+        else:
+            molCoords = takeCoords(mol)
+            for imatch, molMatchIds in enumerate(molMatchAllIds):
+		    # loop in each matching way for the input molecule
+                    molMatchIndx = npy.array(molMatchIds) - 1
+                    molMatchCoords = npy.take(molCoords, molMatchIndx, axis=0)
+
+		    # Loop over the reference matches
+                    # Align: Get rotation matrix between the two sets of coords
+                    # Apply rotation to the whole target molecule
+		    for ir, refMatchCoord in enumerate(refMatchCoords):
+                    	rotMat, targetCentroid, refCentroid, rmsd = superpose3D(molMatchCoords, refMatchCoord, returnRotMat=True)
+                    	if rmsd < bestRMSPerMatch[ir][imatch]:
+                        	newcoords = npy.dot((molCoords - targetCentroid), rotMat) + refCentroid
+                        	bestRMSPerMatch[ir][imatch] = rmsd
+                        	bestCoordPerMatch[ir][imatch] = newcoords
+
+        # Finally update molecule coordinates with the best matching coordinates found
+	# change molecule coordinates, set TETHERED ATOMS property and save
+	for imatch in range(numMatchs):
+		for irefmatch in range(numRefMatchs):
+			bestCoord = bestCoordPerMatch[irefmatch][imatch]
+			bestRMS = bestRMSPerMatch[irefmatch][imatch]
+        		print "\tBest RMSD reached (match %d, refmatch %d): %s"%(imatch, irefmatch, bestRMS)
+			molMatchID = molMatchAllIds[imatch]
+        		updateCoords(mol, bestCoord)
+        		newData = pybel.ob.OBPairData()
+        		newData.SetAttribute("TETHERED ATOMS")
+        		newData.SetValue(prepareAtomString(molMatchID))
+
+			mol.OBMol.DeleteData("TETHERED ATOMS") # Remove Previous DATA
+        		mol.OBMol.CloneData(newData)	       # Add new data
+        		out.write(mol)
+
+    out.close()
+
+    print "DONE"
+    sys.stdout.close()
+    sys.stderr.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rdock/bin/to_unix	Mon Aug 29 08:38:19 2016 -0400
@@ -0,0 +1,32 @@
+#! /bin/sh -e
+#Tag 1538
+#
+# convert a DOS ASCII file to a UNIX ASCII file by removing trailing ^M at the
+# end of each line and ^Z at the end of the file
+
+#DM 23 Oct 2000 - tmp file in current directory, not /tmp
+#TMPFILE=/tmp/to_unix$$
+TMPFILE=to_unix$$
+
+if [ $# -gt 2 ]
+then
+	echo "usage: to_unix [<dos_file> [<unix_file>]]"
+	exit 1
+fi
+
+# First strip out all carriage-return and ctrl-Z's
+if [ $# -gt 0 ]
+then
+	tr -d '\015\032' < "$1" > $TMPFILE
+else
+	tr -d '\015\032' > $TMPFILE
+fi
+
+
+if [ $# -eq 2 ]
+then
+	mv -f $TMPFILE "$2"
+else
+	cat $TMPFILE
+	rm $TMPFILE
+fi