diff commons/tools/dbConsensus.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/dbConsensus.py	Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import sys
+import getopt
+
+##@file
+# usage: dbConsensus.py [ options ]
+# options:
+#      -h: this help
+#      -i: name of the input file (format=aligned fasta)
+#      -n: minimum number of nucleotides in a column to edit a consensus (default=1)
+#      -p: minimum proportion for the major nucleotide to be used, otherwise add 'N' (default=0.0)
+#      -o: name of the output file (default=inFileName+'.cons')
+#      -v: verbose (default=0/1/2)
+
+if not os.environ.has_key( "REPET_PATH" ):
+    print "ERROR: no environment variable REPET_PATH"
+    sys.exit(1)
+sys.path.append( os.environ["REPET_PATH"] )
+
+import commons.core.seq.AlignedBioseqDB
+
+
+def help():
+    """
+    Give the list of the command-line options.
+    """
+    print
+    print "usage:",sys.argv[0]," [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format=aligned fasta)"
+    print "     -n: minimum number of nucleotides in a column to edit a consensus (default=1)"
+    print "     -p: minimum proportion for the major nucleotide to be used, otherwise add 'N' (default=0.0)"
+    print "     -o: name of the output file (default=inFileName+'.cons')"
+    print "     -H: format the header with pyramid and piles informations (SATannot)"
+    print "     -v: verbose (default=0/1/2)"
+    print
+
+
+def main():
+
+    inFileName = ""
+    minNbNt = 1
+    minPropNt = 0.0
+    outFileName = ""
+    header_SATannot = False
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"hi:n:p:o:v:H")
+    except getopt.GetoptError, err:
+        print str(err); help(); sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-n":
+            minNbNt = int(a)
+        elif o == "-p":
+            minPropNt = float(a)
+        elif o == "-o":
+            outFileName = a
+        elif o == "-H":
+            header_SATannot = True
+        elif o == "-v":
+            verbose = int(a)
+
+    if inFileName == "":
+        print "ERROR: missing input file name"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "START %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    alnDB = commons.core.seq.AlignedBioseqDB.AlignedBioseqDB( inFileName )
+
+    if alnDB.getSize() < minNbNt:
+        print "WARNING: not enough sequences (<%i)" % ( minNbNt )
+
+    else:
+        consensus = alnDB.getConsensus( minNbNt, minPropNt, verbose,  header_SATannot)
+        if consensus != None:
+            consensus.upCase()
+            if outFileName == "":
+                outFileName = "%s.cons" % ( inFileName )
+            outFile = open( outFileName, "w" )
+            consensus.write( outFile )
+            outFile.close()
+
+    if verbose > 0:
+        print "END %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    return 0
+
+if __name__ == "__main__":
+    main ()