Mercurial > repos > yufei-luo > s_mart
diff commons/tools/dbConsensus.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/tools/dbConsensus.py Tue Apr 30 14:33:21 2013 -0400 @@ -0,0 +1,133 @@ +#!/usr/bin/env python + +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. + +import os +import sys +import getopt + +##@file +# usage: dbConsensus.py [ options ] +# options: +# -h: this help +# -i: name of the input file (format=aligned fasta) +# -n: minimum number of nucleotides in a column to edit a consensus (default=1) +# -p: minimum proportion for the major nucleotide to be used, otherwise add 'N' (default=0.0) +# -o: name of the output file (default=inFileName+'.cons') +# -v: verbose (default=0/1/2) + +if not os.environ.has_key( "REPET_PATH" ): + print "ERROR: no environment variable REPET_PATH" + sys.exit(1) +sys.path.append( os.environ["REPET_PATH"] ) + +import commons.core.seq.AlignedBioseqDB + + +def help(): + """ + Give the list of the command-line options. + """ + print + print "usage:",sys.argv[0]," [ options ]" + print "options:" + print " -h: this help" + print " -i: name of the input file (format=aligned fasta)" + print " -n: minimum number of nucleotides in a column to edit a consensus (default=1)" + print " -p: minimum proportion for the major nucleotide to be used, otherwise add 'N' (default=0.0)" + print " -o: name of the output file (default=inFileName+'.cons')" + print " -H: format the header with pyramid and piles informations (SATannot)" + print " -v: verbose (default=0/1/2)" + print + + +def main(): + + inFileName = "" + minNbNt = 1 + minPropNt = 0.0 + outFileName = "" + header_SATannot = False + verbose = 0 + + try: + opts, args = getopt.getopt(sys.argv[1:],"hi:n:p:o:v:H") + except getopt.GetoptError, err: + print str(err); help(); sys.exit(1) + for o,a in opts: + if o == "-h": + help() + sys.exit(0) + elif o == "-i": + inFileName = a + elif o == "-n": + minNbNt = int(a) + elif o == "-p": + minPropNt = float(a) + elif o == "-o": + outFileName = a + elif o == "-H": + header_SATannot = True + elif o == "-v": + verbose = int(a) + + if inFileName == "": + print "ERROR: missing input file name" + help() + sys.exit(1) + + if verbose > 0: + print "START %s" % (sys.argv[0].split("/")[-1]) + sys.stdout.flush() + + alnDB = commons.core.seq.AlignedBioseqDB.AlignedBioseqDB( inFileName ) + + if alnDB.getSize() < minNbNt: + print "WARNING: not enough sequences (<%i)" % ( minNbNt ) + + else: + consensus = alnDB.getConsensus( minNbNt, minPropNt, verbose, header_SATannot) + if consensus != None: + consensus.upCase() + if outFileName == "": + outFileName = "%s.cons" % ( inFileName ) + outFile = open( outFileName, "w" ) + consensus.write( outFile ) + outFile.close() + + if verbose > 0: + print "END %s" % (sys.argv[0].split("/")[-1]) + sys.stdout.flush() + + return 0 + +if __name__ == "__main__": + main ()