Mercurial > repos > yufei-luo > s_mart
diff commons/tools/FilterAlign.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/tools/FilterAlign.py Tue Apr 30 14:33:21 2013 -0400 @@ -0,0 +1,149 @@ +#!/usr/bin/env python + +import sys +import getopt +import os + + +def help(): + print + print "usage: ",sys.argv[0].split("/")[-1],"[ options ]" + print "options:" + print " -h: this help" + print " -i: name of the input file (format='align')" + print " -E: maximum E-value (default=100)" + print " -S: minimum score (default=0)" + print " -I: minimum identity (default=0)" + print " -l: minimum length (default=0)" + print " -L: maximum length (default=1000000000)" + print " -o: name of the output file (default=inFileName+'.filtered')" + print " -v: verbose (default=0/1)" + print + + +def main(): + """ + This program filters the output from BLASTER ('align' file recording HSPs). + """ + + inFileName = "" + outFileName = "" + maxEValue = 100 + minIdentity = 0 + minLength = 0 + maxLength = 1000000000 + minScore = 0 + verbose = 0 + + try: + opts, args = getopt.getopt(sys.argv[1:],"hi:E:S:I:l:L:o:v:") + except getopt.GetoptError, err: + print str(err) + help() + sys.exit(1) + for o,a in opts: + if o == "-h": + help() + sys.exit() + elif o == "-i": + inFileName = a + elif o == "-E": + maxEValue = float(a) + elif o == "-S": + minScore = int(float(a)) + elif o == "-I": + minIdentity = int(float(a)) + elif o == "-l": + minLength = int(a) + elif o == "-L": + maxLength = int(a) + elif o == "-o": + outFileName = a + elif o == "-v": + verbose = int(a) + + if inFileName == "": + print "ERROR: missing input file name" + help() + sys.exit(1) + + if outFileName == "": + outFileName = "%s.filtered" % ( inFileName ) + + if os.path.exists( os.environ["REPET_PATH"] + "/bin/filterAlign" ): + prg = os.environ["REPET_PATH"] + "/bin/filterAlign" + cmd = prg + cmd += " -i %s" % ( inFileName ) + cmd += " -E %g" % ( maxEValue ) + cmd += " -S %i" % ( minScore ) + cmd += " -I %f" % ( minIdentity ) + cmd += " -l %i" % ( minLength ) + cmd += " -L %i" % ( maxLength ) + cmd += " -o %s" % ( outFileName ) + cmd += " -v %i" % ( verbose ) + return os.system( cmd ) + + if verbose > 0: + print "START %s" % (sys.argv[0].split("/")[-1]) + sys.stdout.flush() + + inFile = open( inFileName, "r" ) + outFile = open( outFileName, "w" ) + + nbMatches = 0 + nbFiltered = 0 + + line = inFile.readline() + while True: + if line == "": + break + nbMatches += 1 + data = line.split("\t") + qryName = data[0] + qryStart = data[1] + qryEnd = data[2] + sbjName = data[3] + sbjStart = data[4] + sbjEnd = data[5] + Evalue = data[6] + score = data[7] + identity = data[8] + + if int(qryStart) < int(qryEnd): + matchLength = int(qryEnd) - int(qryStart) + 1 + elif int(qryStart) > int(qryEnd): + matchLength = int(qryStart) - int(qryEnd) + 1 + + if float(Evalue) <= maxEValue and matchLength >= minLength and \ + float(identity) >= minIdentity and matchLength <= maxLength and \ + int(score) >= minScore: + string = qryName + "\t" + qryStart + "\t" + qryEnd + "\t" +\ + sbjName + "\t" + sbjStart + "\t" + sbjEnd + "\t" +\ + Evalue + "\t" + score + "\t" + identity + outFile.write( string ) + else: + nbFiltered += 1 + string = "qry %s (%s-%s) vs subj %s (%s-%s): Eval=%s identity=%s matchLength=%s score=%s" %\ + ( qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, Evalue, identity.split("\n")[0], matchLength, score ) + if verbose > 1: + print string; sys.stdout.flush() + + line = inFile.readline() + + inFile.close() + outFile.close() + + if verbose > 0: + msg = "total number of matches: %i" % ( nbMatches ) + msg += "\nnumber of filtered matches: %i" % ( nbFiltered ) + print msg; sys.stdout.flush() + + if verbose > 0: + print "END %s" % (sys.argv[0].split("/")[-1]) + sys.stdout.flush() + + return 0 + + +if __name__ == "__main__": + main()