comparison commons/tools/FilterAlign.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 #!/usr/bin/env python
2
3 import sys
4 import getopt
5 import os
6
7
8 def help():
9 print
10 print "usage: ",sys.argv[0].split("/")[-1],"[ options ]"
11 print "options:"
12 print " -h: this help"
13 print " -i: name of the input file (format='align')"
14 print " -E: maximum E-value (default=100)"
15 print " -S: minimum score (default=0)"
16 print " -I: minimum identity (default=0)"
17 print " -l: minimum length (default=0)"
18 print " -L: maximum length (default=1000000000)"
19 print " -o: name of the output file (default=inFileName+'.filtered')"
20 print " -v: verbose (default=0/1)"
21 print
22
23
24 def main():
25 """
26 This program filters the output from BLASTER ('align' file recording HSPs).
27 """
28
29 inFileName = ""
30 outFileName = ""
31 maxEValue = 100
32 minIdentity = 0
33 minLength = 0
34 maxLength = 1000000000
35 minScore = 0
36 verbose = 0
37
38 try:
39 opts, args = getopt.getopt(sys.argv[1:],"hi:E:S:I:l:L:o:v:")
40 except getopt.GetoptError, err:
41 print str(err)
42 help()
43 sys.exit(1)
44 for o,a in opts:
45 if o == "-h":
46 help()
47 sys.exit()
48 elif o == "-i":
49 inFileName = a
50 elif o == "-E":
51 maxEValue = float(a)
52 elif o == "-S":
53 minScore = int(float(a))
54 elif o == "-I":
55 minIdentity = int(float(a))
56 elif o == "-l":
57 minLength = int(a)
58 elif o == "-L":
59 maxLength = int(a)
60 elif o == "-o":
61 outFileName = a
62 elif o == "-v":
63 verbose = int(a)
64
65 if inFileName == "":
66 print "ERROR: missing input file name"
67 help()
68 sys.exit(1)
69
70 if outFileName == "":
71 outFileName = "%s.filtered" % ( inFileName )
72
73 if os.path.exists( os.environ["REPET_PATH"] + "/bin/filterAlign" ):
74 prg = os.environ["REPET_PATH"] + "/bin/filterAlign"
75 cmd = prg
76 cmd += " -i %s" % ( inFileName )
77 cmd += " -E %g" % ( maxEValue )
78 cmd += " -S %i" % ( minScore )
79 cmd += " -I %f" % ( minIdentity )
80 cmd += " -l %i" % ( minLength )
81 cmd += " -L %i" % ( maxLength )
82 cmd += " -o %s" % ( outFileName )
83 cmd += " -v %i" % ( verbose )
84 return os.system( cmd )
85
86 if verbose > 0:
87 print "START %s" % (sys.argv[0].split("/")[-1])
88 sys.stdout.flush()
89
90 inFile = open( inFileName, "r" )
91 outFile = open( outFileName, "w" )
92
93 nbMatches = 0
94 nbFiltered = 0
95
96 line = inFile.readline()
97 while True:
98 if line == "":
99 break
100 nbMatches += 1
101 data = line.split("\t")
102 qryName = data[0]
103 qryStart = data[1]
104 qryEnd = data[2]
105 sbjName = data[3]
106 sbjStart = data[4]
107 sbjEnd = data[5]
108 Evalue = data[6]
109 score = data[7]
110 identity = data[8]
111
112 if int(qryStart) < int(qryEnd):
113 matchLength = int(qryEnd) - int(qryStart) + 1
114 elif int(qryStart) > int(qryEnd):
115 matchLength = int(qryStart) - int(qryEnd) + 1
116
117 if float(Evalue) <= maxEValue and matchLength >= minLength and \
118 float(identity) >= minIdentity and matchLength <= maxLength and \
119 int(score) >= minScore:
120 string = qryName + "\t" + qryStart + "\t" + qryEnd + "\t" +\
121 sbjName + "\t" + sbjStart + "\t" + sbjEnd + "\t" +\
122 Evalue + "\t" + score + "\t" + identity
123 outFile.write( string )
124 else:
125 nbFiltered += 1
126 string = "qry %s (%s-%s) vs subj %s (%s-%s): Eval=%s identity=%s matchLength=%s score=%s" %\
127 ( qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, Evalue, identity.split("\n")[0], matchLength, score )
128 if verbose > 1:
129 print string; sys.stdout.flush()
130
131 line = inFile.readline()
132
133 inFile.close()
134 outFile.close()
135
136 if verbose > 0:
137 msg = "total number of matches: %i" % ( nbMatches )
138 msg += "\nnumber of filtered matches: %i" % ( nbFiltered )
139 print msg; sys.stdout.flush()
140
141 if verbose > 0:
142 print "END %s" % (sys.argv[0].split("/")[-1])
143 sys.stdout.flush()
144
145 return 0
146
147
148 if __name__ == "__main__":
149 main()