Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/FilterAlign.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys | |
4 import getopt | |
5 import os | |
6 | |
7 | |
8 def help(): | |
9 print | |
10 print "usage: ",sys.argv[0].split("/")[-1],"[ options ]" | |
11 print "options:" | |
12 print " -h: this help" | |
13 print " -i: name of the input file (format='align')" | |
14 print " -E: maximum E-value (default=100)" | |
15 print " -S: minimum score (default=0)" | |
16 print " -I: minimum identity (default=0)" | |
17 print " -l: minimum length (default=0)" | |
18 print " -L: maximum length (default=1000000000)" | |
19 print " -o: name of the output file (default=inFileName+'.filtered')" | |
20 print " -v: verbose (default=0/1)" | |
21 print | |
22 | |
23 | |
24 def main(): | |
25 """ | |
26 This program filters the output from BLASTER ('align' file recording HSPs). | |
27 """ | |
28 | |
29 inFileName = "" | |
30 outFileName = "" | |
31 maxEValue = 100 | |
32 minIdentity = 0 | |
33 minLength = 0 | |
34 maxLength = 1000000000 | |
35 minScore = 0 | |
36 verbose = 0 | |
37 | |
38 try: | |
39 opts, args = getopt.getopt(sys.argv[1:],"hi:E:S:I:l:L:o:v:") | |
40 except getopt.GetoptError, err: | |
41 print str(err) | |
42 help() | |
43 sys.exit(1) | |
44 for o,a in opts: | |
45 if o == "-h": | |
46 help() | |
47 sys.exit() | |
48 elif o == "-i": | |
49 inFileName = a | |
50 elif o == "-E": | |
51 maxEValue = float(a) | |
52 elif o == "-S": | |
53 minScore = int(float(a)) | |
54 elif o == "-I": | |
55 minIdentity = int(float(a)) | |
56 elif o == "-l": | |
57 minLength = int(a) | |
58 elif o == "-L": | |
59 maxLength = int(a) | |
60 elif o == "-o": | |
61 outFileName = a | |
62 elif o == "-v": | |
63 verbose = int(a) | |
64 | |
65 if inFileName == "": | |
66 print "ERROR: missing input file name" | |
67 help() | |
68 sys.exit(1) | |
69 | |
70 if outFileName == "": | |
71 outFileName = "%s.filtered" % ( inFileName ) | |
72 | |
73 if os.path.exists( os.environ["REPET_PATH"] + "/bin/filterAlign" ): | |
74 prg = os.environ["REPET_PATH"] + "/bin/filterAlign" | |
75 cmd = prg | |
76 cmd += " -i %s" % ( inFileName ) | |
77 cmd += " -E %g" % ( maxEValue ) | |
78 cmd += " -S %i" % ( minScore ) | |
79 cmd += " -I %f" % ( minIdentity ) | |
80 cmd += " -l %i" % ( minLength ) | |
81 cmd += " -L %i" % ( maxLength ) | |
82 cmd += " -o %s" % ( outFileName ) | |
83 cmd += " -v %i" % ( verbose ) | |
84 return os.system( cmd ) | |
85 | |
86 if verbose > 0: | |
87 print "START %s" % (sys.argv[0].split("/")[-1]) | |
88 sys.stdout.flush() | |
89 | |
90 inFile = open( inFileName, "r" ) | |
91 outFile = open( outFileName, "w" ) | |
92 | |
93 nbMatches = 0 | |
94 nbFiltered = 0 | |
95 | |
96 line = inFile.readline() | |
97 while True: | |
98 if line == "": | |
99 break | |
100 nbMatches += 1 | |
101 data = line.split("\t") | |
102 qryName = data[0] | |
103 qryStart = data[1] | |
104 qryEnd = data[2] | |
105 sbjName = data[3] | |
106 sbjStart = data[4] | |
107 sbjEnd = data[5] | |
108 Evalue = data[6] | |
109 score = data[7] | |
110 identity = data[8] | |
111 | |
112 if int(qryStart) < int(qryEnd): | |
113 matchLength = int(qryEnd) - int(qryStart) + 1 | |
114 elif int(qryStart) > int(qryEnd): | |
115 matchLength = int(qryStart) - int(qryEnd) + 1 | |
116 | |
117 if float(Evalue) <= maxEValue and matchLength >= minLength and \ | |
118 float(identity) >= minIdentity and matchLength <= maxLength and \ | |
119 int(score) >= minScore: | |
120 string = qryName + "\t" + qryStart + "\t" + qryEnd + "\t" +\ | |
121 sbjName + "\t" + sbjStart + "\t" + sbjEnd + "\t" +\ | |
122 Evalue + "\t" + score + "\t" + identity | |
123 outFile.write( string ) | |
124 else: | |
125 nbFiltered += 1 | |
126 string = "qry %s (%s-%s) vs subj %s (%s-%s): Eval=%s identity=%s matchLength=%s score=%s" %\ | |
127 ( qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, Evalue, identity.split("\n")[0], matchLength, score ) | |
128 if verbose > 1: | |
129 print string; sys.stdout.flush() | |
130 | |
131 line = inFile.readline() | |
132 | |
133 inFile.close() | |
134 outFile.close() | |
135 | |
136 if verbose > 0: | |
137 msg = "total number of matches: %i" % ( nbMatches ) | |
138 msg += "\nnumber of filtered matches: %i" % ( nbFiltered ) | |
139 print msg; sys.stdout.flush() | |
140 | |
141 if verbose > 0: | |
142 print "END %s" % (sys.argv[0].split("/")[-1]) | |
143 sys.stdout.flush() | |
144 | |
145 return 0 | |
146 | |
147 | |
148 if __name__ == "__main__": | |
149 main() |