18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys
|
|
4 import getopt
|
|
5 import os
|
|
6
|
|
7
|
|
8 def help():
|
|
9 print
|
|
10 print "usage: ",sys.argv[0].split("/")[-1],"[ options ]"
|
|
11 print "options:"
|
|
12 print " -h: this help"
|
|
13 print " -i: name of the input file (format='align')"
|
|
14 print " -E: maximum E-value (default=100)"
|
|
15 print " -S: minimum score (default=0)"
|
|
16 print " -I: minimum identity (default=0)"
|
|
17 print " -l: minimum length (default=0)"
|
|
18 print " -L: maximum length (default=1000000000)"
|
|
19 print " -o: name of the output file (default=inFileName+'.filtered')"
|
|
20 print " -v: verbose (default=0/1)"
|
|
21 print
|
|
22
|
|
23
|
|
24 def main():
|
|
25 """
|
|
26 This program filters the output from BLASTER ('align' file recording HSPs).
|
|
27 """
|
|
28
|
|
29 inFileName = ""
|
|
30 outFileName = ""
|
|
31 maxEValue = 100
|
|
32 minIdentity = 0
|
|
33 minLength = 0
|
|
34 maxLength = 1000000000
|
|
35 minScore = 0
|
|
36 verbose = 0
|
|
37
|
|
38 try:
|
|
39 opts, args = getopt.getopt(sys.argv[1:],"hi:E:S:I:l:L:o:v:")
|
|
40 except getopt.GetoptError, err:
|
|
41 print str(err)
|
|
42 help()
|
|
43 sys.exit(1)
|
|
44 for o,a in opts:
|
|
45 if o == "-h":
|
|
46 help()
|
|
47 sys.exit()
|
|
48 elif o == "-i":
|
|
49 inFileName = a
|
|
50 elif o == "-E":
|
|
51 maxEValue = float(a)
|
|
52 elif o == "-S":
|
|
53 minScore = int(float(a))
|
|
54 elif o == "-I":
|
|
55 minIdentity = int(float(a))
|
|
56 elif o == "-l":
|
|
57 minLength = int(a)
|
|
58 elif o == "-L":
|
|
59 maxLength = int(a)
|
|
60 elif o == "-o":
|
|
61 outFileName = a
|
|
62 elif o == "-v":
|
|
63 verbose = int(a)
|
|
64
|
|
65 if inFileName == "":
|
|
66 print "ERROR: missing input file name"
|
|
67 help()
|
|
68 sys.exit(1)
|
|
69
|
|
70 if outFileName == "":
|
|
71 outFileName = "%s.filtered" % ( inFileName )
|
|
72
|
|
73 if os.path.exists( os.environ["REPET_PATH"] + "/bin/filterAlign" ):
|
|
74 prg = os.environ["REPET_PATH"] + "/bin/filterAlign"
|
|
75 cmd = prg
|
|
76 cmd += " -i %s" % ( inFileName )
|
|
77 cmd += " -E %g" % ( maxEValue )
|
|
78 cmd += " -S %i" % ( minScore )
|
|
79 cmd += " -I %f" % ( minIdentity )
|
|
80 cmd += " -l %i" % ( minLength )
|
|
81 cmd += " -L %i" % ( maxLength )
|
|
82 cmd += " -o %s" % ( outFileName )
|
|
83 cmd += " -v %i" % ( verbose )
|
|
84 return os.system( cmd )
|
|
85
|
|
86 if verbose > 0:
|
|
87 print "START %s" % (sys.argv[0].split("/")[-1])
|
|
88 sys.stdout.flush()
|
|
89
|
|
90 inFile = open( inFileName, "r" )
|
|
91 outFile = open( outFileName, "w" )
|
|
92
|
|
93 nbMatches = 0
|
|
94 nbFiltered = 0
|
|
95
|
|
96 line = inFile.readline()
|
|
97 while True:
|
|
98 if line == "":
|
|
99 break
|
|
100 nbMatches += 1
|
|
101 data = line.split("\t")
|
|
102 qryName = data[0]
|
|
103 qryStart = data[1]
|
|
104 qryEnd = data[2]
|
|
105 sbjName = data[3]
|
|
106 sbjStart = data[4]
|
|
107 sbjEnd = data[5]
|
|
108 Evalue = data[6]
|
|
109 score = data[7]
|
|
110 identity = data[8]
|
|
111
|
|
112 if int(qryStart) < int(qryEnd):
|
|
113 matchLength = int(qryEnd) - int(qryStart) + 1
|
|
114 elif int(qryStart) > int(qryEnd):
|
|
115 matchLength = int(qryStart) - int(qryEnd) + 1
|
|
116
|
|
117 if float(Evalue) <= maxEValue and matchLength >= minLength and \
|
|
118 float(identity) >= minIdentity and matchLength <= maxLength and \
|
|
119 int(score) >= minScore:
|
|
120 string = qryName + "\t" + qryStart + "\t" + qryEnd + "\t" +\
|
|
121 sbjName + "\t" + sbjStart + "\t" + sbjEnd + "\t" +\
|
|
122 Evalue + "\t" + score + "\t" + identity
|
|
123 outFile.write( string )
|
|
124 else:
|
|
125 nbFiltered += 1
|
|
126 string = "qry %s (%s-%s) vs subj %s (%s-%s): Eval=%s identity=%s matchLength=%s score=%s" %\
|
|
127 ( qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, Evalue, identity.split("\n")[0], matchLength, score )
|
|
128 if verbose > 1:
|
|
129 print string; sys.stdout.flush()
|
|
130
|
|
131 line = inFile.readline()
|
|
132
|
|
133 inFile.close()
|
|
134 outFile.close()
|
|
135
|
|
136 if verbose > 0:
|
|
137 msg = "total number of matches: %i" % ( nbMatches )
|
|
138 msg += "\nnumber of filtered matches: %i" % ( nbFiltered )
|
|
139 print msg; sys.stdout.flush()
|
|
140
|
|
141 if verbose > 0:
|
|
142 print "END %s" % (sys.argv[0].split("/")[-1])
|
|
143 sys.stdout.flush()
|
|
144
|
|
145 return 0
|
|
146
|
|
147
|
|
148 if __name__ == "__main__":
|
|
149 main()
|