annotate bin/last-postmask @ 2:f274c166e738 default tip

remove comments in bsfcall_wrapper.xml
author yutaka-saito
date Sun, 19 Apr 2015 23:02:04 +0900
parents 06f8460885ff
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1 #! /usr/bin/env python
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
3 # Copyright 2014 Martin C. Frith
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
4
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
5 # Read MAF-format alignments, and write those that have a segment with
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
6 # score >= threshold, with gentle masking of lowercase letters. There
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
7 # must be a lastal header with score parameters.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
8
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
9 # Gentle masking is described in: MC Frith, PLoS One 2011;6(12):e28819
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
10 # "Gentle masking of low-complexity sequences improves homology search"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
11
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
12 # Limitations: doesn't (yet) handle sequence quality data,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
13 # frameshifts, or generalized affine gaps.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
14
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
15 import fileinput, itertools, optparse, os, signal, sys
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
16
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
def getScoreMatrix(rowHeads, colHeads, matrix, deleteCost, insertCost):
    """Build a 128x128 lookup table of per-column alignment scores.

    The result is indexed as table[ord(x)][ord(y)], where x is a symbol
    matched against rowHeads and y a symbol matched against colHeads.

    rowHeads, colHeads -- the letters labelling the rows and columns of
        matrix.
    matrix -- substitution scores: one row per letter in rowHeads, one
        entry per letter in colHeads.  Rows may be any iterables (for
        example map objects); they are copied into lists first.
    deleteCost -- the table holds -deleteCost wherever y is "-".
    insertCost -- the table holds -insertCost wherever x is "-".

    Letter pairs not covered by the heads get the lowest score found in
    matrix.  If either letter of a pair is lowercase, the pair gets
    min(score, 0), so it can never increase a running score.

    Raises ValueError if matrix is empty.
    """
    # Copy the rows: they are indexed repeatedly below, which a one-shot
    # iterator (such as the result of map() on Python 3) cannot support.
    rows = [list(row) for row in matrix]
    if not rows:
        raise ValueError("empty score matrix")
    defaultScore = min(map(min, rows))
    scoreMatrix = [[defaultScore for i in range(128)] for j in range(128)]
    for i, x in enumerate(rowHeads):
        xu = ord(x.upper())
        xl = ord(x.lower())
        for j, y in enumerate(colHeads):
            yu = ord(y.upper())
            yl = ord(y.lower())
            score = rows[i][j]
            maskScore = min(score, 0)
            scoreMatrix[xu][yu] = score
            scoreMatrix[xu][yl] = maskScore
            scoreMatrix[xl][yu] = maskScore
            scoreMatrix[xl][yl] = maskScore
    # Gap columns.  The assignment order matters for the "-" versus "-"
    # cell, which ends up holding -insertCost.
    for i in range(128):
        scoreMatrix[i][ord("-")] = -deleteCost
        scoreMatrix[ord("-")][i] = -insertCost
    return scoreMatrix
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
36
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
def isGoodAlignment(seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
    """Does the alignment have a segment with score >= minScore?

    seqs -- exactly two aligned rows (strings using "-" for gaps); they
        are paired column by column.
    scoreMatrix -- table indexed as scoreMatrix[ord(x)][ord(y)], where x
        comes from the first row and y from the second.
    delOpenCost -- extra cost charged on the first column of each run of
        "-" in the second row.
    insOpenCost -- extra cost charged on the first column of each run of
        "-" in the first row.
    minScore -- the threshold a running score must reach.

    The running score restarts from zero whenever it drops below zero,
    so any segment of the alignment can reach the threshold, not just a
    prefix.  Returns True as soon as the threshold is reached, else
    False.
    """
    r, q = seqs
    score = 0
    xOld = " "
    yOld = " "
    # The builtin zip works on both Python 2 and 3; itertools.izip only
    # exists on Python 2.
    for x, y in zip(r, q):
        score += scoreMatrix[ord(x)][ord(y)]
        if score >= minScore:
            return True
        # Gap-open costs are charged after the threshold test.
        if x == "-" and xOld != "-":
            score -= insOpenCost
        if y == "-" and yOld != "-":
            score -= delOpenCost
        if score < 0:
            score = 0
        xOld = x
        yOld = y
    return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
52
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
def printIfGood(maf, seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
    """Write one alignment to standard output if it passes the filter.

    maf -- the alignment's text lines, written out unchanged.
    seqs, scoreMatrix, delOpenCost, insOpenCost, minScore -- passed
        straight through to isGoodAlignment, which makes the decision.

    An accepted alignment is followed by one newline.  Nothing is
    written for a rejected alignment.
    """
    if isGoodAlignment(seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
        # sys.stdout.write behaves the same on Python 2 and 3, unlike
        # the Python 2 print statement.
        out = sys.stdout
        for line in maf:
            out.write(line)
        out.write("\n")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
58
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
def _headerParam(params, name):
    """Return header parameter name from params, or raise ValueError."""
    if name not in params:
        raise ValueError("missing '" + name + "=' in lastal header")
    return params[name]

def lastPostmask(args):
    """Filter the alignments in the input named by args, writing to stdout.

    args -- list of input file names, handed to fileinput.input.

    Lines starting with "#" are echoed and scanned for the score
    parameters a=, b=, A=, B=, e= and for the score matrix.  Every other
    run of non-blank lines is treated as one alignment: the 7th field of
    each line starting with "s" is collected as an aligned sequence, and
    the alignment is passed to printIfGood when a blank line or the end
    of input is reached.

    Raises ValueError if a needed header parameter or the score matrix
    has not been seen by the time it is required.
    """
    scoreMatrix = []
    maf = []
    seqs = []
    params = {}  # header parameters seen so far, e.g. {"a": 7, "e": 40}
    colHeads = []
    rowHeads = []
    matrix = []

    lines = fileinput.input(args)
    try:
        for line in lines:
            if line[0] == "#":
                sys.stdout.write(line)
                w = line.split()
                for word in w:
                    for name in ("a", "b", "A", "B", "e"):
                        if word.startswith(name + "="):
                            params[name] = int(word[2:])
                if len(w) > 1 and max(map(len, w)) == 1:
                    # All fields are single characters: this is the
                    # matrix's column-heading line, starting a new matrix.
                    colHeads = w[1:]
                    rowHeads = []
                    matrix = []
                elif len(w) > 2 and len(w[1]) == 1:
                    # A matrix row: one letter followed by its scores.
                    rowHeads.append(w[1])
                    # A real list, not map(): on Python 3 map() returns
                    # a one-shot iterator that cannot be indexed.
                    matrix.append([int(i) for i in w[2:]])
            elif line.isspace():
                if seqs:
                    printIfGood(maf, seqs, scoreMatrix,
                                _headerParam(params, "a"),
                                _headerParam(params, "A"),
                                _headerParam(params, "e"))
                maf = []
                seqs = []
            else:
                if not scoreMatrix:
                    # Built once, from the header seen before the first
                    # alignment line.
                    if not matrix:
                        raise ValueError("missing score matrix in lastal header")
                    scoreMatrix = getScoreMatrix(rowHeads, colHeads, matrix,
                                                 _headerParam(params, "b"),
                                                 _headerParam(params, "B"))
                maf.append(line)
                if line[0] == "s":
                    seqs.append(line.split()[6])
        # The last alignment need not be followed by a blank line.
        if seqs:
            printIfGood(maf, seqs, scoreMatrix,
                        _headerParam(params, "a"),
                        _headerParam(params, "A"),
                        _headerParam(params, "e"))
    finally:
        # Close the input even on error, so fileinput is not left active.
        lines.close()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
92
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
if __name__ == "__main__":
    # Restore the default SIGPIPE action, so that a closed output pipe
    # ends the program without a Python traceback.
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # avoid silly error message

    usage = "%prog in.maf > out.maf"
    description = "Get alignments that have a segment with score >= threshold, with gentle masking of lowercase letters."
    op = optparse.OptionParser(usage=usage, description=description)
    (opts, args) = op.parse_args()

    try:
        lastPostmask(args)
    except KeyboardInterrupt:
        pass  # avoid silly error message
    except Exception as e:  # "as" form is valid on Python 2.6+ and 3
        # Report any failure as "<program>: error: <message>" and exit
        # with a failure status, rather than showing a traceback.
        prog = os.path.basename(sys.argv[0])
        sys.exit(prog + ": error: " + str(e))