annotate bin/maf-swap @ 0:06f8460885ff

migrate from GitHub
author yutaka-saito
date Sun, 19 Apr 2015 20:51:13 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1 #! /usr/bin/env python
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
3 # Read MAF-format alignments, and write them, after moving the Nth
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
4 # sequence to the top in each alignment.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
5
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
6 # Before writing, if the top sequence would be on the - strand, then
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
7 # flip all the strands. But don't do this if the top sequence is
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
8 # translated DNA.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
9
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
10 # Seems to work with Python 2.x, x>=4
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
11
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
12 import fileinput, itertools, optparse, os, signal, string, sys
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
13
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
14 def filterComments(lines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
15 for i in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
16 if i.startswith("#"): print i,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
17 else: yield i
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
18
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
19 def mafInput(lines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
20 for k, v in itertools.groupby(lines, str.isspace):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
21 if not k: yield list(v)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
22
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
23 def indexOfNthSequence(mafLines, n):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
24 for i, line in enumerate(mafLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
25 if line.startswith("s"):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
26 if n == 1: return i
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
27 n -= 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
28 raise Exception("encountered an alignment with too few sequences")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
29
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
30 def rangeOfNthSequence(mafLines, n):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
31 """Get the range of lines associated with the Nth sequence."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
32 start = indexOfNthSequence(mafLines, n)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
33 stop = start + 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
34 while stop < len(mafLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
35 line = mafLines[stop]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
36 if not (line.startswith("q") or line.startswith("i")): break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
37 stop += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
38 return start, stop
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
39
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
40 complement = string.maketrans('ACGTNSWRYKMBDHVacgtnswrykmbdhv',
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
41 'TGCANSWYRMKVHDBtgcanswyrmkvhdb')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
42 # doesn't handle "U" in RNA sequences
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
43 def revcomp(seq):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
44 return seq[::-1].translate(complement)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
45
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
46 def flippedMafS(words):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
47 alnStart = int(words[2])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
48 alnSize = int(words[3])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
49 strand = words[4]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
50 seqSize = int(words[5])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
51 alnString = words[6]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
52 newStart = seqSize - alnStart - alnSize
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
53 if strand == "-": newStrand = "+"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
54 else: newStrand = "-"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
55 newString = revcomp(alnString)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
56 out = words[0], words[1], newStart, alnSize, newStrand, seqSize, newString
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
57 return map(str, out)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
58
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
59 def flippedMafP(words):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
60 flippedString = words[1][::-1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
61 return words[:1] + [flippedString]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
62
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
63 def flippedMafQ(words):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
64 qualityString = words[2]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
65 flippedString = qualityString[::-1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
66 return words[:2] + [flippedString]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
67
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
68 def flippedMafLine(mafLine):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
69 words = mafLine.split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
70 if words[0] == "s": return flippedMafS(words)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
71 elif words[0] == "p": return flippedMafP(words)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
72 elif words[0] == "q": return flippedMafQ(words)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
73 else: return words
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
74
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
75 def maxlen(s):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
76 return max(map(len, s))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
77
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
78 def sLineFieldWidths(mafLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
79 sLines = (i for i in mafLines if i[0] == "s")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
80 sColumns = zip(*sLines)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
81 return map(maxlen, sColumns)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
82
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
83 def joinedMafS(words, fieldWidths):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
84 formatParams = itertools.chain(*zip(fieldWidths, words))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
85 return "%*s %-*s %*s %*s %*s %*s %*s\n" % tuple(formatParams)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
86
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
87 def joinedMafLine(words, fieldWidths):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
88 if words[0] == "s":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
89 return joinedMafS(words, fieldWidths)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
90 elif words[0] == "q":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
91 words = words[:2] + [""] * 4 + words[2:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
92 return joinedMafS(words, fieldWidths)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
93 elif words[0] == "p":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
94 words = words[:1] + [""] * 5 + words[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
95 return joinedMafS(words, fieldWidths)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
96 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
97 return " ".join(words) + "\n"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
98
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
99 def flippedMaf(mafLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
100 flippedLines = map(flippedMafLine, mafLines)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
101 fieldWidths = sLineFieldWidths(flippedLines)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
102 return (joinedMafLine(i, fieldWidths) for i in flippedLines)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
103
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
104 def isCanonicalStrand(mafLine):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
105 words = mafLine.split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
106 strand = words[4]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
107 if strand == "+": return True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
108 alnString = words[6]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
109 if "/" in alnString or "\\" in alnString: return True # frameshifts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
110 alnSize = int(words[3])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
111 gapCount = alnString.count("-")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
112 if len(alnString) - gapCount < alnSize: return True # translated DNA
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
113 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
114
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
115 def mafSwap(opts, args):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
116 inputLines = fileinput.input(args)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
117 for mafLines in mafInput(filterComments(inputLines)):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
118 start, stop = rangeOfNthSequence(mafLines, opts.n)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
119 mafLines[1:stop] = mafLines[start:stop] + mafLines[1:start]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
120 if not isCanonicalStrand(mafLines[1]):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
121 mafLines = flippedMaf(mafLines)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
122 for i in mafLines: print i,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
123 print # blank line after each alignment
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
124
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
125 if __name__ == "__main__":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
126 signal.signal(signal.SIGPIPE, signal.SIG_DFL) # avoid silly error message
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
127
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
128 usage = "%prog [options] my-alignments.maf"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
129 description = "Change the order of sequences in MAF-format alignments."
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
130 op = optparse.OptionParser(usage=usage, description=description)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
131 op.add_option("-n", type="int", default=2,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
132 help="move the Nth sequence to the top (default: %default)")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
133 (opts, args) = op.parse_args()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
134 if opts.n < 1: op.error("option -n: should be >= 1")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
135
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
136 try: mafSwap(opts, args)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
137 except KeyboardInterrupt: pass # avoid silly error message
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
138 except Exception, e:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
139 prog = os.path.basename(sys.argv[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
140 sys.exit(prog + ": error: " + str(e))