annotate bin/maf-join @ 0:06f8460885ff

migrate from GitHub
author yutaka-saito
date Sun, 19 Apr 2015 20:51:13 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1 #! /usr/bin/env python
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
3 # Copyright 2009, 2010, 2011 Martin C. Frith
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
4
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
5 # Join two or more sets of MAF-format multiple alignments into bigger
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
6 # multiple alignments. The 'join field' is the top genome, which
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
7 # should be the same for each input. Each input should be sorted by
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
8 # position in the top genome.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
9
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
10 # WARNING: Alignment columns with a gap in the top genome are joined
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
11 # arbitrarily!!!
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
12
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
13 import sys, os, fileinput, optparse, signal
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
14
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
15 signal.signal(signal.SIGPIPE, signal.SIG_DFL) # stop spurious error message
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
16
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
17 class peekable: # Adapted from Python Cookbook 2nd edition
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
18 """An iterator that supports a peek operation."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
19 def __init__(self, iterable):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
20 self.it = iter(iterable)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
21 self.cache = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
22 def __iter__(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
23 return self
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
24 def next(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
25 if self.cache: return self.cache.pop()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
26 else: return self.it.next()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
27 def peek(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
28 if not self.cache: self.cache.append(self.it.next())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
29 return self.cache[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
30
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
31 def maxLen(things): return max(map(len, things))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
32
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
33 class MafBlock:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
34 def __init__(self, chr, beg, end, strand, chrSize, seq, prob):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
35 self.chr = chr # chromosome names
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
36 self.beg = beg # alignment begin coordinates
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
37 self.end = end # alignment end coordinates
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
38 self.strand = strand
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
39 self.chrSize = chrSize # chromosome sizes
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
40 self.seq = seq # aligned sequences, including gaps
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
41 self.prob = prob # probabilities (may be empty)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
42
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
43 def __nonzero__(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
44 return len(self.seq) > 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
45
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
46 def __cmp__(self, other):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
47 return cmp(self.chr[:1] + self.beg[:1], other.chr[:1] + other.beg[:1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
48
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
49 def before(self, other):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
50 return (self.chr[0], self.end[0]) <= (other.chr[0], other.beg[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
51
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
52 def after(self, other):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
53 return (self.chr[0], self.beg[0]) >= (other.chr[0], other.end[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
54
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
55 def addLine(self, line):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
56 words = line.split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
57 if line.startswith('s'):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
58 self.chr.append(words[1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
59 self.beg.append(int(words[2]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
60 self.end.append(int(words[2]) + int(words[3]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
61 self.strand.append(words[4])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
62 self.chrSize.append(words[5])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
63 self.seq.append(list(words[6]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
64 elif line.startswith('p'):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
65 self.prob.append(words[1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
66
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
67 def write(self):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
68 beg = map(str, self.beg)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
69 size = [str(e-b) for b, e in zip(self.beg, self.end)]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
70 seq = [''.join(i) for i in self.seq]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
71 columns = self.chr, beg, size, self.strand, self.chrSize, seq
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
72 widths = map(maxLen, columns)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
73 print 'a'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
74 for row in zip(*columns):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
75 widthsAndFields = zip(widths, row)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
76 field0 = "%-*s" % widthsAndFields[0] # left-justify
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
77 fields = ["%*s" % i for i in widthsAndFields[1:]] # right-justify
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
78 print 's', field0, ' '.join(fields)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
79 pad = ' '.join(' ' * i for i in widths[:-1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
80 for i in self.prob:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
81 print 'p', pad, i
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
82 print # blank line afterwards
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
83
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
84 def topSeqBeg(maf): return maf.beg[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
85 def topSeqEnd(maf): return maf.end[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
86 def emptyMaf(): return MafBlock([], [], [], [], [], [], [])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
87
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
88 def joinOnFirstItem(x, y):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
89 if x[0] != y[0]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
90 raise ValueError('join fields not equal:\n'+str(x[0])+'\n'+str(y[0]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
91 return x + y[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
92
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
93 def mafEasyJoin(x, y):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
94 '''Join two MAF blocks on the top sequence.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
95 xJoin = zip(x.chr, x.beg, x.end, x.strand, x.chrSize, x.seq)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
96 yJoin = zip(y.chr, y.beg, y.end, y.strand, y.chrSize, y.seq)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
97 joined = joinOnFirstItem(xJoin, yJoin)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
98 chr, beg, end, strand, chrSize, seq = zip(*joined)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
99 prob = x.prob + y.prob
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
100 return MafBlock(chr, beg, end, strand, chrSize, seq, prob)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
101
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
102 def countNonGaps(s): return len(s) - s.count('-')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
103
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
104 def nthNonGap(s, n):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
105 '''Get the start position of the n-th non-gap.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
106 for i, x in enumerate(s):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
107 if x != '-':
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
108 if n == 0: return i
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
109 n -= 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
110 raise ValueError('non-gap not found')
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
111
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
112 def nthLastNonGap(s, n):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
113 '''Get the end position of the n-th last non-gap.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
114 return len(s) - nthNonGap(s[::-1], n)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
115
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
116 def mafSlice(maf, alnBeg, alnEnd):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
117 '''Return a slice of a MAF block, using coordinates in the alignment.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
118 beg = [b + countNonGaps(s[:alnBeg]) for b, s in zip(maf.beg, maf.seq)]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
119 end = [e - countNonGaps(s[alnEnd:]) for e, s in zip(maf.end, maf.seq)]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
120 seq = [i[alnBeg:alnEnd] for i in maf.seq]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
121 prob = [i[alnBeg:alnEnd] for i in maf.prob]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
122 return MafBlock(maf.chr, beg, end, maf.strand, maf.chrSize, seq, prob)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
123
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
124 def mafSliceTopSeq(maf, newTopSeqBeg, newTopSeqEnd):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
125 '''Return a slice of a MAF block, using coordinates in the top sequence.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
126 lettersFromBeg = newTopSeqBeg - topSeqBeg(maf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
127 lettersFromEnd = topSeqEnd(maf) - newTopSeqEnd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
128 alnBeg = nthNonGap(maf.seq[0], lettersFromBeg)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
129 alnEnd = nthLastNonGap(maf.seq[0], lettersFromEnd)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
130 return mafSlice(maf, alnBeg, alnEnd)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
131
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
132 def jumpGaps(sequence, index):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
133 '''Return the next index of the sequence where there is a non-gap.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
134 nextIndex = index
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
135 while sequence[nextIndex] == '-': nextIndex += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
136 return nextIndex
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
137
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
138 def gapsToAdd(sequences):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
139 '''Return new gaps and their positions, needed to align the non-gaps.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
140 gapInfo = [[] for i in sequences]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
141 gapBeg = [0 for i in sequences]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
142 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
143 while True:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
144 gapEnd = [jumpGaps(s, p) for s, p in zip(sequences, gapBeg)]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
145 gapSize = [e-b for b, e in zip(gapBeg, gapEnd)]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
146 maxGapSize = max(gapSize)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
147 for s, e, i in zip(gapSize, gapEnd, gapInfo):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
148 if s < maxGapSize:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
149 newGap = maxGapSize - s
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
150 i.append((newGap, e))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
151 gapBeg = [e+1 for e in gapEnd]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
152 except IndexError: return gapInfo
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
153
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
154 def chunksAndGaps(s, gapsAndPositions, oneGap):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
155 '''Yield chunks of "s" interspersed with gaps at given positions.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
156 oldPosition = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
157 for gapLen, position in gapsAndPositions:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
158 yield s[oldPosition:position]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
159 yield oneGap * gapLen
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
160 oldPosition = position
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
161 yield s[oldPosition:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
162
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
163 def mafAddGaps(maf, gapsAndPositions):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
164 '''Add the given gaps at the given positions to a MAF block.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
165 maf.seq = [sum(chunksAndGaps(i, gapsAndPositions, ['-']), [])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
166 for i in maf.seq]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
167 maf.prob = [''.join(chunksAndGaps(i, gapsAndPositions, '~'))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
168 for i in maf.prob]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
169
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
170 def mafJoin(mafs):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
171 '''Intersect and join overlapping MAF blocks.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
172 newTopSeqBeg = max(map(topSeqBeg, mafs))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
173 newTopSeqEnd = min(map(topSeqEnd, mafs))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
174 mafs = [mafSliceTopSeq(i, newTopSeqBeg, newTopSeqEnd) for i in mafs]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
175 topSeqs = [i.seq[0] for i in mafs]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
176 gapInfo = gapsToAdd(topSeqs)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
177 for maf, gapsAndPositions in zip(mafs, gapInfo):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
178 mafAddGaps(maf, gapsAndPositions)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
179 return reduce(mafEasyJoin, mafs)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
180
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
181 def mafInput(lines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
182 '''Read lines and yield MAF blocks.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
183 maf = emptyMaf()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
184 for line in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
185 if line.isspace():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
186 if maf: yield maf
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
187 maf = emptyMaf()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
188 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
189 maf.addLine(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
190 if maf: yield maf
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
191
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
192 def sortedMafInput(lines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
193 '''Read lines and yield MAF blocks, checking that they are in order.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
194 old = emptyMaf()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
195 for maf in mafInput(lines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
196 if maf < old: sys.exit(progName + ": MAF blocks not sorted properly")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
197 yield maf
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
198 old = maf
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
199
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
200 def allOverlaps(sequences, beg, end):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
201 '''Yield all combinations of MAF blocks that overlap in the top genome.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
202 assert beg < end
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
203 if not sequences:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
204 yield ()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
205 return
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
206 for i in sequences[0]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
207 if topSeqEnd(i) <= beg: continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
208 if topSeqBeg(i) >= end: break # assumes they're sorted by position
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
209 newBeg = max(beg, topSeqBeg(i))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
210 newEnd = min(end, topSeqEnd(i))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
211 for j in allOverlaps(sequences[1:], newBeg, newEnd):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
212 yield (i,) + j
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
213
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
214 def nextWindow(window, input, referenceMaf):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
215 '''Yield "relevant" MAF blocks, based on overlap with referenceMaf.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
216 for maf in window:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
217 if not maf.before(referenceMaf): yield maf
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
218 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
219 while True:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
220 maf = input.peek()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
221 if maf.after(referenceMaf): break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
222 maf = input.next()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
223 if not maf.before(referenceMaf): yield maf
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
224 except StopIteration: pass
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
225
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
226 def overlappingMafs(sortedMafInputs):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
227 '''Yield all combinations of MAF blocks that overlap in the top genome.'''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
228 if not sortedMafInputs: return
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
229 head, tail = sortedMafInputs[0], sortedMafInputs[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
230 windows = [[] for t in tail]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
231 for h in head: # iterate over MAF blocks in the first input
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
232 windows = [list(nextWindow(w, t, h)) for w, t in zip(windows, tail)]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
233 for i in allOverlaps(windows, topSeqBeg(h), topSeqEnd(h)):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
234 yield (h,) + i
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
235
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
236 op = optparse.OptionParser(usage="%prog sorted-file1.maf sorted-file2.maf ...")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
237 (opts, args) = op.parse_args()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
238
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
239 progName = os.path.basename(sys.argv[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
240
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
241 inputs = [peekable(sortedMafInput(fileinput.input(i))) for i in args]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
242
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
243 for mafs in overlappingMafs(inputs):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
244 mafJoin(mafs).write()