annotate RaGOO/ragoo_utilities/break_chimera.py @ 13:b9a3aeb162ab draft default tip

Uploaded
author dereeper
date Mon, 26 Jul 2021 18:22:37 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
1 from collections import defaultdict
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
2 import copy
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
3
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
4 from intervaltree import IntervalTree
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
5
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
6 from ragoo_utilities.ContigAlignment import UniqueContigAlignment
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
7
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
8
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
9 def get_ref_parts(alns, l, p, r):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
10 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
11
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
12 :param alns: A ContigAlignment object for the contig in question
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
13 :param l: Minimum alignment length to consider
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
14 :param p: Minimum percentage of a reference chromosome that must be covered to be considered significant
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
15 :param r: Minimum number of reference chromosome nucleotides needed to be covered to be considered significant
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
16 :return: A list of reference chromosomes to which the input contig significantly aligns to
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
17 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
18 all_intervals = defaultdict(list)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
19
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
20 # Iterate over all alignments
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
21 for i in range(len(alns.ref_headers)):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
22 if alns.aln_lens[i] < l:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
23 continue
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
24
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
25 all_intervals[alns.ref_headers[i]].append((alns.ref_starts[i], alns.ref_ends[i]))
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
26
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
27 ranges = dict()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
28 for i in all_intervals.keys():
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
29 ranges[i] = 0
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
30
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
31 # For each chromosome, sort the range and get the union interval length.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
32 # Algorithm and pseudocode given by Michael Kirsche
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
33 for i in all_intervals.keys():
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
34 sorted_intervals = sorted(all_intervals[i], key=lambda tup: tup[0])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
35 max_end = -1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
36 for j in sorted_intervals:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
37 start_new_terr = max(j[0], max_end)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
38 ranges[i] += max(0, j[1] - start_new_terr)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
39 max_end = max(max_end, j[1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
40
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
41 total_sum = sum(ranges.values())
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
42 chimeric_refs = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
43 for i in ranges.keys():
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
44 if (ranges[i]/total_sum)*100 > float(p) and ranges[i] > r:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
45 chimeric_refs.append(i)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
46
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
47 return chimeric_refs
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
48
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
49
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
50 def cluster_contig_alns(contig, alns, chroms, l):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
51 ctg_alns = alns[contig]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
52
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
53 # Filter contig alignments so as to only include alignments to desired chromosomes
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
54 ctg_alns.filter_ref_chroms(chroms)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
55
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
56 # Intialize the list of start and end positions w.r.t the query
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
57 query_pos = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
58
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
59 for i in range(len(ctg_alns.ref_headers)):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
60 query_pos.append((ctg_alns.query_starts[i], ctg_alns.query_ends[i], i))
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
61
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
62 final_order = [i[2] for i in sorted(query_pos)]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
63 final_refs = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
64 for i in final_order:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
65 final_refs.append(ctg_alns.ref_headers[i])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
66
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
67 return get_borders(final_refs, ctg_alns, final_order)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
68
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
69
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
70 def get_borders(ordered_refs, alns, order):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
71 borders = [0]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
72 current_ref = ordered_refs[0]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
73 for i in range(1, len(ordered_refs)-1):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
74 if ordered_refs[i] != current_ref and ordered_refs[i+1] != current_ref:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
75 current_ref = ordered_refs[i]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
76 borders.append(alns.query_ends[order[i-1]])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
77
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
78 borders.append(alns.query_lens[0])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
79 intervals = [(borders[i], borders[i+1]) for i in range(len(borders)-1)]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
80 return intervals
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
81
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
82
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
83 def avoid_gff_intervals(borders, gff_ins):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
84 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
85 :param borders:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
86 :param gff_ins: all features
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
87 :return:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
88 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
89
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
90 # Make an interval tree from the intervals of features associated with this contig
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
91 t = IntervalTree()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
92 for line in gff_ins:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
93 # If the interval is one bp long, skip
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
94 if line.start == line.end:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
95 continue
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
96 t[line.start:line.end] = (line.start, line.end)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
97
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
98 new_borders = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
99 for i in borders:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
100 # Returns an empty set if a border does not fall in any interval.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
101 # Otherwise, returns a list of Interval objects
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
102 interval_query = t[i[0]]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
103 if interval_query:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
104 while interval_query:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
105 # Make new border larger than the max end position of all intervals
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
106 i = (i[0] + 1, i[1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
107 interval_query = t[i[0]]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
108
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
109 interval_query = t[i[1]]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
110 if interval_query:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
111 while interval_query:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
112 # Make new border larger than the max end position of all intervals
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
113 i = (i[0], i[1] + 1)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
114 interval_query = t[i[1]]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
115
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
116 new_borders.append(i)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
117
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
118 new_borders = new_borders[:-1] + [(new_borders[-1][0], borders[-1][1])]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
119 return new_borders
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
120
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
121
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
122 def update_gff(features, borders, contig):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
123 # Pop the features to be updated
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
124 contig_feats = features.pop(contig)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
125
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
126 # Initialize lists for new broken contig headers
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
127 for i in range(len(borders)):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
128 features[contig + '_chimera_broken:' + str(borders[i][0]) + '-' + str(borders[i][1])] = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
129
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
130 # Could just use binary search instead of this tree since intervals are non-overlapping.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
131 # but input so small both are trivial
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
132 t = IntervalTree()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
133 for i in borders:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
134 t[i[0]:i[1]] = i
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
135
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
136 for i in contig_feats:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
137 query = t[i.start]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
138 assert len(query) == 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
139 break_start = list(query)[0].begin
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
140 break_end = list(query)[0].end
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
141 query_border = (break_start, break_end)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
142 break_number = borders.index(query_border)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
143 i.seqname = contig + '_chimera_broken:' + str(borders[break_number][0]) + '-' + str(borders[break_number][1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
144 i.start = i.start - break_start
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
145 i.end = i.end - break_start
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
146 features[contig + '_chimera_broken:' + str(borders[break_number][0]) + '-' + str(borders[break_number][1])].append(i)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
147
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
148 return features
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
149
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
150
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
151 def break_contig(contigs_dict, header, break_points):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
152 seq = contigs_dict.pop(header)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
153 test_seq = ''
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
154 for i in range(len(break_points)):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
155 contigs_dict[header + '_chimera_broken:' + str(break_points[i][0]) + '-' + str(break_points[i][1])] = seq[break_points[i][0]: break_points[i][1]]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
156 test_seq += seq[break_points[i][0]: break_points[i][1]]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
157
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
158 assert test_seq == seq
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
159 return contigs_dict
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
160
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
161
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
162 def get_intra_contigs(alns, l, d, c):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
163 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
164 Flag contigs as being intrachromosomal chimeras
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
165 :param alns:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
166 :param l: Minimum alignment length to consider
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
167 :param d: Distance between consecutive adjacent alignments with respect to the reference. If larger than this, flag
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
168 :param c: Distance between consecutive adjacent alignments with respect to the query. If larger than this, flag
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
169 :return: dict of contigs and break points.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
170 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
171
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
172 # Get only the header to which this contig mostly aligns to and filter out smaller alignments.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
173 uniq_aln = UniqueContigAlignment(alns)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
174 best_header = uniq_aln.ref_chrom
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
175 ctg_alns = copy.deepcopy(alns)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
176 ctg_alns.filter_ref_chroms([best_header])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
177 ctg_alns.filter_lengths(l)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
178
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
179 # If there are no longer any alignments after length filtering, give up
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
180 if not len(ctg_alns.ref_headers):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
181 return
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
182
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
183 # Sort the alignments with respect to the reference start and end positions.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
184 ctg_alns.sort_by_ref()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
185
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
186 # Make a list of distance between alignments
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
187 # first with respect to (wrt) the reference.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
188 distances_wrt_ref = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
189 for i in range(len(ctg_alns.ref_headers)-1):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
190 distances_wrt_ref.append(ctg_alns.ref_starts[i+1] - ctg_alns.ref_starts[i])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
191
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
192 # next, with respect to (wrt) the contig.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
193 distances_wrt_ctg = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
194 for i in range(len(ctg_alns.ref_headers) - 1):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
195 distances_wrt_ctg.append(abs(ctg_alns.query_starts[i + 1] - ctg_alns.query_starts[i]))
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
196
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
197 # Next, assign the following two identities.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
198 # 1. When ordered by the reference, the alignments start at the beginning or the end of the query
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
199 # 2. For the alignment which will be broken on, is it on the forward or reverse strand.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
200
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
201 is_query_start = True
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
202
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
203 if ctg_alns.query_starts[0] >= ctg_alns.query_lens[0]/5:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
204 is_query_start = False
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
205
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
206 # This conditional essentially checks if there are any break points for this contig.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
207 # Returns None otherwise (no return statement)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
208 if distances_wrt_ref:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
209 if max(distances_wrt_ref) > d:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
210 gap_index = distances_wrt_ref.index(max(distances_wrt_ref))
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
211 a_alns_strands = ctg_alns.strands[:gap_index]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
212 if is_query_start:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
213 if a_alns_strands.count('-') > a_alns_strands.count('+'):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
214 # The first subcontig is on the reverse strand
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
215 return (ctg_alns.contig, [(0, ctg_alns.query_ends[0]), (ctg_alns.query_ends[0], ctg_alns.query_lens[0])])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
216 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
217 # The first subcontig is on the forward strand.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
218 return (ctg_alns.contig, [(0, ctg_alns.query_ends[gap_index]), (ctg_alns.query_ends[gap_index], ctg_alns.query_lens[0])])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
219 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
220 # The first subcontig starts at the end of the contig
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
221 if a_alns_strands.count('-') > a_alns_strands.count('+'):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
222 # The first subcontig is on the reverse strand
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
223 return (ctg_alns.contig, [(0, ctg_alns.query_starts[gap_index]), (ctg_alns.query_starts[gap_index], ctg_alns.query_lens[0])])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
224 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
225 # The first subcontig is on the forward strand.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
226 return (ctg_alns.contig, [(0, ctg_alns.query_starts[0]), (ctg_alns.query_starts[0], ctg_alns.query_lens[0])])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
227
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
228 if max(distances_wrt_ctg) > c:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
229 gap_index = distances_wrt_ctg.index(max(distances_wrt_ctg)) + 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
230 a_alns_strands = ctg_alns.strands[:gap_index]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
231 if is_query_start:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
232 if a_alns_strands.count('-') > a_alns_strands.count('+'):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
233 # The first subcontig is on the reverse strand
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
234 return (ctg_alns.contig, [(0, ctg_alns.query_ends[0]), (ctg_alns.query_ends[0], ctg_alns.query_lens[0])])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
235 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
236 # The first subcontig is on the forward strand.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
237 return (ctg_alns.contig, [(0, ctg_alns.query_ends[gap_index]), (ctg_alns.query_ends[gap_index], ctg_alns.query_lens[0])])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
238 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
239 # The first subcontig starts at the end of the contig
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
240 if a_alns_strands.count('-') > a_alns_strands.count('+'):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
241 # The first subcontig is on the reverse strand
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
242 return (ctg_alns.contig, [(0, ctg_alns.query_starts[gap_index-1]), (ctg_alns.query_starts[gap_index-1], ctg_alns.query_lens[0])])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
243 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
244 # The first subcontig is on the forward strand.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
245 return (ctg_alns.contig, [(0, ctg_alns.query_starts[0]), (ctg_alns.query_starts[0], ctg_alns.query_lens[0])])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
246