comparison read2mut.py @ 78:fdfe9a919ff7 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty
author mheinzl
date Fri, 22 Jul 2022 09:19:44 +0000
parents 1797e461d674
children d7aea14291e8
comparison
equal deleted inserted replaced
77:1797e461d674 78:fdfe9a919ff7
85 sscs_json = args.sscsJson 85 sscs_json = args.sscsJson
86 outfile = args.outputFile 86 outfile = args.outputFile
87 outfile2 = args.outputFile2 87 outfile2 = args.outputFile2
88 outfile3 = args.outputFile3 88 outfile3 = args.outputFile3
89 outputFile_csv = args.outputFile_csv 89 outputFile_csv = args.outputFile_csv
90
90 thresh = args.thresh 91 thresh = args.thresh
91 phred_score = args.phred 92 phred_score = args.phred
92 trim = args.trim 93 trim = args.trim
93 chimera_correction = args.chimera_correction 94 chimera_correction = args.chimera_correction
94 thr = args.softclipping_dist 95 thr = args.softclipping_dist
109 if thr <= 0: 110 if thr <= 0:
110 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thr)) 111 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thr))
111 112
112 # load dicts 113 # load dicts
113 with open(json_file, "r") as f: 114 with open(json_file, "r") as f:
114 (tag_dict, cvrg_dict) = json.load(f) 115 (tag_dict, cvrg_dict, tag_dict_ref) = json.load(f)
115 116
116 with open(sscs_json, "r") as f: 117 with open(sscs_json, "r") as f:
117 (mut_pos_dict, ref_pos_dict) = json.load(f) 118 (mut_pos_dict, ref_pos_dict) = json.load(f)
118 119
119 # read bam file 120 # read bam file
136 ref = variant.REF 137 ref = variant.REF
137 if len(variant.ALT) == 0: 138 if len(variant.ALT) == 0:
138 continue 139 continue
139 else: 140 else:
140 alt = variant.ALT[0] 141 alt = variant.ALT[0]
142
143 alt = alt.upper()
144 ref = ref.upper()
145 if "N" in alt: # skip indels with N in alt allele --> it is not an indel but just a mismatch at the position where the N is (checked this in IGV)
146 continue
147
141 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt 148 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt
142 149 mut_array.append([chrom, stop_pos, ref, alt])
143 if len(ref) == len(alt): 150 i += 1
144 mut_array.append([chrom, stop_pos, ref, alt]) 151 mut_dict[chrom_stop_pos] = {}
145 i += 1 152 mut_read_pos_dict[chrom_stop_pos] = {}
146 mut_dict[chrom_stop_pos] = {} 153 reads_dict[chrom_stop_pos] = {}
147 mut_read_pos_dict[chrom_stop_pos] = {} 154 mut_read_cigar_dict[chrom_stop_pos] = {}
148 reads_dict[chrom_stop_pos] = {} 155 real_start_end[chrom_stop_pos] = {}
149 mut_read_cigar_dict[chrom_stop_pos] = {} 156 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000):
150 real_start_end[chrom_stop_pos] = {} 157 if pileupcolumn.reference_pos == stop_pos:
151 158 count_alt = 0
152 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): 159 count_ref = 0
153 if pileupcolumn.reference_pos == stop_pos: 160 count_indel = 0
154 count_alt = 0 161 count_n = 0
155 count_ref = 0 162 count_other = 0
156 count_indel = 0 163 count_lowq = 0
157 count_n = 0 164 n = 0
158 count_other = 0 165 for pileupread in pileupcolumn.pileups:
159 count_lowq = 0 166 n += 1
160 n = 0 167 if not pileupread.is_refskip:
161 for pileupread in pileupcolumn.pileups: 168 if pileupread.is_del:
162 n += 1 169 p = pileupread.query_position_or_next
163 if not pileupread.is_del and not pileupread.is_refskip: 170 e = p + len(alt) - 1
164 tag = pileupread.alignment.query_name 171 else:
165 nuc = pileupread.alignment.query_sequence[pileupread.query_position] 172 p = pileupread.query_position
166 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 173 e = p + len(alt)
167 # if read is softclipped, store real position in reference 174 s = p
168 if "S" in pileupread.alignment.cigarstring: 175 tag = pileupread.alignment.query_name
169 # spftclipped at start 176 split_cigar = re.split('(\d+)', pileupread.alignment.cigarstring)
170 if re.search(r"^[0-9]+S", pileupread.alignment.cigarstring): 177
171 start = pileupread.alignment.reference_start - int(pileupread.alignment.cigarstring.split("S")[0]) 178 if len(ref) < len(alt):
172 end = pileupread.alignment.reference_end 179 if "I" in split_cigar:
173 # softclipped at end 180 all_insertions = [inser_i for inser_i, ins in enumerate(split_cigar) if ins == "I"]
174 elif re.search(r"S$", pileupread.alignment.cigarstring): 181 for ai in all_insertions: # if multiple insertions in DCS
175 end = pileupread.alignment.reference_end + int(re.split("[A-Z]", str(pileupread.alignment.cigarstring))[-2]) 182 ins_index = [int(ci) for ci in split_cigar[:ai - 1] if ci.isdigit()]
176 start = pileupread.alignment.reference_start 183 ins_count = split_cigar[ai - 1] # nr of insertions should match with alt allele
184 if "I" in split_cigar and sum(ins_index) == p + 1 and int(ins_count) >= len(alt) - 1: # if pe read matches exatcly to insertion
185 nuc = pileupread.alignment.query_sequence[s:e]
186 phred = ord(pileupread.alignment.qual[s]) - 33
187 break
188 elif "I" in split_cigar and sum(ins_index) == p + 1 and int(ins_count) < len(alt) - 1: # if pe read has shorter insertion -- not alt
189 nuc = pileupread.alignment.query_sequence[s:s+int(ins_count)+1]
190 phred = ord(pileupread.alignment.qual[s]) - 33
191 break
192 else: # insertion in pe reads but not at the desired position
193 nuc = pileupread.alignment.query_sequence[s]
194 phred = ord(pileupread.alignment.qual[s]) - 33
195 elif "D" in split_cigar: # if deletion in pe read, don't count
196 all_deletions = [del_i for del_i, dele in enumerate(split_cigar) if dele == "D"]
197 for di, ai in enumerate(all_deletions): # if multiple insertions in DCS
198 if di > 0: # more than 1 deletion, don't count previous deletion to position
199 all_deletions_mod = split_cigar[:ai - 1]
200 prev_del_idx = [all_deletions_mod.index("D") - 1, all_deletions_mod.index("D")]
201 split_cigar_no_prev = [ad for i, ad in enumerate(all_deletions_mod) if i not in prev_del_idx]
202 del_index = [int(ci) for ci in split_cigar_no_prev[:ai - 1] if ci.isdigit()]
203 else: # first deletion in read, sum all previous (mis)matches and insertions to position
204 del_index = [int(ci) for ci in split_cigar[:ai - 1] if ci.isdigit()]
205 if "D" in split_cigar and sum(del_index) == p + 1: # if deletion at that position
206 nuc = "D"
207 phred = ord(pileupread.alignment.qual[s]) - 33
208 break
209 else:
210 nuc = pileupread.alignment.query_sequence[s]
211 phred = ord(pileupread.alignment.qual[s]) - 33
212 else: # insertion in pe reads but not at the desired position
213 nuc = pileupread.alignment.query_sequence[s]
214 phred = ord(pileupread.alignment.qual[s]) - 33
215
216 elif len(ref) > len(alt):
217 ref_positions = pileupread.alignment.get_reference_positions(full_length=True)[s:p + len(ref)]
218 if "D" in split_cigar:
219 all_deletions = [del_i for del_i, dele in enumerate(split_cigar) if dele == "D"]
220 for di, ai in enumerate(all_deletions): # if multiple insertions in DCS
221 if di > 0: # more than 1 deletion, don't count previous deletion to position
222 all_deletions_mod = split_cigar[:ai - 1]
223 prev_del_idx = [all_deletions_mod.index("D") - 1, all_deletions_mod.index("D")]
224 split_cigar_no_prev = [ad for i, ad in enumerate(all_deletions_mod) if i not in prev_del_idx]
225 del_index = [int(ci) for ci in split_cigar_no_prev[:ai - 1] if ci.isdigit()]
226 else: # first deletion in read, sum all previous (mis)matches and insertions to position
227 del_index = [int(ci) for ci in split_cigar[:ai - 1] if ci.isdigit()]
228 del_count = split_cigar[ai - 1] # deletion on that position but does not necesserily match in the length
229 if "D" in split_cigar and sum(del_index) == p + 1 and int(del_count) >= len(ref) - 1: # if pe read matches exatcly to deletion
230 nuc = pileupread.alignment.query_sequence[s:e]
231 phred = ord(pileupread.alignment.qual[s]) - 33
232 if nuc == "":
233 nuc = str(alt)
234 break
235 elif "D" in split_cigar and sum(del_index) == p + 1 and int(del_count) < len(ref) - 1: # if pe read has shorter deletion --> not alt
236 nuc = str(ref)[:int(del_count)+1]
237 phred = ord(pileupread.alignment.qual[s]) - 33
238 break
239 else: # deletion in pe reads but not at the desired position
240 nuc = pileupread.alignment.query_sequence[s:s + len(ref)]
241 phred = ord(pileupread.alignment.qual[s]) - 33
242 elif "I" in split_cigar: # if pe read has insertion --> not count
243 all_insertions = [inser_i for inser_i, ins in enumerate(split_cigar) if ins == "I"]
244 for ai in all_insertions: # if multiple insertions in DCS
245 ins_index = [int(ci) for ci in split_cigar[:ai - 1] if ci.isdigit()]
246 ins_count = split_cigar[ai - 1] # nr of insertions should match with alt allele
247 if "I" in split_cigar and sum(ins_index) == p + 1:
248 nuc = "I"
249 phred = ord(pileupread.alignment.qual[s]) - 33
250 break
251 else:
252 nuc = pileupread.alignment.query_sequence[s]
253 phred = ord(pileupread.alignment.qual[s]) - 33
254 elif len(ref_positions) < len(ref): # DCS has reference but the position is at the very end of the DCS and therefore not the full reference positions are there
255 nuc = pileupread.alignment.get_reference_sequence()[s:s + len(ref)]
256 phred = ord(pileupread.alignment.qual[s]) - 33
257 if nuc.upper() == ref[:len(nuc)]:
258 nuc = str(ref)
259 else: # deletion in pe reads but not at the desired position
260 nuc = pileupread.alignment.query_sequence[s:s + len(ref)]
261 phred = ord(pileupread.alignment.qual[s]) - 33
262 else: # SNV: query position is None if is_del or is_refskip is set.
263 nuc = pileupread.alignment.query_sequence[s]
264 phred = ord(pileupread.alignment.qual[s]) - 33
265
266 nuc = nuc.upper()
267
268 # if read is softclipped, store real position in reference
269 if "S" in pileupread.alignment.cigarstring:
270 # spftclipped at start
271 if re.search(r"^[0-9]+S", pileupread.alignment.cigarstring):
272 start = pileupread.alignment.reference_start - int(pileupread.alignment.cigarstring.split("S")[0])
273 end = pileupread.alignment.reference_end
274 # softclipped at end
275 elif re.search(r"S$", pileupread.alignment.cigarstring):
276 end = pileupread.alignment.reference_end + int(re.split("[A-Z]", str(pileupread.alignment.cigarstring))[-2])
277 start = pileupread.alignment.reference_start
278 else:
279 end = pileupread.alignment.reference_end
280 start = pileupread.alignment.reference_start
281 if phred < phred_score:
282 nuc = "lowQ"
283 if tag not in mut_dict[chrom_stop_pos]:
284 mut_dict[chrom_stop_pos][tag] = {}
285 if nuc in mut_dict[chrom_stop_pos][tag]:
286 mut_dict[chrom_stop_pos][tag][nuc] += 1
287 else:
288 mut_dict[chrom_stop_pos][tag][nuc] = 1
289 if tag not in mut_read_pos_dict[chrom_stop_pos]:
290 mut_read_pos_dict[chrom_stop_pos][tag] = [s + 1]
291 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)]
292 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring]
293 real_start_end[chrom_stop_pos][tag] = [(start, end)]
294 else:
295 mut_read_pos_dict[chrom_stop_pos][tag].append(s + 1)
296 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence))
297 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring)
298 real_start_end[chrom_stop_pos][tag].append((start, end))
299 if nuc == alt:
300 count_alt += 1
301 if tag not in mut_read_dict:
302 mut_read_dict[tag] = {}
303 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
177 else: 304 else:
178 end = pileupread.alignment.reference_end 305 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
179 start = pileupread.alignment.reference_start 306 elif nuc == ref:
180 307 count_ref += 1
181 if phred < phred_score: 308 elif nuc == "N":
182 nuc = "lowQ" 309 count_n += 1
183 if tag not in mut_dict[chrom_stop_pos]: 310 elif nuc == "lowQ":
184 mut_dict[chrom_stop_pos][tag] = {} 311 count_lowq += 1
185 if nuc in mut_dict[chrom_stop_pos][tag]: 312 else:
186 mut_dict[chrom_stop_pos][tag][nuc] += 1 313 count_other += 1
187 else: 314 else:
188 mut_dict[chrom_stop_pos][tag][nuc] = 1 315 count_indel += 1
189 if tag not in mut_read_pos_dict[chrom_stop_pos]:
190 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1]
191 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)]
192 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring]
193 real_start_end[chrom_stop_pos][tag] = [(start, end)]
194 else:
195 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1)
196 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence))
197 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring)
198 real_start_end[chrom_stop_pos][tag].append((start, end))
199 if nuc == alt:
200 count_alt += 1
201 if tag not in mut_read_dict:
202 mut_read_dict[tag] = {}
203 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
204 else:
205 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
206 elif nuc == ref:
207 count_ref += 1
208 elif nuc == "N":
209 count_n += 1
210 elif nuc == "lowQ":
211 count_lowq += 1
212 else:
213 count_other += 1
214 else:
215 count_indel += 1
216 316
217 mut_array = np.array(mut_array) 317 mut_array = np.array(mut_array)
218 for read in bam.fetch(until_eof=True): 318 for read in bam.fetch(until_eof=True):
219 if read.is_unmapped: 319 if read.is_unmapped:
220 pure_tag = read.query_name[:-5] 320 pure_tag = read.query_name[:-5]
221 nuc = "na" 321 nuc = "na"
222 for key in tag_dict[pure_tag].keys(): 322 if pure_tag in tag_dict.keys(): # stored all ref and alt reads --> get only alt reads
223 if key not in mut_dict: 323 for key in tag_dict[pure_tag].keys():
224 mut_dict[key] = {} 324 if key not in mut_dict:
225 if read.query_name not in mut_dict[key]: 325 mut_dict[key] = {}
226 mut_dict[key][read.query_name] = {} 326 if read.query_name not in mut_dict[key]:
227 if nuc in mut_dict[key][read.query_name]: 327 mut_dict[key][read.query_name] = {}
228 mut_dict[key][read.query_name][nuc] += 1 328 if nuc in mut_dict[key][read.query_name]:
229 else: 329 mut_dict[key][read.query_name][nuc] += 1
230 mut_dict[key][read.query_name][nuc] = 1 330 else:
331 mut_dict[key][read.query_name][nuc] = 1
231 bam.close() 332 bam.close()
232
233 # create pure_tags_dict 333 # create pure_tags_dict
234 pure_tags_dict = {} 334 pure_tags_dict = {}
335 pure_tags_dict_ref = {}
235 for key1, value1 in sorted(mut_dict.items()): 336 for key1, value1 in sorted(mut_dict.items()):
236 i = np.where(np.array(['#'.join(str(i) for i in z) 337 i = np.where(np.array(['#'.join(str(i) for i in z)
237 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] 338 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0]
238 ref = mut_array[i, 2] 339 ref = mut_array[i, 2]
239 alt = mut_array[i, 3] 340 alt = mut_array[i, 3]
240 pure_tags_dict[key1] = {} 341 pure_tags_dict[key1] = {}
342 pure_tags_dict_ref[key1] = {}
241 for key2, value2 in sorted(value1.items()): 343 for key2, value2 in sorted(value1.items()):
242 for key3, value3 in value2.items(): 344 for key3, value3 in value2.items():
243 pure_tag = key2[:-5] 345 pure_tag = key2[:-5]
244 if key3 == alt: 346 if key3 == alt:
245 if pure_tag in pure_tags_dict[key1]: 347 if pure_tag in pure_tags_dict[key1]:
246 pure_tags_dict[key1][pure_tag] += 1 348 pure_tags_dict[key1][pure_tag] += 1
247 else: 349 else:
248 pure_tags_dict[key1][pure_tag] = 1 350 pure_tags_dict[key1][pure_tag] = 1
249 351
352 if key3 == ref:
353 if pure_tag in pure_tags_dict_ref[key1]:
354 pure_tags_dict_ref[key1][pure_tag] += 1
355 else:
356 pure_tags_dict_ref[key1][pure_tag] = 1
357
250 # create pure_tags_dict_short with thresh 358 # create pure_tags_dict_short with thresh
251 if thresh > 0: 359 if thresh > 0:
252 pure_tags_dict_short = {} 360 pure_tags_dict_short = {}
253 for key, value in sorted(pure_tags_dict.items()): 361 for key, value in sorted(pure_tags_dict.items()):
254 if len(value) < thresh: 362 if len(value) < thresh:
277 385
278 format13 = workbook3.add_format({'bg_color': '#BCF5A9'}) # green 386 format13 = workbook3.add_format({'bg_color': '#BCF5A9'}) # green
279 format23 = workbook3.add_format({'bg_color': '#FFC7CE'}) # red 387 format23 = workbook3.add_format({'bg_color': '#FFC7CE'}) # red
280 format33 = workbook3.add_format({'bg_color': '#FACC2E'}) # yellow 388 format33 = workbook3.add_format({'bg_color': '#FACC2E'}) # yellow
281 389
282 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', 390 header_line = ('variant ID', 'tier', 'allele', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab',
283 'read median length.ba', 'DCS median length', 391 'read median length.ba', 'DCS median length',
284 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', 392 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba',
285 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', 393 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba',
286 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', 394 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba',
287 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', 395 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba',
288 'in phase', 'chimeric tag') 396 'in phase', 'chimeric tag')
289 ws1.write_row(0, 0, header_line) 397 ws1.write_row(0, 0, header_line)
290 csv_writer.writerow(header_line) 398 csv_writer.writerow(header_line)
399
291 counter_tier11 = 0 400 counter_tier11 = 0
292 counter_tier12 = 0 401 counter_tier12 = 0
293 counter_tier21 = 0 402 counter_tier21 = 0
294 counter_tier22 = 0 403 counter_tier22 = 0
295 counter_tier23 = 0 404 counter_tier23 = 0
306 counter_tier6 = 0 415 counter_tier6 = 0
307 counter_tier7 = 0 416 counter_tier7 = 0
308 417
309 row = 1 418 row = 1
310 tier_dict = {} 419 tier_dict = {}
420 tier_dict_ref = {}
311 chimera_dict = {} 421 chimera_dict = {}
312 for key1, value1 in sorted(mut_dict.items()): 422 for key1, value1 in sorted(mut_dict.items()):
313 counts_mut = 0 423 counts_mut = 0
314 chimeric_tag_list = [] 424 chimeric_tag_list = []
315 chimeric_tag = {} 425 chimeric_tag = {}
316 if key1 in pure_tags_dict_short.keys(): 426 if (key1 in pure_tags_dict_short.keys()) or (key1 in pure_tags_dict_ref.keys()): # ref or alt
427
317 change_tier_after_print = [] 428 change_tier_after_print = []
318 i = np.where(np.array(['#'.join(str(i) for i in z) 429 i = np.where(np.array(['#'.join(str(i) for i in z)
319 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] 430 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0]
320 ref = mut_array[i, 2] 431 ref = mut_array[i, 2]
321 alt = mut_array[i, 3] 432 alt = mut_array[i, 3]
322 dcs_median = cvrg_dict[key1][2] 433 dcs_median = cvrg_dict[key1][2]
323 whole_array = list(pure_tags_dict_short[key1].keys()) 434 whole_array = list(pure_tags_dict_short[key1].keys())
324 435
325 tier_dict[key1] = {} 436 tier_dict[key1] = {}
437 tier_dict_ref[key1] = {}
326 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 2.5", 0), 438 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 2.5", 0),
327 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), 439 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0),
328 ("tier 6", 0), ("tier 7", 0)] 440 ("tier 6", 0), ("tier 7", 0)]
329 for k, v in values_tier_dict: 441 for k, v in values_tier_dict:
330 tier_dict[key1][k] = v 442 tier_dict[key1][k] = v
443 tier_dict_ref[key1][k] = v
331 444
332 used_keys = [] 445 used_keys = []
333 if 'ab' in mut_pos_dict[key1].keys(): 446 if 'ab' in mut_pos_dict[key1].keys():
334 sscs_mut_ab = mut_pos_dict[key1]['ab'] 447 sscs_mut_ab = mut_pos_dict[key1]['ab']
335 else: 448 else:
347 else: 460 else:
348 sscs_ref_ba = 0 461 sscs_ref_ba = 0
349 for key2, value2 in sorted(value1.items()): 462 for key2, value2 in sorted(value1.items()):
350 add_mut14 = "" 463 add_mut14 = ""
351 add_mut23 = "" 464 add_mut23 = ""
352 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): 465 if key2[:-5] not in tag_dict.keys() and key2[:-5] not in tag_dict_ref.keys(): # skip reads that have not alt or ref
466 continue
467
468 if ((key2[:-5] in tag_dict.keys() and key2[:-5] in pure_tags_dict_short[key1].keys() and key1 in tag_dict[key2[:-5]].keys()) or (key2[:-5] in tag_dict_ref.keys() and key2[:-5] in pure_tags_dict_ref[key1].keys() and key1 in tag_dict_ref[key2[:-5]].keys())) and (key2[:-5] not in used_keys):
469
470 if key2[:-5] in pure_tags_dict_short[key1].keys():
471 variant_type = "alt"
472 elif key2[:-5] in pure_tags_dict_ref[key1].keys():
473 variant_type = "ref"
474
353 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): 475 if key2[:-5] + '.ab.1' in mut_dict[key1].keys():
354 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) 476 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values())
355 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): 477 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
356 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] 478 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na']
357 # na1f = na1/total1 479 # na1f = na1/total1
548 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] 670 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2']
549 ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2'] 671 ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2']
550 672
551 used_keys.append(key2[:-5]) 673 used_keys.append(key2[:-5])
552 counts_mut += 1 674 counts_mut += 1
553 if (alt1f + alt2f + alt3f + alt4f) > 0.5: 675 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)):
676 if variant_type == "alt":
677 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f
678 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f
679 elif variant_type == "ref":
680 tier1ff, tier2ff, tier3ff, tier4ff = ref1f, ref2f, ref3f, ref4f
681 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = ref1f, ref2f, ref3f, ref4f
682
554 total1new_trim, total2new_trim, total3new_trim, total4new_trim = total1new, total2new, total3new, total4new 683 total1new_trim, total2new_trim, total3new_trim, total4new_trim = total1new, total2new, total3new, total4new
555 if total1new == 0: 684 if total1new == 0:
556 ref1f = alt1f = None 685 ref1f = alt1f = None
557 alt1ff = -1 686 alt1ff = -1
558 alt1ff_trim = -1 687 alt1ff_trim = -1
688 tier1ff = -1
689 tier1ff_trim = -1
559 else: 690 else:
560 alt1ff = alt1f 691 alt1ff = alt1f
561 alt1ff_trim = alt1f 692 alt1ff_trim = alt1f
693 tier1ff = tier1ff
694 tier1ff_trim = tier1ff_trim
562 if total2new == 0: 695 if total2new == 0:
563 ref2f = alt2f = None 696 ref2f = alt2f = None
564 alt2ff = -1 697 alt2ff = -1
565 alt2ff_trim = -1 698 alt2ff_trim = -1
699 tier2ff = -1
700 tier2ff_trim = -1
566 else: 701 else:
567 alt2ff = alt2f 702 alt2ff = alt2f
568 alt2ff_trim = alt2f 703 alt2ff_trim = alt2f
704 tier2ff = tier2ff
705 tier2ff_trim = tier2ff_trim
569 if total3new == 0: 706 if total3new == 0:
570 ref3f = alt3f = None 707 ref3f = alt3f = None
571 alt3ff = -1 708 alt3ff = -1
572 alt3ff_trim = -1 709 alt3ff_trim = -1
710 tier3ff = -1
711 tier3ff_trim = -1
573 else: 712 else:
574 alt3ff = alt3f 713 alt3ff = alt3f
575 alt3ff_trim = alt3f 714 alt3ff_trim = alt3f
715 tier3ff = tier3ff
716 tier3ff_trim = tier3ff_trim
576 if total4new == 0: 717 if total4new == 0:
577 ref4f = alt4f = None 718 ref4f = alt4f = None
578 alt4ff = -1 719 alt4ff = -1
579 alt4ff_trim = -1 720 alt4ff_trim = -1
721 tier4ff = -1
722 tier4ff_trim = -1
580 else: 723 else:
581 alt4ff = alt4f 724 alt4ff = alt4f
582 alt4ff_trim = alt4f 725 alt4ff_trim = alt4f
726 tier4ff = tier4ff
727 tier4ff_trim = tier4ff_trim
583 728
584 beg1 = beg4 = beg2 = beg3 = 0 729 beg1 = beg4 = beg2 = beg3 = 0
585 730
586 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) 731 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
587 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) 732 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
603 ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False 748 ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False
604 749
605 # mate 1 - SSCS ab 750 # mate 1 - SSCS ab
606 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] 751 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1]
607 safe_div_result = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) 752 safe_div_result = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1)))
608 if (safe_div_result == None): 753 if (safe_div_result is None):
609 ratio1 = False 754 ratio1 = False
610 else: 755 else:
611 ratio1 = safe_div_result >= threshold_reads 756 ratio1 = safe_div_result >= threshold_reads
612 if any(ij is True for ij in softclipped_idx1): 757 if any(ij is True for ij in softclipped_idx1):
613 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] 758 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1]
627 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads 772 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads
628 773
629 # mate 1 - SSCS ba 774 # mate 1 - SSCS ba
630 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] 775 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4]
631 safe_div_result = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) 776 safe_div_result = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4)))
632 if (safe_div_result == None): 777 if (safe_div_result is None):
633 ratio4 = False 778 ratio4 = False
634 else: 779 else:
635 ratio4 = safe_div_result >= threshold_reads 780 ratio4 = safe_div_result >= threshold_reads
636 if any(ij is True for ij in softclipped_idx4): 781 if any(ij is True for ij in softclipped_idx4):
637 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] 782 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4]
651 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads 796 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads
652 797
653 # mate 2 - SSCS ab 798 # mate 2 - SSCS ab
654 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] 799 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2]
655 safe_div_result = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) 800 safe_div_result = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2)))
656 if (safe_div_result == None): 801 if (safe_div_result is None):
657 ratio2 = False 802 ratio2 = False
658 else: 803 else:
659 ratio2 = safe_div_result >= threshold_reads 804 ratio2 = safe_div_result >= threshold_reads
660 if any(ij is True for ij in softclipped_idx2): 805 if any(ij is True for ij in softclipped_idx2):
661 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] 806 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2]
675 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads 820 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads
676 821
677 # mate 2 - SSCS ba 822 # mate 2 - SSCS ba
678 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] 823 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3]
679 safe_div_result = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3))) 824 safe_div_result = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3)))
680 if (safe_div_result == None): 825 if (safe_div_result is None):
681 ratio3 = False 826 ratio3 = False
682 else: 827 else:
683 ratio3 = safe_div_result >= threshold_reads 828 ratio3 = safe_div_result >= threshold_reads
684 if any(ij is True for ij in softclipped_idx3): 829 if any(ij is True for ij in softclipped_idx3):
685 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] 830 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3]
696 else: 841 else:
697 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number 842 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number
698 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads 843 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads
699 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads 844 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads
700 845
701 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant 846 if ((all(float(ij) >= 0.5 for ij in [tier1ff, tier4ff]) & # contradictory variant
702 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | 847 all(float(ij) == 0. for ij in [tier2ff, tier3ff])) |
703 (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & 848 (all(float(ij) >= 0.5 for ij in [tier2ff, tier3ff]) &
704 all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): 849 all(float(ij) == 0. for ij in [tier1ff, tier4ff]))):
705 alt1ff = 0 850 tier1ff = 0
706 alt4ff = 0 851 tier4ff = 0
707 alt2ff = 0 852 tier2ff = 0
708 alt3ff = 0 853 tier3ff = 0
709 trimmed = False 854 trimmed = False
710 contradictory = True 855 contradictory = True
711 # softclipping tiers 856 # softclipping tiers
712 # information of both mates available --> all reads for both mates and SSCS are softclipped 857 # information of both mates available --> all reads for both mates and SSCS are softclipped
713 elif (ratio1 & ratio4 & ratio2 & ratio3 & 858 elif (ratio1 & ratio4 & ratio2 & ratio3 &
714 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & 859 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) &
715 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 860 all(float(ij) > 0. for ij in [tier1ff, tier2ff, tier3ff, tier4ff])): # all mates available
716 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 861 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
717 softclipped_mutation_allMates = True 862 softclipped_mutation_allMates = True
718 softclipped_mutation_oneOfTwoMates = False 863 softclipped_mutation_oneOfTwoMates = False
719 softclipped_mutation_oneOfTwoSSCS = False 864 softclipped_mutation_oneOfTwoSSCS = False
720 softclipped_mutation_oneOfTwoSSCS_diffMates = False 865 softclipped_mutation_oneOfTwoSSCS_diffMates = False
721 softclipped_mutation_oneMate = False 866 softclipped_mutation_oneMate = False
722 softclipped_mutation_oneMateOneSSCS = False 867 softclipped_mutation_oneMateOneSSCS = False
723 alt1ff = 0 868 tier1ff = 0
724 alt4ff = 0 869 tier4ff = 0
725 alt2ff = 0 870 tier2ff = 0
726 alt3ff = 0 871 tier3ff = 0
727 trimmed = False 872 trimmed = False
728 contradictory = False 873 contradictory = False
729 # information of both mates available --> only one mate softclipped 874 # information of both mates available --> only one mate softclipped
730 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | 875 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) |
731 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & 876 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) &
732 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 877 all(float(ij) > 0. for ij in [tier1ff, tier2ff, tier3ff, tier4ff])): # all mates available
733 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 878 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
734 min_start1 = min(min([ij[0] for ij in ref_positions1]), min([ij[0] for ij in ref_positions4])) # red 879 min_start1 = min(min([ij[0] for ij in ref_positions1]), min([ij[0] for ij in ref_positions4])) # red
735 min_start2 = min(min([ij[0] for ij in ref_positions2]), min([ij[0] for ij in ref_positions3])) # blue 880 min_start2 = min(min([ij[0] for ij in ref_positions2]), min([ij[0] for ij in ref_positions3])) # blue
736 max_end1 = max(max([ij[1] for ij in ref_positions1]), max([ij[1] for ij in ref_positions4])) # red 881 max_end1 = max(max([ij[1] for ij in ref_positions1]), max([ij[1] for ij in ref_positions4])) # red
737 max_end2 = max(max([ij[1] for ij in ref_positions2]), max([ij[1] for ij in ref_positions3])) # blue 882 max_end2 = max(max([ij[1] for ij in ref_positions2]), max([ij[1] for ij in ref_positions3])) # blue
761 n_spacer_barcode = max_end2 - max_end1 906 n_spacer_barcode = max_end2 - max_end1
762 read_len_median2 = read_len_median2 - n_spacer_barcode 907 read_len_median2 = read_len_median2 - n_spacer_barcode
763 read_len_median3 = read_len_median3 - n_spacer_barcode 908 read_len_median3 = read_len_median3 - n_spacer_barcode
764 else: 909 else:
765 softclipped_mutation_oneOfTwoMates = True 910 softclipped_mutation_oneOfTwoMates = True
766 alt1ff = 0 911 tier1ff = 0
767 alt4ff = 0 912 tier4ff = 0
768 alt2ff = 0 913 tier2ff = 0
769 alt3ff = 0 914 tier3ff = 0
770 trimmed = False 915 trimmed = False
771 contradictory = False 916 contradictory = False
772 softclipped_mutation_allMates = False 917 softclipped_mutation_allMates = False
773 softclipped_mutation_oneOfTwoSSCS = False 918 softclipped_mutation_oneOfTwoSSCS = False
774 softclipped_mutation_oneMate = False 919 softclipped_mutation_oneMate = False
778 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): 923 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))):
779 beg1 = total1new 924 beg1 = total1new
780 total1new = 0 925 total1new = 0
781 alt1ff = 0 926 alt1ff = 0
782 alt1f = 0 927 alt1f = 0
928 tier1ff = 0
783 trimmed = True 929 trimmed = True
784 930
785 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): 931 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))):
786 beg4 = total4new 932 beg4 = total4new
787 total4new = 0 933 total4new = 0
788 alt4ff = 0 934 alt4ff = 0
789 alt4f = 0 935 alt4f = 0
936 tier4ff = 0
790 trimmed = True 937 trimmed = True
791 938
792 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): 939 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))):
793 beg2 = total2new 940 beg2 = total2new
794 total2new = 0 941 total2new = 0
795 alt2ff = 0 942 alt2ff = 0
796 alt2f = 0 943 alt2f = 0
944 tier2ff = 0
797 trimmed = True 945 trimmed = True
798 946
799 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): 947 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))):
800 beg3 = total3new 948 beg3 = total3new
801 total3new = 0 949 total3new = 0
802 alt3ff = 0 950 alt3ff = 0
803 alt3f = 0 951 alt3f = 0
952 tier3ff = 0
804 trimmed = True 953 trimmed = True
805 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) 954 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
806 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) 955 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
807 956
808 # information of both mates available --> only one mate softclipped 957 # information of both mates available --> only one mate softclipped
809 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & 958 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) &
810 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & 959 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) &
811 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 960 all(float(ij) > 0. for ij in [tier1ff, tier2ff, tier3ff, tier4ff])): # all mates available
812 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 961 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
813 softclipped_mutation_allMates = False 962 softclipped_mutation_allMates = False
814 softclipped_mutation_oneOfTwoMates = False 963 softclipped_mutation_oneOfTwoMates = False
815 softclipped_mutation_oneOfTwoSSCS = True 964 softclipped_mutation_oneOfTwoSSCS = True
816 softclipped_mutation_oneOfTwoSSCS_diffMates = False 965 softclipped_mutation_oneOfTwoSSCS_diffMates = False
817 softclipped_mutation_oneMate = False 966 softclipped_mutation_oneMate = False
818 softclipped_mutation_oneMateOneSSCS = False 967 softclipped_mutation_oneMateOneSSCS = False
819 alt1ff = 0 968 tier1ff = 0
820 alt4ff = 0 969 tier4ff = 0
821 alt2ff = 0 970 tier2ff = 0
822 alt3ff = 0 971 tier3ff = 0
823 trimmed = False 972 trimmed = False
824 contradictory = False 973 contradictory = False
825 974
826 # information of one mate available --> all reads of one mate are softclipped 975 # information of one mate available --> all reads of one mate are softclipped
827 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & 976 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) &
828 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | 977 all(float(ij) < 0. for ij in [tier2ff, tier3ff]) & all(float(ij) > 0. for ij in [tier1ff, tier4ff])) |
829 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & 978 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) &
830 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available 979 all(float(ij) < 0. for ij in [tier1ff, tier4ff]) & all(float(ij) > 0. for ij in [tier2ff, tier3ff]))): # all mates available
831 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 980 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
832 softclipped_mutation_allMates = False 981 softclipped_mutation_allMates = False
833 softclipped_mutation_oneOfTwoMates = False 982 softclipped_mutation_oneOfTwoMates = False
834 softclipped_mutation_oneOfTwoSSCS = False 983 softclipped_mutation_oneOfTwoSSCS = False
835 softclipped_mutation_oneOfTwoSSCS_diffMates = False 984 softclipped_mutation_oneOfTwoSSCS_diffMates = False
836 softclipped_mutation_oneMate = True 985 softclipped_mutation_oneMate = True
837 softclipped_mutation_oneMateOneSSCS = False 986 softclipped_mutation_oneMateOneSSCS = False
838 alt1ff = 0 987 tier1ff = 0
839 alt4ff = 0 988 tier4ff = 0
840 alt2ff = 0 989 tier2ff = 0
841 alt3ff = 0 990 tier3ff = 0
842 trimmed = False 991 trimmed = False
843 contradictory = False 992 contradictory = False
844 # information of one mate available --> only one SSCS is softclipped 993 # information of one mate available --> only one SSCS is softclipped
845 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & 994 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) &
846 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | 995 (all(float(ij) < 0. for ij in [tier2ff, tier3ff]) & all(float(ij) > 0. for ij in [tier1ff, tier4ff]))) |
847 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & 996 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) &
848 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available 997 (all(float(ij) < 0. for ij in [tier1ff, tier4ff]) & all(float(ij) < 0. for ij in [tier2ff, tier3ff])))): # all mates available
849 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 998 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
850 softclipped_mutation_allMates = False 999 softclipped_mutation_allMates = False
851 softclipped_mutation_oneOfTwoMates = False 1000 softclipped_mutation_oneOfTwoMates = False
852 softclipped_mutation_oneOfTwoSSCS = False 1001 softclipped_mutation_oneOfTwoSSCS = False
853 softclipped_mutation_oneOfTwoSSCS_diffMates = False 1002 softclipped_mutation_oneOfTwoSSCS_diffMates = False
854 softclipped_mutation_oneMate = False 1003 softclipped_mutation_oneMate = False
855 softclipped_mutation_oneMateOneSSCS = True 1004 softclipped_mutation_oneMateOneSSCS = True
856 alt1ff = 0 1005 tier1ff = 0
857 alt4ff = 0 1006 tier4ff = 0
858 alt2ff = 0 1007 tier2ff = 0
859 alt3ff = 0 1008 tier3ff = 0
860 trimmed = False 1009 trimmed = False
861 contradictory = False 1010 contradictory = False
862 1011
863 else: 1012 else:
864 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): 1013 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))):
865 beg1 = total1new 1014 beg1 = total1new
866 total1new = 0 1015 total1new = 0
867 alt1ff = 0 1016 alt1ff = 0
868 alt1f = 0 1017 alt1f = 0
1018 tier1ff = 0
869 trimmed = True 1019 trimmed = True
870 1020
871 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): 1021 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))):
872 beg4 = total4new 1022 beg4 = total4new
873 total4new = 0 1023 total4new = 0
874 alt4ff = 0 1024 alt4ff = 0
875 alt4f = 0 1025 alt4f = 0
1026 tier4ff = 0
876 trimmed = True 1027 trimmed = True
877 1028
878 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): 1029 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))):
879 beg2 = total2new 1030 beg2 = total2new
880 total2new = 0 1031 total2new = 0
881 alt2ff = 0 1032 alt2ff = 0
882 alt2f = 0 1033 alt2f = 0
1034 tier2ff = 0
883 trimmed = True 1035 trimmed = True
884 1036
885 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): 1037 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))):
886 beg3 = total3new 1038 beg3 = total3new
887 total3new = 0 1039 total3new = 0
888 alt3ff = 0 1040 alt3ff = 0
889 alt3f = 0 1041 alt3f = 0
1042 tier3ff = 0
890 trimmed = True 1043 trimmed = True
891 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) 1044 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
892 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) 1045 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
893 1046
894 # assign tiers 1047 # assign tiers
895 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & 1048 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) &
896 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | 1049 all(float(ij) >= 0.75 for ij in [tier1ff, tier4ff])) |
897 (all(int(ij) >= 3 for ij in [total2new, total3new]) & 1050 (all(int(ij) >= 3 for ij in [total2new, total3new]) &
898 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): 1051 all(float(ij) >= 0.75 for ij in [tier2ff, tier3ff]))):
899 tier = "1.1" 1052 tier = "1.1"
900 counter_tier11 += 1 1053 counter_tier11 += 1
901 tier_dict[key1]["tier 1.1"] += 1 1054 if variant_type == "alt":
1055 tier_dict[key1]["tier 1.1"] += 1
1056 elif variant_type == "ref":
1057 tier_dict_ref[key1]["tier 1.1"] += 1
902 1058
903 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & 1059 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) &
904 any(int(ij) >= 3 for ij in [total1new, total4new]) & 1060 any(int(ij) >= 3 for ij in [total1new, total4new]) &
905 any(int(ij) >= 3 for ij in [total2new, total3new]) & 1061 any(int(ij) >= 3 for ij in [total2new, total3new]) &
906 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): 1062 all(float(ij) >= 0.75 for ij in [tier1ff, tier2ff, tier3ff, tier4ff])):
907 tier = "1.2" 1063 tier = "1.2"
908 counter_tier12 += 1 1064 counter_tier12 += 1
909 tier_dict[key1]["tier 1.2"] += 1 1065 if variant_type == "alt":
1066 tier_dict[key1]["tier 1.2"] += 1
1067 elif variant_type == "ref":
1068 tier_dict_ref[key1]["tier 1.2"] += 1
910 1069
911 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & 1070 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) &
912 any(int(ij) >= 3 for ij in [total1new, total4new]) & 1071 any(int(ij) >= 3 for ij in [total1new, total4new]) &
913 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | 1072 all(float(ij) >= 0.75 for ij in [tier1ff, tier4ff])) |
914 (all(int(ij) >= 1 for ij in [total2new, total3new]) & 1073 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
915 any(int(ij) >= 3 for ij in [total2new, total3new]) & 1074 any(int(ij) >= 3 for ij in [total2new, total3new]) &
916 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): 1075 all(float(ij) >= 0.75 for ij in [tier2ff, tier3ff]))):
917 tier = "2.1" 1076 tier = "2.1"
918 counter_tier21 += 1 1077 counter_tier21 += 1
919 tier_dict[key1]["tier 2.1"] += 1 1078 if variant_type == "alt":
1079 tier_dict[key1]["tier 2.1"] += 1
1080 elif variant_type == "ref":
1081 tier_dict_ref[key1]["tier 2.1"] += 1
920 1082
921 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & 1083 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) &
922 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): 1084 all(float(ij) >= 0.75 for ij in [tier1ff, tier2ff, tier3ff, tier4ff])):
923 tier = "2.2" 1085 tier = "2.2"
924 counter_tier22 += 1 1086 counter_tier22 += 1
925 tier_dict[key1]["tier 2.2"] += 1 1087 if variant_type == "alt":
1088 tier_dict[key1]["tier 2.2"] += 1
1089 elif variant_type == "ref":
1090 tier_dict_ref[key1]["tier 2.2"] += 1
926 1091
927 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & 1092 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) &
928 any(int(ij) >= 3 for ij in [total2new, total3new]) & 1093 any(int(ij) >= 3 for ij in [total2new, total3new]) &
929 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & 1094 all(float(ij) >= 0.75 for ij in [tier1ff, tier4ff]) &
930 any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) | 1095 any(float(ij) >= 0.75 for ij in [tier2ff, tier3ff])) |
931 (all(int(ij) >= 1 for ij in [total2new, total3new]) & 1096 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
932 any(int(ij) >= 3 for ij in [total1new, total4new]) & 1097 any(int(ij) >= 3 for ij in [total1new, total4new]) &
933 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & 1098 all(float(ij) >= 0.75 for ij in [tier2ff, tier3ff]) &
934 any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): 1099 any(float(ij) >= 0.75 for ij in [tier1ff, tier4ff]))):
935 tier = "2.3" 1100 tier = "2.3"
936 counter_tier23 += 1 1101 counter_tier23 += 1
937 tier_dict[key1]["tier 2.3"] += 1 1102 if variant_type == "alt":
1103 tier_dict[key1]["tier 2.3"] += 1
1104 elif variant_type == "ref":
1105 tier_dict_ref[key1]["tier 2.3"] += 1
938 1106
939 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & 1107 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) &
940 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | 1108 all(float(ij) >= 0.75 for ij in [tier1ff, tier4ff])) |
941 (all(int(ij) >= 1 for ij in [total2new, total3new]) & 1109 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
942 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): 1110 all(float(ij) >= 0.75 for ij in [tier2ff, tier3ff]))):
943 tier = "2.4" 1111 tier = "2.4"
944 counter_tier24 += 1 1112 counter_tier24 += 1
945 tier_dict[key1]["tier 2.4"] += 1 1113 if variant_type == "alt":
1114 tier_dict[key1]["tier 2.4"] += 1
1115 elif variant_type == "ref":
1116 tier_dict_ref[key1]["tier 2.4"] += 1
946 1117
947 elif ((len(pure_tags_dict_short[key1]) > 1) & 1118 elif ((len(pure_tags_dict_short[key1]) > 1) &
948 (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | 1119 (all(float(ij) >= 0.5 for ij in [tier1ff, tier4ff]) |
949 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): 1120 all(float(ij) >= 0.5 for ij in [tier2ff, tier3ff]))):
950 tier = "3.1" 1121 tier = "3.1"
951 counter_tier31 += 1 1122 counter_tier31 += 1
952 tier_dict[key1]["tier 3.1"] += 1 1123 if variant_type == "alt":
1124 tier_dict[key1]["tier 3.1"] += 1
1125 elif variant_type == "ref":
1126 tier_dict_ref[key1]["tier 3.1"] += 1
953 1127
954 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & 1128 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) &
955 all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) | 1129 all(float(ij) >= 0.5 for ij in [tier1ff, tier4ff])) |
956 (all(int(ij) >= 1 for ij in [total2new, total3new]) & 1130 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
957 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): 1131 all(float(ij) >= 0.5 for ij in [tier2ff, tier3ff]))):
958 tier = "3.2" 1132 tier = "3.2"
959 counter_tier32 += 1 1133 counter_tier32 += 1
960 tier_dict[key1]["tier 3.2"] += 1 1134 if variant_type == "alt":
1135 tier_dict[key1]["tier 3.2"] += 1
1136 elif variant_type == "ref":
1137 tier_dict_ref[key1]["tier 3.2"] += 1
961 1138
962 elif (trimmed): 1139 elif (trimmed):
963 tier = "4" 1140 tier = "4"
964 counter_tier4 += 1 1141 counter_tier4 += 1
965 tier_dict[key1]["tier 4"] += 1 1142 if variant_type == "alt":
1143 tier_dict[key1]["tier 4"] += 1
1144 elif variant_type == "ref":
1145 tier_dict_ref[key1]["tier 4"] += 1
966 1146
967 # assign tiers 1147 # assign tiers
968 if ((all(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & 1148 if ((all(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
969 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | 1149 all(float(ij) >= 0.75 for ij in [tier1ff_trim, tier4ff_trim])) |
970 (all(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & 1150 (all(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
971 all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): 1151 all(float(ij) >= 0.75 for ij in [tier2ff_trim, tier3ff_trim]))):
972 trimmed_actual_high_tier = True 1152 trimmed_actual_high_tier = True
973 elif (all(int(ij) >= 1 for ij in [total1new_trim, total2new_trim, total3new_trim, total4new_trim]) & 1153 elif (all(int(ij) >= 1 for ij in [total1new_trim, total2new_trim, total3new_trim, total4new_trim]) &
974 any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & 1154 any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
975 any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & 1155 any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
976 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt2ff_trim, alt3ff_trim, alt4ff_trim])): 1156 all(float(ij) >= 0.75 for ij in [tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim])):
977 trimmed_actual_high_tier = True 1157 trimmed_actual_high_tier = True
978 elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) & 1158 elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) &
979 any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & 1159 any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
980 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | 1160 all(float(ij) >= 0.75 for ij in [tier1ff_trim, tier4ff_trim])) |
981 (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) & 1161 (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) &
982 any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & 1162 any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
983 all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): 1163 all(float(ij) >= 0.75 for ij in [tier2ff_trim, tier3ff_trim]))):
984 trimmed_actual_high_tier = True 1164 trimmed_actual_high_tier = True
985 elif (all(int(ij) >= 1 for ij in [total1new_trim, total2new_trim, total3new_trim, total4new_trim]) & 1165 elif (all(int(ij) >= 1 for ij in [total1new_trim, total2new_trim, total3new_trim, total4new_trim]) &
986 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt2ff_trim, alt3ff_trim, alt4ff_trim])): 1166 all(float(ij) >= 0.75 for ij in [tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim])):
987 trimmed_actual_high_tier = True 1167 trimmed_actual_high_tier = True
988 elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) & 1168 elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) &
989 any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & 1169 any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
990 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim]) & 1170 all(float(ij) >= 0.75 for ij in [tier1ff_trim, tier4ff_trim]) &
991 any(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim])) | 1171 any(float(ij) >= 0.75 for ij in [tier2ff_trim, tier3ff_trim])) |
992 (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) & 1172 (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) &
993 any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & 1173 any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
994 all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]) & 1174 all(float(ij) >= 0.75 for ij in [tier2ff_trim, tier3ff_trim]) &
995 any(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim]))): 1175 any(float(ij) >= 0.75 for ij in [tier1ff_trim, tier4ff_trim]))):
996 trimmed_actual_high_tier = True 1176 trimmed_actual_high_tier = True
997 elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) & 1177 elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) &
998 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | 1178 all(float(ij) >= 0.75 for ij in [tier1ff_trim, tier4ff_trim])) |
999 (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) & 1179 (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) &
1000 all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): 1180 all(float(ij) >= 0.75 for ij in [tier2ff_trim, tier3ff_trim]))):
1001 trimmed_actual_high_tier = True 1181 trimmed_actual_high_tier = True
1002 else: 1182 else:
1003 trimmed_actual_high_tier = False 1183 trimmed_actual_high_tier = False
1004 1184
1005 elif softclipped_mutation_allMates: 1185 elif softclipped_mutation_allMates:
1006 tier = "5.1" 1186 tier = "5.1"
1007 counter_tier51 += 1 1187 counter_tier51 += 1
1008 tier_dict[key1]["tier 5.1"] += 1 1188 if variant_type == "alt":
1189 tier_dict[key1]["tier 5.1"] += 1
1190 elif variant_type == "ref":
1191 tier_dict_ref[key1]["tier 5.1"] += 1
1009 1192
1010 elif softclipped_mutation_oneOfTwoMates: 1193 elif softclipped_mutation_oneOfTwoMates:
1011 tier = "5.2" 1194 tier = "5.2"
1012 counter_tier52 += 1 1195 counter_tier52 += 1
1013 tier_dict[key1]["tier 5.2"] += 1 1196 if variant_type == "alt":
1197 tier_dict[key1]["tier 5.2"] += 1
1198 elif variant_type == "ref":
1199 tier_dict_ref[key1]["tier 5.2"] += 1
1014 1200
1015 elif softclipped_mutation_oneOfTwoSSCS: 1201 elif softclipped_mutation_oneOfTwoSSCS:
1016 tier = "5.3" 1202 tier = "5.3"
1017 counter_tier53 += 1 1203 counter_tier53 += 1
1018 tier_dict[key1]["tier 5.3"] += 1 1204 if variant_type == "alt":
1205 tier_dict[key1]["tier 5.3"] += 1
1206 elif variant_type == "ref":
1207 tier_dict_ref[key1]["tier 5.3"] += 1
1019 1208
1020 elif softclipped_mutation_oneMate: 1209 elif softclipped_mutation_oneMate:
1021 tier = "5.4" 1210 tier = "5.4"
1022 counter_tier54 += 1 1211 counter_tier54 += 1
1023 tier_dict[key1]["tier 5.4"] += 1 1212 if variant_type == "alt":
1213 tier_dict[key1]["tier 5.4"] += 1
1214 elif variant_type == "ref":
1215 tier_dict_ref[key1]["tier 5.4"] += 1
1024 1216
1025 elif softclipped_mutation_oneMateOneSSCS: 1217 elif softclipped_mutation_oneMateOneSSCS:
1026 tier = "5.5" 1218 tier = "5.5"
1027 counter_tier55 += 1 1219 counter_tier55 += 1
1028 tier_dict[key1]["tier 5.5"] += 1 1220 if variant_type == "alt":
1221 tier_dict[key1]["tier 5.5"] += 1
1222 elif variant_type == "ref":
1223 tier_dict_ref[key1]["tier 5.5"] += 1
1029 1224
1030 elif (contradictory): 1225 elif (contradictory):
1031 tier = "6" 1226 tier = "6"
1032 counter_tier6 += 1 1227 counter_tier6 += 1
1033 tier_dict[key1]["tier 6"] += 1 1228 if variant_type == "alt":
1229 tier_dict[key1]["tier 6"] += 1
1230 elif variant_type == "ref":
1231 tier_dict_ref[key1]["tier 6"] += 1
1034 1232
1035 else: 1233 else:
1036 tier = "7" 1234 tier = "7"
1037 counter_tier7 += 1 1235 counter_tier7 += 1
1038 tier_dict[key1]["tier 7"] += 1 1236 if variant_type == "alt":
1237 tier_dict[key1]["tier 7"] += 1
1238 elif variant_type == "ref":
1239 tier_dict_ref[key1]["tier 7"] += 1
1039 1240
1040 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) 1241 chrom, pos, ref_a, alt_a = re.split(r'\#', key1)
1041 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) 1242 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt])
1042 sample_tag = key2[:-5] 1243 sample_tag = key2[:-5]
1043 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time 1244 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time
1112 read_pos4 = read_len_median4 = None 1313 read_pos4 = read_len_median4 = None
1113 if (read_pos2 == -1): 1314 if (read_pos2 == -1):
1114 read_pos2 = read_len_median2 = None 1315 read_pos2 = read_len_median2 = None
1115 if (read_pos3 == -1): 1316 if (read_pos3 == -1):
1116 read_pos3 = read_len_median3 = None 1317 read_pos3 = read_len_median3 = None
1117 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) 1318 line = (var_id, tier, variant_type, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
1118 # ws1.write_row(row, 0, line) 1319 # ws1.write_row(row, 0, line)
1119 # csv_writer.writerow(line) 1320 # csv_writer.writerow(line)
1120 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) 1321 line2 = ("", "", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
1121 # ws1.write_row(row + 1, 0, line2) 1322 # ws1.write_row(row + 1, 0, line2)
1122 # csv_writer.writerow(line2) 1323 # csv_writer.writerow(line2)
1123
1124 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1324 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1125 # {'type': 'formula', 1325 # {'type': 'formula',
1126 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 1326 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
1127 # 'format': format1, 1327 # 'format': format1,
1128 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1328 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1153 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) 1353 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers)
1154 1354
1155 # write to file 1355 # write to file
1156 # move tier 4 counts to tier 2.5 if there other mutations with tier <= 2.4 1356 # move tier 4 counts to tier 2.5 if there other mutations with tier <= 2.4
1157 sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]]) 1357 sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]])
1358 sum_highTiers_ref = sum([tier_dict_ref[key1][ij] for ij in list(sorted(tier_dict_ref[key1].keys()))[:6]])
1158 correct_tier = False 1359 correct_tier = False
1360 correct_tier_ref = False
1159 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0: 1361 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0:
1160 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"] 1362 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"]
1161 tier_dict[key1]["tier 4"] = 0 1363 tier_dict[key1]["tier 4"] = 0
1162 correct_tier = True 1364 correct_tier = True
1365
1366 if tier_dict_ref[key1]["tier 4"] > 0 and sum_highTiers_ref > 0:
1367 tier_dict_ref[key1]["tier 2.5"] = tier_dict_ref[key1]["tier 4"]
1368 tier_dict_ref[key1]["tier 4"] = 0
1369 correct_tier_ref = True
1163 1370
1164 for sample in change_tier_after_print: 1371 for sample in change_tier_after_print:
1165 row_number = sample[0] 1372 row_number = sample[0]
1166 line1 = sample[1] 1373 line1 = sample[1]
1167 line2 = sample[2] 1374 line2 = sample[2]
1168 actual_high_tier = sample[3] 1375 actual_high_tier = sample[3]
1169 current_tier = list(line1)[1] 1376 current_tier = list(line1)[1]
1170 1377
1171 if correct_tier and (current_tier == "4") and actual_high_tier: 1378 if line1[2] == "alt" and correct_tier and (current_tier == "4") and actual_high_tier:
1172 line1 = list(line1) 1379 line1 = list(line1)
1173 line1[1] = "2.5" 1380 line1[1] = "2.5"
1174 line1 = tuple(line1) 1381 line1 = tuple(line1)
1175 counter_tier25 += 1 1382 counter_tier25 += 1
1176 counter_tier4 -= 1 1383 counter_tier4 -= 1
1384 if line1[2] == "ref" and correct_tier_ref and (current_tier == "4") and actual_high_tier:
1385 line1 = list(line1)
1386 line1[1] = "2.5"
1387 line1 = tuple(line1)
1388 counter_tier25 += 1
1389 counter_tier4 -= 1
1390
1177 ws1.write_row(row_number, 0, line1) 1391 ws1.write_row(row_number, 0, line1)
1178 csv_writer.writerow(line1) 1392 csv_writer.writerow(line1)
1179 ws1.write_row(row_number + 1, 0, line2) 1393 ws1.write_row(row_number + 1, 0, line2)
1180 csv_writer.writerow(line2) 1394 csv_writer.writerow(line2)
1181 1395 if line1[2] == "alt":
1182 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), 1396 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1183 {'type': 'formula', 1397 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1184 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 1398 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row_number + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1185 'format': format1, 1399 elif line1[2] == "ref":
1186 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1400 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1187 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), 1401 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1188 {'type': 'formula', 1402 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row_number + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1189 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 1403
1190 'format': format3,
1191 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1192 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2),
1193 {'type': 'formula',
1194 'criteria': '=$B${}>="3"'.format(row_number + 1),
1195 'format': format2,
1196 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1197 # sheet 2 1404 # sheet 2
1198 if chimera_correction: 1405 if chimera_correction:
1199 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1406 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
1200 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', 1407 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)',
1201 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1408 'tier 3.1 (alt)', 'tier 3.2 (alt)', 'tier 4 (alt)', 'tier 5.1 (alt)', 'tier 5.2 (alt)', 'tier 5.3 (alt)', 'tier 5.4 (alt)', 'tier 5.5 (alt)', 'tier 6 (alt)', 'tier 7 (alt)',
1202 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') 1409 'tier 1.1 (ref)', 'tier 1.2 (ref)', 'tier 2.1 (ref)', 'tier 2.2 (ref)', 'tier 2.3 (ref)', 'tier 2.4 (ref)', 'tier 2.5 (ref)',
1410 'tier 3.1 (ref)', 'tier 3.2 (ref)', 'tier 4 (ref)', 'tier 5.1 (ref)', 'tier 5.2 (ref)', 'tier 5.3 (ref)', 'tier 5.4 (ref)', 'tier 5.5 (ref)', 'tier 6 (ref)', 'tier 7 (ref)'
1411 )
1203 else: 1412 else:
1204 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1413 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
1205 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', 1414 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)',
1206 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1415 'tier 3.1 (alt)', 'tier 3.2 (alt)', 'tier 4 (alt)', 'tier 5.1 (alt)', 'tier 5.2 (alt)', 'tier 5.3 (alt)', 'tier 5.4 (alt)', 'tier 5.5 (alt)', 'tier 6 (alt)', 'tier 7 (alt)',
1207 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') 1416 'tier 1.1 (ref)', 'tier 1.2 (ref)', 'tier 2.1 (ref)', 'tier 2.2 (ref)', 'tier 2.3 (ref)', 'tier 2.4 (ref)', 'tier 2.5 (ref)',
1208 1417 'tier 3.1 (ref)', 'tier 3.2 (ref)', 'tier 4 (ref)', 'tier 5.1 (ref)', 'tier 5.2 (ref)', 'tier 5.3 (ref)', 'tier 5.4 (ref)', 'tier 5.5 (ref)', 'tier 6 (ref)', 'tier 7 (ref)'
1418 )
1209 ws2.write_row(0, 0, header_line2) 1419 ws2.write_row(0, 0, header_line2)
1210 row = 0 1420 row = 0
1211 1421
1212 for key1, value1 in sorted(tier_dict.items()): 1422 for key1, value1 in sorted(tier_dict.items()):
1213 if key1 in pure_tags_dict_short.keys(): 1423 if key1 in pure_tags_dict_short.keys():
1218 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) 1428 chrom, pos, ref_a, alt_a = re.split(r'\#', key1)
1219 ref_count = cvrg_dict[key1][0] 1429 ref_count = cvrg_dict[key1][0]
1220 alt_count = cvrg_dict[key1][1] 1430 alt_count = cvrg_dict[key1][1]
1221 cvrg = ref_count + alt_count 1431 cvrg = ref_count + alt_count
1222 1432
1433 ref_tiers = tier_dict_ref[key1]
1434
1223 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) 1435 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt])
1224 lst = [var_id, cvrg] 1436 lst = [var_id, cvrg]
1225 used_tiers = [] 1437 used_tiers = []
1438 used_tiers_ref = [t for k, t in sorted(ref_tiers.items())]
1226 cum_af = [] 1439 cum_af = []
1227 for key2, value2 in sorted(value1.items()): 1440 for key2, value2 in sorted(value1.items()):
1228 # calculate cummulative AF 1441 # calculate cummulative AF
1229 used_tiers.append(value2) 1442 used_tiers.append(value2)
1230 if len(used_tiers) > 1: 1443 if len(used_tiers) > 1:
1231 cum = safe_div(sum(used_tiers), cvrg) 1444 cum = safe_div(sum(used_tiers), cvrg)
1232 cum_af.append(cum) 1445 cum_af.append(cum)
1233 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place 1446 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place
1234 continue 1447 continue
1235 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) 1448 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)])
1236 lst.extend([(cvrg - sum(used_tiers[-10:])), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (cvrg - sum(used_tiers[-10:])))]) 1449 lst.extend([(sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])), sum(used_tiers_ref[0:7]), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])))])
1237 if chimera_correction: 1450 if chimera_correction:
1238 chimeras_all = chimera_dict[key1][1] 1451 chimeras_all = chimera_dict[key1][1]
1239 new_alt = sum(used_tiers[0:7]) - chimeras_all 1452 new_alt = sum(used_tiers[0:7]) - chimeras_all
1240 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:7]))) 1453 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:7])))
1241 if fraction_chimeras is None: 1454 if fraction_chimeras is None:
1242 fraction_chimeras = 0. 1455 fraction_chimeras = 0.
1243 new_cvrg = (cvrg - sum(used_tiers[-10:])) * (1. - fraction_chimeras) 1456 new_cvrg = (sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])) * (1. - fraction_chimeras)
1244 lst.extend([new_cvrg, chimeras_all, safe_div(new_alt, new_cvrg)]) 1457 lst.extend([new_cvrg, chimeras_all, safe_div(new_alt, new_cvrg)])
1245 lst.extend([alt_count, safe_div(alt_count, cvrg)]) 1458 lst.extend([alt_count, safe_div(alt_count, cvrg)])
1246 lst.extend(used_tiers) 1459 lst.extend(used_tiers)
1247 lst.extend(cum_af) 1460 lst.extend(used_tiers_ref)
1461 # lst.extend(cum_af)
1248 lst = tuple(lst) 1462 lst = tuple(lst)
1249 ws2.write_row(row + 1, 0, lst) 1463 ws2.write_row(row + 1, 0, lst)
1250 if chimera_correction: 1464 if chimera_correction:
1251 ws2.conditional_format('M{}:N{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$M$1="tier 1.1"', 'format': format12, 'multi_range': 'M{}:N{} M1:N1'.format(row + 2, row + 2)}) 1465 ws2.conditional_format('N{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$N$1="tier 1.1 (alt)"', 'format': format12, 'multi_range': 'N{}:O{} N1:O1 AE{}:AF{} AE1:AF1'.format(row + 2, row + 2, row + 2, row + 2)})
1252 ws2.conditional_format('O{}:S{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$O$1="tier 2.1"', 'format': format32, 'multi_range': 'O{}:S{} O1:S1'.format(row + 2, row + 2)}) 1466 ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 2.1 (alt)"', 'format': format32, 'multi_range': 'P{}:T{} P1:T1 AG{}:AK{} AG1:AK1'.format(row + 2, row + 2, row + 2, row + 2)})
1253 ws2.conditional_format('T{}:AC{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$T$1="tier 3.1"', 'format': format22, 'multi_range': 'T{}:AC{} T1:AC1'.format(row + 2, row + 2)}) 1467 ws2.conditional_format('U{}:AD{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$U$1="tier 3.1 (alt)"', 'format': format22, 'multi_range': 'U{}:AD{} U1:AD1 AL{}:AU{} AL1:AU1'.format(row + 2, row + 2, row + 2, row + 2)})
1254 else: 1468 else:
1255 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) 1469 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)})
1256 ws2.conditional_format('L{}:P{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format32, 'multi_range': 'L{}:P{} L1:P1'.format(row + 2, row + 2)}) 1470 ws2.conditional_format('L{}:P{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format32, 'multi_range': 'L{}:P{} L1:P1'.format(row + 2, row + 2)})
1257 ws2.conditional_format('Q{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$Q$1="tier 3.1"', 'format': format22, 'multi_range': 'Q{}:Z{} Q1:Z1'.format(row + 2, row + 2)}) 1471 ws2.conditional_format('Q{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$Q$1="tier 3.1"', 'format': format22, 'multi_range': 'Q{}:Z{} Q1:Z1'.format(row + 2, row + 2)})
1258 row += 1 1472 row += 1