comparison read2mut.py @ 0:e5953c54cfb5 draft

planemo upload for repository https://github.com/gpovysil/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Sun, 04 Oct 2020 17:19:39 +0000
parents
children 9d74f30275c6
comparison
equal deleted inserted replaced
-1:000000000000 0:e5953c54cfb5
1 #!/usr/bin/env python
2
3 """read2mut.py
4
5 Author -- Gundula Povysil
6 Contact -- povysil@bioinf.jku.at
7
8 Looks for reads with mutation at known
9 positions and calculates frequencies and stats.
10
11 ======= ========== ================= ================================
12 Version Date Author Description
13 0.2.1 2019-10-27 Gundula Povysil -
14 ======= ========== ================= ================================
15
16
17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam
18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json
19 --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 --chimera_correction
20
21 """
22
23 from __future__ import division
24
25 import argparse
26 import json
27 import operator
28 import os
29 import re
30 import sys
31
32 import numpy as np
33 import pysam
34 import xlsxwriter
35
36
37 def make_argparser():
38 parser = argparse.ArgumentParser(description='Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.')
39 parser.add_argument('--mutFile',
40 help='TABULAR file with DCS mutations.')
41 parser.add_argument('--bamFile',
42 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).')
43 parser.add_argument('--inputJson',
44 help='JSON file with data collected by mut2read.py.')
45 parser.add_argument('--sscsJson',
46 help='JSON file with SSCS counts collected by mut2sscs.py.')
47 parser.add_argument('--outputFile',
48 help='Output xlsx file of mutation details.')
49 parser.add_argument('--thresh', type=int, default=0,
50 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.')
51 parser.add_argument('--phred', type=int, default=20,
52 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.')
53 parser.add_argument('--trim', type=int, default=10,
54 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.')
55 parser.add_argument('--chimera_correction', action="store_true",
56 help='Add additional tier for chimeric variants and correct the variant frequencies')
57
58 return parser
59
60
61 def safe_div(x, y):
62 if y == 0:
63 return None
64 return x / y
65
66
67 def read2mut(argv):
68 parser = make_argparser()
69 args = parser.parse_args(argv[1:])
70 file1 = args.mutFile
71 file2 = args.bamFile
72 json_file = args.inputJson
73 sscs_json = args.sscsJson
74 outfile = args.outputFile
75 thresh = args.thresh
76 phred_score = args.phred
77 trim = args.trim
78 chimera_correction = args.chimera_correction
79
80 if os.path.isfile(file1) is False:
81 sys.exit("Error: Could not find '{}'".format(file1))
82 if os.path.isfile(file2) is False:
83 sys.exit("Error: Could not find '{}'".format(file2))
84 if os.path.isfile(json_file) is False:
85 sys.exit("Error: Could not find '{}'".format(json_file))
86 if thresh < 0:
87 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh))
88 if phred_score < 0:
89 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score))
90 if trim < 0:
91 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh))
92
93 # 1. read mut file
94 with open(file1, 'r') as mut:
95 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
96
97 # 2. load dicts
98 with open(json_file, "r") as f:
99 (tag_dict, cvrg_dict) = json.load(f)
100
101 with open(sscs_json, "r") as f:
102 (mut_pos_dict, ref_pos_dict) = json.load(f)
103
104 # 3. read bam file
105 # pysam.index(file2)
106 bam = pysam.AlignmentFile(file2, "rb")
107
108 # 4. create mut_dict
109 mut_dict = {}
110 mut_read_pos_dict = {}
111 mut_read_dict = {}
112 reads_dict = {}
113 if mut_array.shape == (1, 13):
114 mut_array = mut_array.reshape((1, len(mut_array)))
115
116 for m in range(0, len(mut_array[:, 0])):
117 print(str(m + 1) + " of " + str(len(mut_array[:, 0])))
118 # for m in range(0, 5):
119 chrom = mut_array[m, 1]
120 stop_pos = mut_array[m, 2].astype(int)
121 chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
122 ref = mut_array[m, 9]
123 alt = mut_array[m, 10]
124 mut_dict[chrom_stop_pos] = {}
125 mut_read_pos_dict[chrom_stop_pos] = {}
126 reads_dict[chrom_stop_pos] = {}
127
128 for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=1000000000):
129 if pileupcolumn.reference_pos == stop_pos - 1:
130 count_alt = 0
131 count_ref = 0
132 count_indel = 0
133 count_n = 0
134 count_other = 0
135 count_lowq = 0
136 n = 0
137 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
138 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
139 for pileupread in pileupcolumn.pileups:
140 n += 1
141 if not pileupread.is_del and not pileupread.is_refskip:
142 tag = pileupread.alignment.query_name
143 nuc = pileupread.alignment.query_sequence[pileupread.query_position]
144 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33
145 if phred < phred_score:
146 nuc = "lowQ"
147 if tag not in mut_dict[chrom_stop_pos]:
148 mut_dict[chrom_stop_pos][tag] = {}
149 if nuc in mut_dict[chrom_stop_pos][tag]:
150 mut_dict[chrom_stop_pos][tag][nuc] += 1
151 else:
152 mut_dict[chrom_stop_pos][tag][nuc] = 1
153 if tag not in mut_read_pos_dict[chrom_stop_pos]:
154 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1
155 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence)
156 else:
157 mut_read_pos_dict[chrom_stop_pos][tag] = np.append(
158 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1)
159 reads_dict[chrom_stop_pos][tag] = np.append(
160 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence))
161
162 if nuc == alt:
163 count_alt += 1
164 if tag not in mut_read_dict:
165 mut_read_dict[tag] = {}
166 mut_read_dict[tag][chrom_stop_pos] = alt
167 else:
168 mut_read_dict[tag][chrom_stop_pos] = alt
169 elif nuc == ref:
170 count_ref += 1
171 elif nuc == "N":
172 count_n += 1
173 elif nuc == "lowQ":
174 count_lowq += 1
175 else:
176 count_other += 1
177 else:
178 count_indel += 1
179
180 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq))
181
182 for read in bam.fetch(until_eof=True):
183 if read.is_unmapped:
184 pure_tag = read.query_name[:-5]
185 nuc = "na"
186 for key in tag_dict[pure_tag].keys():
187 if key not in mut_dict:
188 mut_dict[key] = {}
189 if read.query_name not in mut_dict[key]:
190 mut_dict[key][read.query_name] = {}
191 if nuc in mut_dict[key][read.query_name]:
192 mut_dict[key][read.query_name][nuc] += 1
193 else:
194 mut_dict[key][read.query_name][nuc] = 1
195 bam.close()
196
197 # 5. create pure_tags_dict
198 pure_tags_dict = {}
199 for key1, value1 in sorted(mut_dict.items()):
200 i = np.where(np.array(['#'.join(str(i) for i in z)
201 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0]
202 ref = mut_array[i, 9]
203 alt = mut_array[i, 10]
204 pure_tags_dict[key1] = {}
205 for key2, value2 in sorted(value1.items()):
206 for key3, value3 in value2.items():
207 pure_tag = key2[:-5]
208 if key3 == alt:
209 if pure_tag in pure_tags_dict[key1]:
210 pure_tags_dict[key1][pure_tag] += 1
211 else:
212 pure_tags_dict[key1][pure_tag] = 1
213
214 # 6. create pure_tags_dict_short with thresh
215 if thresh > 0:
216 pure_tags_dict_short = {}
217 for key, value in sorted(pure_tags_dict.items()):
218 if len(value) < thresh:
219 pure_tags_dict_short[key] = value
220 else:
221 pure_tags_dict_short = pure_tags_dict
222
223 #whole_array = []
224 #for k in pure_tags_dict.values():
225 # whole_array.extend(k.keys())
226
227 # 7. output summary with threshold
228 workbook = xlsxwriter.Workbook(outfile)
229 ws1 = workbook.add_worksheet("Results")
230 ws2 = workbook.add_worksheet("Allele frequencies")
231 ws3 = workbook.add_worksheet("Tiers")
232
233 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green
234 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red
235 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow
236
237 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab',
238 'read median length.ba', 'DCS median length',
239 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba',
240 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba',
241 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba',
242 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba',
243 'other mut', 'chimeric tag')
244 ws1.write_row(0, 0, header_line)
245
246 counter_tier11 = 0
247 counter_tier12 = 0
248 counter_tier21 = 0
249 counter_tier22 = 0
250 counter_tier23 = 0
251 counter_tier24 = 0
252 counter_tier31 = 0
253 counter_tier32 = 0
254 counter_tier41 = 0
255 counter_tier42 = 0
256
257 if chimera_correction:
258 counter_tier43 = 0
259
260 counter_tier5 = 0
261
262 row = 1
263 tier_dict = {}
264 for key1, value1 in sorted(mut_dict.items()):
265 counts_mut = 0
266 if key1 in pure_tags_dict_short.keys():
267 i = np.where(np.array(['#'.join(str(i) for i in z)
268 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0]
269 ref = mut_array[i, 9]
270 alt = mut_array[i, 10]
271 dcs_median = cvrg_dict[key1][2]
272 whole_array = pure_tags_dict_short[key1].keys()
273
274 tier_dict[key1] = {}
275 if chimera_correction:
276 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 4.3", 0), ("tier 5", 0)]
277 else:
278 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0)]
279
280 for k, v in values_tier_dict:
281 tier_dict[key1][k] = v
282
283 used_keys = []
284 if 'ab' in mut_pos_dict[key1].keys():
285 sscs_mut_ab = mut_pos_dict[key1]['ab']
286 else:
287 sscs_mut_ab = 0
288 if 'ba' in mut_pos_dict[key1].keys():
289 sscs_mut_ba = mut_pos_dict[key1]['ba']
290 else:
291 sscs_mut_ba = 0
292 if 'ab' in ref_pos_dict[key1].keys():
293 sscs_ref_ab = ref_pos_dict[key1]['ab']
294 else:
295 sscs_ref_ab = 0
296 if 'ba' in ref_pos_dict[key1].keys():
297 sscs_ref_ba = ref_pos_dict[key1]['ba']
298 else:
299 sscs_ref_ba = 0
300 for key2, value2 in sorted(value1.items()):
301 add_mut14 = ""
302 add_mut23 = ""
303 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()):
304 if key2[:-5] + '.ab.1' in mut_dict[key1].keys():
305 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values())
306 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
307 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na']
308 # na1f = na1/total1
309 else:
310 # na1 = na1f = 0
311 na1 = 0
312 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
313 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ']
314 # lowq1f = lowq1 / total1
315 else:
316 # lowq1 = lowq1f = 0
317 lowq1 = 0
318 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
319 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref]
320 ref1f = ref1 / (total1 - na1 - lowq1)
321 else:
322 ref1 = ref1f = 0
323 if alt in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
324 alt1 = mut_dict[key1][key2[:-5] + '.ab.1'][alt]
325 alt1f = alt1 / (total1 - na1 - lowq1)
326 else:
327 alt1 = alt1f = 0
328 total1new = total1 - na1 - lowq1
329 if (key2[:-5] + '.ab.1') in mut_read_dict.keys():
330 k1 = mut_read_dict[(key2[:-5] + '.ab.1')].keys()
331 add_mut1 = len(k1)
332 if add_mut1 > 1:
333 for k, v in mut_read_dict[(key2[:-5] + '.ab.1')].items():
334 if k != key1:
335 if len(add_mut14) == 0:
336 add_mut14 = str(k) + "_" + v
337 else:
338 add_mut14 = add_mut14 + ", " + str(k) + "_" + v
339 else:
340 k1 = []
341 else:
342 total1 = total1new = na1 = lowq1 = 0
343 ref1 = alt1 = ref1f = alt1f = 0
344 k1 = []
345
346 if key2[:-5] + '.ab.2' in mut_dict[key1].keys():
347 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values())
348 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys():
349 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na']
350 # na2f = na2 / total2
351 else:
352 # na2 = na2f = 0
353 na2 = 0
354 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys():
355 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ']
356 # lowq2f = lowq2 / total2
357 else:
358 # lowq2 = lowq2f = 0
359 lowq2 = 0
360 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys():
361 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref]
362 ref2f = ref2 / (total2 - na2 - lowq2)
363 else:
364 ref2 = ref2f = 0
365 if alt in mut_dict[key1][key2[:-5] + '.ab.2'].keys():
366 alt2 = mut_dict[key1][key2[:-5] + '.ab.2'][alt]
367 alt2f = alt2 / (total2 - na2 - lowq2)
368 else:
369 alt2 = alt2f = 0
370 total2new = total2 - na2 - lowq2
371 if (key2[:-5] + '.ab.2') in mut_read_dict.keys():
372 k2 = mut_read_dict[(key2[:-5] + '.ab.2')].keys()
373 add_mut2 = len(k2)
374 if add_mut2 > 1:
375 for k, v in mut_read_dict[(key2[:-5] + '.ab.2')].items():
376 if k != key1:
377 if len(add_mut23) == 0:
378 add_mut23 = str(k) + "_" + v
379 else:
380 add_mut23 = add_mut23 + ", " + str(k) + "_" + v
381 else:
382 k2 = []
383 else:
384 total2 = total2new = na2 = lowq2 = 0
385 ref2 = alt2 = ref2f = alt2f = 0
386 k2 = []
387
388 if key2[:-5] + '.ba.1' in mut_dict[key1].keys():
389 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values())
390 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys():
391 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na']
392 # na3f = na3 / total3
393 else:
394 # na3 = na3f = 0
395 na3 = 0
396 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys():
397 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ']
398 # lowq3f = lowq3 / total3
399 else:
400 # lowq3 = lowq3f = 0
401 lowq3 = 0
402 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys():
403 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref]
404 ref3f = ref3 / (total3 - na3 - lowq3)
405 else:
406 ref3 = ref3f = 0
407 if alt in mut_dict[key1][key2[:-5] + '.ba.1'].keys():
408 alt3 = mut_dict[key1][key2[:-5] + '.ba.1'][alt]
409 alt3f = alt3 / (total3 - na3 - lowq3)
410 else:
411 alt3 = alt3f = 0
412 total3new = total3 - na3 - lowq3
413 if (key2[:-5] + '.ba.1') in mut_read_dict.keys():
414 add_mut3 = len(mut_read_dict[(key2[:-5] + '.ba.1')].keys())
415 if add_mut3 > 1:
416 for k, v in mut_read_dict[(key2[:-5] + '.ba.1')].items():
417 if k != key1 and k not in k2:
418 if len(add_mut23) == 0:
419 add_mut23 = str(k) + "_" + v
420 else:
421 add_mut23 = add_mut23 + ", " + str(k) + "_" + v
422 else:
423 total3 = total3new = na3 = lowq3 = 0
424 ref3 = alt3 = ref3f = alt3f = 0
425
426 if key2[:-5] + '.ba.2' in mut_dict[key1].keys():
427 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values())
428 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys():
429 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na']
430 # na4f = na4 / total4
431 else:
432 # na4 = na4f = 0
433 na4 = 0
434 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys():
435 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ']
436 # lowq4f = lowq4 / total4
437 else:
438 # lowq4 = lowq4f = 0
439 lowq4 = 0
440 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys():
441 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref]
442 ref4f = ref4 / (total4 - na4 - lowq4)
443 else:
444 ref4 = ref4f = 0
445 if alt in mut_dict[key1][key2[:-5] + '.ba.2'].keys():
446 alt4 = mut_dict[key1][key2[:-5] + '.ba.2'][alt]
447 alt4f = alt4 / (total4 - na4 - lowq4)
448 else:
449 alt4 = alt4f = 0
450 total4new = total4 - na4 - lowq4
451 if (key2[:-5] + '.ba.2') in mut_read_dict.keys():
452 add_mut4 = len(mut_read_dict[(key2[:-5] + '.ba.2')].keys())
453 if add_mut4 > 1:
454 for k, v in mut_read_dict[(key2[:-5] + '.ba.2')].items():
455 if k != key1 and k not in k1:
456 if len(add_mut14) == 0:
457 add_mut14 = str(k) + "_" + v
458 else:
459 add_mut14 = add_mut14 + ", " + str(k) + "_" + v
460 else:
461 total4 = total4new = na4 = lowq4 = 0
462 ref4 = alt4 = ref4f = alt4f = 0
463
464 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1
465 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0
466
467 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys():
468 read_pos1 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.1'])
469 read_len_median1 = np.median(reads_dict[key1][key2[:-5] + '.ab.1'])
470 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys():
471 read_pos2 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.2'])
472 read_len_median2 = np.median(reads_dict[key1][key2[:-5] + '.ab.2'])
473 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys():
474 read_pos3 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.1'])
475 read_len_median3 = np.median(reads_dict[key1][key2[:-5] + '.ba.1'])
476 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys():
477 read_pos4 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.2'])
478 read_len_median4 = np.median(reads_dict[key1][key2[:-5] + '.ba.2'])
479
480 used_keys.append(key2[:-5])
481 counts_mut += 1
482 if (alt1f + alt2f + alt3f + alt4f) > 0.5:
483 if total1new == 0:
484 ref1f = alt1f = None
485 alt1ff = -1
486 else:
487 alt1ff = alt1f
488 if total2new == 0:
489 ref2f = alt2f = None
490 alt2ff = -1
491 else:
492 alt2ff = alt2f
493 if total3new == 0:
494 ref3f = alt3f = None
495 alt3ff = -1
496 else:
497 alt3ff = alt3f
498 if total4new == 0:
499 ref4f = alt4f = None
500 alt4ff = -1
501 else:
502 alt4ff = alt4f
503
504 beg1 = beg4 = beg2 = beg3 = 0
505
506 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
507 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
508
509 chrom, pos = re.split(r'\#', key1)
510 var_id = '-'.join([chrom, pos, ref, alt])
511 sample_tag = key2[:-5]
512 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time
513 # exclude identical tag from array2, to prevent comparison to itself
514 same_tag = np.where(array2 == sample_tag)
515 index_array2 = np.arange(0, len(array2), 1)
516 index_withoutSame = np.delete(index_array2, same_tag) # delete identical tag from the data
517 array2 = array2[index_withoutSame]
518 if len(array2) != 0: # only perform chimera analysis if there is more than 1 variant
519 array1_half = sample_tag[0:int(len(sample_tag) / 2)] # mate1 part1
520 array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))] # mate1 part 2
521 array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2]) # mate2 part1
522 array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2]) # mate2 part2
523
524 min_tags_list_zeros = []
525 chimera_tags = []
526 for mate_b in [False, True]:
527 i = 0 # counter, only used to see how many HDs of tags were already calculated
528 if mate_b is False: # HD calculation for all a's
529 half1_mate1 = array1_half
530 half2_mate1 = array1_half2
531 half1_mate2 = array2_half
532 half2_mate2 = array2_half2
533 elif mate_b is True: # HD calculation for all b's
534 half1_mate1 = array1_half2
535 half2_mate1 = array1_half
536 half1_mate2 = array2_half2
537 half2_mate2 = array2_half
538 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's"
539 dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2])
540 min_index = np.where(dist == dist.min()) # get index of min HD
541 # get all "b's" of the tag or all "a's" of the tag with minimum HD
542 min_tag_half2 = half2_mate2[min_index]
543 min_tag_array2 = array2[min_index] # get whole tag with min HD
544 min_value = dist.min()
545 # calculate HD of "b" to all "b's" or "a" to all "a's"
546 dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e))
547 for e in min_tag_half2])
548
549 dist2 = dist_second_half.max()
550 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD
551 max_tag = min_tag_array2[max_index]
552
553 # tags which have identical parts:
554 if min_value == 0 or dist2 == 0:
555 min_tags_list_zeros.append(tag)
556 chimera_tags.append(max_tag)
557 # chimeric = True
558 # else:
559 # chimeric = False
560
561 # if mate_b is False:
562 # text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric)
563 # else:
564 # text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric)
565 i += 1
566 chimera_tags = [x for x in chimera_tags if x != []]
567 chimera_tags_new = []
568 for i in chimera_tags:
569 if len(i) > 1:
570 for t in i:
571 chimera_tags_new.append(t)
572 else:
573 chimera_tags_new.extend(i)
574 chimera_tags_new = np.asarray(chimera_tags_new)
575 chimera = ", ".join(chimera_tags_new)
576 else:
577 chimera_tags_new = []
578 chimera = ""
579
580 trimmed = False
581 contradictory = False
582 chimeric_dcs = False
583
584 if chimera_correction and len(chimera_tags_new) > 0: # chimeras
585 alt1ff = 0
586 alt4ff = 0
587 alt2ff = 0
588 alt3ff = 0
589 chimeric_dcs = True
590 trimmed = False
591 contradictory = False
592 elif ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant
593 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) |
594 (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) &
595 all(float(ij) == 0. for ij in [alt1ff, alt4ff]))):
596 alt1ff = 0
597 alt4ff = 0
598 alt2ff = 0
599 alt3ff = 0
600 trimmed = False
601 contradictory = True
602 chimeric_dcs = False
603 else:
604 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))):
605 beg1 = total1new
606 total1new = 0
607 alt1ff = 0
608 alt1f = 0
609 trimmed = True
610
611 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))):
612 beg4 = total4new
613 total4new = 0
614 alt4ff = 0
615 alt4f = 0
616 trimmed = True
617
618 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))):
619 beg2 = total2new
620 total2new = 0
621 alt2ff = 0
622 alt2f = 0
623 trimmed = True
624
625 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))):
626 beg3 = total3new
627 total3new = 0
628 alt3ff = 0
629 alt3f = 0
630 trimmed = True
631 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
632 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
633
634 # assign tiers
635 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) &
636 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
637 (all(int(ij) >= 3 for ij in [total2new, total3new]) &
638 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
639 tier = "1.1"
640 counter_tier11 += 1
641 tier_dict[key1]["tier 1.1"] += 1
642
643 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) &
644 any(int(ij) >= 3 for ij in [total1new, total4new]) &
645 any(int(ij) >= 3 for ij in [total2new, total3new]) &
646 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])):
647 tier = "1.2"
648 counter_tier12 += 1
649 tier_dict[key1]["tier 1.2"] += 1
650
651 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) &
652 any(int(ij) >= 3 for ij in [total1new, total4new]) &
653 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
654 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
655 any(int(ij) >= 3 for ij in [total2new, total3new]) &
656 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
657 tier = "2.1"
658 counter_tier21 += 1
659 tier_dict[key1]["tier 2.1"] += 1
660
661 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) &
662 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])):
663 tier = "2.2"
664 counter_tier22 += 1
665 tier_dict[key1]["tier 2.2"] += 1
666
667 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) &
668 any(int(ij) >= 3 for ij in [total2new, total3new]) &
669 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) &
670 any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) |
671 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
672 any(int(ij) >= 3 for ij in [total1new, total4new]) &
673 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) &
674 any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))):
675 tier = "2.3"
676 counter_tier23 += 1
677 tier_dict[key1]["tier 2.3"] += 1
678
679 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) &
680 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
681 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
682 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
683 tier = "2.4"
684 counter_tier24 += 1
685 tier_dict[key1]["tier 2.4"] += 1
686
687 elif ((len(pure_tags_dict_short[key1]) > 1) &
688 (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) |
689 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))):
690 tier = "3.1"
691 counter_tier31 += 1
692 tier_dict[key1]["tier 3.1"] += 1
693
694 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) &
695 all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) |
696 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
697 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))):
698 tier = "3.2"
699 counter_tier32 += 1
700 tier_dict[key1]["tier 3.2"] += 1
701
702 elif (trimmed):
703 tier = "4.1"
704 counter_tier41 += 1
705 tier_dict[key1]["tier 4.1"] += 1
706
707 elif (contradictory):
708 tier = "4.2"
709 counter_tier42 += 1
710 tier_dict[key1]["tier 4.2"] += 1
711
712 elif chimera_correction and chimeric_dcs:
713 tier = "4.3"
714 counter_tier43 += 1
715 tier_dict[key1]["tier 4.3"] += 1
716
717 else:
718 tier = "5"
719 counter_tier5 += 1
720 tier_dict[key1]["tier 5"] += 1
721
722 if (read_pos1 == -1):
723 read_pos1 = read_len_median1 = None
724 if (read_pos4 == -1):
725 read_pos4 = read_len_median4 = None
726 if (read_pos2 == -1):
727 read_pos2 = read_len_median2 = None
728 if (read_pos3 == -1):
729 read_pos3 = read_len_median3 = None
730 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
731 ws1.write_row(row, 0, line)
732 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
733 ws1.write_row(row + 1, 0, line)
734
735 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
736 {'type': 'formula',
737 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
738 'format': format1,
739 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
740 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
741 {'type': 'formula',
742 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1),
743 'format': format3,
744 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
745 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
746 {'type': 'formula',
747 'criteria': '=$B${}>="3"'.format(row + 1),
748 'format': format2,
749 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
750
751 row += 3
752
753 if chimera_correction:
754 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
755 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4',
756 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 4.3', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
757 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-4.3', 'AF 1.1-5')
758 else:
759 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
760 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4',
761 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
762 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5')
763
764 ws2.write_row(0, 0, header_line2)
765 row = 0
766
767 for key1, value1 in sorted(tier_dict.items()):
768 if key1 in pure_tags_dict_short.keys():
769 i = np.where(np.array(['#'.join(str(i) for i in z)
770 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0]
771 ref = mut_array[i, 9]
772 alt = mut_array[i, 10]
773 chrom, pos = re.split(r'\#', key1)
774 ref_count = cvrg_dict[key1][0]
775 alt_count = cvrg_dict[key1][1]
776 cvrg = ref_count + alt_count
777
778 var_id = '-'.join([chrom, pos, ref, alt])
779 lst = [var_id, cvrg]
780 used_tiers = []
781 cum_af = []
782 for key2, value2 in sorted(value1.items()):
783 # calculate cummulative AF
784 used_tiers.append(value2)
785 if len(used_tiers) > 1:
786 cum = safe_div(sum(used_tiers), cvrg)
787 cum_af.append(cum)
788 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)])
789 if chimera_correction:
790 lst.extend([(cvrg - sum(used_tiers[-6:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-6:]))), alt_count, safe_div(alt_count, cvrg)])
791 else:
792 lst.extend([(cvrg - sum(used_tiers[-5:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-5:]))), alt_count, safe_div(alt_count, cvrg)])
793 lst.extend(used_tiers)
794 lst.extend(cum_af)
795 lst = tuple(lst)
796 ws2.write_row(row + 1, 0, lst)
797 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)})
798 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)})
799 if chimera_correction:
800 ws2.conditional_format('P{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:U{} P1:U1'.format(row + 2, row + 2)})
801 else:
802 ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:T{} P1:T1'.format(row + 2, row + 2)})
803
804 row += 1
805
806 # sheet 3
807 if chimera_correction:
808 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21),
809 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24),
810 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41),
811 ("tier 4.2", counter_tier42), ("tier 4.3", counter_tier43), ("tier 5", counter_tier5)]
812 else:
813 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21),
814 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24),
815 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41),
816 ("tier 4.2", counter_tier42), ("tier 5", counter_tier5)]
817
818
819 header = ("tier", "count")
820 ws3.write_row(0, 0, header)
821
822 for i in range(len(sheet3)):
823 ws3.write_row(i + 1, 0, sheet3[i])
824 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2),
825 {'type': 'formula',
826 'criteria': '=OR($A${}="tier 1.1", $A${}="tier 1.2")'.format(i + 2, i + 2),
827 'format': format1})
828 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2),
829 {'type': 'formula',
830 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2),
831 'format': format3})
832 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2),
833 {'type': 'formula',
834 'criteria': '=$A${}>="3"'.format(i + 2),
835 'format': format2})
836 if chimera_correction:
837 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 4.3", "variants that are chimeric"), ("Tier 5", "remaining variants")]
838 else:
839 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 5", "remaining variants")]
840
841 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289",
842 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
843 "4081", "4098", "5", "10", "", ""),
844 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None,
845 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None,
846 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")],
847 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289",
848 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0",
849 "0", "4081", "4098", "5", "10", "", ""),
850 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289",
851 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1",
852 "7", "0", "0", "4081", "4098", "5", "10", "", "")],
853 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290",
854 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0",
855 "0", "0", "1", "6", "47170", "41149", "", ""),
856 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290",
857 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0",
858 "0", "0", "1", "6", "47170", "41149", "", "")],
859 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289",
860 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
861 "4081", "4098", "5", "10", "", ""),
862 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None,
863 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0",
864 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")],
865 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289",
866 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
867 "4081", "4098", "5", "10", "", ""),
868 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289",
869 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
870 "4081", "4098", "5", "10", "", "")],
871 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None,
872 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0",
873 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""),
874 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289",
875 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0",
876 "0", "0", "4081", "4098", "5", "10", "", "")],
877 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289",
878 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081",
879 "4098", "5", "10", "", ""),
880 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289",
881 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081",
882 "4098", "5", "10", "", "")],
883 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290",
884 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1",
885 "0", "0", "3", "3", "47170", "41149", "", ""),
886 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None,
887 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5",
888 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")],
889 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271",
890 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",
891 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""),
892 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271",
893 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",
894 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")],
895 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269",
896 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""),
897 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None,
898 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
899 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
900 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263",
901 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0",
902 "0", "0", "0", "1", "1", "5348", "5350", "", ""),
903 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263",
904 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0",
905 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]]
906
907 if chimera_correction:
908 examples_tiers.extend([[("Chr5:5-20000-13963-T-C", "4.3", "TTTTTAAGAAGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289",
909 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081",
910 "4098", "5", "10", "", "TTTTTAAGAATAACCCACAC"),
911 ("", "", "TTTTTAAGAAGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289",
912 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081",
913 "4098", "5", "10", "", "TTTTTAAGAATAACCCACAC")],
914 [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269",
915 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0",
916 "0", "1", "1", "5348", "5350", "", ""),
917 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None,
918 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
919 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]])
920 else:
921 examples_tiers.extend([
922 [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269",
923 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0",
924 "0", "1", "1", "5348", "5350", "", ""),
925 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None,
926 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
927 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]])
928
929 start_row = 15
930 ws3.write(start_row, 0, "Description of tiers with examples")
931 ws3.write_row(start_row + 1, 0, header_line)
932 row = 0
933 for i in range(len(description_tiers)):
934 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i])
935 ex = examples_tiers[i]
936 for k in range(len(ex)):
937 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k])
938 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)})
939 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3),
940 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2),
941 'format': format3,
942 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)})
943 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3),
944 {'type': 'formula',
945 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2),
946 'format': format2,
947 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)})
948 row += 3
949 workbook.close()
950
951
952 if __name__ == '__main__':
953 sys.exit(read2mut(sys.argv))