comparison read2mut.py @ 2:3f1dbd2c59bf draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
author iuc
date Tue, 10 Nov 2020 12:55:29 +0000
parents 3556001ff2db
children
comparison
equal deleted inserted replaced
1:3556001ff2db 2:3f1dbd2c59bf
8 Looks for reads with mutation at known 8 Looks for reads with mutation at known
9 positions and calculates frequencies and stats. 9 positions and calculates frequencies and stats.
10 10
11 ======= ========== ================= ================================ 11 ======= ========== ================= ================================
12 Version Date Author Description 12 Version Date Author Description
13 0.2.1 2019-10-27 Gundula Povysil - 13 2.0.0 2020-10-30 Gundula Povysil -
14 ======= ========== ================= ================================ 14 ======= ========== ================= ================================
15 15
16 16
17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam 17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam
18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json 18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json
19 --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 19 --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 --chimera_correction
20 20
21 """ 21 """
22 22
23 from __future__ import division 23 from __future__ import division
24 24
30 import sys 30 import sys
31 31
32 import numpy as np 32 import numpy as np
33 import pysam 33 import pysam
34 import xlsxwriter 34 import xlsxwriter
35 from cyvcf2 import VCF
35 36
36 37
37 def make_argparser(): 38 def make_argparser():
38 parser = argparse.ArgumentParser(description='Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') 39 parser = argparse.ArgumentParser(description='Takes a VCF file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.')
39 parser.add_argument('--mutFile', 40 parser.add_argument('--mutFile',
40 help='TABULAR file with DCS mutations.') 41 help='VCF file with DCS mutations.')
41 parser.add_argument('--bamFile', 42 parser.add_argument('--bamFile',
42 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') 43 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).')
43 parser.add_argument('--inputJson', 44 parser.add_argument('--inputJson',
44 help='JSON file with data collected by mut2read.py.') 45 help='JSON file with data collected by mut2read.py.')
45 parser.add_argument('--sscsJson', 46 parser.add_argument('--sscsJson',
50 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') 51 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.')
51 parser.add_argument('--phred', type=int, default=20, 52 parser.add_argument('--phred', type=int, default=20,
52 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') 53 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.')
53 parser.add_argument('--trim', type=int, default=10, 54 parser.add_argument('--trim', type=int, default=10,
54 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') 55 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.')
56 parser.add_argument('--chimera_correction', action="store_true",
57 help='Count chimeric variants and correct the variant frequencies')
55 return parser 58 return parser
56 59
57 60
58 def safe_div(x, y): 61 def safe_div(x, y):
59 if y == 0: 62 if y == 0:
70 sscs_json = args.sscsJson 73 sscs_json = args.sscsJson
71 outfile = args.outputFile 74 outfile = args.outputFile
72 thresh = args.thresh 75 thresh = args.thresh
73 phred_score = args.phred 76 phred_score = args.phred
74 trim = args.trim 77 trim = args.trim
78 chimera_correction = args.chimera_correction
75 79
76 if os.path.isfile(file1) is False: 80 if os.path.isfile(file1) is False:
77 sys.exit("Error: Could not find '{}'".format(file1)) 81 sys.exit("Error: Could not find '{}'".format(file1))
78 if os.path.isfile(file2) is False: 82 if os.path.isfile(file2) is False:
79 sys.exit("Error: Could not find '{}'".format(file2)) 83 sys.exit("Error: Could not find '{}'".format(file2))
84 if phred_score < 0: 88 if phred_score < 0:
85 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) 89 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score))
86 if trim < 0: 90 if trim < 0:
87 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) 91 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh))
88 92
89 # 1. read mut file 93 # load dicts
90 with open(file1, 'r') as mut:
91 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
92
93 # 2. load dicts
94 with open(json_file, "r") as f: 94 with open(json_file, "r") as f:
95 (tag_dict, cvrg_dict) = json.load(f) 95 (tag_dict, cvrg_dict) = json.load(f)
96 96
97 with open(sscs_json, "r") as f: 97 with open(sscs_json, "r") as f:
98 (mut_pos_dict, ref_pos_dict) = json.load(f) 98 (mut_pos_dict, ref_pos_dict) = json.load(f)
99 99
100 # 3. read bam file 100 # read bam file
101 # pysam.index(file2)
102 bam = pysam.AlignmentFile(file2, "rb") 101 bam = pysam.AlignmentFile(file2, "rb")
103 102
104 # 4. create mut_dict 103 # create mut_dict
105 mut_dict = {} 104 mut_dict = {}
106 mut_read_pos_dict = {} 105 mut_read_pos_dict = {}
107 mut_read_dict = {} 106 mut_read_dict = {}
108 reads_dict = {} 107 reads_dict = {}
109 if mut_array.shape == (13, ): 108 i = 0
110 mut_array = mut_array.reshape((1, len(mut_array))) 109 mut_array = []
111 110
112 for m in range(0, len(mut_array[:, 0])): 111 for variant in VCF(file1):
113 print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) 112 chrom = variant.CHROM
114 # for m in range(0, 5): 113 stop_pos = variant.start
115 chrom = mut_array[m, 1]
116 stop_pos = mut_array[m, 2].astype(int)
117 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) 114 chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
118 ref = mut_array[m, 9] 115 ref = variant.REF
119 alt = mut_array[m, 10] 116 alt = variant.ALT[0]
120 mut_dict[chrom_stop_pos] = {} 117
121 mut_read_pos_dict[chrom_stop_pos] = {} 118 if len(ref) == len(alt):
122 reads_dict[chrom_stop_pos] = {} 119 mut_array.append([chrom, stop_pos, ref, alt])
123 120 i += 1
124 for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000): 121 mut_dict[chrom_stop_pos] = {}
125 if pileupcolumn.reference_pos == stop_pos - 1: 122 mut_read_pos_dict[chrom_stop_pos] = {}
126 count_alt = 0 123 reads_dict[chrom_stop_pos] = {}
127 count_ref = 0 124
128 count_indel = 0 125 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000):
129 count_n = 0 126 if pileupcolumn.reference_pos == stop_pos:
130 count_other = 0 127 count_alt = 0
131 count_lowq = 0 128 count_ref = 0
132 n = 0 129 count_indel = 0
133 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), 130 count_n = 0
134 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) 131 count_other = 0
135 for pileupread in pileupcolumn.pileups: 132 count_lowq = 0
136 n += 1 133 n = 0
137 if not pileupread.is_del and not pileupread.is_refskip: 134 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
138 tag = pileupread.alignment.query_name 135 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
139 nuc = pileupread.alignment.query_sequence[pileupread.query_position] 136 for pileupread in pileupcolumn.pileups:
140 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 137 n += 1
141 if phred < phred_score: 138 if not pileupread.is_del and not pileupread.is_refskip:
142 nuc = "lowQ" 139 tag = pileupread.alignment.query_name
143 if tag not in mut_dict[chrom_stop_pos]: 140 nuc = pileupread.alignment.query_sequence[pileupread.query_position]
144 mut_dict[chrom_stop_pos][tag] = {} 141 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33
145 if nuc in mut_dict[chrom_stop_pos][tag]: 142 if phred < phred_score:
146 mut_dict[chrom_stop_pos][tag][nuc] += 1 143 nuc = "lowQ"
147 else: 144 if tag not in mut_dict[chrom_stop_pos]:
148 mut_dict[chrom_stop_pos][tag][nuc] = 1 145 mut_dict[chrom_stop_pos][tag] = {}
149 if tag not in mut_read_pos_dict[chrom_stop_pos]: 146 if nuc in mut_dict[chrom_stop_pos][tag]:
150 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 147 mut_dict[chrom_stop_pos][tag][nuc] += 1
151 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence)
152 else:
153 mut_read_pos_dict[chrom_stop_pos][tag] = np.append(
154 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1)
155 reads_dict[chrom_stop_pos][tag] = np.append(
156 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence))
157
158 if nuc == alt:
159 count_alt += 1
160 if tag not in mut_read_dict:
161 mut_read_dict[tag] = {}
162 mut_read_dict[tag][chrom_stop_pos] = alt
163 else: 148 else:
164 mut_read_dict[tag][chrom_stop_pos] = alt 149 mut_dict[chrom_stop_pos][tag][nuc] = 1
165 elif nuc == ref: 150 if tag not in mut_read_pos_dict[chrom_stop_pos]:
166 count_ref += 1 151 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1
167 elif nuc == "N": 152 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence)
168 count_n += 1 153 else:
169 elif nuc == "lowQ": 154 mut_read_pos_dict[chrom_stop_pos][tag] = np.append(
170 count_lowq += 1 155 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1)
171 else: 156 reads_dict[chrom_stop_pos][tag] = np.append(
172 count_other += 1 157 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence))
173 else: 158
174 count_indel += 1 159 if nuc == alt:
175 160 count_alt += 1
176 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) 161 if tag not in mut_read_dict:
177 162 mut_read_dict[tag] = {}
163 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
164 else:
165 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
166 elif nuc == ref:
167 count_ref += 1
168 elif nuc == "N":
169 count_n += 1
170 elif nuc == "lowQ":
171 count_lowq += 1
172 else:
173 count_other += 1
174 else:
175 count_indel += 1
176
177 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq))
178 else:
179 print("indels are currently not evaluated")
180
181 mut_array = np.array(mut_array)
178 for read in bam.fetch(until_eof=True): 182 for read in bam.fetch(until_eof=True):
179 if read.is_unmapped: 183 if read.is_unmapped:
180 pure_tag = read.query_name[:-5] 184 pure_tag = read.query_name[:-5]
181 nuc = "na" 185 nuc = "na"
182 for key in tag_dict[pure_tag].keys(): 186 for key in tag_dict[pure_tag].keys():
188 mut_dict[key][read.query_name][nuc] += 1 192 mut_dict[key][read.query_name][nuc] += 1
189 else: 193 else:
190 mut_dict[key][read.query_name][nuc] = 1 194 mut_dict[key][read.query_name][nuc] = 1
191 bam.close() 195 bam.close()
192 196
193 # 5. create pure_tags_dict 197 # create pure_tags_dict
194 pure_tags_dict = {} 198 pure_tags_dict = {}
195 for key1, value1 in sorted(mut_dict.items()): 199 for key1, value1 in sorted(mut_dict.items()):
196 i = np.where(np.array(['#'.join(str(i) for i in z) 200 i = np.where(np.array(['#'.join(str(i) for i in z)
197 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] 201 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
198 ref = mut_array[i, 9] 202 ref = mut_array[i, 2]
199 alt = mut_array[i, 10] 203 alt = mut_array[i, 3]
200 pure_tags_dict[key1] = {} 204 pure_tags_dict[key1] = {}
201 for key2, value2 in sorted(value1.items()): 205 for key2, value2 in sorted(value1.items()):
202 for key3, value3 in value2.items(): 206 for key3, value3 in value2.items():
203 pure_tag = key2[:-5] 207 pure_tag = key2[:-5]
204 if key3 == alt: 208 if key3 == alt:
205 if pure_tag in pure_tags_dict[key1]: 209 if pure_tag in pure_tags_dict[key1]:
206 pure_tags_dict[key1][pure_tag] += 1 210 pure_tags_dict[key1][pure_tag] += 1
207 else: 211 else:
208 pure_tags_dict[key1][pure_tag] = 1 212 pure_tags_dict[key1][pure_tag] = 1
209 213
210 # 6. create pure_tags_dict_short with thresh 214 # create pure_tags_dict_short with thresh
211 if thresh > 0: 215 if thresh > 0:
212 pure_tags_dict_short = {} 216 pure_tags_dict_short = {}
213 for key, value in sorted(pure_tags_dict.items()): 217 for key, value in sorted(pure_tags_dict.items()):
214 if len(value) < thresh: 218 if len(value) < thresh:
215 pure_tags_dict_short[key] = value 219 pure_tags_dict_short[key] = value
216 else: 220 else:
217 pure_tags_dict_short = pure_tags_dict 221 pure_tags_dict_short = pure_tags_dict
218 222
219 whole_array = [] 223 # output summary with threshold
220 for k in pure_tags_dict.values():
221 whole_array.extend(k.keys())
222
223 # 7. output summary with threshold
224 workbook = xlsxwriter.Workbook(outfile) 224 workbook = xlsxwriter.Workbook(outfile)
225 ws1 = workbook.add_worksheet("Results") 225 ws1 = workbook.add_worksheet("Results")
226 ws2 = workbook.add_worksheet("Allele frequencies") 226 ws2 = workbook.add_worksheet("Allele frequencies")
227 ws3 = workbook.add_worksheet("Tiers") 227 ws3 = workbook.add_worksheet("Tiers")
228 228
232 232
233 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', 233 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab',
234 'read median length.ba', 'DCS median length', 234 'read median length.ba', 'DCS median length',
235 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', 235 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba',
236 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', 236 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba',
237 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 237 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba',
238 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', 238 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba',
239 'other mut', 'chimeric tag') 239 'in phase', 'chimeric tag')
240 ws1.write_row(0, 0, header_line) 240 ws1.write_row(0, 0, header_line)
241
242 counter_tier11 = 0 241 counter_tier11 = 0
243 counter_tier12 = 0 242 counter_tier12 = 0
244 counter_tier21 = 0 243 counter_tier21 = 0
245 counter_tier22 = 0 244 counter_tier22 = 0
246 counter_tier23 = 0 245 counter_tier23 = 0
247 counter_tier24 = 0 246 counter_tier24 = 0
248 counter_tier31 = 0 247 counter_tier31 = 0
249 counter_tier32 = 0 248 counter_tier32 = 0
250 counter_tier41 = 0 249 counter_tier41 = 0
251 counter_tier42 = 0 250 counter_tier42 = 0
252 251 counter_tier5 = 0
253 row = 1 252 row = 1
254 tier_dict = {} 253 tier_dict = {}
254 chimera_dict = {}
255 for key1, value1 in sorted(mut_dict.items()): 255 for key1, value1 in sorted(mut_dict.items()):
256 counts_mut = 0 256 counts_mut = 0
257 chimeric_tag = {}
257 if key1 in pure_tags_dict_short.keys(): 258 if key1 in pure_tags_dict_short.keys():
258 i = np.where(np.array(['#'.join(str(i) for i in z) 259 i = np.where(np.array(['#'.join(str(i) for i in z)
259 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] 260 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
260 ref = mut_array[i, 9] 261 ref = mut_array[i, 2]
261 alt = mut_array[i, 10] 262 alt = mut_array[i, 3]
262 dcs_median = cvrg_dict[key1][2] 263 dcs_median = cvrg_dict[key1][2]
264 whole_array = list(pure_tags_dict_short[key1].keys())
263 265
264 tier_dict[key1] = {} 266 tier_dict[key1] = {}
265 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0)] 267 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0)]
266 for k, v in values_tier_dict: 268 for k, v in values_tier_dict:
267 tier_dict[key1][k] = v 269 tier_dict[key1][k] = v
268 270
269 used_keys = [] 271 used_keys = []
270 if 'ab' in mut_pos_dict[key1].keys(): 272 if 'ab' in mut_pos_dict[key1].keys():
289 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): 291 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()):
290 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): 292 if key2[:-5] + '.ab.1' in mut_dict[key1].keys():
291 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) 293 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values())
292 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): 294 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
293 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] 295 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na']
294 # na1f = na1/total1 296 else:
295 else:
296 # na1 = na1f = 0
297 na1 = 0 297 na1 = 0
298 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): 298 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
299 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] 299 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ']
300 # lowq1f = lowq1 / total1 300 else:
301 else:
302 # lowq1 = lowq1f = 0
303 lowq1 = 0 301 lowq1 = 0
304 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): 302 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
305 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] 303 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref]
306 ref1f = ref1 / (total1 - na1 - lowq1) 304 ref1f = ref1 / (total1 - na1 - lowq1)
307 else: 305 else:
316 k1 = mut_read_dict[(key2[:-5] + '.ab.1')].keys() 314 k1 = mut_read_dict[(key2[:-5] + '.ab.1')].keys()
317 add_mut1 = len(k1) 315 add_mut1 = len(k1)
318 if add_mut1 > 1: 316 if add_mut1 > 1:
319 for k, v in mut_read_dict[(key2[:-5] + '.ab.1')].items(): 317 for k, v in mut_read_dict[(key2[:-5] + '.ab.1')].items():
320 if k != key1: 318 if k != key1:
319 new_mut = str(k).split("#")[0] + "-" + str(int(str(k).split("#")[1]) + 1) + "-" + v[1] + "-" + v[0]
321 if len(add_mut14) == 0: 320 if len(add_mut14) == 0:
322 add_mut14 = str(k) + "_" + v 321 add_mut14 = new_mut
323 else: 322 else:
324 add_mut14 = add_mut14 + ", " + str(k) + "_" + v 323 add_mut14 = add_mut14 + ", " + new_mut
325 else: 324 else:
326 k1 = [] 325 k1 = []
327 else: 326 else:
328 total1 = total1new = na1 = lowq1 = 0 327 total1 = total1new = na1 = lowq1 = 0
329 ref1 = alt1 = ref1f = alt1f = 0 328 ref1 = alt1 = ref1f = alt1f = 0
329 k1 = []
330 330
331 if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): 331 if key2[:-5] + '.ab.2' in mut_dict[key1].keys():
332 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) 332 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values())
333 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): 333 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys():
334 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] 334 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na']
335 # na2f = na2 / total2 335 else:
336 else:
337 # na2 = na2f = 0
338 na2 = 0 336 na2 = 0
339 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): 337 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys():
340 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] 338 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ']
341 # lowq2f = lowq2 / total2 339 else:
342 else:
343 # lowq2 = lowq2f = 0
344 lowq2 = 0 340 lowq2 = 0
345 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): 341 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys():
346 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] 342 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref]
347 ref2f = ref2 / (total2 - na2 - lowq2) 343 ref2f = ref2 / (total2 - na2 - lowq2)
348 else: 344 else:
357 k2 = mut_read_dict[(key2[:-5] + '.ab.2')].keys() 353 k2 = mut_read_dict[(key2[:-5] + '.ab.2')].keys()
358 add_mut2 = len(k2) 354 add_mut2 = len(k2)
359 if add_mut2 > 1: 355 if add_mut2 > 1:
360 for k, v in mut_read_dict[(key2[:-5] + '.ab.2')].items(): 356 for k, v in mut_read_dict[(key2[:-5] + '.ab.2')].items():
361 if k != key1: 357 if k != key1:
358 new_mut = str(k).split("#")[0] + "-" + str(int(str(k).split("#")[1]) + 1) + "-" + v[1] + "-" + v[0]
362 if len(add_mut23) == 0: 359 if len(add_mut23) == 0:
363 add_mut23 = str(k) + "_" + v 360 add_mut23 = new_mut
364 else: 361 else:
365 add_mut23 = add_mut23 + ", " + str(k) + "_" + v 362 add_mut23 = add_mut23 + ", " + new_mut
366 else: 363 else:
367 k2 = [] 364 k2 = []
368 else: 365 else:
369 total2 = total2new = na2 = lowq2 = 0 366 total2 = total2new = na2 = lowq2 = 0
370 ref2 = alt2 = ref2f = alt2f = 0 367 ref2 = alt2 = ref2f = alt2f = 0
368 k2 = []
371 369
372 if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): 370 if key2[:-5] + '.ba.1' in mut_dict[key1].keys():
373 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) 371 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values())
374 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): 372 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys():
375 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] 373 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na']
376 # na3f = na3 / total3 374 else:
377 else:
378 # na3 = na3f = 0
379 na3 = 0 375 na3 = 0
380 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): 376 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys():
381 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] 377 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ']
382 # lowq3f = lowq3 / total3 378 else:
383 else:
384 # lowq3 = lowq3f = 0
385 lowq3 = 0 379 lowq3 = 0
386 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): 380 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys():
387 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] 381 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref]
388 ref3f = ref3 / (total3 - na3 - lowq3) 382 ref3f = ref3 / (total3 - na3 - lowq3)
389 else: 383 else:
397 if (key2[:-5] + '.ba.1') in mut_read_dict.keys(): 391 if (key2[:-5] + '.ba.1') in mut_read_dict.keys():
398 add_mut3 = len(mut_read_dict[(key2[:-5] + '.ba.1')].keys()) 392 add_mut3 = len(mut_read_dict[(key2[:-5] + '.ba.1')].keys())
399 if add_mut3 > 1: 393 if add_mut3 > 1:
400 for k, v in mut_read_dict[(key2[:-5] + '.ba.1')].items(): 394 for k, v in mut_read_dict[(key2[:-5] + '.ba.1')].items():
401 if k != key1 and k not in k2: 395 if k != key1 and k not in k2:
396 new_mut = str(k).split("#")[0] + "-" + str(int(str(k).split("#")[1]) + 1) + "-" + v[1] + "-" + v[0]
402 if len(add_mut23) == 0: 397 if len(add_mut23) == 0:
403 add_mut23 = str(k) + "_" + v 398 add_mut23 = new_mut
404 else: 399 else:
405 add_mut23 = add_mut23 + ", " + str(k) + "_" + v 400 add_mut23 = add_mut23 + ", " + new_mut
406 else: 401 else:
407 total3 = total3new = na3 = lowq3 = 0 402 total3 = total3new = na3 = lowq3 = 0
408 ref3 = alt3 = ref3f = alt3f = 0 403 ref3 = alt3 = ref3f = alt3f = 0
409 404
410 if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): 405 if key2[:-5] + '.ba.2' in mut_dict[key1].keys():
411 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) 406 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values())
412 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): 407 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys():
413 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] 408 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na']
414 # na4f = na4 / total4 409 else:
415 else:
416 # na4 = na4f = 0
417 na4 = 0 410 na4 = 0
418 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): 411 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys():
419 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] 412 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ']
420 # lowq4f = lowq4 / total4 413 else:
421 else:
422 # lowq4 = lowq4f = 0
423 lowq4 = 0 414 lowq4 = 0
424 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): 415 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys():
425 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] 416 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref]
426 ref4f = ref4 / (total4 - na4 - lowq4) 417 ref4f = ref4 / (total4 - na4 - lowq4)
427 else: 418 else:
435 if (key2[:-5] + '.ba.2') in mut_read_dict.keys(): 426 if (key2[:-5] + '.ba.2') in mut_read_dict.keys():
436 add_mut4 = len(mut_read_dict[(key2[:-5] + '.ba.2')].keys()) 427 add_mut4 = len(mut_read_dict[(key2[:-5] + '.ba.2')].keys())
437 if add_mut4 > 1: 428 if add_mut4 > 1:
438 for k, v in mut_read_dict[(key2[:-5] + '.ba.2')].items(): 429 for k, v in mut_read_dict[(key2[:-5] + '.ba.2')].items():
439 if k != key1 and k not in k1: 430 if k != key1 and k not in k1:
431 new_mut = str(k).split("#")[0] + "-" + str(int(str(k).split("#")[1]) + 1) + "-" + v[1] + "-" + v[0]
440 if len(add_mut14) == 0: 432 if len(add_mut14) == 0:
441 add_mut14 = str(k) + "_" + v 433 add_mut14 = new_mut
442 else: 434 else:
443 add_mut14 = add_mut14 + ", " + str(k) + "_" + v 435 add_mut14 = add_mut14 + ", " + new_mut
444 else: 436 else:
445 total4 = total4new = na4 = lowq4 = 0 437 total4 = total4new = na4 = lowq4 = 0
446 ref4 = alt4 = ref4f = alt4f = 0 438 ref4 = alt4 = ref4f = alt4f = 0
447 439
448 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 440 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1
483 ref4f = alt4f = None 475 ref4f = alt4f = None
484 alt4ff = -1 476 alt4ff = -1
485 else: 477 else:
486 alt4ff = alt4f 478 alt4ff = alt4f
487 479
488 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4) 480 beg1 = beg4 = beg2 = beg3 = 0
489 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3) 481
482 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
483 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
484
490 trimmed = False 485 trimmed = False
491 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): 486 contradictory = False
492 total1new = 0 487
488 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & all(float(ij) == 0. for ij in [alt1ff, alt4ff]))):
493 alt1ff = 0 489 alt1ff = 0
494 trimmed = True
495
496 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))):
497 total4new = 0
498 alt4ff = 0 490 alt4ff = 0
499 trimmed = True
500
501 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))):
502 total2new = 0
503 alt2ff = 0 491 alt2ff = 0
504 trimmed = True
505
506 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))):
507 total3new = 0
508 alt3ff = 0 492 alt3ff = 0
509 trimmed = True 493 trimmed = False
510 494 contradictory = True
511 chrom, pos = re.split(r'\#', key1) 495 else:
496 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))):
497 beg1 = total1new
498 total1new = 0
499 alt1ff = 0
500 alt1f = 0
501 trimmed = True
502
503 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))):
504 beg4 = total4new
505 total4new = 0
506 alt4ff = 0
507 alt4f = 0
508 trimmed = True
509
510 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))):
511 beg2 = total2new
512 total2new = 0
513 alt2ff = 0
514 alt2f = 0
515 trimmed = True
516
517 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))):
518 beg3 = total3new
519 total3new = 0
520 alt3ff = 0
521 alt3f = 0
522 trimmed = True
523 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
524 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
525
512 # assign tiers 526 # assign tiers
513 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & 527 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | (all(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
514 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
515 (all(int(ij) >= 3 for ij in [total2new, total3new]) &
516 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
517 tier = "1.1" 528 tier = "1.1"
518 counter_tier11 += 1 529 counter_tier11 += 1
519 tier_dict[key1]["tier 1.1"] += 1 530 tier_dict[key1]["tier 1.1"] += 1
520 531
521 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & 532 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & any(int(ij) >= 3 for ij in [total1new, total4new])
522 any(int(ij) >= 3 for ij in [total1new, total4new]) & 533 & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])):
523 any(int(ij) >= 3 for ij in [total2new, total3new]) &
524 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])):
525 tier = "1.2" 534 tier = "1.2"
526 counter_tier12 += 1 535 counter_tier12 += 1
527 tier_dict[key1]["tier 1.2"] += 1 536 tier_dict[key1]["tier 1.2"] += 1
528 537
529 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & 538 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))
530 any(int(ij) >= 3 for ij in [total1new, total4new]) & 539 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
531 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
532 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
533 any(int(ij) >= 3 for ij in [total2new, total3new]) &
534 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
535 tier = "2.1" 540 tier = "2.1"
536 counter_tier21 += 1 541 counter_tier21 += 1
537 tier_dict[key1]["tier 2.1"] += 1 542 tier_dict[key1]["tier 2.1"] += 1
538 543
539 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & 544 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])):
540 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])):
541 tier = "2.2" 545 tier = "2.2"
542 counter_tier22 += 1 546 counter_tier22 += 1
543 tier_dict[key1]["tier 2.2"] += 1 547 tier_dict[key1]["tier 2.2"] += 1
544 548
545 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & 549 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))
546 any(int(ij) >= 3 for ij in [total2new, total3new]) & 550 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))):
547 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) &
548 any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) |
549 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
550 any(int(ij) >= 3 for ij in [total1new, total4new]) &
551 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) &
552 any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))):
553 tier = "2.3" 551 tier = "2.3"
554 counter_tier23 += 1 552 counter_tier23 += 1
555 tier_dict[key1]["tier 2.3"] += 1 553 tier_dict[key1]["tier 2.3"] += 1
556 554
557 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & 555 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))
558 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | 556 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
559 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
560 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))):
561 tier = "2.4" 557 tier = "2.4"
562 counter_tier24 += 1 558 counter_tier24 += 1
563 tier_dict[key1]["tier 2.4"] += 1 559 tier_dict[key1]["tier 2.4"] += 1
564 560
565 elif ((len(pure_tags_dict_short[key1]) > 1) & 561 elif ((len(pure_tags_dict_short[key1]) > 1) & (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))):
566 (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) |
567 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))):
568 tier = "3.1" 562 tier = "3.1"
569 counter_tier31 += 1 563 counter_tier31 += 1
570 tier_dict[key1]["tier 3.1"] += 1 564 tier_dict[key1]["tier 3.1"] += 1
571 565
572 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & 566 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]))
573 all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) | 567 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))):
574 (all(int(ij) >= 1 for ij in [total2new, total3new]) &
575 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))):
576 tier = "3.2" 568 tier = "3.2"
577 counter_tier32 += 1 569 counter_tier32 += 1
578 tier_dict[key1]["tier 3.2"] += 1 570 tier_dict[key1]["tier 3.2"] += 1
579 571
580 elif (trimmed): 572 elif (trimmed):
581 tier = "4.1" 573 tier = "4.1"
582 counter_tier41 += 1 574 counter_tier41 += 1
583 tier_dict[key1]["tier 4.1"] += 1 575 tier_dict[key1]["tier 4.1"] += 1
584 576
585 else: 577 elif (contradictory):
586 tier = "4.2" 578 tier = "4.2"
587 counter_tier42 += 1 579 counter_tier42 += 1
588 tier_dict[key1]["tier 4.2"] += 1 580 tier_dict[key1]["tier 4.2"] += 1
589 581
590 var_id = '-'.join([chrom, pos, ref, alt]) 582 else:
583 tier = "5"
584 counter_tier5 += 1
585 tier_dict[key1]["tier 5"] += 1
586
587 chrom, pos = re.split(r'\#', key1)
588 var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt])
591 sample_tag = key2[:-5] 589 sample_tag = key2[:-5]
592 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time 590 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time
593 # exclude identical tag from array2, to prevent comparison to itself 591 # exclude identical tag from array2, to prevent comparison to itself
594 same_tag = np.where(array2 == sample_tag) 592 same_tag = np.where(array2 == sample_tag)
595 index_array2 = np.arange(0, len(array2), 1) 593 index_array2 = np.arange(0, len(array2), 1)
632 630
633 # tags which have identical parts: 631 # tags which have identical parts:
634 if min_value == 0 or dist2 == 0: 632 if min_value == 0 or dist2 == 0:
635 min_tags_list_zeros.append(tag) 633 min_tags_list_zeros.append(tag)
636 chimera_tags.append(max_tag) 634 chimera_tags.append(max_tag)
637 # chimeric = True 635
638 # else:
639 # chimeric = False
640
641 # if mate_b is False:
642 # text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric)
643 # else:
644 # text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric)
645 i += 1 636 i += 1
646 chimera_tags = [x for x in chimera_tags if x != []] 637 chimera_tags = [x for x in chimera_tags if x != []]
647 chimera_tags_new = [] 638 chimera_tags_new = []
648 for i in chimera_tags: 639 for i in chimera_tags:
649 if len(i) > 1: 640 if len(i) > 1:
650 for t in i: 641 for t in i:
651 chimera_tags_new.append(t) 642 chimera_tags_new.append(t)
652 else: 643 else:
653 chimera_tags_new.extend(i) 644 chimera_tags_new.extend(i)
654 chimera_tags_new = np.asarray(chimera_tags_new)
655 chimera = ", ".join(chimera_tags_new) 645 chimera = ", ".join(chimera_tags_new)
656 else: 646 else:
647 chimera_tags_new = []
657 chimera = "" 648 chimera = ""
649
650 if len(chimera_tags_new) > 0:
651 chimera_tags_new.append(sample_tag)
652 key_chimera = ",".join(sorted(chimera_tags_new))
653 if key_chimera in chimeric_tag.keys():
654 chimeric_tag[key_chimera].append(float(tier))
655 else:
656 chimeric_tag[key_chimera] = [float(tier)]
658 657
659 if (read_pos1 == -1): 658 if (read_pos1 == -1):
660 read_pos1 = read_len_median1 = None 659 read_pos1 = read_len_median1 = None
661 if (read_pos4 == -1): 660 if (read_pos4 == -1):
662 read_pos4 = read_len_median4 = None 661 read_pos4 = read_len_median4 = None
671 670
672 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 671 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
673 {'type': 'formula', 672 {'type': 'formula',
674 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 673 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
675 'format': format1, 674 'format': format1,
676 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 675 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)})
677 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 676 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
678 {'type': 'formula', 677 {'type': 'formula',
679 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), 678 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1),
680 'format': format3, 679 'format': format3,
681 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 680 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)})
682 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 681 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
683 {'type': 'formula', 682 {'type': 'formula',
684 'criteria': '=$B${}>="3"'.format(row + 1), 683 'criteria': '=$B${}>="3"'.format(row + 1),
685 'format': format2, 684 'format': format2,
686 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 685 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)})
687
688 row += 3 686 row += 3
689 687
688 if chimera_correction:
689 chimeric_dcs_high_tiers = 0
690 chimeric_dcs = 0
691 for keys_chimera in chimeric_tag.keys():
692 tiers = chimeric_tag[keys_chimera]
693 chimeric_dcs += len(tiers) - 1
694 high_tiers = sum(1 for t in tiers if t < 3.)
695 if high_tiers == len(tiers):
696 chimeric_dcs_high_tiers += high_tiers - 1
697 else:
698 chimeric_dcs_high_tiers += high_tiers
699 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers)
700
690 # sheet 2 701 # sheet 2
691 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (Du Novo)', 'AF (Du Novo)', 702 if chimera_correction:
692 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 703 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
693 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 704 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4',
694 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2') 705 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
706 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5')
707 else:
708 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
709 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4',
710 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
711 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5')
695 712
696 ws2.write_row(0, 0, header_line2) 713 ws2.write_row(0, 0, header_line2)
697 row = 0 714 row = 0
698 715
699 for key1, value1 in sorted(tier_dict.items()): 716 for key1, value1 in sorted(tier_dict.items()):
700 if key1 in pure_tags_dict_short.keys(): 717 if key1 in pure_tags_dict_short.keys():
701 i = np.where(np.array(['#'.join(str(i) for i in z) 718 i = np.where(np.array(['#'.join(str(i) for i in z)
702 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] 719 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
703 ref = mut_array[i, 9] 720 ref = mut_array[i, 2]
704 alt = mut_array[i, 10] 721 alt = mut_array[i, 3]
705 chrom, pos = re.split(r'\#', key1) 722 chrom, pos = re.split(r'\#', key1)
706 ref_count = cvrg_dict[key1][0] 723 ref_count = cvrg_dict[key1][0]
707 alt_count = cvrg_dict[key1][1] 724 alt_count = cvrg_dict[key1][1]
708 cvrg = ref_count + alt_count 725 cvrg = ref_count + alt_count
709 726
710 var_id = '-'.join([chrom, pos, ref, alt]) 727 var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt])
711 lst = [var_id, cvrg] 728 lst = [var_id, cvrg]
712 used_tiers = [] 729 used_tiers = []
713 cum_af = [] 730 cum_af = []
714 for key2, value2 in sorted(value1.items()): 731 for key2, value2 in sorted(value1.items()):
715 # calculate cummulative AF 732 # calculate cummulative AF
716 used_tiers.append(value2) 733 used_tiers.append(value2)
717 if len(used_tiers) > 1: 734 if len(used_tiers) > 1:
718 cum = safe_div(sum(used_tiers), cvrg) 735 cum = safe_div(sum(used_tiers), cvrg)
719 cum_af.append(cum) 736 cum_af.append(cum)
720 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg), (cvrg - sum(used_tiers[-4:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-4:]))), alt_count, safe_div(alt_count, cvrg)]) 737 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)])
738 if chimera_correction:
739 chimeras_all = chimera_dict[key1][0]
740 new_alt = sum(used_tiers) - chimeras_all
741 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers)))
742 if fraction_chimeras is None:
743 fraction_chimeras = 0.
744 new_cvrg = cvrg * (1. - fraction_chimeras)
745 lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)])
746 lst.extend([(cvrg - sum(used_tiers[-5:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-5:])))])
747 if chimera_correction:
748 chimeras_all = chimera_dict[key1][1]
749 new_alt = sum(used_tiers[0:6]) - chimeras_all
750 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:6])))
751 if fraction_chimeras is None:
752 fraction_chimeras = 0.
753 new_cvrg = (cvrg - sum(used_tiers[-5:])) * (1. - fraction_chimeras)
754 lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)])
755 lst.extend([alt_count, safe_div(alt_count, cvrg)])
721 lst.extend(used_tiers) 756 lst.extend(used_tiers)
722 lst.extend(cum_af) 757 lst.extend(cum_af)
723 lst = tuple(lst) 758 lst = tuple(lst)
724 ws2.write_row(row + 1, 0, lst) 759 ws2.write_row(row + 1, 0, lst)
725 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) 760 if chimera_correction:
726 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) 761 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format1, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)})
727 ws2.conditional_format('P{}:S{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:S{} P1:S1'.format(row + 2, row + 2)}) 762 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format3, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)})
763 ws2.conditional_format('V{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format2, 'multi_range': 'V{}:Z{} V1:Z1'.format(row + 2, row + 2)})
764 else:
765 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)})
766 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)})
767 ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:T{} P1:T1'.format(row + 2, row + 2)})
728 row += 1 768 row += 1
729 769
730 # sheet 3 770 # sheet 3
731 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), 771 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21),
732 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), 772 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24),
733 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), 773 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41),
734 ("tier 4.1", counter_tier41), ("tier 4.2", counter_tier42)] 774 ("tier 4.2", counter_tier42), ("tier 5", counter_tier5)]
735 775
736 header = ("tier", "count") 776 header = ("tier", "count")
737 ws3.write_row(0, 0, header) 777 ws3.write_row(0, 0, header)
738 778
739 for i in range(len(sheet3)): 779 for i in range(len(sheet3)):
746 {'type': 'formula', 786 {'type': 'formula',
747 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2), 787 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2),
748 'format': format3}) 788 'format': format3})
749 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), 789 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2),
750 {'type': 'formula', 790 {'type': 'formula',
751 'criteria': '=OR($A${}="tier 3.1", $A${}="tier 3.2", $A${}="tier 4.1", $A${}="tier 4.2")'.format(i + 2, i + 2, i + 2, i + 2), 791 'criteria': '=$A${}>="3"'.format(i + 2),
752 'format': format2}) 792 'format': format2})
753 793
754 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "remaining variants")] 794 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 5", "remaining variants")]
755 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", 795 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289",
756 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", 796 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
757 "4081", "4098", "5", "10", "", ""), 797 "4081", "4098", "5", "10", "", ""),
758 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, 798 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None,
759 "289", "0", "0", "0", "0", "0", "0", "3", "6", None, None, None, None, 799 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None,
760 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], 800 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")],
761 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", 801 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289",
762 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", 802 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0",
763 "0", "4081", "4098", "5", "10", "", ""), 803 "0", "4081", "4098", "5", "10", "", ""),
764 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "11068", "268", "268", "270", "288", "289", 804 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289",
765 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", 805 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1",
766 "7", "4081", "4098", "5", "10", "", "")], 806 "7", "0", "0", "4081", "4098", "5", "10", "", "")],
767 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", 807 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290",
768 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "1", 808 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0",
769 "6", "47170", "41149", "", ""), 809 "0", "0", "1", "6", "47170", "41149", "", ""),
770 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", 810 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290",
771 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "1", 811 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0",
772 "6", "47170", "41149", "", "")], 812 "0", "0", "1", "6", "47170", "41149", "", "")],
773 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", 813 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289",
774 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", 814 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
775 "4081", "4098", "5", "10", "", ""), 815 "4081", "4098", "5", "10", "", ""),
776 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, 816 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None,
777 "289", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", None, None, "0", "0", 817 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0",
778 "0", "0", "4081", "4098", "5", "10", "", "")], 818 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")],
779 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", 819 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289",
780 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", 820 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
781 "4081", "4098", "5", "10", "", ""), 821 "4081", "4098", "5", "10", "", ""),
782 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", 822 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289",
783 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", 823 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
784 "4081", "4098", "5", "10", "", "")], 824 "4081", "4098", "5", "10", "", "")],
785 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, 825 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None,
786 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", 826 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0",
787 "0", "0", "0", "4081", "4098", "5", "10", "", ""), 827 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""),
788 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", 828 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289",
789 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "1", "7", 829 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0",
790 "4081", "4098", "5", "10", "", "")], 830 "0", "0", "4081", "4098", "5", "10", "", "")],
791 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", 831 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289",
792 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "4081", 832 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081",
793 "4098", "5", "10", "", ""), 833 "4098", "5", "10", "", ""),
794 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", 834 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289",
795 "1", "1", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "4081", 835 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081",
796 "4098", "5", "10", "", "")], 836 "4098", "5", "10", "", "")],
797 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", 837 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290",
798 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", 838 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1",
799 "3", "3", "47170", "41149", "", ""), 839 "0", "0", "3", "3", "47170", "41149", "", ""),
800 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, 840 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None,
801 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", 841 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5",
802 "0", "0", "0", "1", "3", "3", "47170", "41149", "", "")], 842 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")],
803 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", 843 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271",
804 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", 844 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",
805 "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), 845 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""),
806 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", 846 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271",
807 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", 847 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",
808 "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], 848 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")],
809 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "0", "255", "276", "269", 849 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269",
810 "5", "6", "5", "6", "0", "0", "5", "6", "0", "0", "1", "1", "0", "0", "0", "0", "1", 850 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""),
811 "1", "5348", "5350", "", ""),
812 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, 851 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None,
813 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", 852 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
814 "0", "0", "0", "1", "1", "5348", "5350", "", "")], 853 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
815 [("Chr5:5-20000-13983-G-C", "4.2", "ATGTTGTGAATAACCCACAC", "ab1.ba2", "209", "186", "255", "276", "269", 854 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263",
816 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "1", 855 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0",
817 "1", "5348", "5350", "", ""), 856 "0", "0", "0", "1", "1", "5348", "5350", "", ""),
857 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263",
858 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0",
859 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
860 [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269",
861 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0",
862 "0", "1", "1", "5348", "5350", "", ""),
818 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, 863 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None,
819 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", 864 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
820 "0", "0", "0", "1", "1", "5348", "5350", "", "")]] 865 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]]
821 866
822 ws3.write(11, 0, "Description of tiers with examples") 867 start_row = 15
823 ws3.write_row(12, 0, header_line) 868 ws3.write(start_row, 0, "Description of tiers with examples")
869 ws3.write_row(start_row + 1, 0, header_line)
824 row = 0 870 row = 0
825 for i in range(len(description_tiers)): 871 for i in range(len(description_tiers)):
826 ws3.write_row(13 + row + i + 1, 0, description_tiers[i]) 872 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i])
827 ex = examples_tiers[i] 873 ex = examples_tiers[i]
828 for k in range(len(ex)): 874 for k in range(len(ex)):
829 ws3.write_row(13 + row + i + k + 2, 0, ex[k]) 875 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k])
830 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(13 + row + i + k + 2, 13 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) 876 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)})
831 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), 877 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3),
832 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2), 878 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2),
833 'format': format3, 879 'format': format3,
834 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) 880 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)})
835 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), 881 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3),
836 {'type': 'formula', 882 {'type': 'formula',
837 'criteria': '=$B${}>="3"'.format(13 + row + i + k + 2), 883 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2),
838 'format': format2, 884 'format': format2,
839 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) 885 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)})
840 row += 3 886 row += 3
841 workbook.close() 887 workbook.close()
842 888
843 889
844 if __name__ == '__main__': 890 if __name__ == '__main__':