Mercurial > repos > iuc > variant_analyzer
comparison read2mut.py @ 2:3f1dbd2c59bf draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
author | iuc |
---|---|
date | Tue, 10 Nov 2020 12:55:29 +0000 |
parents | 3556001ff2db |
children |
comparison
equal
deleted
inserted
replaced
1:3556001ff2db | 2:3f1dbd2c59bf |
---|---|
8 Looks for reads with mutation at known | 8 Looks for reads with mutation at known |
9 positions and calculates frequencies and stats. | 9 positions and calculates frequencies and stats. |
10 | 10 |
11 ======= ========== ================= ================================ | 11 ======= ========== ================= ================================ |
12 Version Date Author Description | 12 Version Date Author Description |
13 0.2.1 2019-10-27 Gundula Povysil - | 13 2.0.0 2020-10-30 Gundula Povysil - |
14 ======= ========== ================= ================================ | 14 ======= ========== ================= ================================ |
15 | 15 |
16 | 16 |
17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam | 17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam |
18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json | 18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json |
19 --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 | 19 --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 --chimera_correction |
20 | 20 |
21 """ | 21 """ |
22 | 22 |
23 from __future__ import division | 23 from __future__ import division |
24 | 24 |
30 import sys | 30 import sys |
31 | 31 |
32 import numpy as np | 32 import numpy as np |
33 import pysam | 33 import pysam |
34 import xlsxwriter | 34 import xlsxwriter |
35 from cyvcf2 import VCF | |
35 | 36 |
36 | 37 |
37 def make_argparser(): | 38 def make_argparser(): |
38 parser = argparse.ArgumentParser(description='Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') | 39 parser = argparse.ArgumentParser(description='Takes a VCF file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') |
39 parser.add_argument('--mutFile', | 40 parser.add_argument('--mutFile', |
40 help='TABULAR file with DCS mutations.') | 41 help='VCF file with DCS mutations.') |
41 parser.add_argument('--bamFile', | 42 parser.add_argument('--bamFile', |
42 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') | 43 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') |
43 parser.add_argument('--inputJson', | 44 parser.add_argument('--inputJson', |
44 help='JSON file with data collected by mut2read.py.') | 45 help='JSON file with data collected by mut2read.py.') |
45 parser.add_argument('--sscsJson', | 46 parser.add_argument('--sscsJson', |
50 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') | 51 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') |
51 parser.add_argument('--phred', type=int, default=20, | 52 parser.add_argument('--phred', type=int, default=20, |
52 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') | 53 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') |
53 parser.add_argument('--trim', type=int, default=10, | 54 parser.add_argument('--trim', type=int, default=10, |
54 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') | 55 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') |
56 parser.add_argument('--chimera_correction', action="store_true", | |
57 help='Count chimeric variants and correct the variant frequencies') | |
55 return parser | 58 return parser |
56 | 59 |
57 | 60 |
58 def safe_div(x, y): | 61 def safe_div(x, y): |
59 if y == 0: | 62 if y == 0: |
70 sscs_json = args.sscsJson | 73 sscs_json = args.sscsJson |
71 outfile = args.outputFile | 74 outfile = args.outputFile |
72 thresh = args.thresh | 75 thresh = args.thresh |
73 phred_score = args.phred | 76 phred_score = args.phred |
74 trim = args.trim | 77 trim = args.trim |
78 chimera_correction = args.chimera_correction | |
75 | 79 |
76 if os.path.isfile(file1) is False: | 80 if os.path.isfile(file1) is False: |
77 sys.exit("Error: Could not find '{}'".format(file1)) | 81 sys.exit("Error: Could not find '{}'".format(file1)) |
78 if os.path.isfile(file2) is False: | 82 if os.path.isfile(file2) is False: |
79 sys.exit("Error: Could not find '{}'".format(file2)) | 83 sys.exit("Error: Could not find '{}'".format(file2)) |
84 if phred_score < 0: | 88 if phred_score < 0: |
85 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) | 89 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) |
86 if trim < 0: | 90 if trim < 0: |
87 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) | 91 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) |
88 | 92 |
89 # 1. read mut file | 93 # load dicts |
90 with open(file1, 'r') as mut: | |
91 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) | |
92 | |
93 # 2. load dicts | |
94 with open(json_file, "r") as f: | 94 with open(json_file, "r") as f: |
95 (tag_dict, cvrg_dict) = json.load(f) | 95 (tag_dict, cvrg_dict) = json.load(f) |
96 | 96 |
97 with open(sscs_json, "r") as f: | 97 with open(sscs_json, "r") as f: |
98 (mut_pos_dict, ref_pos_dict) = json.load(f) | 98 (mut_pos_dict, ref_pos_dict) = json.load(f) |
99 | 99 |
100 # 3. read bam file | 100 # read bam file |
101 # pysam.index(file2) | |
102 bam = pysam.AlignmentFile(file2, "rb") | 101 bam = pysam.AlignmentFile(file2, "rb") |
103 | 102 |
104 # 4. create mut_dict | 103 # create mut_dict |
105 mut_dict = {} | 104 mut_dict = {} |
106 mut_read_pos_dict = {} | 105 mut_read_pos_dict = {} |
107 mut_read_dict = {} | 106 mut_read_dict = {} |
108 reads_dict = {} | 107 reads_dict = {} |
109 if mut_array.shape == (13, ): | 108 i = 0 |
110 mut_array = mut_array.reshape((1, len(mut_array))) | 109 mut_array = [] |
111 | 110 |
112 for m in range(0, len(mut_array[:, 0])): | 111 for variant in VCF(file1): |
113 print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) | 112 chrom = variant.CHROM |
114 # for m in range(0, 5): | 113 stop_pos = variant.start |
115 chrom = mut_array[m, 1] | |
116 stop_pos = mut_array[m, 2].astype(int) | |
117 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) | 114 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) |
118 ref = mut_array[m, 9] | 115 ref = variant.REF |
119 alt = mut_array[m, 10] | 116 alt = variant.ALT[0] |
120 mut_dict[chrom_stop_pos] = {} | 117 |
121 mut_read_pos_dict[chrom_stop_pos] = {} | 118 if len(ref) == len(alt): |
122 reads_dict[chrom_stop_pos] = {} | 119 mut_array.append([chrom, stop_pos, ref, alt]) |
123 | 120 i += 1 |
124 for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000): | 121 mut_dict[chrom_stop_pos] = {} |
125 if pileupcolumn.reference_pos == stop_pos - 1: | 122 mut_read_pos_dict[chrom_stop_pos] = {} |
126 count_alt = 0 | 123 reads_dict[chrom_stop_pos] = {} |
127 count_ref = 0 | 124 |
128 count_indel = 0 | 125 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): |
129 count_n = 0 | 126 if pileupcolumn.reference_pos == stop_pos: |
130 count_other = 0 | 127 count_alt = 0 |
131 count_lowq = 0 | 128 count_ref = 0 |
132 n = 0 | 129 count_indel = 0 |
133 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), | 130 count_n = 0 |
134 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) | 131 count_other = 0 |
135 for pileupread in pileupcolumn.pileups: | 132 count_lowq = 0 |
136 n += 1 | 133 n = 0 |
137 if not pileupread.is_del and not pileupread.is_refskip: | 134 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), |
138 tag = pileupread.alignment.query_name | 135 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) |
139 nuc = pileupread.alignment.query_sequence[pileupread.query_position] | 136 for pileupread in pileupcolumn.pileups: |
140 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 | 137 n += 1 |
141 if phred < phred_score: | 138 if not pileupread.is_del and not pileupread.is_refskip: |
142 nuc = "lowQ" | 139 tag = pileupread.alignment.query_name |
143 if tag not in mut_dict[chrom_stop_pos]: | 140 nuc = pileupread.alignment.query_sequence[pileupread.query_position] |
144 mut_dict[chrom_stop_pos][tag] = {} | 141 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 |
145 if nuc in mut_dict[chrom_stop_pos][tag]: | 142 if phred < phred_score: |
146 mut_dict[chrom_stop_pos][tag][nuc] += 1 | 143 nuc = "lowQ" |
147 else: | 144 if tag not in mut_dict[chrom_stop_pos]: |
148 mut_dict[chrom_stop_pos][tag][nuc] = 1 | 145 mut_dict[chrom_stop_pos][tag] = {} |
149 if tag not in mut_read_pos_dict[chrom_stop_pos]: | 146 if nuc in mut_dict[chrom_stop_pos][tag]: |
150 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 | 147 mut_dict[chrom_stop_pos][tag][nuc] += 1 |
151 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) | |
152 else: | |
153 mut_read_pos_dict[chrom_stop_pos][tag] = np.append( | |
154 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1) | |
155 reads_dict[chrom_stop_pos][tag] = np.append( | |
156 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence)) | |
157 | |
158 if nuc == alt: | |
159 count_alt += 1 | |
160 if tag not in mut_read_dict: | |
161 mut_read_dict[tag] = {} | |
162 mut_read_dict[tag][chrom_stop_pos] = alt | |
163 else: | 148 else: |
164 mut_read_dict[tag][chrom_stop_pos] = alt | 149 mut_dict[chrom_stop_pos][tag][nuc] = 1 |
165 elif nuc == ref: | 150 if tag not in mut_read_pos_dict[chrom_stop_pos]: |
166 count_ref += 1 | 151 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 |
167 elif nuc == "N": | 152 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) |
168 count_n += 1 | 153 else: |
169 elif nuc == "lowQ": | 154 mut_read_pos_dict[chrom_stop_pos][tag] = np.append( |
170 count_lowq += 1 | 155 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1) |
171 else: | 156 reads_dict[chrom_stop_pos][tag] = np.append( |
172 count_other += 1 | 157 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence)) |
173 else: | 158 |
174 count_indel += 1 | 159 if nuc == alt: |
175 | 160 count_alt += 1 |
176 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) | 161 if tag not in mut_read_dict: |
177 | 162 mut_read_dict[tag] = {} |
163 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) | |
164 else: | |
165 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) | |
166 elif nuc == ref: | |
167 count_ref += 1 | |
168 elif nuc == "N": | |
169 count_n += 1 | |
170 elif nuc == "lowQ": | |
171 count_lowq += 1 | |
172 else: | |
173 count_other += 1 | |
174 else: | |
175 count_indel += 1 | |
176 | |
177 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) | |
178 else: | |
179 print("indels are currently not evaluated") | |
180 | |
181 mut_array = np.array(mut_array) | |
178 for read in bam.fetch(until_eof=True): | 182 for read in bam.fetch(until_eof=True): |
179 if read.is_unmapped: | 183 if read.is_unmapped: |
180 pure_tag = read.query_name[:-5] | 184 pure_tag = read.query_name[:-5] |
181 nuc = "na" | 185 nuc = "na" |
182 for key in tag_dict[pure_tag].keys(): | 186 for key in tag_dict[pure_tag].keys(): |
188 mut_dict[key][read.query_name][nuc] += 1 | 192 mut_dict[key][read.query_name][nuc] += 1 |
189 else: | 193 else: |
190 mut_dict[key][read.query_name][nuc] = 1 | 194 mut_dict[key][read.query_name][nuc] = 1 |
191 bam.close() | 195 bam.close() |
192 | 196 |
193 # 5. create pure_tags_dict | 197 # create pure_tags_dict |
194 pure_tags_dict = {} | 198 pure_tags_dict = {} |
195 for key1, value1 in sorted(mut_dict.items()): | 199 for key1, value1 in sorted(mut_dict.items()): |
196 i = np.where(np.array(['#'.join(str(i) for i in z) | 200 i = np.where(np.array(['#'.join(str(i) for i in z) |
197 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | 201 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] |
198 ref = mut_array[i, 9] | 202 ref = mut_array[i, 2] |
199 alt = mut_array[i, 10] | 203 alt = mut_array[i, 3] |
200 pure_tags_dict[key1] = {} | 204 pure_tags_dict[key1] = {} |
201 for key2, value2 in sorted(value1.items()): | 205 for key2, value2 in sorted(value1.items()): |
202 for key3, value3 in value2.items(): | 206 for key3, value3 in value2.items(): |
203 pure_tag = key2[:-5] | 207 pure_tag = key2[:-5] |
204 if key3 == alt: | 208 if key3 == alt: |
205 if pure_tag in pure_tags_dict[key1]: | 209 if pure_tag in pure_tags_dict[key1]: |
206 pure_tags_dict[key1][pure_tag] += 1 | 210 pure_tags_dict[key1][pure_tag] += 1 |
207 else: | 211 else: |
208 pure_tags_dict[key1][pure_tag] = 1 | 212 pure_tags_dict[key1][pure_tag] = 1 |
209 | 213 |
210 # 6. create pure_tags_dict_short with thresh | 214 # create pure_tags_dict_short with thresh |
211 if thresh > 0: | 215 if thresh > 0: |
212 pure_tags_dict_short = {} | 216 pure_tags_dict_short = {} |
213 for key, value in sorted(pure_tags_dict.items()): | 217 for key, value in sorted(pure_tags_dict.items()): |
214 if len(value) < thresh: | 218 if len(value) < thresh: |
215 pure_tags_dict_short[key] = value | 219 pure_tags_dict_short[key] = value |
216 else: | 220 else: |
217 pure_tags_dict_short = pure_tags_dict | 221 pure_tags_dict_short = pure_tags_dict |
218 | 222 |
219 whole_array = [] | 223 # output summary with threshold |
220 for k in pure_tags_dict.values(): | |
221 whole_array.extend(k.keys()) | |
222 | |
223 # 7. output summary with threshold | |
224 workbook = xlsxwriter.Workbook(outfile) | 224 workbook = xlsxwriter.Workbook(outfile) |
225 ws1 = workbook.add_worksheet("Results") | 225 ws1 = workbook.add_worksheet("Results") |
226 ws2 = workbook.add_worksheet("Allele frequencies") | 226 ws2 = workbook.add_worksheet("Allele frequencies") |
227 ws3 = workbook.add_worksheet("Tiers") | 227 ws3 = workbook.add_worksheet("Tiers") |
228 | 228 |
232 | 232 |
233 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', | 233 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', |
234 'read median length.ba', 'DCS median length', | 234 'read median length.ba', 'DCS median length', |
235 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', | 235 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', |
236 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 236 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
237 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', | 237 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', |
238 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | 238 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', |
239 'other mut', 'chimeric tag') | 239 'in phase', 'chimeric tag') |
240 ws1.write_row(0, 0, header_line) | 240 ws1.write_row(0, 0, header_line) |
241 | |
242 counter_tier11 = 0 | 241 counter_tier11 = 0 |
243 counter_tier12 = 0 | 242 counter_tier12 = 0 |
244 counter_tier21 = 0 | 243 counter_tier21 = 0 |
245 counter_tier22 = 0 | 244 counter_tier22 = 0 |
246 counter_tier23 = 0 | 245 counter_tier23 = 0 |
247 counter_tier24 = 0 | 246 counter_tier24 = 0 |
248 counter_tier31 = 0 | 247 counter_tier31 = 0 |
249 counter_tier32 = 0 | 248 counter_tier32 = 0 |
250 counter_tier41 = 0 | 249 counter_tier41 = 0 |
251 counter_tier42 = 0 | 250 counter_tier42 = 0 |
252 | 251 counter_tier5 = 0 |
253 row = 1 | 252 row = 1 |
254 tier_dict = {} | 253 tier_dict = {} |
254 chimera_dict = {} | |
255 for key1, value1 in sorted(mut_dict.items()): | 255 for key1, value1 in sorted(mut_dict.items()): |
256 counts_mut = 0 | 256 counts_mut = 0 |
257 chimeric_tag = {} | |
257 if key1 in pure_tags_dict_short.keys(): | 258 if key1 in pure_tags_dict_short.keys(): |
258 i = np.where(np.array(['#'.join(str(i) for i in z) | 259 i = np.where(np.array(['#'.join(str(i) for i in z) |
259 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | 260 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] |
260 ref = mut_array[i, 9] | 261 ref = mut_array[i, 2] |
261 alt = mut_array[i, 10] | 262 alt = mut_array[i, 3] |
262 dcs_median = cvrg_dict[key1][2] | 263 dcs_median = cvrg_dict[key1][2] |
264 whole_array = list(pure_tags_dict_short[key1].keys()) | |
263 | 265 |
264 tier_dict[key1] = {} | 266 tier_dict[key1] = {} |
265 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0)] | 267 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0)] |
266 for k, v in values_tier_dict: | 268 for k, v in values_tier_dict: |
267 tier_dict[key1][k] = v | 269 tier_dict[key1][k] = v |
268 | 270 |
269 used_keys = [] | 271 used_keys = [] |
270 if 'ab' in mut_pos_dict[key1].keys(): | 272 if 'ab' in mut_pos_dict[key1].keys(): |
289 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): | 291 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): |
290 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): | 292 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): |
291 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) | 293 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) |
292 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 294 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
293 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] | 295 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] |
294 # na1f = na1/total1 | 296 else: |
295 else: | |
296 # na1 = na1f = 0 | |
297 na1 = 0 | 297 na1 = 0 |
298 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 298 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
299 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] | 299 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] |
300 # lowq1f = lowq1 / total1 | 300 else: |
301 else: | |
302 # lowq1 = lowq1f = 0 | |
303 lowq1 = 0 | 301 lowq1 = 0 |
304 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 302 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
305 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] | 303 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] |
306 ref1f = ref1 / (total1 - na1 - lowq1) | 304 ref1f = ref1 / (total1 - na1 - lowq1) |
307 else: | 305 else: |
316 k1 = mut_read_dict[(key2[:-5] + '.ab.1')].keys() | 314 k1 = mut_read_dict[(key2[:-5] + '.ab.1')].keys() |
317 add_mut1 = len(k1) | 315 add_mut1 = len(k1) |
318 if add_mut1 > 1: | 316 if add_mut1 > 1: |
319 for k, v in mut_read_dict[(key2[:-5] + '.ab.1')].items(): | 317 for k, v in mut_read_dict[(key2[:-5] + '.ab.1')].items(): |
320 if k != key1: | 318 if k != key1: |
319 new_mut = str(k).split("#")[0] + "-" + str(int(str(k).split("#")[1]) + 1) + "-" + v[1] + "-" + v[0] | |
321 if len(add_mut14) == 0: | 320 if len(add_mut14) == 0: |
322 add_mut14 = str(k) + "_" + v | 321 add_mut14 = new_mut |
323 else: | 322 else: |
324 add_mut14 = add_mut14 + ", " + str(k) + "_" + v | 323 add_mut14 = add_mut14 + ", " + new_mut |
325 else: | 324 else: |
326 k1 = [] | 325 k1 = [] |
327 else: | 326 else: |
328 total1 = total1new = na1 = lowq1 = 0 | 327 total1 = total1new = na1 = lowq1 = 0 |
329 ref1 = alt1 = ref1f = alt1f = 0 | 328 ref1 = alt1 = ref1f = alt1f = 0 |
329 k1 = [] | |
330 | 330 |
331 if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): | 331 if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): |
332 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) | 332 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) |
333 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 333 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
334 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] | 334 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] |
335 # na2f = na2 / total2 | 335 else: |
336 else: | |
337 # na2 = na2f = 0 | |
338 na2 = 0 | 336 na2 = 0 |
339 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 337 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
340 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] | 338 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] |
341 # lowq2f = lowq2 / total2 | 339 else: |
342 else: | |
343 # lowq2 = lowq2f = 0 | |
344 lowq2 = 0 | 340 lowq2 = 0 |
345 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 341 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
346 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] | 342 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] |
347 ref2f = ref2 / (total2 - na2 - lowq2) | 343 ref2f = ref2 / (total2 - na2 - lowq2) |
348 else: | 344 else: |
357 k2 = mut_read_dict[(key2[:-5] + '.ab.2')].keys() | 353 k2 = mut_read_dict[(key2[:-5] + '.ab.2')].keys() |
358 add_mut2 = len(k2) | 354 add_mut2 = len(k2) |
359 if add_mut2 > 1: | 355 if add_mut2 > 1: |
360 for k, v in mut_read_dict[(key2[:-5] + '.ab.2')].items(): | 356 for k, v in mut_read_dict[(key2[:-5] + '.ab.2')].items(): |
361 if k != key1: | 357 if k != key1: |
358 new_mut = str(k).split("#")[0] + "-" + str(int(str(k).split("#")[1]) + 1) + "-" + v[1] + "-" + v[0] | |
362 if len(add_mut23) == 0: | 359 if len(add_mut23) == 0: |
363 add_mut23 = str(k) + "_" + v | 360 add_mut23 = new_mut |
364 else: | 361 else: |
365 add_mut23 = add_mut23 + ", " + str(k) + "_" + v | 362 add_mut23 = add_mut23 + ", " + new_mut |
366 else: | 363 else: |
367 k2 = [] | 364 k2 = [] |
368 else: | 365 else: |
369 total2 = total2new = na2 = lowq2 = 0 | 366 total2 = total2new = na2 = lowq2 = 0 |
370 ref2 = alt2 = ref2f = alt2f = 0 | 367 ref2 = alt2 = ref2f = alt2f = 0 |
368 k2 = [] | |
371 | 369 |
372 if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): | 370 if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): |
373 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) | 371 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) |
374 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 372 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
375 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] | 373 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] |
376 # na3f = na3 / total3 | 374 else: |
377 else: | |
378 # na3 = na3f = 0 | |
379 na3 = 0 | 375 na3 = 0 |
380 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 376 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
381 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] | 377 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] |
382 # lowq3f = lowq3 / total3 | 378 else: |
383 else: | |
384 # lowq3 = lowq3f = 0 | |
385 lowq3 = 0 | 379 lowq3 = 0 |
386 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 380 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
387 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] | 381 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] |
388 ref3f = ref3 / (total3 - na3 - lowq3) | 382 ref3f = ref3 / (total3 - na3 - lowq3) |
389 else: | 383 else: |
397 if (key2[:-5] + '.ba.1') in mut_read_dict.keys(): | 391 if (key2[:-5] + '.ba.1') in mut_read_dict.keys(): |
398 add_mut3 = len(mut_read_dict[(key2[:-5] + '.ba.1')].keys()) | 392 add_mut3 = len(mut_read_dict[(key2[:-5] + '.ba.1')].keys()) |
399 if add_mut3 > 1: | 393 if add_mut3 > 1: |
400 for k, v in mut_read_dict[(key2[:-5] + '.ba.1')].items(): | 394 for k, v in mut_read_dict[(key2[:-5] + '.ba.1')].items(): |
401 if k != key1 and k not in k2: | 395 if k != key1 and k not in k2: |
396 new_mut = str(k).split("#")[0] + "-" + str(int(str(k).split("#")[1]) + 1) + "-" + v[1] + "-" + v[0] | |
402 if len(add_mut23) == 0: | 397 if len(add_mut23) == 0: |
403 add_mut23 = str(k) + "_" + v | 398 add_mut23 = new_mut |
404 else: | 399 else: |
405 add_mut23 = add_mut23 + ", " + str(k) + "_" + v | 400 add_mut23 = add_mut23 + ", " + new_mut |
406 else: | 401 else: |
407 total3 = total3new = na3 = lowq3 = 0 | 402 total3 = total3new = na3 = lowq3 = 0 |
408 ref3 = alt3 = ref3f = alt3f = 0 | 403 ref3 = alt3 = ref3f = alt3f = 0 |
409 | 404 |
410 if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): | 405 if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): |
411 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) | 406 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) |
412 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 407 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
413 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] | 408 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] |
414 # na4f = na4 / total4 | 409 else: |
415 else: | |
416 # na4 = na4f = 0 | |
417 na4 = 0 | 410 na4 = 0 |
418 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 411 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
419 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] | 412 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] |
420 # lowq4f = lowq4 / total4 | 413 else: |
421 else: | |
422 # lowq4 = lowq4f = 0 | |
423 lowq4 = 0 | 414 lowq4 = 0 |
424 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 415 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
425 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] | 416 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] |
426 ref4f = ref4 / (total4 - na4 - lowq4) | 417 ref4f = ref4 / (total4 - na4 - lowq4) |
427 else: | 418 else: |
435 if (key2[:-5] + '.ba.2') in mut_read_dict.keys(): | 426 if (key2[:-5] + '.ba.2') in mut_read_dict.keys(): |
436 add_mut4 = len(mut_read_dict[(key2[:-5] + '.ba.2')].keys()) | 427 add_mut4 = len(mut_read_dict[(key2[:-5] + '.ba.2')].keys()) |
437 if add_mut4 > 1: | 428 if add_mut4 > 1: |
438 for k, v in mut_read_dict[(key2[:-5] + '.ba.2')].items(): | 429 for k, v in mut_read_dict[(key2[:-5] + '.ba.2')].items(): |
439 if k != key1 and k not in k1: | 430 if k != key1 and k not in k1: |
431 new_mut = str(k).split("#")[0] + "-" + str(int(str(k).split("#")[1]) + 1) + "-" + v[1] + "-" + v[0] | |
440 if len(add_mut14) == 0: | 432 if len(add_mut14) == 0: |
441 add_mut14 = str(k) + "_" + v | 433 add_mut14 = new_mut |
442 else: | 434 else: |
443 add_mut14 = add_mut14 + ", " + str(k) + "_" + v | 435 add_mut14 = add_mut14 + ", " + new_mut |
444 else: | 436 else: |
445 total4 = total4new = na4 = lowq4 = 0 | 437 total4 = total4new = na4 = lowq4 = 0 |
446 ref4 = alt4 = ref4f = alt4f = 0 | 438 ref4 = alt4 = ref4f = alt4f = 0 |
447 | 439 |
448 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 | 440 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 |
483 ref4f = alt4f = None | 475 ref4f = alt4f = None |
484 alt4ff = -1 | 476 alt4ff = -1 |
485 else: | 477 else: |
486 alt4ff = alt4f | 478 alt4ff = alt4f |
487 | 479 |
488 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4) | 480 beg1 = beg4 = beg2 = beg3 = 0 |
489 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3) | 481 |
482 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) | |
483 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | |
484 | |
490 trimmed = False | 485 trimmed = False |
491 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): | 486 contradictory = False |
492 total1new = 0 | 487 |
488 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): | |
493 alt1ff = 0 | 489 alt1ff = 0 |
494 trimmed = True | |
495 | |
496 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): | |
497 total4new = 0 | |
498 alt4ff = 0 | 490 alt4ff = 0 |
499 trimmed = True | |
500 | |
501 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): | |
502 total2new = 0 | |
503 alt2ff = 0 | 491 alt2ff = 0 |
504 trimmed = True | |
505 | |
506 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): | |
507 total3new = 0 | |
508 alt3ff = 0 | 492 alt3ff = 0 |
509 trimmed = True | 493 trimmed = False |
510 | 494 contradictory = True |
511 chrom, pos = re.split(r'\#', key1) | 495 else: |
496 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): | |
497 beg1 = total1new | |
498 total1new = 0 | |
499 alt1ff = 0 | |
500 alt1f = 0 | |
501 trimmed = True | |
502 | |
503 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): | |
504 beg4 = total4new | |
505 total4new = 0 | |
506 alt4ff = 0 | |
507 alt4f = 0 | |
508 trimmed = True | |
509 | |
510 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): | |
511 beg2 = total2new | |
512 total2new = 0 | |
513 alt2ff = 0 | |
514 alt2f = 0 | |
515 trimmed = True | |
516 | |
517 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): | |
518 beg3 = total3new | |
519 total3new = 0 | |
520 alt3ff = 0 | |
521 alt3f = 0 | |
522 trimmed = True | |
523 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) | |
524 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | |
525 | |
512 # assign tiers | 526 # assign tiers |
513 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & | 527 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | (all(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): |
514 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
515 (all(int(ij) >= 3 for ij in [total2new, total3new]) & | |
516 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
517 tier = "1.1" | 528 tier = "1.1" |
518 counter_tier11 += 1 | 529 counter_tier11 += 1 |
519 tier_dict[key1]["tier 1.1"] += 1 | 530 tier_dict[key1]["tier 1.1"] += 1 |
520 | 531 |
521 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & | 532 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) |
522 any(int(ij) >= 3 for ij in [total1new, total4new]) & | 533 & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): |
523 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
524 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | |
525 tier = "1.2" | 534 tier = "1.2" |
526 counter_tier12 += 1 | 535 counter_tier12 += 1 |
527 tier_dict[key1]["tier 1.2"] += 1 | 536 tier_dict[key1]["tier 1.2"] += 1 |
528 | 537 |
529 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | 538 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
530 any(int(ij) >= 3 for ij in [total1new, total4new]) & | 539 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): |
531 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
532 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
533 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
534 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
535 tier = "2.1" | 540 tier = "2.1" |
536 counter_tier21 += 1 | 541 counter_tier21 += 1 |
537 tier_dict[key1]["tier 2.1"] += 1 | 542 tier_dict[key1]["tier 2.1"] += 1 |
538 | 543 |
539 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & | 544 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): |
540 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | |
541 tier = "2.2" | 545 tier = "2.2" |
542 counter_tier22 += 1 | 546 counter_tier22 += 1 |
543 tier_dict[key1]["tier 2.2"] += 1 | 547 tier_dict[key1]["tier 2.2"] += 1 |
544 | 548 |
545 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | 549 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) |
546 any(int(ij) >= 3 for ij in [total2new, total3new]) & | 550 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): |
547 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & | |
548 any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) | | |
549 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
550 any(int(ij) >= 3 for ij in [total1new, total4new]) & | |
551 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & | |
552 any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): | |
553 tier = "2.3" | 551 tier = "2.3" |
554 counter_tier23 += 1 | 552 counter_tier23 += 1 |
555 tier_dict[key1]["tier 2.3"] += 1 | 553 tier_dict[key1]["tier 2.3"] += 1 |
556 | 554 |
557 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | 555 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
558 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | 556 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): |
559 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
560 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
561 tier = "2.4" | 557 tier = "2.4" |
562 counter_tier24 += 1 | 558 counter_tier24 += 1 |
563 tier_dict[key1]["tier 2.4"] += 1 | 559 tier_dict[key1]["tier 2.4"] += 1 |
564 | 560 |
565 elif ((len(pure_tags_dict_short[key1]) > 1) & | 561 elif ((len(pure_tags_dict_short[key1]) > 1) & (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): |
566 (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | | |
567 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | |
568 tier = "3.1" | 562 tier = "3.1" |
569 counter_tier31 += 1 | 563 counter_tier31 += 1 |
570 tier_dict[key1]["tier 3.1"] += 1 | 564 tier_dict[key1]["tier 3.1"] += 1 |
571 | 565 |
572 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | 566 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) |
573 all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) | | 567 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): |
574 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
575 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | |
576 tier = "3.2" | 568 tier = "3.2" |
577 counter_tier32 += 1 | 569 counter_tier32 += 1 |
578 tier_dict[key1]["tier 3.2"] += 1 | 570 tier_dict[key1]["tier 3.2"] += 1 |
579 | 571 |
580 elif (trimmed): | 572 elif (trimmed): |
581 tier = "4.1" | 573 tier = "4.1" |
582 counter_tier41 += 1 | 574 counter_tier41 += 1 |
583 tier_dict[key1]["tier 4.1"] += 1 | 575 tier_dict[key1]["tier 4.1"] += 1 |
584 | 576 |
585 else: | 577 elif (contradictory): |
586 tier = "4.2" | 578 tier = "4.2" |
587 counter_tier42 += 1 | 579 counter_tier42 += 1 |
588 tier_dict[key1]["tier 4.2"] += 1 | 580 tier_dict[key1]["tier 4.2"] += 1 |
589 | 581 |
590 var_id = '-'.join([chrom, pos, ref, alt]) | 582 else: |
583 tier = "5" | |
584 counter_tier5 += 1 | |
585 tier_dict[key1]["tier 5"] += 1 | |
586 | |
587 chrom, pos = re.split(r'\#', key1) | |
588 var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt]) | |
591 sample_tag = key2[:-5] | 589 sample_tag = key2[:-5] |
592 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time | 590 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time |
593 # exclude identical tag from array2, to prevent comparison to itself | 591 # exclude identical tag from array2, to prevent comparison to itself |
594 same_tag = np.where(array2 == sample_tag) | 592 same_tag = np.where(array2 == sample_tag) |
595 index_array2 = np.arange(0, len(array2), 1) | 593 index_array2 = np.arange(0, len(array2), 1) |
632 | 630 |
633 # tags which have identical parts: | 631 # tags which have identical parts: |
634 if min_value == 0 or dist2 == 0: | 632 if min_value == 0 or dist2 == 0: |
635 min_tags_list_zeros.append(tag) | 633 min_tags_list_zeros.append(tag) |
636 chimera_tags.append(max_tag) | 634 chimera_tags.append(max_tag) |
637 # chimeric = True | 635 |
638 # else: | |
639 # chimeric = False | |
640 | |
641 # if mate_b is False: | |
642 # text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric) | |
643 # else: | |
644 # text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric) | |
645 i += 1 | 636 i += 1 |
646 chimera_tags = [x for x in chimera_tags if x != []] | 637 chimera_tags = [x for x in chimera_tags if x != []] |
647 chimera_tags_new = [] | 638 chimera_tags_new = [] |
648 for i in chimera_tags: | 639 for i in chimera_tags: |
649 if len(i) > 1: | 640 if len(i) > 1: |
650 for t in i: | 641 for t in i: |
651 chimera_tags_new.append(t) | 642 chimera_tags_new.append(t) |
652 else: | 643 else: |
653 chimera_tags_new.extend(i) | 644 chimera_tags_new.extend(i) |
654 chimera_tags_new = np.asarray(chimera_tags_new) | |
655 chimera = ", ".join(chimera_tags_new) | 645 chimera = ", ".join(chimera_tags_new) |
656 else: | 646 else: |
647 chimera_tags_new = [] | |
657 chimera = "" | 648 chimera = "" |
649 | |
650 if len(chimera_tags_new) > 0: | |
651 chimera_tags_new.append(sample_tag) | |
652 key_chimera = ",".join(sorted(chimera_tags_new)) | |
653 if key_chimera in chimeric_tag.keys(): | |
654 chimeric_tag[key_chimera].append(float(tier)) | |
655 else: | |
656 chimeric_tag[key_chimera] = [float(tier)] | |
658 | 657 |
659 if (read_pos1 == -1): | 658 if (read_pos1 == -1): |
660 read_pos1 = read_len_median1 = None | 659 read_pos1 = read_len_median1 = None |
661 if (read_pos4 == -1): | 660 if (read_pos4 == -1): |
662 read_pos4 = read_len_median4 = None | 661 read_pos4 = read_len_median4 = None |
671 | 670 |
672 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 671 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
673 {'type': 'formula', | 672 {'type': 'formula', |
674 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 673 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
675 'format': format1, | 674 'format': format1, |
676 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 675 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) |
677 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 676 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
678 {'type': 'formula', | 677 {'type': 'formula', |
679 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), | 678 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), |
680 'format': format3, | 679 'format': format3, |
681 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 680 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) |
682 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 681 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
683 {'type': 'formula', | 682 {'type': 'formula', |
684 'criteria': '=$B${}>="3"'.format(row + 1), | 683 'criteria': '=$B${}>="3"'.format(row + 1), |
685 'format': format2, | 684 'format': format2, |
686 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 685 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) |
687 | |
688 row += 3 | 686 row += 3 |
689 | 687 |
688 if chimera_correction: | |
689 chimeric_dcs_high_tiers = 0 | |
690 chimeric_dcs = 0 | |
691 for keys_chimera in chimeric_tag.keys(): | |
692 tiers = chimeric_tag[keys_chimera] | |
693 chimeric_dcs += len(tiers) - 1 | |
694 high_tiers = sum(1 for t in tiers if t < 3.) | |
695 if high_tiers == len(tiers): | |
696 chimeric_dcs_high_tiers += high_tiers - 1 | |
697 else: | |
698 chimeric_dcs_high_tiers += high_tiers | |
699 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) | |
700 | |
690 # sheet 2 | 701 # sheet 2 |
691 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (Du Novo)', 'AF (Du Novo)', | 702 if chimera_correction: |
692 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | 703 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
693 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 704 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', |
694 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2') | 705 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
706 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') | |
707 else: | |
708 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | |
709 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | |
710 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | |
711 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') | |
695 | 712 |
696 ws2.write_row(0, 0, header_line2) | 713 ws2.write_row(0, 0, header_line2) |
697 row = 0 | 714 row = 0 |
698 | 715 |
699 for key1, value1 in sorted(tier_dict.items()): | 716 for key1, value1 in sorted(tier_dict.items()): |
700 if key1 in pure_tags_dict_short.keys(): | 717 if key1 in pure_tags_dict_short.keys(): |
701 i = np.where(np.array(['#'.join(str(i) for i in z) | 718 i = np.where(np.array(['#'.join(str(i) for i in z) |
702 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | 719 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] |
703 ref = mut_array[i, 9] | 720 ref = mut_array[i, 2] |
704 alt = mut_array[i, 10] | 721 alt = mut_array[i, 3] |
705 chrom, pos = re.split(r'\#', key1) | 722 chrom, pos = re.split(r'\#', key1) |
706 ref_count = cvrg_dict[key1][0] | 723 ref_count = cvrg_dict[key1][0] |
707 alt_count = cvrg_dict[key1][1] | 724 alt_count = cvrg_dict[key1][1] |
708 cvrg = ref_count + alt_count | 725 cvrg = ref_count + alt_count |
709 | 726 |
710 var_id = '-'.join([chrom, pos, ref, alt]) | 727 var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt]) |
711 lst = [var_id, cvrg] | 728 lst = [var_id, cvrg] |
712 used_tiers = [] | 729 used_tiers = [] |
713 cum_af = [] | 730 cum_af = [] |
714 for key2, value2 in sorted(value1.items()): | 731 for key2, value2 in sorted(value1.items()): |
715 # calculate cummulative AF | 732 # calculate cummulative AF |
716 used_tiers.append(value2) | 733 used_tiers.append(value2) |
717 if len(used_tiers) > 1: | 734 if len(used_tiers) > 1: |
718 cum = safe_div(sum(used_tiers), cvrg) | 735 cum = safe_div(sum(used_tiers), cvrg) |
719 cum_af.append(cum) | 736 cum_af.append(cum) |
720 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg), (cvrg - sum(used_tiers[-4:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-4:]))), alt_count, safe_div(alt_count, cvrg)]) | 737 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) |
738 if chimera_correction: | |
739 chimeras_all = chimera_dict[key1][0] | |
740 new_alt = sum(used_tiers) - chimeras_all | |
741 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers))) | |
742 if fraction_chimeras is None: | |
743 fraction_chimeras = 0. | |
744 new_cvrg = cvrg * (1. - fraction_chimeras) | |
745 lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)]) | |
746 lst.extend([(cvrg - sum(used_tiers[-5:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-5:])))]) | |
747 if chimera_correction: | |
748 chimeras_all = chimera_dict[key1][1] | |
749 new_alt = sum(used_tiers[0:6]) - chimeras_all | |
750 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:6]))) | |
751 if fraction_chimeras is None: | |
752 fraction_chimeras = 0. | |
753 new_cvrg = (cvrg - sum(used_tiers[-5:])) * (1. - fraction_chimeras) | |
754 lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)]) | |
755 lst.extend([alt_count, safe_div(alt_count, cvrg)]) | |
721 lst.extend(used_tiers) | 756 lst.extend(used_tiers) |
722 lst.extend(cum_af) | 757 lst.extend(cum_af) |
723 lst = tuple(lst) | 758 lst = tuple(lst) |
724 ws2.write_row(row + 1, 0, lst) | 759 ws2.write_row(row + 1, 0, lst) |
725 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) | 760 if chimera_correction: |
726 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) | 761 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format1, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) |
727 ws2.conditional_format('P{}:S{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:S{} P1:S1'.format(row + 2, row + 2)}) | 762 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format3, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) |
763 ws2.conditional_format('V{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format2, 'multi_range': 'V{}:Z{} V1:Z1'.format(row + 2, row + 2)}) | |
764 else: | |
765 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) | |
766 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) | |
767 ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:T{} P1:T1'.format(row + 2, row + 2)}) | |
728 row += 1 | 768 row += 1 |
729 | 769 |
730 # sheet 3 | 770 # sheet 3 |
731 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), | 771 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), |
732 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), | 772 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), |
733 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), | 773 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), |
734 ("tier 4.1", counter_tier41), ("tier 4.2", counter_tier42)] | 774 ("tier 4.2", counter_tier42), ("tier 5", counter_tier5)] |
735 | 775 |
736 header = ("tier", "count") | 776 header = ("tier", "count") |
737 ws3.write_row(0, 0, header) | 777 ws3.write_row(0, 0, header) |
738 | 778 |
739 for i in range(len(sheet3)): | 779 for i in range(len(sheet3)): |
746 {'type': 'formula', | 786 {'type': 'formula', |
747 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2), | 787 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2), |
748 'format': format3}) | 788 'format': format3}) |
749 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | 789 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), |
750 {'type': 'formula', | 790 {'type': 'formula', |
751 'criteria': '=OR($A${}="tier 3.1", $A${}="tier 3.2", $A${}="tier 4.1", $A${}="tier 4.2")'.format(i + 2, i + 2, i + 2, i + 2), | 791 'criteria': '=$A${}>="3"'.format(i + 2), |
752 'format': format2}) | 792 'format': format2}) |
753 | 793 |
754 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "remaining variants")] | 794 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 5", "remaining variants")] |
755 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | 795 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", |
756 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", | 796 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
757 "4081", "4098", "5", "10", "", ""), | 797 "4081", "4098", "5", "10", "", ""), |
758 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, | 798 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, |
759 "289", "0", "0", "0", "0", "0", "0", "3", "6", None, None, None, None, | 799 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, |
760 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], | 800 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], |
761 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", | 801 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", |
762 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", | 802 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", |
763 "0", "4081", "4098", "5", "10", "", ""), | 803 "0", "4081", "4098", "5", "10", "", ""), |
764 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "11068", "268", "268", "270", "288", "289", | 804 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", |
765 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", | 805 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", |
766 "7", "4081", "4098", "5", "10", "", "")], | 806 "7", "0", "0", "4081", "4098", "5", "10", "", "")], |
767 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", | 807 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", |
768 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "1", | 808 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", |
769 "6", "47170", "41149", "", ""), | 809 "0", "0", "1", "6", "47170", "41149", "", ""), |
770 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", | 810 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", |
771 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "1", | 811 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", |
772 "6", "47170", "41149", "", "")], | 812 "0", "0", "1", "6", "47170", "41149", "", "")], |
773 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", | 813 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", |
774 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", | 814 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
775 "4081", "4098", "5", "10", "", ""), | 815 "4081", "4098", "5", "10", "", ""), |
776 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, | 816 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, |
777 "289", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", None, None, "0", "0", | 817 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", |
778 "0", "0", "4081", "4098", "5", "10", "", "")], | 818 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], |
779 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", | 819 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", |
780 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", | 820 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
781 "4081", "4098", "5", "10", "", ""), | 821 "4081", "4098", "5", "10", "", ""), |
782 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", | 822 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", |
783 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", | 823 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
784 "4081", "4098", "5", "10", "", "")], | 824 "4081", "4098", "5", "10", "", "")], |
785 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, | 825 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, |
786 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", | 826 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", |
787 "0", "0", "0", "4081", "4098", "5", "10", "", ""), | 827 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), |
788 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", | 828 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", |
789 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "1", "7", | 829 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", |
790 "4081", "4098", "5", "10", "", "")], | 830 "0", "0", "4081", "4098", "5", "10", "", "")], |
791 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", | 831 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", |
792 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "4081", | 832 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", |
793 "4098", "5", "10", "", ""), | 833 "4098", "5", "10", "", ""), |
794 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", | 834 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", |
795 "1", "1", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "4081", | 835 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", |
796 "4098", "5", "10", "", "")], | 836 "4098", "5", "10", "", "")], |
797 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", | 837 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", |
798 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", | 838 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", |
799 "3", "3", "47170", "41149", "", ""), | 839 "0", "0", "3", "3", "47170", "41149", "", ""), |
800 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, | 840 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, |
801 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", | 841 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", |
802 "0", "0", "0", "1", "3", "3", "47170", "41149", "", "")], | 842 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], |
803 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", | 843 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", |
804 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | 844 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", |
805 "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), | 845 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), |
806 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", | 846 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", |
807 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | 847 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", |
808 "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], | 848 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], |
809 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "0", "255", "276", "269", | 849 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", |
810 "5", "6", "5", "6", "0", "0", "5", "6", "0", "0", "1", "1", "0", "0", "0", "0", "1", | 850 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), |
811 "1", "5348", "5350", "", ""), | |
812 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 851 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
813 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 852 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
814 "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 853 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
815 [("Chr5:5-20000-13983-G-C", "4.2", "ATGTTGTGAATAACCCACAC", "ab1.ba2", "209", "186", "255", "276", "269", | 854 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", |
816 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "1", | 855 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", |
817 "1", "5348", "5350", "", ""), | 856 "0", "0", "0", "1", "1", "5348", "5350", "", ""), |
857 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", | |
858 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", | |
859 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | |
860 [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", | |
861 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", | |
862 "0", "1", "1", "5348", "5350", "", ""), | |
818 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 863 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
819 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 864 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
820 "0", "0", "0", "1", "1", "5348", "5350", "", "")]] | 865 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] |
821 | 866 |
822 ws3.write(11, 0, "Description of tiers with examples") | 867 start_row = 15 |
823 ws3.write_row(12, 0, header_line) | 868 ws3.write(start_row, 0, "Description of tiers with examples") |
869 ws3.write_row(start_row + 1, 0, header_line) | |
824 row = 0 | 870 row = 0 |
825 for i in range(len(description_tiers)): | 871 for i in range(len(description_tiers)): |
826 ws3.write_row(13 + row + i + 1, 0, description_tiers[i]) | 872 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i]) |
827 ex = examples_tiers[i] | 873 ex = examples_tiers[i] |
828 for k in range(len(ex)): | 874 for k in range(len(ex)): |
829 ws3.write_row(13 + row + i + k + 2, 0, ex[k]) | 875 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) |
830 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(13 + row + i + k + 2, 13 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) | 876 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) |
831 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), | 877 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), |
832 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2), | 878 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), |
833 'format': format3, | 879 'format': format3, |
834 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) | 880 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) |
835 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), | 881 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), |
836 {'type': 'formula', | 882 {'type': 'formula', |
837 'criteria': '=$B${}>="3"'.format(13 + row + i + k + 2), | 883 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), |
838 'format': format2, | 884 'format': format2, |
839 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) | 885 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) |
840 row += 3 | 886 row += 3 |
841 workbook.close() | 887 workbook.close() |
842 | 888 |
843 | 889 |
844 if __name__ == '__main__': | 890 if __name__ == '__main__': |