comparison read2mut.py @ 6:11a2a34f8a2b draft

planemo upload for repository https://github.com/gpovysil/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Mon, 18 Jan 2021 09:49:15 +0000
parents d9cbf833624e
children ded0dc6a20d3
comparison
equal deleted inserted replaced
5:d9cbf833624e 6:11a2a34f8a2b
31 import sys 31 import sys
32 32
33 import numpy as np 33 import numpy as np
34 import pysam 34 import pysam
35 import xlsxwriter 35 import xlsxwriter
36 from cyvcf2 import VCF
37
36 38
37 def make_argparser(): 39 def make_argparser():
38 parser = argparse.ArgumentParser(description='Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') 40 parser = argparse.ArgumentParser(description='Takes a VCF file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.')
39 parser.add_argument('--mutFile', 41 parser.add_argument('--mutFile',
40 help='TABULAR file with DCS mutations.') 42 help='VCF file with DCS mutations.')
41 parser.add_argument('--bamFile', 43 parser.add_argument('--bamFile',
42 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') 44 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).')
43 parser.add_argument('--inputJson', 45 parser.add_argument('--inputJson',
44 help='JSON file with data collected by mut2read.py.') 46 help='JSON file with data collected by mut2read.py.')
45 parser.add_argument('--sscsJson', 47 parser.add_argument('--sscsJson',
46 help='JSON file with SSCS counts collected by mut2sscs.py.') 48 help='JSON file with SSCS counts collected by mut2sscs.py.')
47 parser.add_argument('--outputFile', 49 parser.add_argument('--outputFile',
48 help='Output xlsx file of mutation details.') 50 help='Output xlsx file with summary of mutations.')
51 parser.add_argument('--outputFile2',
52 help='Output xlsx file with allele frequencies of mutations.')
53 parser.add_argument('--outputFile3',
54 help='Output xlsx file with examples of the tier classification.')
49 parser.add_argument('--thresh', type=int, default=0, 55 parser.add_argument('--thresh', type=int, default=0,
50 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') 56 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.')
51 parser.add_argument('--phred', type=int, default=20, 57 parser.add_argument('--phred', type=int, default=20,
52 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') 58 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.')
53 parser.add_argument('--trim', type=int, default=10, 59 parser.add_argument('--trim', type=int, default=10,
54 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') 60 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.')
55 parser.add_argument('--chimera_correction', action="store_true", 61 parser.add_argument('--chimera_correction', action="store_true",
56 help='Count chimeric variants and correct the variant frequencies.') 62 help='Count chimeric variants and correct the variant frequencies')
63 parser.add_argument('--softclipping_dist', type=int, default=15,
64 help='Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the read.')
65 parser.add_argument('--reads_threshold', type=float, default=1.0,
66 help='Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped.')
57 return parser 67 return parser
58 68
59 69
60 def safe_div(x, y): 70 def safe_div(x, y):
61 if y == 0: 71 if y == 0:
69 file1 = args.mutFile 79 file1 = args.mutFile
70 file2 = args.bamFile 80 file2 = args.bamFile
71 json_file = args.inputJson 81 json_file = args.inputJson
72 sscs_json = args.sscsJson 82 sscs_json = args.sscsJson
73 outfile = args.outputFile 83 outfile = args.outputFile
84 outfile2 = args.outputFile2
85 outfile3 = args.outputFile3
74 thresh = args.thresh 86 thresh = args.thresh
75 phred_score = args.phred 87 phred_score = args.phred
76 trim = args.trim 88 trim = args.trim
77 chimera_correction = args.chimera_correction 89 chimera_correction = args.chimera_correction
90 thr = args.softclipping_dist
91 threshold_reads = args.reads_threshold
78 92
79 if os.path.isfile(file1) is False: 93 if os.path.isfile(file1) is False:
80 sys.exit("Error: Could not find '{}'".format(file1)) 94 sys.exit("Error: Could not find '{}'".format(file1))
81 if os.path.isfile(file2) is False: 95 if os.path.isfile(file2) is False:
82 sys.exit("Error: Could not find '{}'".format(file2)) 96 sys.exit("Error: Could not find '{}'".format(file2))
86 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) 100 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh))
87 if phred_score < 0: 101 if phred_score < 0:
88 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) 102 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score))
89 if trim < 0: 103 if trim < 0:
90 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) 104 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh))
91 105 if thr <= 0:
92 with open(file1, 'r') as mut: 106 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thr))
93 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) 107
94
95 # load dicts 108 # load dicts
96 with open(json_file, "r") as f: 109 with open(json_file, "r") as f:
97 (tag_dict, cvrg_dict) = json.load(f) 110 (tag_dict, cvrg_dict) = json.load(f)
98 111
99 with open(sscs_json, "r") as f: 112 with open(sscs_json, "r") as f:
101 114
102 # read bam file 115 # read bam file
103 # pysam.index(file2) 116 # pysam.index(file2)
104 bam = pysam.AlignmentFile(file2, "rb") 117 bam = pysam.AlignmentFile(file2, "rb")
105 118
106 # 4. create mut_dict 119 # create mut_dict
107 mut_dict = {} 120 mut_dict = {}
108 mut_read_pos_dict = {} 121 mut_read_pos_dict = {}
109 mut_read_dict = {} 122 mut_read_dict = {}
110 reads_dict = {} 123 reads_dict = {}
111 if mut_array.shape == (1, 13): 124 mut_read_cigar_dict = {}
112 mut_array = mut_array.reshape((1, len(mut_array))) 125 i = 0
113 126 mut_array = []
114 for m in range(0, len(mut_array[:, 0])): 127
115 print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) 128 for count, variant in enumerate(VCF(file1)):
116 chrom = mut_array[m, 1] 129 #if count == 2000:
117 stop_pos = mut_array[m, 2].astype(int) 130 # break
131 chrom = variant.CHROM
132 stop_pos = variant.start
118 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) 133 chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
119 ref = mut_array[m, 9] 134 ref = variant.REF
120 alt = mut_array[m, 10] 135 alt = variant.ALT[0]
121 mut_dict[chrom_stop_pos] = {} 136 # nc = variant.format('NC')
122 mut_read_pos_dict[chrom_stop_pos] = {} 137 ad = variant.format('AD')
123 reads_dict[chrom_stop_pos] = {} 138 if len(ref) == len(alt):
124 139 mut_array.append([chrom, stop_pos, ref, alt])
125 for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=100000000): 140 i += 1
126 if pileupcolumn.reference_pos == stop_pos - 1: 141 mut_dict[chrom_stop_pos] = {}
127 count_alt = 0 142 mut_read_pos_dict[chrom_stop_pos] = {}
128 count_ref = 0 143 reads_dict[chrom_stop_pos] = {}
129 count_indel = 0 144 mut_read_cigar_dict[chrom_stop_pos] = {}
130 count_n = 0 145
131 count_other = 0 146 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000):
132 count_lowq = 0 147 if pileupcolumn.reference_pos == stop_pos:
133 n = 0 148 count_alt = 0
134 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), 149 count_ref = 0
135 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) 150 count_indel = 0
136 for pileupread in pileupcolumn.pileups: 151 count_n = 0
137 n += 1 152 count_other = 0
138 if not pileupread.is_del and not pileupread.is_refskip: 153 count_lowq = 0
139 tag = pileupread.alignment.query_name 154 n = 0
140 nuc = pileupread.alignment.query_sequence[pileupread.query_position] 155 #print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
141 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 156 # "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
142 if phred < phred_score: 157 for pileupread in pileupcolumn.pileups:
143 nuc = "lowQ" 158 n += 1
144 if tag not in mut_dict[chrom_stop_pos]: 159 if not pileupread.is_del and not pileupread.is_refskip:
145 mut_dict[chrom_stop_pos][tag] = {} 160 tag = pileupread.alignment.query_name
146 if nuc in mut_dict[chrom_stop_pos][tag]: 161 nuc = pileupread.alignment.query_sequence[pileupread.query_position]
147 mut_dict[chrom_stop_pos][tag][nuc] += 1 162 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33
148 else: 163 if phred < phred_score:
149 mut_dict[chrom_stop_pos][tag][nuc] = 1 164 nuc = "lowQ"
150 if tag not in mut_read_pos_dict[chrom_stop_pos]: 165 if tag not in mut_dict[chrom_stop_pos]:
151 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 166 mut_dict[chrom_stop_pos][tag] = {}
152 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) 167 if nuc in mut_dict[chrom_stop_pos][tag]:
153 else: 168 mut_dict[chrom_stop_pos][tag][nuc] += 1
154 mut_read_pos_dict[chrom_stop_pos][tag] = np.append(
155 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1)
156 reads_dict[chrom_stop_pos][tag] = np.append(
157 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence))
158 if nuc == alt:
159 count_alt += 1
160 if tag not in mut_read_dict:
161 mut_read_dict[tag] = {}
162 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
163 else: 169 else:
164 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) 170 mut_dict[chrom_stop_pos][tag][nuc] = 1
165 elif nuc == ref: 171 if tag not in mut_read_pos_dict[chrom_stop_pos]:
166 count_ref += 1 172 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1]
167 elif nuc == "N": 173 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)]
168 count_n += 1 174 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring]
169 elif nuc == "lowQ": 175 else:
170 count_lowq += 1 176 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1)
171 else: 177 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence))
172 count_other += 1 178 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring)
173 else: 179 if nuc == alt:
174 count_indel += 1 180 count_alt += 1
175 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) 181 if tag not in mut_read_dict:
176 182 mut_read_dict[tag] = {}
183 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
184 else:
185 mut_read_dict[tag][chrom_stop_pos] = (alt, ref)
186 elif nuc == ref:
187 count_ref += 1
188 elif nuc == "N":
189 count_n += 1
190 elif nuc == "lowQ":
191 count_lowq += 1
192 else:
193 count_other += 1
194 else:
195 count_indel += 1
196
197 #print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq))
198 #else:
199 # print("indels are currently not evaluated")
200 mut_array = np.array(mut_array)
177 for read in bam.fetch(until_eof=True): 201 for read in bam.fetch(until_eof=True):
178 if read.is_unmapped: 202 if read.is_unmapped:
179 pure_tag = read.query_name[:-5] 203 pure_tag = read.query_name[:-5]
180 nuc = "na" 204 nuc = "na"
181 for key in tag_dict[pure_tag].keys(): 205 for key in tag_dict[pure_tag].keys():
187 mut_dict[key][read.query_name][nuc] += 1 211 mut_dict[key][read.query_name][nuc] += 1
188 else: 212 else:
189 mut_dict[key][read.query_name][nuc] = 1 213 mut_dict[key][read.query_name][nuc] = 1
190 bam.close() 214 bam.close()
191 215
192 # 5. create pure_tags_dict 216 # create pure_tags_dict
193 pure_tags_dict = {} 217 pure_tags_dict = {}
194 for key1, value1 in sorted(mut_dict.items()): 218 for key1, value1 in sorted(mut_dict.items()):
219 if len(np.where(np.array(['#'.join(str(i) for i in z)
220 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0:
221 continue
222
195 i = np.where(np.array(['#'.join(str(i) for i in z) 223 i = np.where(np.array(['#'.join(str(i) for i in z)
196 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] 224 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
197 ref = mut_array[i, 9] 225 ref = mut_array[i, 2]
198 alt = mut_array[i, 10] 226 alt = mut_array[i, 3]
199 pure_tags_dict[key1] = {} 227 pure_tags_dict[key1] = {}
200 for key2, value2 in sorted(value1.items()): 228 for key2, value2 in sorted(value1.items()):
201 for key3, value3 in value2.items(): 229 for key3, value3 in value2.items():
202 pure_tag = key2[:-5] 230 pure_tag = key2[:-5]
203 if key3 == alt: 231 if key3 == alt:
204 if pure_tag in pure_tags_dict[key1]: 232 if pure_tag in pure_tags_dict[key1]:
205 pure_tags_dict[key1][pure_tag] += 1 233 pure_tags_dict[key1][pure_tag] += 1
206 else: 234 else:
207 pure_tags_dict[key1][pure_tag] = 1 235 pure_tags_dict[key1][pure_tag] = 1
208 236
209 # 6. create pure_tags_dict_short with thresh 237 # create pure_tags_dict_short with thresh
210 if thresh > 0: 238 if thresh > 0:
211 pure_tags_dict_short = {} 239 pure_tags_dict_short = {}
212 for key, value in sorted(pure_tags_dict.items()): 240 for key, value in sorted(pure_tags_dict.items()):
213 if len(value) < thresh: 241 if len(value) < thresh:
214 pure_tags_dict_short[key] = value 242 pure_tags_dict_short[key] = value
215 else: 243 else:
216 pure_tags_dict_short = pure_tags_dict 244 pure_tags_dict_short = pure_tags_dict
217 245
218 # 7. output summary with threshold 246 # whole_array = []
247 # for k in pure_tags_dict.values():
248 # if len(k) != 0:
249 # keys = k.keys()
250 # if len(keys) > 1:
251 # for k1 in keys:
252 # whole_array.append(k1)
253 # else:
254 # whole_array.append(keys[0])
255
256 # output summary with threshold
219 workbook = xlsxwriter.Workbook(outfile) 257 workbook = xlsxwriter.Workbook(outfile)
258 workbook2 = xlsxwriter.Workbook(outfile2)
259 workbook3 = xlsxwriter.Workbook(outfile3)
220 ws1 = workbook.add_worksheet("Results") 260 ws1 = workbook.add_worksheet("Results")
221 ws2 = workbook.add_worksheet("Allele frequencies") 261 ws2 = workbook2.add_worksheet("Allele frequencies")
222 ws3 = workbook.add_worksheet("Tiers") 262 ws3 = workbook3.add_worksheet("Tiers")
223 263
224 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green 264 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green
225 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red 265 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red
226 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow 266 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow
267
268 format12 = workbook2.add_format({'bg_color': '#BCF5A9'}) # green
269 format22 = workbook2.add_format({'bg_color': '#FFC7CE'}) # red
270 format32 = workbook2.add_format({'bg_color': '#FACC2E'}) # yellow
271
272 format13 = workbook3.add_format({'bg_color': '#BCF5A9'}) # green
273 format23 = workbook3.add_format({'bg_color': '#FFC7CE'}) # red
274 format33 = workbook3.add_format({'bg_color': '#FACC2E'}) # yellow
227 275
228 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', 276 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab',
229 'read median length.ba', 'DCS median length', 277 'read median length.ba', 'DCS median length',
230 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', 278 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba',
231 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', 279 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba',
242 counter_tier24 = 0 290 counter_tier24 = 0
243 counter_tier31 = 0 291 counter_tier31 = 0
244 counter_tier32 = 0 292 counter_tier32 = 0
245 counter_tier41 = 0 293 counter_tier41 = 0
246 counter_tier42 = 0 294 counter_tier42 = 0
247 #if chimera_correction: 295 # if chimera_correction:
248 # counter_tier43 = 0 296 # counter_tier43 = 0
249 counter_tier5 = 0 297 counter_tier51 = 0
298 counter_tier52 = 0
299 counter_tier53 = 0
300 counter_tier54 = 0
301 counter_tier55 = 0
302 counter_tier6 = 0
250 303
251 row = 1 304 row = 1
252 tier_dict = {} 305 tier_dict = {}
253 chimera_dict = {} 306 chimera_dict = {}
254 for key1, value1 in sorted(mut_dict.items()): 307 for key1, value1 in sorted(mut_dict.items()):
255 counts_mut = 0 308 counts_mut = 0
256 chimeric_tag_list = [] 309 chimeric_tag_list = []
257 chimeric_tag = {} 310 chimeric_tag = {}
258 if key1 in pure_tags_dict_short.keys(): 311 if key1 in pure_tags_dict_short.keys():
259 i = np.where(np.array(['#'.join(str(i) for i in z) 312 i = np.where(np.array(['#'.join(str(i) for i in z)
260 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] 313 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
261 ref = mut_array[i, 9] 314 ref = mut_array[i, 2]
262 alt = mut_array[i, 10] 315 alt = mut_array[i, 3]
263 dcs_median = cvrg_dict[key1][2] 316 dcs_median = cvrg_dict[key1][2]
264 whole_array = pure_tags_dict_short[key1].keys() 317 whole_array = pure_tags_dict_short[key1].keys()
265 318
266 tier_dict[key1] = {} 319 tier_dict[key1] = {}
267 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0)] 320 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0),
321 ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0),
322 ("tier 6", 0)]
268 for k, v in values_tier_dict: 323 for k, v in values_tier_dict:
269 tier_dict[key1][k] = v 324 tier_dict[key1][k] = v
270 325
271 used_keys = [] 326 used_keys = []
272 if 'ab' in mut_pos_dict[key1].keys(): 327 if 'ab' in mut_pos_dict[key1].keys():
453 total4 = total4new = na4 = lowq4 = 0 508 total4 = total4new = na4 = lowq4 = 0
454 ref4 = alt4 = ref4f = alt4f = 0 509 ref4 = alt4 = ref4f = alt4f = 0
455 510
456 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 511 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1
457 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 512 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0
458 513 cigars_dcs1 = cigars_dcs2 = cigars_dcs3 = cigars_dcs4 = []
514 pos_read1 = pos_read2 = pos_read3 = pos_read4 = []
515 end_read1 = end_read2 = end_read3 = end_read4 = []
459 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): 516 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys():
460 read_pos1 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']) 517 read_pos1 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']))
461 read_len_median1 = np.median(reads_dict[key1][key2[:-5] + '.ab.1']) 518 read_len_median1 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.1']))
519 cigars_dcs1 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.1']
520 #print(mut_read_cigar_dict[key1][key2[:-5] + '.ab.1'])
521 pos_read1 = mut_read_pos_dict[key1][key2[:-5] + '.ab.1']
522 #print(cigars_dcs1)
523 end_read1 = reads_dict[key1][key2[:-5] + '.ab.1']
462 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): 524 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys():
463 read_pos2 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']) 525 read_pos2 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']))
464 read_len_median2 = np.median(reads_dict[key1][key2[:-5] + '.ab.2']) 526 read_len_median2 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.2']))
527 cigars_dcs2 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.2']
528 pos_read2 = mut_read_pos_dict[key1][key2[:-5] + '.ab.2']
529 end_read2 = reads_dict[key1][key2[:-5] + '.ab.2']
465 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): 530 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys():
466 read_pos3 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']) 531 read_pos3 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']))
467 read_len_median3 = np.median(reads_dict[key1][key2[:-5] + '.ba.1']) 532 read_len_median3 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.1']))
533 cigars_dcs3 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.1']
534 pos_read3 = mut_read_pos_dict[key1][key2[:-5] + '.ba.1']
535 end_read3 = reads_dict[key1][key2[:-5] + '.ba.1']
468 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): 536 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys():
469 read_pos4 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']) 537 read_pos4 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']))
470 read_len_median4 = np.median(reads_dict[key1][key2[:-5] + '.ba.2']) 538 read_len_median4 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.2']))
539 #print(mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'])
540 cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2']
541
542 pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2']
543 #print(cigars_dcs4)
544 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2']
471 545
472 used_keys.append(key2[:-5]) 546 used_keys.append(key2[:-5])
473 counts_mut += 1 547 counts_mut += 1
474 if (alt1f + alt2f + alt3f + alt4f) > 0.5: 548 if (alt1f + alt2f + alt3f + alt4f) > 0.5:
475 if total1new == 0: 549 if total1new == 0:
495 569
496 beg1 = beg4 = beg2 = beg3 = 0 570 beg1 = beg4 = beg2 = beg3 = 0
497 571
498 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) 572 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
499 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) 573 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
500 574
501
502 trimmed = False 575 trimmed = False
503 contradictory = False 576 contradictory = False
504 577 softclipped_mutation_allMates = False
505 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant 578 softclipped_mutation_oneOfTwoMates = False
579 softclipped_mutation_oneOfTwoSSCS = False
580 softclipped_mutation_oneMate = False
581 softclipped_mutation_oneMateOneSSCS = False
582 print()
583 print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3)
584 dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = []
585 dist_end_read1 = dist_end_read2 = dist_end_read3 = dist_end_read4 = []
586 ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False
587 ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False
588
589 # mate 1 - SSCS ab
590 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1]
591 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads
592
593 if any(ij is True for ij in softclipped_idx1):
594 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1]
595 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1]
596 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1]
597 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)]
598 dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)]
599
600 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
601 if any(ij is True for ij in softclipped_both_ends_idx1):
602 print(softclipped_both_ends_idx1)
603 for nr, indx in enumerate(softclipped_both_ends_idx1):
604 if indx:
605 if dist_start_read1[nr] <= dist_end_read1[nr]:
606 dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number
607 else:
608 dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number
609 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads
610 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads
611 print(key1, "mate1 ab", dist_start_read1, dist_end_read1, cigars_dcs1, ratio1, ratio_dist_start1, ratio_dist_end1)
612
613 # mate 1 - SSCS ba
614 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4]
615 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads
616 if any(ij is True for ij in softclipped_idx4):
617 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4]
618 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4]
619 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4]
620 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)]
621 dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)]
622
623 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
624 if any(ij is True for ij in softclipped_both_ends_idx4):
625 print(softclipped_both_ends_idx4)
626 for nr, indx in enumerate(softclipped_both_ends_idx4):
627 if indx:
628 if dist_start_read4[nr] <= dist_end_read4[nr]:
629 dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number
630 else:
631 dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number
632 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads
633 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads
634 print(key1, "mate1 ba", dist_start_read4, dist_end_read4,cigars_dcs4, ratio4, ratio_dist_start4, ratio_dist_end4)
635
636 # mate 2 - SSCS ab
637 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2]
638 #print(sum(softclipped_idx2))
639 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads
640 if any(ij is True for ij in softclipped_idx2):
641 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2]
642 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2]
643 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2]
644 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)]
645 dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)]
646
647 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
648 if any(ij is True for ij in softclipped_both_ends_idx2):
649 print(softclipped_both_ends_idx2)
650 for nr, indx in enumerate(softclipped_both_ends_idx2):
651 if indx:
652 if dist_start_read2[nr] <= dist_end_read2[nr]:
653 dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number
654 else:
655 dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number
656 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads
657 #print(ratio_dist_end2)
658 #print([True if x <= thr else False for x in ratio_dist_end2])
659 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads
660 print(key1, "mate2 ab", dist_start_read2, dist_end_read2,cigars_dcs2, ratio2, ratio_dist_start2, ratio_dist_end2)
661
662 # mate 2 - SSCS ba
663 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3]
664 ratio3 = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3))) >= threshold_reads
665 if any(ij is True for ij in softclipped_idx3):
666 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3]
667 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3]
668 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3]
669 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)]
670 dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)]
671
672 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
673 if any(ij is True for ij in softclipped_both_ends_idx3):
674 print(softclipped_both_ends_idx3)
675 for nr, indx in enumerate(softclipped_both_ends_idx3):
676 if indx:
677 if dist_start_read3[nr] <= dist_end_read3[nr]:
678 dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh
679 else:
680 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number
681 #print([True if x <= thr else False for x in dist_start_read3])
682 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads
683 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads
684 print(key1, "mate2 ba", dist_start_read3, dist_end_read3,cigars_dcs3, ratio3, ratio_dist_start3, ratio_dist_end3)
685
686 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant
506 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | 687 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) |
507 (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & 688 (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) &
508 all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): 689 all(float(ij) == 0. for ij in [alt1ff, alt4ff]))):
509 alt1ff = 0 690 alt1ff = 0
510 alt4ff = 0 691 alt4ff = 0
511 alt2ff = 0 692 alt2ff = 0
512 alt3ff = 0 693 alt3ff = 0
513 trimmed = False 694 trimmed = False
514 contradictory = True 695 contradictory = True
696 # softclipping tiers
697 # information of both mates available --> all reads for both mates and SSCS are softclipped
698 elif (ratio1 & ratio4 & ratio2 & ratio3 &
699 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) &
700 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
701 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
702 softclipped_mutation_allMates = True
703 softclipped_mutation_oneOfTwoMates = False
704 softclipped_mutation_oneOfTwoSSCS = False
705 softclipped_mutation_oneMate = False
706 softclipped_mutation_oneMateOneSSCS = False
707 alt1ff = 0
708 alt4ff = 0
709 alt2ff = 0
710 alt3ff = 0
711 trimmed = False
712 contradictory = False
713 print(key1, "softclipped_mutation_allMates", softclipped_mutation_allMates)
714 # information of both mates available --> only one mate softclipped
715 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) |
716 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) &
717 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
718 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
719 softclipped_mutation_allMates = False
720 softclipped_mutation_oneOfTwoMates = True
721 softclipped_mutation_oneOfTwoSSCS = False
722 softclipped_mutation_oneMate = False
723 softclipped_mutation_oneMateOneSSCS = False
724 alt1ff = 0
725 alt4ff = 0
726 alt2ff = 0
727 alt3ff = 0
728 trimmed = False
729 contradictory = False
730 print(key1, "softclipped_mutation_oneOfTwoMates", softclipped_mutation_oneOfTwoMates)
731 # information of both mates available --> only one mate softclipped
732 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) &
733 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) &
734 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
735 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
736 softclipped_mutation_allMates = False
737 softclipped_mutation_oneOfTwoMates = False
738 softclipped_mutation_oneOfTwoSSCS = True
739 softclipped_mutation_oneMate = False
740 softclipped_mutation_oneMateOneSSCS = False
741 alt1ff = 0
742 alt4ff = 0
743 alt2ff = 0
744 alt3ff = 0
745 trimmed = False
746 contradictory = False
747 print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff])
748 # information of one mate available --> all reads of one mate are softclipped
749 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) &
750 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) |
751 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) &
752 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available
753 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
754 #if ((((len(dist_start_read1) > 0 | len(dist_end_read1) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1))) &
755 # ((len(dist_start_read4) > 0 | len(dist_end_read4) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4)))) |
756 # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) &
757 # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))):
758 softclipped_mutation_allMates = False
759 softclipped_mutation_oneOfTwoMates = False
760 softclipped_mutation_oneOfTwoSSCS = False
761 softclipped_mutation_oneMate = True
762 softclipped_mutation_oneMateOneSSCS = False
763 alt1ff = 0
764 alt4ff = 0
765 alt2ff = 0
766 alt3ff = 0
767 trimmed = False
768 contradictory = False
769 print(key1, "softclipped_mutation_oneMate", softclipped_mutation_oneMate)
770 # information of one mate available --> only one SSCS is softclipped
771 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) &
772 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) |
773 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) &
774 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available
775 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
776 #if ((all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1)) |
777 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4))) |
778 # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) |
779 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))):
780 softclipped_mutation_allMates = False
781 softclipped_mutation_oneOfTwoMates = False
782 softclipped_mutation_oneOfTwoSSCS = False
783 softclipped_mutation_oneMate = False
784 softclipped_mutation_oneMateOneSSCS = True
785 alt1ff = 0
786 alt4ff = 0
787 alt2ff = 0
788 alt3ff = 0
789 trimmed = False
790 contradictory = False
791 print(key1, "softclipped_mutation_oneMateOneSSCS", softclipped_mutation_oneMateOneSSCS)
792
515 else: 793 else:
516 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): 794 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))):
517 beg1 = total1new 795 beg1 = total1new
518 total1new = 0 796 total1new = 0
519 alt1ff = 0 797 alt1ff = 0
524 beg4 = total4new 802 beg4 = total4new
525 total4new = 0 803 total4new = 0
526 alt4ff = 0 804 alt4ff = 0
527 alt4f = 0 805 alt4f = 0
528 trimmed = True 806 trimmed = True
529 807
530 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): 808 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))):
531 beg2 = total2new 809 beg2 = total2new
532 total2new = 0 810 total2new = 0
533 alt2ff = 0 811 alt2ff = 0
534 alt2f = 0 812 alt2f = 0
535 trimmed = True 813 trimmed = True
536 814
537 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): 815 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))):
538 beg3 = total3new 816 beg3 = total3new
539 total3new = 0 817 total3new = 0
540 alt3ff = 0 818 alt3ff = 0
541 alt3f = 0 819 alt3f = 0
619 elif (contradictory): 897 elif (contradictory):
620 tier = "4.2" 898 tier = "4.2"
621 counter_tier42 += 1 899 counter_tier42 += 1
622 tier_dict[key1]["tier 4.2"] += 1 900 tier_dict[key1]["tier 4.2"] += 1
623 901
624 else: 902 elif softclipped_mutation_allMates:
625 tier = "5" 903 tier = "5.1"
626 counter_tier5 += 1 904 counter_tier51 += 1
627 tier_dict[key1]["tier 5"] += 1 905 tier_dict[key1]["tier 5.1"] += 1
906
907 elif softclipped_mutation_oneOfTwoMates:
908 tier = "5.2"
909 counter_tier52 += 1
910 tier_dict[key1]["tier 5.2"] += 1
911
912 elif softclipped_mutation_oneOfTwoSSCS:
913 tier = "5.3"
914 counter_tier53 += 1
915 tier_dict[key1]["tier 5.3"] += 1
916
917 elif softclipped_mutation_oneMate:
918 tier = "5.4"
919 counter_tier54 += 1
920 tier_dict[key1]["tier 5.4"] += 1
921
922 elif softclipped_mutation_oneMateOneSSCS:
923 tier = "5.5"
924 counter_tier55 += 1
925 tier_dict[key1]["tier 5.5"] += 1
926
927 else:
928 tier = "6"
929 counter_tier6 += 1
930 tier_dict[key1]["tier 6"] += 1
628 931
629 chrom, pos = re.split(r'\#', key1) 932 chrom, pos = re.split(r'\#', key1)
630 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) 933 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt])
631 sample_tag = key2[:-5] 934 sample_tag = key2[:-5]
632 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time 935 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time
700 chimera_tags_new.append(sample_tag) 1003 chimera_tags_new.append(sample_tag)
701 key_chimera = ",".join(sorted(chimera_tags_new)) 1004 key_chimera = ",".join(sorted(chimera_tags_new))
702 if key_chimera in chimeric_tag.keys(): 1005 if key_chimera in chimeric_tag.keys():
703 chimeric_tag[key_chimera].append(float(tier)) 1006 chimeric_tag[key_chimera].append(float(tier))
704 else: 1007 else:
705 chimeric_tag[key_chimera] = [float(tier)] 1008 chimeric_tag[key_chimera] = [float(tier)]
706 1009
707 if (read_pos1 == -1): 1010 if (read_pos1 == -1):
708 read_pos1 = read_len_median1 = None 1011 read_pos1 = read_len_median1 = None
709 if (read_pos4 == -1): 1012 if (read_pos4 == -1):
710 read_pos4 = read_len_median4 = None 1013 read_pos4 = read_len_median4 = None
748 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) 1051 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers)
749 # sheet 2 1052 # sheet 2
750 if chimera_correction: 1053 if chimera_correction:
751 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1054 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
752 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 1055 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4',
753 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1056 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
754 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') 1057 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6')
755 else: 1058 else:
756 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1059 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
757 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 1060 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4',
758 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1061 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
759 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') 1062 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6')
760 1063
761 ws2.write_row(0, 0, header_line2) 1064 ws2.write_row(0, 0, header_line2)
762 row = 0 1065 row = 0
763 1066
764 for key1, value1 in sorted(tier_dict.items()): 1067 for key1, value1 in sorted(tier_dict.items()):
765 if key1 in pure_tags_dict_short.keys(): 1068 if key1 in pure_tags_dict_short.keys():
766 i = np.where(np.array(['#'.join(str(i) for i in z) 1069 i = np.where(np.array(['#'.join(str(i) for i in z)
767 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] 1070 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
768 ref = mut_array[i, 9] 1071 ref = mut_array[i, 2]
769 alt = mut_array[i, 10] 1072 alt = mut_array[i, 3]
770 chrom, pos = re.split(r'\#', key1) 1073 chrom, pos = re.split(r'\#', key1)
771 ref_count = cvrg_dict[key1][0] 1074 ref_count = cvrg_dict[key1][0]
772 alt_count = cvrg_dict[key1][1] 1075 alt_count = cvrg_dict[key1][1]
773 cvrg = ref_count + alt_count 1076 cvrg = ref_count + alt_count
774 1077
780 # calculate cummulative AF 1083 # calculate cummulative AF
781 used_tiers.append(value2) 1084 used_tiers.append(value2)
782 if len(used_tiers) > 1: 1085 if len(used_tiers) > 1:
783 cum = safe_div(sum(used_tiers), cvrg) 1086 cum = safe_div(sum(used_tiers), cvrg)
784 cum_af.append(cum) 1087 cum_af.append(cum)
1088 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place
1089 continue
785 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) 1090 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)])
786 if chimera_correction: 1091 if chimera_correction:
787 chimeras_all = chimera_dict[key1][0] 1092 chimeras_all = chimera_dict[key1][0]
788 new_alt = sum(used_tiers) - chimeras_all 1093 new_alt = sum(used_tiers) - chimeras_all
789 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers))) 1094 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers)))
804 lst.extend(used_tiers) 1109 lst.extend(used_tiers)
805 lst.extend(cum_af) 1110 lst.extend(cum_af)
806 lst = tuple(lst) 1111 lst = tuple(lst)
807 ws2.write_row(row + 1, 0, lst) 1112 ws2.write_row(row + 1, 0, lst)
808 if chimera_correction: 1113 if chimera_correction:
809 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format1, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) 1114 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format12, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)})
810 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format3, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) 1115 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format32, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)})
811 ws2.conditional_format('V{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format2, 'multi_range': 'V{}:Z{} V1:Z1'.format(row + 2, row + 2)}) 1116 ws2.conditional_format('V{}:AE{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format22, 'multi_range': 'V{}:AE{} V1:AE1'.format(row + 2, row + 2)})
812 else: 1117 else:
813 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) 1118 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)})
814 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) 1119 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format32, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)})
815 ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:T{} P1:T1'.format(row + 2, row + 2)}) 1120 ws2.conditional_format('P{}:Y{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format22, 'multi_range': 'P{}:Y{} P1:Y1'.format(row + 2, row + 2)})
816 row += 1 1121 row += 1
817 1122
818 # sheet 3 1123 # sheet 3
819 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), 1124 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21),
820 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), 1125 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24),
821 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), 1126 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41),
822 ("tier 4.2", counter_tier42), ("tier 5", counter_tier5)] 1127 ("tier 4.2", counter_tier42), ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52),
1128 ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6)]
823 1129
824 header = ("tier", "count") 1130 header = ("tier", "count")
825 ws3.write_row(0, 0, header) 1131 ws3.write_row(0, 0, header)
826 1132
827 for i in range(len(sheet3)): 1133 for i in range(len(sheet3)):
837 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), 1143 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2),
838 {'type': 'formula', 1144 {'type': 'formula',
839 'criteria': '=$A${}>="3"'.format(i + 2), 1145 'criteria': '=$A${}>="3"'.format(i + 2),
840 'format': format2}) 1146 'format': format2})
841 1147
842 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 5", "remaining variants")] 1148 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""),
1149 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"),
1150 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"),
1151 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"),
1152 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"),
1153 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"),
1154 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"),
1155 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"),
1156 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"),
1157 ("Tier 5.1", "variants is close to softclipping in both mates"),
1158 ("Tier 5.2", "variants is close to softclipping in one of the mates"),
1159 ("Tier 5.3", "variants is close to softclipping in one of the SSCS of both mates"),
1160 ("Tier 5.4", "variants is close to softclipping in one mate (no information of second mate"),
1161 ("Tier 5.5", "variants is close to softclipping in one of the SSCS (no information of the second mate"),
1162 ("Tier 6", "remaining variants")]
843 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", 1163 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289",
844 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", 1164 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
845 "4081", "4098", "5", "10", "", ""), 1165 "4081", "4098", "5", "10", "", ""),
846 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, 1166 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None,
847 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, 1167 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None,
897 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", 1217 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269",
898 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), 1218 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""),
899 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, 1219 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None,
900 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", 1220 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
901 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], 1221 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
902 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", 1222 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263",
903 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", 1223 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0",
904 "0", "0", "0", "1", "1", "5348", "5350", "", ""), 1224 "0", "0", "0", "1", "1", "5348", "5350", "", ""),
905 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", 1225 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263",
906 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", 1226 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0",
907 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], 1227 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
908 [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", 1228 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)],
1229 [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269",
909 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", 1230 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0",
910 "0", "1", "1", "5348", "5350", "", ""), 1231 "0", "1", "1", "5348", "5350", "", ""),
911 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, 1232 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None,
912 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", 1233 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
913 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] 1234 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]]
914 1235
915 start_row = 15 1236 start_row = 20
916 ws3.write(start_row, 0, "Description of tiers with examples") 1237 ws3.write(start_row, 0, "Description of tiers with examples")
917 ws3.write_row(start_row + 1, 0, header_line) 1238 ws3.write_row(start_row + 1, 0, header_line)
918 row = 0 1239 row = 0
919 for i in range(len(description_tiers)): 1240 for i in range(len(description_tiers)):
920 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i]) 1241 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i])
921 ex = examples_tiers[i] 1242 ex = examples_tiers[i]
922 for k in range(len(ex)): 1243 for k in range(len(ex)):
923 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) 1244 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k])
924 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) 1245 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format13, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)})
925 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), 1246 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3),
926 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 1247 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2),
927 'format': format3, 1248 'format': format33,
928 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) 1249 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)})
929 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), 1250 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3),
930 {'type': 'formula', 1251 {'type': 'formula',
931 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), 1252 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2),
932 'format': format2, 1253 'format': format23,
933 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) 1254 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)})
934 row += 3 1255 row += 3
935 workbook.close() 1256 workbook.close()
1257 workbook2.close()
1258 workbook3.close()
936 1259
937 1260
938 if __name__ == '__main__': 1261 if __name__ == '__main__':
939 sys.exit(read2mut(sys.argv)) 1262 sys.exit(read2mut(sys.argv))
1263