# HG changeset patch # User mheinzl # Date 1660908217 0 # Node ID e46d5e377760c700ce23d6a4757ae67ec4f2a441 # Parent 8cec772c0bf1ad3aeb787d066c481107240e692c planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty diff -r 8cec772c0bf1 -r e46d5e377760 mut2read.py --- a/mut2read.py Fri Aug 05 08:23:34 2022 +0000 +++ b/mut2read.py Fri Aug 19 11:23:37 2022 +0000 @@ -40,6 +40,8 @@ help='Output FASTQ file of reads with mutations.') parser.add_argument('--outputJson', help='Output JSON file to store collected data.') + parser.add_argument('--refalttiers', action="store_true", + help='Store also information about the reference allele.') return parser @@ -52,6 +54,7 @@ file3 = args.familiesFile outfile = args.outputFastq json_file = args.outputJson + refalttiers = args.refalttiers if os.path.isfile(file1) is False: sys.exit("Error: Could not find '{}'".format(file1)) @@ -189,16 +192,26 @@ line = line.rstrip('\n') splits = line.split('\t') tag = splits[0] - - if tag in tag_dict or tag in tag_dict_ref: - str1 = splits[4] - curr_seq = str1.replace("-", "") - str2 = splits[5] - curr_qual = str2.replace(" ", "") - out.write("@" + splits[0] + "." + splits[1] + "." + splits[2] + "\n") - out.write(curr_seq + "\n") - out.write("+" + "\n") - out.write(curr_qual + "\n") + if refalttiers is True: + if tag in tag_dict or tag in tag_dict_ref: + str1 = splits[4] + curr_seq = str1.replace("-", "") + str2 = splits[5] + curr_qual = str2.replace(" ", "") + out.write("@" + splits[0] + "." + splits[1] + "." + splits[2] + "\n") + out.write(curr_seq + "\n") + out.write("+" + "\n") + out.write(curr_qual + "\n") + else: + if tag in tag_dict: + str1 = splits[4] + curr_seq = str1.replace("-", "") + str2 = splits[5] + curr_qual = str2.replace(" ", "") + out.write("@" + splits[0] + "." + splits[1] + "." + splits[2] + "\n") + out.write(curr_seq + "\n") + out.write("+" + "\n") + out.write(curr_qual + "\n") if __name__ == '__main__': diff -r 8cec772c0bf1 -r e46d5e377760 mut2read.xml --- a/mut2read.xml Fri Aug 05 08:23:34 2022 +0000 +++ b/mut2read.xml Fri Aug 19 11:23:37 2022 +0000 @@ -1,5 +1,5 @@ - + Extracts all tags that carry a mutation in the duplex consensus sequence (DCS) va_macros.xml @@ -12,6 +12,7 @@ --mutFile '$file1' --bamFile bam_input.bam --familiesFile '$file3' + $refalttiers --outputFastq '$output_fastq' --outputJson '$output_json' ]]> @@ -20,6 +21,7 @@ + @@ -40,7 +42,7 @@ Takes a VCF file with mutations, a BAM file of aligned DCS reads, and a tabular file with aligned families as input and prints all tags of reads that carry a mutation or have the reference allele to a user-specified output file and creates a fastq file of -reads of tags with a mutation and the reference allele. +reads of tags with a mutation (and optional the reference allele). **Input** diff -r 8cec772c0bf1 -r e46d5e377760 mut2sscs.xml --- a/mut2sscs.xml Fri Aug 05 08:23:34 2022 +0000 +++ b/mut2sscs.xml Fri Aug 19 11:23:37 2022 +0000 @@ -1,5 +1,5 @@ - + Extracts all tags from the single-stranded consensus sequence (SSCS) bam file that carries a mutation at the same position a mutation is called in the duplex consensus sequence (DCS) and calculates their frequencies va_macros.xml diff -r 8cec772c0bf1 -r e46d5e377760 read2mut.py --- a/read2mut.py Fri Aug 05 08:23:34 2022 +0000 +++ b/read2mut.py Fri Aug 19 11:23:37 2022 +0000 @@ -67,6 +67,9 @@ help='Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the read.') parser.add_argument('--reads_threshold', type=float, default=1.0, help='Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped.') + parser.add_argument('--refalttiers', action="store_true", + help='Store also information about the reference allele.') + return parser @@ -87,6 +90,7 @@ outfile2 = args.outputFile2 outfile3 = args.outputFile3 outputFile_csv = args.outputFile_csv + refalttiers = args.refalttiers thresh = args.thresh phred_score = args.phred @@ -425,8 +429,8 @@ counts_mut = 0 chimeric_tag_list = [] chimeric_tag = {} + if (key1 in pure_tags_dict_short.keys()) or (key1 in pure_tags_dict_ref.keys()): # ref or alt - # if key1 not in np.array(['#'.join(str(i) for i in z) # for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]): # continue @@ -478,6 +482,9 @@ elif key2[:-5] in tag_dict_ref.keys(): variant_type = "ref" + if refalttiers is False and variant_type == "ref": # if we only want information about alt tiers, skip all refs + continue + if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): @@ -682,8 +689,6 @@ # used_keys_ref.append(key2[:-5]) counts_mut += 1 - if key2[:-5] == "CTTGAACACGTCAGCATGCATGGAGA": - print(key1, variant_type, (alt1f + alt2f + alt3f + alt4f) > 0.5, (ref1f + ref2f + ref3f + ref4f) > 0.5) if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)): if variant_type == "alt": tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f @@ -1345,10 +1350,9 @@ ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) + row += 3 else: - change_tier_after_print.append((line, line2, trimmed_actual_high_tier)) - - row += 3 + change_tier_after_print.append((line, line2, trimmed_actual_high_tier)) if chimera_correction: chimeric_dcs_high_tiers = 0 @@ -1416,20 +1420,32 @@ row += 3 # sheet 2 - if chimera_correction: - header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', - 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', - 'tier 3.1 (alt)', 'tier 3.2 (alt)', 'tier 4 (alt)', 'tier 5.1 (alt)', 'tier 5.2 (alt)', 'tier 5.3 (alt)', 'tier 5.4 (alt)', 'tier 5.5 (alt)', 'tier 6 (alt)', 'tier 7 (alt)', - 'tier 1.1 (ref)', 'tier 1.2 (ref)', 'tier 2.1 (ref)', 'tier 2.2 (ref)', 'tier 2.3 (ref)', 'tier 2.4 (ref)', 'tier 2.5 (ref)', - 'tier 3.1 (ref)', 'tier 3.2 (ref)', 'tier 4 (ref)', 'tier 5.1 (ref)', 'tier 5.2 (ref)', 'tier 5.3 (ref)', 'tier 5.4 (ref)', 'tier 5.5 (ref)', 'tier 6 (ref)', 'tier 7 (ref)' - ) + if refalttiers: + if chimera_correction: + header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', + 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', + 'tier 3.1 (alt)', 'tier 3.2 (alt)', 'tier 4 (alt)', 'tier 5.1 (alt)', 'tier 5.2 (alt)', 'tier 5.3 (alt)', 'tier 5.4 (alt)', 'tier 5.5 (alt)', 'tier 6 (alt)', 'tier 7 (alt)', + 'tier 1.1 (ref)', 'tier 1.2 (ref)', 'tier 2.1 (ref)', 'tier 2.2 (ref)', 'tier 2.3 (ref)', 'tier 2.4 (ref)', 'tier 2.5 (ref)', + 'tier 3.1 (ref)', 'tier 3.2 (ref)', 'tier 4 (ref)', 'tier 5.1 (ref)', 'tier 5.2 (ref)', 'tier 5.3 (ref)', 'tier 5.4 (ref)', 'tier 5.5 (ref)', 'tier 6 (ref)', 'tier 7 (ref)' + ) + else: + header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', + 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', + 'tier 3.1 (alt)', 'tier 3.2 (alt)', 'tier 4 (alt)', 'tier 5.1 (alt)', 'tier 5.2 (alt)', 'tier 5.3 (alt)', 'tier 5.4 (alt)', 'tier 5.5 (alt)', 'tier 6 (alt)', 'tier 7 (alt)', + 'tier 1.1 (ref)', 'tier 1.2 (ref)', 'tier 2.1 (ref)', 'tier 2.2 (ref)', 'tier 2.3 (ref)', 'tier 2.4 (ref)', 'tier 2.5 (ref)', + 'tier 3.1 (ref)', 'tier 3.2 (ref)', 'tier 4 (ref)', 'tier 5.1 (ref)', 'tier 5.2 (ref)', 'tier 5.3 (ref)', 'tier 5.4 (ref)', 'tier 5.5 (ref)', 'tier 6 (ref)', 'tier 7 (ref)' + ) else: - header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', - 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', - 'tier 3.1 (alt)', 'tier 3.2 (alt)', 'tier 4 (alt)', 'tier 5.1 (alt)', 'tier 5.2 (alt)', 'tier 5.3 (alt)', 'tier 5.4 (alt)', 'tier 5.5 (alt)', 'tier 6 (alt)', 'tier 7 (alt)', - 'tier 1.1 (ref)', 'tier 1.2 (ref)', 'tier 2.1 (ref)', 'tier 2.2 (ref)', 'tier 2.3 (ref)', 'tier 2.4 (ref)', 'tier 2.5 (ref)', - 'tier 3.1 (ref)', 'tier 3.2 (ref)', 'tier 4 (ref)', 'tier 5.1 (ref)', 'tier 5.2 (ref)', 'tier 5.3 (ref)', 'tier 5.4 (ref)', 'tier 5.5 (ref)', 'tier 6 (ref)', 'tier 7 (ref)' - ) + if chimera_correction: + header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', + 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', + 'tier 3.1 (alt)', 'tier 3.2 (alt)', 'tier 4 (alt)', 'tier 5.1 (alt)', 'tier 5.2 (alt)', 'tier 5.3 (alt)', 'tier 5.4 (alt)', 'tier 5.5 (alt)', 'tier 6 (alt)', 'tier 7 (alt)' + ) + else: + header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', + 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', + 'tier 3.1 (alt)', 'tier 3.2 (alt)', 'tier 4 (alt)', 'tier 5.1 (alt)', 'tier 5.2 (alt)', 'tier 5.3 (alt)', 'tier 5.4 (alt)', 'tier 5.5 (alt)', 'tier 6 (alt)', 'tier 7 (alt)' + ) ws2.write_row(0, 0, header_line2) row = 0 @@ -1458,7 +1474,10 @@ if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place continue lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) - lst.extend([(sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])), sum(used_tiers_ref[0:7]), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])))]) + if refalttiers: + lst.extend([(sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])), sum(used_tiers_ref[0:7]), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])))]) + else: + lst.extend([(cvrg - sum(used_tiers[-10:])), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (cvrg - sum(used_tiers[-10:])))]) if chimera_correction: chimeras_all = chimera_dict[key1][1] new_alt = sum(used_tiers[0:7]) - chimeras_all @@ -1469,18 +1488,29 @@ lst.extend([new_cvrg, chimeras_all, safe_div(new_alt, new_cvrg)]) lst.extend([alt_count, safe_div(alt_count, cvrg)]) lst.extend(used_tiers) - lst.extend(used_tiers_ref) + if refalttiers: + lst.extend(used_tiers_ref) # lst.extend(cum_af) lst = tuple(lst) ws2.write_row(row + 1, 0, lst) - if chimera_correction: - ws2.conditional_format('N{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$N$1="tier 1.1 (alt)"', 'format': format12, 'multi_range': 'N{}:O{} N1:O1 AE{}:AF{} AE1:AF1'.format(row + 2, row + 2, row + 2, row + 2)}) - ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 2.1 (alt)"', 'format': format32, 'multi_range': 'P{}:T{} P1:T1 AG{}:AK{} AG1:AK1'.format(row + 2, row + 2, row + 2, row + 2)}) - ws2.conditional_format('U{}:AD{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$U$1="tier 3.1 (alt)"', 'format': format22, 'multi_range': 'U{}:AD{} U1:AD1 AL{}:AU{} AL1:AU1'.format(row + 2, row + 2, row + 2, row + 2)}) + if refalttiers: + if chimera_correction: + ws2.conditional_format('N{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$N$1="tier 1.1 (alt)"', 'format': format12, 'multi_range': 'N{}:O{} N1:O1 AE{}:AF{} AE1:AF1'.format(row + 2, row + 2, row + 2, row + 2)}) + ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 2.1 (alt)"', 'format': format32, 'multi_range': 'P{}:T{} P1:T1 AG{}:AK{} AG1:AK1'.format(row + 2, row + 2, row + 2, row + 2)}) + ws2.conditional_format('U{}:AD{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$U$1="tier 3.1 (alt)"', 'format': format22, 'multi_range': 'U{}:AD{} U1:AD1 AL{}:AU{} AL1:AU1'.format(row + 2, row + 2, row + 2, row + 2)}) + else: + ws2.conditional_format('K{}:L{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$K$1="tier 1.1 (alt)"', 'format': format12, 'multi_range': 'K{}:L{} K1:L1 AB{}:AC{} AB1:AC1'.format(row + 2, row + 2, row + 2, row + 2)}) + ws2.conditional_format('M{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$M$1="tier 2.1 (alt)"', 'format': format32, 'multi_range': 'M{}:Q{} M1:Q1 AD{}:AH{} AD1:AH1'.format(row + 2, row + 2, row + 2, row + 2)}) + ws2.conditional_format('R{}:AA{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 3.1 (alt)"', 'format': format22, 'multi_range': 'R{}:AA{} R1:AA1 AI{}:AR{} AI1:AR1'.format(row + 2, row + 2, row + 2, row + 2)}) else: - ws2.conditional_format('K{}:L{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$K$1="tier 1.1 (alt)"', 'format': format12, 'multi_range': 'K{}:L{} K1:L1 AB{}:AC{} AB1:AC1'.format(row + 2, row + 2, row + 2, row + 2)}) - ws2.conditional_format('M{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$M$1="tier 2.1 (alt)"', 'format': format32, 'multi_range': 'M{}:Q{} M1:Q1 AD{}:AH{} AD1:AH1'.format(row + 2, row + 2, row + 2, row + 2)}) - ws2.conditional_format('R{}:AA{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 3.1 (alt)"', 'format': format22, 'multi_range': 'R{}:AA{} R1:AA1 AI{}:AR{} AI1:AR1'.format(row + 2, row + 2, row + 2, row + 2)}) + if chimera_correction: + ws2.conditional_format('M{}:N{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$M$1="tier 1.1 (alt)"', 'format': format12, 'multi_range': 'M{}:N{} M1:N1'.format(row + 2, row + 2)}) + ws2.conditional_format('O{}:S{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$O$1="tier 2.1 (alt)"', 'format': format32, 'multi_range': 'O{}:S{} O1:S1'.format(row + 2, row + 2)}) + ws2.conditional_format('T{}:AC{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$T$1="tier 3.1 (alt)"', 'format': format22, 'multi_range': 'T{}:AC{} T1:AC1'.format(row + 2, row + 2)}) + else: + ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1 (alt)"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) + ws2.conditional_format('L{}:P{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1 (alt)"', 'format': format32, 'multi_range': 'L{}:P{} L1:P1'.format(row + 2, row + 2)}) + ws2.conditional_format('Q{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$Q$1="tier 3.1 (alt)"', 'format': format22, 'multi_range': 'Q{}:Z{} Q1:Z1'.format(row + 2, row + 2)}) row += 1 # sheet 3 @@ -1612,9 +1642,9 @@ ex = examples_tiers[i] for k in range(len(ex)): ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) - ws3.conditional_format('M{}:N{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), - {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), - 'format': format13, + ws3.conditional_format('M{}:N{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), + {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), + 'format': format13, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) ws3.conditional_format('M{}:N{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), diff -r 8cec772c0bf1 -r e46d5e377760 read2mut.xml --- a/read2mut.xml Fri Aug 05 08:23:34 2022 +0000 +++ b/read2mut.xml Fri Aug 19 11:23:37 2022 +0000 @@ -1,5 +1,5 @@ - + Looks for reads with a mutation at known positions and calculates frequencies and stats. va_macros.xml @@ -19,6 +19,7 @@ --phred '$phred' --trim '$trim' $chimera_correction + $refalttiers --softclipping_dist '$softclipping_dist' --reads_threshold '$reads_threshold' --outputFile '$output_xlsx' @@ -37,7 +38,8 @@ - + + @@ -57,6 +59,7 @@ + @@ -84,7 +87,7 @@ in the DCS. **Dataset 4:** JSON file generated by the **DCS mutations to SSCS stats** tool -stats of tags that carry a mutation and the reference allele in the SSCS at the same position a mutation +stats of tags that carry a mutation (and optional the reference allele) in the SSCS at the same position a mutation is called in the DCS. **Output**