# HG changeset patch # User iuc # Date 1574290055 18000 # Node ID 8d29173d49a9ccf6be420da2b6cb069062709b70 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8" diff -r 000000000000 -r 8d29173d49a9 mut2read.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mut2read.py Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,155 @@ +#!/usr/bin/env python + +"""mut2read.py + +Author -- Gundula Povysil +Contact -- povysil@bioinf.jku.at + +Takes a tabular file with mutations and a BAM file as input and prints +all tags of reads that carry the mutation to a user specified output file. +Creates fastq file of reads of tags with mutation. + +======= ========== ================= ================================ +Version Date Author Description +0.2.1 2019-10-27 Gundula Povysil - +======= ========== ================= ================================ + +USAGE: python mut2read.py DCS_Mutations.tabular DCS.bam Aligned_Families.tabular Interesting_Reads.fastq + tag_count_dict.json +""" + +import argparse +import json +import os +import sys + +import numpy as np +import pysam + + +def make_argparser(): + parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file and creates a fastq file of reads of tags with mutation.') + parser.add_argument('--mutFile', + help='TABULAR file with DCS mutations.') + parser.add_argument('--bamFile', + help='BAM file with aligned DCS reads.') + parser.add_argument('--familiesFile', + help='TABULAR file with aligned families.') + parser.add_argument('--outputFastq', + help='Output FASTQ file of reads with mutations.') + parser.add_argument('--outputJson', + help='Output JSON file to store collected data.') + return parser + + +def mut2read(argv): + parser = make_argparser() + args = parser.parse_args(argv[1:]) + + file1 = args.mutFile + file2 = args.bamFile + file3 = args.familiesFile + outfile = args.outputFastq + json_file = args.outputJson + + if os.path.isfile(file1) is False: + sys.exit("Error: Could not find '{}'".format(file1)) + + if os.path.isfile(file2) is False: + sys.exit("Error: Could not find '{}'".format(file2)) + + if os.path.isfile(file3) is False: + sys.exit("Error: Could not find '{}'".format(file3)) + + # read mut file + with open(file1, 'r') as mut: + mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') + + # read dcs bam file + # pysam.index(file2) + bam = pysam.AlignmentFile(file2, "rb") + + # get tags + tag_dict = {} + cvrg_dict = {} + + if len(mut_array) == 13: + mut_array = mut_array.reshape((1, len(mut_array))) + + for m in range(len(mut_array[:, 0])): + print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) + chrom = mut_array[m, 1] + stop_pos = mut_array[m, 2].astype(int) + chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + ref = mut_array[m, 9] + alt = mut_array[m, 10] + + dcs_len = [] + + for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=100000000): + + if pileupcolumn.reference_pos == stop_pos - 1: + count_alt = 0 + count_ref = 0 + count_indel = 0 + count_n = 0 + count_other = 0 + count_lowq = 0 + print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), + "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) + for pileupread in pileupcolumn.pileups: + if not pileupread.is_del and not pileupread.is_refskip: + # query position is None if is_del or is_refskip is set. + nuc = pileupread.alignment.query_sequence[pileupread.query_position] + dcs_len.append(len(pileupread.alignment.query_sequence)) + if nuc == alt: + count_alt += 1 + tag = pileupread.alignment.query_name + if tag in tag_dict: + tag_dict[tag][chrom_stop_pos] = alt + else: + tag_dict[tag] = {} + tag_dict[tag][chrom_stop_pos] = alt + elif nuc == ref: + count_ref += 1 + elif nuc == "N": + count_n += 1 + elif nuc == "lowQ": + count_lowq += 1 + else: + count_other += 1 + else: + count_indel += 1 + dcs_median = np.median(np.array(dcs_len)) + cvrg_dict[chrom_stop_pos] = (count_ref, count_alt, dcs_median) + + print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s, median length of DCS = %s\n" % + (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, + count_indel, count_lowq, dcs_median)) + bam.close() + + with open(json_file, "w") as f: + json.dump((tag_dict, cvrg_dict), f) + + # create fastq from aligned reads + with open(outfile, 'w') as out: + with open(file3, 'r') as families: + for line in families: + line = line.rstrip('\n') + splits = line.split('\t') + tag = splits[0] + + if tag in tag_dict: + str1 = splits[4] + curr_seq = str1.replace("-", "") + str2 = splits[5] + curr_qual = str2.replace(" ", "") + + out.write("@" + splits[0] + "." + splits[1] + "." + splits[2] + "\n") + out.write(curr_seq + "\n") + out.write("+" + "\n") + out.write(curr_qual + "\n") + + +if __name__ == '__main__': + sys.exit(mut2read(sys.argv)) diff -r 000000000000 -r 8d29173d49a9 mut2read.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mut2read.xml Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,69 @@ + + + Extracts all tags that carry a mutation in the duplex consensus sequence (DCS) + + va_macros.xml + + + python + matplotlib + pysam + + + + + + + + + + + + + + + + + + + + + + `_. + +**Dataset 3:** Tabular file with reads as produced by the +**Du Novo: Align families** tool of the `Du Novo Analysis Pipeline +`_ + +**Output** + +The output is a json file containing dictonaries of the tags of reads containing mutations +in the DCS and a fastq file of all reads of these tags. + + ]]> + + + diff -r 000000000000 -r 8d29173d49a9 mut2sscs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mut2sscs.py Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,133 @@ +#!/usr/bin/env python + +"""mut2sscs.py + +Author -- Gundula Povysil +Contact -- povysil@bioinf.jku.at + +Takes a tabular file with mutations from DCS and a BAM file of SSCS as input +and extracts all tags of reads that carry the mutation. +Calculates statistics about number of ab/ba/duplex per mutation. + +======= ========== ================= ================================ +Version Date Author Description +0.2.1 2019-10-27 Gundula Povysil - +======= ========== ================= ================================ + +USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json + +""" + +from __future__ import division + +import argparse +import json +import os +import sys + +import numpy as np +import pysam + + +def make_argparser(): + parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.') + parser.add_argument('--mutFile', + help='TABULAR file with DCS mutations.') + parser.add_argument('--bamFile', + help='BAM file with aligned SSCS reads.') + parser.add_argument('--outputJson', + help='Output JSON file to store SSCS counts.') + return parser + + +def mut2sscs(argv): + parser = make_argparser() + args = parser.parse_args(argv[1:]) + + file1 = args.mutFile + file2 = args.bamFile + sscs_counts_json = args.outputJson + + if os.path.isfile(file1) is False: + sys.exit("Error: Could not find '{}'".format(file1)) + + if os.path.isfile(file2) is False: + sys.exit("Error: Could not find '{}'".format(file2)) + + # 1. read mut file + with open(file1, 'r') as mut: + mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') + + # 2 read SSCS bam file + # pysam.index(file2) + bam = pysam.AlignmentFile(file2, "rb") + + # get tags + mut_pos_dict = {} + ref_pos_dict = {} + if len(mut_array) == 13: + mut_array = mut_array.reshape((1, len(mut_array))) + + for m in range(0, len(mut_array[:, 0])): + print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) + chrom = mut_array[m, 1] + stop_pos = mut_array[m, 2].astype(int) + chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + ref = mut_array[m, 9] + alt = mut_array[m, 10] + + for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): + if pileupcolumn.reference_pos == stop_pos - 1: + count_alt = 0 + count_ref = 0 + count_indel = 0 + print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), + "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) + for pileupread in pileupcolumn.pileups: + if not pileupread.is_del and not pileupread.is_refskip: + tag = pileupread.alignment.query_name + abba = tag[-2:] + # query position is None if is_del or is_refskip is set. + if pileupread.alignment.query_sequence[pileupread.query_position] == alt: + count_alt += 1 + if chrom_stop_pos in mut_pos_dict: + if abba in mut_pos_dict[chrom_stop_pos]: + mut_pos_dict[chrom_stop_pos][abba] += 1 + else: + mut_pos_dict[chrom_stop_pos][abba] = 1 + else: + mut_pos_dict[chrom_stop_pos] = {} + mut_pos_dict[chrom_stop_pos][abba] = 1 + elif pileupread.alignment.query_sequence[pileupread.query_position] == ref: + count_ref += 1 + if chrom_stop_pos in ref_pos_dict: + if abba in ref_pos_dict[chrom_stop_pos]: + ref_pos_dict[chrom_stop_pos][abba] += 1 + else: + ref_pos_dict[chrom_stop_pos][abba] = 1 + else: + ref_pos_dict[chrom_stop_pos] = {} + ref_pos_dict[chrom_stop_pos][abba] = 1 + else: + count_indel += 1 + + print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" % + (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel)) + + # if mutation is in DCS file but not in SSCS, then set counts to NA + if chrom_stop_pos not in mut_pos_dict.keys(): + mut_pos_dict[chrom_stop_pos] = {} + mut_pos_dict[chrom_stop_pos]["ab"] = 0 + mut_pos_dict[chrom_stop_pos]["ba"] = 0 + ref_pos_dict[chrom_stop_pos] = {} + ref_pos_dict[chrom_stop_pos]["ab"] = 0 + ref_pos_dict[chrom_stop_pos]["ba"] = 0 + bam.close() + + # save counts + with open(sscs_counts_json, "w") as f: + json.dump((mut_pos_dict, ref_pos_dict), f) + + +if __name__ == '__main__': + sys.exit(mut2sscs(sys.argv)) diff -r 000000000000 -r 8d29173d49a9 mut2sscs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mut2sscs.xml Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,63 @@ + + + Extracts all tags from the single stranded consensus sequence (SSCS) bam file that carry a mutation at the same position a mutation is called in the duplex consensus sequence (DCS) and calculates their frequencies + + va_macros.xml + + + python + matplotlib + pysam + + + + + + + + + + + + + + + + + + `_. + +**Dataset 3:** Tabular file with reads as produced by the +**Du Novo: Align families** tool of the `Du Novo Analysis Pipeline +`_ + +**Output** + +The output is a json file containing dictonaries with stats of tags that carry a mutation in the SSCS +at the same position a mutation is called in the DCS. + + ]]> + + + diff -r 000000000000 -r 8d29173d49a9 read2mut.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read2mut.py Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,852 @@ +#!/usr/bin/env python + +"""read2mut.py + +Author -- Gundula Povysil +Contact -- povysil@bioinf.jku.at + +Looks for reads with mutation at known +positions and calculates frequencies and stats. + +======= ========== ================= ================================ +Version Date Author Description +0.2.1 2019-10-27 Gundula Povysil - +======= ========== ================= ================================ + + +USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam + --inputJson tag_count_dict.json --sscsJson SSCS_counts.json + --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 + +""" + +from __future__ import division + +import argparse +import itertools +import json +import operator +import os +import re +import sys + +import numpy as np +import pysam +import xlsxwriter + + +def make_argparser(): + parser = argparse.ArgumentParser(description='Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') + parser.add_argument('--mutFile', + help='TABULAR file with DCS mutations.') + parser.add_argument('--bamFile', + help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') + parser.add_argument('--inputJson', + help='JSON file with data collected by mut2read.py.') + parser.add_argument('--sscsJson', + help='JSON file with SSCS counts collected by mut2sscs.py.') + parser.add_argument('--outputFile', + help='Output xlsx file of mutation details.') + parser.add_argument('--thresh', type=int, default=0, + help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') + parser.add_argument('--phred', type=int, default=20, + help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') + parser.add_argument('--trim', type=int, default=10, + help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') + return parser + + +def safe_div(x, y): + if y == 0: + return None + return x / y + + +def read2mut(argv): + parser = make_argparser() + args = parser.parse_args(argv[1:]) + file1 = args.mutFile + file2 = args.bamFile + json_file = args.inputJson + sscs_json = args.sscsJson + outfile = args.outputFile + thresh = args.thresh + phred_score = args.phred + trim = args.trim + + if os.path.isfile(file1) is False: + sys.exit("Error: Could not find '{}'".format(file1)) + if os.path.isfile(file2) is False: + sys.exit("Error: Could not find '{}'".format(file2)) + if os.path.isfile(json_file) is False: + sys.exit("Error: Could not find '{}'".format(json_file)) + if thresh < 0: + sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) + if phred_score < 0: + sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) + if trim < 0: + sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) + + # 1. read mut file + with open(file1, 'r') as mut: + mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') + + # 2. load dicts + with open(json_file, "r") as f: + (tag_dict, cvrg_dict) = json.load(f) + + with open(sscs_json, "r") as f: + (mut_pos_dict, ref_pos_dict) = json.load(f) + + # 3. read bam file + # pysam.index(file2) + bam = pysam.AlignmentFile(file2, "rb") + + # 4. create mut_dict + mut_dict = {} + mut_read_pos_dict = {} + mut_read_dict = {} + reads_dict = {} + if mut_array.shape == (13, ): + mut_array = mut_array.reshape((1, len(mut_array))) + + for m in range(0, len(mut_array[:, 0])): + print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) + # for m in range(0, 5): + chrom = mut_array[m, 1] + stop_pos = mut_array[m, 2].astype(int) + chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + ref = mut_array[m, 9] + alt = mut_array[m, 10] + mut_dict[chrom_stop_pos] = {} + mut_read_pos_dict[chrom_stop_pos] = {} + reads_dict[chrom_stop_pos] = {} + + for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): + if pileupcolumn.reference_pos == stop_pos - 1: + count_alt = 0 + count_ref = 0 + count_indel = 0 + count_n = 0 + count_other = 0 + count_lowq = 0 + n = 0 + print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), + "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) + for pileupread in pileupcolumn.pileups: + n += 1 + if not pileupread.is_del and not pileupread.is_refskip: + tag = pileupread.alignment.query_name + nuc = pileupread.alignment.query_sequence[pileupread.query_position] + phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 + if phred < phred_score: + nuc = "lowQ" + if tag not in mut_dict[chrom_stop_pos]: + mut_dict[chrom_stop_pos][tag] = {} + if nuc in mut_dict[chrom_stop_pos][tag]: + mut_dict[chrom_stop_pos][tag][nuc] += 1 + else: + mut_dict[chrom_stop_pos][tag][nuc] = 1 + if tag not in mut_read_pos_dict[chrom_stop_pos]: + mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 + reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) + else: + mut_read_pos_dict[chrom_stop_pos][tag] = np.append( + mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1) + reads_dict[chrom_stop_pos][tag] = np.append( + reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence)) + + if nuc == alt: + count_alt += 1 + if tag not in mut_read_dict: + mut_read_dict[tag] = {} + mut_read_dict[tag][chrom_stop_pos] = alt + else: + mut_read_dict[tag][chrom_stop_pos] = alt + elif nuc == ref: + count_ref += 1 + elif nuc == "N": + count_n += 1 + elif nuc == "lowQ": + count_lowq += 1 + else: + count_other += 1 + else: + count_indel += 1 + + print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) + + for read in bam.fetch(until_eof=True): + if read.is_unmapped: + pure_tag = read.query_name[:-5] + nuc = "na" + for key in tag_dict[pure_tag].keys(): + if key not in mut_dict: + mut_dict[key] = {} + if read.query_name not in mut_dict[key]: + mut_dict[key][read.query_name] = {} + if nuc in mut_dict[key][read.query_name]: + mut_dict[key][read.query_name][nuc] += 1 + else: + mut_dict[key][read.query_name][nuc] = 1 + bam.close() + + # 5. create pure_tags_dict + pure_tags_dict = {} + for key1, value1 in sorted(mut_dict.items()): + i = np.where(np.array(['#'.join(str(i) for i in z) + for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] + ref = mut_array[i, 9] + alt = mut_array[i, 10] + pure_tags_dict[key1] = {} + for key2, value2 in sorted(value1.items()): + for key3, value3 in value2.items(): + pure_tag = key2[:-5] + if key3 == alt: + if pure_tag in pure_tags_dict[key1]: + pure_tags_dict[key1][pure_tag] += 1 + else: + pure_tags_dict[key1][pure_tag] = 1 + + # 6. create pure_tags_dict_short with thresh + if thresh > 0: + pure_tags_dict_short = {} + for key, value in sorted(pure_tags_dict.items()): + if len(value) < thresh: + pure_tags_dict_short[key] = value + else: + pure_tags_dict_short = pure_tags_dict + + whole_array = [] + for k in pure_tags_dict.values(): + if len(k) != 0: + keys = k.keys() + if len(keys) > 1: + for k1 in keys: + whole_array.append(k1) + else: + whole_array.append(keys[0]) + + # 7. output summary with threshold + workbook = xlsxwriter.Workbook(outfile) + ws1 = workbook.add_worksheet("Results") + ws2 = workbook.add_worksheet("Allele frequencies") + ws3 = workbook.add_worksheet("Tiers") + + format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green + format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red + format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow + + header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', + 'read median length.ba', 'DCS median length', + 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', + 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', + 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', + 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', + 'other mut', 'chimeric tag') + ws1.write_row(0, 0, header_line) + + counter_tier11 = 0 + counter_tier12 = 0 + counter_tier21 = 0 + counter_tier22 = 0 + counter_tier23 = 0 + counter_tier24 = 0 + counter_tier31 = 0 + counter_tier32 = 0 + counter_tier41 = 0 + counter_tier42 = 0 + + row = 1 + tier_dict = {} + for key1, value1 in sorted(mut_dict.items()): + counts_mut = 0 + if key1 in pure_tags_dict_short.keys(): + i = np.where(np.array(['#'.join(str(i) for i in z) + for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] + ref = mut_array[i, 9] + alt = mut_array[i, 10] + dcs_median = cvrg_dict[key1][2] + + tier_dict[key1] = {} + values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0)] + for k, v in values_tier_dict: + tier_dict[key1][k] = v + + used_keys = [] + if 'ab' in mut_pos_dict[key1].keys(): + sscs_mut_ab = mut_pos_dict[key1]['ab'] + else: + sscs_mut_ab = 0 + if 'ba' in mut_pos_dict[key1].keys(): + sscs_mut_ba = mut_pos_dict[key1]['ba'] + else: + sscs_mut_ba = 0 + if 'ab' in ref_pos_dict[key1].keys(): + sscs_ref_ab = ref_pos_dict[key1]['ab'] + else: + sscs_ref_ab = 0 + if 'ba' in ref_pos_dict[key1].keys(): + sscs_ref_ba = ref_pos_dict[key1]['ba'] + else: + sscs_ref_ba = 0 + for key2, value2 in sorted(value1.items()): + add_mut14 = "" + add_mut23 = "" + if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): + if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): + total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) + if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): + na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] + # na1f = na1/total1 + else: + # na1 = na1f = 0 + na1 = 0 + if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): + lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] + # lowq1f = lowq1 / total1 + else: + # lowq1 = lowq1f = 0 + lowq1 = 0 + if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): + ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] + ref1f = ref1 / (total1 - na1 - lowq1) + else: + ref1 = ref1f = 0 + if alt in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): + alt1 = mut_dict[key1][key2[:-5] + '.ab.1'][alt] + alt1f = alt1 / (total1 - na1 - lowq1) + else: + alt1 = alt1f = 0 + total1new = total1 - na1 - lowq1 + if (key2[:-5] + '.ab.1') in mut_read_dict.keys(): + k1 = mut_read_dict[(key2[:-5] + '.ab.1')].keys() + add_mut1 = len(k1) + if add_mut1 > 1: + for k, v in mut_read_dict[(key2[:-5] + '.ab.1')].items(): + if k != key1: + if len(add_mut14) == 0: + add_mut14 = str(k) + "_" + v + else: + add_mut14 = add_mut14 + ", " + str(k) + "_" + v + else: + k1 = [] + else: + total1 = total1new = na1 = lowq1 = 0 + ref1 = alt1 = ref1f = alt1f = 0 + + if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): + total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) + if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): + na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] + # na2f = na2 / total2 + else: + # na2 = na2f = 0 + na2 = 0 + if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): + lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] + # lowq2f = lowq2 / total2 + else: + # lowq2 = lowq2f = 0 + lowq2 = 0 + if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): + ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] + ref2f = ref2 / (total2 - na2 - lowq2) + else: + ref2 = ref2f = 0 + if alt in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): + alt2 = mut_dict[key1][key2[:-5] + '.ab.2'][alt] + alt2f = alt2 / (total2 - na2 - lowq2) + else: + alt2 = alt2f = 0 + total2new = total2 - na2 - lowq2 + if (key2[:-5] + '.ab.2') in mut_read_dict.keys(): + k2 = mut_read_dict[(key2[:-5] + '.ab.2')].keys() + add_mut2 = len(k2) + if add_mut2 > 1: + for k, v in mut_read_dict[(key2[:-5] + '.ab.2')].items(): + if k != key1: + if len(add_mut23) == 0: + add_mut23 = str(k) + "_" + v + else: + add_mut23 = add_mut23 + ", " + str(k) + "_" + v + else: + k2 = [] + else: + total2 = total2new = na2 = lowq2 = 0 + ref2 = alt2 = ref2f = alt2f = 0 + + if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): + total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) + if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): + na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] + # na3f = na3 / total3 + else: + # na3 = na3f = 0 + na3 = 0 + if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): + lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] + # lowq3f = lowq3 / total3 + else: + # lowq3 = lowq3f = 0 + lowq3 = 0 + if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): + ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] + ref3f = ref3 / (total3 - na3 - lowq3) + else: + ref3 = ref3f = 0 + if alt in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): + alt3 = mut_dict[key1][key2[:-5] + '.ba.1'][alt] + alt3f = alt3 / (total3 - na3 - lowq3) + else: + alt3 = alt3f = 0 + total3new = total3 - na3 - lowq3 + if (key2[:-5] + '.ba.1') in mut_read_dict.keys(): + add_mut3 = len(mut_read_dict[(key2[:-5] + '.ba.1')].keys()) + if add_mut3 > 1: + for k, v in mut_read_dict[(key2[:-5] + '.ba.1')].items(): + if k != key1 and k not in k2: + if len(add_mut23) == 0: + add_mut23 = str(k) + "_" + v + else: + add_mut23 = add_mut23 + ", " + str(k) + "_" + v + else: + total3 = total3new = na3 = lowq3 = 0 + ref3 = alt3 = ref3f = alt3f = 0 + + if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): + total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) + if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): + na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] + # na4f = na4 / total4 + else: + # na4 = na4f = 0 + na4 = 0 + if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): + lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] + # lowq4f = lowq4 / total4 + else: + # lowq4 = lowq4f = 0 + lowq4 = 0 + if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): + ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] + ref4f = ref4 / (total4 - na4 - lowq4) + else: + ref4 = ref4f = 0 + if alt in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): + alt4 = mut_dict[key1][key2[:-5] + '.ba.2'][alt] + alt4f = alt4 / (total4 - na4 - lowq4) + else: + alt4 = alt4f = 0 + total4new = total4 - na4 - lowq4 + if (key2[:-5] + '.ba.2') in mut_read_dict.keys(): + add_mut4 = len(mut_read_dict[(key2[:-5] + '.ba.2')].keys()) + if add_mut4 > 1: + for k, v in mut_read_dict[(key2[:-5] + '.ba.2')].items(): + if k != key1 and k not in k1: + if len(add_mut14) == 0: + add_mut14 = str(k) + "_" + v + else: + add_mut14 = add_mut14 + ", " + str(k) + "_" + v + else: + total4 = total4new = na4 = lowq4 = 0 + ref4 = alt4 = ref4f = alt4f = 0 + + read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 + read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 + + if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): + read_pos1 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']) + read_len_median1 = np.median(reads_dict[key1][key2[:-5] + '.ab.1']) + if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): + read_pos2 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']) + read_len_median2 = np.median(reads_dict[key1][key2[:-5] + '.ab.2']) + if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): + read_pos3 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']) + read_len_median3 = np.median(reads_dict[key1][key2[:-5] + '.ba.1']) + if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): + read_pos4 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']) + read_len_median4 = np.median(reads_dict[key1][key2[:-5] + '.ba.2']) + + used_keys.append(key2[:-5]) + counts_mut += 1 + if (alt1f + alt2f + alt3f + alt4f) > 0.5: + if total1new == 0: + ref1f = alt1f = None + alt1ff = -1 + else: + alt1ff = alt1f + if total2new == 0: + ref2f = alt2f = None + alt2ff = -1 + else: + alt2ff = alt2f + if total3new == 0: + ref3f = alt3f = None + alt3ff = -1 + else: + alt3ff = alt3f + if total4new == 0: + ref4f = alt4f = None + alt4ff = -1 + else: + alt4ff = alt4f + + details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4) + details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3) + trimmed = False + if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): + total1new = 0 + alt1ff = 0 + trimmed = True + + if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): + total4new = 0 + alt4ff = 0 + trimmed = True + + if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): + total2new = 0 + alt2ff = 0 + trimmed = True + + if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): + total3new = 0 + alt3ff = 0 + trimmed = True + + chrom, pos = re.split(r'\#', key1) + # assign tiers + if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & + all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | + (all(int(ij) >= 3 for ij in [total2new, total3new]) & + all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): + tier = "1.1" + counter_tier11 += 1 + tier_dict[key1]["tier 1.1"] += 1 + + elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & + any(int(ij) >= 3 for ij in [total1new, total4new]) & + any(int(ij) >= 3 for ij in [total2new, total3new]) & + all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): + tier = "1.2" + counter_tier12 += 1 + tier_dict[key1]["tier 1.2"] += 1 + + elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & + any(int(ij) >= 3 for ij in [total1new, total4new]) & + all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | + (all(int(ij) >= 1 for ij in [total2new, total3new]) & + any(int(ij) >= 3 for ij in [total2new, total3new]) & + all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): + tier = "2.1" + counter_tier21 += 1 + tier_dict[key1]["tier 2.1"] += 1 + + elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & + all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): + tier = "2.2" + counter_tier22 += 1 + tier_dict[key1]["tier 2.2"] += 1 + + elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & + any(int(ij) >= 3 for ij in [total2new, total3new]) & + all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & + any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) | + (all(int(ij) >= 1 for ij in [total2new, total3new]) & + any(int(ij) >= 3 for ij in [total1new, total4new]) & + all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & + any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): + tier = "2.3" + counter_tier23 += 1 + tier_dict[key1]["tier 2.3"] += 1 + + elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & + all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | + (all(int(ij) >= 1 for ij in [total2new, total3new]) & + all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): + tier = "2.4" + counter_tier24 += 1 + tier_dict[key1]["tier 2.4"] += 1 + + elif ((len(pure_tags_dict_short[key1]) > 1) & + (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | + all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): + tier = "3.1" + counter_tier31 += 1 + tier_dict[key1]["tier 3.1"] += 1 + + elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & + all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) | + (all(int(ij) >= 1 for ij in [total2new, total3new]) & + all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): + tier = "3.2" + counter_tier32 += 1 + tier_dict[key1]["tier 3.2"] += 1 + + elif (trimmed): + tier = "4.1" + counter_tier41 += 1 + tier_dict[key1]["tier 4.1"] += 1 + + else: + tier = "4.2" + counter_tier42 += 1 + tier_dict[key1]["tier 4.2"] += 1 + + var_id = '-'.join([chrom, pos, ref, alt]) + sample_tag = key2[:-5] + array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time + # exclude identical tag from array2, to prevent comparison to itself + same_tag = np.where(array2 == sample_tag) + index_array2 = np.arange(0, len(array2), 1) + index_withoutSame = np.delete(index_array2, same_tag) # delete identical tag from the data + array2 = array2[index_withoutSame] + if len(array2) != 0: # only perform chimera analysis if there is more than 1 variant + array1_half = sample_tag[0:int(len(sample_tag) / 2)] # mate1 part1 + array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))] # mate1 part 2 + array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2]) # mate2 part1 + array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2]) # mate2 part2 + + min_tags_list_zeros = [] + chimera_tags = [] + for mate_b in [False, True]: + i = 0 # counter, only used to see how many HDs of tags were already calculated + if mate_b is False: # HD calculation for all a's + half1_mate1 = array1_half + half2_mate1 = array1_half2 + half1_mate2 = array2_half + half2_mate2 = array2_half2 + elif mate_b is True: # HD calculation for all b's + half1_mate1 = array1_half2 + half2_mate1 = array1_half + half1_mate2 = array2_half2 + half2_mate2 = array2_half + # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" + dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) + min_index = np.where(dist == dist.min()) # get index of min HD + # get all "b's" of the tag or all "a's" of the tag with minimum HD + min_tag_half2 = half2_mate2[min_index] + min_tag_array2 = array2[min_index] # get whole tag with min HD + min_value = dist.min() + # calculate HD of "b" to all "b's" or "a" to all "a's" + dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) + for e in min_tag_half2]) + + dist2 = dist_second_half.max() + max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD + max_tag = min_tag_array2[max_index] + + # tags which have identical parts: + if min_value == 0 or dist2 == 0: + min_tags_list_zeros.append(tag) + chimera_tags.append(max_tag) + # chimeric = True + # else: + # chimeric = False + + # if mate_b is False: + # text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric) + # else: + # text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric) + i += 1 + chimera_tags = [x for x in chimera_tags if x != []] + chimera_tags_new = [] + for i in chimera_tags: + if len(i) > 1: + for t in i: + chimera_tags_new.append(t) + else: + chimera_tags_new.extend(i) + chimera_tags_new = np.asarray(chimera_tags_new) + chimera = ", ".join(chimera_tags_new) + else: + chimera = "" + + if (read_pos1 == -1): + read_pos1 = read_len_median1 = None + if (read_pos4 == -1): + read_pos4 = read_len_median4 = None + if (read_pos2 == -1): + read_pos2 = read_len_median2 = None + if (read_pos3 == -1): + read_pos3 = read_len_median3 = None + line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) + ws1.write_row(row, 0, line) + line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) + ws1.write_row(row + 1, 0, line) + + ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), + {'type': 'formula', + 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), + 'format': format1, + 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) + ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), + {'type': 'formula', + 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), + 'format': format3, + 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) + ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), + {'type': 'formula', + 'criteria': '=$B${}>="3"'.format(row + 1), + 'format': format2, + 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) + + row += 3 + + # sheet 2 + header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (Du Novo)', 'AF (Du Novo)', + 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', + 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', + 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2') + + ws2.write_row(0, 0, header_line2) + row = 0 + + for key1, value1 in sorted(tier_dict.items()): + if key1 in pure_tags_dict_short.keys(): + i = np.where(np.array(['#'.join(str(i) for i in z) + for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] + ref = mut_array[i, 9] + alt = mut_array[i, 10] + chrom, pos = re.split(r'\#', key1) + ref_count = cvrg_dict[key1][0] + alt_count = cvrg_dict[key1][1] + cvrg = ref_count + alt_count + + var_id = '-'.join([chrom, pos, ref, alt]) + lst = [var_id, cvrg] + used_tiers = [] + cum_af = [] + for key2, value2 in sorted(value1.items()): + # calculate cummulative AF + used_tiers.append(value2) + if len(used_tiers) > 1: + cum = safe_div(sum(used_tiers), cvrg) + cum_af.append(cum) + lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg), (cvrg - sum(used_tiers[-4:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-4:]))), alt_count, safe_div(alt_count, cvrg)]) + lst.extend(used_tiers) + lst.extend(cum_af) + lst = tuple(lst) + ws2.write_row(row + 1, 0, lst) + ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) + ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) + ws2.conditional_format('P{}:S{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:S{} P1:S1'.format(row + 2, row + 2)}) + row += 1 + + # sheet 3 + sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), + ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), + ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), + ("tier 4.1", counter_tier41), ("tier 4.2", counter_tier42)] + + header = ("tier", "count") + ws3.write_row(0, 0, header) + + for i in range(len(sheet3)): + ws3.write_row(i + 1, 0, sheet3[i]) + ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), + {'type': 'formula', + 'criteria': '=OR($A${}="tier 1.1", $A${}="tier 1.2")'.format(i + 2, i + 2), + 'format': format1}) + ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), + {'type': 'formula', + 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2), + 'format': format3}) + ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), + {'type': 'formula', + 'criteria': '=OR($A${}="tier 3.1", $A${}="tier 3.2", $A${}="tier 4.1", $A${}="tier 4.2")'.format(i + 2, i + 2, i + 2, i + 2), + 'format': format2}) + + description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "remaining variants")] + examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", + "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", + "4081", "4098", "5", "10", "", ""), + ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, + "289", "0", "0", "0", "0", "0", "0", "3", "6", None, None, None, None, + "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], + [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", + "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", + "0", "4081", "4098", "5", "10", "", ""), + ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "11068", "268", "268", "270", "288", "289", + "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", + "7", "4081", "4098", "5", "10", "", "")], + [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", + "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "1", + "6", "47170", "41149", "", ""), + ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", + "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "1", + "6", "47170", "41149", "", "")], + [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", + "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", + "4081", "4098", "5", "10", "", ""), + ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, + "289", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", None, None, "0", "0", + "0", "0", "4081", "4098", "5", "10", "", "")], + [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", + "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", + "4081", "4098", "5", "10", "", ""), + ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", + "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", + "4081", "4098", "5", "10", "", "")], + [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, + "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", + "0", "0", "0", "4081", "4098", "5", "10", "", ""), + ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", + "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "1", "7", + "4081", "4098", "5", "10", "", "")], + [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", + "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "4081", + "4098", "5", "10", "", ""), + ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", + "1", "1", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "4081", + "4098", "5", "10", "", "")], + [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", + "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", + "3", "3", "47170", "41149", "", ""), + ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, + "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", + "0", "0", "0", "1", "3", "3", "47170", "41149", "", "")], + [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", + "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", + "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), + ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", + "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", + "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], + [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "0", "255", "276", "269", + "5", "6", "5", "6", "0", "0", "5", "6", "0", "0", "1", "1", "0", "0", "0", "0", "1", + "1", "5348", "5350", "", ""), + ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, + "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", + "0", "0", "0", "1", "1", "5348", "5350", "", "")], + [("Chr5:5-20000-13983-G-C", "4.2", "ATGTTGTGAATAACCCACAC", "ab1.ba2", "209", "186", "255", "276", "269", + "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "1", + "1", "5348", "5350", "", ""), + ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, + "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", + "0", "0", "0", "1", "1", "5348", "5350", "", "")]] + + ws3.write(11, 0, "Description of tiers with examples") + ws3.write_row(12, 0, header_line) + row = 0 + for i in range(len(description_tiers)): + ws3.write_row(13 + row + i + 1, 0, description_tiers[i]) + ex = examples_tiers[i] + for k in range(len(ex)): + ws3.write_row(13 + row + i + k + 2, 0, ex[k]) + ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(13 + row + i + k + 2, 13 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) + ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), + {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2), + 'format': format3, + 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) + ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), + {'type': 'formula', + 'criteria': '=$B${}>="3"'.format(13 + row + i + k + 2), + 'format': format2, + 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) + row += 3 + workbook.close() + + +if __name__ == '__main__': + sys.exit(read2mut(sys.argv)) diff -r 000000000000 -r 8d29173d49a9 read2mut.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read2mut.xml Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,84 @@ + + + Looks for reads with mutation at known positions and calculates frequencies and stats. + + va_macros.xml + + + python + matplotlib + pysam + xlsxwriter + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_. + +**Dataset 3:** JSON file generated by the **DCS mutations to tags/reads** tool +containing dictonaries of the tags of reads containing mutations +in the DCS. + +**Dataset 4:** JSON file generated by the **DCS mutations to SSCS stats** tool +stats of tags that carry a mutation in the SSCS at the same position a mutation +is called in the DCS. + +**Output** + +The output is an XLSX file containing frequencies stats for DCS mutations based +on information from the raw reads. In addition to that a tier based +classification is provided based on the amout of support for a true variant call. + + ]]> + + + diff -r 000000000000 -r 8d29173d49a9 test-data/Aligned_Families_test_data_VA.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Aligned_Families_test_data_VA.tabular Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,41 @@ +GATAACCTTGCTTCGTGATTAATC ab 1 M01897:257:000000000-AYB6W:1:2112:28792:17250 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAATGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGAFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGG8<:>>FD>FFC=4=:0<;DD>6461992<)892:=0306(4)-.42((44(667(449?0, +GATAACCTTGCTTCGTGATTAATC ba 2 M01897:257:000000000-AYB6W:1:1108:16316:3620 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A423BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(34A248:>?1,(.404-,((4( +GATAACCTTGCTTCGTGATTAATC ba 2 M01897:257:000000000-AYB6W:1:1118:5518:20674 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCCAGAAGCGGGACGGCCGTAAGTCCCAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC FGEF9FGGCCG@FGGGGGGCFCC@EGGGCAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7DFG***27)?::D)5557@>BFD@)/))).(().9<2((-29BF>F4(83,:12-)4)2,3??<<1:(7>((, +GATAACCTTGCTTCGTGATTAATC ab 2 M01897:257:000000000-AYB6W:1:2112:28792:17250 1:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG FGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;F0;BB1 +GATAACCTTGCTTCGTGATTAATC ba 1 M01897:257:000000000-AYB6W:1:1108:16316:3620 2:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCGCGGGACACG GGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBBB69>;B))6<926(82 +GATAACCTTGCTTCGTGATTAATC ba 1 M01897:257:000000000-AYB6W:1:1118:22651:3876 2:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTTCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCCCGGGACACG FDCCF9FFDFGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGEGFECFGGGGGGGGGGGGG8EGFGEGFG,F@GGGGGEGGEG7FGGGGG@BFFGCCGGEGGGGGGGFGGGGGGFGGG@FFF9CFGGGGGGGGGGGGGGGGGGGFFGF5E*CECC>EGFGG7EGD==?E8:E7CCE3C+?:C?FFG@D3B5:>78)/C6=FFF<>B>>0:@EBF3))14>B?20>?A<2:>99>F<>>FAA9A:,403>BF?2;B(46(4((((( +GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1101:14310:2734 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGGAAGTCACCGGAATCCGGGACGTCCTGGCAGCTAGGGCGGGCCCCGAGCCAGG GGGGCCFGGGGGGGGGFFGGGGGGGFGGGGEGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGG<@FGGCCGGGGGGCFFGGGGGGGGC,@ECFBFDDGGG@FGGGGG9CFG@CCFF@DCDFC>=CEGGDEGCC@CDC*=CC*=5>FGCFEGGDFGG?:CGFD>5)/)47C@4),85:B:DF?(8)448:D:,5?7**430;>01661(-4((74,94:)(,-(-18(--(-(-(-(=01,442))(.(,(2((-(-(-((,(( +GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1112:4840:17845 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCCGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCATACTGGGCACAGGGCCAGGCGTGAGGGCTCAAGAAGCGGGACCGCCGTCAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTCGGCGTGTCCCTAGCTAGC GGGC6<,CFDG8FG,CF6C,9=FFFG:@=BC7CCEFGFDGGGG788CEF66EFGGG7CF*:**=C5=FGG5AC=+:C*2:EFF*7*2/97DD>FC)7>@G@5(704(255005FFFB??FFB39((,--32()(./6>B<(())9))-38>0,43(-((((33<)-,((--(.4)).43)).( +GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:1118:8154:20084 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGG@EDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=FGGGGGFFBFGGGGGGGFGGFGGGGFGGGFGGGGGEGGGGGGG:FGGGG5EGGGG:FEFGEGGGGGGGGGGGGGGGD:EG?FGFFGFCEGG>GFFFCGGFFFGEFE:>?(7.()44>B>G*=F<7:F9>D>9>F03;26:6)6>B<9(38<7A?FB2>>?FF(=:?(((.2:A:)-4((.63-,49>:?0 +GATTGGATAACGTTGTGGCAATTG ab 1 M01897:257:000000000-AYB6W:1:2110:10849:23965 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@CGGGGGGGFGGGGECGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGEGGGCCCGGGGGGGGGGGGGGGGGDGGEGGGDGGGGDFGGGGGFDGDDFFFGGGFFFGFGFE@:?GFFFFFGFFFFFD2?BFFFF09>B9>F(7)2.9A2)6:44<@A7BF?>BF?>6>:((,(,5AF?F91(-:B<>,(3>00( +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:1113:13084:11145 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGTACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGCFGGGGFFFFBGFFFBEFFFFGFF?@AFFB?FFFFFFFFFFFFFFFB00:?FFFAFFFFFFFFF66>FFF<1 +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:1115:14952:6061 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GFGGGGGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGFGGGGGGGGGGGGG,FGFFFFFGAFGGGGGGGGGGGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGFGGGGGEGFGGGDCBEGGGGGGGGGGGGGGGGFEGGFGFGGGGGGFDGGGCDGD9DFFFGD4>FGG4FF@9DD>DD>>FFDBGFFDFFFFFFFFF=?:8?F>?F?FBFF?7>F>DB<>?B9>>?9>9?F1 +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2101:12835:23979 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGACGTAAGTCCAAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGGGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFGGGGFFGGGGGGGGGGFGGGGGGDFGGGGGFGGGGGGGGGGGGGEGG7FFGGGG@FGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGC6CFFFGGCFFGGGGGG:C:47FFD3*1<677<6<;EGB@><)-3:>55-9))).:12<6)4430;>3>0(*4??F1(.:7?>(,((-.8B1999B?1 +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2102:25716:13556 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGCTTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGCGCCAGC ,C@FCGGGGGE8C@FCFFGEECGCFGGDFFEFFGFFGGFGGGFFGFCF@FFAFGGGGGCGEG,EFGGG?=F@EGGGC,<=FF?DEGFFFGFFGFFDGCGG?FDDG>EFGFGA9?EFE@FGGGDFGFCFFGC+@EEE@:F:E7C1:FBCF@7<2CFFF**::8CFEEGE7C8BFF?CFGGC<9CFGCEG+CCC8:CFDCDC=:**202:65*CF5CGFD)6?5))).753>><:5>9@466-.((.9::0)4B>)8><>:0(80:2))501(--3:FF(,4>02( +GATTGGATAACGTTGTGGCAATTG ba 2 M01897:257:000000000-AYB6W:1:2107:10919:21008 1:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACCGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGCEGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG>EGEGGGFGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGCGGGGGGGGFGGFGGGFG@EGGBDGGFFFFGFFFFF)*4<:B@F?G6<>9BFFFFB?BFF?DF?BFF00:BFFFB;BFBB2 +GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1101:14310:2734 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCGCTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFFEGGGGGFFGGGGGGGGGGGGGGFFEE:=FED=FFFFFFEEEGGGGGG:FC:FFGGGGCFGGGGGGGFGFGGGCCG9FGGGGGGGGGFFEFBFGGEEEEEGDGFDC4>EDDFGGEBEFGE5>CGGFFF*)7:773(-766223:(( +GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1112:4840:17845 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCTTGGCCCGGTCCTGGCCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCCGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC FCEFGG@@FGGGGEFGGFFGGDCF8CG86ECEGGGFGF,C,C@@FFFGGE:,CFFDG7CFFGECE=7:FCGGG:@FC6B,=EE7FFE>EGG9C?*=5CC7887*/=*:?C5E76C*::*//C>D*8D4377CF*;:?)055.547;FF?4*7F)<2@?>(41((4))8>7:0(--,-338(( +GATTGGATAACGTTGTGGCAATTG ab 2 M01897:257:000000000-AYB6W:1:1118:8154:20084 1:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GGGGGGGGGGGGGGGGGGGDDGGFFGGDFGGGGGGGGGGGGFEECFGGCFGEGGGGGGGGGGGGGGGFGGFCCFFFFGGGDGGGDGGGFGGGGGCFFFGGFGGGGGGGGFGC7EGG<=BFFGEGFGGGGGDGGGDGFFFGFGGGFGGFFF6@FFFFFFFF?B2>B<09B?AF:?:0(3139:FFGGFDBGFGFC?FFGABFFFB>G?AFFBFB@>>?BB>BEF?AB:??0:B;71??F?(. +GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:1113:13084:11145 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTACCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFCDFBGGGFFFFFFFGF:?D>FFBF>?F@FFFFFBFFFF??FFFF?62>:?FF>?FDFFFF?0:?F?ABFF>09BF?AFFF?:?>>9>?FF::0 +GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:1115:14952:6061 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC GFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEDGF@FGGGGGGGGFGGGGGGCGGGFGGGGFGFFFCD@7DGF58FFFFFGF3:>D:6>>GBBFF474:79?28508?>>4<04>AA<09>0>F?:66C?3C.76)0319:*4)57?F*5?:7:93:B)1)21>99(489<1(((,751681(8-( +GATTGGATAACGTTGTGGCAATTG ba 1 M01897:257:000000000-AYB6W:1:2102:25716:13556 2:N:0:1 GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTTGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCCTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGCGCCCACTTCCCATCTGCGTCCCCACAGGCCTCTCCTGTTGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGTCC ;,EACFFEGD6FFGGG8F<@BF6+,9BFA+4EDFFFEFGCC,BFCB:FFGD==C?CFG,,CEE9E7CGGE@FCCGD+8:CC<9DFGG,@:B9:F9BC@5DD5FFG;@DFBCECEE7EC7,,?CFDCDGGGFFFFC://=?=4CF4C+C558DDF5EDGBB5/*.0?@B2?04)4)1699<<:>247667<340;>B?A71((-49@(-((4()) +CCTCCCGGCAGTGCGAAAATGTCA ab 1 M01897:257:000000000-AYB6W:1:1108:17396:7377 2:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGCCAGGCCGGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGTGGCTGCCCAGGCGGCCTGTTTTTTTGCAGGCTCCCTACGCTACGGGGTGGGCTTTTTCCGTTTCATCTTGGTGTTGCCGGCTGGGACGCCTTGCGCC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG7FGGGGGGGGGGGCC5*:/C:*:<+2/>C:*:*+*<>?+*+0<5:/>E5<35***<6293*935=DC)))1707C5)(1*))())()*06)(((0,(*(,(,(-4(9),4D6(4((5)4*(,).2))-).5)5:228))-1(-(((((-((,()5(-( +CCTCCCGGCAGTGCGAAAATGTCA ba 2 M01897:257:000000000-AYB6W:1:1106:22053:22582 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGACGCGGGCAGTGTGTATGCAGTCATCCTCAGCTACGGGCTGGGCTTCTTCCTGTTTATCCTGGTGGTGGCGGCTGTGTCGCTCTGCCGTC CFGGAFCFCFGGGFDGDDDGGDGGGG;F:BFGEGFGGGGFF8A:@;EFG8>:EEGE0>7+CGG>?F:?4*7?FG6).-))7)/DFGGFGGGGGGFFFFFFFFFF@FFFFCDFGF?FFAFFFDAAFFBFB9?FFD08<<6?BFFF;F?2B>9 +CCTCCCGGCAGTGCGAAAATGTCA ba 2 M01897:257:000000000-AYB6W:1:1111:28216:18792 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=8FGGEGDGGGGGGGGCFFGGGGGGGGGFFFFFFFFFGFFFFB58AABAFF<9<5FBF?):F:B?:2@FFFF1.54EFBFFFGFFFFF:BFFF?F?FFFFFF?FF7F?+A7FF+==CC@F?CCFFCFC@C,26,3224@C@C,,?CG+<+2CFC*:*:);C7E*21*9CE**>DDFC7+:0=/))5C)1)(*)00>*9:(.4(,577:*=47)721),,),(-(4(47()((43460(.)(0..).))).4(()(,(,)6)((((,4((((4(-(((((( +CCTCCCGGCAGTGCGAAAATGTCA ba 1 M01897:257:000000000-AYB6W:1:1109:21342:10199 2:N:0:1 CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGCAGCCCAGCCCGT- GGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGFEGDFGFGGGGFGFGGFGGGEG?FGCDGGEGGGGGGGGG6>FEGFDFGGFFGGGEE3DFF@=@FFGF2?>FB9FFFFFBFFFBFFFFFF9>>F>F68?>>?:BABFFFFF6B??:?BF5<>BB<49?:?:?(4?:0:0(.3399 +CCTAGTCTTTGATTGGCCACTTTT ab 1 M01897:257:000000000-AYB6W:1:1106:12553:14962 2:N:0:1 TAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCACGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT-- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEEEGGGGGGGGFGGGGGGFGGFFGGD@9DFFFGGFFFFF7AFFGFFFFFFFFFFFFFFBFF6>FFFFBBB>FBBADBFFF((428?F:?:DFA:1:?FF7??F10(3755>:DF>AFFGA=>FF>>09>09@6>>BFBF(3.116>F:0)-:1)37(:>)4:D>F6:AFBBFFFFFF0(8,1<6?7(6FGACCCCFGGE7EGGGGGGGFGCCFF;@A<;E;CFGFFFFG5:*=+588C8>57EEGGDEFGGGCFC?*9@FGGGB>)97)<=@F?/))3:BGFFFE:)5925?>326909>>6>54,7((-8)-8-71(--24641:B)47445270,(3124(.,(,:<>(. +CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1106:12553:14962 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGG--CCAGGCAGGGCCCCAAGCCCCTTGTCT-TGCAGCCGGGGGGGGGGCGGTGGGAGCCTAACAAGCGGGGCGGGGGGTTGGAGGCCTCCCCAAGTTCGGGGGTGGCTTCTTCCTGTTCATCCTTGGTGTGGGGGCTGTGACGCCTTTGCGGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG* ?8*;*;**:*;***2***2A***00+< C++0++;***:**:*****://:**;**0++*++2*/:E/*1**)))/)1)+*1**))9))**)/**)03>))8D)(8().5<*)-7))1)67)6/.8118((4(-,.()-(()(-)).(,- +CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1106:15615:18803 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGA--CCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCC-GC GGGGGFGGGGFGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGDEFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGCFGGGGGGGCFGEGFGEEGGGGGGGDGGDGGCFGGGGGGGDGGGGGGFGGGGGGGGGGG GDGGGGGGGFGGGGFGGGGGGGGGFGG9FFGGGGGGGGGCEGECGGCFCEEGGGGGGGGGGGCGGCEC3*:C>DG=FCB0?B><:D243847 10 +CCTAGTCTTTGATTGGCCACTTTT ab 2 M01897:257:000000000-AYB6W:1:1110:11692:17499 1:N:0:1 CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGGGCCAGGCCAGGCCCCAACGCCCATGTCTTTGCAGCCGAGGGGGAGCTGGTTGGGGCTGACGAGGCGGGCAGTGGGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGGGGTGGCGGCTTGTACCCTCTTCC--- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8>5*;:8C8E@;?*:;88*2CE*8*++3/4;<31<2)9:4=).0))/69?<((213(7:960(,1.-))))(()-) +GATAAGCCAACTGCCATCTAGAAT ab 1 M01897:257:000000000-AYB6W:1:1105:25798:19415 2:N:0:1 CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTACGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTTTCCCGAGCCAGC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGFGGGFGGGGGGGGGGGGGGEFCFFGGDEGG8EGGGGFGGGGEGFEGFFGFFCDBF)7:>7FF:EF?B01>BF;FFF*4(,2:24?FBBF>?F?FFBF0;B2:0(:??FF7:BF?03:2>CEFDGB7B5/<<:9<>>>>>:>:D(47:6<26)402346>2<>(-49??0 +GATAAGCCAACTGCCATCTAGAAT ab 2 M01897:257:000000000-AYB6W:1:1105:25798:19415 1:N:0:1 GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFFFFGGFFGDB9FFGFFFF0F?:?FFFFFFFFFFFFBF@FFBA?B9;9B9>BB>FFF>FF>><:>>FD>FFC=4=:0<;DD>6461992<)892:=0306(4)-.42((44(667(449?0, +@GATAACCTTGCTTCGTGATTAATC.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A423BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(34A248:>?1,(.404-,((4( +@GATAACCTTGCTTCGTGATTAATC.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCCAGAAGCGGGACGGCCGTAAGTCCCAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +FGEF9FGGCCG@FGGGGGGCFCC@EGGGCAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7DFG***27)?::D)5557@>BFD@)/))).(().9<2((-29BF>F4(83,:12-)4)2,3??<<1:(7>((, +@GATAACCTTGCTTCGTGATTAATC.ab.2 +GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG ++ +FGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;F0;BB1 +@GATAACCTTGCTTCGTGATTAATC.ba.1 +GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCGCGGGACACG ++ +GGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBBB69>;B))6<926(82 +@GATAACCTTGCTTCGTGATTAATC.ba.1 +GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTTCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCCCGGGACACG ++ +FDCCF9FFDFGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGEGFECFGGGGGGGGGGGGG8EGFGEGFG,F@GGGGGEGGEG7FGGGGG@BFFGCCGGEGGGGGGGFGGGGGGFGGG@FFF9CFGGGGGGGGGGGGGGGGGGGFFGF5E*CECC>EGFGG7EGD==?E8:E7CCE3C+?:C?FFG@D3B5:>78)/C6=FFF<>B>>0:@EBF3))14>B?20>?A<2:>99>F<>>FAA9A:,403>BF?2;B(46(4((((( +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGGAAGTCACCGGAATCCGGGACGTCCTGGCAGCTAGGGCGGGCCCCGAGCCAGG ++ +GGGGCCFGGGGGGGGGFFGGGGGGGFGGGGEGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGG<@FGGCCGGGGGGCFFGGGGGGGGC,@ECFBFDDGGG@FGGGGG9CFG@CCFF@DCDFC>=CEGGDEGCC@CDC*=CC*=5>FGCFEGGDFGG?:CGFD>5)/)47C@4),85:B:DF?(8)448:D:,5?7**430;>01661(-4((74,94:)(,-(-18(--(-(-(-(=01,442))(.(,(2((-(-(-((,(( +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCCGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCATACTGGGCACAGGGCCAGGCGTGAGGGCTCAAGAAGCGGGACCGCCGTCAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTCGGCGTGTCCCTAGCTAGC ++ +GGGC6<,CFDG8FG,CF6C,9=FFFG:@=BC7CCEFGFDGGGG788CEF66EFGGG7CF*:**=C5=FGG5AC=+:C*2:EFF*7*2/97DD>FC)7>@G@5(704(255005FFFB??FFB39((,--32()(./6>B<(())9))-38>0,43(-((((33<)-,((--(.4)).43)).( +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGG@EDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=FGGGGGFFBFGGGGGGGFGGFGGGGFGGGFGGGGGEGGGGGGG:FGGGG5EGGGG:FEFGEGGGGGGGGGGGGGGGD:EG?FGFFGFCEGG>GFFFCGGFFFGEFE:>?(7.()44>B>G*=F<7:F9>D>9>F03;26:6)6>B<9(38<7A?FB2>>?FF(=:?(((.2:A:)-4((.63-,49>:?0 +@GATTGGATAACGTTGTGGCAATTG.ab.1 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@CGGGGGGGFGGGGECGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGEGGGCCCGGGGGGGGGGGGGGGGGDGGEGGGDGGGGDFGGGGGFDGDDFFFGGGFFFGFGFE@:?GFFFFFGFFFFFD2?BFFFF09>B9>F(7)2.9A2)6:44<@A7BF?>BF?>6>:((,(,5AF?F91(-:B<>,(3>00( +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGTACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGCFGGGGFFFFBGFFFBEFFFFGFF?@AFFB?FFFFFFFFFFFFFFFB00:?FFFAFFFFFFFFF66>FFF<1 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GFGGGGGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGFGGGGGGGGGGGGG,FGFFFFFGAFGGGGGGGGGGGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEGFGGGGGEGFGGGDCBEGGGGGGGGGGGGGGGGFEGGFGFGGGGGGFDGGGCDGD9DFFFGD4>FGG4FF@9DD>DD>>FFDBGFFDFFFFFFFFF=?:8?F>?F?FBFF?7>F>DB<>?B9>>?9>9?F1 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGACGTAAGTCCAAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGGGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFGGGGFFGGGGGGGGGGFGGGGGGDFGGGGGFGGGGGGGGGGGGGEGG7FFGGGG@FGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGC6CFFFGGCFFGGGGGG:C:47FFD3*1<677<6<;EGB@><)-3:>55-9))).:12<6)4430;>3>0(*4??F1(.:7?>(,((-.8B1999B?1 +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGCTTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGCGCCAGC ++ +,C@FCGGGGGE8C@FCFFGEECGCFGGDFFEFFGFFGGFGGGFFGFCF@FFAFGGGGGCGEG,EFGGG?=F@EGGGC,<=FF?DEGFFFGFFGFFDGCGG?FDDG>EFGFGA9?EFE@FGGGDFGFCFFGC+@EEE@:F:E7C1:FBCF@7<2CFFF**::8CFEEGE7C8BFF?CFGGC<9CFGCEG+CCC8:CFDCDC=:**202:65*CF5CGFD)6?5))).753>><:5>9@466-.((.9::0)4B>)8><>:0(80:2))501(--3:FF(,4>02( +@GATTGGATAACGTTGTGGCAATTG.ba.2 +CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCACCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACCGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGCEGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG>EGEGGGFGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGCGGGGGGGGFGGFGGGFG@EGGBDGGFFFFGFFFFF)*4<:B@F?G6<>9BFFFFB?BFF?DF?BFF00:BFFFB;BFBB2 +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCGCTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGGG ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFFEGGGGGFFGGGGGGGGGGGGGGFFEE:=FED=FFFFFFEEEGGGGGG:FC:FFGGGGCFGGGGGGGFGFGGGCCG9FGGGGGGGGGFFEFBFGGEEEEEGDGFDC4>EDDFGGEBEFGE5>CGGFFF*)7:773(-766223:(( +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCTTGGCCCGGTCCTGGCCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCCGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGCGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +FCEFGG@@FGGGGEFGGFFGGDCF8CG86ECEGGGFGF,C,C@@FFFGGE:,CFFDG7CFFGECE=7:FCGGG:@FC6B,=EE7FFE>EGG9C?*=5CC7887*/=*:?C5E76C*::*//C>D*8D4377CF*;:?)055.547;FF?4*7F)<2@?>(41((4))8>7:0(--,-338(( +@GATTGGATAACGTTGTGGCAATTG.ab.2 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +GGGGGGGGGGGGGGGGGGGDDGGFFGGDFGGGGGGGGGGGGFEECFGGCFGEGGGGGGGGGGGGGGGFGGFCCFFFFGGGDGGGDGGGFGGGGGCFFFGGFGGGGGGGGFGC7EGG<=BFFGEGFGGGGGDGGGDGFFFGFGGGFGGFFF6@FFFFFFFF?B2>B<09B?AF:?:0(3139:FFGGFDBGFGFC?FFGABFFFB>G?AFFBFB@>>?BB>BEF?AB:??0:B;71??F?(. +@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTACCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFCDFBGGGFFFFFFFGF:?D>FFBF>?F@FFFFFBFFFF??FFFF?62>:?FF>?FDFFFF?0:?F?ABFF>09BF?AFFF?:?>>9>?FF::0 +@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCTTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGAGCCCACTTCCCATCTGGGTCCCCAAAGGCCTCTCCTGTGGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGGAC ++ +GFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEDGF@FGGGGGGGGFGGGGGGCGGGFGGGGFGFFFCD@7DGF58FFFFFGF3:>D:6>>GBBFF474:79?28508?>>4<04>AA<09>0>F?:66C?3C.76)0319:*4)57?F*5?:7:93:B)1)21>99(489<1(((,751681(8-( +@GATTGGATAACGTTGTGGCAATTG.ba.1 +GGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTTGCTCGGGACACGCCAAAGCTGCCAGGACGGACGGGAATCCTGTGACTTACGGCCGTCCCGCCTCTTGAGCCCTCACTCCTGGCCCTGTGCCCAGTGTGGGGACAAAGTTGGCCTGGCCCGGTCCTGGTCCCAGAGGGGCCCCCTCAGCCCCCTCGCGCCCACTTCCCATCTGCGTCCCCACAGGCCTCTCCTGTTGCTCTGGTGTCTCCCGGGTGCCTGGTGGCGGTGTGGTCC ++ +;,EACFFEGD6FFGGG8F<@BF6+,9BFA+4EDFFFEFGCC,BFCB:FFGD==C?CFG,,CEE9E7CGGE@FCCGD+8:CC<9DFGG,@:B9:F9BC@5DD5FFG;@DFBCECEE7EC7,,?CFDCDGGGFFFFC://=?=4CF4C+C558DDF5EDGBB5/*.0?@B2?04)4)1699<<:>247667<340;>B?A71((-49@(-((4()) +@CCTCCCGGCAGTGCGAAAATGTCA.ab.1 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGCCAGGCCGGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGTGGCTGCCCAGGCGGCCTGTTTTTTTGCAGGCTCCCTACGCTACGGGGTGGGCTTTTTCCGTTTCATCTTGGTGTTGCCGGCTGGGACGCCTTGCGCC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG7FGGGGGGGGGGGCC5*:/C:*:<+2/>C:*:*+*<>?+*+0<5:/>E5<35***<6293*935=DC)))1707C5)(1*))())()*06)(((0,(*(,(,(-4(9),4D6(4((5)4*(,).2))-).5)5:228))-1(-(((((-((,()5(-( +@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGACGCGGGCAGTGTGTATGCAGTCATCCTCAGCTACGGGCTGGGCTTCTTCCTGTTTATCCTGGTGGTGGCGGCTGTGTCGCTCTGCCGTC ++ +CFGGAFCFCFGGGFDGDDDGGDGGGG;F:BFGEGFGGGGFF8A:@;EFG8>:EEGE0>7+CGG>?F:?4*7?FG6).-))7)/DFGGFGGGGGGFFFFFFFFFF@FFFFCDFGF?FFAFFFDAAFFBFB9?FFD08<<6?BFFF;F?2B>9 +@CCTCCCGGCAGTGCGAAAATGTCA.ba.2 +CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC ++ +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=8FGGEGDGGGGGGGGCFFGGGGGGGGGFFFFFFFFFGFFFFB58AABAFF<9<5FBF?):F:B?:2@FFFF1.54EFBFFFGFFFFF:BFFF?F?FFFFFF?FF7F?+A7FF+==CC@F?CCFFCFC@C,26,3224@C@C,,?CG+<+2CFC*:*:);C7E*21*9CE**>DDFC7+:0=/))5C)1)(*)00>*9:(.4(,577:*=47)721),,),(-(4(47()((43460(.)(0..).))).4(()(,(,)6)((((,4((((4(-(((((( +@CCTCCCGGCAGTGCGAAAATGTCA.ba.1 +CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGCAGCCCAGCCCGT ++ +GGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGFEGDFGFGGGGFGFGGFGGGEG?FGCDGGEGGGGGGGGG6>FEGFDFGGFFGGGEE3DFF@=@FFGF2?>FB9FFFFFBFFFBFFFFFF9>>F>F68?>>?:BABFFFFF6B??:?BF5<>BB<49?:?:?(4?:0:0(.3399 diff -r 000000000000 -r 8d29173d49a9 test-data/Interesting_Reads_test_data_VA.trim.bam Binary file test-data/Interesting_Reads_test_data_VA.trim.bam has changed diff -r 000000000000 -r 8d29173d49a9 test-data/Interesting_Reads_test_data_VA.trim.bam.bai Binary file test-data/Interesting_Reads_test_data_VA.trim.bam.bai has changed diff -r 000000000000 -r 8d29173d49a9 test-data/SSCS_counts_test_data_VA.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SSCS_counts_test_data_VA.json Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,1 @@ +[{"ACH_TDII_5regions#505": {"ab": 2, "ba": 1}, "ACH_TDII_5regions#571": {"ab": 1, "ba": 1}, "ACH_TDII_5regions#958": {"ab": 1, "ba": 1}}, {"ACH_TDII_5regions#505": {"ab": 1, "ba": 1}, "ACH_TDII_5regions#571": {"ab": 2, "ba": 1}, "ACH_TDII_5regions#958": {"ab": 1}}] \ No newline at end of file diff -r 000000000000 -r 8d29173d49a9 test-data/SSCS_test_data_VA.bam Binary file test-data/SSCS_test_data_VA.bam has changed diff -r 000000000000 -r 8d29173d49a9 test-data/SSCS_test_data_VA.bam.bai Binary file test-data/SSCS_test_data_VA.bam.bai has changed diff -r 000000000000 -r 8d29173d49a9 test-data/mutant_reads_summary_short_trim_test_data_VA.xlsx Binary file test-data/mutant_reads_summary_short_trim_test_data_VA.xlsx has changed diff -r 000000000000 -r 8d29173d49a9 test-data/tag_count_dict_test_data_VA.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tag_count_dict_test_data_VA.json Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,1 @@ +[{"GATAACCTTGCTTCGTGATTAATC": {"ACH_TDII_5regions#505": "A"}, "GATTGGATAACGTTGTGGCAATTG": {"ACH_TDII_5regions#571": "T"}, "CCTCCCGGCAGTGCGAAAATGTCA": {"ACH_TDII_5regions#958": "C"}}, {"ACH_TDII_5regions#505": [1, 1, 173.0], "ACH_TDII_5regions#571": [1, 1, 143.0], "ACH_TDII_5regions#958": [0, 1, 195.0]}] \ No newline at end of file diff -r 000000000000 -r 8d29173d49a9 va_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/va_macros.xml Wed Nov 20 17:47:35 2019 -0500 @@ -0,0 +1,13 @@ + + + + + @misc{duplex, + author = {Povysil, Gundula and Heinzl, Monika and Salazar, Renato and Stoler, Nicholas and Nekrutenko, Anton and Tiemann-Boege, Irene}, + year = {2019}, + title = {{Variant Analyzer: a quality control for variant calling in duplex sequencing data (manuscript)}} + } + + + + \ No newline at end of file