Next changeset 1:3556001ff2db (2019-12-04) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8" |
added:
mut2read.py mut2read.xml mut2sscs.py mut2sscs.xml read2mut.py read2mut.xml test-data/Aligned_Families_test_data_VA.tabular test-data/DCS_Mutations_test_data_VA.tabular test-data/DCS_test_data_VA.bam test-data/DCS_test_data_VA.bam.bai test-data/Interesting_Reads_test_data_VA.fastq test-data/Interesting_Reads_test_data_VA.trim.bam test-data/Interesting_Reads_test_data_VA.trim.bam.bai test-data/SSCS_counts_test_data_VA.json test-data/SSCS_test_data_VA.bam test-data/SSCS_test_data_VA.bam.bai test-data/mutant_reads_summary_short_trim_test_data_VA.xlsx test-data/tag_count_dict_test_data_VA.json va_macros.xml |
b |
diff -r 000000000000 -r 8d29173d49a9 mut2read.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mut2read.py Wed Nov 20 17:47:35 2019 -0500 |
[ |
@@ -0,0 +1,155 @@ +#!/usr/bin/env python + +"""mut2read.py + +Author -- Gundula Povysil +Contact -- povysil@bioinf.jku.at + +Takes a tabular file with mutations and a BAM file as input and prints +all tags of reads that carry the mutation to a user specified output file. +Creates fastq file of reads of tags with mutation. + +======= ========== ================= ================================ +Version Date Author Description +0.2.1 2019-10-27 Gundula Povysil - +======= ========== ================= ================================ + +USAGE: python mut2read.py DCS_Mutations.tabular DCS.bam Aligned_Families.tabular Interesting_Reads.fastq + tag_count_dict.json +""" + +import argparse +import json +import os +import sys + +import numpy as np +import pysam + + +def make_argparser(): + parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file and creates a fastq file of reads of tags with mutation.') + parser.add_argument('--mutFile', + help='TABULAR file with DCS mutations.') + parser.add_argument('--bamFile', + help='BAM file with aligned DCS reads.') + parser.add_argument('--familiesFile', + help='TABULAR file with aligned families.') + parser.add_argument('--outputFastq', + help='Output FASTQ file of reads with mutations.') + parser.add_argument('--outputJson', + help='Output JSON file to store collected data.') + return parser + + +def mut2read(argv): + parser = make_argparser() + args = parser.parse_args(argv[1:]) + + file1 = args.mutFile + file2 = args.bamFile + file3 = args.familiesFile + outfile = args.outputFastq + json_file = args.outputJson + + if os.path.isfile(file1) is False: + sys.exit("Error: Could not find '{}'".format(file1)) + + if os.path.isfile(file2) is False: + sys.exit("Error: Could not find '{}'".format(file2)) + + if os.path.isfile(file3) is False: + sys.exit("Error: Could not find '{}'".format(file3)) + + # read 
mut file + with open(file1, 'r') as mut: + mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') + + # read dcs bam file + # pysam.index(file2) + bam = pysam.AlignmentFile(file2, "rb") + + # get tags + tag_dict = {} + cvrg_dict = {} + + if len(mut_array) == 13: + mut_array = mut_array.reshape((1, len(mut_array))) + + for m in range(len(mut_array[:, 0])): + print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) + chrom = mut_array[m, 1] + stop_pos = mut_array[m, 2].astype(int) + chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + ref = mut_array[m, 9] + alt = mut_array[m, 10] + + dcs_len = [] + + for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=100000000): + + if pileupcolumn.reference_pos == stop_pos - 1: + count_alt = 0 + count_ref = 0 + count_indel = 0 + count_n = 0 + count_other = 0 + count_lowq = 0 + print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), + "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) + for pileupread in pileupcolumn.pileups: + if not pileupread.is_del and not pileupread.is_refskip: + # query position is None if is_del or is_refskip is set. 
+ nuc = pileupread.alignment.query_sequence[pileupread.query_position] + dcs_len.append(len(pileupread.alignment.query_sequence)) + if nuc == alt: + count_alt += 1 + tag = pileupread.alignment.query_name + if tag in tag_dict: + tag_dict[tag][chrom_stop_pos] = alt + else: + tag_dict[tag] = {} + tag_dict[tag][chrom_stop_pos] = alt + elif nuc == ref: + count_ref += 1 + elif nuc == "N": + count_n += 1 + elif nuc == "lowQ": + count_lowq += 1 + else: + count_other += 1 + else: + count_indel += 1 + dcs_median = np.median(np.array(dcs_len)) + cvrg_dict[chrom_stop_pos] = (count_ref, count_alt, dcs_median) + + print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s, median length of DCS = %s\n" % + (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, + count_indel, count_lowq, dcs_median)) + bam.close() + + with open(json_file, "w") as f: + json.dump((tag_dict, cvrg_dict), f) + + # create fastq from aligned reads + with open(outfile, 'w') as out: + with open(file3, 'r') as families: + for line in families: + line = line.rstrip('\n') + splits = line.split('\t') + tag = splits[0] + + if tag in tag_dict: + str1 = splits[4] + curr_seq = str1.replace("-", "") + str2 = splits[5] + curr_qual = str2.replace(" ", "") + + out.write("@" + splits[0] + "." + splits[1] + "." + splits[2] + "\n") + out.write(curr_seq + "\n") + out.write("+" + "\n") + out.write(curr_qual + "\n") + + +if __name__ == '__main__': + sys.exit(mut2read(sys.argv)) |
b |
diff -r 000000000000 -r 8d29173d49a9 mut2read.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mut2read.xml Wed Nov 20 17:47:35 2019 -0500 |
[ |
@@ -0,0 +1,69 @@ +<?xml version="1.0" encoding="UTF-8"?> +<tool id="mut2read" name="DCS mutations to tags/reads:" version="1.0.0" profile="19.01"> + <description>Extracts all tags that carry a mutation in the duplex consensus sequence (DCS)</description> + <macros> + <import>va_macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.4.0">matplotlib</requirement> + <requirement type="package" version="0.15">pysam</requirement> + </requirements> + <command><![CDATA[ + ln -s '$file2' bam_input.bam && + ln -s '${file2.metadata.bam_index}' bam_input.bam.bai && + python '$__tool_directory__/mut2read.py' + --mutFile '$file1' + --bamFile bam_input.bam + --familiesFile '$file3' + --outputFastq '$output_fastq' + --outputJson '$output_json' + ]]> + </command> + <inputs> + <param name="file1" type="data" format="tabular" label="DCS Mutation File" optional="false" help="TABULAR file with DCS mutations. 
See Help section below for a detailed explanation."/> + <param name="file2" type="data" format="bam" label="DCS BAM File" optional="false" help="BAM file with aligned DCS reads."/> + <param name="file3" type="data" format="tabular" label="Aligned Families File" optional="false" help="TABULAR file with aligned families."/> + </inputs> + <outputs> + <data name="output_fastq" format="fastq" label="${tool.name} on ${on_string}: FASTQ"/> + <data name="output_json" format="json" label="${tool.name} on ${on_string}: JSON"/> + </outputs> + <tests> + <test> + <param name="file1" value="DCS_Mutations_test_data_VA.tabular"/> + <param name="file2" value="DCS_test_data_VA.bam"/> + <param name="file3" value="Aligned_Families_test_data_VA.tabular"/> + <output name="output_fastq" file="Interesting_Reads_test_data_VA.fastq" lines_diff="136"/> + <output name="output_json" file="tag_count_dict_test_data_VA.json" lines_diff="2"/> + </test> + </tests> + <help> <![CDATA[ +**What it does** + +Takes a tabular file with mutations, a BAM file of aligned DCS reads, and a +tabular file with aligned families as input and prints all tags of reads that +carry a mutation to a user specified output file and creates a fastq file of +reads of tags with a mutation. + +**Input** + +**Dataset 1:** Tabular file with duplex consesus sequence (DCS) mutations as +generated by the **Variant Annotator** tool. + +**Dataset 2:** BAM file of aligned DCS reads. This file can be obtained by the +tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_. + +**Dataset 3:** Tabular file with reads as produced by the +**Du Novo: Align families** tool of the `Du Novo Analysis Pipeline +<https://doi.org/10.1186/s13059-016-1039-4>`_ + +**Output** + +The output is a json file containing dictonaries of the tags of reads containing mutations +in the DCS and a fastq file of all reads of these tags. + + ]]> + </help> + <expand macro="citation" /> +</tool> |
b |
diff -r 000000000000 -r 8d29173d49a9 mut2sscs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mut2sscs.py Wed Nov 20 17:47:35 2019 -0500 |
[ |
@@ -0,0 +1,133 @@ +#!/usr/bin/env python + +"""mut2sscs.py + +Author -- Gundula Povysil +Contact -- povysil@bioinf.jku.at + +Takes a tabular file with mutations from DCS and a BAM file of SSCS as input +and extracts all tags of reads that carry the mutation. +Calculates statistics about number of ab/ba/duplex per mutation. + +======= ========== ================= ================================ +Version Date Author Description +0.2.1 2019-10-27 Gundula Povysil - +======= ========== ================= ================================ + +USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json + +""" + +from __future__ import division + +import argparse +import json +import os +import sys + +import numpy as np +import pysam + + +def make_argparser(): + parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.') + parser.add_argument('--mutFile', + help='TABULAR file with DCS mutations.') + parser.add_argument('--bamFile', + help='BAM file with aligned SSCS reads.') + parser.add_argument('--outputJson', + help='Output JSON file to store SSCS counts.') + return parser + + +def mut2sscs(argv): + parser = make_argparser() + args = parser.parse_args(argv[1:]) + + file1 = args.mutFile + file2 = args.bamFile + sscs_counts_json = args.outputJson + + if os.path.isfile(file1) is False: + sys.exit("Error: Could not find '{}'".format(file1)) + + if os.path.isfile(file2) is False: + sys.exit("Error: Could not find '{}'".format(file2)) + + # 1. 
read mut file + with open(file1, 'r') as mut: + mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') + + # 2 read SSCS bam file + # pysam.index(file2) + bam = pysam.AlignmentFile(file2, "rb") + + # get tags + mut_pos_dict = {} + ref_pos_dict = {} + if len(mut_array) == 13: + mut_array = mut_array.reshape((1, len(mut_array))) + + for m in range(0, len(mut_array[:, 0])): + print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) + chrom = mut_array[m, 1] + stop_pos = mut_array[m, 2].astype(int) + chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + ref = mut_array[m, 9] + alt = mut_array[m, 10] + + for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): + if pileupcolumn.reference_pos == stop_pos - 1: + count_alt = 0 + count_ref = 0 + count_indel = 0 + print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), + "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) + for pileupread in pileupcolumn.pileups: + if not pileupread.is_del and not pileupread.is_refskip: + tag = pileupread.alignment.query_name + abba = tag[-2:] + # query position is None if is_del or is_refskip is set. 
+ if pileupread.alignment.query_sequence[pileupread.query_position] == alt: + count_alt += 1 + if chrom_stop_pos in mut_pos_dict: + if abba in mut_pos_dict[chrom_stop_pos]: + mut_pos_dict[chrom_stop_pos][abba] += 1 + else: + mut_pos_dict[chrom_stop_pos][abba] = 1 + else: + mut_pos_dict[chrom_stop_pos] = {} + mut_pos_dict[chrom_stop_pos][abba] = 1 + elif pileupread.alignment.query_sequence[pileupread.query_position] == ref: + count_ref += 1 + if chrom_stop_pos in ref_pos_dict: + if abba in ref_pos_dict[chrom_stop_pos]: + ref_pos_dict[chrom_stop_pos][abba] += 1 + else: + ref_pos_dict[chrom_stop_pos][abba] = 1 + else: + ref_pos_dict[chrom_stop_pos] = {} + ref_pos_dict[chrom_stop_pos][abba] = 1 + else: + count_indel += 1 + + print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" % + (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel)) + + # if mutation is in DCS file but not in SSCS, then set counts to NA + if chrom_stop_pos not in mut_pos_dict.keys(): + mut_pos_dict[chrom_stop_pos] = {} + mut_pos_dict[chrom_stop_pos]["ab"] = 0 + mut_pos_dict[chrom_stop_pos]["ba"] = 0 + ref_pos_dict[chrom_stop_pos] = {} + ref_pos_dict[chrom_stop_pos]["ab"] = 0 + ref_pos_dict[chrom_stop_pos]["ba"] = 0 + bam.close() + + # save counts + with open(sscs_counts_json, "w") as f: + json.dump((mut_pos_dict, ref_pos_dict), f) + + +if __name__ == '__main__': + sys.exit(mut2sscs(sys.argv)) |
b |
diff -r 000000000000 -r 8d29173d49a9 mut2sscs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mut2sscs.xml Wed Nov 20 17:47:35 2019 -0500 |
[ |
@@ -0,0 +1,63 @@ +<?xml version="1.0" encoding="UTF-8"?> +<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="1.0.0" profile="19.01"> + <description>Extracts all tags from the single stranded consensus sequence (SSCS) bam file that carry a mutation at the same position a mutation is called in the duplex consensus sequence (DCS) and calculates their frequencies</description> + <macros> + <import>va_macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.4.0">matplotlib</requirement> + <requirement type="package" version="0.15">pysam</requirement> + </requirements> + <command><![CDATA[ + ln -s '$file2' bam_input.bam && + ln -s '${file2.metadata.bam_index}' bam_input.bam.bai && + python '$__tool_directory__/mut2sscs.py' + --mutFile '$file1' + --bamFile bam_input.bam + --outputJson '$output_json' + ]]> + </command> + <inputs> + <param name="file1" type="data" format="tabular" label="DCS Mutation File" optional="false" help="TABULAR file with DCS mutations. See Help section below for a detailed explanation."/> + <param name="file2" type="data" format="bam" label="SSCS BAM File" optional="false" help="BAM file with aligned SSCS reads."/> + </inputs> + <outputs> + <data name="output_json" format="json" label="${tool.name} on ${on_string}: JSON"/> + </outputs> + <tests> + <test> + <param name="file1" value="DCS_Mutations_test_data_VA.tabular"/> + <param name="file2" value="SSCS_test_data_VA.bam"/> + <output name="output_json" file="SSCS_counts_test_data_VA.json" lines_diff="2"/> + </test> + </tests> + <help> <![CDATA[ +**What it does** + +Takes a tabular file with DCS mutations and a BAM file of aligned SSCS reads +as input and writes statistics about tags of reads that carry a mutation in the +SSCS at the same position a mutation is called in the DCS to a user specified output file.. 
+ +**Input** + +**Dataset 1:** Tabular file with duplex consensus sequence (DCS) mutations as +generated by the **Variant Annotator** tool. + +**Dataset 2:** BAM file of aligned single stranded consensus sequence (SSCS) +reads. This file can be obtained by the tool `Map with BWA-MEM +<https://arxiv.org/abs/1303.3997>`_. + +**Dataset 3:** Tabular file with reads as produced by the +**Du Novo: Align families** tool of the `Du Novo Analysis Pipeline +<https://doi.org/10.1186/s13059-016-1039-4>`_ + +**Output** + +The output is a json file containing dictionaries with stats of tags that carry a mutation in the SSCS +at the same position a mutation is called in the DCS. + + ]]> + </help> + <expand macro="citation" /> +</tool> |
b |
diff -r 000000000000 -r 8d29173d49a9 read2mut.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read2mut.py Wed Nov 20 17:47:35 2019 -0500 |
[ |
b'@@ -0,0 +1,852 @@\n+#!/usr/bin/env python\n+\n+"""read2mut.py\n+\n+Author -- Gundula Povysil\n+Contact -- povysil@bioinf.jku.at\n+\n+Looks for reads with mutation at known\n+positions and calculates frequencies and stats.\n+\n+======= ========== ================= ================================\n+Version Date Author Description\n+0.2.1 2019-10-27 Gundula Povysil -\n+======= ========== ================= ================================\n+\n+\n+USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam\n+ --inputJson tag_count_dict.json --sscsJson SSCS_counts.json\n+ --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10\n+\n+"""\n+\n+from __future__ import division\n+\n+import argparse\n+import itertools\n+import json\n+import operator\n+import os\n+import re\n+import sys\n+\n+import numpy as np\n+import pysam\n+import xlsxwriter\n+\n+\n+def make_argparser():\n+ parser = argparse.ArgumentParser(description=\'Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.\')\n+ parser.add_argument(\'--mutFile\',\n+ help=\'TABULAR file with DCS mutations.\')\n+ parser.add_argument(\'--bamFile\',\n+ help=\'BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).\')\n+ parser.add_argument(\'--inputJson\',\n+ help=\'JSON file with data collected by mut2read.py.\')\n+ parser.add_argument(\'--sscsJson\',\n+ help=\'JSON file with SSCS counts collected by mut2sscs.py.\')\n+ parser.add_argument(\'--outputFile\',\n+ help=\'Output xlsx file of mutation details.\')\n+ parser.add_argument(\'--thresh\', type=int, default=0,\n+ help=\'Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.\')\n+ parser.add_argument(\'--phred\', type=int, default=20,\n+ help=\'Integer threshold for Phred score. 
Only reads higher than this threshold are considered. Default 20.\')\n+ parser.add_argument(\'--trim\', type=int, default=10,\n+ help=\'Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.\')\n+ return parser\n+\n+\n+def safe_div(x, y):\n+ if y == 0:\n+ return None\n+ return x / y\n+\n+\n+def read2mut(argv):\n+ parser = make_argparser()\n+ args = parser.parse_args(argv[1:])\n+ file1 = args.mutFile\n+ file2 = args.bamFile\n+ json_file = args.inputJson\n+ sscs_json = args.sscsJson\n+ outfile = args.outputFile\n+ thresh = args.thresh\n+ phred_score = args.phred\n+ trim = args.trim\n+\n+ if os.path.isfile(file1) is False:\n+ sys.exit("Error: Could not find \'{}\'".format(file1))\n+ if os.path.isfile(file2) is False:\n+ sys.exit("Error: Could not find \'{}\'".format(file2))\n+ if os.path.isfile(json_file) is False:\n+ sys.exit("Error: Could not find \'{}\'".format(json_file))\n+ if thresh < 0:\n+ sys.exit("Error: thresh is \'{}\', but only non-negative integers allowed".format(thresh))\n+ if phred_score < 0:\n+ sys.exit("Error: phred is \'{}\', but only non-negative integers allowed".format(phred_score))\n+ if trim < 0:\n+ sys.exit("Error: trim is \'{}\', but only non-negative integers allowed".format(thresh))\n+\n+ # 1. read mut file\n+ with open(file1, \'r\') as mut:\n+ mut_array = np.genfromtxt(mut, skip_header=1, delimiter=\'\\t\', comments=\'#\', dtype=\'string\')\n+\n+ # 2. load dicts\n+ with open(json_file, "r") as f:\n+ (tag_dict, cvrg_dict) = json.load(f)\n+\n+ with open(sscs_json, "r") as f:\n+ (mut_pos_dict, ref_pos_dict) = json.load(f)\n+\n+ # 3. read bam file\n+ # pysam.index(file2)\n+ bam = pysam.AlignmentFile(file2, "rb")\n+\n+ # 4. 
crea'..b' "3", "3", "47170", "41149", "", ""),\n+ ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None,\n+ "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5",\n+ "0", "0", "0", "1", "3", "3", "47170", "41149", "", "")],\n+ [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271",\n+ "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",\n+ "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""),\n+ ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271",\n+ "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",\n+ "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")],\n+ [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "0", "255", "276", "269",\n+ "5", "6", "5", "6", "0", "0", "5", "6", "0", "0", "1", "1", "0", "0", "0", "0", "1",\n+ "1", "5348", "5350", "", ""),\n+ ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None,\n+ "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",\n+ "0", "0", "0", "1", "1", "5348", "5350", "", "")],\n+ [("Chr5:5-20000-13983-G-C", "4.2", "ATGTTGTGAATAACCCACAC", "ab1.ba2", "209", "186", "255", "276", "269",\n+ "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "1",\n+ "1", "5348", "5350", "", ""),\n+ ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None,\n+ "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",\n+ "0", "0", "0", "1", "1", "5348", "5350", "", "")]]\n+\n+ ws3.write(11, 0, "Description of tiers with examples")\n+ ws3.write_row(12, 0, header_line)\n+ row = 0\n+ for i in range(len(description_tiers)):\n+ ws3.write_row(13 + row + i + 1, 0, description_tiers[i])\n+ ex = examples_tiers[i]\n+ for k in range(len(ex)):\n+ ws3.write_row(13 + row + i + k + 2, 0, ex[k])\n+ ws3.conditional_format(\'L{}:M{}\'.format(13 + row + i + k + 2, 13 
+ row + i + k + 3), {\'type\': \'formula\', \'criteria\': \'=OR($B${}="1.1", $B${}="1.2")\'.format(13 + row + i + k + 2, 13 + row + i + k + 2), \'format\': format1, \'multi_range\': \'L{}:M{} T{}:U{} B{}\'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)})\n+ ws3.conditional_format(\'L{}:M{}\'.format(13 + row + i + k + 2, 13 + row + i + k + 3),\n+ {\'type\': \'formula\', \'criteria\': \'=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")\'.format(13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2),\n+ \'format\': format3,\n+ \'multi_range\': \'L{}:M{} T{}:U{} B{}\'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)})\n+ ws3.conditional_format(\'L{}:M{}\'.format(13 + row + i + k + 2, 13 + row + i + k + 3),\n+ {\'type\': \'formula\',\n+ \'criteria\': \'=$B${}>="3"\'.format(13 + row + i + k + 2),\n+ \'format\': format2,\n+ \'multi_range\': \'L{}:M{} T{}:U{} B{}\'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)})\n+ row += 3\n+ workbook.close()\n+\n+\n+if __name__ == \'__main__\':\n+ sys.exit(read2mut(sys.argv))\n' |
b |
diff -r 000000000000 -r 8d29173d49a9 read2mut.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read2mut.xml Wed Nov 20 17:47:35 2019 -0500 |
[ |
@@ -0,0 +1,84 @@ +<?xml version="1.0" encoding="UTF-8"?> +<tool id="read2mut" name="Call specific mutations in reads:" version="1.0.0" profile="19.01"> + <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description> + <macros> + <import>va_macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.4.0">matplotlib</requirement> + <requirement type="package" version="0.15">pysam</requirement> + <requirement type="package" version="1.1.0">xlsxwriter</requirement> + </requirements> + <command><![CDATA[ + ln -s '$file2' bam_input.bam && + ln -s '${file2.metadata.bam_index}' bam_input.bam.bai && + python '$__tool_directory__/read2mut.py' + --mutFile '$file1' + --bamFile bam_input.bam + --inputJson '$file3' + --sscsJson '$file4' + --thresh '$thresh' + --phred '$phred' + --trim '$trim' + --outputFile '$output_xlsx' + ]]> + </command> + <inputs> + <param name="file1" type="data" format="tabular" label="DCS Mutation File" optional="false" help="TABULAR file with DCS mutations. See Help section below for a detailed explanation."/> + <param name="file2" type="data" format="bam" label="BAM File of raw reads" optional="false" help="BAM file with aligned raw reads of selected tags."/> + <param name="file3" type="data" format="json" label="JSON File with DCS tag stats" optional="false" help="JSON file generated by DCS mutations to tags/reads"/> + <param name="file4" type="data" format="json" label="JSON File with SSCS tag stats" optional="false" help="JSON file generated by DCS mutations to SSCS stats."/> + <param name="thresh" type="integer" label="Tag count threshold" value="0" help="Integer threshold for displaying mutations. Only mutations occuring in DCS of less than thresh tags are displayed. 
Default of 0 displays all."/> + <param name="phred" type="integer" label="Phred quality score threshold" min="0" max="41" value="20" help="Integer threshold for Phred quality score. Only reads higher than this threshold are considered. Default = 20."/> + <param name="trim" type="integer" label="Trimming threshold" value="10" help="Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10."/> + </inputs> + <outputs> + <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX"/> + </outputs> + <tests> + <test> + <param name="file1" value="DCS_Mutations_test_data_VA.tabular"/> + <param name="file2" value="Interesting_Reads_test_data_VA.trim.bam"/> + <param name="file3" value="tag_count_dict_test_data_VA.json"/> + <param name="file4" value="SSCS_counts_test_data_VA.json"/> + <param name="thresh" value="0"/> + <param name="phred" value="20"/> + <param name="trim" value="10"/> + <output name="output_xlsx" file="mutant_reads_summary_short_trim_test_data_VA.xlsx" decompress="true" lines_diff="10"/> + </test> + </tests> + <help> <![CDATA[ +**What it does** + +Takes a tabular file with mutations, a BAM file of aligned raw reads, and JSON files +created by the tools **DCS mutations to tags/reads** and **DCS mutations to SSCS stats** +as input and calculates frequencies and stats for DCS mutations based on information +from the raw reads. + +**Input** + +**Dataset 1:** Tabular file with duplex consesus sequence (DCS) mutations as +generated by the **Variant Annotator** tool. + +**Dataset 2:** BAM file of aligned raw reads. This file can be obtained by the +tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_. + +**Dataset 3:** JSON file generated by the **DCS mutations to tags/reads** tool +containing dictonaries of the tags of reads containing mutations +in the DCS. 
+ +**Dataset 4:** JSON file generated by the **DCS mutations to SSCS stats** tool +containing stats of tags that carry a mutation in the SSCS at the same position a mutation +is called in the DCS. + +**Output** + +The output is an XLSX file containing frequencies and stats for DCS mutations based +on information from the raw reads. In addition to that a tier based +classification is provided based on the amount of support for a true variant call. + + ]]> + </help> + <expand macro="citation" /> +</tool> |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/Aligned_Families_test_data_VA.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Aligned_Families_test_data_VA.tabular Wed Nov 20 17:47:35 2019 -0500 |
b |
b'@@ -0,0 +1,41 @@\n+GATAACCTTGCTTCGTGATTAATC\tab\t1\tM01897:257:000000000-AYB6W:1:2112:28792:17250 2:N:0:1\tCTAGAGGGCCAGACCCTGGAGAGAATGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC\tGGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGAFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGG8<FED@FGEGGGGGGGGGGGGGGGGGGGGGGGGFEFGGGGGGGEEGGGGGGGGGCECE8EGGGEFFGGGGGGFGGGDEDD5EGGGGGFGGGDCFCFGFGFGCFAGGFDFFFEEGGFF3><:>>FD>FFC=4=:0<;DD>6461992<)892<AFBFFFFFF244:-1:1>:=0306(4)-.42((44(667(449?0,\n+GATAACCTTGCTTCGTGATTAATC\tba\t2\tM01897:257:000000000-AYB6W:1:1108:16316:3620 1:N:0:1\tCTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A42<EF?;B::((7:?7???<)/:?91;1,6?F?29902(\n+GATAACCTTGCTTCGTGATTAATC\tba\t2\tM01897:257:000000000-AYB6W:1:1118:22651:3876 
1:N:0:1\tCTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGDGGGGEEGGGGGGGGGGGGGGGGGGGCEFDFGGGFGFGGEGEBFGG<FGFCFEFDFDF<DGGGGGGFGFGFGGGGGFGGGCE:FFBFGGGGDGGGGGGGGDD@FGGFC6AE7E1CGGFGCCFGGGGCEGFGGGCCFG9A*59@FGD><?9=CFF6>3BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(34A248:>?1,(.404-,((4(\n+GATAACCTTGCTTCGTGATTAATC\tba\t2\tM01897:257:000000000-AYB6W:1:1118:5518:20674 1:N:0:1\tCTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCCAGAAGCGGGACGGCCGTAAGTCCCAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC\tFGEF9FGGCCG@FGGGGGGCFCC@EGGG<FFGGFGGCGGGGGEG?FGGGGDGGGGGGGGFEFGGGGGGGGG7==F:F,=FEGGFFFFG<<F<=8FG>CAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7D<FGGEFEGGFG2DCCFGECC*=CGEGGGGGGGGCCFFFGF7FFGFGGGGGGGFFG=E56C55CEGF3:F*9*./>FG***27)?::D)5557@>BFD@)/))).(().9<2((-29BF>F4(83,:12-)4)2,3??<<1:(7>((,\n+GATAACCTTGCTTCGTGATTAATC\tab\t2\tM01897:257:000000000-AYB6W:1:2112:28792:17250 1:N:0:1\tGATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG\tFGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;<?1:<?0>F0;BB1\n+GATAACCTTGCTTCGTGATTAATC\tba\t1\tM01897:257:000000000-AYB6W:1:1108:16316:3620 
2:N:0:1\tGATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCGCGGGACACG\tGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*<FGGFGDFC35>+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBB<A?FB(9?<AFFF<0?B?F4:BFF2>B69>;B))6<<?(,(46((4,42(7>926(82\n+GATAACCTTGCTTCGTGATTAATC\tba\t1\tM01897:257:000000000-AYB6'..b'4,7((-8)-8-71(--24641:B)47445270,(3124(.,(,:<>(. \n+CCTAGTCTTTGATTGGCCACTTTT\tab\t2\tM01897:257:000000000-AYB6W:1:1106:12553:14962 1:N:0:1\tCTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGG--CCAGGCAGGGCCCCAAGCCCCTTGTCT-TGCAGCCGGGGGGGGGGCGGTGGGAGCCTAACAAGCGGGGCGGGGGGTTGGAGGCCTCCCCAAGTTCGGGGGTGGCTTCTTCCTGTTCATCCTTGGTGTGGGGGCTGTGACGCCTTTGCGGC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG* ?8*;*;**:*;***2***2A***00+< C++0++;***:**:*****://:**;**0++*++2*/:E/*1**)))/)1)+*1**))9))**)/**)03>))8D)(8().5<*)-7))1)67)6/.8118((4(-,.()-(()(-)).(,-\n+CCTAGTCTTTGATTGGCCACTTTT\tab\t2\tM01897:257:000000000-AYB6W:1:1106:15615:18803 1:N:0:1\tCTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGA--CCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCC-GC\tGGGGGFGGGGFGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGDEFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGCFGGGGGGGCFGEGFGEEGGGGGGGDGGDGGCFGGGGGGGDGGGGGGFGGGGGGGGGGG 
GDGGGGGGGFGGGGFGGGGGGGGGFGG9FFGGGGGGGGGCEGECGGCFCEEGGGGGGGGGGGCGGCEC3*:C>DG=FC<?CGGFFGFFFGFFGFFFFFG:BDFFF<:AFFFFFFFF4<4?BAFB:<BB??09>B0?B><:D243847 10\n+CCTAGTCTTTGATTGGCCACTTTT\tab\t2\tM01897:257:000000000-AYB6W:1:1110:11692:17499 1:N:0:1\tCTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGGGGCCAGGCCAGGCCCCAACGCCCATGTCTTTGCAGCCGAGGGGGAGCTGGTTGGGGCTGACGAGGCGGGCAGTGGGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGGGGTGGCGGCTTGTACCCTCTTCC---\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8>5*;:8C8E@;?*:;88*2CE*8*++<C99+9+@C**88*:C*:?6<C*+1*:858C*;7/9E*1CGCCC0*)1)*+<C7C.5766<69=<))9*05>3/4;<31<2)9:4=).0))/69?<((213(7:960(,1.-))))(()-) \n+GATAAGCCAACTGCCATCTAGAAT\tab\t1\tM01897:257:000000000-AYB6W:1:1105:25798:19415 2:N:0:1\tCTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTACGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTTTCCCGAGCCAGC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGFGGGFGGGGGGGGGGGGGGEFCFFGGDEGG8EGGGGFGGGGEGFEGFFG<EDGGGGGGGGGCC7FGGGGGGGFGG=>FFCDBF)7:>7FF:EF?<?FEE:@F@?6??F6>B01>BF;FFF*4(,2:24?FBBF>?F?FFBF0;B2:0(:??FF7:BF?03:2<BBFBFB?0\n+GATAAGCCAACTGCCATCTAGAAT\tba\t2\tM01897:257:000000000-AYB6W:1:2104:15100:19675 
1:N:0:1\tCTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGGCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTCGCGTGTCCCGAGCCAGC\tGGFGFGGDGGGGGGCGGGD@EFGGFFGEGGGGGGGFECDEDFFGGGFGGGGFGDEGDGGEGGGGGG:FGGG@CFECFGGGGGFFGGGGGGF<CGGEGGGEFGGGEE7FGFF,=B=DBBFFDDFFFGGGGGGGEGGG:><FEGCF:FGEFGGFFFGGGDGGFEGDFGGGFGGGFFGGGCGGGEGFGGGGFFCFFEDG57CGGCFFC6C*CEGG6:CGGG:6<C>>CEFDGB7B5/<<:9<>>><F279?FG<>>>:>:D(47:6<26)402346>2<>(-49??0\n+GATAAGCCAACTGCCATCTAGAAT\tab\t2\tM01897:257:000000000-AYB6W:1:1105:25798:19415 1:N:0:1\tGATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFFFFGGFFGDB9FFGFFFF0F?:?FFFFFFFFFFFFBF@FFBA?B9;9B9>BB>FFF>FF>><?4\n' |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/DCS_Mutations_test_data_VA.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DCS_Mutations_test_data_VA.tabular Wed Nov 20 17:47:35 2019 -0500 |
b |
@@ -0,0 +1,5 @@ +#SAMPLE CHR POS A C G T CVRG ALLELES MAJOR MINOR MAF BIAS +__NONE__ ACH_TDII_5regions 505 1 2208 0 0 2209 1 C A 0.00045 1.09465 +__NONE__ ACH_TDII_5regions 571 0 2817 0 1 2818 1 C T 0.00035 1.04139 +__NONE__ ACH_TDII_5regions 958 0 1 0 14667 14668 1 T C 7e-05 1.03624 + |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/DCS_test_data_VA.bam |
b |
Binary file test-data/DCS_test_data_VA.bam has changed |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/DCS_test_data_VA.bam.bai |
b |
Binary file test-data/DCS_test_data_VA.bam.bai has changed |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/Interesting_Reads_test_data_VA.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Interesting_Reads_test_data_VA.fastq Wed Nov 20 17:47:35 2019 -0500 |
b |
b'@@ -0,0 +1,124 @@\n+@GATAACCTTGCTTCGTGATTAATC.ab.1\n+CTAGAGGGCCAGACCCTGGAGAGAATGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC\n++\n+GGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGAFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGG8<FED@FGEGGGGGGGGGGGGGGGGGGGGGGGGFEFGGGGGGGEEGGGGGGGGGCECE8EGGGEFFGGGGGGFGGGDEDD5EGGGGGFGGGDCFCFGFGFGCFAGGFDFFFEEGGFF3><:>>FD>FFC=4=:0<;DD>6461992<)892<AFBFFFFFF244:-1:1>:=0306(4)-.42((44(667(449?0,\n+@GATAACCTTGCTTCGTGATTAATC.ba.2\n+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGGGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG\n++\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGFGGGGGGFCFGGGGFGGGGEGGFGGGGGGGFFGGGGGGGGGGGGGGGGBB?CFGGGGGGCCGGFGGGGGGGCFGECC79CEECGDDD99CFGGGGF4>>GG>BDE@BBFG5=B?AF98::A42<EF?;B::((7:?7???<)/:?91;1,6?F?29902(\n+@GATAACCTTGCTTCGTGATTAATC.ba.2\n+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACACCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGATGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCAAGAAGCGGGACGGCCGTAAGTCACAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGG\n++\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGDGGGGEEGGGGGGGGGGGGGGGGGGGCEFDFGGGFGFGGEGEBFGG<FGFCFEFDFDF<DGGGGGGFGFGFGGGGGFGGGCE:FFBFGGGGDGGGGGGGGDD@FGGFC6AE7E1CGGFGCCFGGGGCEGFGGGCCFG9A*59@FGD><?9=CFF6>3BBDFFF392?G)-96<2<:<:44<232:B3:F>6??F0(34A248:>?1,(.404-,((4(\n+@GATAACCTTGCTTCGTGATTAATC.ba.2\n+CTAGAGGGCCAGACCCTGGAGAGAAGGAGCCCAGCAGAGCCAGCCAGTCCCACACCGCCACCAGGCGCCCGGGAGACCCCAGAGCCACAGGAGAGGCCTTTGGGGACCCAGA
TGGGAAGTGGGCTCGAGGGGTCTGAGGGGGCCCCTCTGGGACCAGGACCGGGCCAGGCCAACTTTGTCCCCACACTGGGCACAGGGCCAGGAGTGAGGGCTCCAGAAGCGGGACGGCCGTAAGTCCCAGGATTCCCGTCCGTCCTGGCAGCTTTGGCGTGTCCCGAGCCAGC\n++\n+FGEF9FGGCCG@FGGGGGGCFCC@EGGG<FFGGFGGCGGGGGEG?FGGGGDGGGGGGGGFEFGGGGGGGGG7==F:F,=FEGGFFFFG<<F<=8FG>CAF9E8CFCEFFFFF,?F=F8FFD=,DFFE+@CGGGFF7D<FGGEFEGGFG2DCCFGECC*=CGEGGGGGGGGCCFFFGF7FFGFGGGGGGGFFG=E56C55CEGF3:F*9*./>FG***27)?::D)5557@>BFD@)/))).(().9<2((-29BF>F4(83,:12-)4)2,3??<<1:(7>((,\n+@GATAACCTTGCTTCGTGATTAATC.ab.2\n+GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCTCGGGACACG\n++\n+FGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGFFGGGGGGGGGGGGGGGGCFGGFFGGGCGGGGGGGGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGEGFGGFGFFGFGFGFFBEFGFFFF7F?FFB?DF>FDFFB:)9>FBFFF?F099E>;<?1:<?0>F0;BB1\n+@GATAACCTTGCTTCGTGATTAATC.ba.1\n+GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTGCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCGCGGGACACG\n++\n+GGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGDGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGDGCEEFGGGGGCGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGFGGGCGFFGGGGGGGGGGFFFFGGGGCEG==CECFFCDGGGGGGGGGGGGG597*<FGGFGDFC35>+*:=6FDFF4CFFF9B204>G?FE)5FAF?:7>FBB<A?FB(9?<AFFF<0?B?F4:BFF2>B69>;B))6<<?(,(46((4,42(7>926(82\n+@GATAACCTTGCTTCGTGATTAATC.ba.1\n+GATCCTGCCGTGTGGACTCTGTGCGGTGCCCGCAGGGCGGTGCTGGCGCTCGCCTATCGCTCTGCTCTCTCTTTGTAGACGGCGGGCGCTAACACCACCGACAAGGAGCTAGAGGTTCTCTCCTTGCACAACGTCACCTTTGAGGACGCCGGGGAGTACACCTGCCTGGCGGGCAATTCTATTGGGTTTTCTCATCACTCTGCGTGGCTGGTGGTTCTGCCAGGTACCGGCTTCTGCTGCTGCTGCTGCTCCGCACTGTCTGGGGGACGCTGGCCCGGG
ACACG\n++\n+FDCCF9FFDFGGGGGGG'..b'CAGGCTCCCTACGCTACGGGGTGGGCTTTTTCCGTTTCATCTTGGTGTTGCCGGCTGGGACGCCTTGCGCC\n++\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG7FGGGGGGGGGGGC<FGGGDGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGDEC8C>C5*:/C:*:<+2/>C:*:*+*<>?+*+0<5:/>E5<35***<6293*935=DC)))1707C5)(1*))())()*06)(((0,(*(,(,(-4(9),4D6(4((5)4*(,).2))-).5)5:228))-1(-(((((-((,()5(-(\n+@CCTCCCGGCAGTGCGAAAATGTCA.ba.2\n+CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGACGCGGGCAGTGTGTATGCAGTCATCCTCAGCTACGGGCTGGGCTTCTTCCTGTTTATCCTGGTGGTGGCGGCTGTGTCGCTCTGCCGTC\n++\n+CFGGAFCFCFGGGFDGDDDGGDGGGG;F:BFGEGFGGGGFF<FFDECG@CFDGGF@FECFAEGFGGGGGAFFEGGGEGF<?E@FFGFEFGEGG+BEF=<FGGCFCFGGGGGGGG8FDFGGDF@FFGGGEEG*88:C88AFEC>8A:@;EFG8>:EEGE0<CCF+<E:CE/C8C*8C*;;C:0*;=EFEDG*/0*7*:7*18*27:CFGD?>>7+CGG>?F:?4*7?FG6).-))7)/<BF0)6.)/--/)67.:F209304(((493(,:5-)(2;:<2).4((\n+@CCTCCCGGCAGTGCGAAAATGTCA.ba.2\n+CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC\n++\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGC?>DFGGFGGGGGGFFFFFFFFFF@FFFFCDFGF?FFAFFFDAAFFBFB9?FFD08<<6?BFFF;F?2<??6??<7>B>9\n+@CCTCCCGGCAGTGCGAAAATGTCA.ba.2\n+CTAGACTCACTGGCGTTACTGACTGCGAGACCCTCCAGACAAGGCGCGTGCTGAGGTTCTGAGCCCCCTTCCGCTCCCAGTGGTGCCTGCGGCTCTGGGCCAGGGGCATCCATGGGAGCCCCGTGGGGGGGGGGCCAGGCCAGGCCTCAACGCCCATGTCTTTGCAGCCGAGGAGGAGCTGGTGGAGGCTGACGAGGCGGGCAGTGTGTATGCAGGCATCCTCAGCTACGGGGTGGGCTTCTTCCTGTTCATCCTGGTGGTGGCGGCTGTGACGCTCTGCCGCC\n++\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=8FGGEGDGGGGGGGGCFFGGGGGGGGGFFFFFFFFFGFFFFB5<BEFB>8AABAFF<9<5FBF?):F:B?:2@FFFF1.54<?:.323<?FF9\n+@CCTCCCGGCAGTGCGAAAATGTCA.ab.2\n+CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGTGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGAAGCCCACCCCGT\n++\n+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGBCGGGGGGGGGGGGGGCFGGGGGGGGEGGEGGGGGGGEGGGGGGGGGGGGGGGDGDDDEFDGGFFGFFFFFGFFFF>EFBFFFGFFFFF:BFFF?F?FFFFFF?F<BBF??BBFFFFBBFF\n+@CCTCCCGGCAGTGCGAAAATGTCA.ba.1\n+CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCACCAGGAGGCCTGGCGGGCCGGCAGCTCAGAACCTGATATCTACTTTCTGTTAGCTGTCGCTCGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGTGGGAGCCTAGCCCTTTCTTGGGGTGGCTGCGCAGGCGGCAGAGCGTCACAGCTGCTACAACCAGGATGAACAGGAAGAGCCCCACCCCGTC\n++\n+FCF<9C@F8E9@FGC,,,,<,CF<,C@B@CC@<F,,@F::FD+FC@@F,CFFEEDFGD:C=<<B?FF:E8,B,B,AC<FA8C44++B=>F7F?+A7FF+==<F+:+@7+AFB,8C:F**>CC@F?CCFFCFC@C,26,3224@C@C,,?CG+<+2CFC*:*:);C7E*21*9CE**>DDFC7+:0=/))5C)1)(*)00>*9:(.4(,577:*=47)721),,),(-(4(47()((43460(.)(0..).))).4(()(,(,)6)((((,4((((4(-((((((\n+@CCTCCCGGCAGTGCGAAAATGTCA.ba.1\n+CTAGGCTCTACATGGTGAGCAGAGACGAGGAGAGGGGAGCCCGCCTGGCTGCAGAGAGGGCTCACACAGCCCAGGACCAGCGTGGGCCGAGGTGGGGCTCCAGGAGGCCTGGCGGGCAGGCAGCTCAGAACCTGGTATCTACTTTCTGTTACCTGTCGCTTGAGCGGGAAGCGGGAGATCTTGTGCGCGGTGGGGGAGCCCAGGCCTTTCTTGGGGGGGCTGCGCAGGCGGCAGAGCGTCACAGCCGCCACCACCAGGATGAACAGGAAGCAGCCCAGCCCGT\n++\n+GGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGFEGDFGFGGGGFGFGGFGGGEG?FGCDGGEGGGGGGGGG6>FEGFDFGGFFGGGEE3DFF@=@FFGF2?>FB9FFFFFBFFFBFFF
FFF9>>F>F68?>>?:BABFFFFF6B??:?BF5<>BB<49?:?:?(4?:0:0(.3399\n' |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/Interesting_Reads_test_data_VA.trim.bam |
b |
Binary file test-data/Interesting_Reads_test_data_VA.trim.bam has changed |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/Interesting_Reads_test_data_VA.trim.bam.bai |
b |
Binary file test-data/Interesting_Reads_test_data_VA.trim.bam.bai has changed |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/SSCS_counts_test_data_VA.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SSCS_counts_test_data_VA.json Wed Nov 20 17:47:35 2019 -0500 |
[ |
@@ -0,0 +1,1 @@ +[{"ACH_TDII_5regions#505": {"ab": 2, "ba": 1}, "ACH_TDII_5regions#571": {"ab": 1, "ba": 1}, "ACH_TDII_5regions#958": {"ab": 1, "ba": 1}}, {"ACH_TDII_5regions#505": {"ab": 1, "ba": 1}, "ACH_TDII_5regions#571": {"ab": 2, "ba": 1}, "ACH_TDII_5regions#958": {"ab": 1}}] \ No newline at end of file |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/SSCS_test_data_VA.bam |
b |
Binary file test-data/SSCS_test_data_VA.bam has changed |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/SSCS_test_data_VA.bam.bai |
b |
Binary file test-data/SSCS_test_data_VA.bam.bai has changed |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/mutant_reads_summary_short_trim_test_data_VA.xlsx |
b |
Binary file test-data/mutant_reads_summary_short_trim_test_data_VA.xlsx has changed |
b |
diff -r 000000000000 -r 8d29173d49a9 test-data/tag_count_dict_test_data_VA.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tag_count_dict_test_data_VA.json Wed Nov 20 17:47:35 2019 -0500 |
[ |
@@ -0,0 +1,1 @@ +[{"GATAACCTTGCTTCGTGATTAATC": {"ACH_TDII_5regions#505": "A"}, "GATTGGATAACGTTGTGGCAATTG": {"ACH_TDII_5regions#571": "T"}, "CCTCCCGGCAGTGCGAAAATGTCA": {"ACH_TDII_5regions#958": "C"}}, {"ACH_TDII_5regions#505": [1, 1, 173.0], "ACH_TDII_5regions#571": [1, 1, 143.0], "ACH_TDII_5regions#958": [0, 1, 195.0]}] \ No newline at end of file |
b |
diff -r 000000000000 -r 8d29173d49a9 va_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/va_macros.xml Wed Nov 20 17:47:35 2019 -0500 |
b |
@@ -0,0 +1,13 @@ +<macros> + <xml name="citation"> + <citations> + <citation type="bibtex"> + @misc{duplex, + author = {Povysil, Gundula and Heinzl, Monika and Salazar, Renato and Stoler, Nicholas and Nekrutenko, Anton and Tiemann-Boege, Irene}, + year = {2019}, + title = {{Variant Analyzer: a quality control for variant calling in duplex sequencing data (manuscript)}} + } + </citation> + </citations> +</xml> +</macros> \ No newline at end of file |