Repository 'medaka_consensus_pipeline'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/medaka_consensus_pipeline

Changeset 3:35666d44fe7a (2020-09-01)
Previous changeset 2:6a87478ed985 (2020-06-18) Next changeset 4:a1b70f038b4a (2020-10-13)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44"
modified:
macros.xml
medaka_consensus.xml
test-data/basecalls.fastq
test-data/variants.vcf.gz
added:
annotateVCF.py
test-data/annotate_vcf_test.pileup
test-data/annotate_vcf_test.vcf
test-data/annotated.vcf
test-data/bwa-mem-mt-genome.fa.fai
test-data/medaka_test.bam
test-data/medaka_test.bam.bai
test-data/medaka_test.hdf
test-data/ref.fasta
test-data/ref.fasta.fai
b
diff -r 6a87478ed985 -r 35666d44fe7a annotateVCF.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/annotateVCF.py Tue Sep 01 03:08:04 2020 -0400
[
b'@@ -0,0 +1,398 @@\n+#!/usr/bin/env python3\n+\n+# Takes in VCF file and a samtools mpileup output file\n+# Fills in annotation for the VCF file including AF, DP\n+# SB, and DP4\n+#\n+# Usage statement:\n+# python annotateVCF.py in_vcf.vcf in_mpileup.txt out_vcf.vcf\n+#\n+# Can generate in_mileup.txt with samtools mpileup (and can restrict which sites to generate pileups for with in_vcf.vcf)\n+\n+# 08/24/2020 - Nathan P. Roach, natproach@gmail.com\n+\n+import sys\n+from math import isnan, log10\n+\n+from scipy.stats import fisher_exact\n+\n+\n+def pval_to_phredqual(pval):\n+    return int(round(-10. * log10(pval)))\n+\n+\n+def parseSimpleSNPpileup(fields, ref_base, alt_base):\n+    base_to_idx = {\n+        \'A\': 0,\n+        \'a\': 0,\n+        \'T\': 1,\n+        \'t\': 1,\n+        \'C\': 2,\n+        \'c\': 2,\n+        \'G\': 3,\n+        \'g\': 3\n+    }\n+\n+    base_to_idx_stranded = {\n+        \'A\': 0,\n+        \'T\': 1,\n+        \'C\': 2,\n+        \'G\': 3,\n+        \'a\': 4,\n+        \'t\': 5,\n+        \'c\': 6,\n+        \'g\': 7\n+    }\n+    ref_base2 = fields[2]\n+    counts = [0, 0, 0, 0]\n+    stranded_counts = [0, 0, 0, 0, 0, 0, 0, 0]\n+    ref_idx = base_to_idx[fields[2]]\n+    dp = int(fields[3])\n+    carrot_flag = False\n+    ins_flag = False\n+    ins_str = ""\n+    ins_len = 0\n+    insertion = ""\n+    del_flag = False\n+    del_str = ""\n+    del_len = 0\n+    deletion = ""\n+    # dollar_flag = False\n+    for base in fields[4]:\n+        if carrot_flag:\n+            carrot_flag = False\n+            continue\n+        if ins_len > 0:\n+            insertion += base\n+            ins_len -= 1\n+            continue\n+        if del_len > 0:\n+            deletion += base\n+            del_len -= 1\n+            continue\n+        if ins_flag:\n+            if base.isdigit():\n+                ins_str += base\n+            else:\n+                ins_len = int(ins_str) - 1\n+                insertion = base\n+                ins_flag = False\n+        elif del_flag:\n+            if base.isdigit():\n+                del_str += base\n+            else:\n+                del_len = int(del_str) - 1\n+                deletion = base\n+                del_flag = False\n+        else:\n+            if base == \'^\':\n+                carrot_flag = True\n+                continue\n+            elif base == \'$\':\n+                continue\n+            elif base == \'+\':\n+                ins_flag = True\n+            elif base == \'-\':\n+                del_flag = True\n+            elif base == \'.\':\n+                counts[ref_idx] += 1\n+                stranded_counts[base_to_idx_stranded[ref_base2]] += 1\n+            elif base == \',\':\n+                counts[ref_idx] += 1\n+                stranded_counts[base_to_idx_stranded[ref_base2.lower()]] += 1\n+            elif base == \'N\' or base == \'n\':\n+                continue\n+            elif base == \'*\':\n+                continue\n+            else:\n+                counts[base_to_idx[base]] += 1\n+                stranded_counts[base_to_idx_stranded[base]] += 1\n+    af = float(counts[base_to_idx[alt_base]]) / float(sum(counts))\n+    if float(sum(stranded_counts[0:4])) == 0:\n+        faf = float("nan")\n+    else:\n+        faf = float(stranded_counts[base_to_idx_stranded[alt_base]]) / float(sum(stranded_counts[0:4]))\n+    if float(sum(stranded_counts[4:])) == 0:\n+        raf = float("nan")\n+    else:\n+        raf = float(stranded_counts[base_to_idx_stranded[alt_base.lower()]]) / float(sum(stranded_counts[4:]))\n+    dp4 = [stranded_counts[base_to_idx_stranded[ref_base]],\n+           stranded_counts[base_to_idx_stranded[ref_base.lower()]],\n+           stranded_counts[base_to_idx_stranded[alt_base]],\n+           stranded_counts[base_to_idx_stranded[alt_base.lower()]]]\n+    return (dp, af, faf, raf, dp4)\n+\n+\n+def parseIndelPileup(fields, ref_base, alt_base):\n+    counts = [0, 0, 0, 0, 0, 0, 0, 0, 0]  # indel ref match, indel fwd ref match, indel rev ref match, indel alt match, indel fwd alt match, indel rev alt match, other, other fwd, other rev\n+    ref_base2 = fields[2]\n+\n+    carrot_'..b'xamine = {}\n+    for line in in_vcf:\n+        if line[0:2] == "##":\n+            out_vcf.write(line)\n+        elif line[0] == "#":\n+            out_vcf.write("##annotateVCFVersion=0.1\\n")\n+            out_vcf.write("##INFO=<ID=DP,Number=1,Type=Integer,Description=\\"Raw Depth\\">\\n")\n+            out_vcf.write("##INFO=<ID=AF,Number=1,Type=Float,Description=\\"Allele Frequency\\">\\n")\n+            out_vcf.write("##INFO=<ID=FAF,Number=1,Type=Float,Description=\\"Forward Allele Frequency\\">\\n")\n+            out_vcf.write("##INFO=<ID=RAF,Number=1,Type=Float,Description=\\"Reverse Allele Frequency\\">\\n")\n+            out_vcf.write("##INFO=<ID=SB,Number=1,Type=Integer,Description=\\"Phred-scaled strand bias at this position\\">\\n")\n+            out_vcf.write("##INFO=<ID=DP4,Number=4,Type=Integer,Description=\\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\\">\\n")\n+            out_vcf.write(line)\n+        else:\n+            fields = line.strip().split()\n+            if fields[0] in to_examine:\n+                to_examine[fields[0]][int(fields[1])] = (fields[3], fields[4])\n+            else:\n+                to_examine[fields[0]] = {int(fields[1]): (fields[3], fields[4])}\n+    in_vcf.close()\n+    data = {}\n+\n+    # Populate data dictionary, which relates chromosome and position to the following:\n+    #  depth of coverage\n+    #  allele frequency\n+    #  forward strand allele frequency\n+    #  reverse strand allele frequency\n+    #  dp4 - depth of coverage of ref allele fwd strand, DOC of ref allele rev strand, DOC of alt allele fwd strand, DOC of alt allele rev strand\n+    for line in in_mpileup:\n+        fields = line.strip().split()\n+        if fields[0] not in to_examine:\n+            continue\n+        if int(fields[1]) not in to_examine[fields[0]]:\n+            continue\n+        (ref_base, alt_base) = to_examine[fields[0]][int(fields[1])]\n+        if len(ref_base.split(\',\')) > 1:  # Can\'t handle multiple ref alleles\n+            continue\n+        if len(alt_base.split(\',\')) > 1:  # Can\'t handle multiple alt alleles\n+            continue\n+        if len(ref_base) > 1 or len(alt_base) > 1:\n+            if len(ref_base) > 1 and len(alt_base) > 1:  # Can\'t handle complex indels\n+                continue\n+            data[(fields[0], int(fields[1]))] = parseIndelPileup(fields, ref_base, alt_base)\n+        if len(ref_base) == 1 and len(alt_base) == 1:\n+            data[(fields[0], int(fields[1]))] = parseSimpleSNPpileup(fields, ref_base, alt_base)\n+    in_mpileup.close()\n+    # Reopen vcf, this time, skip header, annotate all the sites for which there is an entry in data dictionary\n+    # (Sites without entries have either multiple ref or alt bases, or have complex indels. Not supported (for now), and not reported as a result)\n+    in_vcf = open(in_vcf_filepath, \'r\')\n+    for line in in_vcf:\n+        if line[0] == \'#\':\n+            continue\n+        fields = line.strip().split(\'\\t\')\n+        if (fields[0], int(fields[1])) not in data:\n+            continue\n+        (dp, af, faf, raf, dp4) = data[(fields[0], int(fields[1]))]\n+        dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]\n+        _, p_val = fisher_exact(dp2x2)\n+        sb = pval_to_phredqual(p_val)\n+        if fields[7] == "":\n+            info = []\n+        else:\n+            info = fields[7].split(\';\')\n+        info.append("DP=%d" % (dp))\n+        info.append("AF=%.6f" % (af))\n+        if isnan(faf):\n+            info.append("FAF=NaN")\n+        else:\n+            info.append("FAF=%.6f" % (faf))\n+        if isnan(raf):\n+            info.append("RAF=NaN")\n+        else:\n+            info.append("RAF=%.6f" % (raf))\n+        info.append("SB=%d" % (sb))\n+        info.append("DP4=%s" % (\',\'.join([str(x) for x in dp4])))\n+        new_info = \';\'.join(info)\n+        fields[7] = new_info\n+        out_vcf.write("%s\\n" % ("\\t".join(fields)))\n+    in_vcf.close()\n+    out_vcf.close()\n+\n+\n+if __name__ == "__main__":\n+    annotateVCF(sys.argv[1], sys.argv[2], sys.argv[3])\n'
b
diff -r 6a87478ed985 -r 35666d44fe7a macros.xml
--- a/macros.xml Thu Jun 18 05:07:32 2020 -0400
+++ b/macros.xml Tue Sep 01 03:08:04 2020 -0400
b
@@ -5,6 +5,7 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">medaka</requirement>
+            <requirement type="package" version="1.4.1">scipy</requirement>
         </requirements>
     </xml>
     <xml name="version_command">
b
diff -r 6a87478ed985 -r 35666d44fe7a medaka_consensus.xml
--- a/medaka_consensus.xml Thu Jun 18 05:07:32 2020 -0400
+++ b/medaka_consensus.xml Tue Sep 01 03:08:04 2020 -0400
b
@@ -1,4 +1,4 @@
-<tool id="medaka_consensus_pipeline" name="medaka: Consensus pipeline" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
+<tool id="medaka_consensus_pipeline" name="medaka: Consensus pipeline" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@">
     <description>Assembly polishing via neural networks</description>
     <macros>
         <import>macros.xml</import>
@@ -19,7 +19,7 @@
 -i '$i'
 -d '$d'
 
-|& tee '$out_log'
+2>&1 | tee '$out_log'
     ]]></command>
     <inputs>
         <param argument="-i" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="Select basecalls"/>
@@ -76,12 +76,14 @@
             </output>
             <output name="out_probs">
                 <assert_contents>
-                    <has_size value="192187" delta="100"/>
+                    <!-- <has_size value="192187" delta="100"/> -->
+                    <has_size value="165271" delta="100"/>
                 </assert_contents>
             </output>
             <output name="out_calls">
                 <assert_contents>
-                    <has_size value="533242" delta="100"/>
+                    <!-- <has_size value="533242" delta="100"/> -->
+                    <has_size value="343197" delta="100"/>
                 </assert_contents>
             </output>
         </test>
@@ -96,24 +98,29 @@
                 <assert_contents>
                     <has_n_lines n="65"/>
                     <has_line line=">4 length=3792 depth=114.52x circular=true"/>
-                    <has_line line="CATCTCTTT"/>
+                    <has_line line="ATCTCTTT"/>
                 </assert_contents>
             </output>
             <output name="out_probs">
                 <assert_contents>
-                    <has_size value="192710" delta="100"/>
+                    <has_size value="166251" delta="100"/>
+                    <!-- <has_size value="192710" delta="100"/> -->
                 </assert_contents>
             </output>
             <output name="out_calls">
                 <assert_contents>
-                    <has_size value="533163" delta="100"/>
+                    <has_size value="343197" delta="100"/>
+                    <!-- <has_size value="533163" delta="100"/> -->
                 </assert_contents>
             </output>
             <output name="out_draft">
                 <assert_contents>
-                    <has_n_lines n="6"/>
-                    <has_line line="chain 3789 4 3792 + 0 3792 4 3789 + 0 3789 1"/>
-                    <has_line line="1204"/>
+                    <has_line line="chain 3788 4 3792 + 0 3792 4 3788 + 0 3788 1"/>
+                    <has_line line="1409 1 0"/>
+                    <has_line line="440 1 0"/>
+                    <has_line line="736 1 0"/>
+                    <has_line line="853 1 0"/>
+                    <has_line line="350"/>
                 </assert_contents>
             </output>
             <output name="out_polished">
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/annotate_vcf_test.pileup
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate_vcf_test.pileup Tue Sep 01 03:08:04 2020 -0400
b
@@ -0,0 +1,3 @@
+NC_045512.2 45 G 31 CCCCACCCCCCccccccccc,ccCCC*,+3ttccCC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+NC_045512.2 685 A 75 .-9AAGTCATTT,-9aagtcattt,-9aagtcattt*.-9AAGTCATTT*G.-9AAGTCATTT.-9AAGTCATTT,-9aagtcattt*,-9aagtcattt,-9aagtcattt,-9aagtcattt,-9aagtcattt,,-9aagtcattt,-9aagtcattt,-9aagtcatttG.-9AAGTCATTT*,-9aagtcatttG.-9AAGTCATTT.-9AAGTCATTTG.-9AAGTCATTTG.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT*.-9AAGTCATTT*.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTTG.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT*,-9aagtcattt,-9aagtcattt*,-9aagtcattt,-9aagtcattt,-9aagtcattt,-9aagtcattt,-9aagtcattt,-9aagtcattt*,-4aagt,-9aagtcattt,-9aagtcattt**G,-9aagtcattt,-9aagtcattt,-9aagtcatttG,-9aagtcattt,-9aagtcattt*,-9aagtcattt* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+NC_045512.2 1813 T 51 ..-3AAA..-2AA..-1A*.-1A.-3AAA*..-1A..-1A.-2AA.-1A.-3AAA.-2AA.-2AA.-3AAA.,-1a,-1a,-6aaaaaa,,,-1a,,-2aa,-2aa,,-1a,-1a,*,-1a,-1a,+1a,-2aa,-1a,+1a,-1a,,-2aa.-3AAA,-1a.-2AA,-1a*,-2aa,-2aa ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/annotate_vcf_test.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate_vcf_test.vcf Tue Sep 01 03:08:04 2020 -0400
b
@@ -0,0 +1,9 @@
+##fileformat=VCFv4.1
+##medaka_version=1.0.3
+##contig=<ID=NC_045512.2>
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Medaka genotype.">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Medaka genotype quality score">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
+NC_045512.2 45 . G C 46.359 PASS GT:GQ 1:46
+NC_045512.2 685 . AAAGTCATTT A 260.487 PASS GT:GQ 1:260
+NC_045512.2 1813 . TA T 11.034 PASS GT:GQ 1:11
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/annotated.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated.vcf Tue Sep 01 03:08:04 2020 -0400
b
@@ -0,0 +1,16 @@
+##fileformat=VCFv4.1
+##medaka_version=1.0.3
+##contig=<ID=NC_045512.2>
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Medaka genotype.">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Medaka genotype quality score">
+##annotateVCFVersion=0.1
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw Depth">
+##INFO=<ID=AF,Number=1,Type=Float,Description="Allele Frequency">
+##INFO=<ID=FAF,Number=1,Type=Float,Description="Forward Allele Frequency">
+##INFO=<ID=RAF,Number=1,Type=Float,Description="Reverse Allele Frequency">
+##INFO=<ID=SB,Number=1,Type=Integer,Description="Phred-scaled strand bias at this position">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
+NC_045512.2 45 . G C 46.359 PASS DP=31;AF=0.900000;FAF=0.937500;RAF=0.857143;SB=6;DP4=0,2,15,12 GT:GQ 1:46
+NC_045512.2 685 . AAAGTCATTT A 260.487 PASS DP=75;AF=0.880000;FAF=0.789474;RAF=0.972973;SB=0;DP4=0,0,30,36 GT:GQ 1:260
+NC_045512.2 1813 . TA T 11.034 PASS DP=51;AF=0.607843;FAF=0.521739;RAF=0.678571;SB=0;DP4=0,0,12,19 GT:GQ 1:11
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/basecalls.fastq
--- a/test-data/basecalls.fastq Thu Jun 18 05:07:32 2020 -0400
+++ b/test-data/basecalls.fastq Tue Sep 01 03:08:04 2020 -0400
b
b"@@ -498,519 +498,3 @@\n GTTGTACTTCGTTCGGTTACGTATTGCTGCGCGCGCACTCTCTGTCGTCAACGGCGTCAGATATTATTATAAGAGACAGCTCCTTCACCAAGCATTGATATTTCATTTAATTCGCCAGTCCTATATAAAAAAAATCTCTTTGTTTGATCTGTTGCTATTATTGCAATATCGTTAATTCTATTATCTATTCTTTTTAGGTTACCTATTATTTCATCTTTTCTTTTTGTGTTAGTAGTTTTGATACGTTTTATCGTTGCTTCTTCATTTATGTTTCTTGATGTGGTTATTATCGCTTGCGGTTTTGAATTCATATAGCTTACCTTTTTTAGTTTGTTTTATTTCTCCGTTTATTTCTTGTGCTTTTTTGTGTTATTTTTATAGTAATCAATTCTTCTGGGGGTTCTTTTTCCGTTCTGTGTTTCACTAACCTTAATCATTTAAAACATTTCCCCCCTTTCCTGAGAGCCGAGGGGTTTGTCGCAATGGAGGGTGTTTGATAATATTGACTCAACATTGGGGGACACGCTTGATTCTGTCTCTTATATACAATCTCCGAGCCCACGAGACCAGAGTGCGCGCAGCAATGCGTAACT\n +\n )8;541>AA?9F3@'.FD:->?E?D?AB56<70+)*&%)/02-2<<67;$$/$06518..1(*',)&(24')$&)16:6@@89EA=3)703,3''+0=85MG>B>@C?:;:,>$%%).))6-&'&%BD???@>=:?B;<?D>76750:>C9EE9D=@<8./1-.;>GE?A<NIDDECAGCFIC>BECA@;D7==491<E1%==A9HE<DD>D@>?@CM;)=@KHD9BABA98@A<BENCA;:6;3<=CAA@@@FI>78834.''24DA99@BI?MFFDD<>27*=<:?($4$%.11705A@MD?<8A=@7<@4C1ACCB@>GDB?8,1(8=F==:?A?FCC9A3><<=;79<C==<%?)9>?IG>='>,AF=CAJIC6AB@>0?9<68&*8K@C?467.<=5134LID564>:621=ACEH<@E:8<343/BA7C779<?AD@>9?BD23DB;889@:@=>=**342,+68:4CELD@G2C?@@BDC9)=GDHH@E@FBBGBGJEDBBA007/65$%<?FKB=B?((+EBJ>>?;00:;9;60+&&;798>>&924&ACG8>>=BC@?8:2;EA7.BB1+481254D>:5/#%\n-@f2a18031-a79d-4fd4-8200-836066a428c6\n-GTGTGTATTACTTCGTTCAGTTACGTATTGCTGAAAAAGAAACAGTGGCATAGTGATGCATAAATAAACACATTATTACTATGTAAACGTAAACAGAAATGGAGAGCAAGTTTAAACATGATAAAGATATCCATAAAAAGAAATTTTAAAGTGTTTCGAGGAACATAACTCTCAAACAACTAAAAACACTTAATGTAATATCCGGAAAAAACAACTTTTGGGAAAACATCGATATTAGACTGGAAATATTTTCATTTATGACGTAAAAAAACCCAGCAGCTCTGTTAAAGTAGCTTTAGAAATGAAGCATCTCTTTTAAATAAAGCTCATTTGGGATAGCTATTTTTACAACATGGACACATCAAAGAAATATCCATAACAATAAGAGATGAAAGGTCAAGAAGTAACACAAAAATACTCAACGATAGATAACACCTAGAAAGTCTGAATCAAGCGTGTCCCTCAATGTTGAGTCAATATTATCAAACACCCCTCCATTTACGACAAACCCTCGGCTCTCAGGAAAAGGGAATGTTTTAATGATTAAGGTTAAGTGAAGCCACAGAACGGAAAAAGAACCCCCCAGAAGAATTGATTACTATAAAAATAACACAAAAAGCACAAGAAATAAACGGAGAAATAAACAAACCCCAAAGGTAAGCTATATGAATTCAAAACCGCAGCGATAATAACCACATCAAAAACATAAACAAAGAAGCAACGATAAAAAACGTATCCAAAACTACTAACACAAAGAAAAGATGAAATAATAGGCAACCTAAAAAGAATAGATAATAGAATTAACGATATTGCAATAATAGCAACAGATTAAACAAAGAGATTTTTAGATATAGGACCTGTCTCTTATACACATCTGACGCTGCCGACGACAGAGAGTGCGCGCGCAACAATACGTAAC\n-+\n-#(%$%&%'+0..=<<HF>9CC=5D@EFEE<=1,,:F4)*9>?</2D:::=<;7><EM;D:=7;??HJC;:G<<=7@BAAAI=AGHKF09>DIHD98?2;=*,/*(*;+,9+2EBDA?/6>?:672&?=A67=&)GK6(%+19:;?B>*+**86DC@/*(&&-)2:AAC;9*',$(2?::<?FHJGA89004DF@AAFID>**7/5>GQID=A55:;950(,?3:GG@C?B<;7344&&1%%#$$.;99:97./-.2AE?F<556;<B@;934/(&)255BC@>=:<55,21=0>JI>94453759>>=878,*/?>:BEA9>?-$170,03504>@77:>=@>?IB;--''??;78?>D?.(::;/')).&&**6*:89?@A;554*6?<867*/563&&%0(,0..-A;BC==.((:@A)&$,,:8:?B??>:(((/),-*)(*$&$)1346;68<947<;<B7:=;?==5?:@=>ABD@<99%:7<>=>>::,?..2-%%//=FGFGGD@@9=C@6?6049:?6+03>AK@EHIHACG>A?DA88>;A=,++*%$+)&+16B2C=7%-5D53++;FA@G@?%:':51'+&-:9>CE>CE>BCBBA?>=<'0.00.+*8+://,+.AA<G7C7860)50)&&13445-)(-65)&'858+8<;73))%&=A8BB=B=>30%24?FCADBAC?9(+/)=EG><CAA?IQHAKN;,+,.?+,=:E?GIQMJE70,)%$//122,1147976B?KH2'@=8&127:38A<458--7@68;&9FNH3%2<IBECKD@A2--9<7:789.13033,9,,)<=@D9953,-&&(*).97;;'75;6BCRF557?:41.1=?99::;=D=;<0,,.4B25A:$&1-.0025A1;9-7,,.',(6&9?.B>@C-.;76337.2-,'\n-@435ec27b-52bf-4575-b4f5-fcb8892a768b\n-CTATATCTAAAAAATCTCTTTGTTTTGAAGTCTGTTGCTATTATTACAATATCGTTAATTCTATTATCTATTCTTTTAGGTTGCCTATTGTTTCATCTTTTCTTTTTGTGTTAGTAGTTTTTGATACGTTTTTTTATCGTTGCTTCTTTGTTTATGTTTCTTGATGTAGATTTATTATCAACTGCGTTTTAGATTCGTATAAGCTTACCTTTTTAGTTTGTTTTATTTCTCCGTTTGTTTCTTATTGCTTTTGTGTTTTTATAGTAATCAATTCTTCTGGGGGTTCTTTTTCCGTTTCTGTGTTTCACTAACCCAATCATTAAAACATTCCCTTTTCCCCCTGAGAGCCGAGGGTTTGTCGCAATGGAGGGAGTGTTTGAT\n-+\n-;4@:7360AEC=B?77E9GFJ>??H1+,-'484.67258251(4.%0)-66448883434=6</1<<=?;@?;4=L3'47?@4<=?:9:&899@?AADBPE<A@C?E:2%:705(%;@BLJ@>;,%827?SKD=B?<<<HD=679;;;83?@-$$$,/<76982.*+$+0BE@;;;5.1$$$*%%%2574(':B<5#$$$'')***76:?@HD<6;=?@3=@E93<@@::$21675$//7/.,%$(&$&&&)(2&.0??=<0,,5<90='*/'/04--',('9.4;<>HBA,-$%,=80168@D:-5/.-'$##08=??HE@A>4.$)+/2/0255,,*&%*)3-#)99B40'?HHD0-0(/..<<:<1*(%%5,?<',+*\n-@7e5675d5-2114-45cc-917e-631462362ae5\n-CTGATGTGCTTCGTTCAGTTACGTATTGCTGCGCGCACTCTCTGTCGTCGAG"..b"TCCTGTTTTCGGGGTAAAACATCTCTTT\n-+\n-(?A@=CE-3()(49))6,69>H>::/(79,%3(0:&&%*3)-0/23-5$)**+&6;<),%:CC=5?7562**'%%61>=?:;FDF@?:4>56>?3/(<;)0*02/36<<;AAABCDFE::JIF<9;5:4<:?>@-)'3'.1$&&$&,;:00C?'4=>((,@D9H?<B<>@B<;=HG=<L(=()(&7<>@=F<;CB>D?=\n-@7a97d860-e33a-4e27-b5be-c4754a2eb907\n-ATTACGAAGTATCACGTGCCCTGTTATCCAGACTTATGGCAGATTAGCTTCCCGGAGAGAAAACTGTCGAAAACAGACGGTATGAACACCGTAAGCTCCAAAGTGATCACCATTCGCTTTCATGCGCCTTGGCTATGCAGCGAGCTGAAAACGATCCTGACGCATCCTTCCTGTTTTCCCGAAGGGTAAAACATCTCTTT\n-+\n-?>A?'-$%+?'%8$03F;;;=8*57>((?3:E?E?65%,0,.,B>77?AA6<G<9C(,):0IGAD=;AEIJLC@'?3A+/68966>9?<>??:</:<-.'(%32228@;:=CGDA>78:@97424%%$'%')460((8,:;1/4////@;5;?@//5896:.+?:<ACA>@KEA@LD=C22.+01849?D<>@;A=<K5/\n-@cc6e152f-3be2-4154-95c1-265c4f77a7ac\n-TACGGAGTATCACCGTGCCTGTTATCCAGACTTATGGCAGATTAGCTTCCCGGAGAAAACTGCTCCAACAGACGGTATGAACACCGTAAGCTCCCAGAAGTGATCACCATTCGCTTTCTCATGCATAGCTATGCGGCGAGCTGAAAAGCTGATCCTGACGCATCCTTCCTGTTTTTCCCGGGAGTAAAACATCTCTTT\n-+\n-,*635,44()6,87.6'66=(@;0**E=DFCL?77-56732,/(-./1-.//17&(5EE;:)')(6,,4=8>C29;5>8832,>>459;+;;)-(+$%$*77;6CAABDB?-0*+,(&$+&**8:85=??@))?%.7+*4/+(*.'($#%,,++4;EBB:B>78=8<=@GBDCGB@5;A+1<$6:EGDAG;:>>@@?H\n-@749ba682-4ec2-4367-8bb3-51878f06992d\n-CCGGGAAAGCGGAAGGATCGTCAGGATCGTTGCTACAGCTCTGCTACATATGCATGAAAGCGAATGGTGATGCTTTGGGACTTTACGGTGTTTCATACCGTCTGTTTTCGACAGTTTTCTCTCCAGGGAAACTAATCTGCCAGCAAGTCTGGATAACAGGG\n-+\n-#$$(+('&%,$)5;:8/&(,121165++.-/$$(%)(%#%'#$$$$&&%$%(3/1<222(666:=8<47/&$&)),&'&',-/.39=B>B?=-89;=>=;;;335686E;=9499=PA@-8<%/'/*+,)$77=7/-5//.-&(()**7',./102,&,&'\n-@3d6bdcab-ef5c-4ff8-a928-23066fb4e887\n-CTGTTATCAGACTTATGGCAGATTAGCTTCCCGGAGAAGAAAACTGTCGAAAACAGACGGTATGAACACCGTAAGCTCCCAAAAGTGATCACCATTCATCATGCATAGCTATGCGGGCAGACTGCGAAAACGATCCTGACTCCTTCCTGTTTTCCGGGGTAAAACATCTCTTT\n-+\n-12551)$>,,/,,,AE??>==?6/.'''-8>B03<+,,-=CA@<A@<A98=<17581:*0<BC66@AA<>;;/09:6470999-%171+98==8<;6#'36;AC;7:689<<89%*./(&$++$-+--<-;=>55:4;*%&%*%/454@CAHA/318?G?4599=9?DEA@?E\n-@0b41ea60-ea37-43ef-a912-260c4d33a356\n-CTGTTATCCAGACTTATGGCAGATTAGCTTCCGGAGAAAACTGCTTCGAAAAACAGACGGTATGAACACCGTAAGCTCCCAAAGTGATCACCATTCGCTTTCATGCATAGCGCTATGCAGCGAGCTGAAAACGATCTGACGCATCCTTCCTGTTTTCCCGGGGTAAAACATCTCTTT\n-+\n-=B>B3+++7-2-))%&.%,3**749689:=1B0.3&)(-'$$%#%(*(((('$,)+(/+,,/48:>;62+7:)*.<;9>@DE75@?<?GFGD@DDA$$=>;>?::>-043+).2?;<(&551.;><:8G@6E@<6@7<6@1CE78BDB>?=?57I@'-G3;@B=>@F@?8<?>794-\n-@cd8d2944-a807-434b-92d5-bfa925697c85\n-GTTATCCAGACTTATGGCAGATTAGCTTCCCGGAGAGAAAACTGGCATGAAAACAGACAGGTATGAACACCGTAAACTCGGTGATCACCATTCGCTTTCATGCATAGCTATGCAGCGAGCTGAAAACGATCCTGACGCATCTCCTTCCTGTTTTCCGGGGTAAAACATCTCTTT\n-+\n-8?>>%%06/B@FEFG7=854742')9<=+3B9:<98%;<>3..-(&$*/13,*;8;1+&59ADB55<94<<:;99(&)+#&62-'('668:7855;=C><24B=>==>;B<:30=;=1;9575IF9A=E55<:@?GC;77=069<=25152/56)/,45?>@BE162;>:9DCE\n-@a492dd23-2c03-4d9b-a606-dee7268bd4fa\n-AAAGAGATGTTTTTGCGGAAAACAGGAAGGATGCGTCAGGATCGTTTTCAGCGCTATAGCTATGCATGAAAGCGAATGGTGATCCTTTGGGAGCTTACGGTGTTCATGCCAATCTGTTTTCGACAGTTTCTCTCCGAGGAAGCTAATCTGCCGCAAGTCTAGTTAAC\n-+\n-F:&-/77;:@??=8%%$(#'+()%+9.99AB?==<<7/;;4/;;B>?-+(*2%%&0-1-=5;99GD=88>9>??=IC2:B?:%%*#*-&.1**,--;A::8BID?4&&%8'(<AE3??;C@>B64>:;31&*,(5('7A04.>@CC;AA7%1-1%&-/5'%*%-+,/\n-@4a58265a-1d25-47a1-b851-e7687203734f\n-GGGAAACCCAGGAGGATCGTCAGGATCGTTTTTTCAGCTCGCTGCTATGTATGCGCTGAAAGCGAATGGTGATCACTTTGGGGAGCTTACGGTGTTCATACTGTCTGTTTTCGACAGTTTTCTCTCCGGGAAGCTAATCTGCCATAAGTCTGGATAAC\n-+\n-)**%&&%5'%)*$..5%$$,70?B?:>?GBLB<0))%&'-78>.11/$$+($&$&%***2),=88=@,1A=:3<25;<959E;.9?BB*A<CCHBBB==BCC>=B>B9?LL;AA74C>CNEDF<C%?7<D78.=>C=4=?@BB9@>>2-A14:C;..5\n-@f9a56724-1fa4-4807-bacc-f0a85587afd7\n-GAACACCGTAAGCTCCCAAAGTGATCACCATTCGCTTTTTCATGCATAGCTATGCAGCGACTGGCGATCCTGACGCATCTTCTGTTTTCC\n-+\n-414===BA;3-.21$+&')'%-20-,((+6:57(*+,139@@?A534546/24022**)$$$#&(--++-7<+6*.*(())(,-03<:(,\n-@e525f31a-e4d6-4fe6-a703-7038efa2f178\n-GCTATGCAGCGAGCTGAAAACGATCCTGACGCATCCTTCCTGTTTCCCGGGTAAAACATCTCTTT\n-+\n-;B@E::0+5511=2122@>52((''*<G=E:?A45:79$=JFCC?AJG*024<>HECB@C@?KHI\n-@eeef3ccf-2188-4382-8399-a9442bbbf40a\n-GCTATGCAGCGAGCTGAAAACGATCCTGACGCATCCTTCCTGTTTTCCCGGGGTAAAACATCTCTTT\n-+\n-@=:3335937))43378KC?<?A<>CKMEH?BC55=4C>?KDFCLC4@G363877800+7<=;3;;>\n"
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/bwa-mem-mt-genome.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bwa-mem-mt-genome.fa.fai Tue Sep 01 03:08:04 2020 -0400
b
@@ -0,0 +1,1 @@
+gi|251831106|ref|NC_012920.1| 16569 31 70 71
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/medaka_test.bam
b
Binary file test-data/medaka_test.bam has changed
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/medaka_test.bam.bai
b
Binary file test-data/medaka_test.bam.bai has changed
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/medaka_test.hdf
b
Binary file test-data/medaka_test.hdf has changed
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/ref.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref.fasta Tue Sep 01 03:08:04 2020 -0400
b
@@ -0,0 +1,2 @@
+>NC_045512.2
+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/ref.fasta.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref.fasta.fai Tue Sep 01 03:08:04 2020 -0400
b
@@ -0,0 +1,1 @@
+NC_045512.2 2940 13 2940 2941
b
diff -r 6a87478ed985 -r 35666d44fe7a test-data/variants.vcf.gz
b
Binary file test-data/variants.vcf.gz has changed