Repository 'medaka_variant'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/medaka_variant

Changeset 12:0f5f4a208660 (2021-09-17)
Previous changeset 11:2bf63b38ee9b (2021-09-12) Next changeset 13:222669c4afb6 (2021-11-18)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 86211daa63a6f39524df8759364795b782324303"
modified:
convert_VCF_info_fields.py
test-data/ref.fasta
variant.xml
b
diff -r 2bf63b38ee9b -r 0f5f4a208660 convert_VCF_info_fields.py
--- a/convert_VCF_info_fields.py Sun Sep 12 20:35:26 2021 +0000
+++ b/convert_VCF_info_fields.py Fri Sep 17 20:22:49 2021 +0000
[
b'@@ -7,11 +7,11 @@\n \n # 10/21/2020 - Nathan P. Roach, natproach@gmail.com\n \n+import re\n import sys\n from collections import OrderedDict\n from math import log10\n \n-import scipy\n import scipy.stats\n \n \n@@ -33,84 +33,144 @@\n \n \n def annotateVCF(in_vcf_filepath, out_vcf_filepath):\n+    """Postprocess output of medaka tools annotate.\n+\n+    Splits multiallelic sites into separate records.\n+    Replaces medaka INFO fields that might represent information of the ref\n+    and multiple alternate alleles with simple ref, alt allele counterparts.\n+    """\n+\n     in_vcf = open(in_vcf_filepath, \'r\')\n-    out_vcf = open(out_vcf_filepath, \'w\')\n-    to_skip = set([\'SC\', \'SR\'])\n-    for i, line in enumerate(in_vcf):\n-        if i == 1:\n-            out_vcf.write("##convert_VCF_info_fields=0.2\\n")\n-        if line[0:2] == "##":\n-            if line[0:11] == "##INFO=<ID=":\n-                id_ = line[11:].split(\',\')[0]\n-                if id_ in to_skip:\n+    # medaka INFO fields that do not make sense after splitting of\n+    # multi-allelic records\n+    # DP will be overwritten with the value of DPSP because medaka tools\n+    # annotate currently only calculates the latter correctly\n+    # (https://github.com/nanoporetech/medaka/issues/192).\n+    # DPS, which is as unreliable as DP, gets skipped and the code\n+    # calculates the spanning reads equivalent DPSPS instead.\n+    to_skip = {\'SC\', \'SR\', \'AR\', \'DP\', \'DPSP\', \'DPS\'}\n+    struct_meta_pat = re.compile(\'##(.+)=<ID=([^,]+)(,.+)?>\')\n+    header_lines = []\n+    contig_ids = set()\n+    contig_ids_simple = set()\n+    # parse the metadata lines of the input VCF and drop:\n+    # - duplicate lines\n+    # - INFO lines declaring keys we are not going to write\n+    # - redundant contig information\n+    while True:\n+        line = in_vcf.readline()\n+        if line[:2] != \'##\':\n+            assert line.startswith(\'#CHROM\')\n+            break\n+        if line in header_lines:\n+            # the annotate tool may generate lines already written by\n+            # medaka variant again (example: medaka version line)\n+            continue\n+        match = struct_meta_pat.match(line)\n+        if match:\n+            match_type, match_id, match_misc = match.groups()\n+            if match_type == \'INFO\':\n+                if match_id == \'DPSP\':\n+                    line = line.replace(\'DPSP\', \'DP\')\n+                elif match_id in to_skip:\n                     continue\n-            out_vcf.write(line)\n-        elif line[0] == "#":\n-            out_vcf.write(\'##INFO=<ID=DPSPS,Number=2,Type=Integer,Description="Spanning Reads Allele Frequency By Strand">\\n\')\n-            out_vcf.write(\'##INFO=<ID=AF,Number=1,Type=Float,Description="Spanning Reads Allele Frequency">\\n\')\n-            out_vcf.write(\'##INFO=<ID=FAF,Number=1,Type=Float,Description="Forward Spanning Reads Allele Frequency">\\n\')\n-            out_vcf.write(\'##INFO=<ID=RAF,Number=1,Type=Float,Description="Reverse Spanning Reads Allele Frequency">\\n\')\n-            out_vcf.write(\'##INFO=<ID=SB,Number=1,Type=Integer,Description="Phred-scaled strand bias of spanning reads at this position">\\n\')\n-            out_vcf.write(\'##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases in spanning reads">\\n\')\n-            out_vcf.write(\'##INFO=<ID=AS,Number=4,Type=Integer,Description="Total alignment score to ref and alt allele of spanning reads by strand (ref fwd, ref rev, alt fwd, alt rev) aligned with parasail match 5, mismatch -4, open 5, extend 3">\\n\')\n-            out_vcf.write(line)\n-        else:\n+            elif match_type == \'contig\':\n+                contig_ids.add(match_id)\n+                if not match_misc:\n+                    # the annotate tools writes its own contig info,\n+                    # which is redundant with contig info generated by\n+                    # medaka variant, but lacks a length value.\n+                    # We don\'t need the incomplete line.\n+                '..b'         print(line.strip())  # Print the line for debugging purposes\n+                continue\n+            variant_list = fields[4].split(\',\')\n+            dpsp = int(info_dict[\'DPSP\'])\n+            ref_fwd, ref_rev = 0, 1\n+            dpspf, dpspr = (int(x) for x in info_dict[\'AR\'].split(\',\'))\n+            for i in range(0, len(sr_list), 2):\n+                dpspf += sr_list[i]\n+                dpspr += sr_list[i + 1]\n+            for j, i in enumerate(range(2, len(sr_list), 2)):\n+                dp4 = (\n+                    sr_list[ref_fwd],\n+                    sr_list[ref_rev],\n+                    sr_list[i],\n+                    sr_list[i + 1]\n+                )\n+                dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]\n+                _, p_val = scipy.stats.fisher_exact(dp2x2)\n+                sb = pval_to_phredqual(p_val)\n \n-                    as_ = (sc_list[ref_fwd], sc_list[ref_rev], sc_list[i], sc_list[i + 1])\n-\n-                    info = []\n-                    for code in info_dict:\n-                        if code in to_skip:\n-                            continue\n-                        val = info_dict[code]\n-                        info.append("%s=%s" % (code, val))\n-\n-                    info.append("DPSPS=%d,%d" % (dpspf, dpspr))\n+                as_ = (\n+                    sc_list[ref_fwd],\n+                    sc_list[ref_rev],\n+                    sc_list[i],\n+                    sc_list[i + 1]\n+                )\n \n-                    if dpsp == 0:\n-                        info.append("AF=NaN")\n-                    else:\n-                        af = (dp4[2] + dp4[3]) / dpsp\n-                        info.append("AF=%.6f" % (af))\n-                    if dpspf == 0:\n-                        info.append("FAF=NaN")\n-                    else:\n-                        faf = dp4[2] / dpspf\n-                        info.append("FAF=%.6f" % (faf))\n-                    if dpspr == 0:\n-                        info.append("RAF=NaN")\n-                    else:\n-                        raf = dp4[3] / dpspr\n-                        info.append("RAF=%.6f" % (raf))\n-                    info.append("SB=%d" % (sb))\n-                    info.append("DP4=%d,%d,%d,%d" % (dp4))\n-                    info.append("AS=%d,%d,%d,%d" % (as_))\n-                    new_info = \';\'.join(info)\n-                    fields[4] = variant_list[j]\n-                    fields[7] = new_info\n-                    out_vcf.write("%s" % ("\\t".join(fields)))\n-            else:\n-                print("WARNING - SR and SC are different lengths, skipping variant")\n-                print(line.strip())  # Print the line for debugging purposes\n+                info = []\n+                for code in info_dict:\n+                    if code in to_skip:\n+                        continue\n+                    val = info_dict[code]\n+                    info.append("%s=%s" % (code, val))\n+\n+                info.append(\'DP=%d\' % dpsp)\n+                info.append(\'DPSPS=%d,%d\' % (dpspf, dpspr))\n+\n+                if dpsp == 0:\n+                    info.append(\'AF=NaN\')\n+                else:\n+                    af = (dp4[2] + dp4[3]) / dpsp\n+                    info.append(\'AF=%.6f\' % af)\n+                if dpspf == 0:\n+                    info.append(\'FAF=NaN\')\n+                else:\n+                    faf = dp4[2] / dpspf\n+                    info.append(\'FAF=%.6f\' % faf)\n+                if dpspr == 0:\n+                    info.append(\'RAF=NaN\')\n+                else:\n+                    raf = dp4[3] / dpspr\n+                    info.append(\'RAF=%.6f\' % raf)\n+                info.append(\'SB=%d\' % sb)\n+                info.append(\'DP4=%d,%d,%d,%d\' % dp4)\n+                info.append(\'AS=%d,%d,%d,%d\' % as_)\n+                new_info = \';\'.join(info)\n+                fields[4] = variant_list[j]\n+                fields[7] = new_info\n+                out_vcf.write(\'\\t\'.join(fields))\n     in_vcf.close()\n-    out_vcf.close()\n \n \n if __name__ == "__main__":\n'
b
diff -r 2bf63b38ee9b -r 0f5f4a208660 test-data/ref.fasta
--- a/test-data/ref.fasta Sun Sep 12 20:35:26 2021 +0000
+++ b/test-data/ref.fasta Fri Sep 17 20:22:49 2021 +0000
b
@@ -1,2 +1,2 @@
 >NC_045512.2
-ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC
+attaaaggtttataccttcccaggtaacaaaccaaccaactttcgatctcttgtagatctgttctctaaacgaactttaaaatctgtgtggctGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC
b
diff -r 2bf63b38ee9b -r 0f5f4a208660 variant.xml
--- a/variant.xml Sun Sep 12 20:35:26 2021 +0000
+++ b/variant.xml Fri Sep 17 20:22:49 2021 +0000
[
b'@@ -1,28 +1,11 @@\n <?xml version="1.0"?>\n-<tool id="medaka_variant" name="medaka variant tool" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">\n-    <description>Probability decoding</description>\n+<tool id="medaka_variant" name="medaka variant tool" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">\n+    <description>decodes variant calls from medaka consensus output</description>\n     <macros>\n         <import>macros.xml</import>\n     </macros>\n     <expand macro="requirements"/>\n-\n     <expand macro="version_command"/>\n-\n-    <configfiles>\n-        <configfile name="convert_fasta">\n-import sys\n-infile = open(sys.argv[1], \'r\')\n-outfile = open(sys.argv[2], \'w\')\n-for line in infile:\n-    if line[0] == \'>\':\n-        outfile.write(line)\n-    else:\n-        outfile.write(line.upper())\n-infile.close()\n-outfile.close()\n-        </configfile>\n-    </configfiles>\n-\n     <command detect_errors="exit_code"><![CDATA[\n ## initialize\n @REF_FASTA@\n@@ -44,8 +27,6 @@\n     #for $current in $pool.inputs\n         \'$current\'\n     #end for\n-    \'$out_result\' ## output\n-    2>&1 | tee \'$out_log\'\n #elif $pool.pool_mode == "No":\n     ## run\n     medaka variant\n@@ -60,16 +41,17 @@\n     ## required\n     reference.fa\n     \'$pool.input\'\n-    \'$out_result\' ##output\n+#end if\n+#if str($output_annotated.output_annotated_select) == \'false\':\n+    \'$out_variants\' ##output\n     2>&1 | tee \'$out_log\'\n-#end if\n-#if $out_annotated:\n-    ## medaka annotate errors out if the reference is lower case at a position it\'s annotating because it checks vs the ref base in the vcf\n-    && python \'$convert_fasta\' reference.fa  upper_reference.fa\n+#else\n+    raw.vcf ##output of medaka variant\n+    2>&1 | tee \'$out_log\'\n     && ln -s \'$output_annotated.in_bam\' in.bam\n     && ln -s \'$output_annotated.in_bam.metadata.bam_index\' in.bai\n-    && medaka tools annotate --dpsp --pad $output_annotated.pad \'$out_result\' upper_reference.fa in.bam tmp.vcf\n-    && python \'$__tool_directory__/convert_VCF_info_fields.py\' tmp.vcf \'$out_annotated\'\n+    && medaka tools annotate --dpsp --pad $output_annotated.pad raw.vcf reference.fa in.bam tmp.vcf\n+    && python \'$__tool_directory__/convert_VCF_info_fields.py\' tmp.vcf \'$out_variants\'\n #end if\n     ]]></command>\n     <inputs>\n@@ -99,39 +81,38 @@\n         <param argument="--ambig_ref" type="boolean" truevalue="--ambig_ref" falsevalue="" label="Decode variants at ambiguous reference positions?" checked="false"/>\n         <param argument="--gvcf" type="boolean" truevalue="--gvcf" falsevalue="" label="Output VCF records for reference loci predicted to be non-variant?" checked="false"/>\n         <conditional name="output_annotated">\n-            <param name="output_annotated_select" type="select" label="Output annotated VCF?" help="Annotate allele frequency, depth of coverage, etc for each variant (requires BAM file)">\n-                <option value="true" selected="true">Output annotated VCF</option>\n-                <option value="false">Don\'t output annotated VCF</option>\n+            <param name="output_annotated_select" type="select"\n+            label="Type of VCF to generate"\n+            help="Variant INFO fields in the VCF can be extended to include allele frequency, depth of coverage, etc., but this requires a BAM dataset to calculate those values from.">\n+                <option value="true" selected="true">Write annotated VCF with extended INFO</option>\n+                <option value="false">Write original decoded VCF with minimal INFO field</option>\n             </param>\n             <when value="true">\n-                <param name="in_bam" type="data" format="bam" optional="false" label="BAM to annotate the VCF"/>\n-                <param name="pad" type="integer" min="1" value="25" label="Padding width on either side of variant for realignment in medaka tools anntotate, used to calculate DPSP, DPSPS, AF, FAF, RAF, SB, DP4, and AS in the output annotated VCF"/>\n+                <param name="in_bam" type="data" format="bam" option'..b'tandard -->\n-        <data name="out_result" format="vcf" label="${tool.name} on ${on_string}: Result"/>\n-        <!-- optional -->\n-        <data name="out_annotated" format="vcf" label="${tool.name} on ${on_string}: Annotated">\n-            <filter>output_annotated[\'output_annotated_select\']!=\'false\'</filter>\n-        </data>\n+        <data name="out_variants" format="vcf" label="${tool.name} on ${on_string}: called variants"/>\n         <data name="out_log" format="tabular" label="${tool.name} on ${on_string}: Log">\n             <filter>output_log_bool</filter>\n         </data>\n     </outputs>\n     <tests>\n         <!-- #1 default -->\n-        <test expect_num_outputs="3">\n+        <test expect_num_outputs="2">\n             <conditional name="pool">\n                 <param name="pool_mode" value="Yes"/>\n                 <param name="inputs" value="medaka_test.hdf,medaka_test.hdf"/>\n             </conditional>\n             <conditional name="reference_source">\n                 <param name="reference_source_selector" value="history"/>\n-                <param name="ref_file" value="ref.fasta.gz"/>\n+                <param name="ref_file" value="ref.fasta"/>\n             </conditional>\n             <param name="ambig_ref" value="true"/>\n             <conditional name="output_annotated">\n@@ -139,17 +120,9 @@\n                 <param name="in_bam" value="medaka_test.bam"/>\n             </conditional>\n             <param name="output_log_bool" value="true"/>\n-            \n-            <output name="out_result">\n+            <output name="out_variants">\n                 <assert_contents>\n-                    <has_n_lines n="9"/>\n-                    <has_line line="##fileformat=VCFv4.1" />\n-                    <has_line_matching expression="##medaka_version=[0-9]+\\.[0-9]+\\.[0-9]+" />\n-                </assert_contents>\n-            </output>\n-            <output name="out_annotated">\n-                <assert_contents>\n-                    <has_n_lines n="23"/>\n+                    <has_n_lines n="18"/>\n                     <has_line line="##fileformat=VCFv4.1" />\n                     <has_line_matching expression="##medaka_version=[0-9]+\\.[0-9]+\\.[0-9]+" />\n                     <has_line_matching expression="##convert_VCF_info_fields=[0-9]+\\.[0-9]+" />\n@@ -162,7 +135,7 @@\n             </output>\n         </test>\n         <!--No pooling-->\n-        <test expect_num_outputs="3">\n+        <test expect_num_outputs="2">\n             <conditional name="pool">\n                 <param name="pool_mode" value="No"/>\n                 <param name="input" value="medaka_test.hdf"/>\n@@ -177,17 +150,9 @@\n                 <param name="in_bam" value="medaka_test.bam"/>\n             </conditional>\n             <param name="output_log_bool" value="true"/>\n-            \n-            <output name="out_result">\n+            <output name="out_variants">\n                 <assert_contents>\n-                    <has_n_lines n="9"/>\n-                    <has_line line="##fileformat=VCFv4.1" />\n-                    <has_line_matching expression="##medaka_version=[0-9]+\\.[0-9]+\\.[0-9]+" />\n-                </assert_contents>\n-            </output>\n-            <output name="out_annotated">\n-                <assert_contents>\n-                    <has_n_lines n="23"/>\n+                    <has_n_lines n="18"/>\n                     <has_line line="##fileformat=VCFv4.1" />\n                     <has_line_matching expression="##medaka_version=[0-9]+\\.[0-9]+\\.[0-9]+" />\n                     <has_line_matching expression="##convert_VCF_info_fields=[0-9]+\\.[0-9]+" />\n@@ -213,8 +178,7 @@\n                 <param name="output_annotated_select" value="false"/>\n             </conditional>\n             <param name="output_log_bool" value="false"/>\n-            \n-            <output name="out_result">\n+            <output name="out_variants">\n                 <assert_contents>\n                     <has_n_lines n="9"/>\n                     <has_line line="##fileformat=VCFv4.1" />\n'