Repository 'medaka_variant'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/medaka_variant

Changeset 10:9fb055604648 (2021-04-22)
Previous changeset 9:43815fa60f3a (2021-03-29) Next changeset 11:2bf63b38ee9b (2021-09-12)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 0faf0ade3f13d7c78d93869823ea9fdf25c21b13"
modified:
convert_VCF_info_fields.py
macros.xml
test-data/all_fasta.loc
variant.xml
removed:
annotateVCF.py
test-data/bwa-mem-mt-genome.fa
test-data/bwa-mem-mt-genome.fa.fai
b
diff -r 43815fa60f3a -r 9fb055604648 annotateVCF.py
--- a/annotateVCF.py Mon Mar 29 20:05:39 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,409 +0,0 @@\n-#!/usr/bin/env python3\n-\n-# Takes in VCF file and a samtools mpileup output file\n-# Fills in annotation for the VCF file including AF, DP\n-# SB, and DP4\n-#\n-# Usage statement:\n-# python annotateVCF.py in_vcf.vcf in_mpileup.txt out_vcf.vcf\n-#\n-# Can generate in_mileup.txt with samtools mpileup (and can restrict which sites to generate pileups for with in_vcf.vcf)\n-\n-# 08/24/2020 - Nathan P. Roach, natproach@gmail.com\n-\n-import sys\n-from math import isnan, log10\n-\n-from scipy.stats import fisher_exact\n-\n-\n-def pval_to_phredqual(pval):\n-    return int(round(-10. * log10(pval)))\n-\n-\n-def parseSimpleSNPpileup(fields, ref_base, alt_base):\n-    base_to_idx = {\n-        \'A\': 0,\n-        \'a\': 0,\n-        \'T\': 1,\n-        \'t\': 1,\n-        \'C\': 2,\n-        \'c\': 2,\n-        \'G\': 3,\n-        \'g\': 3\n-    }\n-\n-    base_to_idx_stranded = {\n-        \'A\': 0,\n-        \'T\': 1,\n-        \'C\': 2,\n-        \'G\': 3,\n-        \'a\': 4,\n-        \'t\': 5,\n-        \'c\': 6,\n-        \'g\': 7\n-    }\n-    ref_base2 = fields[2]\n-    counts = [0, 0, 0, 0]\n-    stranded_counts = [0, 0, 0, 0, 0, 0, 0, 0]\n-    ref_idx = base_to_idx[fields[2]]\n-    dp = int(fields[3])\n-    carrot_flag = False\n-    ins_flag = False\n-    ins_str = ""\n-    ins_len = 0\n-    insertion = ""\n-    del_flag = False\n-    del_str = ""\n-    del_len = 0\n-    deletion = ""\n-    # dollar_flag = False\n-    for base in fields[4]:\n-        if carrot_flag:\n-            carrot_flag = False\n-            continue\n-        if ins_len > 0:\n-            insertion += base\n-            ins_len -= 1\n-            continue\n-        if del_len > 0:\n-            deletion += base\n-            del_len -= 1\n-            continue\n-        if ins_flag:\n-            if base.isdigit():\n-                ins_str += base\n-            else:\n-                ins_len = int(ins_str) - 1\n-                ins_str = ""\n-                insertion = base\n-                ins_flag = False\n-        elif del_flag:\n-            if base.isdigit():\n-                del_str += base\n-            else:\n-                del_len = int(del_str) - 1\n-                del_str = ""\n-                deletion = base\n-                del_flag = False\n-        else:\n-            if base == \'^\':\n-                carrot_flag = True\n-                continue\n-            elif base == \'$\':\n-                continue\n-            elif base == \'+\':\n-                ins_flag = True\n-            elif base == \'-\':\n-                del_flag = True\n-            elif base == \'.\':\n-                counts[ref_idx] += 1\n-                stranded_counts[base_to_idx_stranded[ref_base2]] += 1\n-            elif base == \',\':\n-                counts[ref_idx] += 1\n-                stranded_counts[base_to_idx_stranded[ref_base2.lower()]] += 1\n-            elif base == \'N\' or base == \'n\':\n-                continue\n-            elif base == \'*\':\n-                continue\n-            else:\n-                counts[base_to_idx[base]] += 1\n-                stranded_counts[base_to_idx_stranded[base]] += 1\n-    if sum(counts) == 0:\n-        af = float("nan")\n-    else:\n-        af = float(counts[base_to_idx[alt_base]]) / float(sum(counts))\n-    if float(sum(stranded_counts[0:4])) == 0:\n-        faf = float("nan")\n-    else:\n-        faf = float(stranded_counts[base_to_idx_stranded[alt_base]]) / float(sum(stranded_counts[0:4]))\n-    if float(sum(stranded_counts[4:])) == 0:\n-        raf = float("nan")\n-    else:\n-        raf = float(stranded_counts[base_to_idx_stranded[alt_base.lower()]]) / float(sum(stranded_counts[4:]))\n-    dp4 = [stranded_counts[base_to_idx_stranded[ref_base]],\n-           stranded_counts[base_to_idx_stranded[ref_base.lower()]],\n-           stranded_counts[base_to_idx_stranded[alt_base]],\n-           stranded_counts[base_to_idx_stranded[alt_base.lower()]]]\n-    return (dp, af, faf, raf, dp4)\n-\n-\n-def parseIndelPileup(fields, ref_base, alt_base):\n-    counts = [0, 0, 0, 0, 0, 0, 0, 0, 0]  # indel ref match, indel fwd ref match, indel rev ref match, '..b'    out_vcf.write(line)\n-        elif line[0] == "#":\n-            out_vcf.write("##annotateVCFVersion=0.2\\n")\n-            out_vcf.write("##INFO=<ID=DP,Number=1,Type=Integer,Description=\\"Raw Depth\\">\\n")\n-            out_vcf.write("##INFO=<ID=AF,Number=1,Type=Float,Description=\\"Allele Frequency\\">\\n")\n-            out_vcf.write("##INFO=<ID=FAF,Number=1,Type=Float,Description=\\"Forward Allele Frequency\\">\\n")\n-            out_vcf.write("##INFO=<ID=RAF,Number=1,Type=Float,Description=\\"Reverse Allele Frequency\\">\\n")\n-            out_vcf.write("##INFO=<ID=SB,Number=1,Type=Integer,Description=\\"Phred-scaled strand bias at this position\\">\\n")\n-            out_vcf.write("##INFO=<ID=DP4,Number=4,Type=Integer,Description=\\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\\">\\n")\n-            out_vcf.write(line)\n-        else:\n-            fields = line.strip().split()\n-            if fields[0] in to_examine:\n-                to_examine[fields[0]][int(fields[1])] = (fields[3], fields[4])\n-            else:\n-                to_examine[fields[0]] = {int(fields[1]): (fields[3], fields[4])}\n-    in_vcf.close()\n-    data = {}\n-\n-    # Populate data dictionary, which relates chromosome and position to the following:\n-    #  depth of coverage\n-    #  allele frequency\n-    #  forward strand allele frequency\n-    #  reverse strand allele frequency\n-    #  dp4 - depth of coverage of ref allele fwd strand, DOC of ref allele rev strand, DOC of alt allele fwd strand, DOC of alt allele rev strand\n-    for line in in_mpileup:\n-        fields = line.strip().split()\n-        if fields[0] not in to_examine:\n-            continue\n-        if int(fields[1]) not in to_examine[fields[0]]:\n-            continue\n-        (ref_base, alt_base) = to_examine[fields[0]][int(fields[1])]\n-        if len(ref_base.split(\',\')) > 1:  # Can\'t handle multiple ref alleles\n-            continue\n-        if len(alt_base.split(\',\')) > 1:  # Can\'t handle multiple alt alleles\n-            continue\n-        if len(ref_base) > 1 or len(alt_base) > 1:\n-            if len(ref_base) > 1 and len(alt_base) > 1:  # Can\'t handle complex indels\n-                continue\n-            data[(fields[0], int(fields[1]))] = parseIndelPileup(fields, ref_base, alt_base)\n-        if len(ref_base) == 1 and len(alt_base) == 1:\n-            data[(fields[0], int(fields[1]))] = parseSimpleSNPpileup(fields, ref_base, alt_base)\n-    in_mpileup.close()\n-    # Reopen vcf, this time, skip header, annotate all the sites for which there is an entry in data dictionary\n-    # (Sites without entries have either multiple ref or alt bases, or have complex indels. Not supported (for now), and not reported as a result)\n-    in_vcf = open(in_vcf_filepath, \'r\')\n-    for line in in_vcf:\n-        if line[0] == \'#\':\n-            continue\n-        fields = line.strip().split(\'\\t\')\n-        if (fields[0], int(fields[1])) not in data:\n-            continue\n-        (dp, af, faf, raf, dp4) = data[(fields[0], int(fields[1]))]\n-        dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]\n-        _, p_val = fisher_exact(dp2x2)\n-        sb = pval_to_phredqual(p_val)\n-        if fields[7] == "":\n-            info = []\n-        else:\n-            info = fields[7].split(\';\')\n-        info.append("DP=%d" % (dp))\n-        if isnan(af):\n-            info.append("AF=NaN")\n-        else:\n-            info.append("AF=%.6f" % (af))\n-        if isnan(faf):\n-            info.append("FAF=NaN")\n-        else:\n-            info.append("FAF=%.6f" % (faf))\n-        if isnan(raf):\n-            info.append("RAF=NaN")\n-        else:\n-            info.append("RAF=%.6f" % (raf))\n-        info.append("SB=%d" % (sb))\n-        info.append("DP4=%s" % (\',\'.join([str(x) for x in dp4])))\n-        new_info = \';\'.join(info)\n-        fields[7] = new_info\n-        out_vcf.write("%s\\n" % ("\\t".join(fields)))\n-    in_vcf.close()\n-    out_vcf.close()\n-\n-\n-if __name__ == "__main__":\n-    annotateVCF(sys.argv[1], sys.argv[2], sys.argv[3])\n'
b
diff -r 43815fa60f3a -r 9fb055604648 convert_VCF_info_fields.py
--- a/convert_VCF_info_fields.py Mon Mar 29 20:05:39 2021 +0000
+++ b/convert_VCF_info_fields.py Thu Apr 22 20:25:35 2021 +0000
[
@@ -11,7 +11,8 @@
 from collections import OrderedDict
 from math import log10
 
-from scipy.stats import fisher_exact
+import scipy
+import scipy.stats
 
 
 def pval_to_phredqual(pval):
@@ -69,7 +70,7 @@
                 for j, i in enumerate(range(2, len(sr_list), 2)):
                     dp4 = (sr_list[ref_fwd], sr_list[ref_rev], sr_list[i], sr_list[i + 1])
                     dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]
-                    _, p_val = fisher_exact(dp2x2)
+                    _, p_val = scipy.stats.fisher_exact(dp2x2)
                     sb = pval_to_phredqual(p_val)
 
                     as_ = (sc_list[ref_fwd], sc_list[ref_rev], sc_list[i], sc_list[i + 1])
b
diff -r 43815fa60f3a -r 9fb055604648 macros.xml
--- a/macros.xml Mon Mar 29 20:05:39 2021 +0000
+++ b/macros.xml Thu Apr 22 20:25:35 2021 +0000
b
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">1.0.3</token>
-    <token name="@PROFILE@">18.01</token>
+    <token name="@TOOL_VERSION@">1.3.2</token>
+    <token name="@PROFILE@">20.01</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">medaka</requirement>
@@ -48,29 +48,32 @@
     </xml>
     <xml name="model" token_argument="-m" token_label="Select model">
         <param argument="@ARGUMENT@" type="select" label="@LABEL@">
-            <option value="r10_min_high_g303">r10_min_high_g303</option>
-            <option value="r10_min_high_g340">r10_min_high_g340</option>
             <option value="r103_min_high_g345">r103_min_high_g345</option>
             <option value="r103_min_high_g360">r103_min_high_g360</option>
             <option value="r103_prom_high_g360">r103_prom_high_g360</option>
             <option value="r103_prom_snp_g3210">r103_prom_snp_g3210</option>
             <option value="r103_prom_variant_g3210">r103_prom_variant_g3210</option>
+            <option value="r10_min_high_g303">r10_min_high_g303</option>
+            <option value="r10_min_high_g340">r10_min_high_g340</option>
             <option value="r941_min_fast_g303">r941_min_fast_g303</option>
             <option value="r941_min_high_g303">r941_min_high_g303</option>
             <option value="r941_min_high_g330">r941_min_high_g330</option>
             <option value="r941_min_high_g340_rle">r941_min_high_g340_rle</option>
             <option value="r941_min_high_g344">r941_min_high_g344</option>
             <option value="r941_min_high_g351">r941_min_high_g351</option>
-            <option value="r941_min_high_g360">r941_min_high_g360</option>
+            <option value="r941_min_high_g360" selected="true">r941_min_high_g360</option>
             <option value="r941_prom_fast_g303">r941_prom_fast_g303</option>
             <option value="r941_prom_high_g303">r941_prom_high_g303</option>
             <option value="r941_prom_high_g330">r941_prom_high_g330</option>
             <option value="r941_prom_high_g344">r941_prom_high_g344</option>
-            <option value="r941_prom_high_g360" selected="true">r941_prom_high_g360</option>
+            <option value="r941_prom_high_g360">r941_prom_high_g360</option>
+            <option value="r941_prom_high_g4011">r941_prom_high_g4011</option>
             <option value="r941_prom_snp_g303">r941_prom_snp_g303</option>
             <option value="r941_prom_snp_g322">r941_prom_snp_g322</option>
+            <option value="r941_prom_snp_g360">r941_prom_snp_g360</option>
             <option value="r941_prom_variant_g303">r941_prom_variant_g303</option>
             <option value="r941_prom_variant_g322">r941_prom_variant_g322</option>
+            <option value="r941_prom_variant_g360">r941_prom_variant_g360</option>
         </param>
     </xml>
     <xml name="reference">
b
diff -r 43815fa60f3a -r 9fb055604648 test-data/all_fasta.loc
--- a/test-data/all_fasta.loc Mon Mar 29 20:05:39 2021 +0000
+++ b/test-data/all_fasta.loc Thu Apr 22 20:25:35 2021 +0000
b
@@ -1,1 +1,1 @@
-bwa-mem-mt-genome bwa-mem-mt-genome bwa-mem-mt-genome ${__HERE__}/bwa-mem-mt-genome.fa
\ No newline at end of file
+ref_fasta ref_fasta ref_fasta ${__HERE__}/ref.fasta
\ No newline at end of file
b
diff -r 43815fa60f3a -r 9fb055604648 test-data/bwa-mem-mt-genome.fa
--- a/test-data/bwa-mem-mt-genome.fa Mon Mar 29 20:05:39 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,238 +0,0 @@\n->gi|251831106|ref|NC_012920.1|\n-GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGG\n-GTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC\n-CTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTA\n-ATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATC\n-ATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA\n-AACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC\n-TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATA\n-CAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCC\n-AAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC\n-ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA\n-GCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC\n-AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAA\n-ACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA\n-TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACT\n-CACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAACAC\n-ACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC\n-AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC\n-CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATA\n-CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC\n-AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTAT\n-GAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGA\n-AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA\n-TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA\n-GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA\n-GCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCG\n-ATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA\n-ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCC\n-AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA\n-AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGAT\n-AGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC\n-CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG\n-TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC\n-ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAG\n-TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC\n-AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA\n-AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC\n-ATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA\n-AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT\n-TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA\n-TGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT\n-AAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG\n-TCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA\n-GCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG\n-ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG\n-AGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCT\n-ACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGA\n-ACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAAT\n-TCCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCA\n-TTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCC\n-CCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCAC\n-ATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCC\n-CTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAG\n-CCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGC\n-AGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGC\n-TCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCA'..b'CAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCTAA\n-CAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCATCAGTTGATGA\n-TACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTT\n-TCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAA\n-CGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGGT\n-CTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTA\n-TAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAAC\n-TCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATC\n-AAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAAC\n-CACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTC\n-CATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC\n-ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCA\n-AAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGC\n-TACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCCC\n-ACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCC\n-TATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTA\n-CCTAAAACTCACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACC\n-AACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCA\n-TCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCT\n-AACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCA\n-ACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAA\n-TCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAA\n-CTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGA\n-CCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATAC\n-TCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCC\n-CTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCC\n-CCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCG\n-ACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACC\n-CCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGAC\n-CAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAAC\n-CCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAA\n-ACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTA\n-CTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATC\n-ATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCC\n-TATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGC\n-AACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAAC\n-TTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACA\n-GTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACT\n-CCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATC\n-ACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACAT\n-TAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCC\n-TCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTA\n-GGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAAC\n-AACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCT\n-AACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATAC\n-TTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTC\n-CTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGA\n-GAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTC\n-ATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTACA\n-TTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCA\n-ATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCA\n-ACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAG\n-TACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCC\n-TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCG\n-CTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTC\n-ATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG\n'
b
diff -r 43815fa60f3a -r 9fb055604648 test-data/bwa-mem-mt-genome.fa.fai
--- a/test-data/bwa-mem-mt-genome.fa.fai Mon Mar 29 20:05:39 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-gi|251831106|ref|NC_012920.1| 16569 31 70 71
b
diff -r 43815fa60f3a -r 9fb055604648 variant.xml
--- a/variant.xml Mon Mar 29 20:05:39 2021 +0000
+++ b/variant.xml Thu Apr 22 20:25:35 2021 +0000
[
@@ -1,12 +1,10 @@
 <?xml version="1.0"?>
-<tool id="medaka_variant" name="medaka variant tool" version="@TOOL_VERSION@+galaxy7" profile="@PROFILE@">
+<tool id="medaka_variant" name="medaka variant tool" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
     <description>Probability decoding</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements">
-        <requirement type="package" version="1.4.1">scipy</requirement>
-    </expand>
+    <expand macro="requirements"/>
 
     <expand macro="version_command"/>
 
@@ -39,13 +37,14 @@
         --regions '${regions}'
     #end if
     $verbose
+    ${ambig_ref}
+    ${gvcf}
     ## required
     reference.fa
     #for $current in $pool.inputs
         '$current'
     #end for
     '$out_result' ## output
-
     2>&1 | tee '$out_log'
 #elif $pool.pool_mode == "No":
     ## run
@@ -56,26 +55,27 @@
         --regions '${regions}'
     #end if
     $verbose
+    ${ambig_ref}
+    ${gvcf}
     ## required
     reference.fa
     '$pool.input'
     '$out_result' ##output
     2>&1 | tee '$out_log'
 #end if
-
 #if $out_annotated:
     ## medaka annotate errors out if the reference is lower case at a position it's annotating because it checks vs the ref base in the vcf
     && python '$convert_fasta' reference.fa  upper_reference.fa
     && ln -s '$output_annotated.in_bam' in.bam
     && ln -s '$output_annotated.in_bam.metadata.bam_index' in.bai
-    && medaka tools annotate --pad $output_annotated.pad '$out_result' upper_reference.fa in.bam tmp.vcf
-    && '$__tool_directory__/convert_VCF_info_fields.py' tmp.vcf '$out_annotated'
+    && medaka tools annotate --dpsp --pad $output_annotated.pad '$out_result' upper_reference.fa in.bam tmp.vcf
+    && python '$__tool_directory__/convert_VCF_info_fields.py' tmp.vcf '$out_annotated'
 #end if
     ]]></command>
     <inputs>
         <conditional name="pool">
             <param name="pool_mode"  type="select" label="Are you pooling HDF5 datasets?">
-                <option value="No" selected="True">No</option>
+                <option value="No" selected="true">No</option>
                 <option value="Yes">Yes</option>
             </param>
             <when value="Yes">
@@ -96,6 +96,8 @@
             </sanitizer>
         </param>
         <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" label="Populate VCF info fields?"/>
+        <param argument="--ambig_ref" type="boolean" truevalue="--ambig_ref" falsevalue="" label="Decode variants at ambiguous reference positions?" checked="false"/>
+        <param argument="--gvcf" type="boolean" truevalue="--gvcf" falsevalue="" label="Output VCF records for reference loci predicted to be non-variant?" checked="false"/>
         <conditional name="output_annotated">
             <param name="output_annotated_select" type="select" label="Output annotated VCF?" help="Annotate allele frequency, depth of coverage, etc for each variant (requires BAM file)">
                 <option value="true" selected="true">Output annotated VCF</option>
@@ -129,8 +131,9 @@
             </conditional>
             <conditional name="reference_source">
                 <param name="reference_source_selector" value="history"/>
-                <param name="ref_file" value="ref.fasta"/>
+                <param name="ref_file" value="ref.fasta.gz"/>
             </conditional>
+            <param name="ambig_ref" value="true"/>
             <conditional name="output_annotated">
                 <param name="output_annotated_select" value="true"/>
                 <param name="in_bam" value="medaka_test.bam"/>
@@ -146,7 +149,7 @@
             </output>
             <output name="out_annotated">
                 <assert_contents>
-                    <has_n_lines n="22"/>
+                    <has_n_lines n="23"/>
                     <has_line line="##fileformat=VCFv4.1" />
                     <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
                     <has_line_matching expression="##convert_VCF_info_fields=[0-9]+\.[0-9]+" />
@@ -168,6 +171,7 @@
                 <param name="reference_source_selector" value="history"/>
                 <param name="ref_file" value="ref.fasta.gz" ftype="fasta.gz"/>
             </conditional>
+            <param name="ambig_ref" value="true"/>
             <conditional name="output_annotated">
                 <param name="output_annotated_select" value="true"/>
                 <param name="in_bam" value="medaka_test.bam"/>
@@ -183,7 +187,7 @@
             </output>
             <output name="out_annotated">
                 <assert_contents>
-                    <has_n_lines n="22"/>
+                    <has_n_lines n="23"/>
                     <has_line line="##fileformat=VCFv4.1" />
                     <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
                     <has_line_matching expression="##convert_VCF_info_fields=[0-9]+\.[0-9]+" />