Previous changeset 11:2bf63b38ee9b (2021-09-12) Next changeset 13:222669c4afb6 (2021-11-18) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 86211daa63a6f39524df8759364795b782324303" |
modified:
convert_VCF_info_fields.py test-data/ref.fasta variant.xml |
b |
diff -r 2bf63b38ee9b -r 0f5f4a208660 convert_VCF_info_fields.py --- a/convert_VCF_info_fields.py Sun Sep 12 20:35:26 2021 +0000 +++ b/convert_VCF_info_fields.py Fri Sep 17 20:22:49 2021 +0000 |
[ |
b'@@ -7,11 +7,11 @@\n \n # 10/21/2020 - Nathan P. Roach, natproach@gmail.com\n \n+import re\n import sys\n from collections import OrderedDict\n from math import log10\n \n-import scipy\n import scipy.stats\n \n \n@@ -33,84 +33,144 @@\n \n \n def annotateVCF(in_vcf_filepath, out_vcf_filepath):\n+ """Postprocess output of medaka tools annotate.\n+\n+ Splits multiallelic sites into separate records.\n+ Replaces medaka INFO fields that might represent information of the ref\n+ and multiple alternate alleles with simple ref, alt allele counterparts.\n+ """\n+\n in_vcf = open(in_vcf_filepath, \'r\')\n- out_vcf = open(out_vcf_filepath, \'w\')\n- to_skip = set([\'SC\', \'SR\'])\n- for i, line in enumerate(in_vcf):\n- if i == 1:\n- out_vcf.write("##convert_VCF_info_fields=0.2\\n")\n- if line[0:2] == "##":\n- if line[0:11] == "##INFO=<ID=":\n- id_ = line[11:].split(\',\')[0]\n- if id_ in to_skip:\n+ # medaka INFO fields that do not make sense after splitting of\n+ # multi-allelic records\n+ # DP will be overwritten with the value of DPSP because medaka tools\n+ # annotate currently only calculates the latter correctly\n+ # (https://github.com/nanoporetech/medaka/issues/192).\n+ # DPS, which is as unreliable as DP, gets skipped and the code\n+ # calculates the spanning reads equivalent DPSPS instead.\n+ to_skip = {\'SC\', \'SR\', \'AR\', \'DP\', \'DPSP\', \'DPS\'}\n+ struct_meta_pat = re.compile(\'##(.+)=<ID=([^,]+)(,.+)?>\')\n+ header_lines = []\n+ contig_ids = set()\n+ contig_ids_simple = set()\n+ # parse the metadata lines of the input VCF and drop:\n+ # - duplicate lines\n+ # - INFO lines declaring keys we are not going to write\n+ # - redundant contig information\n+ while True:\n+ line = in_vcf.readline()\n+ if line[:2] != \'##\':\n+ assert line.startswith(\'#CHROM\')\n+ break\n+ if line in header_lines:\n+ # the annotate tool may generate lines already written by\n+ # medaka variant again (example: medaka version line)\n+ continue\n+ match = struct_meta_pat.match(line)\n+ if match:\n+ match_type, match_id, match_misc = match.groups()\n+ if match_type == \'INFO\':\n+ if match_id == \'DPSP\':\n+ line = line.replace(\'DPSP\', \'DP\')\n+ elif match_id in to_skip:\n continue\n- out_vcf.write(line)\n- elif line[0] == "#":\n- out_vcf.write(\'##INFO=<ID=DPSPS,Number=2,Type=Integer,Description="Spanning Reads Allele Frequency By Strand">\\n\')\n- out_vcf.write(\'##INFO=<ID=AF,Number=1,Type=Float,Description="Spanning Reads Allele Frequency">\\n\')\n- out_vcf.write(\'##INFO=<ID=FAF,Number=1,Type=Float,Description="Forward Spanning Reads Allele Frequency">\\n\')\n- out_vcf.write(\'##INFO=<ID=RAF,Number=1,Type=Float,Description="Reverse Spanning Reads Allele Frequency">\\n\')\n- out_vcf.write(\'##INFO=<ID=SB,Number=1,Type=Integer,Description="Phred-scaled strand bias of spanning reads at this position">\\n\')\n- out_vcf.write(\'##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases in spanning reads">\\n\')\n- out_vcf.write(\'##INFO=<ID=AS,Number=4,Type=Integer,Description="Total alignment score to ref and alt allele of spanning reads by strand (ref fwd, ref rev, alt fwd, alt rev) aligned with parasail match 5, mismatch -4, open 5, extend 3">\\n\')\n- out_vcf.write(line)\n- else:\n+ elif match_type == \'contig\':\n+ contig_ids.add(match_id)\n+ if not match_misc:\n+ # the annotate tools writes its own contig info,\n+ # which is redundant with contig info generated by\n+ # medaka variant, but lacks a length value.\n+ # We don\'t need the incomplete line.\n+ '..b' print(line.strip()) # Print the line for debugging purposes\n+ continue\n+ variant_list = fields[4].split(\',\')\n+ dpsp = int(info_dict[\'DPSP\'])\n+ ref_fwd, ref_rev = 0, 1\n+ dpspf, dpspr = (int(x) for x in info_dict[\'AR\'].split(\',\'))\n+ for i in range(0, len(sr_list), 2):\n+ dpspf += sr_list[i]\n+ dpspr += sr_list[i + 1]\n+ for j, i in enumerate(range(2, len(sr_list), 2)):\n+ dp4 = (\n+ sr_list[ref_fwd],\n+ sr_list[ref_rev],\n+ sr_list[i],\n+ sr_list[i + 1]\n+ )\n+ dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]\n+ _, p_val = scipy.stats.fisher_exact(dp2x2)\n+ sb = pval_to_phredqual(p_val)\n \n- as_ = (sc_list[ref_fwd], sc_list[ref_rev], sc_list[i], sc_list[i + 1])\n-\n- info = []\n- for code in info_dict:\n- if code in to_skip:\n- continue\n- val = info_dict[code]\n- info.append("%s=%s" % (code, val))\n-\n- info.append("DPSPS=%d,%d" % (dpspf, dpspr))\n+ as_ = (\n+ sc_list[ref_fwd],\n+ sc_list[ref_rev],\n+ sc_list[i],\n+ sc_list[i + 1]\n+ )\n \n- if dpsp == 0:\n- info.append("AF=NaN")\n- else:\n- af = (dp4[2] + dp4[3]) / dpsp\n- info.append("AF=%.6f" % (af))\n- if dpspf == 0:\n- info.append("FAF=NaN")\n- else:\n- faf = dp4[2] / dpspf\n- info.append("FAF=%.6f" % (faf))\n- if dpspr == 0:\n- info.append("RAF=NaN")\n- else:\n- raf = dp4[3] / dpspr\n- info.append("RAF=%.6f" % (raf))\n- info.append("SB=%d" % (sb))\n- info.append("DP4=%d,%d,%d,%d" % (dp4))\n- info.append("AS=%d,%d,%d,%d" % (as_))\n- new_info = \';\'.join(info)\n- fields[4] = variant_list[j]\n- fields[7] = new_info\n- out_vcf.write("%s" % ("\\t".join(fields)))\n- else:\n- print("WARNING - SR and SC are different lengths, skipping variant")\n- print(line.strip()) # Print the line for debugging purposes\n+ info = []\n+ for code in info_dict:\n+ if code in to_skip:\n+ continue\n+ val = info_dict[code]\n+ info.append("%s=%s" % (code, val))\n+\n+ info.append(\'DP=%d\' % dpsp)\n+ info.append(\'DPSPS=%d,%d\' % (dpspf, dpspr))\n+\n+ if dpsp == 0:\n+ info.append(\'AF=NaN\')\n+ else:\n+ af = (dp4[2] + dp4[3]) / dpsp\n+ info.append(\'AF=%.6f\' % af)\n+ if dpspf == 0:\n+ info.append(\'FAF=NaN\')\n+ else:\n+ faf = dp4[2] / dpspf\n+ info.append(\'FAF=%.6f\' % faf)\n+ if dpspr == 0:\n+ info.append(\'RAF=NaN\')\n+ else:\n+ raf = dp4[3] / dpspr\n+ info.append(\'RAF=%.6f\' % raf)\n+ info.append(\'SB=%d\' % sb)\n+ info.append(\'DP4=%d,%d,%d,%d\' % dp4)\n+ info.append(\'AS=%d,%d,%d,%d\' % as_)\n+ new_info = \';\'.join(info)\n+ fields[4] = variant_list[j]\n+ fields[7] = new_info\n+ out_vcf.write(\'\\t\'.join(fields))\n in_vcf.close()\n- out_vcf.close()\n \n \n if __name__ == "__main__":\n' |
b |
diff -r 2bf63b38ee9b -r 0f5f4a208660 test-data/ref.fasta --- a/test-data/ref.fasta Sun Sep 12 20:35:26 2021 +0000 +++ b/test-data/ref.fasta Fri Sep 17 20:22:49 2021 +0000 |
b |
@@ -1,2 +1,2 @@ >NC_045512.2 -ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC +attaaaggtttataccttcccaggtaacaaaccaaccaactttcgatctcttgtagatctgttctctaaacgaactttaaaatctgtgtggctGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC |
b |
diff -r 2bf63b38ee9b -r 0f5f4a208660 variant.xml --- a/variant.xml Sun Sep 12 20:35:26 2021 +0000 +++ b/variant.xml Fri Sep 17 20:22:49 2021 +0000 |
[ |
b'@@ -1,28 +1,11 @@\n <?xml version="1.0"?>\n-<tool id="medaka_variant" name="medaka variant tool" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">\n- <description>Probability decoding</description>\n+<tool id="medaka_variant" name="medaka variant tool" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">\n+ <description>decodes variant calls from medaka consensus output</description>\n <macros>\n <import>macros.xml</import>\n </macros>\n <expand macro="requirements"/>\n-\n <expand macro="version_command"/>\n-\n- <configfiles>\n- <configfile name="convert_fasta">\n-import sys\n-infile = open(sys.argv[1], \'r\')\n-outfile = open(sys.argv[2], \'w\')\n-for line in infile:\n- if line[0] == \'>\':\n- outfile.write(line)\n- else:\n- outfile.write(line.upper())\n-infile.close()\n-outfile.close()\n- </configfile>\n- </configfiles>\n-\n <command detect_errors="exit_code"><![CDATA[\n ## initialize\n @REF_FASTA@\n@@ -44,8 +27,6 @@\n #for $current in $pool.inputs\n \'$current\'\n #end for\n- \'$out_result\' ## output\n- 2>&1 | tee \'$out_log\'\n #elif $pool.pool_mode == "No":\n ## run\n medaka variant\n@@ -60,16 +41,17 @@\n ## required\n reference.fa\n \'$pool.input\'\n- \'$out_result\' ##output\n+#end if\n+#if str($output_annotated.output_annotated_select) == \'false\':\n+ \'$out_variants\' ##output\n 2>&1 | tee \'$out_log\'\n-#end if\n-#if $out_annotated:\n- ## medaka annotate errors out if the reference is lower case at a position it\'s annotating because it checks vs the ref base in the vcf\n- && python \'$convert_fasta\' reference.fa upper_reference.fa\n+#else\n+ raw.vcf ##output of medaka variant\n+ 2>&1 | tee \'$out_log\'\n && ln -s \'$output_annotated.in_bam\' in.bam\n && ln -s \'$output_annotated.in_bam.metadata.bam_index\' in.bai\n- && medaka tools annotate --dpsp --pad $output_annotated.pad \'$out_result\' upper_reference.fa in.bam tmp.vcf\n- && python \'$__tool_directory__/convert_VCF_info_fields.py\' tmp.vcf \'$out_annotated\'\n+ && medaka tools annotate --dpsp --pad $output_annotated.pad raw.vcf reference.fa in.bam tmp.vcf\n+ && python \'$__tool_directory__/convert_VCF_info_fields.py\' tmp.vcf \'$out_variants\'\n #end if\n ]]></command>\n <inputs>\n@@ -99,39 +81,38 @@\n <param argument="--ambig_ref" type="boolean" truevalue="--ambig_ref" falsevalue="" label="Decode variants at ambiguous reference positions?" checked="false"/>\n <param argument="--gvcf" type="boolean" truevalue="--gvcf" falsevalue="" label="Output VCF records for reference loci predicted to be non-variant?" checked="false"/>\n <conditional name="output_annotated">\n- <param name="output_annotated_select" type="select" label="Output annotated VCF?" help="Annotate allele frequency, depth of coverage, etc for each variant (requires BAM file)">\n- <option value="true" selected="true">Output annotated VCF</option>\n- <option value="false">Don\'t output annotated VCF</option>\n+ <param name="output_annotated_select" type="select"\n+ label="Type of VCF to generate"\n+ help="Variant INFO fields in the VCF can be extended to include allele frequency, depth of coverage, etc., but this requires a BAM dataset to calculate those values from.">\n+ <option value="true" selected="true">Write annotated VCF with extended INFO</option>\n+ <option value="false">Write original decoded VCF with minimal INFO field</option>\n </param>\n <when value="true">\n- <param name="in_bam" type="data" format="bam" optional="false" label="BAM to annotate the VCF"/>\n- <param name="pad" type="integer" min="1" value="25" label="Padding width on either side of variant for realignment in medaka tools anntotate, used to calculate DPSP, DPSPS, AF, FAF, RAF, SB, DP4, and AS in the output annotated VCF"/>\n+ <param name="in_bam" type="data" format="bam" option'..b'tandard -->\n- <data name="out_result" format="vcf" label="${tool.name} on ${on_string}: Result"/>\n- <!-- optional -->\n- <data name="out_annotated" format="vcf" label="${tool.name} on ${on_string}: Annotated">\n- <filter>output_annotated[\'output_annotated_select\']!=\'false\'</filter>\n- </data>\n+ <data name="out_variants" format="vcf" label="${tool.name} on ${on_string}: called variants"/>\n <data name="out_log" format="tabular" label="${tool.name} on ${on_string}: Log">\n <filter>output_log_bool</filter>\n </data>\n </outputs>\n <tests>\n <!-- #1 default -->\n- <test expect_num_outputs="3">\n+ <test expect_num_outputs="2">\n <conditional name="pool">\n <param name="pool_mode" value="Yes"/>\n <param name="inputs" value="medaka_test.hdf,medaka_test.hdf"/>\n </conditional>\n <conditional name="reference_source">\n <param name="reference_source_selector" value="history"/>\n- <param name="ref_file" value="ref.fasta.gz"/>\n+ <param name="ref_file" value="ref.fasta"/>\n </conditional>\n <param name="ambig_ref" value="true"/>\n <conditional name="output_annotated">\n@@ -139,17 +120,9 @@\n <param name="in_bam" value="medaka_test.bam"/>\n </conditional>\n <param name="output_log_bool" value="true"/>\n- \n- <output name="out_result">\n+ <output name="out_variants">\n <assert_contents>\n- <has_n_lines n="9"/>\n- <has_line line="##fileformat=VCFv4.1" />\n- <has_line_matching expression="##medaka_version=[0-9]+\\.[0-9]+\\.[0-9]+" />\n- </assert_contents>\n- </output>\n- <output name="out_annotated">\n- <assert_contents>\n- <has_n_lines n="23"/>\n+ <has_n_lines n="18"/>\n <has_line line="##fileformat=VCFv4.1" />\n <has_line_matching expression="##medaka_version=[0-9]+\\.[0-9]+\\.[0-9]+" />\n <has_line_matching expression="##convert_VCF_info_fields=[0-9]+\\.[0-9]+" />\n@@ -162,7 +135,7 @@\n </output>\n </test>\n <!--No pooling-->\n- <test expect_num_outputs="3">\n+ <test expect_num_outputs="2">\n <conditional name="pool">\n <param name="pool_mode" value="No"/>\n <param name="input" value="medaka_test.hdf"/>\n@@ -177,17 +150,9 @@\n <param name="in_bam" value="medaka_test.bam"/>\n </conditional>\n <param name="output_log_bool" value="true"/>\n- \n- <output name="out_result">\n+ <output name="out_variants">\n <assert_contents>\n- <has_n_lines n="9"/>\n- <has_line line="##fileformat=VCFv4.1" />\n- <has_line_matching expression="##medaka_version=[0-9]+\\.[0-9]+\\.[0-9]+" />\n- </assert_contents>\n- </output>\n- <output name="out_annotated">\n- <assert_contents>\n- <has_n_lines n="23"/>\n+ <has_n_lines n="18"/>\n <has_line line="##fileformat=VCFv4.1" />\n <has_line_matching expression="##medaka_version=[0-9]+\\.[0-9]+\\.[0-9]+" />\n <has_line_matching expression="##convert_VCF_info_fields=[0-9]+\\.[0-9]+" />\n@@ -213,8 +178,7 @@\n <param name="output_annotated_select" value="false"/>\n </conditional>\n <param name="output_log_bool" value="false"/>\n- \n- <output name="out_result">\n+ <output name="out_variants">\n <assert_contents>\n <has_n_lines n="9"/>\n <has_line line="##fileformat=VCFv4.1" />\n' |