changeset 6:147465efa99c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 847ec10cd36ea4f3cd4c257d5742f0fb401e364e"
author iuc
date Thu, 10 Jun 2021 22:06:04 +0000
parents 49236b03e4fd
children 252dfb042563
files ivar_variants.xml ivar_variants_to_vcf.py macros.xml test-data/zika/Z52_a.vcf
diffstat 4 files changed, 236 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/ivar_variants.xml	Wed May 19 16:49:13 2021 +0000
+++ b/ivar_variants.xml	Thu Jun 10 22:06:04 2021 +0000
@@ -1,9 +1,11 @@
-<tool id="ivar_variants" name="ivar variants" version="@VERSION@+galaxy0">
+<tool id="ivar_variants" name="ivar variants" version="@VERSION@+galaxy1">
     <description>Call variants from aligned BAM file</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements" />
+    <expand macro="requirements">
+        <requirement type="package" version="3">python</requirement>
+    </expand>
     <expand macro="version_command" />
     <command detect_errors="exit_code"><![CDATA[
         ln -s '$ref' ref.fa &&
@@ -11,22 +13,51 @@
         samtools mpileup -A -d 0 --reference ref.fa -B -Q 0 sorted.bam | ivar variants 
         -p variants
         -q $min_qual
-        -t $min_freq
+        -t $min_freq &&
+        #if str($output_format.choice) == 'vcf'
+            python '${__tool_directory__}/ivar_variants_to_vcf.py' 
+            ${output_format.pass_only}
+            variants.tsv '$output_variants'
+        #else
+            cp variants.tsv '$output_variants'
+        #end if
     ]]>    </command>
     <inputs>
         <param name="input_bam" type="data" format="bam" label="Bam file" help="Aligned reads, to trim primers and quality"/>
         <param name="ref" type="data" format="fasta" label="Reference"/>
         <param name="min_qual" argument="-q" type="integer" min="1" value="20" label="Minimum quality score threshold to count base"/>
         <param name="min_freq" argument="-t" type="float" min="0" max="1" value="0.03" label="Minimum frequency threshold"/>
+        <conditional name="output_format">
+            <param name="choice" type="select" label="Output format">
+                <option value="tabular">Tabular (native tool output)</option>
+                <option value="vcf">VCF</option>
+            </param>
+            <when value="vcf">
+                <param argument="--pass_only" type="boolean" truevalue="--pass_only" falsevalue="" label="In VCF only output variants that PASS all filters" />
+            </when>
+            <when value="tabular" />
+        </conditional>    
     </inputs>
     <outputs>
-        <data name="output_variants" format="tabular" label="${tool.name} on ${on_string}" from_work_dir="variants.tsv"/>
+        <data name="output_variants" format="tabular" label="${tool.name} on ${on_string}">
+            <change_format>
+                <when input="output_format.choice" value="vcf" format="vcf" />
+            </change_format>
+        </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="input_bam" value="zika/Z52_a.masked.sorted.bam" />
             <param name="ref" value="zika/db/PRV.fa" />
-            <output name="output_variants" file="zika/Z52_a.tsv" lines_diff="10"/>
+            <output name="output_variants" file="zika/Z52_a.tsv" ftype="tabular" lines_diff="10"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_bam" value="zika/Z52_a.masked.sorted.bam" />
+            <param name="ref" value="zika/db/PRV.fa" />
+            <conditional name="output_format">
+                <param name="choice" value="vcf" />
+            </conditional>
+            <output name="output_variants" file="zika/Z52_a.vcf" ftype="vcf"/>
         </test>
     </tests>
     <help><![CDATA[
@@ -43,6 +74,15 @@
         required for a SNV or indel to be reported.
         
         Documentation can be found at `<https://andersen-lab.github.io/ivar/html/manualpage.html>`_.
+
+        Optionally output is converted to VCF using a version of the `ivar_variants_to_vcf.py script <https://github.com/nf-core/viralrecon/blob/dev/bin/ivar_variants_to_vcf.py>`_,
+        that has been modified to store attributes in INFO fields.
     ]]>    </help>
-    <expand macro="citations" />
+    <expand macro="citations">
+        <citation type="bibtex">@misc{githubivar_variants_to_vcf,
+            author = {Fernandez, Sarai Varona and Patel, Harshil},
+            year = {2021},
+            title = {ivar_variants_to_vcf},
+            url = {https://github.com/nf-core/viralrecon/blob/dev/bin/ivar_variants_to_vcf.py}
+        }</citation>    </expand>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ivar_variants_to_vcf.py	Thu Jun 10 22:06:04 2021 +0000
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+import argparse
+import errno
+import os
+import re
+import sys
+
+
+def parse_args(args=None):
+    Description = "Convert iVar variants tsv file to vcf format."
+    Epilog = """Example usage: python ivar_variants_to_vcf.py <FILE_IN> <FILE_OUT>"""
+
+    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
+    parser.add_argument("FILE_IN", help="Input tsv file.")
+    parser.add_argument("FILE_OUT", help="Full path to output vcf file.")
+    parser.add_argument(
+        "-po",
+        "--pass_only",
+        dest="PASS_ONLY",
+        help="Only output variants that PASS all filters.",
+        action="store_true",
+    )
+
+    return parser.parse_args(args)
+
+
+def make_dir(path):
+    if not len(path) == 0:
+        try:
+            os.makedirs(path)
+        except OSError as exception:
+            if exception.errno != errno.EEXIST:
+                raise
+
+
+def info_line(info_keys, kv):
+    info_strings = []
+    for key in info_keys:
+        if key not in kv:
+            raise KeyError(
+                'Expected key {} missing from INFO field key value pairs'
+                .format(key)
+            )
+        if kv[key] is False:
+            # a FLAG element, which should not be set
+            continue
+        if kv[key] is True:
+            # a FLAG element => write the key only
+            info_strings.append(key)
+        else:
+            info_strings.append('{}={}'.format(key, kv[key]))
+    return ';'.join(info_strings)
+
+
+def ivar_variants_to_vcf(FileIn, FileOut, passOnly=False):
+    filename = os.path.splitext(FileIn)[0]
+    header = (
+        "##fileformat=VCFv4.2\n"
+        "##source=iVar\n"
+        '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\n'
+        '##INFO=<ID=REF_DP,Number=1,Type=Integer,Description="Depth of reference base">\n'
+        '##INFO=<ID=REF_RV,Number=1,Type=Integer,Description="Depth of reference base on reverse reads">\n'
+        '##INFO=<ID=REF_QUAL,Number=1,Type=Integer,Description="Mean quality of reference base">\n'
+        '##INFO=<ID=ALT_DP,Number=1,Type=Integer,Description="Depth of alternate base">\n'
+        '##INFO=<ID=ALT_RV,Number=1,Type=Integer,Description="Deapth of alternate base on reverse reads">\n'
+        '##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base">\n'
+        '##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base">\n'
+        '##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n'
+        '##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">\n'
+        '##FILTER=<ID=FAIL,Description="Result of p-value > 0.05">\n'
+    )
+    info_keys = [
+        re.match(r'##INFO=<ID=([^,]+),', line).group(1)
+        for line in header.splitlines()
+        if line.startswith('##INFO=')
+    ]
+    header += (
+        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n"
+    )
+
+    vars_seen = set()
+    varCountDict = {"SNP": 0, "INS": 0, "DEL": 0}
+    OutDir = os.path.dirname(FileOut)
+    make_dir(OutDir)
+    with open(FileIn) as f, open(FileOut, "w") as fout:
+        fout.write(header)
+        for line in f:
+            if line.startswith("REGION"):
+                continue
+
+            line = line.split("\t")
+            CHROM = line[0]
+            POS = line[1]
+            ID = "."
+            REF = line[2]
+            ALT = line[3]
+            if ALT[0] == "+":
+                ALT = REF + ALT[1:]
+                var_type = "INS"
+            elif ALT[0] == "-":
+                REF += ALT[1:]
+                ALT = line[2]
+                var_type = "DEL"
+            else:
+                var_type = "SNP"
+            QUAL = "."
+            pass_test = line[13]
+            if pass_test == "TRUE":
+                FILTER = "PASS"
+            else:
+                FILTER = "FAIL"
+
+            if (passOnly and FILTER != "PASS"):
+                continue
+            var = (CHROM, POS, REF, ALT)
+            if var in vars_seen:
+                continue
+
+            info_elements = {
+                'DP': line[11],
+                'REF_DP': line[4],
+                'REF_RV': line[5],
+                'REF_QUAL': line[6],
+                'ALT_DP': line[7],
+                'ALT_RV': line[8],
+                'ALT_QUAL': line[9],
+                'AF': line[10]
+            }
+            if var_type in ['INS', 'DEL']:
+                # add INDEL FLAG
+                info_elements['INDEL'] = True
+            else:
+                info_elements['INDEL'] = False
+            INFO = info_line(info_keys, info_elements)
+
+            vars_seen.add(var)
+            varCountDict[var_type] += 1
+            fout.write(
+                '\t'.join(
+                    [CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO]
+                ) + '\n'
+            )
+
+    # Print variant counts to pass to MultiQC
+    varCountList = [(k, str(v)) for k, v in sorted(varCountDict.items())]
+    print("\t".join(["sample"] + [x[0] for x in varCountList]))
+    print("\t".join([filename] + [x[1] for x in varCountList]))
+
+
+def main(args=None):
+    args = parse_args(args)
+    ivar_variants_to_vcf(
+        args.FILE_IN, args.FILE_OUT, args.PASS_ONLY
+    )
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/macros.xml	Wed May 19 16:49:13 2021 +0000
+++ b/macros.xml	Thu Jun 10 22:06:04 2021 +0000
@@ -12,6 +12,7 @@
   <xml name="citations">
     <citations>
       <citation type="doi">10.1186/s13059-018-1618-7</citation>
+      <yield />
     </citations>
   </xml>
 </macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/zika/Z52_a.vcf	Thu Jun 10 22:06:04 2021 +0000
@@ -0,0 +1,30 @@
+##fileformat=VCFv4.2
+##source=iVar
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=REF_DP,Number=1,Type=Integer,Description="Depth of reference base">
+##INFO=<ID=REF_RV,Number=1,Type=Integer,Description="Depth of reference base on reverse reads">
+##INFO=<ID=REF_QUAL,Number=1,Type=Integer,Description="Mean quality of reference base">
+##INFO=<ID=ALT_DP,Number=1,Type=Integer,Description="Depth of alternate base">
+##INFO=<ID=ALT_RV,Number=1,Type=Integer,Description="Deapth of alternate base on reverse reads">
+##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base">
+##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base">
+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
+##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">
+##FILTER=<ID=FAIL,Description="Result of p-value > 0.05">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+PRV	350	.	A	T	.	FAIL	DP=106;REF_DP=101;REF_RV=101;REF_QUAL=36;ALT_DP=5;ALT_RV=5;ALT_QUAL=35;AF=0.0471698
+PRV	722	.	C	CA	.	FAIL	DP=282;REF_DP=280;REF_RV=234;REF_QUAL=36;ALT_DP=9;ALT_RV=0;ALT_QUAL=20;AF=0.0319149;INDEL
+PRV	1682	.	C	T	.	PASS	DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088
+PRV	1965	.	T	G	.	PASS	DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603
+PRV	2702	.	A	G	.	FAIL	DP=32;REF_DP=31;REF_RV=31;REF_QUAL=36;ALT_DP=1;ALT_RV=1;ALT_QUAL=23;AF=0.03125
+PRV	2781	.	T	G	.	PASS	DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647
+PRV	2922	.	C	T	.	PASS	DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04
+PRV	3148	.	Y	T	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563
+PRV	3148	.	Y	C	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199
+PRV	3295	.	A	G	.	PASS	DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385
+PRV	5680	.	C	T	.	PASS	DP=35;REF_DP=27;REF_RV=5;REF_QUAL=44;ALT_DP=8;ALT_RV=2;ALT_QUAL=46;AF=0.228571
+PRV	5723	.	T	G	.	FAIL	DP=32;REF_DP=31;REF_RV=31;REF_QUAL=35;ALT_DP=1;ALT_RV=1;ALT_QUAL=21;AF=0.03125
+PRV	6201	.	A	G	.	FAIL	DP=12;REF_DP=10;REF_RV=0;REF_QUAL=35;ALT_DP=2;ALT_RV=0;ALT_QUAL=38;AF=0.166667
+PRV	6211	.	T	C	.	FAIL	DP=9;REF_DP=8;REF_RV=0;REF_QUAL=36;ALT_DP=1;ALT_RV=0;ALT_QUAL=35;AF=0.111111
+PRV	7916	.	C	T	.	PASS	DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875
+PRV	9713	.	C	T	.	PASS	DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917