Mercurial > repos > iuc > ivar_variants
changeset 6:147465efa99c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 847ec10cd36ea4f3cd4c257d5742f0fb401e364e"
author | iuc |
---|---|
date | Thu, 10 Jun 2021 22:06:04 +0000 (2021-06-10) |
parents | 49236b03e4fd |
children | 252dfb042563 |
files | ivar_variants.xml ivar_variants_to_vcf.py macros.xml test-data/zika/Z52_a.vcf |
diffstat | 4 files changed, 236 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/ivar_variants.xml Wed May 19 16:49:13 2021 +0000 +++ b/ivar_variants.xml Thu Jun 10 22:06:04 2021 +0000 @@ -1,9 +1,11 @@ -<tool id="ivar_variants" name="ivar variants" version="@VERSION@+galaxy0"> +<tool id="ivar_variants" name="ivar variants" version="@VERSION@+galaxy1"> <description>Call variants from aligned BAM file</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements" /> + <expand macro="requirements"> + <requirement type="package" version="3">python</requirement> + </expand> <expand macro="version_command" /> <command detect_errors="exit_code"><![CDATA[ ln -s '$ref' ref.fa && @@ -11,22 +13,51 @@ samtools mpileup -A -d 0 --reference ref.fa -B -Q 0 sorted.bam | ivar variants -p variants -q $min_qual - -t $min_freq + -t $min_freq && + #if str($output_format.choice) == 'vcf' + python '${__tool_directory__}/ivar_variants_to_vcf.py' + ${output_format.pass_only} + variants.tsv '$output_variants' + #else + cp variants.tsv '$output_variants' + #end if ]]> </command> <inputs> <param name="input_bam" type="data" format="bam" label="Bam file" help="Aligned reads, to trim primers and quality"/> <param name="ref" type="data" format="fasta" label="Reference"/> <param name="min_qual" argument="-q" type="integer" min="1" value="20" label="Minimum quality score threshold to count base"/> <param name="min_freq" argument="-t" type="float" min="0" max="1" value="0.03" label="Minimum frequency threshold"/> + <conditional name="output_format"> + <param name="choice" type="select" label="Output format"> + <option value="tabular">Tabular (native tool output)</option> + <option value="vcf">VCF</option> + </param> + <when value="vcf"> + <param argument="--pass_only" type="boolean" truevalue="--pass_only" falsevalue="" label="In VCF only output variants that PASS all filters" /> + </when> + <when value="tabular" /> + </conditional> </inputs> <outputs> - <data name="output_variants" format="tabular" label="${tool.name} on ${on_string}" from_work_dir="variants.tsv"/> + <data name="output_variants" format="tabular" label="${tool.name} on ${on_string}"> + <change_format> + <when input="output_format.choice" value="vcf" format="vcf" /> + </change_format> + </data> </outputs> <tests> - <test> + <test expect_num_outputs="1"> <param name="input_bam" value="zika/Z52_a.masked.sorted.bam" /> <param name="ref" value="zika/db/PRV.fa" /> - <output name="output_variants" file="zika/Z52_a.tsv" lines_diff="10"/> + <output name="output_variants" file="zika/Z52_a.tsv" ftype="tabular" lines_diff="10"/> + </test> + <test expect_num_outputs="1"> + <param name="input_bam" value="zika/Z52_a.masked.sorted.bam" /> + <param name="ref" value="zika/db/PRV.fa" /> + <conditional name="output_format"> + <param name="choice" value="vcf" /> + </conditional> + <output name="output_variants" file="zika/Z52_a.vcf" ftype="vcf"/> </test> </tests> <help><![CDATA[ @@ -43,6 +74,15 @@ required for a SNV or indel to be reported. Documentation can be found at `<https://andersen-lab.github.io/ivar/html/manualpage.html>`_. + + Optionally output is converted to VCF using a version of the `ivar_variants_to_vcf.py script <https://github.com/nf-core/viralrecon/blob/dev/bin/ivar_variants_to_vcf.py>`_, + that has been modified to store attributes in INFO fields. ]]> </help> - <expand macro="citations" /> + <expand macro="citations"> + <citation type="bibtex">@misc{githubivar_variants_to_vcf, + author = {Fernandez, Sarai Varona and Patel, Harshil}, + year = {2021}, + title = {ivar_variants_to_vcf}, + url = {https://github.com/nf-core/viralrecon/blob/dev/bin/ivar_variants_to_vcf.py} + }</citation> </expand> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ivar_variants_to_vcf.py Thu Jun 10 22:06:04 2021 +0000 @@ -0,0 +1,158 @@ +#!/usr/bin/env python +import argparse +import errno +import os +import re +import sys + + +def parse_args(args=None): + Description = "Convert iVar variants tsv file to vcf format." + Epilog = """Example usage: python ivar_variants_to_vcf.py <FILE_IN> <FILE_OUT>""" + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("FILE_IN", help="Input tsv file.") + parser.add_argument("FILE_OUT", help="Full path to output vcf file.") + parser.add_argument( + "-po", + "--pass_only", + dest="PASS_ONLY", + help="Only output variants that PASS all filters.", + action="store_true", + ) + + return parser.parse_args(args) + + +def make_dir(path): + if not len(path) == 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + + +def info_line(info_keys, kv): + info_strings = [] + for key in info_keys: + if key not in kv: + raise KeyError( + 'Expected key {} missing from INFO field key value pairs' + .format(key) + ) + if kv[key] is False: + # a FLAG element, which should not be set + continue + if kv[key] is True: + # a FLAG element => write the key only + info_strings.append(key) + else: + info_strings.append('{}={}'.format(key, kv[key])) + return ';'.join(info_strings) + + +def ivar_variants_to_vcf(FileIn, FileOut, passOnly=False): + filename = os.path.splitext(FileIn)[0] + header = ( + "##fileformat=VCFv4.2\n" + "##source=iVar\n" + '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\n' + '##INFO=<ID=REF_DP,Number=1,Type=Integer,Description="Depth of reference base">\n' + '##INFO=<ID=REF_RV,Number=1,Type=Integer,Description="Depth of reference base on reverse reads">\n' + '##INFO=<ID=REF_QUAL,Number=1,Type=Integer,Description="Mean quality of reference base">\n' + '##INFO=<ID=ALT_DP,Number=1,Type=Integer,Description="Depth of alternate base">\n' + '##INFO=<ID=ALT_RV,Number=1,Type=Integer,Description="Deapth of alternate base on reverse reads">\n' + '##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base">\n' + '##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base">\n' + '##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n' + '##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">\n' + '##FILTER=<ID=FAIL,Description="Result of p-value > 0.05">\n' + ) + info_keys = [ + re.match(r'##INFO=<ID=([^,]+),', line).group(1) + for line in header.splitlines() + if line.startswith('##INFO=') + ] + header += ( + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n" + ) + + vars_seen = set() + varCountDict = {"SNP": 0, "INS": 0, "DEL": 0} + OutDir = os.path.dirname(FileOut) + make_dir(OutDir) + with open(FileIn) as f, open(FileOut, "w") as fout: + fout.write(header) + for line in f: + if line.startswith("REGION"): + continue + + line = line.split("\t") + CHROM = line[0] + POS = line[1] + ID = "." + REF = line[2] + ALT = line[3] + if ALT[0] == "+": + ALT = REF + ALT[1:] + var_type = "INS" + elif ALT[0] == "-": + REF += ALT[1:] + ALT = line[2] + var_type = "DEL" + else: + var_type = "SNP" + QUAL = "." + pass_test = line[13] + if pass_test == "TRUE": + FILTER = "PASS" + else: + FILTER = "FAIL" + + if (passOnly and FILTER != "PASS"): + continue + var = (CHROM, POS, REF, ALT) + if var in vars_seen: + continue + + info_elements = { + 'DP': line[11], + 'REF_DP': line[4], + 'REF_RV': line[5], + 'REF_QUAL': line[6], + 'ALT_DP': line[7], + 'ALT_RV': line[8], + 'ALT_QUAL': line[9], + 'AF': line[10] + } + if var_type in ['INS', 'DEL']: + # add INDEL FLAG + info_elements['INDEL'] = True + else: + info_elements['INDEL'] = False + INFO = info_line(info_keys, info_elements) + + vars_seen.add(var) + varCountDict[var_type] += 1 + fout.write( + '\t'.join( + [CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO] + ) + '\n' + ) + + # Print variant counts to pass to MultiQC + varCountList = [(k, str(v)) for k, v in sorted(varCountDict.items())] + print("\t".join(["sample"] + [x[0] for x in varCountList])) + print("\t".join([filename] + [x[1] for x in varCountList])) + + +def main(args=None): + args = parse_args(args) + ivar_variants_to_vcf( + args.FILE_IN, args.FILE_OUT, args.PASS_ONLY + ) + + +if __name__ == "__main__": + sys.exit(main())
--- a/macros.xml Wed May 19 16:49:13 2021 +0000 +++ b/macros.xml Thu Jun 10 22:06:04 2021 +0000 @@ -12,6 +12,7 @@ <xml name="citations"> <citations> <citation type="doi">10.1186/s13059-018-1618-7</citation> + <yield /> </citations> </xml> </macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/zika/Z52_a.vcf Thu Jun 10 22:06:04 2021 +0000 @@ -0,0 +1,30 @@ +##fileformat=VCFv4.2 +##source=iVar +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> +##INFO=<ID=REF_DP,Number=1,Type=Integer,Description="Depth of reference base"> +##INFO=<ID=REF_RV,Number=1,Type=Integer,Description="Depth of reference base on reverse reads"> +##INFO=<ID=REF_QUAL,Number=1,Type=Integer,Description="Mean quality of reference base"> +##INFO=<ID=ALT_DP,Number=1,Type=Integer,Description="Depth of alternate base"> +##INFO=<ID=ALT_RV,Number=1,Type=Integer,Description="Deapth of alternate base on reverse reads"> +##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base"> +##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##FILTER=<ID=PASS,Description="Result of p-value <= 0.05"> +##FILTER=<ID=FAIL,Description="Result of p-value > 0.05"> +#CHROM POS ID REF ALT QUAL FILTER INFO +PRV 350 . A T . FAIL DP=106;REF_DP=101;REF_RV=101;REF_QUAL=36;ALT_DP=5;ALT_RV=5;ALT_QUAL=35;AF=0.0471698 +PRV 722 . C CA . FAIL DP=282;REF_DP=280;REF_RV=234;REF_QUAL=36;ALT_DP=9;ALT_RV=0;ALT_QUAL=20;AF=0.0319149;INDEL +PRV 1682 . C T . PASS DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088 +PRV 1965 . T G . PASS DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603 +PRV 2702 . A G . FAIL DP=32;REF_DP=31;REF_RV=31;REF_QUAL=36;ALT_DP=1;ALT_RV=1;ALT_QUAL=23;AF=0.03125 +PRV 2781 . T G . PASS DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647 +PRV 2922 . C T . PASS DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04 +PRV 3148 . Y T . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563 +PRV 3148 . Y C . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199 +PRV 3295 . A G . PASS DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385 +PRV 5680 . C T . PASS DP=35;REF_DP=27;REF_RV=5;REF_QUAL=44;ALT_DP=8;ALT_RV=2;ALT_QUAL=46;AF=0.228571 +PRV 5723 . T G . FAIL DP=32;REF_DP=31;REF_RV=31;REF_QUAL=35;ALT_DP=1;ALT_RV=1;ALT_QUAL=21;AF=0.03125 +PRV 6201 . A G . FAIL DP=12;REF_DP=10;REF_RV=0;REF_QUAL=35;ALT_DP=2;ALT_RV=0;ALT_QUAL=38;AF=0.166667 +PRV 6211 . T C . FAIL DP=9;REF_DP=8;REF_RV=0;REF_QUAL=36;ALT_DP=1;ALT_RV=0;ALT_QUAL=35;AF=0.111111 +PRV 7916 . C T . PASS DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875 +PRV 9713 . C T . PASS DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917