changeset 15:045d6d00f606 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 9d25797b06335056930bddede98a2f3f9a303470
author iuc
date Tue, 27 Jun 2023 17:13:41 +0000
parents 230b05597996
children 194e3ceee923
files ivar_variants.xml ivar_variants_to_vcf.py test-data/zika/Z52_a.vcf test-data/zika/Z52_a_pass.vcf
diffstat 4 files changed, 71 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/ivar_variants.xml	Wed Apr 19 08:32:37 2023 +0000
+++ b/ivar_variants.xml	Tue Jun 27 17:13:41 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="ivar_variants" name="ivar variants" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+<tool id="ivar_variants" name="ivar variants" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
     <description>Call variants from aligned BAM file</description>
     <macros>
         <import>macros.xml</import>
@@ -59,12 +59,12 @@
         </data>
     </outputs>
     <tests>
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="1"> <!-- test 1 -->
             <param name="input_bam" value="zika/Z52_a.masked.sorted.bam"/>
             <param name="ref" value="zika/db/PRV.fa"/>
             <output name="output_variants_tabular" file="zika/Z52_a.tsv" ftype="tabular"/>
         </test>
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="1"> <!-- test 2 -->
             <param name="input_bam" value="zika/Z52_a.masked.sorted.bam"/>
             <param name="ref" value="zika/db/PRV.fa"/>
             <conditional name="output_format">
@@ -72,7 +72,7 @@
             </conditional>
             <output name="output_variants_vcf" file="zika/Z52_a.vcf" ftype="vcf"/>
         </test>
-        <test expect_num_outputs="2">
+        <test expect_num_outputs="2"> <!-- test 3 -->
             <param name="input_bam" value="zika/Z52_a.masked.sorted.bam"/>
             <param name="ref" value="zika/db/PRV.fa"/>
             <conditional name="output_format">
@@ -82,13 +82,13 @@
             <output name="output_variants_tabular" file="zika/Z52_a.tsv" ftype="tabular" lines_diff="6"/>
             <output name="output_variants_vcf" file="zika/Z52_a_pass.vcf" ftype="vcf"/>
         </test>
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="1"> <!-- test 4 -->
             <param name="input_bam" value="zika/Z52_a.masked.sorted.bam"/>
             <param name="ref" value="zika/db/PRV.fa"/>
             <param name="gtf" value="zika/db/PRV.gff"/>
             <output name="output_variants_tabular" file="zika/Z52_a_annotated.tsv" ftype="tabular"/>
         </test>
-        <test expect_num_outputs="2">
+        <test expect_num_outputs="2"> <!-- test 5 -->
             <param name="input_bam" value="zika/Z52_a.masked.sorted.bam"/>
             <param name="ref" value="zika/db/PRV.fa"/>
             <param name="gtf" value="zika/db/PRV.gff"/>
--- a/ivar_variants_to_vcf.py	Wed Apr 19 08:32:37 2023 +0000
+++ b/ivar_variants_to_vcf.py	Tue Jun 27 17:13:41 2023 +0000
@@ -66,6 +66,7 @@
         '##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base">\n'
         '##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base">\n'
         '##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n'
+        '##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">'
         '##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">\n'
         '##FILTER=<ID=FAIL,Description="Result of p-value > 0.05">\n'
     )
@@ -88,6 +89,27 @@
             if line.startswith("REGION"):
                 continue
 
+            # fields:
+            # 0 REGION
+            # 1 POS
+            # 2 REF
+            # 3 ALT
+            # 4 REF_DP
+            # 5 REF_RV
+            # 6 REF_QUAL
+            # 7 ALT_DP
+            # 8 ALT_RV
+            # 9 ALT_QUAL
+            # 10 ALT_FREQ
+            # 11 TOTAL_DP
+            # 12 PVAL
+            # 13 PASS
+            # 14 GFF_FEATURE
+            # 15 REF_CODON
+            # 16 REF_AA
+            # 17 ALT_CODON
+            # 18 ALT_AA
+            # 19 POS_AA
             line = line.split("\t")
             CHROM = line[0]
             POS = line[1]
@@ -116,15 +138,25 @@
             if var in vars_seen:
                 continue
 
+            ref_dp = int(line[4])
+            ref_dp_rev = int(line[5])
+            ref_dp_fwd = ref_dp - ref_dp_rev
+
+            alt_dp = int(line[7])
+            alt_dp_rev = int(line[8])
+            alt_dp_fwd = alt_dp - alt_dp_rev
+
+            dp4 = f'{ref_dp_fwd},{ref_dp_rev},{alt_dp_fwd},{alt_dp_rev}'
             info_elements = {
                 'DP': line[11],
-                'REF_DP': line[4],
-                'REF_RV': line[5],
+                'REF_DP': ref_dp,
+                'REF_RV': ref_dp_rev,
                 'REF_QUAL': line[6],
-                'ALT_DP': line[7],
-                'ALT_RV': line[8],
+                'ALT_DP': alt_dp,
+                'ALT_RV': alt_dp_rev,
                 'ALT_QUAL': line[9],
-                'AF': line[10]
+                'AF': line[10],
+                'DP4': dp4
             }
             if var_type in ['INS', 'DEL']:
                 # add INDEL FLAG
--- a/test-data/zika/Z52_a.vcf	Wed Apr 19 08:32:37 2023 +0000
+++ b/test-data/zika/Z52_a.vcf	Tue Jun 27 17:13:41 2023 +0000
@@ -9,22 +9,22 @@
 ##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base">
 ##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base">
 ##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
-##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">
 ##FILTER=<ID=FAIL,Description="Result of p-value > 0.05">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-PRV	350	.	A	T	.	FAIL	DP=106;REF_DP=101;REF_RV=101;REF_QUAL=36;ALT_DP=5;ALT_RV=5;ALT_QUAL=35;AF=0.0471698
-PRV	722	.	C	CA	.	FAIL	DP=282;REF_DP=280;REF_RV=234;REF_QUAL=36;ALT_DP=9;ALT_RV=0;ALT_QUAL=20;AF=0.0319149;INDEL
-PRV	1682	.	C	T	.	PASS	DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088
-PRV	1965	.	T	G	.	PASS	DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603
-PRV	2702	.	A	G	.	FAIL	DP=32;REF_DP=31;REF_RV=31;REF_QUAL=36;ALT_DP=1;ALT_RV=1;ALT_QUAL=23;AF=0.03125
-PRV	2781	.	T	G	.	PASS	DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647
-PRV	2922	.	C	T	.	PASS	DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04
-PRV	3148	.	Y	T	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563
-PRV	3148	.	Y	C	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199
-PRV	3295	.	A	G	.	PASS	DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385
-PRV	5680	.	C	T	.	PASS	DP=35;REF_DP=27;REF_RV=10;REF_QUAL=44;ALT_DP=8;ALT_RV=3;ALT_QUAL=46;AF=0.228571
-PRV	5723	.	T	G	.	FAIL	DP=32;REF_DP=31;REF_RV=31;REF_QUAL=35;ALT_DP=1;ALT_RV=1;ALT_QUAL=21;AF=0.03125
-PRV	6201	.	A	G	.	FAIL	DP=12;REF_DP=10;REF_RV=0;REF_QUAL=35;ALT_DP=2;ALT_RV=0;ALT_QUAL=38;AF=0.166667
-PRV	6211	.	T	C	.	FAIL	DP=9;REF_DP=8;REF_RV=0;REF_QUAL=36;ALT_DP=1;ALT_RV=0;ALT_QUAL=35;AF=0.111111
-PRV	7916	.	C	T	.	PASS	DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875
-PRV	9713	.	C	T	.	PASS	DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917
+PRV	350	.	A	T	.	FAIL	DP=106;REF_DP=101;REF_RV=101;REF_QUAL=36;ALT_DP=5;ALT_RV=5;ALT_QUAL=35;AF=0.0471698;DP4=0,101,0,5
+PRV	722	.	C	CA	.	FAIL	DP=282;REF_DP=280;REF_RV=234;REF_QUAL=36;ALT_DP=9;ALT_RV=0;ALT_QUAL=20;AF=0.0319149;INDEL;DP4=46,234,9,0
+PRV	1682	.	C	T	.	PASS	DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088;DP4=113,984,1,33
+PRV	1965	.	T	G	.	PASS	DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603;DP4=189,113,38,25
+PRV	2702	.	A	G	.	FAIL	DP=32;REF_DP=31;REF_RV=31;REF_QUAL=36;ALT_DP=1;ALT_RV=1;ALT_QUAL=23;AF=0.03125;DP4=0,31,0,1
+PRV	2781	.	T	G	.	PASS	DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647;DP4=284,70,40,8
+PRV	2922	.	C	T	.	PASS	DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04;DP4=264,0,11,0
+PRV	3148	.	Y	T	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563;DP4=0,0,1060,264
+PRV	3148	.	Y	C	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199;DP4=0,0,306,75
+PRV	3295	.	A	G	.	PASS	DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385;DP4=0,1002,0,38
+PRV	5680	.	C	T	.	PASS	DP=35;REF_DP=27;REF_RV=10;REF_QUAL=44;ALT_DP=8;ALT_RV=3;ALT_QUAL=46;AF=0.228571;DP4=17,10,5,3
+PRV	5723	.	T	G	.	FAIL	DP=32;REF_DP=31;REF_RV=31;REF_QUAL=35;ALT_DP=1;ALT_RV=1;ALT_QUAL=21;AF=0.03125;DP4=0,31,0,1
+PRV	6201	.	A	G	.	FAIL	DP=12;REF_DP=10;REF_RV=0;REF_QUAL=35;ALT_DP=2;ALT_RV=0;ALT_QUAL=38;AF=0.166667;DP4=10,0,2,0
+PRV	6211	.	T	C	.	FAIL	DP=9;REF_DP=8;REF_RV=0;REF_QUAL=36;ALT_DP=1;ALT_RV=0;ALT_QUAL=35;AF=0.111111;DP4=8,0,1,0
+PRV	7916	.	C	T	.	PASS	DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875;DP4=62,289,3,78
+PRV	9713	.	C	T	.	PASS	DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917;DP4=374,0,13,0
--- a/test-data/zika/Z52_a_pass.vcf	Wed Apr 19 08:32:37 2023 +0000
+++ b/test-data/zika/Z52_a_pass.vcf	Tue Jun 27 17:13:41 2023 +0000
@@ -9,16 +9,16 @@
 ##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base">
 ##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base">
 ##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
-##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">
+##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">
 ##FILTER=<ID=FAIL,Description="Result of p-value > 0.05">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-PRV	1682	.	C	T	.	PASS	DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088
-PRV	1965	.	T	G	.	PASS	DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603
-PRV	2781	.	T	G	.	PASS	DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647
-PRV	2922	.	C	T	.	PASS	DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04
-PRV	3148	.	Y	T	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563
-PRV	3148	.	Y	C	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199
-PRV	3295	.	A	G	.	PASS	DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385
-PRV	5680	.	C	T	.	PASS	DP=35;REF_DP=27;REF_RV=10;REF_QUAL=44;ALT_DP=8;ALT_RV=3;ALT_QUAL=46;AF=0.228571
-PRV	7916	.	C	T	.	PASS	DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875
-PRV	9713	.	C	T	.	PASS	DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917
+PRV	1682	.	C	T	.	PASS	DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088;DP4=113,984,1,33
+PRV	1965	.	T	G	.	PASS	DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603;DP4=189,113,38,25
+PRV	2781	.	T	G	.	PASS	DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647;DP4=284,70,40,8
+PRV	2922	.	C	T	.	PASS	DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04;DP4=264,0,11,0
+PRV	3148	.	Y	T	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563;DP4=0,0,1060,264
+PRV	3148	.	Y	C	.	PASS	DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199;DP4=0,0,306,75
+PRV	3295	.	A	G	.	PASS	DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385;DP4=0,1002,0,38
+PRV	5680	.	C	T	.	PASS	DP=35;REF_DP=27;REF_RV=10;REF_QUAL=44;ALT_DP=8;ALT_RV=3;ALT_QUAL=46;AF=0.228571;DP4=17,10,5,3
+PRV	7916	.	C	T	.	PASS	DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875;DP4=62,289,3,78
+PRV	9713	.	C	T	.	PASS	DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917;DP4=374,0,13,0