Mercurial > repos > iuc > ivar_filtervariants
changeset 15:1ef5b6e3e09d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 9d25797b06335056930bddede98a2f3f9a303470
author | iuc |
---|---|
date | Tue, 27 Jun 2023 17:14:18 +0000 |
parents | 096a5b0210cc |
children | 618297d23022 |
files | ivar_variants_to_vcf.py test-data/zika/Z52_a.vcf test-data/zika/Z52_a_pass.vcf |
diffstat | 3 files changed, 65 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
--- a/ivar_variants_to_vcf.py Wed Apr 19 08:29:46 2023 +0000 +++ b/ivar_variants_to_vcf.py Tue Jun 27 17:14:18 2023 +0000 @@ -66,6 +66,7 @@ '##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base">\n' '##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base">\n' '##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n' + '##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">' '##FILTER=<ID=PASS,Description="Result of p-value <= 0.05">\n' '##FILTER=<ID=FAIL,Description="Result of p-value > 0.05">\n' ) @@ -88,6 +89,27 @@ if line.startswith("REGION"): continue + # fields: + # 0 REGION + # 1 POS + # 2 REF + # 3 ALT + # 4 REF_DP + # 5 REF_RV + # 6 REF_QUAL + # 7 ALT_DP + # 8 ALT_RV + # 9 ALT_QUAL + # 10 ALT_FREQ + # 11 TOTAL_DP + # 12 PVAL + # 13 PASS + # 14 GFF_FEATURE + # 15 REF_CODON + # 16 REF_AA + # 17 ALT_CODON + # 18 ALT_AA + # 19 POS_AA line = line.split("\t") CHROM = line[0] POS = line[1] @@ -116,15 +138,25 @@ if var in vars_seen: continue + ref_dp = int(line[4]) + ref_dp_rev = int(line[5]) + ref_dp_fwd = ref_dp - ref_dp_rev + + alt_dp = int(line[7]) + alt_dp_rev = int(line[8]) + alt_dp_fwd = alt_dp - alt_dp_rev + + dp4 = f'{ref_dp_fwd},{ref_dp_rev},{alt_dp_fwd},{alt_dp_rev}' info_elements = { 'DP': line[11], - 'REF_DP': line[4], - 'REF_RV': line[5], + 'REF_DP': ref_dp, + 'REF_RV': ref_dp_rev, 'REF_QUAL': line[6], - 'ALT_DP': line[7], - 'ALT_RV': line[8], + 'ALT_DP': alt_dp, + 'ALT_RV': alt_dp_rev, 'ALT_QUAL': line[9], - 'AF': line[10] + 'AF': line[10], + 'DP4': dp4 } if var_type in ['INS', 'DEL']: # add INDEL FLAG
--- a/test-data/zika/Z52_a.vcf Wed Apr 19 08:29:46 2023 +0000 +++ b/test-data/zika/Z52_a.vcf Tue Jun 27 17:14:18 2023 +0000 @@ -9,22 +9,22 @@ ##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base"> ##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base"> ##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> -##FILTER=<ID=PASS,Description="Result of p-value <= 0.05"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">##FILTER=<ID=PASS,Description="Result of p-value <= 0.05"> ##FILTER=<ID=FAIL,Description="Result of p-value > 0.05"> #CHROM POS ID REF ALT QUAL FILTER INFO -PRV 350 . A T . FAIL DP=106;REF_DP=101;REF_RV=101;REF_QUAL=36;ALT_DP=5;ALT_RV=5;ALT_QUAL=35;AF=0.0471698 -PRV 722 . C CA . FAIL DP=282;REF_DP=280;REF_RV=234;REF_QUAL=36;ALT_DP=9;ALT_RV=0;ALT_QUAL=20;AF=0.0319149;INDEL -PRV 1682 . C T . PASS DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088 -PRV 1965 . T G . PASS DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603 -PRV 2702 . A G . FAIL DP=32;REF_DP=31;REF_RV=31;REF_QUAL=36;ALT_DP=1;ALT_RV=1;ALT_QUAL=23;AF=0.03125 -PRV 2781 . T G . PASS DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647 -PRV 2922 . C T . PASS DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04 -PRV 3148 . Y T . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563 -PRV 3148 . Y C . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199 -PRV 3295 . A G . PASS DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385 -PRV 5680 . C T . PASS DP=35;REF_DP=27;REF_RV=10;REF_QUAL=44;ALT_DP=8;ALT_RV=3;ALT_QUAL=46;AF=0.228571 -PRV 5723 . T G . FAIL DP=32;REF_DP=31;REF_RV=31;REF_QUAL=35;ALT_DP=1;ALT_RV=1;ALT_QUAL=21;AF=0.03125 -PRV 6201 . A G . FAIL DP=12;REF_DP=10;REF_RV=0;REF_QUAL=35;ALT_DP=2;ALT_RV=0;ALT_QUAL=38;AF=0.166667 -PRV 6211 . T C . FAIL DP=9;REF_DP=8;REF_RV=0;REF_QUAL=36;ALT_DP=1;ALT_RV=0;ALT_QUAL=35;AF=0.111111 -PRV 7916 . C T . PASS DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875 -PRV 9713 . C T . PASS DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917 +PRV 350 . A T . FAIL DP=106;REF_DP=101;REF_RV=101;REF_QUAL=36;ALT_DP=5;ALT_RV=5;ALT_QUAL=35;AF=0.0471698;DP4=0,101,0,5 +PRV 722 . C CA . FAIL DP=282;REF_DP=280;REF_RV=234;REF_QUAL=36;ALT_DP=9;ALT_RV=0;ALT_QUAL=20;AF=0.0319149;INDEL;DP4=46,234,9,0 +PRV 1682 . C T . PASS DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088;DP4=113,984,1,33 +PRV 1965 . T G . PASS DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603;DP4=189,113,38,25 +PRV 2702 . A G . FAIL DP=32;REF_DP=31;REF_RV=31;REF_QUAL=36;ALT_DP=1;ALT_RV=1;ALT_QUAL=23;AF=0.03125;DP4=0,31,0,1 +PRV 2781 . T G . PASS DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647;DP4=284,70,40,8 +PRV 2922 . C T . PASS DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04;DP4=264,0,11,0 +PRV 3148 . Y T . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563;DP4=0,0,1060,264 +PRV 3148 . Y C . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199;DP4=0,0,306,75 +PRV 3295 . A G . PASS DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385;DP4=0,1002,0,38 +PRV 5680 . C T . PASS DP=35;REF_DP=27;REF_RV=10;REF_QUAL=44;ALT_DP=8;ALT_RV=3;ALT_QUAL=46;AF=0.228571;DP4=17,10,5,3 +PRV 5723 . T G . FAIL DP=32;REF_DP=31;REF_RV=31;REF_QUAL=35;ALT_DP=1;ALT_RV=1;ALT_QUAL=21;AF=0.03125;DP4=0,31,0,1 +PRV 6201 . A G . FAIL DP=12;REF_DP=10;REF_RV=0;REF_QUAL=35;ALT_DP=2;ALT_RV=0;ALT_QUAL=38;AF=0.166667;DP4=10,0,2,0 +PRV 6211 . T C . FAIL DP=9;REF_DP=8;REF_RV=0;REF_QUAL=36;ALT_DP=1;ALT_RV=0;ALT_QUAL=35;AF=0.111111;DP4=8,0,1,0 +PRV 7916 . C T . PASS DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875;DP4=62,289,3,78 +PRV 9713 . C T . PASS DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917;DP4=374,0,13,0
--- a/test-data/zika/Z52_a_pass.vcf Wed Apr 19 08:29:46 2023 +0000 +++ b/test-data/zika/Z52_a_pass.vcf Tue Jun 27 17:14:18 2023 +0000 @@ -9,16 +9,16 @@ ##INFO=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base"> ##INFO=<ID=AF,Number=1,Type=Float,Description="Frequency of alternate base"> ##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> -##FILTER=<ID=PASS,Description="Result of p-value <= 0.05"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">##FILTER=<ID=PASS,Description="Result of p-value <= 0.05"> ##FILTER=<ID=FAIL,Description="Result of p-value > 0.05"> #CHROM POS ID REF ALT QUAL FILTER INFO -PRV 1682 . C T . PASS DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088 -PRV 1965 . T G . PASS DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603 -PRV 2781 . T G . PASS DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647 -PRV 2922 . C T . PASS DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04 -PRV 3148 . Y T . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563 -PRV 3148 . Y C . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199 -PRV 3295 . A G . PASS DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385 -PRV 5680 . C T . PASS DP=35;REF_DP=27;REF_RV=10;REF_QUAL=44;ALT_DP=8;ALT_RV=3;ALT_QUAL=46;AF=0.228571 -PRV 7916 . C T . PASS DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875 -PRV 9713 . C T . PASS DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917 +PRV 1682 . C T . PASS DP=1133;REF_DP=1097;REF_RV=984;REF_QUAL=37;ALT_DP=34;ALT_RV=33;ALT_QUAL=37;AF=0.0300088;DP4=113,984,1,33 +PRV 1965 . T G . PASS DP=365;REF_DP=302;REF_RV=113;REF_QUAL=37;ALT_DP=63;ALT_RV=25;ALT_QUAL=37;AF=0.172603;DP4=189,113,38,25 +PRV 2781 . T G . PASS DP=408;REF_DP=354;REF_RV=70;REF_QUAL=37;ALT_DP=48;ALT_RV=8;ALT_QUAL=36;AF=0.117647;DP4=284,70,40,8 +PRV 2922 . C T . PASS DP=275;REF_DP=264;REF_RV=0;REF_QUAL=36;ALT_DP=11;ALT_RV=0;ALT_QUAL=36;AF=0.04;DP4=264,0,11,0 +PRV 3148 . Y T . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=1324;ALT_RV=264;ALT_QUAL=36;AF=0.77563;DP4=0,0,1060,264 +PRV 3148 . Y C . PASS DP=1707;REF_DP=0;REF_RV=0;REF_QUAL=0;ALT_DP=381;ALT_RV=75;ALT_QUAL=36;AF=0.223199;DP4=0,0,306,75 +PRV 3295 . A G . PASS DP=1040;REF_DP=1002;REF_RV=1002;REF_QUAL=35;ALT_DP=38;ALT_RV=38;ALT_QUAL=33;AF=0.0365385;DP4=0,1002,0,38 +PRV 5680 . C T . PASS DP=35;REF_DP=27;REF_RV=10;REF_QUAL=44;ALT_DP=8;ALT_RV=3;ALT_QUAL=46;AF=0.228571;DP4=17,10,5,3 +PRV 7916 . C T . PASS DP=432;REF_DP=351;REF_RV=289;REF_QUAL=36;ALT_DP=81;ALT_RV=78;ALT_QUAL=37;AF=0.1875;DP4=62,289,3,78 +PRV 9713 . C T . PASS DP=387;REF_DP=374;REF_RV=0;REF_QUAL=37;ALT_DP=13;ALT_RV=0;ALT_QUAL=35;AF=0.0335917;DP4=374,0,13,0