comparison gfapts/gfap_r1.0_samvcf_data_parser.xml @ 0:f753b30013e6 draft

Uploaded
author rdaveau
date Fri, 29 Jun 2012 10:20:55 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f753b30013e6
1 <tool id="gfap_r1.0_samvcf_data_parser" name="SAMVCF data parser">
2 <description>Convert VCF-formatted variant calls into ANNOVAR input</description>
3 <command interpreter="perl">gfap_r1.0_samvcf_data_parser.pl -varfile=$varfile -outdir=$__new_file_path__/gfap -outfile=$outfile</command>
4 <inputs>
5 <param name="varfile" format="vcf" type="data" label="Input VCF file" />
6 </inputs>
7 <outputs>
8 <data format="txt" name="outfile" label="${varfile.name}.var" />
9 </outputs>
10 <help>
11 .. class:: infomark
12
13 **What it does**
14
15 - Convert a samtools-formatted VCF file into **ANNOVAR** input.
16 - Merge calls whenever possible and compute statistics on calls.
17
18 .. class:: warningmark
19
20 **As the DP4-tag in the INFO field is required, only samtools/bcftools VCF-files are currently supported**.
21
22 .. class:: infomark
23
24 **Third-party resources**
25
26 - ANNOVAR: http://www.openbioinformatics.org/annovar
27 - samtools/bcftools: http://samtools.sourceforge.net
28
29 ----
30
31 **Input .vcf file**::
32
33 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
34 chr1 14522 . G A,C 7.8 . DP=2;AF1=0.9999;CI95=0.5,1;DP4=0,0,2,0;MQ=30;FQ=-30 GT:PL:GQ 1/1:37,3,0,38,1,35:41
35 chr1 14653 . C T 12.3 . DP=4;AF1=0.5001;CI95=0.5,0.5;DP4=2,0,1,1;MQ=38;FQ=6.58;PV4=1,0.45,1,0.43 GT:PL:GQ 0/1:42,0,33:35
36 chr1 69968 . A G 13 . DP=2;AF1=1;CI95=0.5,1;DP4=0,0,2,0;MQ=41;FQ=-33 GT:PL:GQ 1/1:44,6,0:49
37 chr1 129285 . G A 37.8 . DP=2;AF1=1;CI95=0.5,1;DP4=0,0,0,2;MQ=60;FQ=-33 GT:PL:GQ 1/1:69,6,0:49
38 chr1 808631 . G A 125 . DP=7;AF1=1;CI95=1,1;DP4=0,0,7,0;MQ=60;FQ=-48 GT:PL:GQ 1/1:158,21,0:84
39 chr1 808922 . G A 222 . DP=47;AF1=1;CI95=1,1;DP4=1,0,23,16;MQ=51;FQ=-131;PV4=1,0.0026,1,0.47 GT:PL:GQ 1/1:255,104,0:99
40 chr1 808928 . C T 219 . DP=47;AF1=1;CI95=1,1;DP4=1,0,23,16;MQ=52;FQ=-131;PV4=1,2.4e-05,1,0.22 GT:PL:GQ 1/1:252,104,0:99
41 chr1 824115 . A C 7.8 . DP=3;AF1=0.5001;CI95=0.5,0.5;DP4=0,1,0,2;MQ=60;FQ=4.79;PV4=1,0.037,1,0.33 GT:PL:GQ 0/1:37,0,31:33
42 chr1 824161 . C T 4.77 . DP=3;AF1=0.5001;CI95=0.5,0.5;DP4=0,1,1,1;MQ=53;FQ=4.06;PV4=1,0.11,0.33,1 GT:PL:GQ 0/1:33,0,31:32
43 chr1 824215 . T C 8.44 . DP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=49;FQ=-33 GT:PL:GQ 1/1:39,6,0:49
44 chr1 852063 . G A 30.8 . DP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=60;FQ=-33 GT:PL:GQ 1/1:62,6,0:49
45 chr1 861630 . G A 58 . DP=4;AF1=1;CI95=0.5,1;DP4=0,0,0,3;MQ=60;FQ=-36 GT:PL:GQ 1/1:90,9,0:63
46 chr1 861808 . A G 48.8 . DP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=60;FQ=-33 GT:PL:GQ 1/1:80,6,0:49
47 chr1 866319 . G A 33.5 . DP=6;AF1=1;CI95=0.5,1;DP4=0,0,4,0;MQ=60;FQ=-39 GT:PL:GQ 1/1:66,12,0:72
48 chr1 870903 . T C 78.5 . DP=5;AF1=1;CI95=0.5,1;DP4=0,0,4,0;MQ=60;FQ=-39 GT:PL:GQ 1/1:111,12,0:72
49
50 ----
51
52 **Output .var file**::
53
54 #From [chr] to [ann] = ANNOVAR required fields
55 #NRF = #reads consistent w/ the reference allele on the F-strand
56 #NRR = #reads consistent w/ the reference allele on the R-strand
57 #NAF = #reads consistent w/ the alternate allele on the F-strand
58 #NAR = #reads consistent w/ the alternate allele on the R-strand
59 #DP = total #reads in call ie. NRF+NRR+NAF+NAR
60 #AD = total #reads consistent w/ the alternate allele ie. NAF+NAR
61 #AF = alternate allele ratio ie. AD/DP
62 #QC = Phred-scaled call quality
63 #P.str = NRF+NAF vs. NRR+NAR binomial test P-value ie. total strand bias
64 #P.ref = NRF vs. NRR binomial test P-value ie. reference allele strand bias
65 #P.alt = NAF vs. NAR binomial test P-value ie. alternate allele strand bias
66 #VCF.FILTER = FILTER field from the input vcf file
67 #VAR.FILTER = GFAP default FILTER to discriminate between TP and FP variants
68 #chr start end ref alt ann QC NRF NRR NAF NAR VCF.FILTER P.str P.ref P.alt DP AD AF VAR.FILTER
69 chr1 14907 14907 A G het 9 4 0 0 3 NONE 3.33e-01 4.17e-02 8.33e-02 7 3 0.4290 SKIP
70 chr1 14930 14930 A G het 37 4 2 0 5 NONE 1.83e-01 2.29e-01 2.08e-02 11 5 0.4550 SKIP
71 chr1 68896 68896 G A hom 18 0 0 3 0 NONE 8.33e-02 3.33e-01 8.33e-02 3 3 1.0000 SKIP
72 chr1 69270 69270 A G hom 179 0 0 31 0 NONE 3.10e-10 3.33e-01 3.10e-10 31 31 1.0000 SKIP
73 chr1 69511 69511 A G hom 222 0 0 13 12 NONE 3.33e-01 3.33e-01 3.33e-01 25 25 1.0000 PASS
74 chr1 69897 69897 T C het 14 1 0 0 3 NONE 2.08e-01 3.33e-01 8.33e-02 4 3 0.7500 SKIP
75 chr1 129285 129285 G A het 56 0 4 0 4 NONE 2.60e-03 4.17e-02 4.17e-02 8 4 0.5000 SKIP
76 chr1 567697 567697 G A hom 30 0 0 0 2 NONE 1.67e-01 3.33e-01 1.67e-01 2 2 1.0000 SKIP
77 chr1 569803 569803 G A hom 50 0 0 4 0 NONE 4.17e-02 3.33e-01 4.17e-02 4 4 1.0000 SKIP
78 chr1 808631 808631 G A hom 142 0 0 7 1 NONE 2.34e-02 3.33e-01 2.34e-02 8 8 1.0000 SKIP
79 chr1 808922 808922 G A hom 222 0 0 15 26 NONE 3.91e-02 3.33e-01 3.91e-02 41 41 1.0000 PASS
80 chr1 808928 808928 C T hom 222 0 0 14 31 NONE 5.36e-03 3.33e-01 5.36e-03 45 45 1.0000 PASS
81 chr1 816725 816725 A G hom 22 0 0 2 0 NONE 1.67e-01 3.33e-01 1.67e-01 2 2 1.0000 SKIP
82 chr1 821030 821030 G T hom 36 0 0 2 0 NONE 1.67e-01 3.33e-01 1.67e-01 2 2 1.0000 SKIP
83 chr1 821143 821143 G T hom 8 0 0 0 2 NONE 1.67e-01 3.33e-01 1.67e-01 2 2 1.0000 SKIP
84
85 ----
86
87 .. class:: infomark
88
89 **Feedback**: romain.daveau@curie.fr
90 </help>
91 </tool>