comparison VCFStorage_wrapper.xml @ 0:3552a8d9f51c draft

Uploaded
author urgi-team
date Tue, 10 Nov 2015 08:30:56 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3552a8d9f51c
1 <tool id="VCFStorage" name="VCFStorage" version="0.01">
2 <description> stores info from variant calling into a table. It will create a tabulated file with SNP info</description>
3 <requirements> <!-- external package this wrapper declares as a dependency -->
4 <requirement version="1.0" type="package">VCF_Gandalf_Tools</requirement>
5 </requirements>
6 <!-- Command run to report the version of the underlying VCFStorage.py script. --> <version_command>
7 VCFStorage.py --version
8 </version_command>
9 <command interpreter="python">
10 VCFStorage_wrapper.py -f '$inputFasta' -o '$outputVCFStorage'
11 ## One -l (strain name) / -L (VCF file) pair is emitted per VCF list entry; every value is single-quoted so a space cannot split it into several arguments.
12 #for $VCF in $VCFFile
13 -l '$VCF.strainName'
14 -L '$VCF.inputStrainVCF'
15 #end for
16 </command>
17 <inputs> <!-- reference genome (fasta) plus a repeat of (strain name, VCF file) pairs; min="1" requires at least one pair -->
18 <param name="inputFasta" type="data" format="fasta" label="Input genome sequence file name (fasta)"/>
19 <repeat name="VCFFile" title="VCF list" min="1">
20 <param name="strainName" size="20" type="text" value="V1" label="strain name (no space allowed)"><!-- enforces the "no space allowed" rule stated in the label --><validator type="regex" message="Strain name must not be empty and must not contain whitespace">^\S+$</validator></param>
21 <param name="inputStrainVCF" type="data" format="vcf" label="Select VCF file "/>
22 </repeat>
23 </inputs>
24 <outputs> <!-- single tabular dataset; its name is the variable passed to -o in the command -->
25 <data name="outputVCFStorage" format="tabular" label="${tool.name} on ${on_string} (tabular)"/>
26 </outputs>
27 <tests> <!-- one functional test: a single strain (V1) on chr17, compared against Expchr17.tab -->
28 <test>
29 <param name="inputFasta" value="chr17.fa" ftype="fasta"/>
30 <param name="strainName" value="V1"/>
31 <param name="inputStrainVCF" value="chr17.VCF" ftype="vcf"/>
32 <output name="outputVCFStorage" file="Expchr17.tab" ftype="tabular"/>
33 </test>
34 </tests>
35 <help><![CDATA[
36 **Stores information from variant calling in a table. It creates a tab-delimited file with SNP information.**
37
38 -----
39
40 **what it does :**
41
42 VCFStorage.py is a Python script that stores data from multiple VCF files in a single tabular marker file. Each VCF becomes a new column in the final output.
43
44 -----
45
46 **input format :**
47
48 Multiple files are necessary as input :
49
50 - the fasta file of your genomic sequence
51 - multiple VCF files (1 per strain). It is strongly advised to flag filtered positions in the FILTER column (column 7) instead of removing their lines from the VCF.
52
53 -----
54
55 **output format :**
56
57 The result is a tab-delimited file in which all genomic positions are in rows and all strains are in columns (in the order in which you gave the VCF files).
58
59 For each position and each genome, a code is attributed :
60
61 - for the reference : ::
62
63 A,T,G,C for the corresponding nucleotide
64
65 - for the genomes : ::
66
67 U if the position is not referenced in the VCF file
68 R if the base is identical to the reference
69 F if the base has been filtered in the column FILTER (column 7) of the VCF
70 A,T,G,C if the genome has a validated SNP at the position
71
72
73 -----
74
75 **example :**
76
77 fasta input file (genomic sequence): ::
78
79 >chr_17
80 ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa
81 TACGCGCGCGCCTAACCCTACGACTTTAACCTACTCTAAACTCTCCTACTAGTACGTCTT
82
83 VCF input file : ::
84
85 ##fileformat=VCFv4.1
86 ##fileDate=20140725
87 ##source=freeBayes v0.9.13-2-ga830efd
88 ##reference=exmple.fsa
89 ##phasing=none
90 ##DetectedFormat=freebayes
91 ##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">
92 ##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">
93 ##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(15 - 35)">
94 ##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">
95 ##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">
96 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT V1
97 chr_17 17 . A G 529.213 G_AF;G_DP AB=0.583333;ABP=5.18177;AC=1;AF=0.5;AN=2;AO=21;CIGAR=1X;DP=36;DPB=36;DPRA=0;EPP=3.1137;EPPR=3.15506;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=77.012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=751;QR=535;RO=15;RPP=5.59539;RPPR=4.31318;RUN=1;SAF=11;SAP=3.1137;SAR=10;SRF=5;SRP=6.62942;SRR=10;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.58;G_DP=36;G_Base=G GT:DP:RO:QR:AO:QA:GL 0/1:36:15:535:21:751:-10,0,-10
98 chr_17 37 . C G 1082.38 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=34;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=3.26577;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=48.0391;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1243;QR=0;RO=0;RPP=15.5282;RPPR=0;RUN=1;SAF=18;SAP=3.26577;SAR=16;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=34;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:34:0:0:34:1243:-10,-9.23017,0
99 chr_17 40 . T T 825.518 G_AF AB=0;ABP=0;AC=2;AF=1;AN=2;AO=29;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=6.67934;EPPR=13.8677;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=8.92992;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1082;QR=178;RO=5;RPP=9.07545;RPPR=13.8677;RUN=1;SAF=13;SAP=3.68421;SAR=16;SRF=5;SRP=13.8677;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.85;G_DP=34;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:34:5:178:29:1082:-10,0,-6.82575
100 chr_17 60 . A . 699.741 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=22;CIGAR=1X;DP=22;DPB=22;DPRA=0;EPP=17.2236;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=32.2544;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=823;QR=0;RO=0;RPP=9.32731;RPPR=0;RUN=1;SAF=12;SAP=3.40511;SAR=10;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=22;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:22:0:0:22:823:-10,-5.98732,0
101 chr_17 73 . T . 846.299 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=27;CIGAR=1X;DP=27;DPB=27;DPRA=0;EPP=16.6021;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=38.84;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1002;QR=0;RO=0;RPP=5.02092;RPPR=0;RUN=1;SAF=21;SAP=21.1059;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=27;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:27:0:0:27:1002:-10,-7.34226,0
102 chr_17 81 . C T 764.464 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=25;CIGAR=1X;DP=25;DPB=25;DPRA=0;EPP=13.5202;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=36.1324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=902;QR=0;RO=0;RPP=3.79203;RPPR=0;RUN=1;SAF=19;SAP=17.6895;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=25;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:25:0:0:25:902:-10,-6.76842,0
103 chr_17 105 . C T 1154 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=37;CIGAR=1X;DP=37;DPB=37;DPRA=0;EPP=5.88603;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=52.0047;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1336;QR=0;RO=0;RPP=19.9713;RPPR=0;RUN=1;SAF=23;SAP=7.76406;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=37;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:37:0:0:37:1336:-10,-10,0
104 chr_17 112 . G A 1276.25 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=40;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=10.8276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=55.9501;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1471;QR=0;RO=0;RPP=10.8276;RPPR=0;RUN=1;SAF=26;SAP=10.8276;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=40;G_Base=A GT:DP:RO:QR:AO:QA:GL 1/1:40:0:0:40:1471:-10,-10,0
105
106 expected result : ::
107
108 CHROM POS reference V1
109 chr_17 1 C U
110 chr_17 2 C U
111 chr_17 3 C U
112 chr_17 4 T U
113 chr_17 5 A U
114 chr_17 6 A U
115 chr_17 7 C U
116 chr_17 8 C U
117 chr_17 9 C U
118 chr_17 10 T U
119 chr_17 11 A U
120 chr_17 12 A U
121 chr_17 13 C U
122 chr_17 14 C U
123 chr_17 15 C U
124 chr_17 16 T U
125 chr_17 17 A F
126 chr_17 18 A U
127 chr_17 19 C U
128 chr_17 20 C U
129 chr_17 21 C U
130 chr_17 22 T U
131 chr_17 23 A U
132 chr_17 24 A U
133 chr_17 25 C U
134 chr_17 26 C U
135 chr_17 27 C U
136 chr_17 28 T U
137 chr_17 29 A U
138 chr_17 30 A U
139 chr_17 31 C U
140 chr_17 32 C U
141 chr_17 33 C U
142 chr_17 34 T U
143 chr_17 35 A U
144 chr_17 36 A U
145 chr_17 37 C G
146 chr_17 38 C U
147 chr_17 39 C U
148 chr_17 40 T F
149 chr_17 41 A U
150 chr_17 42 A U
151 chr_17 43 C U
152 chr_17 44 C U
153 chr_17 45 C U
154 chr_17 46 T U
155 chr_17 47 A U
156 chr_17 48 A U
157 chr_17 49 C U
158 chr_17 50 C U
159 chr_17 51 C U
160 chr_17 52 T U
161 chr_17 53 A U
162 chr_17 54 A U
163 chr_17 55 C U
164 chr_17 56 C U
165 chr_17 57 C U
166 chr_17 58 T U
167 chr_17 59 A U
168 chr_17 60 A R
169 chr_17 61 T U
170 chr_17 62 A U
171 chr_17 63 C U
172 chr_17 64 G U
173 chr_17 65 C U
174 chr_17 66 G U
175 chr_17 67 C U
176 chr_17 68 G U
177 chr_17 69 C U
178 chr_17 70 G U
179 chr_17 71 C U
180 chr_17 72 C U
181 chr_17 73 T R
182 chr_17 74 A U
183 chr_17 75 A U
184 chr_17 76 C U
185 chr_17 77 C U
186 chr_17 78 C U
187 chr_17 79 T U
188 chr_17 80 A U
189 chr_17 81 C T
190 chr_17 82 G U
191 chr_17 83 A U
192 chr_17 84 C U
193 chr_17 85 T U
194 chr_17 86 T U
195 chr_17 87 T U
196 chr_17 88 A U
197 chr_17 89 A U
198 chr_17 90 C U
199 chr_17 91 C U
200 chr_17 92 T U
201 chr_17 93 A U
202 chr_17 94 C U
203 chr_17 95 T U
204 chr_17 96 C U
205 chr_17 97 T U
206 chr_17 98 A U
207 chr_17 99 A U
208 chr_17 100 A U
209 chr_17 101 C U
210 chr_17 102 T U
211 chr_17 103 C U
212 chr_17 104 T U
213 chr_17 105 C F
214 chr_17 106 C U
215 chr_17 107 T U
216 chr_17 108 A U
217 chr_17 109 C U
218 chr_17 110 T U
219 chr_17 111 A U
220 chr_17 112 G F
221 chr_17 113 T U
222 chr_17 114 A U
223 chr_17 115 C U
224 chr_17 116 G U
225 chr_17 117 T U
226 chr_17 118 C U
227 chr_17 119 T U
228 chr_17 120 T U
229
230 -----
231
232 **reference :**
233
234 ]]>
235 </help>
236 </tool>