0
|
1 <tool id="VCFStorage" name="VCFStorage" version="0.01">
|
|
2 <description> stores info from variant calling into a table. It will create a tabular file with SNP information</description>
|
|
3 <requirements> <!-- declares the package this tool depends on: VCF_Gandalf_Tools, version 1.0 -->
|
|
4 <requirement version="1.0" type="package">VCF_Gandalf_Tools</requirement>
|
|
5 </requirements>
|
|
6 <version_command>
|
|
7 VCFStorage.py --version
|
|
8 </version_command> <!-- invokes VCFStorage.py with its version flag; note the job command itself runs VCFStorage_wrapper.py -->
|
|
9 <command interpreter="python">
|
|
10 VCFStorage_wrapper.py -f '$inputFasta' -o '$outputVCFStorage'
|
|
11 ## genome list: one -l (strain name) / -L (VCF file) pair per VCFFile repeat entry; every value is single-quoted so it reaches the wrapper as exactly one shell argument
|
|
12 #for $VCF in $VCFFile
|
|
13 -l '$VCF.strainName'
|
|
14 -L '$VCF.inputStrainVCF'
|
|
15 #end for
|
|
16 </command>
|
|
17 <inputs> <!-- reference FASTA plus a repeatable list (at least one entry) of strain-name / VCF pairs -->
|
|
18 <param type="data" format="fasta" name="inputFasta" label="Input genome sequence file name (fasta)" />
|
|
19 <repeat min="1" name="VCFFile" title="VCF list">
|
|
20 <param type="text" name="strainName" value="V1" size="20" label="strain name (no space allowed)" />
|
|
21 <param type="data" format="vcf" name="inputStrainVCF" label="Select VCF file " />
|
|
22 </repeat>
|
|
23 </inputs>
|
|
24 <outputs> <!-- single tabular dataset; it is handed to the wrapper through the -o option of the command -->
|
|
25 <data name="outputVCFStorage" format="tabular" label="${tool.name} on ${on_string} (tabular)" />
|
|
26 </outputs>
|
|
27 <tests> <!-- one functional test: chr17.fa with a single VCF for strain V1, compared against Expchr17.tab -->
|
|
28 <test>
|
|
29 <param name="inputFasta" value="chr17.fa" ftype="fasta"/>
|
|
30 <param value="V1" name="strainName"/>
|
|
31 <param name="inputStrainVCF" value="chr17.VCF" ftype="vcf"/>
|
|
32 <output name="outputVCFStorage" file="Expchr17.tab" ftype="tabular"/>
|
|
33 </test>
|
|
34 </tests>
|
|
35 <help><![CDATA[
|
|
36 **stores info from variant calling into a table. It will create a tabular file with SNP information**
|
|
37
|
|
38 -----
|
|
39
|
|
40 **what it does :**
|
|
41
|
|
42 VCFStorage.py is a Python script that stores data from multiple VCF files in a single tabular marker file. Each VCF becomes a new column in the final output.
|
|
43
|
|
44 -----
|
|
45
|
|
46 **input format :**
|
|
47
|
|
48 Multiple files are necessary as input :
|
|
49
|
|
50 - the fasta file of your genomic sequence
|
|
51 - multiple VCF files (1 per strain). It is strongly advised to flag filtered positions in the FILTER column (column 7) instead of removing their lines from the VCF.
|
|
52
|
|
53 -----
|
|
54
|
|
55 **output format :**
|
|
56
|
|
57 The result is a tab-delimited file in which all genomic positions are rows and all strains are columns (in the order in which the VCF files were given).
|
|
58
|
|
59 For each position and each genome, a code is attributed :
|
|
60
|
|
61 - for the reference : ::
|
|
62
|
|
63 A,T,G,C for the corresponding nucleotide
|
|
64
|
|
65 - for the genomes : ::
|
|
66
|
|
67 U if the position was not referenced in the VCF file
|
|
68 R if the base is similar to the reference
|
|
69 F if the base has been filtered in the column FILTER (column 7) of the VCF
|
|
70 A,T,G,C if the genome has a validated SNP at the position
|
|
71
|
|
72
|
|
73 -----
|
|
74
|
|
75 **example :**
|
|
76
|
|
77 fasta input file (genomic sequence): ::
|
|
78
|
|
79 >chr_17
|
|
80 ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa
|
|
81 TACGCGCGCGCCTAACCCTACGACTTTAACCTACTCTAAACTCTCCTACTAGTACGTCTT
|
|
82
|
|
83 VCF input file : ::
|
|
84
|
|
85 ##fileformat=VCFv4.1
|
|
86 ##fileDate=20140725
|
|
87 ##source=freeBayes v0.9.13-2-ga830efd
|
|
88 ##reference=exmple.fsa
|
|
89 ##phasing=none
|
|
90 ##DetectedFormat=freebayes
|
|
91 ##FILTER=<ID=G_AN,Description="The SNP has been filtered ; out of AN range(over 2)">
|
|
92 ##FILTER=<ID=G_AF,Description="The SNP has been filtered ; out of AF range(under 0.9)">
|
|
93 ##FILTER=<ID=G_DP,Description="The SNP has been filtered ; out of DP range(15 - 35)">
|
|
94 ##FILTER=<ID=InDel,Description="The SNP has been filtered ; InDel detected">
|
|
95 ##FILTER=<ID=Nmatch,Description="The SNP has been filtered ; reference base detected : N">
|
|
96 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT V1
|
|
97 chr_17 17 . A G 529.213 G_AF;G_DP AB=0.583333;ABP=5.18177;AC=1;AF=0.5;AN=2;AO=21;CIGAR=1X;DP=36;DPB=36;DPRA=0;EPP=3.1137;EPPR=3.15506;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=77.012;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=751;QR=535;RO=15;RPP=5.59539;RPPR=4.31318;RUN=1;SAF=11;SAP=3.1137;SAR=10;SRF=5;SRP=6.62942;SRR=10;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.58;G_DP=36;G_Base=G GT:DP:RO:QR:AO:QA:GL 0/1:36:15:535:21:751:-10,0,-10
|
|
98 chr_17 37 . C G 1082.38 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=34;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=3.26577;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=48.0391;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1243;QR=0;RO=0;RPP=15.5282;RPPR=0;RUN=1;SAF=18;SAP=3.26577;SAR=16;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=34;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:34:0:0:34:1243:-10,-9.23017,0
|
|
99 chr_17 40 . T T 825.518 G_AF AB=0;ABP=0;AC=2;AF=1;AN=2;AO=29;CIGAR=1X;DP=34;DPB=34;DPRA=0;EPP=6.67934;EPPR=13.8677;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=8.92992;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1082;QR=178;RO=5;RPP=9.07545;RPPR=13.8677;RUN=1;SAF=13;SAP=3.68421;SAR=16;SRF=5;SRP=13.8677;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=0.85;G_DP=34;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:34:5:178:29:1082:-10,0,-6.82575
|
|
100 chr_17 60 . A . 699.741 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=22;CIGAR=1X;DP=22;DPB=22;DPRA=0;EPP=17.2236;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=32.2544;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=823;QR=0;RO=0;RPP=9.32731;RPPR=0;RUN=1;SAF=12;SAP=3.40511;SAR=10;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=22;G_Base=G GT:DP:RO:QR:AO:QA:GL 1/1:22:0:0:22:823:-10,-5.98732,0
|
|
101 chr_17 73 . T . 846.299 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=27;CIGAR=1X;DP=27;DPB=27;DPRA=0;EPP=16.6021;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=38.84;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1002;QR=0;RO=0;RPP=5.02092;RPPR=0;RUN=1;SAF=21;SAP=21.1059;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=27;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:27:0:0:27:1002:-10,-7.34226,0
|
|
102 chr_17 81 . C T 764.464 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=25;CIGAR=1X;DP=25;DPB=25;DPRA=0;EPP=13.5202;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=36.1324;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=902;QR=0;RO=0;RPP=3.79203;RPPR=0;RUN=1;SAF=19;SAP=17.6895;SAR=6;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=25;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:25:0:0:25:902:-10,-6.76842,0
|
|
103 chr_17 105 . C T 1154 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=37;CIGAR=1X;DP=37;DPB=37;DPRA=0;EPP=5.88603;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=52.0047;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1336;QR=0;RO=0;RPP=19.9713;RPPR=0;RUN=1;SAF=23;SAP=7.76406;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=37;G_Base=T GT:DP:RO:QR:AO:QA:GL 1/1:37:0:0:37:1336:-10,-10,0
|
|
104 chr_17 112 . G A 1276.25 G_DP AB=0;ABP=0;AC=2;AF=1;AN=2;AO=40;CIGAR=1X;DP=40;DPB=40;DPRA=0;EPP=10.8276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=55.9501;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=1471;QR=0;RO=0;RPP=10.8276;RPPR=0;RUN=1;SAF=26;SAP=10.8276;SAR=14;SRF=0;SRP=0;SRR=0;TYPE=snp;technology.illumina=1;G_AN=2;G_AF=1.00;G_DP=40;G_Base=A GT:DP:RO:QR:AO:QA:GL 1/1:40:0:0:40:1471:-10,-10,0
|
|
105
|
|
106 expected result : ::
|
|
107
|
|
108 CHROM POS reference V1
|
|
109 chr_17 1 C U
|
|
110 chr_17 2 C U
|
|
111 chr_17 3 C U
|
|
112 chr_17 4 T U
|
|
113 chr_17 5 A U
|
|
114 chr_17 6 A U
|
|
115 chr_17 7 C U
|
|
116 chr_17 8 C U
|
|
117 chr_17 9 C U
|
|
118 chr_17 10 T U
|
|
119 chr_17 11 A U
|
|
120 chr_17 12 A U
|
|
121 chr_17 13 C U
|
|
122 chr_17 14 C U
|
|
123 chr_17 15 C U
|
|
124 chr_17 16 T U
|
|
125 chr_17 17 A F
|
|
126 chr_17 18 A U
|
|
127 chr_17 19 C U
|
|
128 chr_17 20 C U
|
|
129 chr_17 21 C U
|
|
130 chr_17 22 T U
|
|
131 chr_17 23 A U
|
|
132 chr_17 24 A U
|
|
133 chr_17 25 C U
|
|
134 chr_17 26 C U
|
|
135 chr_17 27 C U
|
|
136 chr_17 28 T U
|
|
137 chr_17 29 A U
|
|
138 chr_17 30 A U
|
|
139 chr_17 31 C U
|
|
140 chr_17 32 C U
|
|
141 chr_17 33 C U
|
|
142 chr_17 34 T U
|
|
143 chr_17 35 A U
|
|
144 chr_17 36 A U
|
|
145 chr_17 37 C G
|
|
146 chr_17 38 C U
|
|
147 chr_17 39 C U
|
|
148 chr_17 40 T F
|
|
149 chr_17 41 A U
|
|
150 chr_17 42 A U
|
|
151 chr_17 43 C U
|
|
152 chr_17 44 C U
|
|
153 chr_17 45 C U
|
|
154 chr_17 46 T U
|
|
155 chr_17 47 A U
|
|
156 chr_17 48 A U
|
|
157 chr_17 49 C U
|
|
158 chr_17 50 C U
|
|
159 chr_17 51 C U
|
|
160 chr_17 52 T U
|
|
161 chr_17 53 A U
|
|
162 chr_17 54 A U
|
|
163 chr_17 55 C U
|
|
164 chr_17 56 C U
|
|
165 chr_17 57 C U
|
|
166 chr_17 58 T U
|
|
167 chr_17 59 A U
|
|
168 chr_17 60 A R
|
|
169 chr_17 61 T U
|
|
170 chr_17 62 A U
|
|
171 chr_17 63 C U
|
|
172 chr_17 64 G U
|
|
173 chr_17 65 C U
|
|
174 chr_17 66 G U
|
|
175 chr_17 67 C U
|
|
176 chr_17 68 G U
|
|
177 chr_17 69 C U
|
|
178 chr_17 70 G U
|
|
179 chr_17 71 C U
|
|
180 chr_17 72 C U
|
|
181 chr_17 73 T R
|
|
182 chr_17 74 A U
|
|
183 chr_17 75 A U
|
|
184 chr_17 76 C U
|
|
185 chr_17 77 C U
|
|
186 chr_17 78 C U
|
|
187 chr_17 79 T U
|
|
188 chr_17 80 A U
|
|
189 chr_17 81 C T
|
|
190 chr_17 82 G U
|
|
191 chr_17 83 A U
|
|
192 chr_17 84 C U
|
|
193 chr_17 85 T U
|
|
194 chr_17 86 T U
|
|
195 chr_17 87 T U
|
|
196 chr_17 88 A U
|
|
197 chr_17 89 A U
|
|
198 chr_17 90 C U
|
|
199 chr_17 91 C U
|
|
200 chr_17 92 T U
|
|
201 chr_17 93 A U
|
|
202 chr_17 94 C U
|
|
203 chr_17 95 T U
|
|
204 chr_17 96 C U
|
|
205 chr_17 97 T U
|
|
206 chr_17 98 A U
|
|
207 chr_17 99 A U
|
|
208 chr_17 100 A U
|
|
209 chr_17 101 C U
|
|
210 chr_17 102 T U
|
|
211 chr_17 103 C U
|
|
212 chr_17 104 T U
|
|
213 chr_17 105 C F
|
|
214 chr_17 106 C U
|
|
215 chr_17 107 T U
|
|
216 chr_17 108 A U
|
|
217 chr_17 109 C U
|
|
218 chr_17 110 T U
|
|
219 chr_17 111 A U
|
|
220 chr_17 112 G F
|
|
221 chr_17 113 T U
|
|
222 chr_17 114 A U
|
|
223 chr_17 115 C U
|
|
224 chr_17 116 G U
|
|
225 chr_17 117 T U
|
|
226 chr_17 118 C U
|
|
227 chr_17 119 T U
|
|
228 chr_17 120 T U
|
|
229
|
|
230 -----
|
|
231
|
|
232 **reference :**
|
|
233
|
|
234 ]]>
|
|
235 </help>
|
|
236 </tool>
|