Mercurial > repos > iuc > snpsift
comparison snpSift_extractFields.xml @ 2:bf8c1526871b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit d12355cea76843e3ed6f09d96c3e9fe22afe4a4f
author | iuc |
---|---|
date | Mon, 05 Dec 2016 12:11:18 -0500 |
parents | 98708b88af9f |
children | 20c7d583fec1 |
comparison
equal
deleted
inserted
replaced
1:98708b88af9f | 2:bf8c1526871b |
---|---|
1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.0"> | 1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.1"> |
2 <options sanitize="False" /> | 2 <options sanitize="False" /> |
3 <description>from a VCF file inot a tabular file</description> | 3 <description>from a VCF file into a tabular file</description> |
4 <macros> | 4 <macros> |
5 <import>snpSift_macros.xml</import> | 5 <import>snpSift_macros.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements" /> | 7 <expand macro="requirements" /> |
8 <expand macro="stdio" /> | 8 <expand macro="stdio" /> |
9 <expand macro="version_command" /> | 9 <expand macro="version_command" /> |
10 <command><![CDATA[ | 10 <command><![CDATA[ |
11 @CONDA_SNPSIFT_JAR_PATH@ && | |
11 cat "$input" | 12 cat "$input" |
12 #if $one_effect_per_line: | 13 #if $one_effect_per_line: |
13 | "\$SNPEFF_JAR_PATH/scripts/vcfEffOnePerLine.pl" | 14 | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" |
14 #end if | 15 #end if |
15 | java -Xmx6G -jar "\$SNPEFF_JAR_PATH/SnpSift.jar" extractFields | 16 | java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" extractFields |
16 #if $separator: | 17 #if $separator: |
17 -s '$separator' | 18 -s '$separator' |
18 #end if | 19 #end if |
19 #if $empty_text: | 20 #if $empty_text: |
20 -e '$empty_text' | 21 -e '$empty_text' |
21 #end if | 22 #end if |
22 - | 23 - |
23 #echo ' '.join(['"%s"' % x for x in $extract.split()]) | 24 #echo ' '.join(['"%s"' % x for x in $extract.split()]) |
24 > "$output" | 25 > "$output" |
25 ]]> | 26 ]]> |
26 </command> | 27 </command> |
27 <inputs> | 28 <inputs> |
28 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> | 29 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> |
29 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> | 30 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> |
30 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> | 31 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> |
31 <param name="separator" type="text" value="" optional="true" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values"> | 32 <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values" /> |
32 </param> | 33 <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" /> |
33 <param name="empty_text" type="text" value="" optional="true" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" > | |
34 </param> | |
35 </inputs> | 34 </inputs> |
36 <outputs> | 35 <outputs> |
37 <data format="tabular" name="output" /> | 36 <data format="tabular" name="output" /> |
38 </outputs> | 37 </outputs> |
39 <tests> | 38 <tests> |
76 CHROM | 75 CHROM |
77 POS | 76 POS |
78 ID | 77 ID |
79 REF | 78 REF |
80 ALT | 79 ALT |
81 FILTER | 80 FILTER |
82 INFO fields: | 81 INFO fields: |
83 AF | 82 AF |
84 AC | 83 AC |
85 DP | 84 DP |
86 MQ | 85 MQ |
87 etc. (any info field available) | 86 etc. (any info field available) |
88 SnpEff 'ANN' fields: | 87 SnpEff 'ANN' fields: |
89 "ANN[*].ALLELE" (alias GENOTYPE) | 88 "ANN[*].ALLELE" (alias GENOTYPE) |
90 "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.) | 89 "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.) |
91 "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER } | 90 "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER } |
92 "ANN[*].GENE" Gene name (e.g. 'PSD3') | 91 "ANN[*].GENE" Gene name (e.g. 'PSD3') |
102 "ANN[*].CDS_POS" (alias POS_CDS) | 101 "ANN[*].CDS_POS" (alias POS_CDS) |
103 "ANN[*].CDS_LEN" (alias LEN_CDS) | 102 "ANN[*].CDS_LEN" (alias LEN_CDS) |
104 "ANN[*].AA_POS" (alias POS_AA) | 103 "ANN[*].AA_POS" (alias POS_AA) |
105 "ANN[*].AA_LEN" (alias LEN_AA) | 104 "ANN[*].AA_LEN" (alias LEN_AA) |
106 "ANN[*].DISTANCE" | 105 "ANN[*].DISTANCE" |
107 "ANN[*].ERRORS" (alias WARNING, INFOS) | 106 "ANN[*].ERRORS" (alias WARNING, INFOS) |
108 SnpEff 'EFF' fields (this is for older SnpEff/SnpSift versions; newer versions use the 'ANN' field): | 107 SnpEff 'EFF' fields (this is for older SnpEff/SnpSift versions; newer versions use the 'ANN' field): |
109 "EFF[*].EFFECT" | 108 "EFF[*].EFFECT" |
110 "EFF[*].IMPACT" | 109 "EFF[*].IMPACT" |
111 "EFF[*].FUNCLASS" | 110 "EFF[*].FUNCLASS" |
112 "EFF[*].CODON" | 111 "EFF[*].CODON" |
114 "EFF[*].AA_LEN" | 113 "EFF[*].AA_LEN" |
115 "EFF[*].GENE" | 114 "EFF[*].GENE" |
116 "EFF[*].BIOTYPE" | 115 "EFF[*].BIOTYPE" |
117 "EFF[*].CODING" | 116 "EFF[*].CODING" |
118 "EFF[*].TRID" | 117 "EFF[*].TRID" |
119 "EFF[*].RANK" | 118 "EFF[*].RANK" |
120 SnpEff 'LOF' fields: | 119 SnpEff 'LOF' fields: |
121 "LOF[*].GENE" | 120 "LOF[*].GENE" |
122 "LOF[*].GENEID" | 121 "LOF[*].GENEID" |
123 "LOF[*].NUMTR" | 122 "LOF[*].NUMTR" |
124 "LOF[*].PERC" | 123 "LOF[*].PERC" |
125 SnpEff 'NMD' fields: | 124 SnpEff 'NMD' fields: |
126 "NMD[*].GENE" | 125 "NMD[*].GENE" |
127 "NMD[*].GENEID" | 126 "NMD[*].GENEID" |
128 "NMD[*].NUMTR" | 127 "NMD[*].NUMTR" |
129 "NMD[*].PERC" | 128 "NMD[*].PERC" |
130 | 129 |
131 | 130 |
132 Some examples: | 131 Some examples: |
133 | 132 |
134 - *Extracting chromosome, position, ID and allele frequency from a VCF file:* | 133 - *Extracting chromosome, position, ID and allele frequency from a VCF file:* |
135 | 134 |
136 **CHROM POS ID AF** | 135 **CHROM POS ID AF** |
137 | 136 |
138 The result will look something like: | 137 The result will look something like: |
139 | 138 |
140 :: | 139 :: |
141 | 140 |
142 #CHROM POS ID AF | 141 #CHROM POS ID AF |
143 1 69134 0.086 | 142 1 69134 0.086 |
153 - CHROM POS ID: regular fields (as in the previous example) | 152 - CHROM POS ID: regular fields (as in the previous example) |
154 - THETA : This one is from INFO | 153 - THETA : This one is from INFO |
155 - GEN[0].GL[1] : Second likelihood from first genotype | 154 - GEN[0].GL[1] : Second likelihood from first genotype |
155 - GEN[1].GL : The whole GL field (all entries without separating them) | 154 - GEN[1].GL : The whole GL field (all entries without separating them) |
157 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). | 156 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). |
158 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). | 157 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). |
159 | 158 |
160 The result will look something like: | 159 The result will look something like: |
161 | 160 |
162 :: | 161 :: |
163 | 162 |
164 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT | 163 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT |
165 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 | 164 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 |
166 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 | 165 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 |
167 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 | 166 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 |
168 | 167 |
169 - *Extracting fields with multiple values:* | 168 - *Extracting fields with multiple values:* |
170 (notice that there are multiple effect columns per line because there are multiple effects per variant) | 169 (notice that there are multiple effect columns per line because there are multiple effects per variant) |
171 | 170 |
172 **CHROM POS REF ALT ANN[*].EFFECT** | 171 **CHROM POS REF ALT ANN[*].EFFECT** |
173 | 172 |
174 The result will look something like: | 173 The result will look something like: |
175 | 174 |
176 :: | 175 :: |
177 | 176 |
178 #CHROM POS REF ALT ANN[*].EFFECT | 177 #CHROM POS REF ALT ANN[*].EFFECT |
179 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant | 178 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant |
180 22 17072035 C T missense_variant downstream_gene_variant | 179 22 17072035 C T missense_variant downstream_gene_variant |
181 22 17072258 C A missense_variant downstream_gene_variant | 180 22 17072258 C A missense_variant downstream_gene_variant |
182 | 181 |
183 - *Extracting fields with multiple values using a comma as a multiple field separator:* | 182 - *Extracting fields with multiple values using a comma as a multiple field separator:* |
184 | 183 |
185 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** | 184 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** |
186 | 185 |
187 The result will look something like: | 186 The result will look something like: |
188 | 187 |
189 :: | 188 :: |
190 | 189 |
191 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P | 190 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P |
192 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. | 191 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. |
193 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. | 192 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. |
194 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. | 193 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. |
196 | 195 |
197 - *Extracting fields with multiple values, one effect per line:* | 196 - *Extracting fields with multiple values, one effect per line:* |
198 | 197 |
199 **CHROM POS REF ALT ANN[*].EFFECT** | 198 **CHROM POS REF ALT ANN[*].EFFECT** |
200 | 199 |
201 The result will look something like: | 200 The result will look something like: |
202 | 201 |
203 :: | 202 :: |
204 | 203 |
205 #CHROM POS REF ALT ANN[*].EFFECT | 204 #CHROM POS REF ALT ANN[*].EFFECT |
206 22 17071756 T C 3_prime_UTR_variant | 205 22 17071756 T C 3_prime_UTR_variant |
207 22 17071756 T C downstream_gene_variant | 206 22 17071756 T C downstream_gene_variant |
208 22 17072035 C T missense_variant | 207 22 17072035 C T missense_variant |