comparison snpSift_dbnsfp.xml @ 1:13191d4914f7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift_dbnsfp commit 21b46ae2c90ba7e569b2b3a9eaf938f8dedb2c31
author iuc
date Tue, 07 Jun 2016 10:04:48 -0400
parents dc480609d9c1
children c838e7136a40
comparison
equal deleted inserted replaced
0:dc480609d9c1 1:13191d4914f7
1 <tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="4.0.0"> 1 <tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="@WRAPPER_VERSION@.0">
2 <description>Add Annotations from dbNSFP</description> 2 <description>Add Annotations from dbNSFP or similar annotation DBs</description>
3 <expand macro="requirements" />
4 <macros> 3 <macros>
5 <import>snpSift_macros.xml</import> 4 <import>snpSift_macros.xml</import>
6 </macros> 5 </macros>
7 <command> 6 <expand macro="requirements" />
8 java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar dbnsfp -v 7 <expand macro="stdio" />
9 #if $db.dbsrc == 'cached' : 8 <expand macro="version_command" />
10 -db $db.dbnsfp 9 <command><![CDATA[
11 #if $db.annotations and $db.annotations.__str__ != '': 10 java -Xmx6G -jar "\$SNPEFF_JAR_PATH/SnpSift.jar" dbnsfp -v
11 #if $db.dbsrc == 'cached':
12 -db "$db.dbnsfp"
13 #if $db.annotations and str($db.annotations) != '':
12 -f "$db.annotations" 14 -f "$db.annotations"
13 #end if 15 #end if
14 #else : 16 #else:
15 -db "${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}" 17 -db "${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}"
16 #if $db.annotations and $db.annotations.__str__ != '': 18 #if $db.annotations and str($db.annotations) != '':
17 -f "$db.annotations" 19 -f "$db.annotations"
18 #end if 20 #end if
19 #end if 21 #end if
20 $input > $output 22 "$input" > "$output"
21 2> tmp.err &amp;&amp; grep -v file tmp.err 23 2> tmp.err && grep -v file tmp.err
24 ]]>
22 </command> 25 </command>
23 <inputs> 26 <inputs>
24 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> 27 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/>
25 <conditional name="db"> 28 <conditional name="db">
26 <param name="dbsrc" type="select" label="dbNSFP "> 29 <param name="dbsrc" type="select" label="dbNSFP ">
27 <option value="cached">Locally installed dbNSFP database </option> 30 <option value="cached">Locally installed dbNSFP database </option>
28 <option value="history">dbNSFP database from your history</option> 31 <option value="history">dbNSFP database from your history</option>
29 </param> 32 </param>
30 <when value="cached"> 33 <when value="cached">
31 <param name="dbnsfp" type="select" label="Genome"> 34 <param name="dbnsfp" type="select" label="Genome">
32 <options from_data_table="snpsift_dbnsfp"> 35 <options from_data_table="snpsift_dbnsfps">
33 <column name="name" index="1"/> 36 <column name="name" index="2"/>
34 <column name="value" index="2"/> 37 <column name="value" index="3"/>
35 </options> 38 </options>
36 </param> 39 </param>
37 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with"> 40 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with">
38 <options from_data_table="snpsift_dbnsfp"> 41 <options from_data_table="snpsift_dbnsfps">
39 <column name="name" index="3"/> 42 <column name="name" index="4"/>
40 <column name="value" index="3"/> 43 <column name="value" index="4"/>
41 <filter type="param_value" ref="dbnsfp" column="2" /> 44 <filter type="param_value" ref="dbnsfp" column="3" />
42 <filter type="multiple_splitter" column="3" separator=","/> 45 <filter type="multiple_splitter" column="4" separator=","/>
43 </options> 46 </options>
44 </param> 47 </param>
45 </when> 48 </when>
46 <when value="history"> 49 <when value="history">
47 <param name="dbnsfpdb" type="data" format="snpsiftdbnsfp" label="DbNSFP"/> 50 <param name="dbnsfpdb" type="data" format="snpsiftdbnsfp" label="DbNSFP"/>
51 </options> 54 </options>
52 </param> 55 </param>
53 </when> 56 </when>
54 </conditional> 57 </conditional>
55 </inputs> 58 </inputs>
56 <expand macro="stdio" />
57 <outputs> 59 <outputs>
58 <data format="vcf" name="output" /> 60 <data format="vcf" name="output" />
59 </outputs> 61 </outputs>
60 <tests> 62 <tests>
61 <test> 63 <test>
68 <has_text text="dbNSFP_SIFT_score=0.15" /> 70 <has_text text="dbNSFP_SIFT_score=0.15" />
69 </assert_contents> 71 </assert_contents>
70 </output> 72 </output>
71 </test> 73 </test>
72 </tests> 74 </tests>
73 <help> 75 <help><![CDATA[
74 76
75 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.). 77 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.).
78 It contains variant annotations such as:
76 79
77 80
78 1000Gp1_AC 81 1000Gp1_AC
79 Alternative allele counts in the whole 1000 genomes phase 1 (1000Gp1) data 82 Alternative allele counts in the whole 1000 genomes phase 1 (1000Gp1) data
80 1000Gp1_AF 83 1000Gp1_AF
122 ESP6500_AA_AF 125 ESP6500_AA_AF
123 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set) 126 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
124 ESP6500_EA_AF 127 ESP6500_EA_AF
125 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set) 128 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
126 FATHMM_pred 129 FATHMM_pred
127 If a FATHMM_score is &lt;=-1.5 (or rankscore &lt;=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";" 130 If a FATHMM_score is <=-1.5 (or rankscore <=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"
128 FATHMM_rankscore 131 FATHMM_rankscore
129 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1 132 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1
130 FATHMM_score 133 FATHMM_score
131 FATHMM default score (FATHMMori) 134 FATHMM default score (FATHMMori)
132 fold-degenerate 135 fold-degenerate
150 LR_score 153 LR_score
151 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1 154 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1
152 LRT_Omega 155 LRT_Omega
153 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT) 156 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT)
154 LRT_converted_rankscore 157 LRT_converted_rankscore
155 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega&lt;1, or LRTnew=LRTori*0.5 if Omega&gt;=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682 158 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega<1, or LRTnew=LRTori*0.5 if Omega>=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682
156 LRT_pred 159 LRT_pred
157 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score 160 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score
158 LRT_score 161 LRT_score
159 The original LRT two-sided p-value (LRTori), ranges from 0 to 1 162 The original LRT two-sided p-value (LRTori), ranges from 0 to 1
160 MutationAssessor_pred 163 MutationAssessor_pred
220 Reliability_index 223 Reliability_index
221 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions 224 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions
222 SIFT_converted_rankscore 225 SIFT_converted_rankscore
223 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932 226 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932
224 SIFT_pred 227 SIFT_pred
225 If SIFTori is smaller than 0.05 (rankscore&gt;0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";" 228 If SIFTori is smaller than 0.05 (rankscore>0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";"
226 SIFT_score 229 SIFT_score
227 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";" 230 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";"
228 SiPhy_29way_logOdds 231 SiPhy_29way_logOdds
229 SiPhy score based on 29 mammals genomes. The larger the score, the more conserved the site 232 SiPhy score based on 29 mammals genomes. The larger the score, the more conserved the site
230 SiPhy_29way_pi 233 SiPhy_29way_pi
239 Uniprot ID number. Multiple entries separated by ";" 242 Uniprot ID number. Multiple entries separated by ";"
240 UniSNP_ids 243 UniSNP_ids
241 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;... 244 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;...
242 245
243 246
244 The website for dbNSFP database is https://sites.google.com/site/jpopgen/dbNSFP and there is only annotation for human hg18 and hg19 genome builds. 247 The website for dbNSFP database is https://sites.google.com/site/jpopgen/dbNSFP and there is only annotation for human genome builds.
248
249 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation:
250 *( It also provides links for dbNSFP databases prebuilt for SnpSift )*
251 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
245 252
246 However, any dbNSFP-like tabular file can be used with SnpSift dbnsfp if it has:: 253 However, any dbNSFP-like tabular file can be used with SnpSift dbnsfp if it has::
247 254
248 - The first line of the file must be column headers that name the annotations. 255 - The first line of the file must be column headers that name the annotations.
249 - The first 4 columns are required and must be:: 256 - The first 4 columns are required and must be::
263 4 100239319 T A H L ADH1B 0 270 4 100239319 T A H L ADH1B 0
264 4 100239319 T C H R ADH1B 0.15 271 4 100239319 T C H R ADH1B 0.15
265 4 100239319 T G H P ADH1B 0 272 4 100239319 T G H P ADH1B 0
266 273
267 274
268 The uploaded tabular file should be set to datatype: "dbnsfp.tabular" 275 The galaxy datatypes for dbNSFP can automatically convert the specially formatted tabular file for use by SnpSift dbNSFP:
269 Using "Convert Format" the "dbnsfp.tabular" can be converted to the correct format for SnpSift dbnsfp. 276 1. Upload the tabular file, set the datatype as: **"dbnsfp.tabular"**
270 277 2. Edit the history dataset attributes (pencil icon): Use "Convert Format" to convert the **"dbnsfp.tabular"** to the correct format for SnpSift dbnsfp: **"snpsiftdbnsfp"**.
271 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation.
272 278
273 279
274 @EXTERNAL_DOCUMENTATION@ 280 @EXTERNAL_DOCUMENTATION@
275 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP 281 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
276 282
277 @CITATION_SECTION@ 283 @CITATION_SECTION@
278 284
279 285 ]]>
280 </help> 286 </help>
287 <expand macro="citations">
288 <citation type="doi">DOI: 10.1002/humu.21517</citation>
289 <citation type="doi">DOI: 10.1002/humu.22376</citation>
290 <citation type="doi">DOI: 10.1002/humu.22932</citation>
291 <citation type="doi">doi: 10.1093/hmg/ddu733</citation>
292 <citation type="doi">doi: 10.1093/nar/gku1206</citation>
293 <citation type="doi">doi: 10.3389/fgene.2012.00035</citation>
294 </expand>
281 </tool> 295 </tool>