comparison snpSift_dbnsfp.xml @ 1:1f4ee04c0841 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift_dbnsfp_generic/ commit d12355cea76843e3ed6f09d96c3e9fe22afe4a4f
author iuc
date Mon, 05 Dec 2016 12:06:09 -0500
parents 0624d484adba
children
comparison
equal deleted inserted replaced
0:0624d484adba 1:1f4ee04c0841
1 <tool id="snpSift_dbnsfp_generic" name="SnpSift dbNSFP" version="4.0.0"> 1 <tool id="snpSift_dbnsfp_generic" name="SnpSift dbNSFP" version="@WRAPPER_VERSION@.1">
2 <description>Add Annotations from dbNSFP and similar annotation DBs</description> 2 <description>Add Annotations from dbNSFP and similar annotation DBs</description>
3 <expand macro="requirements" />
4 <macros> 3 <macros>
5 <import>snpSift_macros.xml</import> 4 <import>snpSift_macros.xml</import>
6 </macros> 5 </macros>
7 <command> 6 <expand macro="requirements" />
8 java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar dbnsfp -v 7 <expand macro="stdio" />
9 #if $db.dbsrc == 'cached' : 8 <expand macro="version_command" />
9 <command><![CDATA[
10 @CONDA_SNPSIFT_JAR_PATH@ &&
11 java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" dbnsfp -v
12 #if $db.dbsrc == 'cached':
10 -db $db.dbnsfp 13 -db $db.dbnsfp
11 #if $db.annotations and $db.annotations.__str__ != '': 14 #if $db.annotations and str($db.annotations) != '':
12 -f "$db.annotations" 15 -f "$db.annotations"
13 #end if 16 #end if
14 #else : 17 #else:
15 -db "${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}" 18 -db "${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}"
16 #if $db.annotations and $db.annotations.__str__ != '': 19 #if $db.annotations and str($db.annotations) != '':
17 -f "$db.annotations" 20 -f "$db.annotations"
18 #end if 21 #end if
19 #end if 22 #end if
20 $input > $output 23 "$input" > "$output"
21 2> tmp.err &amp;&amp; grep -v file tmp.err 24 2> tmp.err && grep -v file tmp.err
25 ]]>
22 </command> 26 </command>
23 <inputs> 27 <inputs>
24 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> 28 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/>
25 <conditional name="db"> 29 <conditional name="db">
26 <param name="dbsrc" type="select" label="dbNSFP "> 30 <param name="dbsrc" type="select" label="dbNSFP ">
51 </options> 55 </options>
52 </param> 56 </param>
53 </when> 57 </when>
54 </conditional> 58 </conditional>
55 </inputs> 59 </inputs>
56 <expand macro="stdio" />
57 <outputs> 60 <outputs>
58 <data format="vcf" name="output" /> 61 <data format="vcf" name="output" />
59 </outputs> 62 </outputs>
60 <tests> 63 <tests>
64 <!-- This cannot be tested at the moment because test_dbnsfpdb.tabular
65 is converted from dbnsfp.tabular to snpsiftdbnsfp format on-the-fly
66 when this tool is run and annotation metadata is not available
67 until after the conversion is completed.
61 <test> 68 <test>
62 <param name="input" ftype="vcf" value="test_annotate_in.vcf.vcf"/> 69 <param name="input" ftype="vcf" value="test_annotate_in.vcf"/>
63 <param name="dbsrc" value="history"/> 70 <param name="dbsrc" value="history"/>
64 <param name="dbnsfpdb" value="test_dbnsfpdb.tabular" ftype="dbnsfp.tabular" /> 71 <param name="dbnsfpdb" value="test_dbnsfpdb.tabular" ftype="dbnsfp.tabular" />
65 <annotations value="aaref,aaalt,genename,aapos,SIFT_score"/> 72 <param name="annotations" value="aaref,aaalt,genename,aapos,SIFT_score"/>
66 <output name="output"> 73 <output name="output">
67 <assert_contents> 74 <assert_contents>
68 <has_text text="dbNSFP_SIFT_score=0.15" /> 75 <has_text text="dbNSFP_SIFT_score=0.15" />
69 </assert_contents> 76 </assert_contents>
70 </output> 77 </output>
71 </test> 78 </test> -->
72 </tests> 79 </tests>
73 <help> 80 <help><![CDATA[
74 81
75 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.). 82 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.).
76 It contains variant annotations such as: 83 It contains variant annotations such as:
77 84
78 85
123 ESP6500_AA_AF 130 ESP6500_AA_AF
124 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set) 131 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
125 ESP6500_EA_AF 132 ESP6500_EA_AF
126 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set) 133 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
127 FATHMM_pred 134 FATHMM_pred
128 If a FATHMM_score is &lt;=-1.5 (or rankscore &lt;=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";" 135 If a FATHMM_score is <=-1.5 (or rankscore <=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"
129 FATHMM_rankscore 136 FATHMM_rankscore
130 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1 137 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1
131 FATHMM_score 138 FATHMM_score
132 FATHMM default score (FATHMMori) 139 FATHMM default score (FATHMMori)
133 fold-degenerate 140 fold-degenerate
151 LR_score 158 LR_score
152 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1 159 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1
153 LRT_Omega 160 LRT_Omega
154 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT) 161 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT)
155 LRT_converted_rankscore 162 LRT_converted_rankscore
156 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega&lt;1, or LRTnew=LRTori*0.5 if Omega&gt;=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682 163 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega<1, or LRTnew=LRTori*0.5 if Omega>=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682
157 LRT_pred 164 LRT_pred
158 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score 165 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score
159 LRT_score 166 LRT_score
160 The original LRT two-sided p-value (LRTori), ranges from 0 to 1 167 The original LRT two-sided p-value (LRTori), ranges from 0 to 1
161 MutationAssessor_pred 168 MutationAssessor_pred
221 Reliability_index 228 Reliability_index
222 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions 229 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions
223 SIFT_converted_rankscore 230 SIFT_converted_rankscore
224 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932 231 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932
225 SIFT_pred 232 SIFT_pred
226 If SIFTori is smaller than 0.05 (rankscore&gt;0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";" 233 If SIFTori is smaller than 0.05 (rankscore>0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";"
227 SIFT_score 234 SIFT_score
228 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";" 235 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";"
229 SiPhy_29way_logOdds 236 SiPhy_29way_logOdds
230 SiPhy score based on 29 mammalian genomes. The larger the score, the more conserved the site 237 SiPhy score based on 29 mammalian genomes. The larger the score, the more conserved the site
231 SiPhy_29way_pi 238 SiPhy_29way_pi
240 Uniprot ID number. Multiple entries separated by ";" 247 Uniprot ID number. Multiple entries separated by ";"
241 UniSNP_ids 248 UniSNP_ids
242 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;... 249 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;...
243 250
244 251
245 252 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp and a couple of prebuilt dbNSFP databases are available at:
246 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation:
247 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP 253 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
248
249 A couple dbNSFP databases are prebuilt for SnpSift at:
250 http://sourceforge.net/projects/snpeff/files/databases/dbNSFP/
251
252
253 254
254 255
255 **Uploading Your Own Annotations for any Genome** 256 **Uploading Your Own Annotations for any Genome**
256 257
257 The website for dbNSFP database releases is: 258 The website for dbNSFP database releases is:
287 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation. 288 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation.
288 289
289 290
290 @EXTERNAL_DOCUMENTATION@ 291 @EXTERNAL_DOCUMENTATION@
291 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP 292 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
292 293 ]]>
293 @CITATION_SECTION@
294
295
296 </help> 294 </help>
295 <expand macro="citations">
296 <citation type="doi">DOI: 10.1002/humu.21517</citation>
297 <citation type="doi">DOI: 10.1002/humu.22376</citation>
298 <citation type="doi">DOI: 10.1002/humu.22932</citation>
299 <citation type="doi">doi: 10.1093/hmg/ddu733</citation>
300 <citation type="doi">doi: 10.1093/nar/gku1206</citation>
301 <citation type="doi">doi: 10.3389/fgene.2012.00035</citation>
302 </expand>
297 </tool> 303 </tool>