snpsift_dbnsfp: snpSift_dbnsfp.xml annotate

author	iuc
date	Thu, 22 Jan 2015 08:53:21 -0500
parents
children	13191d4914f7

rev	line source
0 dc480609d9c1 Uploaded iuc parents: diff changeset	1 <tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="4.0.0">
dc480609d9c1 Uploaded iuc parents: diff changeset	2 <description>Add Annotations from dbNSFP</description>
dc480609d9c1 Uploaded iuc parents: diff changeset	3 <expand macro="requirements" />
dc480609d9c1 Uploaded iuc parents: diff changeset	4 <macros>
dc480609d9c1 Uploaded iuc parents: diff changeset	5 <import>snpSift_macros.xml</import>
dc480609d9c1 Uploaded iuc parents: diff changeset	6 </macros>
dc480609d9c1 Uploaded iuc parents: diff changeset	7 <command>
dc480609d9c1 Uploaded iuc parents: diff changeset	8 java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar dbnsfp -v
dc480609d9c1 Uploaded iuc parents: diff changeset	9 #if $db.dbsrc == 'cached' :
dc480609d9c1 Uploaded iuc parents: diff changeset	10 -db "$db.dbnsfp"
dc480609d9c1 Uploaded iuc parents: diff changeset	11 #if $db.annotations and $db.annotations.__str__ != '':
dc480609d9c1 Uploaded iuc parents: diff changeset	12 -f "$db.annotations"
dc480609d9c1 Uploaded iuc parents: diff changeset	13 #end if
dc480609d9c1 Uploaded iuc parents: diff changeset	14 #else :
dc480609d9c1 Uploaded iuc parents: diff changeset	15 -db "${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}"
dc480609d9c1 Uploaded iuc parents: diff changeset	16 #if $db.annotations and $db.annotations.__str__ != '':
dc480609d9c1 Uploaded iuc parents: diff changeset	17 -f "$db.annotations"
dc480609d9c1 Uploaded iuc parents: diff changeset	18 #end if
dc480609d9c1 Uploaded iuc parents: diff changeset	19 #end if
dc480609d9c1 Uploaded iuc parents: diff changeset	20 "$input" > "$output"
dc480609d9c1 Uploaded iuc parents: diff changeset	21 2> tmp.err && grep -v file tmp.err
dc480609d9c1 Uploaded iuc parents: diff changeset	22 </command>
dc480609d9c1 Uploaded iuc parents: diff changeset	23 <inputs>
dc480609d9c1 Uploaded iuc parents: diff changeset	24 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	25 <conditional name="db">
dc480609d9c1 Uploaded iuc parents: diff changeset	26 <param name="dbsrc" type="select" label="dbNSFP ">
dc480609d9c1 Uploaded iuc parents: diff changeset	27 <option value="cached">Locally installed dbNSFP database </option>
dc480609d9c1 Uploaded iuc parents: diff changeset	28 <option value="history">dbNSFP database from your history</option>
dc480609d9c1 Uploaded iuc parents: diff changeset	29 </param>
dc480609d9c1 Uploaded iuc parents: diff changeset	30 <when value="cached">
dc480609d9c1 Uploaded iuc parents: diff changeset	31 <param name="dbnsfp" type="select" label="Genome">
dc480609d9c1 Uploaded iuc parents: diff changeset	32 <options from_data_table="snpsift_dbnsfp">
dc480609d9c1 Uploaded iuc parents: diff changeset	33 <column name="name" index="1"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	34 <column name="value" index="2"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	35 </options>
dc480609d9c1 Uploaded iuc parents: diff changeset	36 </param>
dc480609d9c1 Uploaded iuc parents: diff changeset	37 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with">
dc480609d9c1 Uploaded iuc parents: diff changeset	38 <options from_data_table="snpsift_dbnsfp">
dc480609d9c1 Uploaded iuc parents: diff changeset	39 <column name="name" index="3"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	40 <column name="value" index="3"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	41 <filter type="param_value" ref="dbnsfp" column="2" />
dc480609d9c1 Uploaded iuc parents: diff changeset	42 <filter type="multiple_splitter" column="3" separator=","/>
dc480609d9c1 Uploaded iuc parents: diff changeset	43 </options>
dc480609d9c1 Uploaded iuc parents: diff changeset	44 </param>
dc480609d9c1 Uploaded iuc parents: diff changeset	45 </when>
dc480609d9c1 Uploaded iuc parents: diff changeset	46 <when value="history">
dc480609d9c1 Uploaded iuc parents: diff changeset	47 <param name="dbnsfpdb" type="data" format="snpsiftdbnsfp" label="DbNSFP"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	48 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with">
dc480609d9c1 Uploaded iuc parents: diff changeset	49 <options>
dc480609d9c1 Uploaded iuc parents: diff changeset	50 <filter type="data_meta" ref="dbnsfpdb" key="annotation" />
dc480609d9c1 Uploaded iuc parents: diff changeset	51 </options>
dc480609d9c1 Uploaded iuc parents: diff changeset	52 </param>
dc480609d9c1 Uploaded iuc parents: diff changeset	53 </when>
dc480609d9c1 Uploaded iuc parents: diff changeset	54 </conditional>
dc480609d9c1 Uploaded iuc parents: diff changeset	55 </inputs>
dc480609d9c1 Uploaded iuc parents: diff changeset	56 <expand macro="stdio" />
dc480609d9c1 Uploaded iuc parents: diff changeset	57 <outputs>
dc480609d9c1 Uploaded iuc parents: diff changeset	58 <data format="vcf" name="output" />
dc480609d9c1 Uploaded iuc parents: diff changeset	59 </outputs>
dc480609d9c1 Uploaded iuc parents: diff changeset	60 <tests>
dc480609d9c1 Uploaded iuc parents: diff changeset	61 <test>
dc480609d9c1 Uploaded iuc parents: diff changeset	62 <param name="input" ftype="vcf" value="test_annotate_in.vcf.vcf"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	63 <param name="dbsrc" value="history"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	64 <param name="dbnsfpdb" value="test_dbnsfpdb.tabular" ftype="dbnsfp.tabular" />
dc480609d9c1 Uploaded iuc parents: diff changeset	65 <param name="annotations" value="aaref,aaalt,genename,aapos,SIFT_score"/>
dc480609d9c1 Uploaded iuc parents: diff changeset	66 <output name="output">
dc480609d9c1 Uploaded iuc parents: diff changeset	67 <assert_contents>
dc480609d9c1 Uploaded iuc parents: diff changeset	68 <has_text text="dbNSFP_SIFT_score=0.15" />
dc480609d9c1 Uploaded iuc parents: diff changeset	69 </assert_contents>
dc480609d9c1 Uploaded iuc parents: diff changeset	70 </output>
dc480609d9c1 Uploaded iuc parents: diff changeset	71 </test>
dc480609d9c1 Uploaded iuc parents: diff changeset	72 </tests>
dc480609d9c1 Uploaded iuc parents: diff changeset	73 <help>
dc480609d9c1 Uploaded iuc parents: diff changeset	74
dc480609d9c1 Uploaded iuc parents: diff changeset	75 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.).
dc480609d9c1 Uploaded iuc parents: diff changeset	76
dc480609d9c1 Uploaded iuc parents: diff changeset	77
dc480609d9c1 Uploaded iuc parents: diff changeset	78 1000Gp1_AC
dc480609d9c1 Uploaded iuc parents: diff changeset	79 Alternative allele counts in the whole 1000 genomes phase 1 (1000Gp1) data
dc480609d9c1 Uploaded iuc parents: diff changeset	80 1000Gp1_AF
dc480609d9c1 Uploaded iuc parents: diff changeset	81 Alternative allele frequency in the whole 1000Gp1 data
dc480609d9c1 Uploaded iuc parents: diff changeset	82 1000Gp1_AFR_AC
dc480609d9c1 Uploaded iuc parents: diff changeset	83 Alternative allele counts in the 1000Gp1 African descendent samples
dc480609d9c1 Uploaded iuc parents: diff changeset	84 1000Gp1_AFR_AF
dc480609d9c1 Uploaded iuc parents: diff changeset	85 Alternative allele frequency in the 1000Gp1 African descendent samples
dc480609d9c1 Uploaded iuc parents: diff changeset	86 1000Gp1_AMR_AC
dc480609d9c1 Uploaded iuc parents: diff changeset	87 Alternative allele counts in the 1000Gp1 American descendent samples
dc480609d9c1 Uploaded iuc parents: diff changeset	88 1000Gp1_AMR_AF
dc480609d9c1 Uploaded iuc parents: diff changeset	89 Alternative allele frequency in the 1000Gp1 American descendent samples
dc480609d9c1 Uploaded iuc parents: diff changeset	90 1000Gp1_ASN_AC
dc480609d9c1 Uploaded iuc parents: diff changeset	91 Alternative allele counts in the 1000Gp1 Asian descendent samples
dc480609d9c1 Uploaded iuc parents: diff changeset	92 1000Gp1_ASN_AF
dc480609d9c1 Uploaded iuc parents: diff changeset	93 Alternative allele frequency in the 1000Gp1 Asian descendent samples
dc480609d9c1 Uploaded iuc parents: diff changeset	94 1000Gp1_EUR_AC
dc480609d9c1 Uploaded iuc parents: diff changeset	95 Alternative allele counts in the 1000Gp1 European descendent samples
dc480609d9c1 Uploaded iuc parents: diff changeset	96 1000Gp1_EUR_AF
dc480609d9c1 Uploaded iuc parents: diff changeset	97 Alternative allele frequency in the 1000Gp1 European descendent samples
dc480609d9c1 Uploaded iuc parents: diff changeset	98 aaalt
dc480609d9c1 Uploaded iuc parents: diff changeset	99 Alternative amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron)
dc480609d9c1 Uploaded iuc parents: diff changeset	100 aapos
dc480609d9c1 Uploaded iuc parents: diff changeset	101 Amino acid position as to the protein. "-1" if the variant is a splicing site SNP (2bp on each end of an intron)
dc480609d9c1 Uploaded iuc parents: diff changeset	102 aapos_SIFT
dc480609d9c1 Uploaded iuc parents: diff changeset	103 ENSP id and amino acid positions corresponding to SIFT scores. Multiple entries separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	104 aapos_FATHMM
dc480609d9c1 Uploaded iuc parents: diff changeset	105 ENSP id and amino acid positions corresponding to FATHMM scores. Multiple entries separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	106 aaref
dc480609d9c1 Uploaded iuc parents: diff changeset	107 Reference amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron)
dc480609d9c1 Uploaded iuc parents: diff changeset	108 alt
dc480609d9c1 Uploaded iuc parents: diff changeset	109 Alternative nucleotide allele (as on the + strand)
dc480609d9c1 Uploaded iuc parents: diff changeset	110 Ancestral_allele
dc480609d9c1 Uploaded iuc parents: diff changeset	111 Ancestral allele (based on 1000 genomes reference data)
dc480609d9c1 Uploaded iuc parents: diff changeset	112 cds_strand
dc480609d9c1 Uploaded iuc parents: diff changeset	113 Coding sequence (CDS) strand (+ or -)
dc480609d9c1 Uploaded iuc parents: diff changeset	114 chr
dc480609d9c1 Uploaded iuc parents: diff changeset	115 Chromosome number
dc480609d9c1 Uploaded iuc parents: diff changeset	116 codonpos
dc480609d9c1 Uploaded iuc parents: diff changeset	117 Position on the codon (1, 2 or 3)
dc480609d9c1 Uploaded iuc parents: diff changeset	118 Ensembl_geneid
dc480609d9c1 Uploaded iuc parents: diff changeset	119 Ensembl gene ID
dc480609d9c1 Uploaded iuc parents: diff changeset	120 Ensembl_transcriptid
dc480609d9c1 Uploaded iuc parents: diff changeset	121 Ensembl transcript IDs (separated by ";")
dc480609d9c1 Uploaded iuc parents: diff changeset	122 ESP6500_AA_AF
dc480609d9c1 Uploaded iuc parents: diff changeset	123 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
dc480609d9c1 Uploaded iuc parents: diff changeset	124 ESP6500_EA_AF
dc480609d9c1 Uploaded iuc parents: diff changeset	125 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
dc480609d9c1 Uploaded iuc parents: diff changeset	126 FATHMM_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	127 If a FATHMM_score is <=-1.5 (or rankscore <=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	128 FATHMM_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	129 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1
dc480609d9c1 Uploaded iuc parents: diff changeset	130 FATHMM_score
dc480609d9c1 Uploaded iuc parents: diff changeset	131 FATHMM default score (FATHMMori)
dc480609d9c1 Uploaded iuc parents: diff changeset	132 fold-degenerate
dc480609d9c1 Uploaded iuc parents: diff changeset	133 Degenerate type (0, 2 or 3)
dc480609d9c1 Uploaded iuc parents: diff changeset	134 genename
dc480609d9c1 Uploaded iuc parents: diff changeset	135 Gene name; if the non-synonymous SNP can be assigned to multiple genes, gene names are separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	136 GERP++_NR
dc480609d9c1 Uploaded iuc parents: diff changeset	137 GERP++ neutral rate
dc480609d9c1 Uploaded iuc parents: diff changeset	138 GERP++_RS
dc480609d9c1 Uploaded iuc parents: diff changeset	139 GERP++ RS score, the larger the score, the more conserved the site
dc480609d9c1 Uploaded iuc parents: diff changeset	140 GERP++_RS_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	141 GERP++ RS scores were ranked among all GERP++ RS scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of GERP++ RS scores in dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	142 hg18_pos(1-coor)
dc480609d9c1 Uploaded iuc parents: diff changeset	143 Physical position on the chromosome as to hg18 (1-based coordinate)
dc480609d9c1 Uploaded iuc parents: diff changeset	144 Interpro_domain
dc480609d9c1 Uploaded iuc parents: diff changeset	145 Domain or conserved site on which the variant is located
dc480609d9c1 Uploaded iuc parents: diff changeset	146 LR_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	147 Prediction of our LR based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0.5. The rankscore cutoff between "D" and "T" is 0.82268
dc480609d9c1 Uploaded iuc parents: diff changeset	148 LR_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	149 LR scores were ranked among all LR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of LR scores in dbNSFP. The scores range from 0 to 1
dc480609d9c1 Uploaded iuc parents: diff changeset	150 LR_score
dc480609d9c1 Uploaded iuc parents: diff changeset	151 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1
dc480609d9c1 Uploaded iuc parents: diff changeset	152 LRT_Omega
dc480609d9c1 Uploaded iuc parents: diff changeset	153 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT)
dc480609d9c1 Uploaded iuc parents: diff changeset	154 LRT_converted_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	155 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega<1, or LRTnew=LRTori*0.5 if Omega>=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682
dc480609d9c1 Uploaded iuc parents: diff changeset	156 LRT_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	157 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score
dc480609d9c1 Uploaded iuc parents: diff changeset	158 LRT_score
dc480609d9c1 Uploaded iuc parents: diff changeset	159 The original LRT two-sided p-value (LRTori), ranges from 0 to 1
dc480609d9c1 Uploaded iuc parents: diff changeset	160 MutationAssessor_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	161 MutationAssessor's functional impact of a variant
dc480609d9c1 Uploaded iuc parents: diff changeset	162 MutationAssessor_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	163 MAori scores were ranked among all MAori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MAori scores in dbNSFP. The scores range from 0 to 1
dc480609d9c1 Uploaded iuc parents: diff changeset	164 MutationAssessor_score
dc480609d9c1 Uploaded iuc parents: diff changeset	165 MutationAssessor functional impact combined score (MAori)
dc480609d9c1 Uploaded iuc parents: diff changeset	166 MutationTaster_converted_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	167 The MTori scores were first converted: if the prediction is "A" or "D" MTnew=MTori; if the prediction is "N" or "P", MTnew=1-MTori. Then MTnew scores were ranked among all MTnew scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MTnew scores in dbNSFP. The scores range from 0.0931 to 0.80722
dc480609d9c1 Uploaded iuc parents: diff changeset	168 MutationTaster_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	169 MutationTaster prediction
dc480609d9c1 Uploaded iuc parents: diff changeset	170 MutationTaster_score
dc480609d9c1 Uploaded iuc parents: diff changeset	171 MutationTaster p-value (MTori), ranges from 0 to 1
dc480609d9c1 Uploaded iuc parents: diff changeset	172 phastCons46way_placental
dc480609d9c1 Uploaded iuc parents: diff changeset	173 phastCons conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site
dc480609d9c1 Uploaded iuc parents: diff changeset	174 phastCons46way_placental_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	175 phastCons46way_placental scores were ranked among all phastCons46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_placental scores in dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	176 phastCons46way_primate
dc480609d9c1 Uploaded iuc parents: diff changeset	177 phastCons conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site
dc480609d9c1 Uploaded iuc parents: diff changeset	178 phastCons46way_primate_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	179 phastCons46way_primate scores were ranked among all phastCons46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_primate scores in dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	180 phastCons100way_vertebrate
dc480609d9c1 Uploaded iuc parents: diff changeset	181 phastCons conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site
dc480609d9c1 Uploaded iuc parents: diff changeset	182 phastCons100way_vertebrate_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	183 phastCons100way_vertebrate scores were ranked among all phastCons100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons100way_vertebrate scores in dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	184 phyloP46way_placental
dc480609d9c1 Uploaded iuc parents: diff changeset	185 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site
dc480609d9c1 Uploaded iuc parents: diff changeset	186 phyloP46way_placental_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	187 phyloP46way_placental scores were ranked among all phyloP46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_placental scores in dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	188 phyloP46way_primate
dc480609d9c1 Uploaded iuc parents: diff changeset	189 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site
dc480609d9c1 Uploaded iuc parents: diff changeset	190 phyloP46way_primate_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	191 phyloP46way_primate scores were ranked among all phyloP46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_primate scores in dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	192 phyloP100way_vertebrate
dc480609d9c1 Uploaded iuc parents: diff changeset	193 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site
dc480609d9c1 Uploaded iuc parents: diff changeset	194 phyloP100way_vertebrate_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	195 phyloP100way_vertebrate scores were ranked among all phyloP100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP100way_vertebrate scores in dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	196 Polyphen2_HDIV_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	197 Polyphen2 prediction based on HumDiv
dc480609d9c1 Uploaded iuc parents: diff changeset	198 Polyphen2_HDIV_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	199 Polyphen2 HDIV scores were first ranked among all HDIV scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.02656 to 0.89917
dc480609d9c1 Uploaded iuc parents: diff changeset	200 Polyphen2_HDIV_score
dc480609d9c1 Uploaded iuc parents: diff changeset	201 Polyphen2 score based on HumDiv, i.e. hdiv_prob. The score ranges from 0 to 1. Multiple entries separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	202 Polyphen2_HVAR_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	203 Polyphen2 prediction based on HumVar
dc480609d9c1 Uploaded iuc parents: diff changeset	204 Polyphen2_HVAR_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	205 Polyphen2 HVAR scores were first ranked among all HVAR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.01281 to 0.9711
dc480609d9c1 Uploaded iuc parents: diff changeset	206 Polyphen2_HVAR_score
dc480609d9c1 Uploaded iuc parents: diff changeset	207 Polyphen2 score based on HumVar, i.e. hvar_prob. The score ranges from 0 to 1. Multiple entries separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	208 pos(1-coor)
dc480609d9c1 Uploaded iuc parents: diff changeset	209 Physical position on the chromosome as to hg19 (1-based coordinate)
dc480609d9c1 Uploaded iuc parents: diff changeset	210 RadialSVM_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	211 Prediction of our SVM based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0. The rankscore cutoff between "D" and "T" is 0.83357
dc480609d9c1 Uploaded iuc parents: diff changeset	212 RadialSVM_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	213 RadialSVM scores were ranked among all RadialSVM scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of RadialSVM scores in dbNSFP. The scores range from 0 to 1
dc480609d9c1 Uploaded iuc parents: diff changeset	214 RadialSVM_score
dc480609d9c1 Uploaded iuc parents: diff changeset	215 Our support vector machine (SVM) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from -2 to 3 in dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	216 ref
dc480609d9c1 Uploaded iuc parents: diff changeset	217 Reference nucleotide allele (as on the + strand)
dc480609d9c1 Uploaded iuc parents: diff changeset	218 refcodon
dc480609d9c1 Uploaded iuc parents: diff changeset	219 Reference codon
dc480609d9c1 Uploaded iuc parents: diff changeset	220 Reliability_index
dc480609d9c1 Uploaded iuc parents: diff changeset	221 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions
dc480609d9c1 Uploaded iuc parents: diff changeset	222 SIFT_converted_rankscore
dc480609d9c1 Uploaded iuc parents: diff changeset	223 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932
dc480609d9c1 Uploaded iuc parents: diff changeset	224 SIFT_pred
dc480609d9c1 Uploaded iuc parents: diff changeset	225 If SIFTori is smaller than 0.05 (rankscore>0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	226 SIFT_score
dc480609d9c1 Uploaded iuc parents: diff changeset	227 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	228 SiPhy_29way_logOdds
dc480609d9c1 Uploaded iuc parents: diff changeset	229 SiPhy score based on 29 mammals genomes. The larger the score, the more conserved the site
dc480609d9c1 Uploaded iuc parents: diff changeset	230 SiPhy_29way_pi
dc480609d9c1 Uploaded iuc parents: diff changeset	231 The estimated stationary distribution of A, C, G and T at the site, using SiPhy algorithm based on 29 mammals genomes
dc480609d9c1 Uploaded iuc parents: diff changeset	232 SLR_test_statistic
dc480609d9c1 Uploaded iuc parents: diff changeset	233 SLR test statistic for testing natural selection on codons. A negative value indicates negative selection, and a positive value indicates positive selection. Larger magnitude of the value suggests stronger evidence
dc480609d9c1 Uploaded iuc parents: diff changeset	234 Uniprot_aapos
dc480609d9c1 Uploaded iuc parents: diff changeset	235 Amino acid position as to Uniprot. Multiple entries separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	236 Uniprot_acc
dc480609d9c1 Uploaded iuc parents: diff changeset	237 Uniprot accession number. Multiple entries separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	238 Uniprot_id
dc480609d9c1 Uploaded iuc parents: diff changeset	239 Uniprot ID number. Multiple entries separated by ";"
dc480609d9c1 Uploaded iuc parents: diff changeset	240 UniSNP_ids
dc480609d9c1 Uploaded iuc parents: diff changeset	241 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;...
dc480609d9c1 Uploaded iuc parents: diff changeset	242
dc480609d9c1 Uploaded iuc parents: diff changeset	243
dc480609d9c1 Uploaded iuc parents: diff changeset	244 The website for dbNSFP database is https://sites.google.com/site/jpopgen/dbNSFP and there is only annotation for human hg18 and hg19 genome builds.
dc480609d9c1 Uploaded iuc parents: diff changeset	245
dc480609d9c1 Uploaded iuc parents: diff changeset	246 However, any dbNSFP-like tabular file can be used with SnpSift dbnsfp if it has::
dc480609d9c1 Uploaded iuc parents: diff changeset	247
dc480609d9c1 Uploaded iuc parents: diff changeset	248 - The first line of the file must be column headers that name the annotations.
dc480609d9c1 Uploaded iuc parents: diff changeset	249 - The first 4 columns are required and must be::
dc480609d9c1 Uploaded iuc parents: diff changeset	250 1. chromosome
dc480609d9c1 Uploaded iuc parents: diff changeset	251 2. position in chromosome
dc480609d9c1 Uploaded iuc parents: diff changeset	252 3. reference base
dc480609d9c1 Uploaded iuc parents: diff changeset	253 4. alternate base
dc480609d9c1 Uploaded iuc parents: diff changeset	254
dc480609d9c1 Uploaded iuc parents: diff changeset	255 For example:
dc480609d9c1 Uploaded iuc parents: diff changeset	256
dc480609d9c1 Uploaded iuc parents: diff changeset	257 ::
dc480609d9c1 Uploaded iuc parents: diff changeset	258
dc480609d9c1 Uploaded iuc parents: diff changeset	259 #chr pos(1-coor) ref alt aaref aaalt genename SIFT_score
dc480609d9c1 Uploaded iuc parents: diff changeset	260 1 69134 A C E A OR4F5 0.03
dc480609d9c1 Uploaded iuc parents: diff changeset	261 1 69134 A G E G OR4F5 0.09
dc480609d9c1 Uploaded iuc parents: diff changeset	262 1 69134 A T E V OR4F5 0.03
dc480609d9c1 Uploaded iuc parents: diff changeset	263 4 100239319 T A H L ADH1B 0
dc480609d9c1 Uploaded iuc parents: diff changeset	264 4 100239319 T C H R ADH1B 0.15
dc480609d9c1 Uploaded iuc parents: diff changeset	265 4 100239319 T G H P ADH1B 0
dc480609d9c1 Uploaded iuc parents: diff changeset	266
dc480609d9c1 Uploaded iuc parents: diff changeset	267
dc480609d9c1 Uploaded iuc parents: diff changeset	268 The uploaded tabular file should be set to datatype: "dbnsfp.tabular"
dc480609d9c1 Uploaded iuc parents: diff changeset	269 Using "Convert Format" the "dbnsfp.tabular" can be converted to the correct format for SnpSift dbnsfp.
dc480609d9c1 Uploaded iuc parents: diff changeset	270
dc480609d9c1 Uploaded iuc parents: diff changeset	271 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation.
dc480609d9c1 Uploaded iuc parents: diff changeset	272
dc480609d9c1 Uploaded iuc parents: diff changeset	273
dc480609d9c1 Uploaded iuc parents: diff changeset	274 @EXTERNAL_DOCUMENTATION@
dc480609d9c1 Uploaded iuc parents: diff changeset	275 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
dc480609d9c1 Uploaded iuc parents: diff changeset	276
dc480609d9c1 Uploaded iuc parents: diff changeset	277 @CITATION_SECTION@
dc480609d9c1 Uploaded iuc parents: diff changeset	278
dc480609d9c1 Uploaded iuc parents: diff changeset	279
dc480609d9c1 Uploaded iuc parents: diff changeset	280 </help>
dc480609d9c1 Uploaded iuc parents: diff changeset	281 </tool>

0

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

1 <tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="4.0.0">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

2 <description>Add Annotations from dbNSFP</description>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

3 <expand macro="requirements" />

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

4 <macros>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

5 <import>snpSift_macros.xml</import>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

6 </macros>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

7 <command>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

8 java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar dbnsfp -v

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

9 #if $db.dbsrc == 'cached' :

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

10 -db $db.dbnsfp

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

11 #if $db.annotations and $db.annotations.__str__ != '':

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

12 -f "$db.annotations"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

13 #end if

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

14 #else :

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

15 -db "${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

16 #if $db.annotations and $db.annotations.__str__ != '':

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

17 -f "$db.annotations"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

18 #end if

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

19 #end if

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

20 $input > $output

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

21 2> tmp.err && grep -v file tmp.err

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

22 </command>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

23 <inputs>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

24 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

25 <conditional name="db">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

26 <param name="dbsrc" type="select" label="dbNSFP ">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

27 <option value="cached">Locally installed dbNSFP database </option>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

28 <option value="history">dbNSFP database from your history</option>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

29 </param>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

30 <when value="cached">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

31 <param name="dbnsfp" type="select" label="Genome">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

32 <options from_data_table="snpsift_dbnsfp">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

33 <column name="name" index="1"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

34 <column name="value" index="2"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

35 </options>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

36 </param>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

37 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

38 <options from_data_table="snpsift_dbnsfp">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

39 <column name="name" index="3"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

40 <column name="value" index="3"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

41 <filter type="param_value" ref="dbnsfp" column="2" />

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

42 <filter type="multiple_splitter" column="3" separator=","/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

43 </options>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

44 </param>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

45 </when>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

46 <when value="history">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

47 <param name="dbnsfpdb" type="data" format="snpsiftdbnsfp" label="DbNSFP"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

48 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

49 <options>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

50 <filter type="data_meta" ref="dbnsfpdb" key="annotation" />

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

51 </options>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

52 </param>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

53 </when>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

54 </conditional>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

55 </inputs>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

56 <expand macro="stdio" />

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

57 <outputs>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

58 <data format="vcf" name="output" />

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

59 </outputs>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

60 <tests>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

61 <test>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

62 <param name="input" ftype="vcf" value="test_annotate_in.vcf.vcf"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

63 <param name="dbsrc" value="history"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

64 <param name="dbnsfpdb" value="test_dbnsfpdb.tabular" ftype="dbnsfp.tabular" />

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

65 <annotations value="aaref,aaalt,genename,aapos,SIFT_score"/>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

66 <output name="output">

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

67 <assert_contents>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

68 <has_text text="dbNSFP_SIFT_score=0.15" />

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

69 </assert_contents>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

70 </output>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

71 </test>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

72 </tests>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

73 <help>

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

74

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

75 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.).

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

76

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

77

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

78 1000Gp1_AC

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

79 Alternative allele counts in the whole 1000 genomes phase 1 (1000Gp1) data

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

80 1000Gp1_AF

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

81 Alternative allele frequency in the whole 1000Gp1 data

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

82 1000Gp1_AFR_AC

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

83 Alternative allele counts in the 1000Gp1 African descendent samples

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

84 1000Gp1_AFR_AF

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

85 Alternative allele frequency in the 1000Gp1 African descendent samples

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

86 1000Gp1_AMR_AC

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

87 Alternative allele counts in the 1000Gp1 American descendent samples

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

88 1000Gp1_AMR_AF

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

89 Alternative allele frequency in the 1000Gp1 American descendent samples

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

90 1000Gp1_ASN_AC

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

91 Alternative allele counts in the 1000Gp1 Asian descendent samples

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

92 1000Gp1_ASN_AF

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

93 Alternative allele frequency in the 1000Gp1 Asian descendent samples

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

94 1000Gp1_EUR_AC

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

95 Alternative allele counts in the 1000Gp1 European descendent samples

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

96 1000Gp1_EUR_AF

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

97 Alternative allele frequency in the 1000Gp1 European descendent samples

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

98 aaalt

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

99 Alternative amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

100 aapos

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

101 Amino acid position as to the protein. "-1" if the variant is a splicing site SNP (2bp on each end of an intron)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

102 aapos_SIFT

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

103 ENSP id and amino acid positions corresponding to SIFT scores. Multiple entries separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

104 aapos_FATHMM

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

105 ENSP id and amino acid positions corresponding to FATHMM scores. Multiple entries separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

106 aaref

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

107 Reference amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

108 alt

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

109 Alternative nucleotide allele (as on the + strand)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

110 Ancestral_allele

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

111 Ancestral allele (based on 1000 genomes reference data)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

112 cds_strand

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

113 Coding sequence (CDS) strand (+ or -)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

114 chr

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

115 Chromosome number

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

116 codonpos

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

117 Position on the codon (1, 2 or 3)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

118 Ensembl_geneid

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

119 Ensembl gene ID

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

120 Ensembl_transcriptid

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

121 Ensembl transcript IDs (separated by ";")

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

122 ESP6500_AA_AF

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

123 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

124 ESP6500_EA_AF

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

125 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

126 FATHMM_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

127 If a FATHMM_score is <=-1.5 (or rankscore <=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

128 FATHMM_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

129 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

130 FATHMM_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

131 FATHMM default score (FATHMMori)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

132 fold-degenerate

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

133 Degenerate type (0, 2 or 3)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

134 genename

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

135 Gene name; if the non-synonymous SNP can be assigned to multiple genes, gene names are separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

136 GERP++_NR

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

137 GERP++ neutral rate

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

138 GERP++_RS

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

139 GERP++ RS score, the larger the score, the more conserved the site

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

140 GERP++_RS_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

141 GERP++ RS scores were ranked among all GERP++ RS scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of GERP++ RS scores in dbNSFP

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

142 hg18_pos(1-coor)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

143 Physical position on the chromosome as to hg18 (1-based coordinate)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

144 Interpro_domain

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

145 Domain or conserved site in which the variant is located

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

146 LR_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

147 Prediction of our LR based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0.5. The rankscore cutoff between "D" and "T" is 0.82268

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

148 LR_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

149 LR scores were ranked among all LR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of LR scores in dbNSFP. The scores range from 0 to 1

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

150 LR_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

151 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

152 LRT_Omega

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

153 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

154 LRT_converted_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

155 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega<1, or LRTnew=LRTori*0.5 if Omega>=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

156 LRT_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

157 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

158 LRT_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

159 The original LRT two-sided p-value (LRTori), ranges from 0 to 1

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

160 MutationAssessor_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

161 MutationAssessor's functional impact of a variant

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

162 MutationAssessor_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

163 MAori scores were ranked among all MAori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MAori scores in dbNSFP. The scores range from 0 to 1

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

164 MutationAssessor_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

165 MutationAssessor functional impact combined score (MAori)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

166 MutationTaster_converted_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

167 The MTori scores were first converted: if the prediction is "A" or "D" MTnew=MTori; if the prediction is "N" or "P", MTnew=1-MTori. Then MTnew scores were ranked among all MTnew scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MTnew scores in dbNSFP. The scores range from 0.0931 to 0.80722

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

168 MutationTaster_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

169 MutationTaster prediction

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

170 MutationTaster_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

171 MutationTaster p-value (MTori), ranges from 0 to 1

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

172 phastCons46way_placental

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

173 phastCons conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

174 phastCons46way_placental_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

175 phastCons46way_placental scores were ranked among all phastCons46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_placental scores in dbNSFP

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

176 phastCons46way_primate

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

177 phastCons conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

178 phastCons46way_primate_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

179 phastCons46way_primate scores were ranked among all phastCons46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_primate scores in dbNSFP

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

180 phastCons100way_vertebrate

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

181 phastCons conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

182 phastCons100way_vertebrate_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

183 phastCons100way_vertebrate scores were ranked among all phastCons100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons100way_vertebrate scores in dbNSFP

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

184 phyloP46way_placental

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

185 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

186 phyloP46way_placental_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

187 phyloP46way_placental scores were ranked among all phyloP46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_placental scores in dbNSFP

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

188 phyloP46way_primate

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

189 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

190 phyloP46way_primate_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

191 phyloP46way_primate scores were ranked among all phyloP46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_primate scores in dbNSFP

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

192 phyloP100way_vertebrate

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

193 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

194 phyloP100way_vertebrate_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

195 phyloP100way_vertebrate scores were ranked among all phyloP100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP100way_vertebrate scores in dbNSFP

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

196 Polyphen2_HDIV_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

197 Polyphen2 prediction based on HumDiv

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

198 Polyphen2_HDIV_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

199 Polyphen2 HDIV scores were first ranked among all HDIV scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.02656 to 0.89917

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

200 Polyphen2_HDIV_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

201 Polyphen2 score based on HumDiv, i.e. hdiv_prob. The score ranges from 0 to 1. Multiple entries separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

202 Polyphen2_HVAR_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

203 Polyphen2 prediction based on HumVar

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

204 Polyphen2_HVAR_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

205 Polyphen2 HVAR scores were first ranked among all HVAR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.01281 to 0.9711

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

206 Polyphen2_HVAR_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

207 Polyphen2 score based on HumVar, i.e. hvar_prob. The score ranges from 0 to 1. Multiple entries separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

208 pos(1-coor)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

209 Physical position on the chromosome as to hg19 (1-based coordinate)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

210 RadialSVM_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

211 Prediction of our SVM based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0. The rankscore cutoff between "D" and "T" is 0.83357

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

212 RadialSVM_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

213 RadialSVM scores were ranked among all RadialSVM scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of RadialSVM scores in dbNSFP. The scores range from 0 to 1

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

214 RadialSVM_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

215 Our support vector machine (SVM) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from -2 to 3 in dbNSFP

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

216 ref

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

217 Reference nucleotide allele (as on the + strand)

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

218 refcodon

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

219 Reference codon

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

220 Reliability_index

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

221 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

222 SIFT_converted_rankscore

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

223 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

224 SIFT_pred

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

225 If SIFTori is smaller than 0.05 (rankscore>0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

226 SIFT_score

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

227 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

228 SiPhy_29way_logOdds

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

229 SiPhy score based on 29 mammalian genomes. The larger the score, the more conserved the site

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

230 SiPhy_29way_pi

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

231 The estimated stationary distribution of A, C, G and T at the site, using SiPhy algorithm based on 29 mammalian genomes

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

232 SLR_test_statistic

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

233 SLR test statistic for testing natural selection on codons. A negative value indicates negative selection, and a positive value indicates positive selection. Larger magnitude of the value suggests stronger evidence

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

234 Uniprot_aapos

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

235 Amino acid position as to Uniprot. Multiple entries separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

236 Uniprot_acc

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

237 Uniprot accession number. Multiple entries separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

238 Uniprot_id

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

239 Uniprot ID number. Multiple entries separated by ";"

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

240 UniSNP_ids

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

241 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;...

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

242

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

243

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

244 The website for dbNSFP database is https://sites.google.com/site/jpopgen/dbNSFP and there is only annotation for human hg18 and hg19 genome builds.

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

245

dc480609d9c1 Uploaded

iuc

parents:

diff changeset

246 However, any dbNSFP-like tabular file can be used with SnpSift dbnsfp if it has::

dc480609d9c1 Uploaded

iuc