<!--
  Galaxy tool wrapper for mutspecFilter.pl.

  Builds the command line for the Perl script from the form below: three
  optional database switches (segDup, esp, thG), a dbSNP column selector,
  the reference genome, the input dataset and the output dataset.

  NOTE(review): interpreter="perl" on <command> is deprecated in recent
  Galaxy releases; it is kept here so the way the script is located does
  not change.
-->
<tool id="MutSpecfilter" name="MutSpec Filter" version="0.1" hidden="false">
    <description>Filter out variants present in public databases</description>

    <requirements>
        <!-- SCRIPT_PATH is an environment variable; the command passes it to the script via the dir option. -->
        <requirement type="set_environment">SCRIPT_PATH</requirement>
        <requirement type="package" version="5.18.1">perl</requirement>
    </requirements>

    <command interpreter="perl">
        mutspecFilter.pl
        ## The dollar sign is escaped so that Cheetah leaves SCRIPT_PATH for the shell to expand.
        --dir \$SCRIPT_PATH
        ## Each boolean below renders as its flag when checked and as an empty string otherwise.
        $segDup
        $esp
        $thG
        ## The boolean renders as its truevalue string, so compare as text rather than with a Python bool.
        #if str($FilterdbSNP.dbSNP) == "true":
            --dbSNP ${FilterdbSNP.column}
        #else
            ## Presumably 0 tells the script that there is no dbSNP column to filter on; confirm against mutspecFilter.pl.
            --dbSNP 0
        #end if
        --refGenome ${refGenome}
        --outfile '$output'
        '$input'
    </command>

    <inputs>
        <param name="input" type="data" format="txt" label="Input file"/>

        <!-- Choices come from the annovar_index data table. -->
        <param name="refGenome" type="select" label="Reference genome" help="All your data should have been annotated with the selected genome">
            <options from_data_table="annovar_index" />
        </param>

        <!-- The column selector is only shown when dbSNP filtering is enabled. -->
        <conditional name="FilterdbSNP">
            <param name="dbSNP" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Filter against dbSNP database" help="Remove variants with a RS number" />
            <when value="true">
                <param name="column" type="data_column" data_ref="input" label="Select the dbSNP column for filtering" use_header_names="true" help="Select a column name snp or snpNonFlagged" />
            </when>
            <!-- No extra parameters when dbSNP filtering is disabled. -->
            <when value="false" />
        </conditional>

        <param name="segDup" type="boolean" checked="true" truevalue="--segDup" falsevalue="" label="Filter against SegDup database" help="Remove variants present at >= 0.9 frequency in the genomic duplicate segments database" />
        <param name="esp" type="boolean" checked="true" truevalue="--esp" falsevalue="" label="Filter against the ESP database" help="Remove variants present at frequency > 0.001 in the Exome Sequencing Project database (only valid for human genomes)" />
        <param name="thG" type="boolean" checked="true" truevalue="--thG" falsevalue="" label="Filter against the 1000g database project" help="Remove variants present at frequency > 0.001 in the 1000 genome database (only valid for human genomes)" />
    </inputs>

    <outputs>
        <!-- The label reuses the first space-separated word of the input dataset name. -->
        <data name="output" format="tabular" label="${input.name.split(' ')[0]} filtered" />
    </outputs>

    <help>

**What it does**

Filter a file annotated with MutSpec-Annot tool. Variants present in public databases (dbSNP, SegDup, ESP, 1000 genome obtained from Annovar) will be removed from the input file (with frequency limits described above).

.. class:: warningmark

The databases ESP and 1000 genome can be used only for human genomes

--------------------------------------------------------------------------------------------------------------------------------------------------

**Input**

.. class:: warningmark

Tab delimited text files generated by MutSpec-Annot tool.

--------------------------------------------------------------------------------------------------------------------------------------------------

**Output**

Tab delimited text file filtered for variants considered as neutral polymorphisms.

--------------------------------------------------------------------------------------------------------------------------------------------------

**Example**

Filter the following file::

    Chr Start End Ref Alt Func.refGene Gene.refGene ExonicFunc.refGene AAChange.refGene genomicSuperDups snp138 1000g2014oct_all esp6500si_all Strand context Chromosome Start_Position End_Position Reference_Allele Tumor_Seq_Allele2
    chr7 121717919 121717920 - G exonic AASS frameshift insertion AASS:NM_005763:exon23:c.2634dupC:p.A879fs NA rs147476318 NA NA - GCG chr7 121717919 121717920 - G
    chr1 230846235 230846235 T A exonic AGT nonsynonymous SNV AGT:NM_000029:exon2:c.A362T:p.H121L NA NA NA NA - GTG chr1 230846235 230846235 T A
    chr14 33290999 33290999 A G exonic AKAP6 nonsynonymous SNV AKAP6:NM_004274:exon13:c.A3980G:p.D1327G NA NA NA NA + GAC chr14 33290999 33290999 A G
    chr12 8082458 8082458 C T exonic SLC2A3 nonsynonymous SNV SLC2A3:NM_006931:exon6:c.G683A:p.R228Q NA rs200481428 0.000199681 NA - CCG chr12 8082458 8082458 C T
    chr4 70156391 70156391 T C exonic UGT2B28 nonsynonymous SNV UGT2B28:NM_053039:exon5:c.T1172C:p.V391A score=0.949699;Name=chr4:70035680 NA 0.000199681 NA + GTA chr4 70156391 70156391 T C

Will produce::

    Chr Start End Ref Alt Func.refGene Gene.refGene ExonicFunc.refGene AAChange.refGene genomicSuperDups snp138 1000g2014oct_all esp6500si_all Strand context Chromosome Start_Position End_Position Reference_Allele Tumor_Seq_Allele2
    chr1 230846235 230846235 T A exonic AGT nonsynonymous SNV AGT:NM_000029:exon2:c.A362T:p.H121L NA NA NA NA - GTG chr1 230846235 230846235 T A
    chr14 33290999 33290999 A G exonic AKAP6 nonsynonymous SNV AKAP6:NM_004274:exon13:c.A3980G:p.D1327G NA NA NA NA + GAC chr14 33290999 33290999 A G
    chr4 70156391 70156391 T C exonic UGT2B28 nonsynonymous SNV UGT2B28:NM_053039:exon5:c.T1172C:p.V391A score=0.949699;Name=chr4:70035680 NA 0.000199681 NA + GTA chr4 70156391 70156391 T C

    </help>

    <citations>
        <citation type="bibtex">
@ARTICLE{ardin_mutspec:_2016,
author = {Ardin et al},
keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions},
title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},
url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1011-z}
}
        </citation>
    </citations>

</tool>