annotate mutspecFilter.xml @ 7:eda59b985b1c draft default tip

Uploaded
author iarc
date Mon, 13 Mar 2017 08:21:19 -0400
parents 46a10309dfe2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1 <tool id="MutSpecfilter" name="MutSpec Filter" version="0.1" hidden="false">
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2 <description>Filter out variants present in public databases</description>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
3
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
4 <requirements>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
5 <requirement type="set_environment">SCRIPT_PATH</requirement>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
6 <requirement type="package" version="5.18.1">perl</requirement>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
7 </requirements>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
8
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
9 <command interpreter="perl">
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
10 mutspecFilter.pl
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
11 --dir \$SCRIPT_PATH
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
12 $segDup
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
13 $esp
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
14 $thG
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
15 $exac
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
16 #if str($FilterdbSNP.dbSNP) == "true":
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
17 --dbSNP ${FilterdbSNP.column}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
18 #else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
19 --dbSNP 0
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
20 #end if
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
21 --refGenome ${refGenome}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
22 --outfile $output
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
23
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
24 #for $i, $filter in enumerate( $filters )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
25 --filter $filter.reference
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
26 #end for
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
27
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
28 $input;
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
29
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
30
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
31 </command>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
32
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
33 <inputs>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
34 <param name="input" type="data" format="txt" label="Input file"/>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
35
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
36 <param name="refGenome" type="select" label="Reference genome" help="All your data should have been annotated with the selected genome">
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
37 <options from_data_table="annovar_index" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
38 </param>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
39
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
40 <conditional name="FilterdbSNP">
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
41 <param name="dbSNP" type="boolean" checked="true" truevalue="true" label="Filter against dbSNP database" help="Remove variants with a RS number" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
42 <when value="true">
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
43 <param name="column" type="data_column" data_ref="input" label="Select the dbSNP column for filtering" use_header_names="true" help="Select a column name snp or snpNonFlagged" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
44 </when>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
45 </conditional>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
46
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
47 <param name="segDup" type="boolean" checked="true" truevalue="--segDup" falsevalue="" label="Filter against SegDup database" help="Remove variants present at &#62;= 0.9 frequency in the genomic duplicate segments database (Use only for human and mouse genomes)" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
48 <param name="esp" type="boolean" checked="true" truevalue="--esp" falsevalue="" label="Filter against the ESP database" help="Remove variants present at frequency &#62; 0.001 in the Exome Sequencing Project database (Use only for human genome)" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
49 <param name="thG" type="boolean" checked="true" truevalue="--thG" falsevalue="" label="Filter against the 1000g database project" help="Remove variants present at frequency &#62; 0.001 in the 1000 genome database (Use only for human genome)" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
50 <!-- ExAC = Exome Aggregation Consortium; emits the --exac flag when checked, nothing otherwise --> <param name="exac" type="boolean" checked="true" truevalue="--exac" falsevalue="" label="Filter against the ExAC database" help="Remove variants present at frequency &#62; 0.001 in the Exome Aggregation Consortium database (Use only for human genome)" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
51
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
52 <repeat name="filters" title="Additional filters">
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
53 <!-- Accept both formats named in the label and in the help section (bed or vcf) --> <param name="reference" type="data" format="bed,vcf" label="Reference file (bed or vcf)" help="Remove variants present in the reference file"/>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
54 </repeat>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
55
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
56 </inputs>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
57
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
58 <outputs>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
59 <!-- Filtered result; labelled with the first space-separated token of the input dataset name --> <data name="output" format="tabular" label="${input.name.split(' ')[0]} filtered" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
60 </outputs>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
61
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
62 <stdio>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
63 <regex match="Error message:" source="stderr" level="fatal" description="Read error message for more details" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
64 <regex match="Warning message:" source="stdout" level="warning" description="" />
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
65 </stdio>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
66
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
67 <help>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
68
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
69 **What it does**
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
70
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
71 Filter a file annotated with the MutSpec-Annot tool. Variants present in public databases obtained from Annovar are removed from the input file (using the frequency limits given in the option descriptions above).
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
72
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
73 .. class:: warningmark
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
74
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
75 The genomic duplicate segments database can only be used for human and mouse genomes
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
76
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
77 .. class:: warningmark
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
78
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
79 The ESP, 1000 Genomes and ExAC databases can only be used for the human genome
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
80
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
81 --------------------------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
82
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
83 **Input**
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
84
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
85 .. class:: warningmark
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
86
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
87 Tab-delimited text files generated by the MutSpec-Annot tool.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
88
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
89 --------------------------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
90
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
91 **Additional Filters**
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
92
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
93 .. class:: warningmark
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
94
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
95 You may also want to filter against additional features such as repeats and tandem repeats.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
96 You just need to provide the reference in vcf or bed format.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
97
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
98 .. class:: infomark
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
99
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
100 Reference files are available on IARC Galaxy Shared Data.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
101 On the top panel click on "Shared Data" and select "Data Libraries".
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
102 The category "BED annotations" contains reference files for different genomes.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
103
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
104
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
105 --------------------------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
106
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
107 **Output**
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
108
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
109 Tab-delimited text file from which variants considered to be neutral polymorphisms have been removed.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
110
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
111 --------------------------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
112
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
113 **Example**
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
114
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
115 Filtering the following file against dbSNP only (variants with an RS number are removed)::
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
116
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
117 Chr Start End Ref Alt Func.refGene Gene.refGene ExonicFunc.refGene AAChange.refGene genomicSuperDups snp138 1000g2014oct_all esp6500si_all Strand context Chromosome Start_Position End_Position Reference_Allele Tumor_Seq_Allele2
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
118 chr7 121717919 121717920 - G exonic AASS frameshift insertion AASS:NM_005763:exon23:c.2634dupC:p.A879fs NA rs147476318 NA NA - GCG chr7 121717919 121717920 - G
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
119 chr1 230846235 230846235 T A exonic AGT nonsynonymous SNV AGT:NM_000029:exon2:c.A362T:p.H121L NA NA NA NA - GTG chr1 230846235 230846235 T A
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
120 chr14 33290999 33290999 A G exonic AKAP6 nonsynonymous SNV AKAP6:NM_004274:exon13:c.A3980G:p.D1327G NA NA NA NA + GAC chr14 33290999 33290999 A G
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
121 chr12 8082458 8082458 C T exonic SLC2A3 nonsynonymous SNV SLC2A3:NM_006931:exon6:c.G683A:p.R228Q NA rs200481428 0.000199681 NA - CCG chr12 8082458 8082458 C T
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
122 chr4 70156391 70156391 T C exonic UGT2B28 nonsynonymous SNV UGT2B28:NM_053039:exon5:c.T1172C:p.V391A score=0.949699;Name=chr4:70035680 NA 0.000199681 NA + GTA chr4 70156391 70156391 T C
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
123
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
124 Will produce::
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
125
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
126 Chr Start End Ref Alt Func.refGene Gene.refGene ExonicFunc.refGene AAChange.refGene genomicSuperDups snp138 1000g2014oct_all esp6500si_all Strand context Chromosome Start_Position End_Position Reference_Allele Tumor_Seq_Allele2
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
127 chr1 230846235 230846235 T A exonic AGT nonsynonymous SNV AGT:NM_000029:exon2:c.A362T:p.H121L NA NA NA NA - GTG chr1 230846235 230846235 T A
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
128 chr14 33290999 33290999 A G exonic AKAP6 nonsynonymous SNV AKAP6:NM_004274:exon13:c.A3980G:p.D1327G NA NA NA NA + GAC chr14 33290999 33290999 A G
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
129 chr4 70156391 70156391 T C exonic UGT2B28 nonsynonymous SNV UGT2B28:NM_053039:exon5:c.T1172C:p.V391A score=0.949699;Name=chr4:70035680 NA 0.000199681 NA + GTA chr4 70156391 70156391 T C
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
130
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
131
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
132 --------------------------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
133
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
134 **Contact**
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
135
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
136 ardinm@fellows.iarc.fr; cahaisv@iarc.fr
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
137
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
138 --------------------------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
139
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
140 **Code**
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
141
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
142 The source code is available on `GitHub`__
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
143
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
144 .. __: https://github.com/IARCbioinfo/mutspec.git
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
145
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
146
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
147 </help>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
148
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
149
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
150 <citations>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
151 <citation type="bibtex">
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
152 @article{ardin_mutspec:_2016,
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
153 title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
154 volume = {17},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
155 issn = {1471-2105},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
156 doi = {10.1186/s12859-016-1011-z},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
157 shorttitle = {{MutSpec}},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
158 abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
159 {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
160 {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
161 pages = {170},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
162 number = {1},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
163 journaltitle = {{BMC} Bioinformatics},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
164 author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
165 date = {2016},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
166 pmid = {27091472},
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
167 keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
168 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
169 </citation>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
170 </citations>
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
171
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
172 </tool>