annotate mutspecStat.xml @ 5:097ae310ced0 draft

Uploaded
author iarc
date Fri, 29 Apr 2016 05:14:23 -0400
parents 916846f73e25
children 46a10309dfe2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
1 <tool id="mutSpecStat" name="MutSpec Stat" version="0.1" hidden="false">
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
2 <description>Calculate various statistics on mutations</description>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
3
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
4 <requirements>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
5 <requirement type="set_environment">SCRIPT_PATH</requirement>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
6 <requirement type="package" version="5.18.1">perl</requirement>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
7 <requirement type="package" version="3.3">weblogo</requirement>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
8 <requirement type="package" version="1.7.1">numpy</requirement>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
9 <requirement type="package" version="3.1.2">R</requirement>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
10 <requirement type="package" version="0.1">mutspec</requirement>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
11 </requirements>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
12
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
13 <command interpreter="bash">
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
14 mutspecStat_wrapper.sh
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
15 $html
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
16 ${GALAXY_DATA_INDEX_DIR}/shared/ucsc/chrom/
2
9d363eb081b5 Uploaded
iarc
parents: 1
diff changeset
17 #if str($estimateSignature.estimSign) == "true" or $estimateSignature.estimSign == True:
0
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
18 ${estimateSignature.estimT}
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
19 #else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
20 0
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
21 #end if
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
22
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
23 "--refGenome ${refGenome} --pathSeqRefGenome ${refGenome.fields.path} $pooldata $reportSample"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
24 #import re
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
25 #for $f in $dataset_list
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
26 #set $regexp = $re.compile("\((.*)\)")
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
27 #if $regexp.search($f.name)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
28 #set filename=$regexp.search($f.name)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
29 "$f=${filename.group(1)}"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
30 #else
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
31 "$f=${f.name}"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
32 #end if
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
33 #end for
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
34 </command>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
35
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
36 <inputs>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
37 <param name="dataset_list" type="data_collection" format="tabular" collection_type="list" label="Annotated Dataset List" help="Select a dataset list/collection from your history" />
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
38 <param name="refGenome" type="select" label="Reference genome" help="All data in your dataset list should have been generated with the selected genome">
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
39 <options from_data_table="annovar_index" />
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
40 </param>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
41
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
42 <param name="pooldata" type="boolean" checked="true" truevalue="--pooldata" falsevalue="" label="Include statistics on the pooled samples" />
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
43 <param name="reportSample" type="boolean" checked="false" truevalue="--reportSample" falsevalue="" label="Generate one output file for each sample" help="By default, one output Excel file will be generated with statistics of each sample shown in different data sheets. Setting this option to true will generate one Excel file for each sample instead. It is recommended to use this option if your dataset list contains more than 250 files as the Excel output file may be too heavy to open easily on a computer with limited RAM"/>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
44
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
45 <conditional name="estimateSignature">
2
9d363eb081b5 Uploaded
iarc
parents: 1
diff changeset
46 <param name="estimSign" type="boolean" checked="false" truevalue="true" label="Compute statistics for estimating the number of signatures" help="This option generates different statistics that can be used to estimate the number of signatures to extract with NMF (this number should be used in the MutSpec-NMF tool)"/>
0
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
47 <when value="true">
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
48 <param name="estimT" type="text" value="8" label="Maximum number of signatures to compute" help="Warning: Selecting a number above 8 may not work on small datasets"/>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
49 </when>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
50 </conditional>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
51
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
52 </inputs>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
53
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
54 <outputs>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
55 <data name="html" type="data" format="html" label="mutation spectra report on ${dataset_list.name}" />
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
56 </outputs>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
57
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
58 <stdio>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
59 <regex match="FutureWarning"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
60 source="both"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
61 level="warning"
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
62 description="FutureWarning" />
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
63 </stdio>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
64
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
65 <help>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
66
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
67 **What it does**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
68
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
69 MutSpec-Stat calculates various statistics describing mutation characteristics extracted from a dataset collection, and optionally estimates the number of signatures present in the dataset.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
70 The statistics include overall distribution of mutations, mutation distribution for single base substitutions (SBS) by functional regions, chromosomes, or in their trinucleotide sequence context (see details below).
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
71
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
72 --------------------------------------------------------------------------------------------------------------------------------------------------
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
73
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
74 **Input formats**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
75
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
76 The tool accepts a dataset list
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
77
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
78 .. class:: infomark
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
79
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
80 You should thus create a dataset list even when using one file (see Galaxy help to learn `how to create a dataset list`__)
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
81
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
82 .. __: https://wiki.galaxyproject.org/Histories#Dataset_Collections
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
83
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
84 .. class:: warningmark
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
85
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
86 The input files must have been generated by the MutSpec-Annot tool (so they contain the required annotations).
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
87
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
88 --------------------------------------------------------------------------------------------------------------------------------------------------
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
89
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
90 **Output**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
91
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
92 MutSpec-Stat generates an html page with links to :
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
93 - an Excel file that includes all computed statistics shown in tabular and graphical formats, for each sample (one per data sheet) and for the pooled samples (optional),
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
94 - html pages for individual sample results,
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
95 - the input matrix for the tool MutSpec-NMF,
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
96 - the result of the estimation of the number of signatures (if the option "Compute statistics for estimating the number of signatures" was selected).
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
97
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
98 The following statistics are generated:
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
99
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
100 **Graph 1. SBS distribution**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
101 Proportion (percent of all SBS) of each type of single base substitution (SBS).
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
102 All SBS are considered, including the ones without strand orientation annotation.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
103
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
104 **Table 1. Frequency and counts of all SBS**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
105 Values corresponding to graph 1.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
106
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
107
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
108 **Graph 2. Impact on protein sequence**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
109 Impact of all mutations (SBS and Indel) on the protein sequence based on the ExonicFunc.refGene annotation.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
110 For more details about the annotation, please visit the `Annovar web page`__
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
111
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
112 .. __: http://www.openbioinformatics.org/annovar/annovar_gene.html#output1
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
113
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
114
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
115 **Table 2. Frequency and counts of functional impacts**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
116 Values corresponding to graph 2.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
117
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
118
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
119 **Graph 3. Stranded distribution of SBS**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
120 Proportion (percent of all SBS with strand annotation) of the six substitution types on the transcribed and non-transcribed strand.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
121 Only regions with strand annotation are considered.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
122
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
123 **Table 3. Significance of the strand biases**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
124 The strand bias for each SBS type is calculated as the ratio of SBS on the non-transcribed (coding) versus the transcribed (non-coding) strand.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
125 The statistical significance of the differences between the mutational frequencies on the non-transcribed and the
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
126 transcribed strand (equal to 0.5, as expected by chance) is assessed using a chi-squared test followed by the Benjamini-
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
127 Hochberg procedure for multiple testing corrections (only samples with at least 1 mutation on the non-transcribed or on the transcribed strand are considered).
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
128 Two tables are shown to display the 6 SBS types in both orientations.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
129
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
130
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
131 **Table 4. SBS distribution by functional region**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
132 Count and percentages of SBS in genomic regions based on the Func.refGene annotation.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
133
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
134
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
135 **Table 5. Strand bias by functional region**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
136 Counts of the strand bias for the 6 SBS types in different functional regions.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
137
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
138
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
139 **Table 6. SBS distribution per chromosome**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
140 Counts of SBS per chromosome for the six SBS types.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
141 The correlation between SBS counts and chromosome size is calculated using a Pearson correlation test.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
142
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
143
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
144 **Panel 1. Trinucleotide sequence context of SBS on the genomic sequence**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
145 The trinucleotide sequence context takes into consideration the flanking bases 5' and 3' of the SBS.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
146 SBS counts and frequency data are shown as tables, heatmaps or bar graphs. The heatmap colors are scaled to the maximum value of the corresponding table. The bar graph is scaled to the maximum frequency value (total number of mutations per SBS type is shown in parentheses).
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
147
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
148
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
149
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
150 **Panel 2. Stranded analysis of trinucleotide sequence context of SBS**
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
151 SBS within their trinucleotide sequence context are counted on the non-transcribed and transcribed strands of the gene region they are located in. Counts and frequencies are shown as tables or bar graphs.
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
152 Only SBS with strand orientation annotation are considered in this analysis (strand annotation retrieved from RefSeq database).
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
153
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
154
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
155 </help>
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
156
1
748b7a8b634c Uploaded
iarc
parents: 0
diff changeset
157 <citations>
748b7a8b634c Uploaded
iarc
parents: 0
diff changeset
158 <citation type="bibtex">
4
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
159 @article{ardin_mutspec:_2016,
1
748b7a8b634c Uploaded
iarc
parents: 0
diff changeset
160 title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},
4
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
161 volume = {17},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
162 issn = {1471-2105},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
163 doi = {10.1186/s12859-016-1011-z},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
164 shorttitle = {{MutSpec}},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
165 abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
166 {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
167 {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
168 pages = {170},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
169 number = {1},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
170 journaltitle = {{BMC} Bioinformatics},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
171 author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
172 date = {2016},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
173 pmid = {27091472},
916846f73e25 Uploaded
iarc
parents: 2
diff changeset
174 keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}
1
748b7a8b634c Uploaded
iarc
parents: 0
diff changeset
175 }
748b7a8b634c Uploaded
iarc
parents: 0
diff changeset
176 </citation>
748b7a8b634c Uploaded
iarc
parents: 0
diff changeset
177 </citations>
748b7a8b634c Uploaded
iarc
parents: 0
diff changeset
178
0
8c682b3a7c5b Uploaded
iarc
parents:
diff changeset
179 </tool>