mutspec: mutspecStat.xml annotate

annotate mutspecStat.xml @ 6:46a10309dfe2 draft

Uploaded

author	iarc
date	Tue, 28 Jun 2016 02:59:32 -0400
parents	916846f73e25
children	eda59b985b1c

rev	line source
6 46a10309dfe2 Uploaded iarc parents: 4 diff changeset	1 <tool id="mutSpecStat" name="MutSpec Stat" version="0.1" hidden="false">
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	2 <description>Calculate various statistics on mutations</description>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	3
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	4 <requirements>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	5 <requirement type="set_environment">SCRIPT_PATH</requirement>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	6 <requirement type="package" version="5.18.1">perl</requirement>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	7 <requirement type="package" version="3.3">weblogo</requirement>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	8 <requirement type="package" version="1.7.1">numpy</requirement>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	9 <requirement type="package" version="3.1.2">R</requirement>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	10 <requirement type="package" version="0.1">mutspec</requirement>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	11 </requirements>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	12
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	13 <command interpreter="bash">
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	14 mutspecStat_wrapper.sh
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	15 $html
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	16 ${GALAXY_DATA_INDEX_DIR}/shared/ucsc/chrom/
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	17 #if str($estimateSignature.estimSign) == "true" or $estimateSignature.estimSign == True:
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	18 ${estimateSignature.estimT}
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	19 #else
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	20 0
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	21 #end if
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	22
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	23 "--refGenome ${refGenome} --pathSeqRefGenome ${refGenome.fields.path} $pooldata $reportSample"
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	24 #import re
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	25 #for $f in $dataset_list
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	26 #set $regexp = $re.compile("$(.*)$")
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	27 #if $regexp.search($f.name)
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	28 #set filename=$regexp.search($f.name)
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	29 "$f=${filename.group(1)}"
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	30 #else
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	31 "$f=${f.name}"
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	32 #end if
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	33 #end for
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	34 </command>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	35
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	36 <inputs>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	37 <param name="dataset_list" type="data_collection" format="tabular" collection_type="list" label="Annotated Dataset List" help="Select a dataset list/collection from your history" />
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	38 <param name="refGenome" type="select" label="Reference genome" help="All data in your dataset list should have been generated with the selected genome">
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	39 <options from_data_table="annovar_index" />
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	40 </param>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	41
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	42 <param name="pooldata" type="boolean" checked="true" truevalue="--pooldata" falsevalue="" label="Include statistics on the pooled samples" />
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	43 <param name="reportSample" type="boolean" checked="false" truevalue="--reportSample" falsevalue="" label="Generate one output file for each sample" help="By default, one output Excel file will be generated with statistics of each sample shown in different data sheets. Setting this option to true will generate one Excel file for each sample instead. It is recommended to use this option if your dataset list contains more than 250 files as the Excel output file may be too heavy to open easily on a computer with limited RAM"/>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	44
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	45 <conditional name="estimateSignature">
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	46 <param name="estimSign" type="boolean" checked="false" truevalue="true" label="Compute statistics for estimating the number of signatures" help="This option generates different statistics that can be used to estimate the number of signatures to extract with NMF (this number should be used in the MutSpec-NMF tool)"/>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	47 <when value="true">
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	48 <param name="estimT" type="text" value="8" label="Maximum number of signatures to compute" help="Warning: Selecting a number above 8 may not work on small datasets"/>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	49 </when>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	50 </conditional>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	51
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	52 </inputs>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	53
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	54 <outputs>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	55 <data name="html" type="data" format="html" label="mutation spectra report on ${dataset_list.name}" />
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	56 </outputs>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	57
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	58 <stdio>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	59 <regex match="FutureWarning"
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	60 source="both"
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	61 level="warning"
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	62 description="FutureWarning" />
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	63 </stdio>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	64
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	65 <help>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	66
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	67 What it does
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	68
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	69 MutSpec-Stat calculates various statistics describing mutation characteristics extracted from a dataset collection and, optionally, estimates the number of signatures present in the dataset.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	70 The statistics include overall distribution of mutations, mutation distribution for single base substitutions (SBS) by functional regions, chromosomes, or in their trinucleotide sequence context (see details below).
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	71
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	72 --------------------------------------------------------------------------------------------------------------------------------------------------
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	73
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	74 Input formats
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	75
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	76 The tool accepts a dataset list
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	77
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	78 .. class:: infomark
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	79
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	80 You should thus create a dataset list even when using one file (see Galaxy help to learn `how to create a dataset list`__)
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	81
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	82 .. __: https://wiki.galaxyproject.org/Histories#Dataset_Collections
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	83
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	84 .. class:: warningmark
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	85
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	86 The input files must have been generated by the MutSpec-Annot tool (so they contain the required annotations).
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	87
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	88 --------------------------------------------------------------------------------------------------------------------------------------------------
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	89
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	90 Output
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	91
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	92 MutSpec-Stat generates an html page with links to:
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	93 - an Excel file that includes all computed statistics shown in tabular and graphical formats, for each sample (one per data sheet) and for the pooled samples (optional),
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	94 - html pages for individual sample results,
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	95 - the input matrix for the tool MutSpec-NMF,
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	96 - the result of the estimation of the number of signatures (if the option "Compute statistics for estimating the number of signatures" was selected).
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	97
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	98 The following statistics are generated:
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	99
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	100 Graph 1. SBS distribution
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	101 Proportion (percent of all SBS) of each type of single base substitution (SBS).
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	102 All SBS are considered, including the ones without strand orientation annotation.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	103
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	104 Table 1. Frequency and counts of all SBS
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	105 Values corresponding to graph 1.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	106
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	107
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	108 Graph 2. Impact on protein sequence
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	109 Impact of all mutations (SBS and Indel) on the protein sequence based on the ExonicFunc.refGene annotation.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	110 For more details about the annotation, please visit the `Annovar web page`__
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	111
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	112 .. __: http://www.openbioinformatics.org/annovar/annovar_gene.html#output1
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	113
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	114
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	115 Table 2. Frequency and counts of functional impacts
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	116 Values corresponding to graph 2.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	117
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	118
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	119 Graph 3. Stranded distribution of SBS
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	120 Proportion (percent of all SBS with strand annotation) of the six substitution types on the transcribed and non-transcribed strand.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	121 Only regions with strand annotation are considered.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	122
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	123 Table 3. Significance of the strand biases
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	124 The strand bias for each SBS type is calculated as the ratio of SBS on the non-transcribed (coding) versus the transcribed (non-coding) strand.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	125 The statistical significance of the differences between the mutational frequencies on the non-transcribed and the
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	126 transcribed strand (equal to 0.5, as expected by chance) is assessed using a chi-squared test followed by the
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	127 Benjamini-Hochberg procedure for multiple testing correction (only samples with at least 1 mutation on the non-transcribed or on the transcribed strand are considered).
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	128 Two tables are shown to display the 6 SBS types in both orientations.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	129
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	130
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	131 Table 4. SBS distribution by functional region
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	132 Count and percentages of SBS in genomic regions based on the Func.refGene annotation.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	133
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	134
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	135 Table 5. Strand bias by functional region
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	136 Counts of the strand bias for the 6 SBS types in different functional regions.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	137
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	138
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	139 Table 6. SBS distribution per chromosome
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	140 Counts of SBS per chromosome for the six SBS types.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	141 The correlation between SBS counts and chromosome size is calculated using a Pearson correlation test.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	142
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	143
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	144 Panel 1. Trinucleotide sequence context of SBS on the genomic sequence
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	145 The trinucleotide sequence context takes into consideration the bases immediately flanking the SBS on its 5' and 3' sides.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	146 SBS counts and frequency data are shown as tables, heatmaps or bar graphs. The heatmap colors are scaled to the maximum value of the corresponding table. The bar graph is scaled to the maximum frequency value (the total number of mutations per SBS type is shown in parentheses).
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	147
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	148
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	149
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	150 Panel 2. Stranded analysis of trinucleotide sequence context of SBS
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	151 SBS within their trinucleotide sequence context are counted on the non-transcribed and transcribed strands of the gene region they are located in. Counts and frequencies are shown as tables or bar graphs.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	152 Only SBS with strand orientation annotation are considered in this analysis (strand annotation retrieved from the RefSeq database).
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	153
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	154
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	155 </help>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	156
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	157 <citations>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	158 <citation type="bibtex">
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	159 @article{ardin_mutspec:_2016,
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	160 title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	161 volume = {17},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	162 issn = {1471-2105},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	163 doi = {10.1186/s12859-016-1011-z},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	164 shorttitle = {{MutSpec}},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	165 abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	166 {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	167 {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	168 pages = {170},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	169 number = {1},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	170 journaltitle = {{BMC} Bioinformatics},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	171 author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	172 date = {2016},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	173 pmid = {27091472},
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	174 keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	175 }
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	176 </citation>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	177 </citations>
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	178
46a10309dfe2 Uploaded iarc parents: 4 diff changeset	179 </tool>

Mercurial > repos > iarc > mutspec

annotate mutspecStat.xml @ 6:46a10309dfe2 draft