mutspec: mutspecStat.xml annotate

annotate mutspecStat.xml @ 0:8c682b3a7c5b draft

Uploaded

author	iarc
date	Tue, 19 Apr 2016 03:07:11 -0400
parents
children	748b7a8b634c

rev	line source
0 8c682b3a7c5b Uploaded iarc parents: diff changeset	1 <tool id="mutSpecStat" name="MutSpec Stat" version="0.1" hidden="false">
8c682b3a7c5b Uploaded iarc parents: diff changeset	2 <description>Calculate various statistics on mutations</description>
8c682b3a7c5b Uploaded iarc parents: diff changeset	3
8c682b3a7c5b Uploaded iarc parents: diff changeset	4 <requirements> <!-- Dependencies declared by this tool: the SCRIPT_PATH environment setting and five version-pinned packages -->
8c682b3a7c5b Uploaded iarc parents: diff changeset	5 <requirement type="set_environment">SCRIPT_PATH</requirement>
8c682b3a7c5b Uploaded iarc parents: diff changeset	6 <requirement version="5.18.1" type="package">perl</requirement>
8c682b3a7c5b Uploaded iarc parents: diff changeset	7 <requirement version="3.3" type="package">weblogo</requirement>
8c682b3a7c5b Uploaded iarc parents: diff changeset	8 <requirement version="1.7.1" type="package">numpy</requirement>
8c682b3a7c5b Uploaded iarc parents: diff changeset	9 <requirement version="3.1.2" type="package">R</requirement>
8c682b3a7c5b Uploaded iarc parents: diff changeset	10 <requirement version="0.1" type="package">mutspec</requirement>
8c682b3a7c5b Uploaded iarc parents: diff changeset	11 </requirements>
8c682b3a7c5b Uploaded iarc parents: diff changeset	12
8c682b3a7c5b Uploaded iarc parents: diff changeset	13 <!-- Cheetah template for the mutspecStat_wrapper.sh call. Arguments are emitted in this order: the html output, the shared/ucsc/chrom/ directory under GALAXY_DATA_INDEX_DIR, estimT when estimSign equals "true" (otherwise 0), one quoted option string (reference genome, its path field, the pooldata and reportSample flags), then one quoted "$f=name" argument per element of dataset_list. NOTE(review): the pattern "$(.*)$" starts with an end-of-string anchor, so it looks like it always matches with an empty group 1, which would leave the #else branch unreachable and the name part empty. Confirm the intended pattern. --> <command interpreter="bash">
8c682b3a7c5b Uploaded iarc parents: diff changeset	14 mutspecStat_wrapper.sh
8c682b3a7c5b Uploaded iarc parents: diff changeset	15 $html
8c682b3a7c5b Uploaded iarc parents: diff changeset	16 ${GALAXY_DATA_INDEX_DIR}/shared/ucsc/chrom/
8c682b3a7c5b Uploaded iarc parents: diff changeset	17 #if $estimateSignature.estimSign == "true":
8c682b3a7c5b Uploaded iarc parents: diff changeset	18 ${estimateSignature.estimT}
8c682b3a7c5b Uploaded iarc parents: diff changeset	19 #else
8c682b3a7c5b Uploaded iarc parents: diff changeset	20 0
8c682b3a7c5b Uploaded iarc parents: diff changeset	21 #end if
8c682b3a7c5b Uploaded iarc parents: diff changeset	22
8c682b3a7c5b Uploaded iarc parents: diff changeset	23 "--refGenome ${refGenome} --pathSeqRefGenome ${refGenome.fields.path} $pooldata $reportSample"
8c682b3a7c5b Uploaded iarc parents: diff changeset	24 #import re
8c682b3a7c5b Uploaded iarc parents: diff changeset	25 #for $f in $dataset_list
8c682b3a7c5b Uploaded iarc parents: diff changeset	26 #set $regexp = $re.compile("$(.*)$")
8c682b3a7c5b Uploaded iarc parents: diff changeset	27 #if $regexp.search($f.name)
8c682b3a7c5b Uploaded iarc parents: diff changeset	28 #set filename=$regexp.search($f.name)
8c682b3a7c5b Uploaded iarc parents: diff changeset	29 "$f=${filename.group(1)}"
8c682b3a7c5b Uploaded iarc parents: diff changeset	30 #else
8c682b3a7c5b Uploaded iarc parents: diff changeset	31 "$f=${f.name}"
8c682b3a7c5b Uploaded iarc parents: diff changeset	32 #end if
8c682b3a7c5b Uploaded iarc parents: diff changeset	33 #end for
8c682b3a7c5b Uploaded iarc parents: diff changeset	34 </command>
8c682b3a7c5b Uploaded iarc parents: diff changeset	35
8c682b3a7c5b Uploaded iarc parents: diff changeset	36 <inputs> <!-- User-facing parameters: the annotated dataset list, the reference genome (options read from the annovar_index data table), two boolean switches whose true values are command-line flags, and the optional signature-number estimation. estimT is declared as an integer (minimum 1) so non-numeric input is rejected by the form instead of being placed on the wrapper command line. -->
8c682b3a7c5b Uploaded iarc parents: diff changeset	37 <param name="dataset_list" type="data_collection" format="tabular" collection_type="list" label="Annotated Dataset List" help="Select a dataset list/collection from your history" />
8c682b3a7c5b Uploaded iarc parents: diff changeset	38 <param name="refGenome" type="select" label="Reference genome" help="All data in your dataset list should have been generated with the selected genome">
8c682b3a7c5b Uploaded iarc parents: diff changeset	39 <options from_data_table="annovar_index" />
8c682b3a7c5b Uploaded iarc parents: diff changeset	40 </param>
8c682b3a7c5b Uploaded iarc parents: diff changeset	41
8c682b3a7c5b Uploaded iarc parents: diff changeset	42 <param name="pooldata" type="boolean" checked="true" truevalue="--pooldata" falsevalue="" label="Include statistics on the pooled samples" />
8c682b3a7c5b Uploaded iarc parents: diff changeset	43 <param name="reportSample" type="boolean" checked="false" truevalue="--reportSample" falsevalue="" label="Generate one output file for each sample" help="By default, one output Excel file will be generated with statistics of each sample shown in different data sheets. Setting this option to true will generate one Excel file for each sample instead. It is recommended to use this option if your dataset list contains more than 250 files as the Excel output file may be too heavy to open easily on a computer with limited RAM"/>
8c682b3a7c5b Uploaded iarc parents: diff changeset	44
8c682b3a7c5b Uploaded iarc parents: diff changeset	45 <conditional name="estimateSignature">
8c682b3a7c5b Uploaded iarc parents: diff changeset	46 <param name="estimSign" type="boolean" label="Compute statistics for estimating the number of signatures" help="This option generates different statistics that can be used to estimate the number of signatures to extract with NMF (this number should be used in the MutSpec-NMF tool)"/>
8c682b3a7c5b Uploaded iarc parents: diff changeset	47 <when value="true">
8c682b3a7c5b Uploaded iarc parents: diff changeset	48 <param name="estimT" type="integer" min="1" value="8" label="Maximum number of signatures to compute" help="Warning: Selecting a number above 8 may not work on small datasets"/>
8c682b3a7c5b Uploaded iarc parents: diff changeset	49 </when>
8c682b3a7c5b Uploaded iarc parents: diff changeset	50 </conditional>
8c682b3a7c5b Uploaded iarc parents: diff changeset	51
8c682b3a7c5b Uploaded iarc parents: diff changeset	52 </inputs>
8c682b3a7c5b Uploaded iarc parents: diff changeset	53
8c682b3a7c5b Uploaded iarc parents: diff changeset	54 <outputs> <!-- One html-format output, labelled with the name of the input dataset list -->
8c682b3a7c5b Uploaded iarc parents: diff changeset	55 <data format="html" name="html" type="data" label="mutation spectra report on ${dataset_list.name}" />
8c682b3a7c5b Uploaded iarc parents: diff changeset	56 </outputs>
8c682b3a7c5b Uploaded iarc parents: diff changeset	57
8c682b3a7c5b Uploaded iarc parents: diff changeset	58 <stdio> <!-- Text matching "FutureWarning" (source="both") is reported at warning level -->
8c682b3a7c5b Uploaded iarc parents: diff changeset	59 <regex source="both"
8c682b3a7c5b Uploaded iarc parents: diff changeset	60 match="FutureWarning"
8c682b3a7c5b Uploaded iarc parents: diff changeset	61 description="FutureWarning"
8c682b3a7c5b Uploaded iarc parents: diff changeset	62 level="warning" />
8c682b3a7c5b Uploaded iarc parents: diff changeset	63 </stdio>
8c682b3a7c5b Uploaded iarc parents: diff changeset	64
8c682b3a7c5b Uploaded iarc parents: diff changeset	65 <help>
8c682b3a7c5b Uploaded iarc parents: diff changeset	66
8c682b3a7c5b Uploaded iarc parents: diff changeset	67 What it does
8c682b3a7c5b Uploaded iarc parents: diff changeset	68
8c682b3a7c5b Uploaded iarc parents: diff changeset	69 MutSpec-Stat calculates various statistics describing mutation characteristics extracted from a dataset collection, and optionally estimates the number of signatures present in the dataset.
8c682b3a7c5b Uploaded iarc parents: diff changeset	70 The statistics include overall distribution of mutations, mutation distribution for single base substitutions (SBS) by functional regions, chromosomes, or in their trinucleotide sequence context (see details below).
8c682b3a7c5b Uploaded iarc parents: diff changeset	71
8c682b3a7c5b Uploaded iarc parents: diff changeset	72 --------------------------------------------------------------------------------------------------------------------------------------------------
8c682b3a7c5b Uploaded iarc parents: diff changeset	73
8c682b3a7c5b Uploaded iarc parents: diff changeset	74 Input formats
8c682b3a7c5b Uploaded iarc parents: diff changeset	75
8c682b3a7c5b Uploaded iarc parents: diff changeset	76 The tool accepts a dataset list
8c682b3a7c5b Uploaded iarc parents: diff changeset	77
8c682b3a7c5b Uploaded iarc parents: diff changeset	78 .. class:: infomark
8c682b3a7c5b Uploaded iarc parents: diff changeset	79
8c682b3a7c5b Uploaded iarc parents: diff changeset	80 You should thus create a dataset list even when using one file (see Galaxy help to learn `how to create a dataset list`__)
8c682b3a7c5b Uploaded iarc parents: diff changeset	81
8c682b3a7c5b Uploaded iarc parents: diff changeset	82 .. __: https://wiki.galaxyproject.org/Histories#Dataset_Collections
8c682b3a7c5b Uploaded iarc parents: diff changeset	83
8c682b3a7c5b Uploaded iarc parents: diff changeset	84 .. class:: warningmark
8c682b3a7c5b Uploaded iarc parents: diff changeset	85
8c682b3a7c5b Uploaded iarc parents: diff changeset	86 The input files must have been generated by the MutSpec-Annot tool (so they contain the required annotations).
8c682b3a7c5b Uploaded iarc parents: diff changeset	87
8c682b3a7c5b Uploaded iarc parents: diff changeset	88 --------------------------------------------------------------------------------------------------------------------------------------------------
8c682b3a7c5b Uploaded iarc parents: diff changeset	89
8c682b3a7c5b Uploaded iarc parents: diff changeset	90 Output
8c682b3a7c5b Uploaded iarc parents: diff changeset	91
8c682b3a7c5b Uploaded iarc parents: diff changeset	92 MutSpec-Stat generates an html page with links to :
8c682b3a7c5b Uploaded iarc parents: diff changeset	93 - an Excel file that includes all computed statistics shown in tabular and graphical formats, for each sample (one per data sheet) and for the pooled samples (optional),
8c682b3a7c5b Uploaded iarc parents: diff changeset	94 - html pages for individual sample results,
8c682b3a7c5b Uploaded iarc parents: diff changeset	95 - the input matrix for the tool MutSpec-NMF,
8c682b3a7c5b Uploaded iarc parents: diff changeset	96 - the result of the estimation of the number of signatures (if the option "Compute statistics for estimating the number of signatures" was selected).
8c682b3a7c5b Uploaded iarc parents: diff changeset	97
8c682b3a7c5b Uploaded iarc parents: diff changeset	98 The following statistics are generated:
8c682b3a7c5b Uploaded iarc parents: diff changeset	99
8c682b3a7c5b Uploaded iarc parents: diff changeset	100 Graph 1. SBS distribution
8c682b3a7c5b Uploaded iarc parents: diff changeset	101 Proportion (percent of all SBS) of each type of single base substitution (SBS).
8c682b3a7c5b Uploaded iarc parents: diff changeset	102 All SBS are considered, including the ones without strand orientation annotation.
8c682b3a7c5b Uploaded iarc parents: diff changeset	103
8c682b3a7c5b Uploaded iarc parents: diff changeset	104 Table 1. Frequency and counts of all SBS
8c682b3a7c5b Uploaded iarc parents: diff changeset	105 Values corresponding to graph 1.
8c682b3a7c5b Uploaded iarc parents: diff changeset	106
8c682b3a7c5b Uploaded iarc parents: diff changeset	107
8c682b3a7c5b Uploaded iarc parents: diff changeset	108 Graph 2. Impact on protein sequence
8c682b3a7c5b Uploaded iarc parents: diff changeset	109 Impact of all mutations (SBS and Indel) on the protein sequence based on the ExonicFunc.refGene annotation.
8c682b3a7c5b Uploaded iarc parents: diff changeset	110 For more details about the annotation, please visit the `Annovar web page`__
8c682b3a7c5b Uploaded iarc parents: diff changeset	111
8c682b3a7c5b Uploaded iarc parents: diff changeset	112 .. __: http://www.openbioinformatics.org/annovar/annovar_gene.html#output1
8c682b3a7c5b Uploaded iarc parents: diff changeset	113
8c682b3a7c5b Uploaded iarc parents: diff changeset	114
8c682b3a7c5b Uploaded iarc parents: diff changeset	115 Table 2. Frequency and counts of functional impacts
8c682b3a7c5b Uploaded iarc parents: diff changeset	116 Values corresponding to graph 2.
8c682b3a7c5b Uploaded iarc parents: diff changeset	117
8c682b3a7c5b Uploaded iarc parents: diff changeset	118
8c682b3a7c5b Uploaded iarc parents: diff changeset	119 Graph 3. Stranded distribution of SBS
8c682b3a7c5b Uploaded iarc parents: diff changeset	120 Proportion (percent of all SBS with strand annotation) of the six substitution types on the transcribed and non-transcribed strand.
8c682b3a7c5b Uploaded iarc parents: diff changeset	121 Only regions with strand annotation are considered.
8c682b3a7c5b Uploaded iarc parents: diff changeset	122
8c682b3a7c5b Uploaded iarc parents: diff changeset	123 Table 3. Significance of the strand biases
8c682b3a7c5b Uploaded iarc parents: diff changeset	124 The strand bias for each SBS type is calculated as the ratio of SBS on the non-transcribed (coding) versus the transcribed (non-coding) strand.
8c682b3a7c5b Uploaded iarc parents: diff changeset	125 The statistical significance of the differences between the mutational frequencies on the non-transcribed and the
8c682b3a7c5b Uploaded iarc parents: diff changeset	126 transcribed strand (equal to 0.5, as expected by chance) is assessed using a chi-squared test followed by the
8c682b3a7c5b Uploaded iarc parents: diff changeset	127 Benjamini-Hochberg procedure for multiple testing correction (only samples with at least 1 mutation on the non-transcribed or on the transcribed strand are considered).
8c682b3a7c5b Uploaded iarc parents: diff changeset	128 Two tables are shown to display the 6 SBS types in both orientations.
8c682b3a7c5b Uploaded iarc parents: diff changeset	129
8c682b3a7c5b Uploaded iarc parents: diff changeset	130
8c682b3a7c5b Uploaded iarc parents: diff changeset	131 Table 4. SBS distribution by functional region
8c682b3a7c5b Uploaded iarc parents: diff changeset	132 Count and percentages of SBS in genomic regions based on the Func.refGene annotation.
8c682b3a7c5b Uploaded iarc parents: diff changeset	133
8c682b3a7c5b Uploaded iarc parents: diff changeset	134
8c682b3a7c5b Uploaded iarc parents: diff changeset	135 Table 5. Strand bias by functional region
8c682b3a7c5b Uploaded iarc parents: diff changeset	136 Counts of the strand bias for the 6 SBS types in different functional regions.
8c682b3a7c5b Uploaded iarc parents: diff changeset	137
8c682b3a7c5b Uploaded iarc parents: diff changeset	138
8c682b3a7c5b Uploaded iarc parents: diff changeset	139 Table 6. SBS distribution per chromosome
8c682b3a7c5b Uploaded iarc parents: diff changeset	140 Counts of SBS per chromosome for the six SBS types.
8c682b3a7c5b Uploaded iarc parents: diff changeset	141 The correlation between SBS counts and chromosome size is calculated using a Pearson correlation test.
8c682b3a7c5b Uploaded iarc parents: diff changeset	142
8c682b3a7c5b Uploaded iarc parents: diff changeset	143
8c682b3a7c5b Uploaded iarc parents: diff changeset	144 Panel 1. Trinucleotide sequence context of SBS on the genomic sequence
8c682b3a7c5b Uploaded iarc parents: diff changeset	145 The trinucleotide sequence context takes into consideration the bases immediately flanking the SBS in 5' and in 3'.
8c682b3a7c5b Uploaded iarc parents: diff changeset	146 SBS counts and frequency data are shown as tables, heatmaps or bar graphs. The heatmap colors are scaled to the maximum value of the corresponding table. The bar graph is scaled to the maximum frequency value (the total number of mutations per SBS type is shown in parentheses).
8c682b3a7c5b Uploaded iarc parents: diff changeset	147
8c682b3a7c5b Uploaded iarc parents: diff changeset	148
8c682b3a7c5b Uploaded iarc parents: diff changeset	149
8c682b3a7c5b Uploaded iarc parents: diff changeset	150 Panel 2. Stranded analysis of trinucleotide sequence context of SBS
8c682b3a7c5b Uploaded iarc parents: diff changeset	151 SBS within their trinucleotide sequence context are counted on the non-transcribed and transcribed strands of the gene region they are located in. Counts and frequencies are shown as tables or bar graphs.
8c682b3a7c5b Uploaded iarc parents: diff changeset	152 Only SBS with strand orientation annotation are considered in this analysis (strand annotation retrieved from RefSeq database).
8c682b3a7c5b Uploaded iarc parents: diff changeset	153
8c682b3a7c5b Uploaded iarc parents: diff changeset	154
8c682b3a7c5b Uploaded iarc parents: diff changeset	155 </help>
8c682b3a7c5b Uploaded iarc parents: diff changeset	156
8c682b3a7c5b Uploaded iarc parents: diff changeset	157 </tool>

Mercurial > repos > iarc > mutspec

annotate mutspecStat.xml @ 0:8c682b3a7c5b draft