annotate RBGOA.xml @ 2:5acf9dfdfa27 draft default tip

planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
author cristian
date Wed, 09 Nov 2022 08:57:54 +0000
parents f7287f82602f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
1 <tool id="RBGOA" name="RBGOA" version="0.3.0" python_template_version="3.5">
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
2 <description>GO_MWU: a Rank Based Gene Ontology Analysis</description>
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
3 <requirements>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
4 <requirement type="package" version="5.6">r-ape</requirement>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
5 <requirement type="package" version="1.20.3">r-getopt</requirement>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
6 </requirements>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
7 <version_command>Rscript '$__tool_directory__/GO_MWU.R' -v</version_command> <!-- GO_MWU.R ships in the tool directory, not the job working directory -->
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
8 <command detect_errors="exit_code"><![CDATA[
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
9 ln -s '${input1}' samples.tsv &&
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
10 ln -s '${input2}' annotation.tsv &&
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
11 Rscript '$__tool_directory__/GO_MWU.R' -i samples.tsv -a annotation.tsv -g '$__tool_directory__/go.obo' -d '$input3' -c '$input_filter.cluster' -o '$input_filter.over' -m '$input_filter.min' -k '$plot_output.absval' -p '$grouping.pcut' -t '$grouping.hcut' -e '$plot_output.textsize' --l1 '$plot_output.lev1' --l2 '$plot_output.lev2' --l3 '$plot_output.lev3' &&
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
12 mv samples_${input3}.tsv div_input.tsv &&
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
13 mv dissim_${input3}_samples_annotation.tsv dissim.tsv &&
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
14 mv MWU_${input3}_samples.tsv mwu_file.tsv
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
15 ]]></command>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
16 <inputs>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
17 <param type="data" name="input1" format="tabular" label="Genes of interest with associated value" />
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
18 <param type="data" name="input2" format="tabular" label="Gene-GO annotation file" />
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
19 <param type="select" name="input3" label="GO division" >
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
20 <option value="BP" selected="true">BP</option>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
21 <option value="MF">MF</option>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
22 <option value="CC">CC</option>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
23 </param>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
24 <section name="input_filter" title="Input Filtering" expanded="true">
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
25 <param type="float" name="over" value="0.1" label="Filter out GO categories that include more than this fraction of the total number of genes" />
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
26 <param type="integer" name="min" value="5" label="Consider GO categories that have at least this many genes" />
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
27 <param type="float" name="cluster" value="0.25" label="Threshold for merging similar (gene-sharing) terms" />
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
28 </section>
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
29 <section name="plot_output" title="Plot tweaking" expanded="true">
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
30 <param type="float" name="absval" value="1.0" label="absValue" help="Threshold for 'good genes'. Default: 1, to use with log2(foldchange). Read help below!" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
31 <param type="float" name="lev1" value="0.1" label="Level 1" help="Significance level for smallest text" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
32 <param type="float" name="lev2" value="0.05" label="Level 2" help="Significance level for intermediate text" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
33 <param type="float" name="lev3" value="0.01" label="Level 3" help="Significance level for largest text" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
34 <param type="float" name="textsize" value="1.2" label="TextSize for plot labels" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
35 </section>
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
36 <section name="grouping" title="Significance and Grouping" expanded="true">
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
37 <param type="float" name="pcut" value="1e-2" label="Adjusted p-value cutoff for representative GO" />
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
38 <param type="float" name="hcut" value="0.9" label="Height at which to cut the GO terms tree to get 'independent groups'" />
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
39 </section>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
40 </inputs>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
41 <outputs>
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
42 <data name="graph" format="pdf" from_work_dir="Rplots.pdf" label="Plot of GO terms for (${input3})" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
43 <data name="div_input" format="tabular" from_work_dir="div_input.tsv" label="Augmented ${input3} GO terms for genes" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
44 <data name="dissim" format="tabular" from_work_dir="dissim.tsv" label="Dissimilarity matrix of GO terms" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
45 <data name="mwu" format="tabular" from_work_dir="mwu_file.tsv" label="MWU test result for (${input3})" />
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
46 <data name="results" format="tabular" from_work_dir="results.tsv" label="Raw data for plot" />
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
47 <data name="best_go" format="tabular" from_work_dir="best_go.tsv" label="Best GO terms" />
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
48 </outputs>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
49 <tests>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
50 <test>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
51 <param name="input1" value="heats.csv"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
52 <param name="input2" value="amil_defog_iso2go.tab"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
53 <param name="input3" value="BP"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
54 <output name="graph">
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
55 <assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
56 <has_size value="7043" delta="50"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
57 </assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
58 </output>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
59 <output name="div_input">
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
60 <assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
61 <has_n_lines n="25114"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
62 </assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
63 </output>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
64 <output name="dissim">
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
65 <assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
66 <has_n_lines n="266"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
67 </assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
68 </output>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
69 <output name="mwu">
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
70 <assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
71 <has_n_lines n="266"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
72 </assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
73 </output>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
74 <output name="results">
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
75 <assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
76 <has_n_lines n="51"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
77 </assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
78 </output>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
79 <output name="best_go">
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
80 <assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
81 <has_n_lines n="8"/>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
82 </assert_contents>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
83 </output>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
84 </test>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
85 </tests>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
86 <help><![CDATA[
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
87 ==========================================================
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
88 Rank-based Gene Ontology Analysis with Adaptive Clustering
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
89 ==========================================================
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
90
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
91 What it does
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
92 ------------
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
93
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
94 In contrast to most other "GO enrichment analysis" methods (e.g., GeneMerge or DAVID), this one does not look for GO categories enriched among "significant" genes.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
95
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
96 Instead, it measures whether each GO category is significantly enriched with either up- or down-regulated genes.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
97 Basically, the method tests whether the genes belonging to a certain GO category are significantly bunched up near the top or the bottom of the global ranked list of genes, instead of being spread evenly all over it.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
98 The test used is called the Mann-Whitney U (MWU) test.
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
99
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
100 The major advantage of this approach is that the experimenter does not have to impose an arbitrary threshold for initial selection of "significant genes", and thus the whole dataset can be used to gain information.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
101
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
102 In fact, no preliminary statistical test is required prior to the analysis; the method is best suited to analyze the distribution of raw measures, such as dN/dS values, log-fold-changes of gene expression, or kME (correlation) values from WGCNA.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
103
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
104 The method can also be run in a traditional mode, looking for GO categories significantly over-represented among "significant genes" (based on Fisher's exact test). To make the method work in this mode, the measure of significance should be binary (1 or 0, i.e., significant or not).
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
105
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
106 **"absValue"**: Genes with the measure value exceeding this value will be counted as "good genes".
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
107 When using signed log(p-values) use the value 1.30103 which corresponds to -log(0.05, 10). Specify the value 0.001 if you are doing
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
108 Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes").
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
109 Use a value of 1 if you're using log2(fold-change).
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
110 This parameter does not affect the statistics and serves an illustrative purpose only.
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
111
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
112 The method automatically retrieves all the missing parental terms for the lower-level GO categories.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
113 Then, fully redundant GO categories (i.e., containing exactly the same genes) are collapsed under the name of the lower-level (more specific) term.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
114 Then, highly similar categories are merged according to complete linkage clustering based on the fraction of shared genes.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
115 The distance measure for clustering, introduced in Kosiol et al 2008, is one minus the fraction of shared genes, i.e. one minus the number of genes shared between the two GO categories within the analyzed dataset divided by the size of the smaller of the two categories.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
116
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
117 The resulting hierarchical tree is then “cut” at the adjustable “height” ('Threshold for merging similar (gene-sharing) terms' parameter) to merge clustered categories.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
118 The default for this parameter is 0.25, implying that a group of categories will be merged if the most dissimilar two of them share >75% of genes included in the smaller of the two.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
119 The merged categories inherit the name of the largest one.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
120 This simplifies the GO hierarchy, generates biologically meaningful groups of categories tailored for the particular dataset, and improves the multiple testing situation.
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
121
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
122 In the final plot, the method shows hierarchical clustering of GO categories based on the number of genes shared between them, to indicate which categories might be significant because of the same genes.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
123
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
124 ------
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
125
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
126 Output Files
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
127 ------------
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
128
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
129 The plot
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
130 ^^^^^^^^
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
131
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
132 The plot consists of three parts:
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
133
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
134 | - Hierarchical clustering tree of significant GO categories based on shared genes in the current dataset.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
135 Categories with no branch length between them are subsets of each other and their significance is most likely driven by the same genes.
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
136
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
137 | - Category names, plotted in different colors and fonts.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
138 Fonts indicate the level of statistical significance, colors indicate enrichment of GO categories with either up- (red) or down- (blue) regulated genes.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
139 The category names are preceded by the fraction indicating the number of "good candidates" relative to the total number of genes belonging to this category.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
140 The "good candidates" are the genes exceeding an arbitrary **'absValue'** cutoff in their significance measure.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
141 Adjust the 'absValue' parameter according to what your measure is.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
142 In this tool it defaults to 1, which suits log2(fold-change); if the measure is a signed log p-value, set it to -log(0.05,10) = 1.30103 so that the "good candidates" are the ones with raw p-value < 0.05.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
143 Ideally we would like to see more than one such gene per displayed GO category.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
144 With 'level 1'=1 the script will display all the categories containing "good candidates", which is a good way to summarize the whole GO content of the experiment.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
145 Note that the 'absValue' parameter does not affect the statistics and serves an illustrative purpose only.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
146 In the Fisher-test mode (binary significance measure) and signed WGCNA module analysis the colors are not used; in that case specify absValue=0.001 to make the script display the fraction of genes with non-zero measure within a GO category.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
147
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
148 | - The legend giving the correspondence of the fonts to significance thresholds.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
149 The method corrects the p-values using the Benjamini-Hochberg false discovery rate procedure except when analyzing WGCNA modules; in that case the false discovery rate is determined from ten permutations where significance measures are randomly shuffled among genes.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
150 To set different thresholds for plotting, change parameters 'Level 1', 'Level 2' and 'Level 3' in the 'Plot tweaking' section.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
151
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
152 In addition, the script prints out the number of GO categories displayed and the fraction of "good candidates" that these categories account for. This is useful to evaluate whether the generated GO summary really accounts for a substantial portion of what was going on.
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
153
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
154 If the labels of the plot are too crowded or too small, you can adjust the 'TextSize for plot labels' parameter and relaunch the analysis.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
155
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
156 The tables
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
157 ^^^^^^^^^^
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
158
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
159 The script generates 5 tables.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
160
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
161 Augmented GO terms for genes
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
162 main data table containing reformatted and augmented GO terms for each gene (in addition to the originally listed terms, the script finds all their parental terms if any were missing), and measures of interest.
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
163
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
164 Dissimilarity table
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
165 dissimilarity matrix of GO categories based on the number of genes shared between them in the dataset.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
166
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
167 MWU Test
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
168 The results of MWU test.
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
169
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
170 The raw data for plot
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
171 The raw data represented in the plot.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
172
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
173 Best GO terms
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
174 GO terms that best represent *independent* groups of significant GO terms.
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
175
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
176
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
177 ]]></help>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
178 <citations>
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
179 <citation type="doi">10.1186/s12864-015-1540-2</citation>
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
180 <citation type="bibtex">
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
181 @misc{githubGO_MWU,
2
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
182 author = {Matz, Mikhail},
5acf9dfdfa27 planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents: 1
diff changeset
183 year = {2021},
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
184 title = {GO_MWU},
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
185 publisher = {GitHub},
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
186 journal = {GitHub repository},
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
187 url = {https://github.com/z0on/GO_MWU},
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
188 }</citation>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
189 </citations>
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff changeset
190 </tool>