Mercurial > repos > cristian > rbgoa
annotate RBGOA.xml @ 2:5acf9dfdfa27 draft default tip
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
author | cristian |
---|---|
date | Wed, 09 Nov 2022 08:57:54 +0000 |
parents | f7287f82602f |
children |
rev | line source |
---|---|
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
1 <tool id="RBGOA" name="RBGOA" version="0.3.0" python_template_version="3.5"> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
2 <description>"GO_MWU: a Rank Based Gene Ontology Analysis"</description> |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
3 <requirements> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
4 <requirement type="package" version="5.6">r-ape</requirement> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
5 <requirement type="package" version="1.20.3">r-getopt</requirement> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
6 </requirements> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
7 <!-- GO_MWU.R ships in the tool directory (the main command calls it from there), so address it by that path. --> <version_command>Rscript '$__tool_directory__/GO_MWU.R' -v</version_command> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
8 <command detect_errors="exit_code"><![CDATA[ |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
9 ln -s '${input1}' samples.tsv && |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
10 ln -s '${input2}' annotation.tsv && |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
11 Rscript '$__tool_directory__/GO_MWU.R' -i samples.tsv -a annotation.tsv -g '$__tool_directory__/go.obo' -d '$input3' -c '$input_filter.cluster' -o '$input_filter.over' -m '$input_filter.min' -k '$plot_output.absval' -p '$grouping.pcut' -t '$grouping.hcut' -e '$plot_output.textsize' --l1 '$plot_output.lev1' --l2 '$plot_output.lev2' --l3 '$plot_output.lev3' && |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
12 mv samples_${input3}.tsv div_input.tsv && |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
13 mv dissim_${input3}_samples_annotation.tsv dissim.tsv && |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
14 mv MWU_${input3}_samples.tsv mwu_file.tsv |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
15 ]]></command> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
16 <inputs> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
17 <param type="data" name="input1" format="tabular" label="Genes of interest with associated value" /> |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
18 <param type="data" name="input2" format="tabular" label="Gene-GO annotation file" /> |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
19 <param type="select" name="input3" label="GO division" > |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
20 <option value="BP" selected="true">BP</option> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
21 <option value="MF">MF</option> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
22 <option value="CC">CC</option> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
23 </param> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
24 <section name="input_filter" title="Input Filtering" expanded="true"> |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
25 <param type="float" name="over" value="0.1" label="Filter out GO categories that include more than this fraction of the total number of genes" /> |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
26 <param type="integer" name="min" value="5" label="Consider GO categories that have at least this many genes" /> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
27 <param type="float" name="cluster" value="0.25" label="Threshold for merging similar (gene-sharing) terms" /> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
28 </section> |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
29 <section name="plot_output" title="Plot tweaking" expanded="true"> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
30 <param type="float" name="absval" value="1.0" label="absValue" help="Threshold for 'good genes'. Default: 1, to use with log2(foldchange). Read help below!" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
31 <param type="float" name="lev1" value="0.1" label="Level 1" help="Significance level for smallest text" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
32 <param type="float" name="lev2" value="0.05" label="Level 2" help="Significance level for intermediate text" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
33 <param type="float" name="lev3" value="0.01" label="Level 3" help="Significance level for largest text" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
34 <param type="float" name="textsize" value="1.2" label="TextSize for plot labels" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
35 </section> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
36 <section name="grouping" title="Significance and Grouping" expanded="true"> |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
37 <param type="float" name="pcut" value="1e-2" label="Adjusted p-value cutoff for representative GO" /> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
38 <param type="float" name="hcut" value="0.9" label="Height at which to cut the GO terms tree to get 'independent groups'" /> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
39 </section> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
40 </inputs> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
41 <outputs> |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
42 <data name="graph" format="pdf" from_work_dir="Rplots.pdf" label="Plot of GO terms for (${input3})" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
43 <data name="div_input" format="tabular" from_work_dir="div_input.tsv" label="Augmented ${input3} GO terms for genes" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
44 <data name="dissim" format="tabular" from_work_dir="dissim.tsv" label="Dissimilarity matrix of GO terms" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
45 <data name="mwu" format="tabular" from_work_dir="mwu_file.tsv" label="MWU test result for (${input3})" /> |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
46 <data name="results" format="tabular" from_work_dir="results.tsv" label="Raw data for plot" /> |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
47 <data name="best_go" format="tabular" from_work_dir="best_go.tsv" label="Best GO terms" /> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
48 </outputs> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
49 <tests> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
50 <test> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
51 <param name="input1" value="heats.csv"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
52 <param name="input2" value="amil_defog_iso2go.tab"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
53 <param name="input3" value="BP"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
54 <output name="graph"> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
55 <assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
56 <has_size value="7043" delta="50"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
57 </assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
58 </output> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
59 <output name="div_input"> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
60 <assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
61 <has_n_lines n="25114"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
62 </assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
63 </output> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
64 <output name="dissim"> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
65 <assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
66 <has_n_lines n="266"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
67 </assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
68 </output> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
69 <output name="mwu"> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
70 <assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
71 <has_n_lines n="266"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
72 </assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
73 </output> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
74 <output name="results"> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
75 <assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
76 <has_n_lines n="51"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
77 </assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
78 </output> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
79 <output name="best_go"> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
80 <assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
81 <has_n_lines n="8"/> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
82 </assert_contents> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
83 </output> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
84 </test> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
85 </tests> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
86 <help><![CDATA[ |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
87 ========================================================== |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
88 Rank-based Gene Ontology Analysis with Adaptive Clustering |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
89 ========================================================== |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
90 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
91 What it does |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
92 ------------ |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
93 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
94 In contrast to most other "GO enrichment analysis" methods (e.g., GeneMerge or DAVID), this one does not look for GO categories enriched among "significant" genes. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
95 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
96 Instead, it measures whether each GO category is significantly enriched with either up- or down-regulated genes. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
97 Basically, the method tests whether the genes belonging to a certain GO category are significantly bunched up near the top or the bottom of the global ranked list of genes, instead of being spread evenly all over it. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
98 The test used is called the Mann-Whitney U (MWU) test. |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
99 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
100 The major advantage of this approach is that the experimenter does not have to impose an arbitrary threshold for initial selection of "significant genes", and thus the whole dataset can be used to gain information. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
101 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
102 In fact, no preliminary statistical test is required prior to the analysis; the method is best suited to analyze the distribution of raw measures, such as dN/dS values, log-fold-changes of gene expression, or kME (correlation) values from WGCNA. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
103 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
104 The method can also be run in a traditional mode, looking for GO categories significantly over-represented among "significant genes" (based on Fisher's exact test). To make the method work in this mode, the measure of significance should be binary (1 or 0, i.e., significant or not). |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
105 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
106 **"absValue"**: Genes with the measure value exceeding this value will be counted as "good genes". |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
107 When using signed log(p-values) use the value 1.30103 which corresponds to -log(0.05, 10). Specify the value 0.001 if you are doing |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
108 Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes"). |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
109 Use a value of 1 if you're using log2(fold-change). |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
110 This parameter does not affect the statistics and serves only an illustrative purpose. |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
111 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
112 The method automatically retrieves all the missing parental terms for the lower-level GO categories. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
113 Then, fully redundant GO categories (i.e., containing exactly the same genes) are collapsed under the name of the lower-level (more specific) term. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
114 Then, highly similar categories are merged according to complete linkage clustering based on the fraction of shared genes. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
115 The distance measure for clustering, introduced in Kosiol et al 2008, is the number of genes shared among the two GO categories within the analyzed dataset divided by the size of the smaller of the two categories. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
116 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
117 The resulting hierarchical tree is then “cut” at the adjustable “height” ('Threshold for merging similar (gene-sharing) terms' parameter) to merge clustered categories. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
118 The default for this parameter is 0.25, implying that a group of categories will be merged if the most dissimilar two of them share >75% of genes included in the smaller of the two. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
119 The merged categories inherit the name of the largest one. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
120 This simplifies the GO hierarchy, generates biologically meaningful groups of categories tailored for the particular dataset, and improves the multiple testing situation. |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
121 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
122 In the final plot, the method shows hierarchical clustering of GO categories based on the number of genes shared between them, to indicate which categories might be significant because of the same genes. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
123 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
124 ------ |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
125 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
126 Output Files |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
127 ------------ |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
128 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
129 The plot |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
130 ^^^^^^^^ |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
131 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
132 The plot consists of three parts: |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
133 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
134 | - Hierarchical clustering tree of significant GO categories based on shared genes in the current dataset. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
135 Categories with no branch length between them are subsets of each other and their significance is most likely driven by the same genes. |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
136 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
137 | - Category names, plotted in different colors and fonts. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
138 Fonts indicate the level of statistical significance, colors indicate enrichment of GO categories with either up- (red) or down- (blue) regulated genes. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
139 The category names are preceded by the fraction indicating the number of "good candidates" relative to the total number of genes belonging to this category. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
140 The "good candidates" are the genes exceeding an arbitrary **'absValue'** cutoff in their significance measure. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
141 Adjust the 'absValue' parameter according to what your measure is. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
142 By default it is set to 1, which suits log2(fold-change) measures; if the measure is a signed log p-value, set it to -log(0.05,10) = 1.30103 (so, the "good candidates" would be the ones with raw p-value < 0.05). |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
143 Ideally we would like to see more than one such gene per displayed GO category. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
144 With 'level 1'=1 the script will display all the categories containing "good candidates", which is a good way to summarize the whole GO content of the experiment. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
145 Note that the 'absValue' parameter does not affect the statistics and serves only an illustrative purpose. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
146 In the Fisher-test mode (binary significance measure) and signed WGCNA module analysis the colors are not used; in that case specify absValue=0.001 to make the script display the fraction of genes with non-zero measure within a GO category. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
147 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
148 | - The legend giving the correspondence of the fonts to significance thresholds. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
149 The method corrects the p-values using the Benjamini-Hochberg false discovery rate procedure, except when analyzing WGCNA modules; in that case the false discovery rate is determined from ten permutations where significance measures are randomly shuffled among genes. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
150 To set different thresholds for plotting, change parameters 'Level 1', 'Level 2' and 'Level 3' in the 'Plot tweaking' section. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
151 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
152 In addition, the script prints out the number of GO categories displayed and the fraction of "good candidates" that these categories account for. This is useful to evaluate whether the generated GO summary really accounts for a substantial portion of what was going on. |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
153 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
154 If the labels of the plot are too crowded or too small, you can adjust the 'TextSize for plot labels' parameter and relaunch the analysis. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
155 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
156 The tables |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
157 ^^^^^^^^^^ |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
158 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
159 The script generates 5 tables. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
160 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
161 Augmented GO terms for genes |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
162 main data table containing reformatted and augmented GO terms for each gene (in addition to the originally listed terms, the script finds all their parental terms if any were missing), and measures of interest. |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
163 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
164 Dissimilarity table |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
165 dissimilarity matrix of GO categories based on the number of genes shared between them in the dataset. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
166 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
167 MWU Test |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
168 The results of the MWU test. |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
169 |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
170 The raw data for plot |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
171 The raw data represented in the plot. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
172 |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
173 Best GO terms |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
174 GO terms that best represent *independent* groups of significant GO terms. |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
175 |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
176 |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
177 ]]></help> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
178 <citations> |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
179 <citation type="doi">10.1186/s12864-015-1540-2</citation> |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
180 <citation type="bibtex"> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
181 @misc{githubGO_MWU, |
2
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
182 author = {Matz, Mikhail}, |
5acf9dfdfa27
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
cristian
parents:
1
diff
changeset
|
183 year = {2021}, |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
184 title = {GO_MWU}, |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
185 publisher = {GitHub}, |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
186 journal = {GitHub repository}, |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
187 url = {https://github.com/z0on/GO_MWU}, |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
188 }</citation> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
189 </citations> |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
diff
changeset
|
190 </tool> |