comparison GO_terms_enrich_comparison.xml @ 0:04f363ee805a draft

planemo upload commit c9d70181a2b587e53dcc4b5885b74b625def6b8c-dirty
author proteore
date Tue, 10 Dec 2019 04:15:39 -0500
parents
children 528652235016
comparison
equal deleted inserted replaced
-1:000000000000 0:04f363ee805a
1 <tool id="go_terms_enrich_comparison" name="GO terms enrich comparison" version="2019.11.19.1"> <!-- Galaxy wrapper that runs GO_terms_enrich_comparison.R; the display name carries no trailing blank. -->
2 <description>(Human, Mouse, Rat)[clusterProfiler]</description>
3 <requirements> <!-- Software dependencies declared for this tool. -->
4 <requirement type="package">R</requirement> <!-- NOTE(review): no version is pinned here, unlike the entries below - confirm the intended R release. -->
5 <requirement version="3.8.2" type="package">bioconductor-org.hs.eg.db</requirement> <!-- One org.*.eg.db annotation package per species offered in the inputs. -->
6 <requirement version="3.8.2" type="package">bioconductor-org.mm.eg.db</requirement>
7 <requirement version="3.8.2" type="package">bioconductor-org.rn.eg.db</requirement>
8 <requirement version="3.10.2" type="package">bioconductor-dose</requirement>
9 <requirement version="3.12.0" type="package">bioconductor-clusterprofiler</requirement>
10 </requirements>
11 <command detect_errors="exit_code"><![CDATA[
12 ## Runs the R script with one --input.N / --name.N / --inputtype.N group per entry of the "lists" repeat.
13 ## Every interpolated value is single-quoted so the shell never expands characters typed by the user.
14 Rscript '$__tool_directory__/GO_terms_enrich_comparison.R'
15
16 --nb=$len($lists)
17
18 #for $index, $list in enumerate($lists)
19
20 #if $list.input.ids == "file"
21 --input.$index='$list.input.file' --name.$index='$list.list_name' --inputtype.$index='file' --header.$index='$list.input.header' --column.$index='$list.input.ncol'
22 #else
23 --input.$index='$list.input.txt' --name.$index='$list.list_name' --inputtype.$index='text'
24 #end if
25
26 #end for
27
28 --org='$species'
29 --ont='$ontology'
30
31 ## Standard output of the script is captured in the "log" output dataset.
32 > '$log'
33
34
35 ]]></command>
36 <inputs>
37
38 <repeat name="lists" title="List to compare" min="2" max="5" >
39 <!-- Each entry supplies one ID list (pasted text or a column of a dataset) plus a name for that list; 2 to 5 entries. -->
40 <conditional name="input" >
41 <param name="ids" type="select" label="Enter your Gene ID list" help="Copy/paste or from a file (e.g. table)" >
42 <option value="text">Copy/paste your IDs</option>
43 <option value="file" selected="true">Input file containing your IDs</option>
44 </param>
45 <when value="text" >
46 <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: 2810 7534' >
47 <sanitizer> <!-- Printable characters are kept, except the single quote, which is replaced by __sq__. -->
48 <valid initial="string.printable">
49 <remove value="&apos;"/>
50 </valid>
51 <mapping initial="none">
52 <add source="&apos;" target="__sq__"/>
53 </mapping>
54 </sanitizer>
55 </param>
56 </when>
57 <when value="file" >
58 <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs" help="" />
59 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
60 <param name="ncol" type="text" value="c1" label="Column number of IDs" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'> <!-- Pattern is anchored: the whole value must be an optional "c" followed by digits. -->
61 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
62 </param>
63 </when>
64 </conditional>
65 <!-- Name given to this list; same single-quote sanitizing as the pasted-ID field. -->
66 <param name="list_name" type="text" label="name of your list" value="list" optional="false">
67 <sanitizer>
68 <valid initial="string.printable">
69 <remove value="&apos;"/>
70 </valid>
71 <mapping initial="none">
72 <add source="&apos;" target="__sq__"/>
73 </mapping>
74 </sanitizer>
75 <validator type="regex" message="Please enter a list name">[a-zA-Z0-9._-]+</validator> <!-- NOTE(review): unanchored, so presumably only the leading character is checked - confirm whether names with spaces should stay allowed. -->
76 </param>
77
78 </repeat>
79
80 <param name="species" label="Species" type="select"> <!-- Annotation package for the chosen organism; handed to the script as its org option. -->
81 <option value="org.Hs.eg.db">Human (Homo sapiens) </option>
82 <option value="org.Mm.eg.db">Mouse (Mus musculus) </option>
83 <option value="org.Rn.eg.db">Rat (Rattus norvegicus)</option>
84 </param>
85 <param name="ontology" label="Select GO terms category" type="select" multiple="true" display="checkboxes" optional="false"> <!-- One or more GO categories; the selection is mandatory and is handed to the script as its ont option. -->
86 <option value="CC">Cellular Component</option>
87 <option value="BP">Biological Process</option>
88 <option value="MF">Molecular Function</option>
89 </param>
90 </inputs>
91
92 <outputs>
93 <data name="log" format="txt" label="GO enrich comparison" /> <!-- Receives the script's standard output, redirected in the command. -->
94 <collection type="list" label="GO enrichComparison results Table" name="text_output">
95 <discover_datasets pattern="(?P&lt;designation&gt;.+\.tsv)" ext="tsv"/>
96 </collection>
97 <collection type="list" label="GO enrichComparison results Diagram" name="graph_output" >
98 <discover_datasets pattern="(?P&lt;designation&gt;.+\.png)" ext="png" /> <!-- Dot is escaped so only names ending in a literal ".png" are collected, mirroring the table pattern. -->
99 </collection>
100 </outputs>
101
102 <tests>
103 <test> <!-- Two file-based lists named UP and DN, human annotation, Biological Process only. -->
104 <repeat name="lists">
105 <conditional name="input">
106 <param name="ids" value="file" />
107 <param name="file" value="Gene_ID_BreastUp.txt" />
108 <param name="header" value="true" />
109 <param name="ncol" value="c7" />
110 </conditional>
111 <param name="list_name" value="UP" /> <!-- list_name is declared beside the conditional in the repeat, not inside it. -->
112 </repeat>
113 <repeat name="lists">
114 <conditional name="input">
115 <param name="ids" value="file" />
116 <param name="file" value="Gene_ID_BreastDn.txt" />
117 <param name="header" value="true" />
118 <param name="ncol" value="c7" />
119 </conditional>
120 <param name="list_name" value="DN" />
121 </repeat>
122
123 <param name="species" value="org.Hs.eg.db"/>
124 <param name="ontology" value="BP"/>
125 <output name="log" file="log.txt" />
126 <output_collection name="text_output">
127 <element name="GO_enrich_comparison_BP.tsv" file="GO_enrich_comparison_BP.tsv" ftype="tsv"/>
128 </output_collection>
129 <output_collection name="graph_output">
130 <element name="GO_enrich_comparison_BP.png" file="GO_enrich_comparison_BP.png" ftype="png"/>
131 </output_collection>
132 </test>
133 </tests>
134 <help><![CDATA[
135
136 **Description**
137
138 This tool is based on the R package clusterProfiler and performs GO term classification and enrichment analyses on gene/protein sets (e.g. given a set of genes that are up-regulated under certain conditions, an enrichment analysis will find which GO terms are over-represented (or under-represented) using annotations for that gene/protein set).
139
140 Given a list of IDs, this tool:
141
142 (i) performs gene classification based on GO distribution at a specific level,
143
144 (ii) calculates GO categories enrichment (over- or under-representation) for the IDs of the input list, compared to a background. The user can use a background corresponding to the whole organism or to a user-defined list. In the latter case, we recommend using the "Build tissue-specific expression dataset" ProteoRE tool to create this list according to your needs.
145
146 -----
147
148 **Input**
149
150 Two modes are allowed: either by supplying a tabular file (.csv, .tsv, .txt, .tab) including your IDs (identifiers) or by copy/pasting your IDs (separated by a space).
151
152 "Select type/source of IDs": only Entrez gene IDs (e.g. 4151, 7412) or UniProt accession numbers (e.g. P31946) are allowed. If your list is not in this form, please use the ID_Converter tool of ProteoRE.
153
154 -----
155
156 **Parameters**
157
158 "Species": the three supported species are Homo sapiens, Mus musculus and Rattus norvegicus
159
160 "Perform GO categories representation analysis?": classify genes based on their projection at a specific level of the GO corpus (see parameter below), and provides functions (set to "Yes")
161
162 "Ontology level (the higher this number, the deeper the GO level)": corresponds to the level of the GO hierarchy (from 1 to 3) (set to level "2" by default). In general, the higher the level, the more semantically specific the term is.
163
164 "Perform GO categories enrichment analysis?": calculate enrichment test for GO terms based on hypergeometric distribution (set to "Yes")
165
166 "P-value cut off": P-value threshold value for the declaration of significance (default is < 0.01)
167
168 "Q-value cut off": to prevent high false discovery rate (FDR) in multiple testing, Q-values (adjusted P-values) are estimated for FDR control. (default is < 0.05)
169
170 "Define your own background IDs?": by default the whole genome/proteome is used as a reference background to compute the enrichment. As this reference set should normally only include genes/proteins that were monitored during your analysis, this option allows you to provide your own background; this could be, for instance, the total number of genes/proteins expressed in the tissue/sample under study.
171
172 If you want to use your own background, click on the "Yes" button. Your gene/protein set must be a list of Entrez gene ID or Uniprot accession number (otherwise, use the ID-Converter tool of ProteoRE). Select the file containing your list of ID (as background), then specify the column number which contains IDs and the type of IDs (gene Entrez or Uniprot Accession number) as requested.
173
174 Of note: for Human species, you can build your own background by using the "Build tissue-specific expression dataset" tool of ProteoRE.
175
176 -----
177
178 **Output**
179
180 Diagram output: graphical output in the form of bar-plot or dot-plot (png, jpeg or pdf format), one figure for each GO category.
181 Text tables: with the following information: GO category description (e.g. BP.Description), GO term identifier (e.g. BP.GOID) and GO term frequency (e.g. BP.Frequency), and graphics representing the distribution and/or enrichment of GO categories. One table and one graphic will be produced for each GO category.
182
183 -----
184
185 **Authors**
186 G Yu, LG Wang, Y Han, QY He. clusterProfiler: an R package for comparing biological themes among gene clusters.
187 OMICS: A Journal of Integrative Biology 2012, 16(5):284-287. doi: `10.1089/omi.2011.0118 <http://dx.doi.org/10.1089/omi.2011.0118>`_
188
189 User manual / Documentation of the clusterProfiler R package (functions and parameters):
190 https://bioconductor.org/packages/3.7/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html
191
192 -----
193
194 .. class:: infomark
195
196 **Galaxy integration**
197
198 Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
199
200 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
201
202 This work has been partially funded through the French National Agency for Research (ANR) IFB project.
203
204 Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
205
206
207 ]]></help>
208 <citations><citation type="doi">10.1089/omi.2011.0118</citation> <!-- clusterProfiler paper, the same DOI quoted in the help text. -->
209 </citations>
210 </tool>