Mercurial > repos > iuc > deepmicro
comparison deepmicro.xml @ 0:77cbfe3e1b0d draft
planemo upload for repository https://github.com/paulzierep/DeepMicro commit 1bbea291a9d77beafaeba83ab775d870ec24719e
author | iuc |
---|---|
date | Tue, 02 May 2023 17:39:13 +0000 |
parents | |
children | c58c1a99578b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:77cbfe3e1b0d |
---|---|
1 <tool id="deepmicro" name="DeepMicro" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description> | |
3 Representation learning and classification framework | |
4 </description> | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 <expand macro="biotools" /> | |
9 <expand macro="requirements" /> | |
10 <expand macro="version" /> | |
11 <command detect_errors="exit_code"><![CDATA[ | |
12 mkdir data && | |
13 mkdir results && | |
14 ln -s '$features' data/features.csv && | |
15 ## DM.py is called once per parameter set; all calls are chained with && | |
16 #if $mode.mode_type == "only_encoding": | |
17 ## encoding only: every call uses --save_rep --no_clf and needs no class labels | |
18 #for $params in $mode.parameter_set: | |
19 ## --pca and --rp define no dm parameter in the inputs, so -dm is only passed for the other types | |
20 #if $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp": | |
21 DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} && | |
22 #else: | |
23 DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} && | |
24 #end if | |
25 #end for | |
26 | |
27 #else: | |
28 ## classification: "no_rl" (Train on input) is a wrapper-only value without dm, so no representation flag, -dm or --save_rep is passed for it | |
29 ln -s '$mode.class_labels' data/labels.csv && | |
30 #for $params in $mode.parameter_set: | |
31 #if $params.rl_type.rl_type_choice == "no_rl": | |
32 DM.py -r 1 -cd features.csv -cl labels.csv -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && | |
33 #elif $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp": | |
34 DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && | |
35 #else: | |
36 DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && | |
37 #end if | |
38 #end for | |
39 | |
40 #end if | |
41 | |
42 echo Done ! | |
43 ]]> | |
44 </command> | |
45 <inputs> | |
46 <param argument="--features" type="data" format="tabular" label="Feature table" help="Dataset containing the features of samples"/> | |
47 <conditional name="mode"> <!-- selects which branch of the command template is rendered --> | |
48 <param name="mode_type" type="select" label="Mode" help="The tool can either only create a latent | |
49 representation of the data or create a latent representation of the data and cross validate a classifier using that encoding."> | |
50 <option value="only_encoding">Create only encoding</option> | |
51 <option value="e_and_c">Create encoding and cross validate a classifier</option> | |
52 </param> | |
53 <when value="only_encoding"> | |
54 <repeat name="parameter_set" title="Parameter Set" min="1"> <!-- min="1": without a parameter set the command runs no DM.py call and no output is created --> | |
55 <conditional name="rl_type"> | |
56 <param name="rl_type_choice" type="select" label="Representation learning type" help="The type of representation learning" > | |
57 <option value="--pca">PCA</option> | |
58 <option value="--rp">Random Projection</option> | |
59 <option value="--ae">Autoencoder or Deep Autoencoder</option> | |
60 <option value="--vae">Variational Autoencoder</option> | |
61 <option value="--cae">Convolutional Autoencoder</option> | |
62 </param> | |
63 <when value="--pca"/> <!-- no additional parameters --> | |
64 <when value="--rp"/> <!-- no additional parameters --> | |
65 <when value="--ae"> | |
66 <expand macro="dm" /> | |
67 </when> | |
68 <when value="--vae"> | |
69 <expand macro="dm" /> | |
70 </when> | |
71 <when value="--cae"> | |
72 <expand macro="dm" /> | |
73 </when> | |
74 </conditional> | |
75 </repeat> | |
76 </when> | |
77 <when value="e_and_c"> | |
78 <param argument="--class_labels" type="data" format="tabular" label="Class labels" help="Dataset containing the class labels corresponding to the features"/> | |
79 <repeat name="parameter_set" title="Parameter Set" min="1"> <!-- min="1": without a parameter set the command runs no DM.py call and no results file is created --> | |
80 <conditional name="rl_type"> | |
81 <param name="rl_type_choice" type="select" label="Representation learning type" help="The type of representation learning. `Train on input` trains the classifier on the input features without representation learning" > | |
82 <option value="--pca">PCA</option> | |
83 <option value="--rp">Random Projection</option> | |
84 <option value="--ae">Autoencoder or Deep Autoencoder</option> | |
85 <option value="--vae">Variational Autoencoder</option> | |
86 <option value="--cae">Convolutional Autoencoder</option> | |
87 <option value="no_rl">Train on input</option> | |
88 </param> | |
89 <when value="no_rl"> <!-- classifier choice only, no dm parameter --> | |
90 <expand macro="clfs" /> | |
91 </when> | |
92 <when value="--pca"> | |
93 <expand macro="clfs" /> | |
94 </when> | |
95 <when value="--rp"> | |
96 <expand macro="clfs" /> | |
97 </when> | |
98 <when value="--ae"> | |
99 <expand macro="dm" /> | |
100 <expand macro="clfs" /> | |
101 </when> | |
102 <when value="--vae"> | |
103 <expand macro="dm" /> | |
104 <expand macro="clfs" /> | |
105 </when> | |
106 <when value="--cae"> | |
107 <expand macro="dm" /> | |
108 <expand macro="clfs" /> | |
109 </when> | |
110 </conditional> | |
111 </repeat> | |
112 </when> | |
113 </conditional> | |
114 </inputs> | |
115 <outputs> | |
116 <data name="results" format="tabular" from_work_dir="./results/*_result.txt" label="${tool.name} on ${on_string}: Results"> | |
117 <!-- the results file is only produced when a classifier is cross validated --> | |
118 <filter>mode["mode_type"] == "e_and_c"</filter> | |
119 </data> | |
120 <collection name="encoded_features" type="list" label="${tool.name} on ${on_string}: Encoded Features"> | |
121 <!-- In classification mode the tool only generates the encoded features of the training set, which is not very useful, so the collection is omitted there. | |
122 TODO: change the tool to export the encoded features of the complete dataset also when classification is performed. --> | |
123 <filter>mode["mode_type"] == "only_encoding"</filter> | |
124 <discover_datasets directory="results" pattern="(?P&lt;designation&gt;.*)_rep\.csv" format="tabular" visible="false" /> | |
125 </collection> | |
126 </outputs> | |
127 <tests> | |
128 <!-- Outputs are only checked for line counts or expected labels, never for exact values. --> | |
129 <!-- mode: only encoding --> | |
130 <!-- test a single parameter set --> | |
131 | |
132 <test expect_num_outputs="1"> | |
133 <param name="mode_type" value="only_encoding" /> | |
134 <param name="features" value="UserDataExample.csv" /> | |
135 <param name="rl_type_choice" value="--ae" /> | |
136 <param name="dm" value="40" /> | |
137 <output_collection name="encoded_features" type="list"> | |
138 <!-- output is non-deterministic, so only the number of lines is asserted --> | |
139 <element name="AE[40]_features" ftype="tabular" > | |
140 <assert_contents> | |
141 <has_n_lines n="20"/> | |
142 <!-- <has_n_columns n="40" sep="," /> --> | |
143 </assert_contents> | |
144 </element> | |
145 </output_collection> | |
146 </test> | |
147 <!-- single parameter set using PCA, which takes no dm parameter --> | |
148 <test expect_num_outputs="1"> | |
149 <param name="mode_type" value="only_encoding" /> | |
150 <param name="features" value="UserDataExample.csv" /> | |
151 <param name="rl_type_choice" value="--pca" /> | |
152 <output_collection name="encoded_features" type="list"> | |
153 <element name="PCA_features" ftype="tabular" > | |
154 <assert_contents> | |
155 <has_n_lines n="20"/> | |
156 <!-- <has_n_columns n="40" sep="," /> --> | |
157 </assert_contents> | |
158 </element> | |
159 </output_collection> | |
160 </test> | |
161 | |
162 <!-- test multiple parameter sets: one collection element is expected per set --> | |
163 <test expect_num_outputs="1"> | |
164 <param name="features" value="UserDataExample.csv" /> | |
165 <conditional name="mode"> | |
166 <param name="mode_type" value="only_encoding" /> | |
167 | |
168 <repeat name="parameter_set"> | |
169 <conditional name="rl_type"> | |
170 <param name="rl_type_choice" value="--pca" /> | |
171 </conditional> | |
172 </repeat> | |
173 | |
174 <repeat name="parameter_set"> | |
175 <conditional name="rl_type"> | |
176 <param name="rl_type_choice" value="--ae" /> | |
177 <param name="dm" value="40" /> | |
178 </conditional> | |
179 </repeat> | |
180 | |
181 </conditional> | |
182 | |
183 <output_collection name="encoded_features" type="list"> | |
184 <element name="AE[40]_features" ftype="tabular" > | |
185 <assert_contents> | |
186 <has_n_lines n="20"/> | |
187 <!-- <has_n_columns n="40" sep="," /> --> | |
188 </assert_contents> | |
189 </element> | |
190 <element name="PCA_features" ftype="tabular" > | |
191 <assert_contents> | |
192 <has_n_lines n="20"/> | |
193 <!-- <has_n_columns n="40" sep="," /> --> | |
194 </assert_contents> | |
195 </element> | |
196 </output_collection> | |
197 | |
198 </test> | |
199 | |
200 <!-- mode: encoding and cross-validated classification --> | |
201 <!-- test a single parameter set --> | |
202 | |
203 <test expect_num_outputs="1"> | |
204 <param name="features" value="UserDataExample.csv" /> | |
205 <param name="mode_type" value="e_and_c" /> | |
206 <param name="class_labels" value="UserLabelExample.csv" /> | |
207 <param name="rl_type_choice" value="--vae" /> | |
208 <param name="dm" value="40" /> | |
209 <param name="classifier" value="rf" /> | |
210 <output ftype="tabular" name="results" > | |
211 <assert_contents> | |
212 <has_text text="VAE[40]_rf" /> | |
213 </assert_contents> | |
214 </output> | |
215 | |
216 </test> | |
217 | |
218 <!-- test multiple parameter sets: both runs must show up in the single results file --> | |
219 <test expect_num_outputs="1"> | |
220 <param name="features" value="UserDataExample.csv" /> | |
221 <conditional name="mode"> | |
222 <param name="mode_type" value="e_and_c" /> | |
223 <param name="class_labels" value="UserLabelExample.csv" /> | |
224 | |
225 <repeat name="parameter_set"> | |
226 <conditional name="rl_type"> | |
227 <param name="rl_type_choice" value="--cae" /> | |
228 <param name="dm" value="20" /> | |
229 <param name="classifier" value="rf" /> | |
230 </conditional> | |
231 </repeat> | |
232 | |
233 <repeat name="parameter_set"> | |
234 <conditional name="rl_type"> | |
235 <param name="rl_type_choice" value="--vae" /> | |
236 <param name="dm" value="40" /> | |
237 <param name="classifier" value="mlp" /> | |
238 </conditional> | |
239 </repeat> | |
240 | |
241 </conditional> | |
242 | |
243 <output ftype="tabular" name="results" > | |
244 <assert_contents> | |
245 <has_text text="CAE[20]_rf" /> | |
246 <has_text text="VAE[40]_mlp" /> | |
247 </assert_contents> | |
248 </output> | |
249 | |
250 </test> | |
251 | |
252 </tests> | |
253 <help> | |
254 <![CDATA[ | |
255 DeepMicro is a deep representation learning framework exploiting various autoencoders | |
256 to learn robust low-dimensional representations from high-dimensional data and training | |
257 classification models based on the learned representation. | |
258 | |
259 ====================================== | |
260 Option 1) Only representation learning | |
261 ====================================== | |
262 | |
263 The representation learning does not require class labels and can be learned from the features alone. | |
264 The wrapper allows exploring multiple parameters (i.e. different modes to | |
265 generate the features, please refer to the publication for details); for each | |
266 added parameter set the encoded features are generated. Those features can then be passed to subsequent ML tools, | |
267 such as `Ensemble methods for classification and regression` or `Split Dataset into training and test subsets`. | |
268 | |
269 ===================================================== | |
270 Option 2) Representation learning and classification | |
271 ===================================================== | |
272 | |
273 The tool itself can also evaluate the performance of the generated representation for different | |
274 classifiers internally using 5-fold cross-validation (CV). The wrapper allows exploring multiple parameters and classifiers. | |
275 Each parameter run is stored as a line in the result file. If this option is chosen, the latent representation is not | |
276 exported as output. To create the latent representation of the complete feature set, run the tool again | |
277 with the same parameters using the `Create only encoding` mode. | |
278 The header of the result file is: | |
279 | |
280 '{Encoding}_{classifier}, AUC, ACC, Recall, Precision, F1_score, time-end, runtime(sec), classfication time(sec), best hyper-parameter' | |
281 | |
282 The overall schema of the tool is shown in: | |
283 | |
284 .. image:: ML_Workflow.png | |
285 ]]> | |
286 </help> | |
287 <expand macro="citations" /> | |
288 <expand macro="creator" /> | |
289 </tool> |