comparison deepmicro.xml @ 0:77cbfe3e1b0d draft

planemo upload for repository https://github.com/paulzierep/DeepMicro commit 1bbea291a9d77beafaeba83ab775d870ec24719e
author iuc
date Tue, 02 May 2023 17:39:13 +0000
parents
children c58c1a99578b
comparison
equal deleted inserted replaced
-1:000000000000 0:77cbfe3e1b0d
1 <tool id="deepmicro" name="DeepMicro" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>
3 Representation learning and classification framework
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="biotools" />
9 <expand macro="requirements" />
10 <expand macro="version" />
11 <command detect_errors="exit_code"><![CDATA[
12 mkdir data &&
13 mkdir results &&
14 ln -s '$features' data/features.csv &&
15 ## Inputs are linked into data/ and passed to DM.py by bare file name; the outputs section collects from results/.
16 #if $mode.mode_type == "only_encoding":
17 ## Encoding only: one DM.py call per parameter set, with classification switched off (--no_clf).
18 #for $params in $mode.parameter_set:
19 ## The --pca and --rp cases define no dm parameter in the inputs section, so -dm is passed only for the other types.
20 #if $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp":
21 DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} &&
22 #else:
23 DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} &&
24 #end if
25 #end for
26
27 #else:
28 ## Encoding and classification: the class labels are linked next to the feature table.
29 ln -s '$mode.class_labels' data/labels.csv &&
30 ## "no_rl" (Train on input) is not a command-line flag and its case defines no dm parameter, so DM.py is called without a representation option.
31 #for $params in $mode.parameter_set:
32 #if $params.rl_type.rl_type_choice == "no_rl":
33 DM.py -r 1 -cd features.csv -cl labels.csv -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} &&
34 #elif $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp":
35 DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} &&
36 #else:
37 DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} &&
38 #end if
39 #end for
40 #end if
41
42 echo Done !
43 ]]>
44 </command>
45 <inputs>
46     <param type="data" format="tabular" argument="--features" label="Feature table" help="Dataset containing the features of samples"/>
47     <conditional name="mode"> <!-- switches between encoding only and encoding plus classifier cross validation -->
48         <param type="select" name="mode_type" label="Mode"
49                help="The tool can either only create a latent representation of the data or create a latent representation of the data and cross validate a classifier using that encoding.">
50             <option value="only_encoding">Create only encoding</option>
51             <option value="e_and_c">Create encoding and cross validate a classifier</option>
52         </param>
53         <when value="only_encoding">
54             <repeat title="Parameter Set" name="parameter_set"> <!-- each repeat entry is one run of the tool -->
55                 <conditional name="rl_type">
56                     <param type="select" name="rl_type_choice" label="Representation learning type" help="The type of representation learning">
57                         <option value="--pca">PCA</option>
58                         <option value="--rp">Random Projection</option>
59                         <option value="--ae">Autoencoder or Deep Autoencoder</option>
60                         <option value="--vae">Variational Autoencoder</option>
61                         <option value="--cae">Convolutional Autoencoder</option>
62                     </param>
63                     <when value="--ae">
64                         <expand macro="dm"/>
65                     </when>
66                     <when value="--vae">
67                         <expand macro="dm"/>
68                     </when>
69                     <when value="--cae">
70                         <expand macro="dm"/>
71                     </when>
72                     <when value="--pca"/> <!-- PCA needs no further options -->
73                     <when value="--rp"/> <!-- random projection needs no further options -->
74                 </conditional>
75             </repeat>
76         </when>
77         <when value="e_and_c">
78             <param type="data" format="tabular" argument="--class_labels" label="Class labels" help="Dataset containing the class labels corresponding to the features"/>
79             <repeat title="Parameter Set" name="parameter_set"> <!-- every case below also expands the clfs macro -->
80                 <conditional name="rl_type">
81                     <param type="select" name="rl_type_choice" label="Representation learning type" help="The type of representation learning. `Train on input` trains the classifier on the input features without representation learning">
82                         <option value="--pca">PCA</option>
83                         <option value="--rp">Random Projection</option>
84                         <option value="--ae">Autoencoder or Deep Autoencoder</option>
85                         <option value="--vae">Variational Autoencoder</option>
86                         <option value="--cae">Convolutional Autoencoder</option>
87                         <option value="no_rl">Train on input</option>
88                     </param>
89                     <when value="--pca">
90                         <expand macro="clfs"/>
91                     </when>
92                     <when value="--rp">
93                         <expand macro="clfs"/>
94                     </when>
95                     <when value="--ae">
96                         <expand macro="dm"/>
97                         <expand macro="clfs"/>
98                     </when>
99                     <when value="--vae">
100                         <expand macro="dm"/>
101                         <expand macro="clfs"/>
102                     </when>
103                     <when value="--cae">
104                         <expand macro="dm"/>
105                         <expand macro="clfs"/>
106                     </when>
107                     <when value="no_rl">
108                         <expand macro="clfs"/>
109                     </when>
110                 </conditional>
111             </repeat>
112         </when>
113     </conditional>
114 </inputs>
115 <outputs>
116 <data name="results" format="tabular" from_work_dir="./results/*_result.txt" label="${tool.name} on ${on_string}: Results">
117 <!-- The result table only exists when a classifier is cross validated, so it is offered in the e_and_c mode only -->
118 <filter>mode["mode_type"] == "e_and_c"</filter>
119 </data>
120 <collection name="encoded_features" type="list" label="Encoded Features">
121 <!-- When classification is performed, the encoded features written by the tool only cover the training set, which is not very useful, so the collection is offered in the only_encoding mode only.
122 TODO: change the tool to export the encoded features of the complete dataset also when classification is performed -->
123 <filter>mode["mode_type"] == "only_encoding"</filter>
124 <discover_datasets directory="results" pattern="(?P&lt;designation&gt;.*)_rep\.csv" format="tabular" visible="false" />
125 </collection>
126 </outputs>
127 <tests>
128
129 <!-- Mode only_encoding: the encoded_features collection is the single expected output -->
130 <!-- one parameter set given with flat parameter names (autoencoder, dm=40) -->
131
132 <test expect_num_outputs="1">
133 <param name="mode_type" value="only_encoding" />
134 <param name="features" value="UserDataExample.csv" />
135 <param name="rl_type_choice" value="--ae" />
136 <param name="dm" value="40" />
137 <output_collection name="encoded_features" type="list">
138 <!-- output is non-deterministic, so only the number of lines is asserted -->
139 <element name="AE[40]_features" ftype="tabular" >
140 <assert_contents>
141 <has_n_lines n="20"/>
142 <!-- <has_n_columns n="40" sep="," /> -->
143 </assert_contents>
144 </element>
145 </output_collection>
146 </test>
147 <!-- one parameter set using PCA, for which no dm value is given -->
148 <test expect_num_outputs="1">
149 <param name="mode_type" value="only_encoding" />
150 <param name="features" value="UserDataExample.csv" />
151 <param name="rl_type_choice" value="--pca" />
152 <output_collection name="encoded_features" type="list">
153 <element name="PCA_features" ftype="tabular" >
154 <assert_contents>
155 <has_n_lines n="20"/>
156 <!-- <has_n_columns n="40" sep="," /> -->
157 </assert_contents>
158 </element>
159 </output_collection>
160 </test>
161
162 <!-- two parameter sets (PCA and autoencoder) given through explicit conditional/repeat nesting; one collection element is asserted per set -->
163 <test expect_num_outputs="1">
164 <param name="features" value="UserDataExample.csv" />
165 <conditional name="mode">
166 <param name="mode_type" value="only_encoding" />
167
168 <repeat name="parameter_set">
169 <conditional name="rl_type">
170 <param name="rl_type_choice" value="--pca" />
171 </conditional>
172 </repeat>
173
174 <repeat name="parameter_set">
175 <conditional name="rl_type">
176 <param name="rl_type_choice" value="--ae" />
177 <param name="dm" value="40" />
178 </conditional>
179 </repeat>
180
181 </conditional>
182
183 <output_collection name="encoded_features" type="list">
184 <element name="AE[40]_features" ftype="tabular" >
185 <assert_contents>
186 <has_n_lines n="20"/>
187 <!-- <has_n_columns n="40" sep="," /> -->
188 </assert_contents>
189 </element>
190 <element name="PCA_features" ftype="tabular" >
191 <assert_contents>
192 <has_n_lines n="20"/>
193 <!-- <has_n_columns n="40" sep="," /> -->
194 </assert_contents>
195 </element>
196 </output_collection>
197
198 </test>
199
200 <!-- Mode e_and_c: the results table is the single expected output -->
201 <!-- one parameter set (variational autoencoder, dm=40, classifier rf); the results must contain the text VAE[40]_rf -->
202
203 <test expect_num_outputs="1">
204 <param name="features" value="UserDataExample.csv" />
205 <param name="mode_type" value="e_and_c" />
206 <param name="class_labels" value="UserLabelExample.csv" />
207 <param name="rl_type_choice" value="--vae" />
208 <param name="dm" value="40" />
209 <param name="classifier" value="rf" />
210 <output ftype="tabular" name="results" >
211 <assert_contents>
212 <has_text text="VAE[40]_rf" />
213 </assert_contents>
214 </output>
215
216 </test>
217
218 <!-- two parameter sets in one run; the results must contain both CAE[20]_rf and VAE[40]_mlp -->
219 <test expect_num_outputs="1">
220 <param name="features" value="UserDataExample.csv" />
221 <conditional name="mode">
222 <param name="mode_type" value="e_and_c" />
223 <param name="class_labels" value="UserLabelExample.csv" />
224
225 <repeat name="parameter_set">
226 <conditional name="rl_type">
227 <param name="rl_type_choice" value="--cae" />
228 <param name="dm" value="20" />
229 <param name="classifier" value="rf" />
230 </conditional>
231 </repeat>
232
233 <repeat name="parameter_set">
234 <conditional name="rl_type">
235 <param name="rl_type_choice" value="--vae" />
236 <param name="dm" value="40" />
237 <param name="classifier" value="mlp" />
238 </conditional>
239 </repeat>
240
241 </conditional>
242
243 <output ftype="tabular" name="results" >
244 <assert_contents>
245 <has_text text="CAE[20]_rf" />
246 <has_text text="VAE[40]_mlp" />
247 </assert_contents>
248 </output>
249
250 </test>
251
252 </tests>
253 <help>
254 <![CDATA[
255 DeepMicro is a deep representation learning framework exploiting various autoencoders
256 to learn robust low-dimensional representations from high-dimensional data and training
257 classification models based on the learned representation.
258
259 ======================================
260 Option 1) Only representation learning
261 ======================================
262
263 The representation learning does not require class labels and can be learned from the features alone.
264 The wrapper allows exploring multiple parameter sets (i.e. different modes to
265 generate the features; please refer to the publication for details). For each
266 added parameter set the encoded features are generated. Those features can then be passed to subsequent ML tools,
267 such as `Ensemble methods for classification and regression` or `Split Dataset into training and test subsets`.
268
269 =====================================================
270 Option 2) Representation learning and classification
271 =====================================================
272
273 The tool itself can also evaluate the performance of the learned representation for different
274 classifiers internally using 5-fold cross-validation (CV). The wrapper allows exploring multiple parameter sets and classifiers.
275 Each parameter run will be stored as a line in the result file. If this option is chosen, the latent representation is not
276 exported as output. To create the latent representation of the complete feature set, run the tool again
277 with the same parameters using the `Only representation learning` option.
278 The header of the result file is:
279
280 '{Encoding}_{classifier}, AUC, ACC, Recall, Precision, F1_score, time-end, runtime(sec), classfication time(sec), best hyper-parameter'
281
282 The overall schema of the tool is shown in:
283
284 .. image:: ML_Workflow.png
285 ]]>
286 </help>
287 <expand macro="citations" />
288 <expand macro="creator" />
289 </tool>