comparison pca.xml @ 0:2d7016b3ae92 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2afb24f3c81d625312186750a714d702363012b5"
author bgruening
date Fri, 02 Oct 2020 08:45:21 +0000
parents
children 132805688fa3
comparison
equal deleted inserted replaced
-1:000000000000 0:2d7016b3ae92
1 <tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@">
2 <description>with scikit-learn</description>
3 <macros>
4 <import>main_macros.xml</import>
5 <token name="@GALAXY_VERSION@">0</token>
6 </macros> <!-- @GALAXY_VERSION@ is the suffix after "+galaxy" in the tool's version attribute; @VERSION@ is not defined in this file, presumably it comes from main_macros.xml (verify there). -->
7 <expand macro="python_requirements"/>
8 <expand macro="macro_stdio"/>
9 <version_command>echo "@VERSION@"</version_command> <!-- Echoes the @VERSION@ token as the reported version. -->
10 <command detect_errors="exit_code">
11 <![CDATA[
12 python '$__tool_directory__/pca.py' ## values below are single-quoted; optional numeric fields are passed only when filled in
13 -i '$infile'
14 $header ## boolean input: expands to --header or to an empty string
15 -c '$column_selector_options.selected_column_selector_option'
16 #if $column_selector_options.selected_column_selector_option != 'all_columns'
17 -ci '$column_selector_options.col1'
18 #end if
19 #if str($select_pca_type.number) != ''
20 -n '$select_pca_type.number'
21 #end if
22 -t '$select_pca_type.select_pca_opts'
23 #if $select_pca_type.select_pca_opts == 'classical'
24 -s '$select_pca_type.select_solver_type.svd_solver_opts'
25 #if $select_pca_type.select_solver_type.svd_solver_opts == 'arpack'
26 -tol '$select_pca_type.select_solver_type.tolerance'
27 #end if
28 $select_pca_type.whiten
29 #elif $select_pca_type.select_pca_opts == 'incremental'
30 #if str($select_pca_type.batch_size) != ''
31 -b '$select_pca_type.batch_size'
32 #end if
33 $select_pca_type.whiten
34 #elif $select_pca_type.select_pca_opts == 'kernel'
35 -k '$select_pca_type.select_kernel_opts.kernel_opts'
36 #if $select_pca_type.select_kernel_opts.kernel_opts == 'poly'
37 #if str($select_pca_type.select_kernel_opts.gamma) != ''
38 -g '$select_pca_type.select_kernel_opts.gamma'
39 #end if
40 -d '$select_pca_type.select_kernel_opts.degree'
41 -cf '$select_pca_type.select_kernel_opts.coef0'
42 #elif $select_pca_type.select_kernel_opts.kernel_opts == 'rbf'
43 #if str($select_pca_type.select_kernel_opts.gamma) != ''
44 -g '$select_pca_type.select_kernel_opts.gamma'
45 #end if
46 #elif $select_pca_type.select_kernel_opts.kernel_opts == 'sigmoid'
47 #if str($select_pca_type.select_kernel_opts.gamma) != ''
48 -g '$select_pca_type.select_kernel_opts.gamma'
49 #end if
50 -cf '$select_pca_type.select_kernel_opts.coef0'
51 #end if
52 -e '$select_pca_type.select_solver_type.eigen_solver_opts'
53 #if $select_pca_type.select_solver_type.eigen_solver_opts == 'arpack'
54 -tol '$select_pca_type.select_solver_type.tolerance'
55 #if str($select_pca_type.select_solver_type.max_iter) != ''
56 -mi '$select_pca_type.select_solver_type.max_iter'
57 #end if
58 #end if
59 #end if
60 -o '$outfile'
61 ]]>
62 </command>
63 <inputs>
64 <param name="infile" label="Input file" type="data" format="tabular"/>
65 <param name="header" label="Exclude Header" help="If present, exclude the header row from the dataset" type="boolean" truevalue="--header" falsevalue=""/>
66 <conditional name="column_selector_options">
67 <expand macro="samples_column_selector_options" infile="infile" col_name="col1" column_option="selected_column_selector_option" multiple="true"/>
68 </conditional>
69 <conditional name="select_pca_type">
70 <param name="select_pca_opts" label="Select PCA Type" help="Choose which flavour of PCA to use" type="select">
71 <option selected="true" value="classical">Classical PCA</option>
72 <option value="incremental">Incremental PCA</option>
73 <option value="kernel">Kernel PCA</option>
74 </param> <!-- each option value has a matching "when" branch in the enclosing conditional -->
75 <when value="classical">
76 <param name="number" label="Number of components" help="Number of components to keep. If not set, all components are kept" type="integer" optional="true"/>
77 <param name="whiten" label="Whiten Components" help="Setting this option will reduce the redundancy and correlations between the features" type="boolean" truevalue="--whiten" falsevalue=""/>
78 <conditional name="select_solver_type">
79 <param name="svd_solver_opts" label="SVD Solver" help="Method to perform the singular value decomposition" type="select">
80 <option selected="true" value="auto">auto</option>
81 <option value="full">full</option>
82 <option value="arpack">arpack</option>
83 <option value="randomized">randomized</option>
84 </param>
85 <when value="auto"/> <!-- only the arpack solver exposes an extra setting -->
86 <when value="full"/>
87 <when value="arpack">
88 <param name="tolerance" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" type="float" value="0.0"/>
89 </when>
90 <when value="randomized"/>
91 </conditional>
92 </when>
93 <when value="incremental"> <!-- incremental PCA: same component/whiten settings as classical, plus a batch size -->
94 <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
95 <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
96 <param name="batch_size" type="integer" optional="true" label="Batch Size" help="The number of samples to use for each batch"/>
97 </when>
98 <when value="kernel">
99 <param name="number" label="Number of components" help="Number of components to keep. If not set, all components are kept" type="integer" optional="true"/>
100 <conditional name="select_kernel_opts">
101 <param name="kernel_opts" label="Kernel Type" type="select">
102 <option selected="true" value="linear">linear</option>
103 <option value="poly">poly</option>
104 <option value="rbf">rbf</option>
105 <option value="sigmoid">sigmoid</option>
106 <option value="cosine">cosine</option>
107 <option value="precomputed">precomputed</option>
108 </param>
109 <when value="linear"/> <!-- branches follow the option order above; linear, cosine and precomputed take no extra settings -->
110 <when value="poly">
111 <param name="gamma" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" type="float" optional="true"/>
112 <param name="degree" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels" type="integer" value="3"/>
113 <param name="coef0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" type="float" value="1.0"/>
114 </when>
115 <when value="rbf">
116 <param name="gamma" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" type="float" optional="true"/>
117 </when>
118 <when value="sigmoid">
119 <param name="gamma" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" type="float" optional="true"/>
120 <param name="coef0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" type="float" value="1.0"/>
121 </when>
122 <when value="cosine"/>
123 <when value="precomputed"/>
124 </conditional>
125 <conditional name="select_solver_type">
126 <param name="eigen_solver_opts" label="Eigen Solver" type="select">
127 <option selected="true" value="auto">auto</option>
128 <option value="dense">dense</option>
129 <option value="arpack">arpack</option>
130 </param>
131 <when value="auto"/>
132 <when value="dense"/>
133 <when value="arpack">
134 <param name="tolerance" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" type="float" value="0.0"/>
135 <param name="max_iter" label="Maximum Iterations" help="Maximum number of iterations for arpack" type="integer" optional="true"/>
136 </when>
137 </conditional>
138 </when>
139 </conditional>
140 </inputs>
141 <outputs>
142 <data name="outfile" format="tabular"/> <!-- written by the command through its -o argument -->
143 </outputs>
144 <tests>
145 <test> <!-- classical PCA, arpack solver, columns chosen by index -->
146 <param name="infile" ftype="tabular" value="pca_input.dat"/>
147 <param name="selected_column_selector_option" value="by_index_number"/>
148 <param name="col1" value="1,2,4,6,8,5"/>
149 <param name="select_pca_opts" value="classical"/>
150 <param name="number" value="5"/>
151 <param name="svd_solver_opts" value="arpack"/>
152 <param name="tolerance" value="0.4"/>
153 <output name="outfile" file="pca_classical_output.dat" ftype="tabular"/>
154 </test>
155 <test> <!-- classical PCA, arpack solver, columns chosen by header name -->
156 <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
157 <param name="header" value="true"/> <!-- boolean inputs are set with true/false in tests; the param's truevalue (--header) is what reaches the command line -->
158 <param name="selected_column_selector_option" value="by_header_name"/>
159 <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5"/>
160 <param name="number" value="5"/>
161 <param name="select_pca_opts" value="classical"/>
162 <param name="svd_solver_opts" value="arpack"/>
163 <param name="tolerance" value="0.4"/>
164 <output name="outfile" ftype="tabular" file="pca_classical_header_names_output.dat"/>
165 </test>
166 <test> <!-- incremental PCA, all columns except the listed indices -->
167 <param name="infile" ftype="tabular" value="pca_input.dat"/>
168 <param name="selected_column_selector_option" value="all_but_by_index_number"/>
169 <param name="col1" value="8,5"/>
170 <param name="select_pca_opts" value="incremental"/>
171 <param name="number" value="7"/>
172 <param name="batch_size" value="64"/>
173 <output name="outfile" file="pca_incremental_output.dat" ftype="tabular"/>
174 </test>
175 <test> <!-- incremental PCA, all columns except the listed header names -->
176 <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
177 <param name="header" value="true"/> <!-- boolean inputs are set with true/false in tests; the param's truevalue (--header) is what reaches the command line -->
178 <param name="selected_column_selector_option" value="all_but_by_header_name"/>
179 <param name="col1" value="col_8,col_5"/>
180 <param name="number" value="7"/>
181 <param name="select_pca_opts" value="incremental"/>
182 <param name="batch_size" value="64"/>
183 <output name="outfile" ftype="tabular" file="pca_incremental_header_names_output.dat"/>
184 </test>
185 <test> <!-- kernel PCA, linear kernel, arpack eigen solver; only the output shape is asserted -->
186 <param name="infile" ftype="tabular" value="pca_input.dat"/>
187 <param name="selected_column_selector_option" value="all_columns"/>
188 <param name="select_pca_opts" value="kernel"/>
189 <param name="number" value="8"/>
190 <param name="kernel_opts" value="linear"/>
191 <param name="eigen_solver_opts" value="arpack"/>
192 <param name="tolerance" value="4.3"/>
193 <param name="max_iter" value="8"/>
194 <output name="outfile" ftype="tabular">
195 <assert_contents>
196 <has_n_columns n="8"/>
197 <has_n_lines n="300"/>
198 </assert_contents>
199 </output>
200 </test>
201 <test> <!-- kernel PCA, polynomial kernel, automatic eigen solver; only the output shape is asserted -->
202 <param name="infile" ftype="tabular" value="pca_input.dat"/>
203 <param name="selected_column_selector_option" value="all_columns"/>
204 <param name="select_pca_opts" value="kernel"/>
205 <param name="number" value="8"/>
206 <param name="kernel_opts" value="poly"/>
207 <param name="gamma" value="0.3"/>
208 <param name="degree" value="4"/>
209 <param name="coef0" value="1.6"/>
210 <param name="eigen_solver_opts" value="auto"/>
211 <output name="outfile" ftype="tabular">
212 <assert_contents>
213 <has_n_columns n="8"/>
214 <has_n_lines n="300"/>
215 </assert_contents>
216 </output>
217 </test>
218 </tests>
219 <help><![CDATA[
220 .. class:: infomark
221
222 **What it does**
223
224 This tool takes a tabular input file (one data point per row, each column a variable)
225 and performs PCA (classical PCA uses Singular Value Decomposition; incremental and kernel PCA are also available),
226 returning a tabular file with one column per component kept: the first PC in the first column, second PC in the second column, etc.
227 ]]>
228 </help>
229 <expand macro="sklearn_citation"/>
230 </tool>