diff pca.xml @ 0:2d7016b3ae92 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2afb24f3c81d625312186750a714d702363012b5"
author bgruening
date Fri, 02 Oct 2020 08:45:21 +0000
parents
children 132805688fa3
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pca.xml	Fri Oct 02 08:45:21 2020 +0000
@@ -0,0 +1,230 @@
+<tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@">
+    <description>with scikit-learn</description>
+    <macros> <!-- shared definitions pulled in before the tool is parsed -->
+        <import>main_macros.xml</import> <!-- presumably supplies @VERSION@ and the macros expanded in this file; not visible here, confirm -->
+        <token name="@GALAXY_VERSION@">0</token> <!-- wrapper revision, appended after "+galaxy" in the tool version attribute -->
+    </macros>
+    <expand macro="python_requirements"/>
+    <expand macro="macro_stdio"/>
+    <version_command>echo "@VERSION@"</version_command>
+    <command detect_errors="exit_code">
+        <![CDATA[
+        ## Assemble the pca.py call; flags for optional values (-n, -b, -g, -mi) are emitted only when a value was entered.
+        python '$__tool_directory__/pca.py'
+        -i '$infile'
+        $header
+        -c '$column_selector_options.selected_column_selector_option'
+        #if $column_selector_options.selected_column_selector_option != 'all_columns'
+            -ci '$column_selector_options.col1'
+        #end if
+        #if $select_pca_type.number != ''
+            -n '$select_pca_type.number'
+        #end if
+        -t '$select_pca_type.select_pca_opts'
+        #if $select_pca_type.select_pca_opts == 'classical'
+            -s '$select_pca_type.select_solver_type.svd_solver_opts'
+            #if $select_pca_type.select_solver_type.svd_solver_opts == 'arpack'
+                -tol '$select_pca_type.select_solver_type.tolerance'
+            #end if
+            $select_pca_type.whiten
+        #elif $select_pca_type.select_pca_opts == 'incremental'
+            #if $select_pca_type.batch_size != ''
+                -b '$select_pca_type.batch_size'
+            #end if
+            $select_pca_type.whiten
+        #elif $select_pca_type.select_pca_opts == 'kernel'
+            ## gamma, degree and coef0 are declared only for some kernels in the form,
+            ## so each one is read strictly behind a check on the kernel that declares it.
+            #set $kernel = str($select_pca_type.select_kernel_opts.kernel_opts)
+            -k '$kernel'
+            ## gamma: poly, rbf and sigmoid; optional, skipped when left blank
+            #if $kernel in ('poly', 'rbf', 'sigmoid') and $select_pca_type.select_kernel_opts.gamma != ''
+                -g '$select_pca_type.select_kernel_opts.gamma'
+            #end if
+            ## degree: poly only
+            #if $kernel == 'poly'
+                -d '$select_pca_type.select_kernel_opts.degree'
+            #end if
+            ## coef0: poly and sigmoid
+            #if $kernel in ('poly', 'sigmoid')
+                -cf '$select_pca_type.select_kernel_opts.coef0'
+            #end if
+            -e '$select_pca_type.select_solver_type.eigen_solver_opts'
+            #if $select_pca_type.select_solver_type.eigen_solver_opts == 'arpack'
+                -tol '$select_pca_type.select_solver_type.tolerance'
+                #if $select_pca_type.select_solver_type.max_iter != ''
+                    -mi '$select_pca_type.select_solver_type.max_iter'
+                #end if
+            #end if
+        #end if
+        -o '$outfile'
+        ]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="tabular" label="Input file"/>
+        <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset"/>
+        <conditional name="column_selector_options">
+            <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile"/>
+        </conditional>
+        <conditional name="select_pca_type"> <!-- "number" is declared in every branch so the command can always read $select_pca_type.number -->
+            <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use">
+                <option value="classical" selected="true">Classical PCA</option>
+                <option value="incremental">Incremental PCA</option>
+                <option value="kernel">Kernel PCA</option>
+            </param>
+            <when value="classical"> <!-- command emits -s, -tol (arpack only) and the whiten flag -->
+                <param name="number" type="integer" min="1" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
+                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
+                <conditional name="select_solver_type">
+                    <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition">
+                        <option value="auto" selected="true">auto</option>
+                        <option value="full">full</option>
+                        <option value="arpack">arpack</option>
+                        <option value="randomized">randomized</option>
+                    </param>
+                    <when value="arpack">
+                        <param name="tolerance" type="float" value="0.0" min="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/>
+                    </when>
+                    <when value="auto"/>
+                    <when value="full"/>
+                    <when value="randomized"/>
+                </conditional>
+            </when>
+            <when value="incremental"> <!-- command emits -b (when set) and the whiten flag -->
+                <param name="number" type="integer" min="1" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
+                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features"/>
+                <param name="batch_size" type="integer" min="1" optional="true" label="Batch Size" help="The number of samples to use for each batch"/>
+            </when>
+            <when value="kernel"> <!-- command emits -k, kernel-specific -g/-d/-cf, -e and arpack-only -tol/-mi -->
+                <param name="number" type="integer" min="1" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept"/>
+                <conditional name="select_kernel_opts">
+                    <param name="kernel_opts" type="select" label="Kernel Type" help="Kernel function used by Kernel PCA">
+                        <option value="linear" selected="true">linear</option>
+                        <option value="poly">poly</option>
+                        <option value="rbf">rbf</option>
+                        <option value="sigmoid">sigmoid</option>
+                        <option value="cosine">cosine</option>
+                        <option value="precomputed">precomputed</option>
+                    </param>
+                    <when value="poly">
+                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
+                        <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels"/>
+                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/>
+                    </when>
+                    <when value="sigmoid">
+                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
+                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels"/>
+                    </when>
+                    <when value="rbf">
+                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels"/>
+                    </when>
+                    <when value="linear"/>
+                    <when value="cosine"/>
+                    <when value="precomputed"/>
+                </conditional>
+                <conditional name="select_solver_type">
+                    <param name="eigen_solver_opts" type="select" label="Eigen Solver" help="Method to perform the eigenvalue decomposition">
+                        <option value="auto" selected="true">auto</option>
+                        <option value="dense">dense</option>
+                        <option value="arpack">arpack</option>
+                    </param>
+                    <when value="arpack">
+                        <param name="tolerance" type="float" value="0.0" min="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack"/>
+                        <param name="max_iter" type="integer" min="1" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack"/>
+                    </when>
+                    <when value="auto"/>
+                    <when value="dense"/>
+                </conditional>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs> <!-- a single tabular dataset; its path is handed to pca.py through -o in the command -->
+        <data format="tabular" name="outfile"/>
+    </outputs>
+    <tests>
+        <test> <!-- classical PCA with the arpack solver; input columns selected by index -->
+            <param name="infile" value="pca_input.dat" ftype="tabular"/>
+            <param name="selected_column_selector_option" value="by_index_number"/>
+            <param name="col1" value="1,2,4,6,8,5"/>
+            <param name="select_pca_opts" value="classical"/>
+            <param name="number" value="5"/>
+            <param name="svd_solver_opts" value="arpack"/>
+            <param name="tolerance" value="0.4"/>
+            <output name="outfile" ftype="tabular" file="pca_classical_output.dat"/>
+        </test>
+        <test> <!-- same PCA settings as the first test, with a header row and columns selected by header name -->
+            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
+            <param name="header" value="--header"/>
+            <param name="selected_column_selector_option" value="by_header_name"/>
+            <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5"/>
+            <param name="select_pca_opts" value="classical"/>
+            <param name="number" value="5"/>
+            <param name="svd_solver_opts" value="arpack"/>
+            <param name="tolerance" value="0.4"/>
+            <output name="outfile" ftype="tabular" file="pca_classical_header_names_output.dat"/>
+        </test>
+        <test> <!-- incremental PCA; all columns except those listed by index -->
+            <param name="infile" value="pca_input.dat" ftype="tabular"/>
+            <param name="selected_column_selector_option" value="all_but_by_index_number"/>
+            <param name="col1" value="8,5"/>
+            <param name="select_pca_opts" value="incremental"/>
+            <param name="number" value="7"/>
+            <param name="batch_size" value="64"/>
+            <output name="outfile" ftype="tabular" file="pca_incremental_output.dat"/>
+        </test>
+        <test> <!-- incremental PCA with a header row; all columns except those listed by header name -->
+            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular"/>
+            <param name="header" value="--header"/>
+            <param name="selected_column_selector_option" value="all_but_by_header_name"/>
+            <param name="col1" value="col_8,col_5"/>
+            <param name="select_pca_opts" value="incremental"/>
+            <param name="number" value="7"/>
+            <param name="batch_size" value="64"/>
+            <output name="outfile" ftype="tabular" file="pca_incremental_header_names_output.dat"/>
+        </test>
+        <test> <!-- kernel PCA, linear kernel, arpack eigen solver; only the output shape is asserted -->
+            <param name="infile" value="pca_input.dat" ftype="tabular"/>
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="select_pca_opts" value="kernel"/>
+            <param name="number" value="8"/>
+            <param name="kernel_opts" value="linear"/>
+            <param name="eigen_solver_opts" value="arpack"/>
+            <param name="tolerance" value="4.3"/>
+            <param name="max_iter" value="8"/>
+            <output name="outfile" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="300"/>
+                    <has_n_columns n="8"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- kernel PCA, poly kernel with explicit gamma/degree/coef0, auto eigen solver; only the output shape is asserted -->
+            <param name="infile" value="pca_input.dat" ftype="tabular"/>
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="select_pca_opts" value="kernel"/>
+            <param name="number" value="8"/>
+            <param name="kernel_opts" value="poly"/>
+            <param name="gamma" value="0.3"/>
+            <param name="degree" value="4"/>
+            <param name="coef0" value="1.6"/>
+            <param name="eigen_solver_opts" value="auto"/>
+            <output name="outfile" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="300"/>
+                    <has_n_columns n="8"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+This tool takes a tabular input file (one data point per row, one variable per column)
+and performs PCA using Singular Value Decomposition. It returns a tabular file with one row per data point
+and one column per retained component: the first PC in the first column, the second PC in the second column, etc.
+        ]]>
+    </help>
+    <expand macro="sklearn_citation"/>
+</tool>