view pca.xml @ 1:132805688fa3 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author bgruening
date Tue, 13 Apr 2021 18:49:35 +0000
parents 2d7016b3ae92
children c16818ce0424
line wrap: on
line source

<tool id="sklearn_pca" name="Principal component analysis" version="@VERSION@+galaxy@GALAXY_VERSION@" profile="20.05">
    <description>with scikit-learn</description>
    <!-- Shared definitions are imported from main_macros.xml. The @GALAXY_VERSION@
         token defined here is the wrapper suffix used in the tool's version
         attribute ("@VERSION@+galaxy@GALAXY_VERSION@"). @VERSION@ is not defined in
         this file; presumably it comes from main_macros.xml (confirm there). -->
    <macros>
        <import>main_macros.xml</import>
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <!-- Both macros below are defined outside this file. Judging by their names they
         supply the Python requirements and the stdio handling (verify in
         main_macros.xml). -->
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <!-- Reports the tool version by echoing the @VERSION@ token. -->
    <version_command>echo "@VERSION@"</version_command>
    <!-- Cheetah template that assembles the pca.py command line from the inputs.
         Only options belonging to the selected PCA type / kernel / solver are
         emitted, and optional numeric parameters are skipped when left empty.
         Every substituted value is single-quoted for consistency. -->
    <command detect_errors="exit_code">
        <![CDATA[
        python '$__tool_directory__/pca.py'
        -i '$infile'
        ## Boolean input: renders as its truevalue (--header) or as nothing.
        $header
        -c '$column_selector_options.selected_column_selector_option'
        ## The column list is only passed when a subset of columns is selected.
        #if str($column_selector_options.selected_column_selector_option) != 'all_columns'
            -ci '$column_selector_options.col1'
        #end if
        ## 'number' exists in every PCA-type branch, so it is handled once here.
        #if str($select_pca_type.number) != ''
            -n '$select_pca_type.number'
        #end if
        -t '$select_pca_type.select_pca_opts'
        #if str($select_pca_type.select_pca_opts) == 'classical'
            #set $solver = $select_pca_type.select_solver_type
            -s '$solver.svd_solver_opts'
            ## The tolerance input only exists in the arpack branch.
            #if str($solver.svd_solver_opts) == 'arpack'
                -tol '$solver.tolerance'
            #end if
            $select_pca_type.whiten
        #elif str($select_pca_type.select_pca_opts) == 'incremental'
            #if str($select_pca_type.batch_size) != ''
                -b '$select_pca_type.batch_size'
            #end if
            $select_pca_type.whiten
        #elif str($select_pca_type.select_pca_opts) == 'kernel'
            #set $kernel = $select_pca_type.select_kernel_opts
            #set $solver = $select_pca_type.select_solver_type
            #set $kernel_type = str($kernel.kernel_opts)
            -k '$kernel.kernel_opts'
            ## gamma is defined for poly, rbf and sigmoid only; the kernel check
            ## comes first so the parameter is never looked up in other branches.
            #if $kernel_type in ('poly', 'rbf', 'sigmoid') and str($kernel.gamma) != ''
                -g '$kernel.gamma'
            #end if
            ## degree is defined for poly only.
            #if $kernel_type == 'poly'
                -d '$kernel.degree'
            #end if
            ## coef0 is defined for poly and sigmoid.
            #if $kernel_type in ('poly', 'sigmoid')
                -cf '$kernel.coef0'
            #end if
            -e '$solver.eigen_solver_opts'
            ## tolerance and max_iter only exist in the arpack branch.
            #if str($solver.eigen_solver_opts) == 'arpack'
                -tol '$solver.tolerance'
                #if str($solver.max_iter) != ''
                    -mi '$solver.max_iter'
                #end if
            #end if
        #end if
        -o '$outfile'
        ]]>
    </command>
    <!-- User-facing parameters. The names and nesting used here are exactly what the
         command template dereferences, so both must be kept in sync. -->
    <inputs>
        <param name="infile" type="data" format="tabular" label="Input file" />
        <!-- Boolean parameters in this tool render directly as their command-line
             flag (truevalue) or as an empty string (falsevalue). -->
        <param name="header" type="boolean" label="Exclude Header" truevalue="--header" falsevalue="" help="If present, exclude the header row from the dataset" />
        <!-- Column selection. The selector and its column list are generated by a
             macro defined outside this file; the attributes below name the
             resulting parameters (selected_column_selector_option and col1). -->
        <conditional name="column_selector_options">
            <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile" />
        </conditional>
        <!-- PCA flavour. Every branch repeats the optional "number" parameter and
             adds the options specific to that flavour. -->
        <conditional name="select_pca_type">
            <param name="select_pca_opts" type="select" label="Select PCA Type" help="Choose which flavour of PCA to use">
                <option value="classical" selected="true">Classical PCA</option>
                <option value="incremental">Incremental PCA</option>
                <option value="kernel">Kernel PCA</option>
            </param>
            <when value="classical">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" />
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features" />
                <!-- Only the arpack solver exposes an extra option (tolerance). -->
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" type="select" label="SVD Solver" help="Method to perform the singular value decomposition">
                        <option value="auto" selected="true">auto</option>
                        <option value="full">full</option>
                        <option value="arpack">arpack</option>
                        <option value="randomized">randomized</option>
                    </param>
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" />
                    </when>
                    <when value="auto" />
                    <when value="full" />
                    <when value="randomized" />
                </conditional>
            </when>
            <when value="incremental">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" />
                <param name="whiten" type="boolean" label="Whiten Components" truevalue="--whiten" falsevalue="" help="Setting this option will reduce the redundancy and correlations between the features" />
                <param name="batch_size" type="integer" optional="true" label="Batch Size" help="The number of samples to use for each batch" />
            </when>
            <when value="kernel">
                <param name="number" type="integer" optional="true" label="Number of components" help="Number of components to keep. If not set, all components are kept" />
                <!-- Kernel choice. Only poly, sigmoid and rbf expose extra options;
                     the remaining kernels have empty branches. -->
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" type="select" label="Kernel Type">
                        <option value="linear" selected="true">linear</option>
                        <option value="poly">poly</option>
                        <option value="rbf">rbf</option>
                        <option value="sigmoid">sigmoid</option>
                        <option value="cosine">cosine</option>
                        <option value="precomputed">precomputed</option>
                    </param>
                    <when value="poly">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" />
                        <param name="degree" type="integer" value="3" label="Degree of the polynomial" help="Degree for poly kernels. Ignored by other kernels" />
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" />
                    </when>
                    <when value="sigmoid">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" />
                        <param name="coef0" type="float" value="1.0" label="Coef0" help="Independent term in poly and sigmoid kernels. Ignored by other kernels" />
                    </when>
                    <when value="rbf">
                        <param name="gamma" type="float" optional="true" label="Gamma Value" help="Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels" />
                    </when>
                    <when value="linear" />
                    <when value="cosine" />
                    <when value="precomputed" />
                </conditional>
                <!-- Shares its name with the classical branch's solver conditional;
                     the two live in different "when" branches of select_pca_type. -->
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" type="select" label="Eigen Solver">
                        <option value="auto" selected="true">auto</option>
                        <option value="dense">dense</option>
                        <option value="arpack">arpack</option>
                    </param>
                    <when value="arpack">
                        <param name="tolerance" type="float" value="0.0" label="Tolerance" help="Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack" />
                        <param name="max_iter" type="integer" optional="true" label="Maximum Iterations" help="Maximum number of iterations for arpack" />
                    </when>
                    <when value="auto" />
                    <when value="dense" />
                </conditional>
            </when>
        </conditional>
    </inputs>
    <!-- Single tabular result; its path is handed to pca.py through the -o option. -->
    <outputs>
        <data name="outfile" format="tabular" />
    </outputs>
    <!-- Test parameters are grouped by the conditional they belong to, mirroring the
         structure of the inputs section. -->
    <tests>
        <!-- Classical PCA, arpack solver, columns chosen by index. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="by_index_number" />
                <param name="col1" value="1,2,4,6,8,5" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="classical" />
                <param name="number" value="5" />
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" value="arpack" />
                    <param name="tolerance" value="0.4" />
                </conditional>
            </conditional>
            <output name="outfile" ftype="tabular" file="pca_classical_output.dat" />
        </test>
        <!-- Classical PCA, arpack solver, columns chosen by header name. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular" />
            <param name="header" value="--header" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="by_header_name" />
                <param name="col1" value="col_1,col_2,col_4,col_6,col_8,col_5" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="classical" />
                <param name="number" value="5" />
                <conditional name="select_solver_type">
                    <param name="svd_solver_opts" value="arpack" />
                    <param name="tolerance" value="0.4" />
                </conditional>
            </conditional>
            <output name="outfile" ftype="tabular" file="pca_classical_header_names_output.dat" />
        </test>
        <!-- Incremental PCA, all columns except the listed indices. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="all_but_by_index_number" />
                <param name="col1" value="8,5" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="incremental" />
                <param name="number" value="7" />
                <param name="batch_size" value="64" />
            </conditional>
            <output name="outfile" ftype="tabular" file="pca_incremental_output.dat" />
        </test>
        <!-- Incremental PCA, all columns except the listed header names. -->
        <test>
            <param name="infile" value="pca_input_with_headers.dat" ftype="tabular" />
            <param name="header" value="--header" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="all_but_by_header_name" />
                <param name="col1" value="col_8,col_5" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="incremental" />
                <param name="number" value="7" />
                <param name="batch_size" value="64" />
            </conditional>
            <output name="outfile" ftype="tabular" file="pca_incremental_header_names_output.dat" />
        </test>
        <!-- Kernel PCA, linear kernel, arpack eigen solver; only the output shape is checked. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="all_columns" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="kernel" />
                <param name="number" value="8" />
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" value="linear" />
                </conditional>
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" value="arpack" />
                    <param name="tolerance" value="4.3" />
                    <param name="max_iter" value="8" />
                </conditional>
            </conditional>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300" />
                    <has_n_columns n="8" />
                </assert_contents>
            </output>
        </test>
        <!-- Kernel PCA, polynomial kernel, automatic eigen solver; only the output shape is checked. -->
        <test>
            <param name="infile" value="pca_input.dat" ftype="tabular" />
            <conditional name="column_selector_options">
                <param name="selected_column_selector_option" value="all_columns" />
            </conditional>
            <conditional name="select_pca_type">
                <param name="select_pca_opts" value="kernel" />
                <param name="number" value="8" />
                <conditional name="select_kernel_opts">
                    <param name="kernel_opts" value="poly" />
                    <param name="gamma" value="0.3" />
                    <param name="degree" value="4" />
                    <param name="coef0" value="1.6" />
                </conditional>
                <conditional name="select_solver_type">
                    <param name="eigen_solver_opts" value="auto" />
                </conditional>
            </conditional>
            <output name="outfile" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="300" />
                    <has_n_columns n="8" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

This tool takes a tabular input file (one data point per row, each column a variable)
and performs principal component analysis (PCA) on the selected columns, using the
classical (Singular Value Decomposition based), incremental or kernel variant.
It returns a tabular file with the first PC in the first column, the second PC in the
second column, etc. The number of output columns is the number of components kept.
        ]]>
    </help>
    <expand macro="sklearn_citation" />
</tool>