view tools/multivariate_stats/kpca.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

<tool id="kpca1" name="Kernel Principal Component Analysis" version="1.0.0">
  <description> </description>
  <!--
    Builds the kpca.py command line. All five kernel-parameter options (sigma,
    degree, scale, offset, order) are always passed; any parameter that the
    selected kernel does not define in the kernelChoice conditional is passed
    as the literal string "None".

      rbfdot, laplacedot     : sigma
      polydot                : degree, scale, offset
      tanhdot                : scale, offset
      besseldot              : sigma, degree, order
      anovadot               : sigma, degree
      vanilladot, splinedot  : no parameters (the else branch)

    The first branch pairs rbfdot with laplacedot because both define only
    sigma. anovadot must not be matched there: it also defines degree and has
    its own branch further down, which would otherwise never be reached.
  -->
  <command interpreter="python">
    kpca.py 
      --input=$input1
      --output1=$out_file1
      --output2=$out_file2
      --var_cols=$var_cols
      --kernel=$kernelChoice.kernel
      --features=$features
      #if $kernelChoice.kernel == "rbfdot" or $kernelChoice.kernel == "laplacedot":
      --sigma=$kernelChoice.sigma
      --degree="None"
      --scale="None"
      --offset="None"
      --order="None"
      #elif $kernelChoice.kernel == "polydot":
      --sigma="None"
      --degree=$kernelChoice.degree
      --scale=$kernelChoice.scale
      --offset=$kernelChoice.offset
      --order="None"
      #elif $kernelChoice.kernel == "tanhdot":
      --sigma="None"
      --degree="None"
      --scale=$kernelChoice.scale
      --offset=$kernelChoice.offset
      --order="None"
      #elif $kernelChoice.kernel == "besseldot":
      --sigma=$kernelChoice.sigma
      --degree=$kernelChoice.degree
      --scale="None"
      --offset="None"
      --order=$kernelChoice.order
      #elif $kernelChoice.kernel == "anovadot":
      --sigma=$kernelChoice.sigma
      --degree=$kernelChoice.degree
      --scale="None"
      --offset="None"
      --order="None"
      #else:
      --sigma="None"
      --degree="None"
      --scale="None"
      --offset="None"
      --order="None"
      #end if
  </command>
  <inputs>
    <!-- Tabular dataset to analyse, and the numeric columns of it to use as variables. -->
    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
    <param name="var_cols" label="Select columns containing input variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
        <validator type="no_options" message="Please select at least one column."/>
    </param>
    <param name="features" size="10" type="integer" value="2" label="Number of principal components to return" help="To return all, enter 0"/>
    <!--
      Kernel selection. Each "when" case declares exactly the parameters that
      the command template reads for that kernel; vanilladot and splinedot
      take none.
    -->
    <conditional name="kernelChoice">
        <param name="kernel" type="select" label="Kernel function">
            <option value="rbfdot" selected="true">Gaussian Radial Basis Function</option>
            <option value="polydot">Polynomial</option>
            <option value="vanilladot">Linear</option>
            <option value="tanhdot">Hyperbolic</option>
            <option value="laplacedot">Laplacian</option>
            <option value="besseldot">Bessel</option>
            <option value="anovadot">ANOVA Radial Basis Function</option>
            <option value="splinedot">Spline</option>
        </param>
        <when value="vanilladot" />
        <when value="splinedot" />
        <when value="rbfdot">
            <param name="sigma" size="10" type="float" value="1" label="sigma (inverse kernel width)" />
        </when>
        <when value="laplacedot">
            <param name="sigma" size="10" type="float" value="1" label="sigma (inverse kernel width)" />
        </when>
        <!-- NOTE(review): scale and offset are restricted to integers here and in tanhdot; confirm against kpca.py whether fractional values should be allowed. -->
        <when value="polydot">
            <param name="degree" size="10" type="integer" value="1" label="degree" />
            <param name="scale" size="10" type="integer" value="1" label="scale" />
            <param name="offset" size="10" type="integer" value="1" label="offset" />
        </when>
        <when value="tanhdot">
            <param name="scale" size="10" type="integer" value="1" label="scale" />
            <param name="offset" size="10" type="integer" value="1" label="offset" />
        </when>
        <!-- sigma is a float here, matching the rbfdot and laplacedot definitions above. -->
        <when value="besseldot">
            <param name="sigma" size="10" type="float" value="1" label="sigma" />
            <param name="order" size="10" type="integer" value="1" label="order" />
            <param name="degree" size="10" type="integer" value="1" label="degree" />
        </when>
        <!-- sigma is a float here, matching the rbfdot and laplacedot definitions above. -->
        <when value="anovadot">
            <param name="sigma" size="10" type="float" value="1" label="sigma" />
            <param name="degree" size="10" type="integer" value="1" label="degree" />
        </when>
    </conditional>    
  </inputs>
  <outputs>
    <!-- out_file1 is passed to kpca.py as output1; its format and metadata are tied to input1. -->
    <data name="out_file1"
          format="input"
          metadata_source="input1"/>
    <!-- out_file2 is passed to kpca.py as output2 and is declared as a PDF. -->
    <data name="out_file2"
          format="pdf"/>
  </outputs>
  <requirements>
    <!-- Python module required by kpca.py; the help text states that the analysis itself uses R's kernlab library. -->
    <requirement type="python-module">rpy</requirement>
  </requirements>
  <tests>
    <!--
      Single functional test: polynomial kernel (degree 2, scale 1, offset 0)
      on columns 1 to 4 of iris.tabular, returning two components. Both
      outputs are compared against stored reference files.
    -->
    <test>
      <param name="input1" value="iris.tabular"/>
      <param name="var_cols" value="1,2,3,4"/>
      <param name="kernel" value="polydot"/>
      <param name="features" value="2"/>
      <param name="offset" value="0"/>
      <param name="scale" value="1"/>
      <param name="degree" value="2"/>
      <output name="out_file1" file="kpca_out1.tabular"/>
      <output name="out_file2" file="kpca_out2.pdf"/>
    </test>
  </tests>
  <help>


.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*

-----

.. class:: infomark

**What it does**

This tool uses functions from the 'kernlab' library of the R statistical package to perform Kernel Principal Component Analysis (kPCA) on the input data. It outputs two files: one containing the summary statistics of the performed kPCA, and the other containing a scatterplot matrix of the rotated values reported by kPCA.

*Alexandros Karatzoglou, Alex Smola, Kurt Hornik, Achim Zeileis (2004). kernlab - An S4 Package for Kernel Methods in R. Journal of Statistical Software 11(9), 1-20. URL http://www.jstatsoft.org/v11/i09/*

-----

.. class:: warningmark

**Note**

This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables might produce incorrect results. Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.

  </help>
</tool>