view pandas_rolling.xml @ 0:a06f7b5c4dc7 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/pandas_rolling_window commit bdbedf42854d16bb00c396045007d4baece0a869
author bgruening
date Mon, 20 May 2019 08:42:54 -0400
parents
children
line wrap: on
line source

<tool id="pandas_rolling_window" name="Rolling window" version="0.1">
    <description>over a dataframe (e.g. for data smoothing)</description>
    <requirements>
        <requirement type="package" version="1.16.3">numpy</requirement>
        <requirement type="package" version="1.2.1">scipy</requirement>
        <requirement type="package" version="0.24.2">pandas</requirement>
    </requirements>
    <!-- Print the generated pandas_script config file to stdout, then run it with python. -->
    <command><![CDATA[
        cat '$pandas_script' && python '$pandas_script'
    ]]></command>
    <configfiles>
        <configfile name="pandas_script"><![CDATA[
import argparse
import sys

import pandas as pd

kwargs = dict()
window_type = '$smooth_function.smooth_function_opts_selector'

#if $smooth_function.smooth_function_opts_selector == 'gaussian':
kwargs.update({'std': $smooth_function.gaussian_std})
#elif $smooth_function.smooth_function_opts_selector == 'general_gaussian':
kwargs = ({'power': $smooth_function.ggaussian_power, 'width': $smooth_function.ggaussian_width})
#elif $smooth_function.smooth_function_opts_selector == 'kaiser':
kwargs.update({'beta': $smooth_function.kaiser_beta})
#elif $smooth_function.smooth_function_opts_selector == 'slepian':
kwargs.update({'width': $smooth_function.slepian_width})
#end if

df = pd.read_csv('${infile}', sep='\t', index_col=None, header=None, dtype={'strand': object} )

#if $group_column:
df['aggregate'] = df.groupby( int($group_column)-1, sort=False )[int($value_column)-1].rolling(${window_len}, win_type=window_type, center=$centering ).${statistics}(**kwargs).reset_index(drop=True)
#else:
df['aggregate'] = df[int($value_column)-1].rolling(${window_len}, win_type=window_type, center=$centering ).${statistics}(**kwargs).reset_index(drop=True)
#end if

df.to_csv('${outfile}', index=False, header=False, sep='\t', na_rep='0', float_format='%.2f')
    ]]> </configfile>
    </configfiles>
    <inputs>
        <!-- Table to process.
             NOTE(review): 'bed.interval' does not look like a standard datatype extension;
             confirm whether 'bed,interval' was intended. -->
        <param name="infile"
               type="data"
               format="tabular,bed.interval"
               label="Select input file in tabular or BED format" />

        <!-- Optional grouping column; when set, the window is applied within each group. -->
        <param name="group_column"
               type="data_column"
               data_ref="infile"
               optional="true"
               label="Optional column to group"
               help="For example if you have a chromosome column you probably want to group each chromosome before you apply any function." />

        <!-- Column holding the values the window runs over. -->
        <param name="value_column"
               type="data_column"
               data_ref="infile"
               label="Column with the value of interest"
               help="" />

        <!-- Window type selector; the chosen value is passed to pandas as win_type. -->
        <conditional name="smooth_function">
            <param name="smooth_function_opts_selector"
                   type="select"
                   label="Provide a window type"
                   help="For more information please see https://en.wikipedia.org/wiki/Window_function">
                <option value="boxcar" selected="True">Boxcar or Dirichlet, all points are evenly weighted</option>
                <option value="triang">triang</option>
                <option value="blackman">blackman</option>
                <option value="hamming">hamming</option>
                <option value="bartlett">bartlett</option>
                <option value="parzen">parzen</option>
                <option value="bohman">bohman</option>
                <option value="blackmanharris">blackmanharris</option>
                <option value="nuttall">nuttall</option>
                <option value="barthann">barthann</option>
                <!-- Disabled window types (each takes the extra parameters declared further down):
                <option value="kaiser">kaiser</option>
                <option value="gaussian">gaussian</option>
                <option value="general_gaussian">general gaussian</option>
                <option value="slepian">slepian</option>
                -->
            </param>

            <!-- Window types without additional parameters. -->
            <when value="boxcar" />
            <when value="triang" />
            <when value="blackman" />
            <when value="hamming" />
            <when value="bartlett" />
            <when value="parzen" />
            <when value="bohman" />
            <when value="blackmanharris" />
            <when value="nuttall" />
            <when value="barthann" />

            <!-- Parameterised window types; not selectable while their options above stay disabled. -->
            <when value="kaiser">
                <param name="kaiser_beta" type="float" value="0.1" min="0.0" label="beta" />
            </when>
            <when value="gaussian">
                <param name="gaussian_std" type="float" value="0.1" min="0.0" label="std" />
            </when>
            <when value="general_gaussian">
                <param name="ggaussian_power" type="integer" value="2" min="1" label="power" />
                <param name="ggaussian_width" type="integer" value="2" min="1" label="width" />
            </when>
            <when value="slepian">
                <param name="slepian_width" type="integer" value="2" min="1" label="width" />
            </when>
        </conditional>

        <!-- The selected value is used verbatim as the method name called on the rolling window.
             NOTE(review): the script always sets win_type; confirm that every statistic listed
             here is available on weighted windows in the pinned pandas version. -->
        <param name="statistics" type="select" label="Provide a statistical function">
            <option value="count">Number of non-null observations (count)</option>
            <option value="sum">Sum of values (sum)</option>
            <option value="mean" selected="true">Mean of values (mean)</option>
            <option value="median">Arithmetic median of values (median)</option>
            <option value="min">Minimum (min)</option>
            <option value="max">max (max)</option>
            <option value="std">Bessel-corrected sample standard deviation (std)</option>
            <option value="var">Unbiased variance (var)</option>
            <option value="skew">Sample skewness (3rd moment)</option>
            <option value="kurt">Sample kurtosis (4th moment)</option>
            <!-- Disabled: the previous value "quantil" is not a pandas rolling method, and the
                 correctly spelled quantile() requires a quantile argument that the script never
                 passes, so selecting this option could only fail. Re-enable together with a
                 parameter that supplies the quantile.
            <option value="quantile">Sample quantile (value at %)</option>
            -->
            <option value="cov">Unbiased covariance (binary) (cov)</option>
            <option value="corr">Correlation (corr)</option>
        </param>

        <!-- truevalue/falsevalue are inserted verbatim into the script as the value of rolling(center=...). -->
        <param name="centering" type="boolean" truevalue="True" falsevalue="False" label="center smoothed values"
            help="By default the labels are set to the right edge of the window. Here you can change that to the center." />
        <!-- Window size passed as the first argument of rolling(); must be at least 2. -->
        <param name="window_len" type="integer" value="3" min="2" label="Window length"/>
    </inputs>
    <!-- Single output table; its datatype is taken from the input dataset (format_source). -->
    <outputs>
        <data name="outfile" format_source="infile" />
    </outputs>
    <!-- Four cases: boxcar and hamming windows, each with and without a group column. -->
    <tests>
        <!-- boxcar window, grouped by column 1, default statistic -->
        <test>
            <param name="infile" value="1.bedgraph"/>
            <param name="group_column" value="1"/>
            <param name="value_column" value="5"/>
            <conditional name="smooth_function">
                <param name="smooth_function_opts_selector" value="boxcar"/>
            </conditional>
            <param name="window_len" value="3"/>
            <output name="outfile" value="1_boxcar.bedgraph"/>
        </test>
        <test>
            <!-- boxcar window, no group column set, default statistic -->
            <param name="infile" value="1.bedgraph"/>
            <param name="value_column" value="5"/>
            <conditional name="smooth_function">
                <param name="smooth_function_opts_selector" value="boxcar"/>
            </conditional>
            <param name="window_len" value="3"/>
            <output name="outfile" value="2_boxcar.bedgraph"/>
        </test>
        <!-- hamming window, grouped by column 1, sum statistic -->
        <test>
            <param name="infile" value="1.bedgraph"/>
            <param name="group_column" value="1"/>
            <param name="value_column" value="5"/>
            <conditional name="smooth_function">
                <param name="smooth_function_opts_selector" value="hamming"/>
            </conditional>
            <param name="window_len" value="3"/>
            <param name="statistics" value="sum"/>
            <output name="outfile" value="1_hamming.bedgraph"/>
        </test>
        <!-- hamming window, no group column set, sum statistic -->
        <test>
            <param name="infile" value="1.bedgraph"/>
            <param name="value_column" value="5"/>
            <conditional name="smooth_function">
                <param name="smooth_function_opts_selector" value="hamming"/>
            </conditional>
            <param name="window_len" value="3"/>
            <param name="statistics" value="sum"/>
            <output name="outfile" value="2_hamming.bedgraph"/>
        </test>
    </tests>
    <help>
<![CDATA[

**What it does**

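Provides rolling window calculations, e.g. for smoothing values.

The selected statistic is computed over a rolling window of the value column, optionally within each group of the grouping column, and the result is appended to the input table as an additional last column. In the output, missing values are written as 0 and floating-point values are written with two decimal places.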


]]>
    </help>
</tool>