view modulated_modularity_clustering.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children caba07f41453
line wrap: on
line source

<tool id="secimtools_modulated_modularity_clustering" name="Modulated Modularity Clustering (MMC)"  version="@WRAPPER_VERSION@">
    <description>with visual summaries.</description>
    <!-- Shared definitions are imported from macros.xml. The "requirements" and
         "citations" macros expanded in this file, and the @...@ tokens used in the
         version attribute and in the help text, are presumably defined there
         (macros.xml is not visible from this file; confirm before renaming any). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Command line for the clustering script. Every substituted value is
         single-quoted so that dataset paths and the user-typed feature ID column
         name reach the script as exactly one argument each, even when they
         contain whitespace. A non-zero exit code marks the job as failed. -->
    <command detect_errors="exit_code"><![CDATA[
modulated_modularity_clustering.py
--input '$input'
--design '$design'
--ID '$uniqID'
--out '$output'
--figure '$figure'
--sigmaLow '$sigmaLow'
--sigmaHigh '$sigmaHigh'
--sigmaNum '$sigmaNum'
--correlation '$corr'
    ]]></command>
    <inputs>
        <!-- The two datasets the tool operates on. -->
        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If not tab separated see TIP below." />
        <param name="design" type="data" format="tabular" label="Design Dataset" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
        <!-- Column of the wide dataset that identifies each feature. -->
        <param name="uniqID" type="text" size="30" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers." />
        <!-- Initial sigma search interval. -->
        <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." />
        <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." />
        <!-- A count of grid points, so it is typed as an integer rather than a float. -->
        <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." />
        <!-- Single-choice select: exactly one option (Pearson) is marked as the default. -->
        <param name="corr" type="select" label="Correlation method" help="Select correlation method for preliminary correlation before clustering. Default: Pearson." >
            <option value="pearson" selected="true">Pearson</option>
            <option value="kendall">Kendall</option>
            <option value="spearman">Spearman</option>
        </param>
    </inputs>
    <outputs>
        <!-- Tabular result; its path is handed to the script's "out" option. -->
        <data name="output" format="tabular" label="${tool.name} on ${on_string}: Values" />
        <!-- PDF result; its path is handed to the script's "figure" option. -->
        <data name="figure" format="pdf" label="${tool.name} on ${on_string}: Heatmaps" />
    </outputs>
    <tests>
        <!-- One regression test on the ST000006 data with Pearson correlation.
             The sigma parameters are not set here, so their declared defaults apply.
             Both outputs are compared by file size only (sim_size) with a delta
             of 10000. -->
        <test>
            <param name="input" value="ST000006_data.tsv" />
            <param name="design" value="ST000006_design.tsv" />
            <param name="uniqID" value="Retention_Index" />
            <param name="corr" value="pearson" />
            <output name="output" file="ST000006_modulated_modularity_clustering_out.tsv" compare="sim_size" delta="10000" />
            <output name="figure" file="ST000006_modulated_modularity_clustering_figure.pdf" compare="sim_size" delta="10000" />
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

Modulated Modularity Clustering method (MMC) was designed to detect latent structure in data using weighted graphs.
The method searches for optimal community structure and detects the magnitude of pairwise relationships.
The optimal number of clusters and the optimal cluster size are selected by the method during the analysis.

The initial boundaries (lower and upper) for sigma as well as the number of points in the search grid (number of sigma values) are specified initially by the user.
The boundaries are extended automatically by the algorithm if the values are close to the boundary. The correlation type (Pearson, Kendall or Spearman) can be specified.

More details about the method can be found in:

Stone, E. A., and Ayroles, J. F. (2009). Modulated modularity clustering as an exploratory tool for functional genomic inference. PLoS Genet, 5(5), e1000479.


--------------------------------------------------------------------------------

**Input**

    - Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File (below). Extra columns will automatically be ignored.

@METADATA@

@UNIQID@

**Lower sigma value**

    - Default: 0.05.

**Upper sigma value**

    - Default: 0.50.

**Sigma values**

    - Number of values of sigma to search. Default: 451. Higher numbers increase the precision but also increase the run time.

**Correlation method**

    - Correlation method for preliminary correlation before clustering. Default = Pearson.

--------------------------------------------------------------------------------

**Output**

The tool produces two files: a TSV file and a PDF file:

(1) a TSV file containing the algorithm summaries and
(2) a PDF file containing (i) unsorted, (ii) sorted, and (iii) sorted and smoothed dependency heatmaps produced by the MMC algorithm.


    ]]></help>
    <expand macro="citations"/>
</tool>