view missing_values_sample_filter.xml @ 1:2cf12cd2c53a draft

"planemo upload for repository https://github.com/computational-metabolomics/dimspy-galaxy commit 680116d0cf6a6d7246cba655452dea43269aeba4"
author computational-metabolomics
date Tue, 28 Apr 2020 17:27:23 -0400
parents 91441e41cc3d
children
line wrap: on
line source

<tool id="dimspy_missing_values_sample_filter" name="Missing Values Sample Filter" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
    <description> - Remove samples with a high percentage of missing values</description>
    <!-- Shared definitions are imported from macros.xml, which is not part of this file.
         NOTE(review): the <expand macro="..."/> elements and the @TOKEN@ placeholders used
         throughout this tool are presumably all defined there; confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code">
    <![CDATA[
        ## 1) Filter the peak matrix: samples above the missing-value threshold are removed.
        dimspy mv-sample-filter --input '$hdf5_file_in' --output '$hdf5_file_out' --max-fraction $max_fraction
        ## 2) Export the sample list of the filtered matrix as a tab-delimited file.
        && dimspy create-sample-list --input '$hdf5_file_out' --output '$samplelist' --delimiter tab
        ## 3) Optional text exports of the filtered matrix. The command text is supplied by macro tokens.
        #if $hdf5_to_txt.standard
            &&
            @HDF5_PM_TO_TXT@
        #end if
        #if $hdf5_to_txt.comprehensive
            &&
            @HDF5_PM_TO_TXT_COMPREHENSIVE@
        #end if
    ]]>
    </command>
    <inputs>
        <!-- Peak intensity matrix (HDF5) that will be filtered. -->
        <param name="hdf5_file_in" argument="--input" type="data" format="h5" label="Peak Intensity Matrix (HDF5 file)" help="" />
        <!-- The threshold is a fraction in [0, 1] although the label says "percentage";
             the help text states the expected scale so users do not enter e.g. 80. -->
        <param name="max_fraction" argument="--max-fraction" type="float" min="0" max="1.0" value="0.8" label="Maximum percentage of missing values." help="Decimal fraction between 0 and 1 (e.g. 0.8 = 80%). Samples with a greater fraction of missing values are removed from the peak intensity matrix." />
        <!-- Hidden, fixed delimiter value.
             NOTE(review): this tool's own command hard-codes the delimiter, so this parameter is
             presumably consumed by the text-export macro tokens; confirm against macros.xml. -->
        <param name="delimiter" argument="--delimiter" type="hidden" value="tab" label="" help=""/>
        <!-- Additional text-output options are expanded from a shared macro. -->
        <expand macro="hdf5_pm_to_txt" />
    </inputs>
    <outputs>
        <!-- Peak intensity matrix output(s) are expanded from a shared macro. -->
        <expand macro="outputs_peak_intensity_matrix" />
        <!-- Sample list written by the create-sample-list step of the command. -->
        <data
            name="samplelist"
            format="tsv"
            label="${tool.name} on ${on_string}: Sample Metadata (updated)"
        />
    </outputs>
    <tests>
        <!-- Single regression test: filter with max_fraction 0.8 and request only the
             standard text export (comprehensive export disabled). -->
        <test>
            <param name="hdf5_file_in" value="pm_as_bf_sf.h5" ftype="h5"/>
            <param name="max_fraction" value="0.8"/>
            <param name="delimiter" value="tab"/>
            <!-- Options below belong to the hdf5_to_txt group, whose definition is expanded
                 from the hdf5_pm_to_txt macro and is not visible in this file. -->
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="False"/>
                <param name="representation_samples" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <!-- The HDF5 output is checked with compare="sim_size" instead of the default comparison.
                 NOTE(review): presumably because the binary file is not byte-reproducible; confirm. -->
            <output name="hdf5_file_out" file="pm_as_bf_sf_mv.h5" ftype="h5" compare="sim_size"/>
            <!-- matrix_file_out is not declared in this file; it presumably comes from the outputs macro. -->
            <output name="matrix_file_out" file="peak_matrix_as_bf_sf_mv.txt" ftype="tsv"/>
            <output name="samplelist" file="sample_list_after_mv_filter.txt" ftype="tsv"/>
        </test>
    </tests>
    <help>
----------------------------
Missing Values Sample Filter
----------------------------

..

---------------------------------------------

Description
-----------

Standard DIMS processing workflow: Process Scans -> [Replicate Filter] -> Align Samples -> Blank Filter -> Sample Filter -> **[Missing values sample filter]** -> Pre-processing -> Statistics

|

This tool is used to remove study samples whose proportion of missing values exceeds the user-defined “Maximum percentage of missing values” from the peak intensity matrix. A missing value is defined as the absence of a recorded peak intensity value for a specific mass spectral peak, in a specific study sample.

Samples with large numbers of missing values are often observed where a failed mass spectral acquisition has occurred, the reasons for which are many and diverse.

---------------------------------------------

Parameters
----------

**Peak Intensity Matrix (HDF5 file)** (REQUIRED) - for routine usage, the input peak intensity matrix should be that generated from the 'Align samples' tool.

**Maximum percentage of missing values** (REQUIRED; default = 0.8) - a numeric value ranging from 0 to 1 (decimal representation of percentage), where:

    - A value of 0 (i.e. 0%) corresponds to a very harsh filtering procedure, in which only those samples with zero missing values are retained in the output peak matrix.
    
    - A value of 1 (i.e. 100%) corresponds to a very liberal filtering procedure, in which samples with as many as 100% missing values will be retained in the output peak matrix.

|

@help_options_addtional_output@

|

---------------------------------------------

Output file(s)
--------------

**IMPORTANT** - in all outputs except for the (optional) comprehensive output, any sample with more than the user-defined maximum percentage of missing values is removed from the output peak matrix.


@help_outputs_matrix@

---------------------------------------------

@github_developers_contributors@
@license@
    </help>
    <expand macro="citations" />
</tool>