view blank_filter.xml @ 2:c2078c6d019d draft default tip

"planemo upload for repository https://github.com/computational-metabolomics/dimspy-galaxy commit 80069808371b58f45da0c8133c27d67ac1a5b448"
author computational-metabolomics
date Wed, 17 Feb 2021 10:55:51 +0000
parents 109c81a4138f
children
line wrap: on
line source

<tool id="dimspy_blank_filter" name="Blank Filter" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
    <!-- Short summary of the tool; the text is kept exactly as authored. -->
    <description> - Remove 'blank' peaks from the biological mass spectra</description>
    <!-- Imports shared definitions from macros.xml. The <expand> elements and
         @TOKEN@ placeholders used in this file presumably resolve against it
         (macros.xml is not visible here; confirm there). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Pulls in the shared "requirements" macro. -->
    <expand macro="requirements" />
    <!-- Cheetah command template. Runs the blank filter, then (only when blank
         samples are removed) regenerates the sample list, and finally appends the
         optional steps supplied by the @HDF5_PM_TO_TXT@ tokens. The boolean flag
         renders as an empty string when unchecked, so it is emitted unconditionally. -->
    <command detect_errors="exit_code"><![CDATA[
        dimspy blank-filter
            --input '$hdf5_file_in'
            --output '$hdf5_file_out'
            --blank-label '$blank_label'
            --min-fraction $min_fraction
            --function $function
            --min-fold-change $min_fold_change
            $remove_blank_samples
        #if $remove_blank_samples
            && dimspy create-sample-list
                --input '$hdf5_file_out'
                --output '$samplelist'
                --delimiter tab
        #end if
        #if $hdf5_to_txt.standard
            && @HDF5_PM_TO_TXT@
        #end if
        #if $hdf5_to_txt.comprehensive
            && @HDF5_PM_TO_TXT_COMPREHENSIVE@
        #end if
    ]]></command>
    <inputs>
        <!-- Peak intensity matrix to be filtered. -->
        <param name="hdf5_file_in" argument="--input" type="data" format="h5" label="Peak Intensity Matrix (HDF5 file)" help="" />
        <!-- Class label that identifies the blank samples. The regex is anchored with
             ^ and $ because Galaxy's regex validator only requires a match at the start
             of the value; without the anchors any value with a valid prefix is accepted. -->
        <param name="blank_label" argument="--blank-label" type="text" value="blank" label="Label for the blank samples" >
            <validator type="empty_field"/>
            <validator type="regex" message="Value may include alphanumeric characters, underscores, dashes and spaces.">^[A-Za-z0-9_\- ]+$</validator>
        </param>
        <!-- Fraction (0 to 1) of samples that must exceed the blank threshold for a peak to be kept. -->
        <param name="min_fraction" argument="--min-fraction" type="float" min="0.0" max="1.0" value="1.0" label="Minimum fraction (percentage)" help="The percentage of samples (with an intensity value) that must beat the blank samples (peak by peak basis)" />
        <!-- Statistic applied to the blank samples to derive the intensity threshold. -->
        <param name="function" argument="--function" type="select" label="Function" help="Select the function that should be used to calculate the peak intensity threshold (blanks only)" >
            <option value="mean" selected="true">mean</option>
            <option value="median">median</option>
            <option value="max">max</option>
        </param>
        <!-- The original definition had no label (the text sat in "help"), so the form
             fell back to the raw parameter name. The lower bound matches the documented
             range "from 0 upwards". -->
        <param name="min_fold_change" argument="--min-fold-change" type="float" min="0.0" value="10.0" label="Minimum fold change" help="Minimum required ratio between the intensity of a peak in a non-blank sample and the intensity threshold calculated from the blank samples" />
        <!-- When checked, the truevalue is passed to the command as a flag; unchecked renders as an empty string. -->
        <param name="remove_blank_samples" argument="--remove-blank-samples" type="boolean" checked="true" truevalue="--remove-blank-samples" falsevalue="" label="Remove blank samples (rows)" help="" />
        <!-- Fixed value; not referenced by the command template in this file.
             Presumably consumed by the imported macros (confirm in macros.xml). -->
        <param name="delimiter" argument="--delimiter" type="hidden" value="tab" />
        <expand macro="hdf5_pm_to_txt" />
    </inputs>
    <outputs>
        <expand macro="outputs_peak_intensity_matrix" />
        <!-- Updated sample list; the command only writes it when blank samples are removed.
             Output filters are evaluated as plain Python with the parameter values in scope,
             so the parameter is referenced by bare name: a Cheetah-style $ prefix is a Python
             syntax error, which makes Galaxy ignore the filter and always create the dataset.
             The boolean arrives as True/False, not as its truevalue string. -->
        <data name="samplelist" format="tsv" label="${tool.name} on ${on_string}: Sample Metadata (updated)" >
            <filter>remove_blank_samples</filter>
        </data>
    </outputs>
    <tests>
        <!-- Blank samples removed: expects the filtered matrix, its text export and
             the updated sample list. Boolean parameters are set with the canonical
             true/false values rather than the tool's truevalue/falsevalue strings. -->
        <test>
            <param name="hdf5_file_in" value="pm_as.h5" ftype="h5"/>
            <param name="blank_label" value="blank"/>
            <param name="min_fraction" value="1.0"/>
            <param name="function" value="mean"/>
            <param name="min_fold_change" value="10.0"/>
            <param name="remove_blank_samples" value="true"/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="False"/>
                <param name="representation_samples" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as_bf_rmbs.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_bf_rmbs.txt" ftype="tsv"/>
            <output name="samplelist" file="sample_list_after_bf.txt" ftype="tsv"/>
        </test>
        <!-- Blank samples kept: no sample list is checked; both the standard and the
             comprehensive text exports are requested and compared. -->
        <test>
            <param name="hdf5_file_in" value="pm_as.h5" ftype="h5"/>
            <param name="blank_label" value="blank"/>
            <param name="min_fraction" value="1.0"/>
            <param name="function" value="mean"/>
            <param name="min_fold_change" value="10.0"/>
            <param name="remove_blank_samples" value="false"/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="True"/>
                <param name="representation_samples" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as_bf.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_bf.txt" ftype="tsv"/>
            <output name="matrix_comprehensive_file_out" file="peak_matrix_as_bf_compr.txt" ftype="tsv"/>
        </test>
    </tests>
    <help>
------------
Blank Filter
------------

..

------------------------------

Description
-----------

Standard DIMS processing workflow: Process Scans -> [Replicate Filter] -> Align Samples -> **Blank Filter** -> Sample Filter -> [Missing values sample filter] -> Pre-processing -> Statistics

|

This tool is typically used to subtract peaks from the input peak intensity matrix that are believed to originate from non-biological sources e.g. leachables/extractables from pipette tips, plates, tubes and other consumables, as well as electrical noise peaks and peaks originating from solvents, infusion instrumentation etc. 

In a routine DIMS analytical workflow, a set of extraction blank samples is prepared and analysed alongside other study samples. These “reference” samples are equivalent to the other study samples, with the exception that they do not contain the biological material to be analysed in the study. Peaks detected in these samples are therefore likely to be of non-biological origin. These peaks are removed from the peak intensity matrix if fewer than the user-defined 'minimum fraction' of non-reference study samples have an intensity ratio (relative to the blank class) greater than the user-specified 'Minimum fold change'; e.g. a minimum fraction of 0.5 and 'minimum fold change' of 10 requires that, for any given peak, at least 50% of the non-reference study samples have an intensity value at least 10 times greater than the average intensity value calculated from the “reference” samples.

**Note** - while extraction blank samples are typically used to filter peaks from the peak intensity matrix, this tool can in principle be used to filter peaks originating from any class defined in the Process Scans or Replicate Filter metadata file. The class used for filtering the peak intensity matrix is defined using the **Label for the blank samples** parameter.

-------------------------------

Parameters
----------

|

**Peak Intensity Matrix (HDF5 file)** (REQUIRED) - a peak intensity matrix (in .hdf5 format), typically returned from the 'Align Samples' tool.

**Label for the blank samples** (REQUIRED) - a string indicating the name of the class to be used for filtering (e.g. blank), i.e. the “reference” class. This string must have been included in the “classLabel” column of the metadata file associated with the Process Scans or Replicate Filter tool(s).

**Minimum fraction (percentage)** (REQUIRED; default = 1) - a numeric value ranging from 0 to 1. Setting this value to 0 will skip this filtering step. A value greater than 0 requires that, for each peak in the peak intensity matrix, at least this proportion of non-reference samples has an intensity value that exceeds the product of: (A) the average intensity of the “reference” class and (B) the user-defined “Minimum fold change”. If this condition is not met, the peak is removed from the peak intensity matrix.

**Function** (REQUIRED; default = **mean**) - toggle, where selection of:

    - **mean** - corresponds to using the non-weighted average of “reference” sample peak intensities (NA values are ignored) in calculating the “reference” to “non-reference” peak intensity ratio.

    - **median** - corresponds to using the median of “reference” sample peak intensities (NA values are ignored) in calculating the “reference” to “non-reference” peak intensity ratio.

    - **max** - corresponds to using the maximum intensity among “reference” sample peak intensities (NA values are ignored) in calculating the “reference” to “non-reference” peak intensity ratio.

|

**Minimum fold change** (REQUIRED; default = 10) - numeric value from 0 upwards. When minimum fraction filtering is enabled, this value defines the minimum required ratio between the intensity of a peak in a “non-reference” sample and the average intensity of the “reference” sample(s). Peaks with ratios exceeding this threshold are considered to have been reliably detected in a “non-reference” sample.

|

**Remove blank samples (rows)** (REQUIRED; default = **Yes**) - toggle:

    **Yes** - samples belonging to the user-defined “reference” class are removed from the output peak matrix

    **No** - samples belonging to the user-defined “reference” class are retained in the output peak matrix.

|    

@help_options_addtional_output@

----------------------------

Output file(s)
--------------

**IMPORTANT** - in all outputs except for the (optional) comprehensive output, if fewer than the user-defined “Minimum fraction” of “non-reference” samples had an intensity value that, when divided by the average “reference” class peak intensity value, was greater than the user-defined “Minimum fold change” parameter, then that peak will be removed from the output peak matrix.


@help_outputs_matrix@

-----------------------------

@github_developers_contributors@  
@license@
    </help>
    <expand macro="citations" />
</tool>