view flagRemove.xml @ 9:da36e8bbcfbd draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 20a48a1862267264f98b7c514287f9a5cba1143f
author computational-metabolomics
date Thu, 13 Jun 2024 11:35:07 +0000
parents a0ee43397d1e
children
line wrap: on
line source

<tool id="mspurity_flagremove" name="msPurity.flagRemove" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
    <description>Tool to flag and remove XCMS grouped peaks from the xcmsSet object based on various thresholds
        (e.g. RSD of intensity and retention time).
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Command template: runs flagRemove.R on the selected xcmsSet RData file and
        writes all results into the job working directory. Each optional group of
        options is only emitted when the matching conditional selector is switched
        away from its default (empty) value, so the defaults of the R script apply
        otherwise.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/flagRemove.R'
            --xset_path='$xset_path'
            --out_dir=.

            ## Thresholds applied to the biological sample classes.
            ## rsd_rt_sample is forwarded as well so that the value entered in the form is honoured.
            #if $sample_flag.sample_flag == 'update'
                --rsd_i_sample='$sample_flag.rsd_i_sample'
                --minfrac_sample='$sample_flag.minfrac_sample'
                --rsd_rt_sample='$sample_flag.rsd_rt_sample'
                --ithres_sample='$sample_flag.ithres_sample'
            #end if

            ## Thresholds applied to the blank class.
            ## rsd_rt_blank and s2b are forwarded as well so that the values entered in the form are honoured.
            #if $blank_flag.blank_flag == 'update'
                --blank_class='$blank_flag.blank_class'
                --rsd_i_blank='$blank_flag.rsd_i_blank'
                --minfrac_blank='$blank_flag.minfrac_blank'
                --rsd_rt_blank='$blank_flag.rsd_rt_blank'
                --ithres_blank='$blank_flag.ithres_blank'
                --s2b='$blank_flag.s2b'
            #end if

            ## Remove the flagged peaks and re-group with the given xcms grouping settings.
            #if $peak_removal.peak_removal == 'remove'
                --remove_spectra
                --minfrac_xcms='$peak_removal.minfrac_xcms'
                --mzwid='$peak_removal.mzwid'
                --bw='$peak_removal.bw'
            #end if

            ## Advanced and testing options.
            #if $advanced.advanced == 'update'
                --egauss_thr='$advanced.egauss_thr'
                --polarity='$advanced.polarity'
                --grp_rm_ids='$advanced.grp_rm_ids'
                --xset_name='$advanced.xset_name'
                ## temp_save is a boolean flag: render the parameter itself, unquoted, so it
                ## expands to its truevalue (the bare flag) or to nothing at all.
                $advanced.temp_save
            #end if

            ## Optional sample list used to pick the blank class.
            #if $choose_samp.choose_samp == 'yes'
                --samplelist='$choose_samp.samplelist'
            #end if
    ]]></command>
    <inputs>
        <!-- xcmsSet object to be flagged/filtered. -->
        <param argument="--xset_path" type="data" label="xcmsSet object"
               format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata.xcms.fillpeaks,rdata"
               help="The path to the xcmsSet object saved as an RData file"/>

        <!-- Thresholds for the biological sample classes. The selector is a pure UI switch
             (not a command line flag), hence it is declared with name= like the other selectors. -->
        <conditional name="sample_flag">
            <param name="sample_flag" type="select" label="Change biological sample flag parameters?">
                <option value="update">Update biological sample flag parameters</option>
                <option value="" selected="true">Use default biological sample flag parameters</option>
            </param>
            <when value=""/>
            <when value="update">
                <!-- Text type is used for thresholds that may be left as NA. -->
                <param argument="--rsd_i_sample" type="text" label="rsd_i_sample" value="NA"
                       help="Relative Standard Deviation threshold for the sample classes"/>
                <param argument="--minfrac_sample" type="float" label="minfrac_sample" value="0.5" min="0.0" max="1"
                       help="minimum fraction of files for features needed for the sample classes"/>
                <param argument="--rsd_rt_sample" type="text" label="rsd_rt_sample" value="NA"
                       help="Relative standard Deviation threshold for the retention time of the sample classes"/>
                <param argument="--ithres_sample" type="text" label="ithres_sample" value="NA"
                       help="Intensity threshold for the sample"/>
            </when>
        </conditional>

        <!-- Thresholds for the blank class. -->
        <conditional name="blank_flag">
            <param name="blank_flag" type="select" label="Change blank flag parameters?">
                <option value="update">Update blank flag parameters</option>
                <option value="" selected="true">Use default blank flag parameters</option>
            </param>
            <when value=""/>
            <when value="update">
                <param argument="--blank_class" type="text" label="blank_class" value="blank"
                       help="A string representing the class that will be used for the blank"/>
                <param argument="--rsd_i_blank" type="text" label="rsd_i_blank" value="NA"
                       help="RSD threshold for the blank"/>
                <param argument="--minfrac_blank" type="float" label="minfrac_blank" value="0.5" min="0.0" max="1"
                       help="minimum fraction of files for features needed for the blank"/>
                <param argument="--rsd_rt_blank" type="text" label="rsd_rt_blank" value="NA"
                       help="RSD threshold for the retention time of the blank"/>
                <param argument="--ithres_blank" type="text" label="ithres_blank" value="NA"
                       help="Intensity threshold for the blank"/>
                <!-- Worked example uses sample = 1000 and blank = 10 so that the stated ratio (100) is correct. -->
                <param argument="--s2b" type="float" label="s2b" value="10"
                       help="fold change (sample/blank) needed for sample peak to be allowed. e.g. if s2b set to 10 and the recorded sample 'intensity' value was 1000 and blank was 10. 1000/10 = 100, so sample has fold change higher than the threshold and the peak is not considered a blank"/>
            </when>
        </conditional>

        <!-- Optional removal of flagged peaks followed by xcms re-grouping. -->
        <conditional name="peak_removal">
            <param name="peak_removal" type="select" label="Remove peaks from xcmsSet object?">
                <option value="remove">Remove peaks and re-group</option>
                <option value="" selected="true">Only flag peaks (do not remove and re-group)</option>
            </param>
            <when value=""/>
            <when value="remove">
                <param argument="--minfrac_xcms" type="float" label="minfrac_xcms" value="0.7" min="0.0" max="1"
                       help="minfrac for xcms  grouping"/>
                <param argument="--mzwid" type="float" label="mzwid" value="0.001"
                       help="mzwid for xcms  grouping"/>
                <param argument="--bw" type="float" label="bw" value="5"
                       help="bw for xcms  grouping"/>
            </when>
        </conditional>

        <!-- Advanced and testing parameters. -->
        <conditional name="advanced">
            <param name="advanced" type="select" label="Advanced parameters">
                <option value="update">Update advanced and testing parameters</option>
                <option value="" selected="true">Use default advanced parameters</option>
            </param>
            <when value=""/>
            <when value="update">
                <param argument="--egauss_thr" type="text" label="egauss_thr" value="NA"
                       help="Threshold for filtering out non gaussian shaped peaks. Note this only works if the 'verbose columns' and 'fit gauss' was used with xcms"/>
                <param argument="--temp_save" type="boolean" label="temp_save" checked="false" truevalue="--temp_save" falsevalue=""
                       help="Assign True if files for each step saved (for testing purposes)"/>
                <param argument="--polarity" type="select" label="polarity"
                       help="polarity (just used for naming purpose when files are saved)">
                    <option value="positive">Positive</option>
                    <option value="negative">Negative</option>
                    <option value="NA" selected="true">NA</option>
                </param>
                <param argument="--grp_rm_ids" type="text" label="grp_rm_ids" value="NA"
                       help="comma separated list of grouped_xcms peak ids to remove (corresponds to the row from xcms::group output) e.g '1,20,30,56'"/>
                <param argument="--xset_name" type="text" label="xset_name" value="xset"
                       help="Name of the xcmsSet object within the RData file"/>
            </when>
        </conditional>

        <!-- Optional sample list used to locate the blank class. -->
        <conditional name="choose_samp">
            <param name="choose_samp" type="select" label="Samplelist">
                <option value="yes">Use samplelist</option>
                <option value="" selected="true">Don't use samplelist</option>
            </param>
            <when value=""/>
            <when value="yes">
                <param argument="--samplelist" type="data" label="samplelist" format="tsv,tabular"
                       help="A samplelist can be provided to find an appropriate blank class (requires a column 'blank' where 'yes' indicates the class should be used as the blank)"/>
            </when>
        </conditional>
    </inputs>
    <!--
        Output datasets, each collected from a fixed file name in the job working directory.
        All labels follow the same "tool name on inputs: content" pattern so that results
        of different runs can be told apart in the history.
    -->
    <outputs>
        <data name="peaklist_filtered" format="tabular" label="${tool.name} on ${on_string}: peaklist_filtered (tsv)"
              from_work_dir="peaklist_filtered.tsv" />
        <data name="removed_peaks" format="tabular" label="${tool.name} on ${on_string}: removed_peaks (tsv)"
              from_work_dir="removed_peaks.tsv" />
        <data name="xset_filtered" format="rdata" label="${tool.name} on ${on_string}: xset_filtered (RData)"
              from_work_dir="xset_filtered.RData" />
    </outputs>
    <tests>
        <!-- Blank flagging with class "KO" as the blank, followed by peak removal and re-grouping;
             only the filtered peaklist is compared against the expected file. -->
        <test>
            <param name="xset_path" value="flagRemove_input.RData" />
            <conditional name="blank_flag">
                <param name="blank_flag" value="update" />
                <param name="blank_class" value="KO" />
            </conditional>
            <conditional name="peak_removal">
                <param name="peak_removal" value="remove" />
            </conditional>
            <output name="peaklist_filtered" file="flagRemove_output.tsv" />
        </test>
    </tests>
    <help><![CDATA[

=======================================
Flag & remove peaks from xcmsSet object
=======================================
-----------
Description
-----------

Tool to flag XCMS grouped peaks based on various criteria (e.g. RSD, intensity). The flagged grouped peaks can then be removed
completely from the xcmsSet object (xset). This means removing the individual peaks associated with each file, which are stored
in the **xset@peaks** slot of the xcmsSet object.

Additionally, a list of ids of the xcms grouped peaks can be supplied; all peaks associated with these ids will
be removed.

**Note**: grouped peak refers to a peak that has been grouped together by xcms::group function

-----------------
Updated peaklist
-----------------
The calculated columns for the updated peaklist dataframe include:

* RSD of intensity for grouped peaks across each class
* RSD of retention time for grouped peaks across each class
* Coverage across all classes
* mzmin_full & mzmax_full: the full mzrange of each grouped peak
* rtmin_full & rtmax_full: the full rtrange of each grouped peak
* flag for criteria for the blank class (if 1 it means the blank is valid in at least 1 condition) and this grouped peak will be removed
* flag for criteria for the sample classes (if 1 it means that this grouped peak is valid for this class)
* all_sample_valid: flag for all samples (if 1 it means that at least 1 sample class is valid, this ignores the blank)

To filter out blank peaks, remove all peaks where blank_valid is equal to 1.
These columns are in addition to the standard output from the xcmsSet peaklist.

**flag example**

Dataset consists of 3 classes. Blank, cond1 and cond2. The classes cond1 and cond2 are biological sample classes.

============= ============= ============= ================ ================
blank_valid   cond1_valid   cond2_valid   all_sample_valid Keep peak?
============= ============= ============= ================ ================
0             0             1             1                Yes
------------- ------------- ------------- ---------------- ----------------
0             1             1             1                Yes
------------- ------------- ------------- ---------------- ----------------
1             0             1             1                No
------------- ------------- ------------- ---------------- ----------------
1             0             0             0                No
------------- ------------- ------------- ---------------- ----------------
0             0             0             0                No
============= ============= ============= ================ ================



-----------------
Filters for flags
-----------------

The following filters can be used to determine if a grouped peak gets flagged to keep. If the column all_sample_valid is 0 then
the grouped peak will be removed


* RSD of intensity for each biological sample class
* minfrac for each biological sample class
* RSD of retention time for each biological sample class
* intensity threshold for each biological sample class
* Blank subtraction. If a blank peak is found where the intensity of any corresponding biological sample class is not greater than the s2b threshold (sample/blank), then this grouped peak will be flagged for removal

The blank grouped peaks also have their own filters. If the blank peak passes these criteria and the s2b threshold detailed above,
then the peak will be removed.

* RSD of intensity
* minfrac
* intensity threshold

Additionally there is a filter for assessing how well the peaks fit the gaussian shape. Note that this can only be performed
when XCMS has fit_gauss option and verbose columns set to TRUE. Also, these peaks are just removed and not flagged.



-----------
Regrouping
-----------
The resulting xcmsSet object, from which the flagged peaks have been removed, needs to be regrouped; otherwise the individual peaks
associated with each file will not be correctly linked to the grouped peaks.

This tool will re-group the xcmsSet object and check the newly created re-grouped xcmsSet object to see if any peaks are
still being flagged. If so, the process will be repeated until the xcmsSet object only contains peaks that match
the peak criteria.

The output file is an xcmsSet.RData file.
    ]]></help>

<expand macro="citations" />
</tool>