Mercurial > repos > recetox > ramclustr

--- a/macros.xml	Tue Apr 16 11:30:05 2024 +0000
+++ b/macros.xml	Wed Apr 17 07:51:23 2024 +0000
@@ -184,9 +184,55 @@
                    help="NA, NaN, and Inf values are replaced with zero, and zero values are sometimes returned from
                    peak picking. When TRUE, zero values will be replaced with a small amount of noise, with noise level
                    set based on the detected signal intensities for that feature."/>
+            <param label="Quality control" name="quality_control" type="boolean" truevalue="TRUE" falsevalue="FALSE"
+                   checked="false"
+                   help="Add graphs to the Rplots.pdf output that summarize quality control for clustering and for quality
+                   control sample variation based on compound ($SpecAbund) and feature ($MSdata and $MSMSdata, if present)."/>
             <param label="Experimental design metadata" name="ExpDes" type="data" format="csv" optional="true"
                    help="Definition of experimental design in CSV format." />
         </section>
+
+        <section name="filtering" title="Filtering">
+            <conditional name="feature_filter_blanks">
+                <param label="Filter blanks" name="filter_blanks" type="select"
+                    help="Removes features that are found at similar intensity in blank samples.">
+                    <option value="FALSE" selected="true">FALSE</option>
+                    <option value="TRUE">TRUE</option>
+                </param>
+                <when value="TRUE">
+                    <param label="qc tag" name="qc_tag" type="text" value="QC"
+                        help="Character vector of length one or two. If length is two, enter search string and factor name in $phenoData slot
+                        (e.g. c('QC', 'sample.type')). If length one (e.g. 'QC'), will search for this string in the 'sample.names' slot by default.
+                        Default is 'QC'"/>
+                    <param label="blank tag" name="blank_tag" type="text" value="blank"
+                        help="Same as 'qc tag', but for the blank samples used as background. Default is 'blank'"/>
+                    <param label="signal to noise (sn)" name="sn" type="integer" value="3"
+                        help="Numeric defining the ratio for 'signal', i.e. sn = 3 indicates that signal intensity must be 3 fold higher in sample than in blanks,
+                        on average, to be retained. Default is '3'"/>
+                    <param label="Remove blanks" name="remove_blanks" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true"
+                        help="TRUE by default. This removes any recognized blank samples from the MSdata and MSMSdata sets after they are used to filter
+                        contaminant features."/>
+                </when>
+                <when value="FALSE"></when>
+            </conditional>
+
+            <conditional name="feature_filter_cv">
+                <param label="Filter cv" name="filter_cv" type="select"
+                    help="Removes features whose coefficient of variation (cv) across the QC samples exceeds 'Max cv', in preparation
+                    for clustering. It is recommended to first run 'Filter blanks' to remove non-sample derived signal">
+                    <option value="FALSE" selected="true">FALSE</option>
+                    <option value="TRUE">TRUE</option>
+                </param>
+                <when value="TRUE">
+                    <param label="qc tag" name="qc_tag" type="text" value="QC"
+                        help="Character vector of length one or two. If length is two, enter search string and factor name in $phenoData slot
+                        (e.g. c('QC', 'sample.type')). If length one (e.g. 'QC'), will search for this string in the 'sample.names' slot by default.
+                        Default is 'QC'"/>
+                    <param label="Max cv" name="max_cv" type="float" value="0.5" help="Numeric maximum allowable cv for any feature. Default = 0.5."/>
+                </when>
+                <when value="FALSE"></when>
+            </conditional>
+        </section>
     </xml>

     <xml name="output_msp">
@@ -197,6 +243,9 @@
        <data label="Mass spectra from ${tool.name} on ${on_string}" name="mass_spectra_merged" format="msp">
            <filter>msp_output_details['merge_msp']</filter>
        </data>
+       <data format="pdf" name="pdf_plot" from_work_dir="Rplots.pdf" label="PDF plot.">
+            <filter>extras['quality_control'] or filtering['feature_filter_cv']['filter_cv'] == "TRUE"</filter>
+       </data>
     </xml>

     <xml name="citations">
--- a/ramclustr.xml	Tue Apr 16 11:30:05 2024 +0000
+++ b/ramclustr.xml	Wed Apr 17 07:51:23 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="ramclustr" name="RAMClustR" version="@TOOL_VERSION@+galaxy4" profile="21.09">
+<tool id="ramclustr" name="RAMClustR" version="@TOOL_VERSION@+galaxy5" profile="21.09">
     <description>A feature clustering algorithm for non-targeted mass spectrometric metabolomics data.</description>
     <macros>
         <import>macros.xml</import>
@@ -72,6 +72,16 @@
             ramclustObj = RAMClustR::rc.feature.replace.na(ramclustObj = ramclustObj)
             #end if

+            #if $filtering.feature_filter_blanks.filter_blanks == "TRUE":
+            ramclustObj = RAMClustR::rc.feature.filter.blanks(
+                ramclustObj = ramclustObj,
+                qc.tag = "${filtering.feature_filter_blanks.qc_tag}",
+                blank.tag = "${filtering.feature_filter_blanks.blank_tag}",
+                sn = $filtering.feature_filter_blanks.sn,
+                remove.blanks = $filtering.feature_filter_blanks.remove_blanks
+            )
+            #end if
+
             #if $normalisation.normalisation_method.normalize != "none":
             ramclustObj = apply_normalisation(
                 ramclustr_obj = ramclustObj,
@@ -89,6 +99,14 @@
             )
             #end if

+            #if $filtering.feature_filter_cv.filter_cv == "TRUE":
+            ramclustObj = RAMClustR::rc.feature.filter.cv(
+                ramclustObj = ramclustObj,
+                qc.tag = "${filtering.feature_filter_cv.qc_tag}",
+                max.cv = $filtering.feature_filter_cv.max_cv
+            )
+            #end if
+
             ramclustObj = RAMClustR::rc.ramclustr(
                 ramclustObj = ramclustObj,
                 st = $filetype.required.st,
@@ -104,6 +122,10 @@
                 rt.only.low.n = $extras.rt_only_low_n,
                 fftempdir = NULL,
             )
+
+            #if $extras.quality_control == "TRUE":
+            ramclustObj = RAMClustR::rc.qc(ramclustObj = ramclustObj)
+            #end if

             store_output(
                 ramclustr_obj = ramclustObj,
@@ -235,6 +257,40 @@
             </section>
             <output name="mass_spectra_merged" file="test5_spectra.msp" ftype="msp" lines_diff="10"/>
         </test>
+        <test expect_num_outputs="3"><!-- TEST 6 -->
+            <section name="filetype">
+                <param name="type_choice" value="xcms"/>
+                <section name="xcms">
+                    <param name="input_xcms" value="test6.fillpeaks" ftype="rdata.xcms.fillpeaks"/>
+                </section>
+            </section>
+            <section name="msp_output_details">
+                <param name="mzdec" value="4"/>
+            </section>
+            <section name="extras">
+                <param name="quality_control" value="TRUE"/>
+            </section>
+            <section name="filtering">
+                <section name="feature_filter_blanks">
+                    <param name="filter_blanks" value="TRUE"/>
+                    <param name="blank_tag" value="Blanc"/>
+                </section>
+                <section name="feature_filter_cv">
+                    <param name="filter_cv" value="TRUE"/>
+                </section>
+            </section>
+            <output name="mass_spectra_merged" file="test6_out.msp" ftype="msp"/>
+            <output name="spec_abundance">
+                <assert_contents>
+                    <has_size value="309" delta="10"/>
+                </assert_contents>
+            </output>
+            <output name="pdf_plot">
+                <assert_contents>
+                    <has_size value="6842" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>

     <help>
Binary file test-data/test6.fillpeaks has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test6_out.msp	Wed Apr 17 07:51:23 2024 +0000
@@ -0,0 +1,53 @@
+NAME:C1
+IONMODE:Negative
+SPECTRUMTYPE:Centroid
+RETENTIONTIME:54.89
+Num Peaks:10
+135.0295 9979069
+179.056 5594441
+195.0506 2625643
+165.0401 1960745
+187.0372 1743661
+148.0279 995311
+146.0145 950514
+219.0448 540250
+189.0327 411265
+180.0589 227648
+
+NAME:C2
+IONMODE:Negative
+SPECTRUMTYPE:Centroid
+RETENTIONTIME:63.74
+Num Peaks:8
+191.0193 28601409
+111.0086 2371386
+129.0192 2125989
+192.0223 1724583
+85.0294 991832
+87.0087 956947
+359.0473 811419
+101.0241 397166
+
+NAME:C3
+IONMODE:Negative
+SPECTRUMTYPE:Centroid
+RETENTIONTIME:59.06
+Num Peaks:7
+149.0451 7763606
+151.0067 3434565
+166.0174 822559
+75.0087 771138
+177.0401 707232
+209.066 258300
+178.0633 166079
+
+NAME:C4
+IONMODE:Negative
+SPECTRUMTYPE:Centroid
+RETENTIONTIME:50.66
+Num Peaks:4
+96.9692 6132644
+102.9565 1867855
+176.8926 120442
+184.8312 80882
+