diff msi_preprocessing.xml @ 3:b9523950e79d draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 1c808d60243bb1eeda0cd26cb4b0a17ab05de2c0
author galaxyp
date Mon, 28 May 2018 12:38:04 -0400
parents 50ba097d03e0
children 42580292d381
line wrap: on
line diff
--- a/msi_preprocessing.xml	Mon Apr 23 17:19:16 2018 -0400
+++ b/msi_preprocessing.xml	Mon May 28 12:38:04 2018 -0400
@@ -1,9 +1,9 @@
-<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.7.0.2">
+<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.0">
     <description>
         mass spectrometry imaging preprocessing
     </description>
     <requirements>
-        <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
+        <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
         <requirement type="package" version="2.2.1">r-gridextra</requirement>
         <requirement type="package" version="0.20-35">r-lattice</requirement>
     </requirements>
@@ -11,12 +11,12 @@
     <![CDATA[
 
         #if $infile.ext == 'imzml'
-            cp '${infile.extra_files_path}/imzml' infile.imzML &&
-            cp '${infile.extra_files_path}/ibd' infile.ibd &&
+            ln -s '${infile.extra_files_path}/imzml' infile.imzML &&  ## symlink both parts of the imzml input from extra_files_path under the fixed base name "infile"
+            ln -s '${infile.extra_files_path}/ibd' infile.ibd &&
         #elif $infile.ext == 'analyze75'
-            cp '${infile.extra_files_path}/hdr' infile.hdr &&
-            cp '${infile.extra_files_path}/img' infile.img &&
-            cp '${infile.extra_files_path}/t2m' infile.t2m &&
+            ln -s '${infile.extra_files_path}/hdr' infile.hdr &&  ## same for the three Analyze 7.5 parts (hdr, img, t2m)
+            ln -s '${infile.extra_files_path}/img' infile.img &&
+            ln -s '${infile.extra_files_path}/t2m' infile.t2m &&
         #else
             ln -s '$infile' infile.RData &&
         #end if
@@ -32,15 +32,15 @@
 library(lattice)
 
 #if $infile.ext == 'imzml'
-    msidata <- readMSIData('infile.imzML')
+    msidata = readImzML('infile')  ## 'infile' is the base name of the infile.imzML / infile.ibd links created in the command section
 #elif $infile.ext == 'analyze75'
-    msidata <- readMSIData('infile.hdr')
+    msidata = readAnalyze('infile')  ## 'infile' is the base name of the infile.hdr / infile.img / infile.t2m links created in the command section
 #else
     load('infile.RData')
 #end if
 
 
-##################################### Preparations for QC report #######################################################################################
+##################################### Preparations for QC report ###############
 
 #if $outputs.outputs_select == "quality_control":
 
@@ -54,7 +54,7 @@
 
 
         ### Read tabular file with calibrant masses: 
-        calibrant_list = read.delim("$outputs.calibrant_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
+        calibrant_list = read.delim("$outputs.calibrant_file", header = FALSE, stringsAsFactors = FALSE)
 
             ### calculate how many input calibrant masses are valid: 
             inputcalibrants = calibrant_list[calibrant_list[,$outputs.calibrants_column]>min(mz(msidata)) & calibrant_list[,$outputs.calibrants_column]<max(mz(msidata)),$outputs.calibrants_column]
@@ -85,18 +85,19 @@
     }
 
 
-        current_plot_raw <- vector(length(inputcalibrants), mode='list')
+        current_plot_raw = vector(length(inputcalibrants), mode='list')
 
 
 #end if
 
-################################################### Preprocessing steps #######################################################################################
+############################### Preprocessing steps ###########################
 #for $method in $methods:
+
     #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
         print('Normalization')
         ##normalization
 
-        msidata <- normalize(msidata, method="tic")
+        msidata = normalize(msidata, method="tic")
 
         #if $outputs.outputs_select == "quality_control":
             ### values for QC table:
@@ -152,13 +153,13 @@
 
         #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
             print('gaussian smoothing')
-            msidata <- smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)
+            msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)
         #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
             print('sgolay smoothing')
-            msidata <- smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
+            msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
         #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
-            print('sgolay smoothing')
+            print('moving average smoothing')  ## log label for the "ma" branch, which passes coef rather than order to smoothSignal
-            msidata <- smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
+            msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
         #end if
 
         #if $outputs.outputs_select == "quality_control":
@@ -226,7 +227,7 @@
             align_peak_reference = msidata
 
         #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':
-            align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
+            align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE)
 
             align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column]
 
@@ -317,7 +318,7 @@
             print('peaks reduction')
 
             #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table':
-                reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
+                reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE)
                 reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column]
                 peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))]
 
@@ -351,10 +352,25 @@
                assign(paste("reduced",calibrant, sep="_"), currentimage)
             }
         #end if
+
+    #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
+        print('Transformation')
+
+            #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
+                print('log2 transformation')
+                print(paste0("Number of 0 which were converted into NA:",sum(spectra(msidata) == 0, na.rm=TRUE)))  ## counted before the replacement so NAs already present in the data are not reported as converted zeros
+                spectra(msidata)[spectra(msidata) ==0] = NA  ## log2(0) is -Inf, so zero intensities are stored as NA instead
+                spectra(msidata) = log2(spectra(msidata))
+
+            #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
+                print('squareroot transformation')
+                spectra(msidata) = sqrt(spectra(msidata))
+
+            #end if
     #end if
 #end for
 
-####################################################### Outputs: RData, tabular and QC report ######################################################
+###################### Outputs: RData, tabular and QC report ###################
 
 
 ## save as (.RData)
@@ -410,7 +426,7 @@
     ]]></configfile>
     </configfiles>
     <inputs>
-        <param name="infile" type="data" format="imzml,rdata,analyze75"
+        <!-- The accepted datatypes must match the $infile.ext values tested in the command and script sections: imzml, analyze75, and anything else handled as RData. -->
+        <param name="infile" type="data" format="imzml,rdata,analyze75"
             label="MSI rawdata as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
             help="load imzml and ibd file by uploading composite datatype imzml"/>
         <repeat name="methods" title="Preprocessing" min="1" max="50">
@@ -423,8 +439,8 @@
                     <option value="Peak_alignment">Peak alignment</option>
                     <option value="Peak_filtering">Peak filtering</option>
                     <option value="Data_reduction">Data reduction</option>
+                    <option value="Transformation">Transformation</option>
                 </param>
-
                 <when value="Normalization"/>
                 <when value="Baseline_reduction">
                     <param name="blocks_baseline" type="integer" value="50"
@@ -530,7 +546,7 @@
                         </param>
                         <when value="bin">
                             <param name="bin_width" type="float" value="1"
-                                   label="The width of a bin in mz or ppm" help="Width must be greater than Range of mz values/Number of mz features"/>
+                                   label="The width of a bin in mz or ppm" help="Width must be greater than range of mz values divided by number of mz features"/>
                             <param name="bin_units" type="select" display="radio"
                                    label="Unit for bin">
                                     <option value="mz" selected="True">mz</option>
@@ -544,7 +560,7 @@
                         </when>
                         <when value="resample">
                             <param name="resample_step" type="float" value="1"
-                                   label="The step size in mz" help="Step size must be greater than Range of mz values/Number of mz features"/>
+                                   label="The step size in mz" help="Step size must be greater than range of mz values divided by number of mz features"/>
                         </when>
                         <when value="peaks">
                             <param name="peaks_type" type="select" display="radio"
@@ -569,6 +585,16 @@
                         </when>
                     </conditional>
                 </when>
+                <when value="Transformation">
+                    <conditional name="transf_conditional">
+                        <param name="trans_type" type="select" label="Choose which intensity transformation you want to apply" help="logarithm base 2 (log2) or squareroot (sqrt)">
+                            <option value="log2" selected="True">log2</option>
+                            <option value="sqrt">sqrt</option>
+                        </param>
+                            <when value="log2"/>
+                            <when value="sqrt"/>
+                    </conditional>
+                </when>
             </conditional>
         </repeat>
         <conditional name="outputs">
@@ -597,67 +623,79 @@
         </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="2">
             <param name="infile" value="" ftype="imzml">
                 <composite_data value="Example_Continuous.imzML"/>
                 <composite_data value="Example_Continuous.ibd"/>
             </param>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Normalization" />
+                    <param name="preprocessing_method" value="Normalization"/>
+                </conditional>
+            </repeat>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="preprocessing_method" value="Smoothing"/>
+                    <conditional name="methods_for_smoothing">
+                        <param name="smoothing_method" value="gaussian"/>
+                    </conditional>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Smoothing" />
-                    <conditional name="methods_for_smoothing">
-                        <param name="smoothing_method" value="gaussian" />
+                    <param name="preprocessing_method" value="Peak_picking"/>
+                    <conditional name="methods_for_picking">
+                        <param name="picking_method" value="adaptive"/>
+                    </conditional>
+                    <param name="blocks_picking" value="3"/>
+                    <param name="window_picking" value="3"/>
+                </conditional>
+            </repeat>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="preprocessing_method" value="Peak_alignment"/>
+                    <conditional name="methods_for_alignment">
+                        <param name="alignment_method" value="diff"/>
                     </conditional>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Peak_picking" />
-                    <conditional name="methods_for_picking">
-                        <param name="picking_method" value="adaptive" />
-                    </conditional>
-                    <param name="blocks_picking" value="3" />
-                    <param name="window_picking" value="3" />
+                    <param name="preprocessing_method" value="Peak_filtering"/>
+                    <param name="frequ_filtering" value="2"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Peak_alignment" />
-                    <conditional name="methods_for_alignment">
-                        <param name="alignment_method" value="diff" />
+                    <param name="preprocessing_method" value="Transformation"/>
+                        <conditional name="transf_conditional">
+                            <param name="trans_type" value="sqrt"/>
+                        </conditional>
+                </conditional>
+            </repeat>
+            <param name="outputs_select" value="no_quality_control"/>
+            <param name="output_matrix" value="True"/>
+            <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
+            <output name="matrixasoutput" file="preprocessing_results1.txt"/>
+        </test>
+        <test expect_num_outputs="3">
+            <param name="infile" value="example_continous.RData" ftype="rdata"/>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="preprocessing_method" value="Peak_picking"/>
+                    <param name="blocks_picking" value="3"/>
+                    <param name="window_picking" value="5"/>
+                    <conditional name="methods_for_picking">
+                        <param name="picking_method" value="simple"/>
                     </conditional>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Peak_filtering" />
-                    <param name="frequ_filtering" value="2"/>
-                </conditional>
-            </repeat>
-            <param name="outputs_select" value="no_quality_control"/>
-            <param name="output_matrix" value="True"/>
-            <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size" />
-            <output name="matrixasoutput" file="preprocessing_results1.txt" />
-        </test>
-        <test>
-            <param name="infile" value="example_continous.RData" ftype="rdata"/>
-            <repeat name="methods">
-                <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Peak_picking" />
-                    <param name="blocks_picking" value="3" />
-                    <param name="window_picking" value="5" />
-                    <param name="methods_for_picking" value="simple" />
-                </conditional>
-            </repeat>
-            <repeat name="methods">
-                <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Peak_alignment" />
-                    <param name="methods_for_alignment" value="DP" />
+                    <param name="preprocessing_method" value="Peak_alignment"/>
+                    <conditional name="methods_for_alignment">
+                        <param name="alignment_method" value="DP"/>
+                    </conditional>
                 </conditional>
             </repeat>
             <param name="outputs_select" value="quality_control"/>
@@ -665,11 +703,11 @@
             <param name="calibrants_column" value="1"/>
             <param name="plusminus_dalton" value="0.25"/>
             <param name="output_matrix" value="True"/>
-            <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size" />
-            <output name="matrixasoutput" file="preprocessing_results2.txt" lines_diff="2" />
+            <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
+            <output name="matrixasoutput" file="preprocessing_results2.txt" lines_diff="2"/>
             <output name="QC_plots" file="preprocessing_results2.pdf" compare="sim_size"/>
         </test>
-        <test>
+        <test expect_num_outputs="2">
             <param name="infile" value="" ftype="analyze75">
                 <composite_data value="Analyze75.hdr"/>
                 <composite_data value="Analyze75.img"/>
@@ -677,31 +715,35 @@
             </param>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Normalization" />
+                    <param name="preprocessing_method" value="Normalization"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Peak_picking" />
-                    <param name="blocks_picking" value="3" />
-                    <param name="window_picking" value="5" />
-                    <param name="methods_for_picking" value="limpic" />
+                    <param name="preprocessing_method" value="Peak_picking"/>
+                    <param name="blocks_picking" value="100"/>
+                    <param name="window_picking" value="5"/>
+                    <conditional name="methods_for_picking">
+                        <param name="picking_method" value="limpic"/>
+                    </conditional>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Peak_alignment" />
-                    <param name="methods_for_alignment" value="diff" />
+                    <param name="preprocessing_method" value="Peak_alignment"/>
+                    <conditional name="methods_for_alignment">
+                        <param name="alignment_method" value="diff"/>
+                    </conditional>
                 </conditional>
             </repeat>
             <param name="outputs_select" value="quality_control"/>
             <param name="calibrant_file" ftype="tabular" value="inputcalibrantfile2.tabular"/>
             <param name="calibrants_column" value="1"/>
             <param name="plusminus_dalton" value="0.25"/>
-            <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size" />
+            <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
             <output name="QC_plots" file="preprocessing_results3.pdf" compare="sim_size"/>
         </test>
-        <test>
+        <test expect_num_outputs="2">
             <param name="infile" value="" ftype="analyze75">
                 <composite_data value="Analyze75.hdr"/>
                 <composite_data value="Analyze75.img"/>
@@ -709,29 +751,40 @@
             </param>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Normalization" />
+                    <param name="preprocessing_method" value="Normalization"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Data_reduction" />
-                    <param name="bin_width" value="0.1" />
+                    <param name="preprocessing_method" value="Data_reduction"/>
+                    <param name="bin_width" value="0.1"/>
                 </conditional>
             </repeat>
             <param name="outputs_select" value="no_quality_control"/>
             <param name="output_matrix" value="True"/>
-            <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size" />
-            <output name="matrixasoutput" file="preprocessing_results4.txt" />
+            <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
+            <output name="matrixasoutput" file="preprocessing_results4.txt"/>
         </test>
-        <test>
+        <test expect_num_outputs="3">
             <param name="infile" value="" ftype="imzml">
                 <composite_data value="Example_Continuous.imzML"/>
                 <composite_data value="Example_Continuous.ibd"/>
             </param>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Data_reduction" />
-                    <param name="step_width" value="0.1" />
+                    <param name="preprocessing_method" value="Data_reduction"/>
+                        <conditional name="methods_for_reduction">
+                            <param name="reduction_method" value="resample"/>
+                            <param name="step_width" value="0.1"/> <!-- NOTE(review): the "resample" option declares its step parameter as "resample_step"; "step_width" matches no parameter visible in this diff. Confirm whether this value is applied before renaming, since the expected outputs may rely on the default step. -->
+                        </conditional>
+                </conditional>
+            </repeat>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="preprocessing_method" value="Transformation"/>
+                        <conditional name="transf_conditional">
+                            <param name="trans_type" value="log2"/>
+                        </conditional>
                 </conditional>
             </repeat>
             <param name="outputs_select" value="quality_control"/>
@@ -739,24 +792,46 @@
             <param name="calibrants_column" value="1"/>
             <param name="plusminus_dalton" value="0.25"/>
             <param name="output_matrix" value="True"/>
-            <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size" />
-            <output name="matrixasoutput" file="preprocessing_results5.txt" />
+            <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/>
+            <output name="matrixasoutput" file="preprocessing_results5.txt"/>
             <output name="QC_plots" file="preprocessing_results5.pdf" compare="sim_size"/>
         </test>
     </tests>
     <help>
         <![CDATA[
-Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets.
+
+Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org/>`_
+
+This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data.
+
+Input data: 3 types of input data can be used:
 
-**Preprocessing steps:**
+- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
+- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
+- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
 
-- Smoothening: Smoothing of the mass peaks reduces noise and improves peak detection
+Options:
+
 - Normalization: Normalization of intensities to total ion current (TIC)
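-- Baseline reduction: Baseline  reduction removes backgroundintensity generated by chemical noise (common in MALDI datasets)
+- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets)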
-- Peak picking: relevant peaks are picked while noise-peaks are removed
-- Peak alignment: after peak picking, mz inaccuracies are removed by alignment of same peaks to a common mz value
-- Peak filtering: after peak picking and alignment, removes peaks that occur infrequently, such as those which only occur in a small proportion of pixels. If not sure which cutoff to chose run qualitycontrol first and decide according to the zero value plot.
+- Smoothing: Smoothing of the mass peaks reduces noise and improves peak detection
+- Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards)
+- Peak alignment: only possible after peak picking, mz inaccuracies are removed by alignment of same peaks to a common mz value
+- Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If unsure which cutoff to choose, run the quality control first and decide according to the zero value plot.
 - Data reduction: binning, resampling or peak filtering to reduce data
+- Transformation: log2 or squareroot transformation of all intensities
+
+Output: 
+
+- preprocessed Cardinal "MSImageSet" (variable name "msidata"), saved as .RData
+- optional: pdf with heatmap of masses of interest after each preprocessing step
+- optional: intensity matrix as tabular file (intensities for masses in rows and pixel in columns)
+
+Tip: 
+
+- Peak alignment works only after peak picking
+- Peak filtering works only on centroided data (peak picking and alignment or Data reduction peaks)
+
         ]]>
     </help>
     <citations>