diff msi_preprocessing.xml @ 10:df8d7f6f210b draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit e87eea03505ab1ba067e192bbbcdc6197dc4b42e
author galaxyp
date Tue, 04 Sep 2018 13:42:22 -0400
parents 4d5578b57a77
children
line wrap: on
line diff
--- a/msi_preprocessing.xml	Wed Aug 22 13:43:04 2018 -0400
+++ b/msi_preprocessing.xml	Tue Sep 04 13:42:22 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.6">
+<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.7">
     <description>
         mass spectrometry imaging preprocessing
     </description>
@@ -103,7 +103,7 @@
             print('Baseline_reduction')
             ##baseline reduction
 
-            msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline)
+            msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline)
 
             ############################### QC ###########################
 
@@ -132,7 +132,7 @@
 
                 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
             #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
-                print('sgolay smoothing')
+                print('moving average smoothing')
 
                 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
 
@@ -156,7 +156,7 @@
             ## Peakpicking
 
             ## remove duplicated coordinates, otherwise peak picking will fail
-            print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed"))
+            print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
             msidata <- msidata[,!duplicated(coord(msidata))]
 
             #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
@@ -364,14 +364,15 @@
             sample_matrix = cbind(sample_matrix, subsample_calc)
             count = count+1
             }
-            rownames(sample_matrix) = mz(msidata)
-            colnames(sample_matrix) = levels(msidata\$annotation)
-            write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+            sample_matrix_mean = cbind(mz(msidata),sample_matrix)
+            sample_matrix_mean = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_mean)
+            write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{
-            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
-            rownames(full_sample_calc) = mz(msidata)
-            colnames(full_sample_calc) = "$infile.display_name"
-            write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            full_sample_calc_mean = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
+            full_sample_calc_mean = cbind(mz(msidata),full_sample_calc_mean)
+            full_sample_calc_mean = rbind(c("mz", "$infile.display_name"), full_sample_calc_mean)
+            write.table(full_sample_calc_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }
 
     #end if
@@ -391,14 +392,15 @@
             count = count+1
             }
 
-            rownames(sample_matrix) = mz(msidata)
-            colnames(sample_matrix) = levels(msidata\$annotation)
-            write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            sample_matrix_median = cbind(mz(msidata),sample_matrix)
+            sample_matrix_median = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_median)
+            write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{
-            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
-            rownames(full_sample_calc) = mz(msidata)
-            colnames(full_sample_calc) = "$infile.display_name"
-            write.table(full_sample_calc, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+            full_sample_calc_median = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
+            full_sample_calc_median = cbind(mz(msidata),full_sample_calc_median)
+            full_sample_calc_median = rbind(c("mz", "$infile.display_name"), full_sample_calc_median)
+            write.table(full_sample_calc_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }
     #end if
 
@@ -414,15 +416,15 @@
             count = count+1
             }
 
-            rownames(sample_matrix) = mz(msidata)
-            colnames(sample_matrix) = levels(msidata\$annotation)
-            write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            sample_matrix_sd = cbind(mz(msidata),sample_matrix)
+            sample_matrix_sd = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_sd)
+            write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{
 
-            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
-            rownames(full_sample_calc) = mz(msidata)
-            colnames(full_sample_calc) = "$infile.display_name"
-            write.table(full_sample_calc, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            full_sample_calc_sd = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
+            full_sample_calc_sd = cbind(mz(msidata),full_sample_calc_sd)
+            full_sample_calc_sd = rbind(c("mz", "$infile.display_name"), full_sample_calc_sd)
+            write.table(full_sample_calc_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }
     #end if
     print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[]))))
@@ -436,8 +438,8 @@
         if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){
             spectramatrix = spectra(msidata)[]
             spectramatrix = cbind(mz(msidata),spectramatrix)
-            newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix)
-            write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
+            newmatrix = rbind(c("mz", names(pixels(msidata))), spectramatrix)
+            write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{
             print("file has no features or pixels left")
         }
@@ -517,7 +519,7 @@
         </conditional>
         <repeat name="methods" title="Preprocessing" min="1" max="50">
             <conditional name="methods_conditional">
-                <param name="preprocessing_method" type="select" label="Select the preprocessing methods you want to apply">
+                <param name="preprocessing_method" type="select" label="Preprocessing methods">
                     <option value="Normalization" selected="True">Normalization to TIC</option>
                     <option value="Baseline_reduction">Baseline Reduction</option>
                     <option value="Smoothing">Peak smoothing</option>
@@ -529,8 +531,12 @@
                 </param>
                 <when value="Normalization"/>
                 <when value="Baseline_reduction">
-                    <param name="blocks_baseline" type="integer" value="50"
+                    <param name="blocks_baseline" type="integer" value="500"
                         label="Blocks"/>
+                    <param name="spar_baseline" type="float" value="1.0" label="Spar value" 
+                           help = "Smoothing parameter for the spline smoothing 
+                                  applied to the spectrum in order to decide the cutoffs 
+                              for throwing away false noise spikes that might occur inside peaks"/>
                 </when>
                 <when value="Smoothing">
                     <conditional name="methods_for_smoothing">
@@ -540,28 +546,28 @@
                             <option value="ma">moving average</option>
                         </param>
                         <when value="gaussian">
-                            <param name="sd_gaussian" type="float" value="4"
+                            <param name="sd_gaussian" type="float" value="2"
                                    label="The standard deviation for the Gaussian kernel (window/sd)"/>
                         </when>
                         <when value="sgolay">
                             <param name="order_of_filters" type="integer" value="3"
-                                   label="The order of the smoothing filter"/>
+                                   label="The order of the smoothing filter (must be smaller than the window size)"/>
                         </when>
                         <when value="ma">
-                            <param name="coefficients_ma_filter" type="integer" value="1"
+                            <param name="coefficients_ma_filter" type="float" value="1"
                                    label="The coefficients for the moving average filter"/>
                         </when>
                     </conditional>
-                    <param name="window_smoothing" type="integer" value="9"
+                    <param name="window_smoothing" type="float" value="8"
                                 label="Window size"/>
                 </when>
                 <when value="Peak_picking">
-                    <param name="SNR_picking_method" type="integer" value="3"
+                    <param name="SNR_picking_method" type="integer" value="6"
                         label="Signal to noise ratio"
                         help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/>
                     <param name="blocks_picking" type="integer" value="100" label = "Number of blocks"
                         help="Number of blocks in which to divide mass spectrum to calculate noise"/>
-                    <param name="window_picking" type="integer" value="5" label= "Window size" help="Window width for seeking local maxima"/>
+                    <param name="window_picking" type="float" value="5" label= "Window size" help="Window width for seeking local maxima"/>
                     <conditional name="methods_for_picking">
                         <param name="picking_method" type="select" label="Peak picking method" help="only simple works for processed imzML files">
                             <option value="adaptive" selected="True">adaptive</option>
@@ -589,7 +595,7 @@
                             <option value="DP">DP</option>
                         </param>
                         <when value="diff">
-                            <param name="value_diffalignment" type="integer" value="200"
+                            <param name="value_diffalignment" type="float" value="200"
                                    label="diff.max" help="Peaks that differ less than this value will be aligned together"/>
                             <param name="units_diffalignment" type="select" display = "radio" optional = "False"
                                    label="units">
@@ -598,7 +604,7 @@
                             </param>
                         </when>
                         <when value="DP">
-                            <param name="gap_DPalignment" type="integer" value="0"
+                            <param name="gap_DPalignment" type="float" value="0"
                                    label="Gap" help="The gap penalty for the dynamic programming sequence alignment"/>
                         </when>
                     </conditional>
@@ -673,7 +679,7 @@
                 </when>
                 <when value="Transformation">
                     <conditional name="transf_conditional">
-                        <param name="trans_type" type="select" label="Choose which intensity transformation you want to apply" help="logarithm base 2 (log2) or squareroot (sqrt)">
+                        <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or square root (sqrt)">
                             <option value="log2" selected="True">log2</option>
                             <option value="sqrt">sqrt</option>
                         </param>
@@ -708,16 +714,16 @@
     <outputs>
         <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/>
         <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/>
-        <data format="tabular" name="summarized_output_mean" label="$infile.display_name mean_matrix">
+        <data format="tabular" name="summarized_mean" label="$infile.display_name preprocessed_mean">
             <filter>summary_type and "mean" in summary_type</filter>
         </data>
-        <data format="tabular" name="summarized_output_median" label="$infile.display_name median_matrix">
+        <data format="tabular" name="summarized_median" label="$infile.display_name preprocessed_median">
             <filter>summary_type and "median" in summary_type</filter>
         </data>
-        <data format="tabular" name="summarized_output_sd" label="$infile.display_name sd_matrix">
+        <data format="tabular" name="summarized_sd" label="$infile.display_name preprocessed_sd">
             <filter>summary_type and "sd" in summary_type</filter>
         </data>
-        <data format="tabular" name="matrixasoutput" label="$infile.display_name preprocessed_matrix">
+        <data format="tabular" name="intensity_matrix" label="$infile.display_name preprocessed_matrix">
             <filter>output_matrix</filter>
         </data>
     </outputs>
@@ -737,7 +743,9 @@
                     <param name="preprocessing_method" value="Smoothing"/>
                     <conditional name="methods_for_smoothing">
                         <param name="smoothing_method" value="gaussian"/>
+                        <param name="sd_gaussian" value="4"/>
                     </conditional>
+                        <param name="window_smoothing" value="9"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
@@ -748,6 +756,7 @@
                     </conditional>
                     <param name="blocks_picking" value="3"/>
                     <param name="window_picking" value="3"/>
+                    <param name="SNR_picking_method" value="3"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
@@ -777,7 +786,7 @@
             </conditional>
             <param name="output_matrix" value="True"/>
             <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
-            <output name="matrixasoutput" file="preprocessing_results1.txt"/>
+            <output name="intensity_matrix" file="preprocessing_results1.txt"/>
             <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="4">
@@ -811,8 +820,8 @@
             </conditional>
             <param name="summary_type" value="median,sd"/>
             <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
-            <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/>
-            <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
+            <output name="summarized_median" file="preprocessing_median2.txt" lines_diff="2"/>
+            <output name="summarized_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
             <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="3">
@@ -831,6 +840,7 @@
                     <param name="preprocessing_method" value="Peak_picking"/>
                     <param name="blocks_picking" value="100"/>
                     <param name="window_picking" value="5"/>
+                    <param name="SNR_picking_method" value="3"/>
                         <param name="picking_method" value="limpic"/>
                 </conditional>
             </repeat>
@@ -848,7 +858,7 @@
             </conditional>
             <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
             <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
-            <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
+            <output name="summarized_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
         </test>
         <test expect_num_outputs="3">
             <param name="infile" value="" ftype="analyze75">
@@ -872,7 +882,7 @@
             </conditional>
             <param name="output_matrix" value="True"/>
             <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
-            <output name="matrixasoutput" file="preprocessing_results4.txt"/>
+            <output name="intensity_matrix" file="preprocessing_results4.txt"/>
             <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="2">