Mercurial > repos > galaxyp > msi_preprocessing
changeset 10:df8d7f6f210b draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit e87eea03505ab1ba067e192bbbcdc6197dc4b42e
author | galaxyp |
---|---|
date | Tue, 04 Sep 2018 13:42:22 -0400 (2018-09-04) |
parents | 4d5578b57a77 |
children | |
files | msi_preprocessing.xml test-data/preprocessing_mean3.txt test-data/preprocessing_median2.txt test-data/preprocessing_results1.RData test-data/preprocessing_results1.pdf test-data/preprocessing_results1.txt test-data/preprocessing_results2.pdf test-data/preprocessing_results3.RData test-data/preprocessing_results3.pdf test-data/preprocessing_results4.RData test-data/preprocessing_results4.pdf test-data/preprocessing_results4.txt test-data/preprocessing_results5.RData test-data/preprocessing_results5.pdf test-data/preprocessing_sd2.txt |
diffstat | 15 files changed, 62 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/msi_preprocessing.xml Wed Aug 22 13:43:04 2018 -0400 +++ b/msi_preprocessing.xml Tue Sep 04 13:42:22 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.6"> +<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.7"> <description> mass spectrometry imaging preprocessing </description> @@ -103,7 +103,7 @@ print('Baseline_reduction') ##baseline reduction - msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline) + msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline) ############################### QC ########################### @@ -132,7 +132,7 @@ msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': - print('sgolay smoothing') + print('moving average smoothing') msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) @@ -156,7 +156,7 @@ ## Peakpicking ## remove duplicated coordinates, otherwise peak picking will fail - print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed")) + print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) msidata <- msidata[,!duplicated(coord(msidata))] #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': @@ -364,14 +364,15 @@ sample_matrix = cbind(sample_matrix, subsample_calc) count = count+1 } - rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$annotation) - write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + + sample_matrix_mean = cbind(mz(msidata),sample_matrix) + sample_matrix_mean = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_mean) + write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") }else{ - full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE)) - rownames(full_sample_calc) = mz(msidata) - colnames(full_sample_calc) = "$infile.display_name" - write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + full_sample_calc_mean = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE)) + full_sample_calc_mean = cbind(mz(msidata),full_sample_calc_mean) + full_sample_calc_mean = rbind(c("mz", "$infile.display_name"), full_sample_calc_mean) + write.table(full_sample_calc_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") } #end if @@ -391,14 +392,15 @@ count = count+1 } - rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$annotation) - write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + sample_matrix_median = cbind(mz(msidata),sample_matrix) + sample_matrix_median = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_median) + write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") }else{ - full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE)) - rownames(full_sample_calc) = mz(msidata) - colnames(full_sample_calc) = "$infile.display_name" - write.table(full_sample_calc, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + + full_sample_calc_median = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE)) + full_sample_calc_median = cbind(mz(msidata),full_sample_calc_median) + full_sample_calc_median = rbind(c("mz", "$infile.display_name"), full_sample_calc_median) + write.table(full_sample_calc_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") } #end if @@ -414,15 +416,15 @@ count = count+1 } - rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$annotation) - write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + sample_matrix_sd = cbind(mz(msidata),sample_matrix) + sample_matrix_sd = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_sd) + write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") }else{ - full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE)) - rownames(full_sample_calc) = mz(msidata) - colnames(full_sample_calc) = "$infile.display_name" - write.table(full_sample_calc, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + full_sample_calc_sd = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE)) + full_sample_calc_sd = cbind(mz(msidata),full_sample_calc_sd) + full_sample_calc_sd = rbind(c("mz", "$infile.display_name"), full_sample_calc_sd) + write.table(full_sample_calc_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") } #end if print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[])))) @@ -436,8 +438,8 @@ if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ spectramatrix = spectra(msidata)[] spectramatrix = cbind(mz(msidata),spectramatrix) - newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) - write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + newmatrix = rbind(c("mz", names(pixels(msidata))), spectramatrix) + write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") }else{ print("file has no features or pixels left") } @@ -517,7 +519,7 @@ </conditional> <repeat name="methods" title="Preprocessing" min="1" max="50"> <conditional name="methods_conditional"> - <param name="preprocessing_method" type="select" label="Select the preprocessing methods you want to apply"> + <param name="preprocessing_method" type="select" label="Preprocessing methods"> <option value="Normalization" selected="True">Normalization to TIC</option> <option value="Baseline_reduction">Baseline Reduction</option> <option value="Smoothing">Peak smoothing</option> @@ -529,8 +531,12 @@ </param> <when value="Normalization"/> <when value="Baseline_reduction"> - <param name="blocks_baseline" type="integer" value="50" + <param name="blocks_baseline" type="integer" value="500" label="Blocks"/> + <param name="spar_baseline" type="float" value="1.0" label="Spar value" + help = "Smoothing parameter for the spline smoothing + applied to the spectrum in order to decide the cutoffs + for throwing away false noise spikes that might occur inside peaks"/> </when> <when value="Smoothing"> <conditional name="methods_for_smoothing"> @@ -540,28 +546,28 @@ <option value="ma">moving average</option> </param> <when value="gaussian"> - <param name="sd_gaussian" type="float" value="4" + <param name="sd_gaussian" type="float" value="2" label="The standard deviation for the Gaussian kernel (window/sd)"/> </when> <when value="sgolay"> <param name="order_of_filters" type="integer" value="3" - label="The order of the smoothing filter"/> + label="The order of the smoothing filter, must be smaller than window size"/> </when> <when value="ma"> - <param name="coefficients_ma_filter" type="integer" value="1" + <param name="coefficients_ma_filter" type="float" value="1" label="The coefficients for the moving average filter"/> </when> </conditional> - <param name="window_smoothing" type="integer" value="9" + <param name="window_smoothing" type="float" value="8" label="Window size"/> </when> <when value="Peak_picking"> - <param name="SNR_picking_method" type="integer" value="3" + <param name="SNR_picking_method" type="integer" value="6" label="Signal to noise ratio" help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/> <param name="blocks_picking" type="integer" value="100" label = "Number of blocks" help="Number of blocks in which to divide mass spectrum to calculate noise"/> - <param name="window_picking" type="integer" value="5" label= "Window size" help="Window width for seeking local maxima"/> + <param name="window_picking" type="float" value="5" label= "Window size" help="Window width for seeking local maxima"/> <conditional name="methods_for_picking"> <param name="picking_method" type="select" label="Peak picking method" help="only simple works for processed imzML files"> <option value="adaptive" selected="True">adaptive</option> @@ -589,7 +595,7 @@ <option value="DP">DP</option> </param> <when value="diff"> - <param name="value_diffalignment" type="integer" value="200" + <param name="value_diffalignment" type="float" value="200" label="diff.max" help="Peaks that differ less than this value will be aligned together"/> <param name="units_diffalignment" type="select" display = "radio" optional = "False" label="units"> @@ -598,7 +604,7 @@ </param> </when> <when value="DP"> - <param name="gap_DPalignment" type="integer" value="0" + <param name="gap_DPalignment" type="float" value="0" label="Gap" help="The gap penalty for the dynamic programming sequence alignment"/> </when> </conditional> @@ -673,7 +679,7 @@ </when> <when value="Transformation"> <conditional name="transf_conditional"> - <param name="trans_type" type="select" label="Choose which intensity transformation you want to apply" help="logarithm base 2 (log2) or squareroot (sqrt)"> + <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or squareroot (sqrt)"> <option value="log2" selected="True">log2</option> <option value="sqrt">sqrt</option> </param> @@ -708,16 +714,16 @@ <outputs> <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/> <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/> - <data format="tabular" name="summarized_output_mean" label="$infile.display_name mean_matrix"> + <data format="tabular" name="summarized_mean" label="$infile.display_name preprocessed_mean"> <filter>summary_type and "mean" in summary_type</filter> </data> - <data format="tabular" name="summarized_output_median" label="$infile.display_name median_matrix"> + <data format="tabular" name="summarized_median" label="$infile.display_name preprocessed_median"> <filter>summary_type and "median" in summary_type</filter> </data> - <data format="tabular" name="summarized_output_sd" label="$infile.display_name sd_matrix"> + <data format="tabular" name="summarized_sd" label="$infile.display_name preprocessed_sd"> <filter>summary_type and "sd" in summary_type</filter> </data> - <data format="tabular" name="matrixasoutput" label="$infile.display_name preprocessed_matrix"> + <data format="tabular" name="intensity_matrix" label="$infile.display_name preprocessed_matrix"> <filter>output_matrix</filter> </data> </outputs> @@ -737,7 +743,9 @@ <param name="preprocessing_method" value="Smoothing"/> <conditional name="methods_for_smoothing"> <param name="smoothing_method" value="gaussian"/> + <param name="sd_gaussian" value="4"/> </conditional> + <param name="window_smoothing" value="9"/> </conditional> </repeat> <repeat name="methods"> @@ -748,6 +756,7 @@ </conditional> <param name="blocks_picking" value="3"/> <param name="window_picking" value="3"/> + <param name="SNR_picking_method" value="3"/> </conditional> </repeat> <repeat name="methods"> @@ -777,7 +786,7 @@ </conditional> <param name="output_matrix" value="True"/> <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> - <output name="matrixasoutput" file="preprocessing_results1.txt"/> + <output name="intensity_matrix" file="preprocessing_results1.txt"/> <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> </test> <test expect_num_outputs="4"> @@ -811,8 +820,8 @@ </conditional> <param name="summary_type" value="median,sd"/> <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> - <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/> - <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/> + <output name="summarized_median" file="preprocessing_median2.txt" lines_diff="2"/> + <output name="summarized_sd" file="preprocessing_sd2.txt" lines_diff="2"/> <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> </test> <test expect_num_outputs="3"> @@ -831,6 +840,7 @@ <param name="preprocessing_method" value="Peak_picking"/> <param name="blocks_picking" value="100"/> <param name="window_picking" value="5"/> + <param name="SNR_picking_method" value="3"/> <param name="picking_method" value="limpic"/> </conditional> </repeat> @@ -848,7 +858,7 @@ </conditional> <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> - <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/> + <output name="summarized_mean" file="preprocessing_mean3.txt" lines_diff="2"/> </test> <test expect_num_outputs="3"> <param name="infile" value="" ftype="analyze75"> @@ -872,7 +882,7 @@ </conditional> <param name="output_matrix" value="True"/> <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> - <output name="matrixasoutput" file="preprocessing_results4.txt"/> + <output name="intensity_matrix" file="preprocessing_results4.txt"/> <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/> </test> <test expect_num_outputs="2">
--- a/test-data/preprocessing_mean3.txt Wed Aug 22 13:43:04 2018 -0400 +++ b/test-data/preprocessing_mean3.txt Tue Sep 04 13:42:22 2018 -0400 @@ -1,4 +1,4 @@ - Uploaded Composite Dataset (analyze75) +mz Uploaded Composite Dataset (analyze75) 1199.55615234375 3.35218415321305 1200.09387207031 1.08671297501661 1200.59020996094 2.15851707603805
--- a/test-data/preprocessing_median2.txt Wed Aug 22 13:43:04 2018 -0400 +++ b/test-data/preprocessing_median2.txt Tue Sep 04 13:42:22 2018 -0400 @@ -1,4 +1,4 @@ - File1 File2 NA +mz File1 File2 NA 101.083335876465 0 0.133186891674995 0.266373783349991 101.666664123535 0 0 0 102.166664123535 0 0 0
--- a/test-data/preprocessing_results1.txt Wed Aug 22 13:43:04 2018 -0400 +++ b/test-data/preprocessing_results1.txt Tue Sep 04 13:42:22 2018 -0400 @@ -1,3 +1,3 @@ -mz | spectra x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 +mz x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 329 8.48069807321137 6.00276368862812 0 0 7.22240715797167 6.68463797360356 0 0 0 345 0 0 4.70593890744759 0 0 0 5.23000350586712 4.17949067812964 5.08555910047608
--- a/test-data/preprocessing_results4.txt Wed Aug 22 13:43:04 2018 -0400 +++ b/test-data/preprocessing_results4.txt Tue Sep 04 13:42:22 2018 -0400 @@ -1,4 +1,4 @@ -mz | spectra x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 +mz x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 1199 1.90173968313755 1.13259535967648 2.08382650993109 2.34349737625869 1.33087314662273 2.14468085106383 3.43161925601751 1.32706902782797 2.22480967308554 1200 1.39388874502695 0.970046951574763 1.52152411836238 1.35619061126081 1.10906095551895 1.66382978723404 2.22846006564551 1.19804842790025 1.7089117778773 1201 1.13095882671438 0.99102093971692 1.23623834616944 1.19344773790952 1.05864909390445 1.31063829787234 1.67396061269147 1.07824358511023 1.28168383340797
--- a/test-data/preprocessing_sd2.txt Wed Aug 22 13:43:04 2018 -0400 +++ b/test-data/preprocessing_sd2.txt Tue Sep 04 13:42:22 2018 -0400 @@ -1,4 +1,4 @@ - File1 File2 NA +mz File1 File2 NA 101.083335876465 0.180910895583245 0.284914371691127 0.358878736172051 101.666664123535 0 0 0 102.166664123535 0 0 0