Mercurial > repos > galaxyp > msi_preprocessing
diff msi_preprocessing.xml @ 3:b9523950e79d draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 1c808d60243bb1eeda0cd26cb4b0a17ab05de2c0
author | galaxyp |
---|---|
date | Mon, 28 May 2018 12:38:04 -0400 |
parents | 50ba097d03e0 |
children | 42580292d381 |
line wrap: on
line diff
--- a/msi_preprocessing.xml Mon Apr 23 17:19:16 2018 -0400 +++ b/msi_preprocessing.xml Mon May 28 12:38:04 2018 -0400 @@ -1,9 +1,9 @@ -<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.7.0.2"> +<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.0"> <description> mass spectrometry imaging preprocessing </description> <requirements> - <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement> + <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> <requirement type="package" version="2.2.1">r-gridextra</requirement> <requirement type="package" version="0.20-35">r-lattice</requirement> </requirements> @@ -11,12 +11,12 @@ <![CDATA[ #if $infile.ext == 'imzml' - cp '${infile.extra_files_path}/imzml' infile.imzML && - cp '${infile.extra_files_path}/ibd' infile.ibd && + ln -s '${infile.extra_files_path}/imzml' infile.imzML && + ln -s '${infile.extra_files_path}/ibd' infile.ibd && #elif $infile.ext == 'analyze75' - cp '${infile.extra_files_path}/hdr' infile.hdr && - cp '${infile.extra_files_path}/img' infile.img && - cp '${infile.extra_files_path}/t2m' infile.t2m && + ln -s '${infile.extra_files_path}/hdr' infile.hdr && + ln -s '${infile.extra_files_path}/img' infile.img && + ln -s '${infile.extra_files_path}/t2m' infile.t2m && #else ln -s '$infile' infile.RData && #end if @@ -32,15 +32,15 @@ library(lattice) #if $infile.ext == 'imzml' - msidata <- readMSIData('infile.imzML') + msidata = readImzML('infile') #elif $infile.ext == 'analyze75' - msidata <- readMSIData('infile.hdr') + msidata = readAnalyze('infile') #else load('infile.RData') #end if -##################################### Preparations for QC report ####################################################################################### +##################################### Preparations for QC report ############### #if $outputs.outputs_select == "quality_control": @@ -54,7 +54,7 @@ ### 
Read tabular file with calibrant masses: - calibrant_list = read.delim("$outputs.calibrant_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE) + calibrant_list = read.delim("$outputs.calibrant_file", header = FALSE, stringsAsFactors = FALSE) ### calculate how many input calibrant masses are valid: inputcalibrants = calibrant_list[calibrant_list[,$outputs.calibrants_column]>min(mz(msidata)) & calibrant_list[,$outputs.calibrants_column]<max(mz(msidata)),$outputs.calibrants_column] @@ -85,18 +85,19 @@ } - current_plot_raw <- vector(length(inputcalibrants), mode='list') + current_plot_raw = vector(length(inputcalibrants), mode='list') #end if -################################################### Preprocessing steps ####################################################################################### +############################### Preprocessing steps ########################### #for $method in $methods: + #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization': print('Normalization') ##normalization - msidata <- normalize(msidata, method="tic") + msidata = normalize(msidata, method="tic") #if $outputs.outputs_select == "quality_control": ### values for QC table: @@ -152,13 +153,13 @@ #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': print('gaussian smoothing') - msidata <- smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) + msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': print('sgolay smoothing') - msidata <- smoothSignal(msidata, 
method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) + msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': print('sgolay smoothing') - msidata <- smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) + msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) #end if #if $outputs.outputs_select == "quality_control": @@ -226,7 +227,7 @@ align_peak_reference = msidata #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table': - align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE) + align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE) align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column] @@ -317,7 +318,7 @@ print('peaks reduction') #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table': - reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE) + 
reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE) reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column] peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))] @@ -351,10 +352,25 @@ assign(paste("reduced",calibrant, sep="_"), currentimage) } #end if + + #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation': + print('Transformation') + + #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': + print('log2 transformation') + spectra(msidata)[spectra(msidata) ==0] = NA + print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata))))) + spectra(msidata) = log2(spectra(msidata)) + + #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt': + print('squareroot transformation') + spectra(msidata) = sqrt(spectra(msidata)) + + #end if #end if #end for -####################################################### Outputs: RData, tabular and QC report ###################################################### +###################### Outputs: RData, tabular and QC report ################### ## save as (.RData) @@ -410,7 +426,7 @@ ]]></configfile> </configfiles> <inputs> - <param name="infile" type="data" format="imzml,rdata,analyze75" + <param name="infile" type="data" format="imzml,rdata,danalyze75" label="MSI rawdata as imzml, analyze7.5 or Cardinal MSImageSet saved as RData" help="load imzml and ibd file by uploading composite datatype imzml"/> <repeat name="methods" title="Preprocessing" min="1" max="50"> @@ -423,8 +439,8 @@ <option value="Peak_alignment">Peak alignment</option> <option value="Peak_filtering">Peak filtering</option> <option value="Data_reduction">Data reduction</option> + <option value="Transformation">Transformation</option> </param> - <when value="Normalization"/> <when 
value="Baseline_reduction"> <param name="blocks_baseline" type="integer" value="50" @@ -530,7 +546,7 @@ </param> <when value="bin"> <param name="bin_width" type="float" value="1" - label="The width of a bin in mz or ppm" help="Width must be greater than Range of mz values/Number of mz features"/> + label="The width of a bin in mz or ppm" help="Width must be greater than range of mz values divided by number of mz features"/> <param name="bin_units" type="select" display="radio" label="Unit for bin"> <option value="mz" selected="True">mz</option> @@ -544,7 +560,7 @@ </when> <when value="resample"> <param name="resample_step" type="float" value="1" - label="The step size in mz" help="Step size must be greater than Range of mz values/Number of mz features"/> + label="The step size in mz" help="Step size must be greater than range of mz values divided by number of mz features"/> </when> <when value="peaks"> <param name="peaks_type" type="select" display="radio" @@ -569,6 +585,16 @@ </when> </conditional> </when> + <when value="Transformation"> + <conditional name="transf_conditional"> + <param name="trans_type" type="select" label="Choose which intensity transformation you want to apply" help="logarithm base 2 (log2) or squareroot (sqrt)"> + <option value="log2" selected="True">log2</option> + <option value="sqrt">sqrt</option> + </param> + <when value="log2"/> + <when value="sqrt"/> + </conditional> + </when> </conditional> </repeat> <conditional name="outputs"> @@ -597,67 +623,79 @@ </data> </outputs> <tests> - <test> + <test expect_num_outputs="2"> <param name="infile" value="" ftype="imzml"> <composite_data value="Example_Continuous.imzML"/> <composite_data value="Example_Continuous.ibd"/> </param> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Normalization" /> + <param name="preprocessing_method" value="Normalization"/> + </conditional> + </repeat> + <repeat name="methods"> + <conditional 
name="methods_conditional"> + <param name="preprocessing_method" value="Smoothing"/> + <conditional name="methods_for_smoothing"> + <param name="smoothing_method" value="gaussian"/> + </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Smoothing" /> - <conditional name="methods_for_smoothing"> - <param name="smoothing_method" value="gaussian" /> + <param name="preprocessing_method" value="Peak_picking"/> + <conditional name="methods_for_picking"> + <param name="picking_method" value="adaptive"/> + </conditional> + <param name="blocks_picking" value="3"/> + <param name="window_picking" value="3"/> + </conditional> + </repeat> + <repeat name="methods"> + <conditional name="methods_conditional"> + <param name="preprocessing_method" value="Peak_alignment"/> + <conditional name="methods_for_alignment"> + <param name="alignment_method" value="diff"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Peak_picking" /> - <conditional name="methods_for_picking"> - <param name="picking_method" value="adaptive" /> - </conditional> - <param name="blocks_picking" value="3" /> - <param name="window_picking" value="3" /> + <param name="preprocessing_method" value="Peak_filtering"/> + <param name="frequ_filtering" value="2"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Peak_alignment" /> - <conditional name="methods_for_alignment"> - <param name="alignment_method" value="diff" /> + <param name="preprocessing_method" value="Transformation"/> + <conditional name="transf_conditional"> + <param name="trans_type" value="sqrt"/> + </conditional> + </conditional> + </repeat> + <param name="outputs_select" value="no_quality_control"/> + <param name="output_matrix" value="True"/> + <output 
name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> + <output name="matrixasoutput" file="preprocessing_results1.txt"/> + </test> + <test expect_num_outputs="3"> + <param name="infile" value="example_continous.RData" ftype="rdata"/> + <repeat name="methods"> + <conditional name="methods_conditional"> + <param name="preprocessing_method" value="Peak_picking"/> + <param name="blocks_picking" value="3"/> + <param name="window_picking" value="5"/> + <conditional name="methods_for_picking"> + <param name="picking_method" value="simple"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Peak_filtering" /> - <param name="frequ_filtering" value="2"/> - </conditional> - </repeat> - <param name="outputs_select" value="no_quality_control"/> - <param name="output_matrix" value="True"/> - <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size" /> - <output name="matrixasoutput" file="preprocessing_results1.txt" /> - </test> - <test> - <param name="infile" value="example_continous.RData" ftype="rdata"/> - <repeat name="methods"> - <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Peak_picking" /> - <param name="blocks_picking" value="3" /> - <param name="window_picking" value="5" /> - <param name="methods_for_picking" value="simple" /> - </conditional> - </repeat> - <repeat name="methods"> - <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Peak_alignment" /> - <param name="methods_for_alignment" value="DP" /> + <param name="preprocessing_method" value="Peak_alignment"/> + <conditional name="methods_for_alignment"> + <param name="alignment_method" value="DP"/> + </conditional> </conditional> </repeat> <param name="outputs_select" value="quality_control"/> @@ -665,11 +703,11 @@ <param name="calibrants_column" value="1"/> <param 
name="plusminus_dalton" value="0.25"/> <param name="output_matrix" value="True"/> - <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size" /> - <output name="matrixasoutput" file="preprocessing_results2.txt" lines_diff="2" /> + <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> + <output name="matrixasoutput" file="preprocessing_results2.txt" lines_diff="2"/> <output name="QC_plots" file="preprocessing_results2.pdf" compare="sim_size"/> </test> - <test> + <test expect_num_outputs="2"> <param name="infile" value="" ftype="analyze75"> <composite_data value="Analyze75.hdr"/> <composite_data value="Analyze75.img"/> @@ -677,31 +715,35 @@ </param> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Normalization" /> + <param name="preprocessing_method" value="Normalization"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Peak_picking" /> - <param name="blocks_picking" value="3" /> - <param name="window_picking" value="5" /> - <param name="methods_for_picking" value="limpic" /> + <param name="preprocessing_method" value="Peak_picking"/> + <param name="blocks_picking" value="100"/> + <param name="window_picking" value="5"/> + <conditional name="methods_for_picking"> + <param name="picking_method" value="limpic"/> + </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Peak_alignment" /> - <param name="methods_for_alignment" value="diff" /> + <param name="preprocessing_method" value="Peak_alignment"/> + <conditional name="methods_for_alignment"> + <param name="alignment_method" value="diff"/> + </conditional> </conditional> </repeat> <param name="outputs_select" value="quality_control"/> <param name="calibrant_file" ftype="tabular" 
value="inputcalibrantfile2.tabular"/> <param name="calibrants_column" value="1"/> <param name="plusminus_dalton" value="0.25"/> - <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size" /> + <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> <output name="QC_plots" file="preprocessing_results3.pdf" compare="sim_size"/> </test> - <test> + <test expect_num_outputs="2"> <param name="infile" value="" ftype="analyze75"> <composite_data value="Analyze75.hdr"/> <composite_data value="Analyze75.img"/> @@ -709,29 +751,40 @@ </param> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Normalization" /> + <param name="preprocessing_method" value="Normalization"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Data_reduction" /> - <param name="bin_width" value="0.1" /> + <param name="preprocessing_method" value="Data_reduction"/> + <param name="bin_width" value="0.1"/> </conditional> </repeat> <param name="outputs_select" value="no_quality_control"/> <param name="output_matrix" value="True"/> - <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size" /> - <output name="matrixasoutput" file="preprocessing_results4.txt" /> + <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> + <output name="matrixasoutput" file="preprocessing_results4.txt"/> </test> - <test> + <test expect_num_outputs="3"> <param name="infile" value="" ftype="imzml"> <composite_data value="Example_Continuous.imzML"/> <composite_data value="Example_Continuous.ibd"/> </param> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Data_reduction" /> - <param name="step_width" value="0.1" /> + <param name="preprocessing_method" value="Data_reduction"/> + <conditional 
name="methods_for_reduction"> + <param name="reduction_method" value="resample"/> + <param name="step_width" value="0.1"/> + </conditional> + </conditional> + </repeat> + <repeat name="methods"> + <conditional name="methods_conditional"> + <param name="preprocessing_method" value="Transformation"/> + <conditional name="transf_conditional"> + <param name="trans_type" value="log2"/> + </conditional> </conditional> </repeat> <param name="outputs_select" value="quality_control"/> @@ -739,24 +792,46 @@ <param name="calibrants_column" value="1"/> <param name="plusminus_dalton" value="0.25"/> <param name="output_matrix" value="True"/> - <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size" /> - <output name="matrixasoutput" file="preprocessing_results5.txt" /> + <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/> + <output name="matrixasoutput" file="preprocessing_results5.txt"/> <output name="QC_plots" file="preprocessing_results5.pdf" compare="sim_size"/> </test> </tests> <help> <![CDATA[ -Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. + +Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org/>`_ + +This tool provides multiple Cardinal functions to preprocess mass-spectrometry imaging data. 
+ +Input data: 3 types of input data can be used: -**Preprocessing steps:** +- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ +- Analyze7.5 (upload hdr, img and t2m file via the "composite" function) +- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) -- Smoothening: Smoothing of the mass peaks reduces noise and improves peak detection +Options: + - Normalization: Normalization of intensities to total ion current (TIC) - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) -- Peak picking: relevant peaks are picked while noise-peaks are removed -- Peak alignment: after peak picking, mz inaccuracies are removed by alignment of same peaks to a common mz value -- Peak filtering: after peak picking and alignment, removes peaks that occur infrequently, such as those which only occur in a small proportion of pixels. If not sure which cutoff to chose run qualitycontrol first and decide according to the zero value plot. +- Smoothing: Smoothing of the mass peaks reduces noise and improves peak detection +- Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) +- Peak alignment: only possible after peak picking, mz inaccuracies are removed by alignment of same peaks to a common mz value +- Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run quality control first and decide according to the zero value plot. 
- Data reduction: binning, resampling or peak filtering to reduce data +- Transformation: log2 or square root transformation of all intensities + +Output: + +- preprocessed MSI data, saved as .RData +- optional: pdf with heatmap of masses of interest after each preprocessing step +- optional: intensity matrix as tabular file (intensities for masses in rows and pixels in columns) + +Tip: + +- Peak alignment works only after peak picking +- Peak filtering works only on centroided data (peak picking and alignment or Data reduction peaks) + ]]> </help> <citations>