comparison preprocessing.xml @ 13:6b36be80febb draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 39bd480e8813fa7a96b640150365577a69885d17-dirty"
author galaxyp
date Sun, 29 Nov 2020 23:45:08 +0000
parents e0669b1854b1
children accf9fb6ea01
comparison
equal deleted inserted replaced
12:e0669b1854b1 13:6b36be80febb
1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1"> 1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.2">
2 <description> 2 <description>
3 mass spectrometry imaging preprocessing 3 mass spectrometry imaging preprocessing
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macros.xml</import> 6 <import>macros.xml</import>
45 45
46 @READING_MSIDATA_FULLY_COMPATIBLE@ 46 @READING_MSIDATA_FULLY_COMPATIBLE@
47 47
48 48
49 ## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail 49 ## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail
50 msidata <- msidata[,!duplicated(coord(msidata)[,1:2])]
51 50
52 ## set variable to False 51 ## set variable to False
53 #set $used_peak_picking = False 52 #set $used_peak_picking = False
54 #set $used_peak_alignment = False 53 #set $used_peak_alignment = False
55 #set $continuous_format = False 54 #set $continuous_format = False
133 132
134 #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': 133 #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
135 print('gaussian smoothing') 134 print('gaussian smoothing')
136 135
137 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) 136 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)
137 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
138 138
139 #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': 139 #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
140 print('sgolay smoothing') 140 print('sgolay smoothing')
141 141
142 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) 142 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
143 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
143 144
144 ## if selected replace negative intensities with zero 145 ## if selected replace negative intensities with zero
145 #if $method.methods_conditional.methods_for_smoothing.replace_negatives: 146 #if $method.methods_conditional.methods_for_smoothing.replace_negatives:
146 spectra(msidata)[spectra(msidata)<0] = 0 147 ## convert spectra into an R matrix before replacing negative values
148 spectra_df = as.matrix(spectra(msidata))
149 spectra_df[spectra_df<0] = 0
150 spectra(msidata) = spectra_df
147 #end if 151 #end if
148 152
149 #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': 153 #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
150 print('moving average smoothing') 154 print('moving average smoothing')
151 155
152 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) 156 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
157 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
153 158
154 #end if 159 #end if
155 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
156 160
157 ############################### QC ########################### 161 ############################### QC ###########################
158 162
159 maxfeatures =nrow(msidata) 163 maxfeatures =nrow(msidata)
160 pixelcount = ncol(msidata) 164 pixelcount = ncol(msidata)
168 172
169 173
170 ############################### Mz alignment ########################### 174 ############################### Mz alignment ###########################
171 175
172 #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment': 176 #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment':
173 print('M/z alignment') 177 print('m/z alignment')
174 ## M/z alignment 178 ## M/z alignment
175 179
176 #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table': 180 #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table':
177 181
178 reference_mz = read.delim("$method.methods_conditional.mzalign_ref_type.mz_tabular", header = $method.methods_conditional.mzalign_ref_type.feature_header, stringsAsFactors = FALSE) 182 reference_mz = read.delim("$method.methods_conditional.mzalign_ref_type.mz_tabular", header = $method.methods_conditional.mzalign_ref_type.feature_header, stringsAsFactors = FALSE)
181 msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) 185 msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span)
182 186
183 187
184 #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref': 188 #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref':
185 189
186 msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", , quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) 190 msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span)
187 191
188 #end if 192 #end if
189 193
190 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) 194 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
191 195
330 ############################### Mass binning ########################### 334 ############################### Mass binning ###########################
331 335
332 #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning': 336 #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning':
333 print('mass binning') 337 print('mass binning')
334 338
335 #if str( $method.methods_conditional.mz_range.features_filtering) == 'change_mz_range': 339 #if str($method.methods_conditional.mz_range.features_filtering) == 'change_mz_range':
336
337 #if str($processed_cond.processed_file) == "processed":
338 340
339 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") 341 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")
340
341 #else
342 ## continuous file cannot be binned from m/z to m/z, therefore first cut m/z range and then do mzbin:
343 msidata = msidata[mz(msidata) >= $method.methods_conditional.mz_range.min_mz & mz(msidata) <= $method.methods_conditional.mz_range.max_mz,]
344 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")
345 #end if
346 342
347 343
348 #elif str( $method.methods_conditional.mz_range.features_filtering) == 'none': 344 #elif str($method.methods_conditional.mz_range.features_filtering) == 'none':
349 345
350 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun) 346 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun)
347
348 #elif str($method.methods_conditional.mz_range.features_filtering) == 'bin_to_reference':
349
350 bin_reference_mz = read.delim("$method.methods_conditional.mz_range.mz_tabular", header = $method.methods_conditional.mz_range.feature_header, stringsAsFactors = FALSE)
351 bin_reference_mz = bin_reference_mz[,$method.methods_conditional.mz_range.feature_column]
352
353 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun,
354 ref=bin_reference_mz)
351 355
352 #end if 356 #end if
353 357
354 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) 358 msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
355 359
383 387
384 #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': 388 #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
385 print('log2 transformation') 389 print('log2 transformation')
386 390
387 ## replace 0 with NA to prevent Inf 391 ## replace 0 with NA to prevent Inf
388 spectra_df = spectra(msidata) ## convert into R matrix 392 spectra_df = as.matrix(spectra(msidata)) ## convert into R matrix
389 spectra_df[spectra_df ==0] = NA 393 spectra_df[spectra_df ==0] = NA
390 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df)))) 394 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df))))
391 spectra(msidata) = spectra_df 395 spectra(msidata) = spectra_df
392 ## log transformation 396 ## log transformation
393 spectra(msidata) = log2(spectra(msidata)) 397 spectra(msidata) = log2(spectra(msidata))
420 424
421 ############# Outputs: RData, imzml and QC report ############# 425 ############# Outputs: RData, imzml and QC report #############
422 ################################################################################ 426 ################################################################################
423 427
424 ## save msidata as imzML file, will only work if there is at least 1 m/z left 428 ## save msidata as imzML file, will only work if there is at least 1 m/z left
425
426 #if str($imzml_output) == "cont_format":
427 #set $continuous_format = True
428 #end if
429 429
430 if (nrow(msidata) > 0){ 430 if (nrow(msidata) > 0){
431 ## make sure that coordinates are integers 431 ## make sure that coordinates are integers
432 coord(msidata)\$y = as.integer(coord(msidata)\$y) 432 coord(msidata)\$y = as.integer(coord(msidata)\$y)
433 coord(msidata)\$x = as.integer(coord(msidata)\$x) 433 coord(msidata)\$x = as.integer(coord(msidata)\$x)
434 #if $used_peak_picking: 434 ## only continuous files can currently be exported
435 #if $continuous_format: 435 msidata = as(msidata, "MSContinuousImagingExperiment")
436 msidata = as(msidata, "MSContinuousImagingExperiment") 436 writeImzML(msidata, "out")
437 #end if
438 #elif $used_peak_alignment
439 #if $continuous_format:
440 msidata = as(msidata, "MSContinuousImagingExperiment")
441 #end if
442 #end if
443 writeImzML(msidata, "out")
444 } 437 }
445 438
446 plot(0,type='n',axes=FALSE,ann=FALSE) 439 plot(0,type='n',axes=FALSE,ann=FALSE)
447 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "# spectra") 440 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "# spectra")
448 grid.table(t(QC_numbers)) 441 grid.table(t(QC_numbers))
618 <option value="mean" selected="True">mean</option> 611 <option value="mean" selected="True">mean</option>
619 <option value="sum">sum</option> 612 <option value="sum">sum</option>
620 </param> 613 </param>
621 <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/> 614 <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/>
622 <conditional name="mz_range"> 615 <conditional name="mz_range">
623 <param name="features_filtering" type="select" label="Select m/z feature filtering option"> 616 <param name="features_filtering" type="select" label="Select m/z options">
624 <option value="none" selected="True">none</option> 617 <option value="none" selected="True">none</option>
625 <option value="change_mz_range">change m/z range</option> 618 <option value="change_mz_range">change m/z range</option>
619 <option value="bin_to_reference">bin m/z to reference</option>
626 </param> 620 </param>
627 <when value="none"/> 621 <when value="none"/>
628 <when value="change_mz_range"> 622 <when value="change_mz_range">
629 <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/> 623 <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/>
630 <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/> 624 <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/>
631 </when> 625 </when>
626 <when value="bin_to_reference">
627 <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features as reference for binning. Only the m/z values from the tabular file will be kept."/>
628 </when>
632 </conditional> 629 </conditional>
633 </when> 630 </when>
634 <when value="Transformation"> 631 <when value="Transformation">
635 <conditional name="transf_conditional"> 632 <conditional name="transf_conditional">
636 <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or squareroot (sqrt)"> 633 <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or squareroot (sqrt)">
643 <when value="sqrt"/> 640 <when value="sqrt"/>
644 </conditional> 641 </conditional>
645 </when> 642 </when>
646 </conditional> 643 </conditional>
647 </repeat> 644 </repeat>
648 <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/>
649 </inputs> 645 </inputs>
650 <outputs> 646 <outputs>
651 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> 647 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/>
652 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/> 648 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/>
653 </outputs> 649 </outputs>
664 </repeat> 660 </repeat>
665 <repeat name="methods"> 661 <repeat name="methods">
666 <conditional name="methods_conditional"> 662 <conditional name="methods_conditional">
667 <param name="preprocessing_method" value="Smoothing"/> 663 <param name="preprocessing_method" value="Smoothing"/>
668 <conditional name="methods_for_smoothing"> 664 <conditional name="methods_for_smoothing">
669 <param name="smoothing_method" value="gaussian"/> 665 <param name="smoothing_method" value="sgolay"/>
670 <param name="sd_gaussian" value="4"/> 666 </conditional>
671 </conditional>
672 <param name="window_smoothing" value="9"/>
673 </conditional> 667 </conditional>
674 </repeat> 668 </repeat>
675 <repeat name="methods"> 669 <repeat name="methods">
676 <conditional name="methods_conditional"> 670 <conditional name="methods_conditional">
677 <param name="preprocessing_method" value="Peak_picking"/> 671 <param name="preprocessing_method" value="Peak_picking"/>
700 <conditional name="transf_conditional"> 694 <conditional name="transf_conditional">
701 <param name="trans_type" value="sqrt"/> 695 <param name="trans_type" value="sqrt"/>
702 </conditional> 696 </conditional>
703 </conditional> 697 </conditional>
704 </repeat> 698 </repeat>
705 <param name="imzml_output" value="cont_format"/>
706 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> 699 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
707 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size"> 700 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size">
708 <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/> 701 <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/>
709 <extra_files type="file" file="preprocessing_results1.ibd" name="ibd" compare="sim_size"/> 702 <extra_files type="file" file="preprocessing_results1.ibd" name="ibd" compare="sim_size"/>
710 </output> 703 </output>
725 <repeat name="methods"> 718 <repeat name="methods">
726 <conditional name="methods_conditional"> 719 <conditional name="methods_conditional">
727 <param name="preprocessing_method" value="Peak_alignment"/> 720 <param name="preprocessing_method" value="Peak_alignment"/>
728 </conditional> 721 </conditional>
729 </repeat> 722 </repeat>
730 <param name="imzml_output" value="cont_format"/>
731 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> 723 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
732 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size"> 724 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size">
733 <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/> 725 <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/>
734 <extra_files type="file" file="preprocessing_results2.ibd" name="ibd" compare="sim_size"/> 726 <extra_files type="file" file="preprocessing_results2.ibd" name="ibd" compare="sim_size"/>
735 </output> 727 </output>
751 <param name="window_picking" value="5"/> 743 <param name="window_picking" value="5"/>
752 <param name="SNR_picking_method" value="2"/> 744 <param name="SNR_picking_method" value="2"/>
753 <conditional name="methods_for_picking"> 745 <conditional name="methods_for_picking">
754 <param name="picking_method" value="mad"/> 746 <param name="picking_method" value="mad"/>
755 </conditional> 747 </conditional>
756 <param name="imzml_output" value="proc_format"/>
757 </conditional> 748 </conditional>
758 </repeat> 749 </repeat>
759 <repeat name="methods"> 750 <repeat name="methods">
760 <conditional name="methods_conditional"> 751 <conditional name="methods_conditional">
761 <param name="preprocessing_method" value="Peak_alignment"/> 752 <param name="preprocessing_method" value="Peak_alignment"/>
762 <param name="imzml_output" value="proc_format"/>
763 </conditional> 753 </conditional>
764 </repeat> 754 </repeat>
765 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> 755 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
766 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results3.imzml.txt" compare="sim_size"> 756 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results3.imzml.txt" compare="sim_size">
767 <extra_files type="file" file="preprocessing_results3.imzml" name="imzml" lines_diff="6"/> 757 <extra_files type="file" file="preprocessing_results3.imzml" name="imzml" lines_diff="6"/>
848 - Transformation: log2 or square root transformation of all intensities; when using log2 transformation, zero intensities will become NA, which can lead to compatibility problems. 838 - Transformation: log2 or square root transformation of all intensities; when using log2 transformation, zero intensities will become NA, which can lead to compatibility problems.
849 839
850 840
851 **Output** 841 **Output**
852 842
853 - MSI data as continuous or processed imzML file 843 - MSI data as continuous imzML file
854 - pdf with key values and four random mass spectra after each processing step 844 - pdf with key values and four random mass spectra after each processing step
855 845
856 ]]> 846 ]]>
857 </help> 847 </help>
858 <expand macro="citations"/> 848 <expand macro="citations"/>