comparison msi_preprocessing.xml @ 8:d77c5228fd1a draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5bceedc3a11c950790692a4c64bbb83d46897bee
author galaxyp
date Tue, 24 Jul 2018 04:53:10 -0400
parents 1a3d477bc54a
children 4d5578b57a77
comparison
equal deleted inserted replaced
7:1a3d477bc54a 8:d77c5228fd1a
1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.4"> 1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.5">
2 <description> 2 <description>
3 mass spectrometry imaging preprocessing 3 mass spectrometry imaging preprocessing
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
7 <requirement type="package" version="2.2.1">r-gridextra</requirement> 7 <requirement type="package" version="2.2.1">r-gridextra</requirement>
8 <requirement type="package" version="0.20-35">r-lattice</requirement> 8 <requirement type="package" version="0.20-35">r-lattice</requirement>
9 <requirement type="package" version="2.2.1">r-ggplot2</requirement>
9 </requirements> 10 </requirements>
10 <command detect_errors="exit_code"> 11 <command detect_errors="exit_code">
11 <![CDATA[ 12 <![CDATA[
12 13
13 #if $infile.ext == 'imzml' 14 #if $infile.ext == 'imzml'
31 ################################# load libraries and read file ################# 32 ################################# load libraries and read file #################
32 33
33 library(Cardinal) 34 library(Cardinal)
34 library(gridExtra) 35 library(gridExtra)
35 library(lattice) 36 library(lattice)
37 library(ggplot2)
36 38
37 #if $infile.ext == 'imzml' 39 #if $infile.ext == 'imzml'
38 #if str($processed_cond.processed_file) == "processed": 40 #if str($processed_cond.processed_file) == "processed":
39 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") 41 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
40 #else 42 #else
317 ############# Outputs: summary matrix, RData, tabular and QC report ############# 319 ############# Outputs: summary matrix, RData, tabular and QC report #############
318 ################################################################################ 320 ################################################################################
319 ## optional summarized matrix 321 ## optional summarized matrix
320 print('Summarized matrix') 322 print('Summarized matrix')
321 323
324 ## optional annotation from tabular file to obtain groups over which to apply mean, median or sd (otherwise all pixels are treated as a single sample)
325
326 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
327
328 ## read and extract x,y,annotation information
329 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
330 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
331 colnames(annotation_input) = c("x", "y", "annotation")
332
333 ## merge with coordinate information of msidata
334 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata)))
335 colnames(msidata_coordinates)[3] = "pixel_index"
336 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE)
337 merged_annotation[is.na(merged_annotation)] = "NA"
338 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
339 msidata\$annotation = as.factor(merged_annotation[,4])
340
341 #end if
342
322 #if "mean" in str($summary_type).split(","): 343 #if "mean" in str($summary_type).split(","):
323 print("mean matrix") 344 print("mean matrix")
324 if (!is.null(levels(msidata\$combined_sample))){ 345 if (!is.null(levels(msidata\$annotation))){
325 346
326 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 347 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
327 count = 1 348 count = 1
328 for (subsample in levels(msidata\$combined_sample)){ 349 for (subsample in levels(msidata\$annotation)){
329 subsample_pixels = msidata[,msidata\$combined_sample == subsample] 350 subsample_pixels = msidata[,msidata\$annotation == subsample]
330 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) 351 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE)
331 sample_matrix = cbind(sample_matrix, subsample_calc) 352 sample_matrix = cbind(sample_matrix, subsample_calc)
332 count = count+1 353 count = count+1
333 } 354 }
334 rownames(sample_matrix) = mz(msidata) 355 rownames(sample_matrix) = mz(msidata)
335 colnames(sample_matrix) = levels(msidata\$combined_sample) 356 colnames(sample_matrix) = levels(msidata\$annotation)
336 write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 357 write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
337 }else{ 358 }else{
338 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE)) 359 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
339 rownames(full_sample_calc) = mz(msidata) 360 rownames(full_sample_calc) = mz(msidata)
340 colnames(full_sample_calc) = "$infile.display_name" 361 colnames(full_sample_calc) = "$infile.display_name"
343 364
344 #end if 365 #end if
345 366
346 #if "median" in str($summary_type).split(","): 367 #if "median" in str($summary_type).split(","):
347 print("median matrix") 368 print("median matrix")
348 if (!is.null(levels(msidata\$combined_sample))){ 369 if (!is.null(levels(msidata\$annotation))){
349 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 370 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
350 count = 1 371 count = 1
351 for (subsample in levels(msidata\$combined_sample)){ 372 for (subsample in levels(msidata\$annotation)){
352 subsample_pixels = msidata[,msidata\$combined_sample == subsample] 373
374 subsample_pixels = msidata[,msidata\$annotation == subsample] ## no idea why it does not work??? NA problem?!
375
353 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) 376 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE)
377
354 sample_matrix = cbind(sample_matrix, subsample_calc) 378 sample_matrix = cbind(sample_matrix, subsample_calc)
355 count = count+1 379 count = count+1
356 } 380 }
357 381
358 rownames(sample_matrix) = mz(msidata) 382 rownames(sample_matrix) = mz(msidata)
359 colnames(sample_matrix) = levels(msidata\$combined_sample) 383 colnames(sample_matrix) = levels(msidata\$annotation)
360 write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 384 write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
361 }else{ 385 }else{
362 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE)) 386 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
363 rownames(full_sample_calc) = mz(msidata) 387 rownames(full_sample_calc) = mz(msidata)
364 colnames(full_sample_calc) = "$infile.display_name" 388 colnames(full_sample_calc) = "$infile.display_name"
366 } 390 }
367 #end if 391 #end if
368 392
369 #if "sd" in str($summary_type).split(","): 393 #if "sd" in str($summary_type).split(","):
370 print("sd matrix") 394 print("sd matrix")
371 if (!is.null(levels(msidata\$combined_sample))){ 395 if (!is.null(levels(msidata\$annotation))){
372 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 396 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
373 count = 1 397 count = 1
374 for (subsample in levels(msidata\$combined_sample)){ 398 for (subsample in levels(msidata\$annotation)){
375 subsample_pixels = msidata[,msidata\$combined_sample == subsample] 399 subsample_pixels = msidata[,msidata\$annotation == subsample]
376 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) 400 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE)
377 sample_matrix = cbind(sample_matrix, subsample_calc) 401 sample_matrix = cbind(sample_matrix, subsample_calc)
378 count = count+1 402 count = count+1
379 } 403 }
380 404
381 rownames(sample_matrix) = mz(msidata) 405 rownames(sample_matrix) = mz(msidata)
382 colnames(sample_matrix) = levels(msidata\$combined_sample) 406 colnames(sample_matrix) = levels(msidata\$annotation)
383 write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 407 write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
384 }else{ 408 }else{
385 409
386 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE)) 410 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
387 rownames(full_sample_calc) = mz(msidata) 411 rownames(full_sample_calc) = mz(msidata)
412 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) 436 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
413 plot(0,type='n',axes=FALSE,ann=FALSE) 437 plot(0,type='n',axes=FALSE,ann=FALSE)
414 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) 438 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
415 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC") 439 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC")
416 grid.table(t(QC_numbers)) 440 grid.table(t(QC_numbers))
441
442 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
443
444 ## the more annotation groups a file has the smaller will be the legend
445 number_combined = length(levels(msidata\$annotation))
446 if (number_combined<20){
447 legend_size = 10
448 }else if (number_combined>20 && number_combined<40){
449 legend_size = 9
450 }else if (number_combined>40 && number_combined<60){
451 legend_size = 8
452 }else if (number_combined>60 && number_combined<100){
453 legend_size = 7
454 }else{
455 legend_size = 6
456 }
457
458 position_df = cbind(coord(msidata)[,1:2], msidata\$annotation)
459 colnames(position_df)[3] = "sample_name"
460
461 combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
462 geom_tile() +
463 coord_fixed()+
464 ggtitle("Spatial orientation of annotated data")+
465 theme_bw()+
466 theme(plot.title = element_text(hjust = 0.5))+
467 theme(text=element_text(family="ArialMT", face="bold", size=12))+
468 theme(legend.position="bottom",legend.direction="vertical")+
469 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
470 guides(fill=guide_legend(ncol=5,byrow=TRUE))
471 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
472 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
473 for(file_count in 1:nrow(coord_labels))
474 {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
475 y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}
476
477 print(combine_plot)
478 #end if
479
417 dev.off() 480 dev.off()
418 481
419 }else{ 482 }else{
420 print("inputfile has no intensities > 0") 483 print("inputfile has no intensities > 0")
421 } 484 }
611 <param name="summary_type" type="select" display="checkboxes" multiple="true" label="Summarize all pixels of a sample and calculate the mean, median or standard deviation"> 674 <param name="summary_type" type="select" display="checkboxes" multiple="true" label="Summarize all pixels of a sample and calculate the mean, median or standard deviation">
612 <option value="mean">mean</option> 675 <option value="mean">mean</option>
613 <option value="median">median</option> 676 <option value="median">median</option>
614 <option value="sd">standard deviation</option> 677 <option value="sd">standard deviation</option>
615 </param> 678 </param>
679 <conditional name="tabular_annotation">
680 <param name="load_annotation" type="select" label="Use pixel annotations from tabular file to summarize pixel">
681 <option value="no_annotation" selected="True">summarize over all pixels</option>
682 <option value="yes_annotation">summarize over categories from annotation file</option>
683 </param>
684 <when value="yes_annotation">
685 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file to summarize pixel"
686 help="Tabular file with three columns: x values, y values and pixel annotations"/>
687 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
688 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
689 <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
690 <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
691 </when>
692 <when value="no_annotation"/>
693 </conditional>
616 <param name="output_matrix" type="boolean" label="Intensity matrix output"/> 694 <param name="output_matrix" type="boolean" label="Intensity matrix output"/>
617 </inputs> 695 </inputs>
618 <outputs> 696 <outputs>
619 <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/> 697 <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/>
620 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/> 698 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/>
680 <conditional name="transf_conditional"> 758 <conditional name="transf_conditional">
681 <param name="trans_type" value="sqrt"/> 759 <param name="trans_type" value="sqrt"/>
682 </conditional> 760 </conditional>
683 </conditional> 761 </conditional>
684 </repeat> 762 </repeat>
763 <conditional name="tabular_annotation">
764 <param name="load_annotation" value="no_annotation"/>
765 </conditional>
685 <param name="output_matrix" value="True"/> 766 <param name="output_matrix" value="True"/>
686 <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> 767 <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
687 <output name="matrixasoutput" file="preprocessing_results1.txt"/> 768 <output name="matrixasoutput" file="preprocessing_results1.txt"/>
688 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> 769 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
689 </test> 770 </test>
706 <conditional name="methods_for_alignment"> 787 <conditional name="methods_for_alignment">
707 <param name="alignment_method" value="DP"/> 788 <param name="alignment_method" value="DP"/>
708 </conditional> 789 </conditional>
709 </conditional> 790 </conditional>
710 </repeat> 791 </repeat>
792 <conditional name="tabular_annotation">
793 <param name="load_annotation" value="yes_annotation"/>
794 <param name="annotation_file" value="pixel_annotations.tabular"/>
795 <param name="column_x" value="1"/>
796 <param name="column_y" value="2"/>
797 <param name="column_names" value="3"/>
798 <param name="tabular_header" value="FALSE"/>
799 </conditional>
711 <param name="summary_type" value="median,sd"/> 800 <param name="summary_type" value="median,sd"/>
712 <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> 801 <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
713 <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/> 802 <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/>
714 <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/> 803 <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
715 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> 804 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
740 <param name="alignment_method" value="diff"/> 829 <param name="alignment_method" value="diff"/>
741 </conditional> 830 </conditional>
742 </conditional> 831 </conditional>
743 </repeat> 832 </repeat>
744 <param name="summary_type" value="mean"/> 833 <param name="summary_type" value="mean"/>
834 <conditional name="tabular_annotation">
835 <param name="load_annotation" value="no_annotation"/>
836 </conditional>
745 <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> 837 <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
746 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> 838 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
747 <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/> 839 <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
748 </test> 840 </test>
749 <test expect_num_outputs="3"> 841 <test expect_num_outputs="3">
761 <conditional name="methods_conditional"> 853 <conditional name="methods_conditional">
762 <param name="preprocessing_method" value="Data_reduction"/> 854 <param name="preprocessing_method" value="Data_reduction"/>
763 <param name="bin_width" value="0.1"/> 855 <param name="bin_width" value="0.1"/>
764 </conditional> 856 </conditional>
765 </repeat> 857 </repeat>
858 <conditional name="tabular_annotation">
859 <param name="load_annotation" value="no_annotation"/>
860 </conditional>
766 <param name="output_matrix" value="True"/> 861 <param name="output_matrix" value="True"/>
767 <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> 862 <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
768 <output name="matrixasoutput" file="preprocessing_results4.txt"/> 863 <output name="matrixasoutput" file="preprocessing_results4.txt"/>
769 <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/> 864 <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
770 </test> 865 </test>
780 <param name="reduction_method" value="resample"/> 875 <param name="reduction_method" value="resample"/>
781 <param name="step_width" value="0.1"/> 876 <param name="step_width" value="0.1"/>
782 </conditional> 877 </conditional>
783 </conditional> 878 </conditional>
784 </repeat> 879 </repeat>
880 <conditional name="tabular_annotation">
881 <param name="load_annotation" value="no_annotation"/>
882 </conditional>
785 <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/> 883 <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/>
786 <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/> 884 <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/>
787 </test> 885 </test>
788 </tests> 886 </tests>
789 <help> 887 <help>
796 Input data: 3 types of input data can be used: 894 Input data: 3 types of input data can be used:
797 895
798 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ 896 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
799 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) 897 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
800 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) 898 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
899 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column
801 900
802 Options: 901 Options:
803 902
804 - Normalization: Normalization of intensities to total ion current (TIC) 903 - Normalization: Normalization of intensities to total ion current (TIC)
805 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) 904 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets)
812 911
813 912
814 Output: 913 Output:
815 914
816 - imzML file, preprocessed 915 - imzML file, preprocessed
817 - pdf with key values after each processing step 916 - pdf with key values after each processing step; if an annotation file is loaded, also an overview plot of the pixel annotations
818 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) 917 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns)
918 - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group
819 919
820 Tip: 920 Tip:
821 921
822 - Peak alignment works only after peak picking 922 - Peak alignment works only after peak picking
823 - Peak filtering works only on centroided data (peak picking and alignment or Data reduction peaks) 923 - Peak filtering works only on centroided data (peak picking and alignment or Data reduction peaks)