msi_preprocessing: msi_preprocessing.xml comparison

comparison msi_preprocessing.xml @ 8:d77c5228fd1a draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5bceedc3a11c950790692a4c64bbb83d46897bee

author	galaxyp
date	Tue, 24 Jul 2018 04:53:10 -0400
parents	1a3d477bc54a
children	4d5578b57a77

comparison

equal deleted inserted replaced

-:1a3d477bc54a
+:d77c5228fd1a
-<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.4">
+<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.5">
 <description>
 mass spectrometry imaging preprocessing
 </description>
 <requirements>
 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
 <requirement type="package" version="2.2.1">r-gridextra</requirement>
 <requirement type="package" version="0.20-35">r-lattice</requirement>
+<requirement type="package" version="2.2.1">r-ggplot2</requirement>
 </requirements>
 <command detect_errors="exit_code">
 <![CDATA[
 #if $infile.ext == 'imzml'
 ################################# load libraries and read file #################
 library(Cardinal)
 library(gridExtra)
 library(lattice)
+library(ggplot2)
 #if $infile.ext == 'imzml'
 #if str($processed_cond.processed_file) == "processed":
 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
 #else
 ############# Outputs: summary matrix, RData, tabular and QC report ############
 ################################################################################
 ## optional summarized matrix
 print('Summarized matrix')
+## optional annotation from tabular file to obtain groups over which to apply mean, median or sd (otherwise all pixels are treated as one sample)
+#if str($tabular_annotation.load_annotation) == 'yes_annotation':
+## read and extract x,y,annotation information
+input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
+annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
+colnames(annotation_input) = c("x", "y", "annotation")
+## merge with coordinate information of msidata
+msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata)))
+colnames(msidata_coordinates)[3] = "pixel_index"
+merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE)
+merged_annotation[is.na(merged_annotation)] = "NA"
+merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
+msidata\$annotation = as.factor(merged_annotation[,4])
+#end if
 #if "mean" in str($summary_type).split(","):
 print("mean matrix")
-if (!is.null(levels(msidata\$combined_sample))){
+if (!is.null(levels(msidata\$annotation))){
 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
 count = 1
-for (subsample in levels(msidata\$combined_sample)){
+for (subsample in levels(msidata\$annotation)){
-subsample_pixels = msidata[,msidata\$combined_sample == subsample]
+subsample_pixels = msidata[,msidata\$annotation == subsample]
 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE)
 sample_matrix = cbind(sample_matrix, subsample_calc)
 count = count+1
 }
 rownames(sample_matrix) = mz(msidata)
-colnames(sample_matrix) = levels(msidata\$combined_sample)
+colnames(sample_matrix) = levels(msidata\$annotation)
 write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
 }else{
 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
 rownames(full_sample_calc) = mz(msidata)
 colnames(full_sample_calc) = "$infile.display_name"
 #end if
 #if "median" in str($summary_type).split(","):
 print("median matrix")
-if (!is.null(levels(msidata\$combined_sample))){
+if (!is.null(levels(msidata\$annotation))){
 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
 count = 1
-for (subsample in levels(msidata\$combined_sample)){
+for (subsample in levels(msidata\$annotation)){
-subsample_pixels = msidata[,msidata\$combined_sample == subsample]
+subsample_pixels = msidata[,msidata\$annotation == subsample] ## no idea why it does not work??? NA problem?!
 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE)
 sample_matrix = cbind(sample_matrix, subsample_calc)
 count = count+1
 }
 rownames(sample_matrix) = mz(msidata)
-colnames(sample_matrix) = levels(msidata\$combined_sample)
+colnames(sample_matrix) = levels(msidata\$annotation)
 write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
 }else{
 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
 rownames(full_sample_calc) = mz(msidata)
 colnames(full_sample_calc) = "$infile.display_name"
 }
 #end if
 #if "sd" in str($summary_type).split(","):
 print("sd matrix")
-if (!is.null(levels(msidata\$combined_sample))){
+if (!is.null(levels(msidata\$annotation))){
 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
 count = 1
-for (subsample in levels(msidata\$combined_sample)){
+for (subsample in levels(msidata\$annotation)){
-subsample_pixels = msidata[,msidata\$combined_sample == subsample]
+subsample_pixels = msidata[,msidata\$annotation == subsample]
 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE)
 sample_matrix = cbind(sample_matrix, subsample_calc)
 count = count+1
 }
 rownames(sample_matrix) = mz(msidata)
-colnames(sample_matrix) = levels(msidata\$combined_sample)
+colnames(sample_matrix) = levels(msidata\$annotation)
 write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
 }else{
 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
 rownames(full_sample_calc) = mz(msidata)
 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
 plot(0,type='n',axes=FALSE,ann=FALSE)
 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC")
 grid.table(t(QC_numbers))
+#if str($tabular_annotation.load_annotation) == 'yes_annotation':
+## Legend text shrinks as the number of annotation groups grows.
+## Thresholds are checked in ascending order, so every count falls into exactly
+## one band: <20 -> 10, 20-39 -> 9, 40-59 -> 8, 60-99 -> 7, >=100 -> 6.
+## (Previously counts of exactly 20, 40 or 60 matched no band and dropped to 6.)
+number_combined = length(levels(msidata\$annotation))
+if (number_combined<20){
+legend_size = 10
+}else if (number_combined<40){
+legend_size = 9
+}else if (number_combined<60){
+legend_size = 8
+}else if (number_combined<100){
+legend_size = 7
+}else{
+legend_size = 6
+}
+position_df = cbind(coord(msidata)[,1:2], msidata\$annotation)
+colnames(position_df)[3] = "sample_name"
+combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
+geom_tile() +
+coord_fixed()+
+ggtitle("Spatial orientation of annotated data")+
+theme_bw()+
+theme(plot.title = element_text(hjust = 0.5))+
+theme(text=element_text(family="ArialMT", face="bold", size=12))+
+theme(legend.position="bottom",legend.direction="vertical")+
+theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
+guides(fill=guide_legend(ncol=5,byrow=TRUE))
+coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
+coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
+## add one text label per annotation group at the mean x/y position of its pixels;
+## seq_len() gives an empty loop for zero rows, whereas 1:nrow() would iterate over 1, 0
+for(file_count in seq_len(nrow(coord_labels)))
+{combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
+y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}
+print(combine_plot)
+#end if
 dev.off()
 }else{
 print("inputfile has no intensities > 0")
 }
 <param name="summary_type" type="select" display="checkboxes" multiple="true" label="Summarize all pixels of a sample and calculate the mean, median or standard deviation">
 <option value="mean">mean</option>
 <option value="median">median</option>
 <option value="sd">standard deviation</option>
 </param>
+<conditional name="tabular_annotation">
+<param name="load_annotation" type="select" label="Use pixel annotations from tabular file to summarize pixel">
+<option value="no_annotation" selected="True">summarize over all pixels</option>
+<option value="yes_annotation">summarize over categories from annotation file</option>
+</param>
+<when value="yes_annotation">
+<param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file to summarize pixel"
+help="Tabular file with three columns: x values, y values and pixel annotations"/>
+<param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
+<param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
+<param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
+<param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
+</when>
+<when value="no_annotation"/>
+</conditional>
 <param name="output_matrix" type="boolean" label="Intensity matrix output"/>
 </inputs>
 <outputs>
 <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/>
 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/>
 <conditional name="transf_conditional">
 <param name="trans_type" value="sqrt"/>
 </conditional>
 </conditional>
 </repeat>
+<conditional name="tabular_annotation">
+<param name="load_annotation" value="no_annotation"/>
+</conditional>
 <param name="output_matrix" value="True"/>
 <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
 <output name="matrixasoutput" file="preprocessing_results1.txt"/>
 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
 </test>
 <conditional name="methods_for_alignment">
 <param name="alignment_method" value="DP"/>
 </conditional>
 </conditional>
 </repeat>
+<conditional name="tabular_annotation">
+<param name="load_annotation" value="yes_annotation"/>
+<param name="annotation_file" value="pixel_annotations.tabular"/>
+<param name="column_x" value="1"/>
+<param name="column_y" value="2"/>
+<param name="column_names" value="3"/>
+<param name="tabular_header" value="FALSE"/>
+</conditional>
 <param name="summary_type" value="median,sd"/>
 <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
 <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/>
 <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
 <param name="alignment_method" value="diff"/>
 </conditional>
 </conditional>
 </repeat>
 <param name="summary_type" value="mean"/>
+<conditional name="tabular_annotation">
+<param name="load_annotation" value="no_annotation"/>
+</conditional>
 <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
 <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
 </test>
 <test expect_num_outputs="3">
 <conditional name="methods_conditional">
 <param name="preprocessing_method" value="Data_reduction"/>
 <param name="bin_width" value="0.1"/>
 </conditional>
 </repeat>
+<conditional name="tabular_annotation">
+<param name="load_annotation" value="no_annotation"/>
+</conditional>
 <param name="output_matrix" value="True"/>
 <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
 <output name="matrixasoutput" file="preprocessing_results4.txt"/>
 <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
 </test>
 <param name="reduction_method" value="resample"/>
 <param name="step_width" value="0.1"/>
 </conditional>
 </conditional>
 </repeat>
+<conditional name="tabular_annotation">
+<param name="load_annotation" value="no_annotation"/>
+</conditional>
 <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/>
 <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/>
 </test>
 </tests>
 <help>
 Input data: 3 types of input data can be used:
 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
+- optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column
 Options:
 - Normalization: Normalization of intensities to total ion current (TIC)
 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets)
 Output:
 - imzML file, preprocessed
-- pdf with key values after each processing step
+- pdf with key values after each processing step; if an annotation file was loaded, the pdf also contains an overview plot of the pixel annotations
 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns)
+- optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group
 Tip:
 - Peak alignment works only after peak picking
 - Peak filtering works only on centroided data (peak picking and alignment or Data reduction peaks)

Mercurial > repos > galaxyp > msi_preprocessing

comparison msi_preprocessing.xml @ 8:d77c5228fd1a draft