Mercurial > repos > galaxyp > maldi_quant_peak_detection
changeset 1:eaaa73b043e6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
| author | galaxyp |
|---|---|
| date | Mon, 01 Oct 2018 01:09:43 -0400 |
| parents | 01212bf66f61 |
| children | 17c54820f3be |
| files | maldi_macros.xml maldi_quant_peakdetection.xml test-data/int1.tabular test-data/int2.tabular test-data/intensity_matrix3.tabular test-data/masspeaks1_forinput.tabular test-data/masspeaks2.tabular test-data/masspeaks3.tabular test-data/masspeaks3_forinput.tabular test-data/peakdetection1_QC.pdf test-data/peakdetection2_QC.pdf test-data/peakdetection3_QC.pdf |
| diffstat | 12 files changed, 852 insertions(+), 480 deletions(-) [+] |
line wrap: on
line diff
--- a/maldi_macros.xml Wed Aug 22 11:49:29 2018 -0400 +++ b/maldi_macros.xml Mon Oct 01 01:09:43 2018 -0400 @@ -1,20 +1,28 @@ <macros> <token name="@R_IMPORTS@"><![CDATA[ - ## Libraries library (Cardinal) library (MALDIquantForeign) library (MALDIquant) library (ggplot2) - + library(gridExtra) ]]> </token> + <token name="@MADLI_QUANT_DESCRIPTION@"><![CDATA[ +MALDIquant_ provides a complete analysis pipeline for MALDI-TOF and other mass spectrometry data. +So far we have only implemented the functionalities for mass spectrometry imaging data. + ]]> + </token> + + <token name="@VERSION@">1.18.0</token> + <xml name="requirements"> <requirements> <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> <requirement type="package" version="0.11.5">r-maldiquantforeign</requirement> <requirement type="package" version="1.18">r-maldiquant</requirement> <requirement type="package" version="2.2.1">r-ggplot2</requirement> + <requirement type="package" version="2.2.1">r-gridextra</requirement> </requirements> </xml>
--- a/maldi_quant_peakdetection.xml Wed Aug 22 11:49:29 2018 -0400 +++ b/maldi_quant_peakdetection.xml Mon Oct 01 01:09:43 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="1.18.0.0"> +<tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.1"> <description> Peak detection, binning and filtering for mass-spectrometry imaging data </description> @@ -15,6 +15,8 @@ cp '${infile.extra_files_path}/hdr' infile.hdr && cp '${infile.extra_files_path}/img' infile.img && cp '${infile.extra_files_path}/t2m' infile.t2m && + #else + ln -s $infile infile.RData && #end if Rscript '${maldi_quant_peak_detection}'&& mkdir $outfile_imzml.files_path && @@ -29,36 +31,39 @@ @R_IMPORTS@ -summarized_spectra = FALSE + #if $restriction_conditional.restriction == 'restrict': print('Reading mask region') + ## Import imzML file - coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE))[,1:2] + coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] maldi_data <- importImzMl('infile.imzML', coordinates = coordinate_matrix, centroided = $centroids) - pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) + pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") + #else: print('Reading entire file') ## Import imzML file - #if $infile.ext == 'imzml' - + print('imzML file') #if str($centroids) == "TRUE" peaks <- importImzMl('infile.imzML', centroided = $centroids) - pixelnames = paste0("x = ", coordinates(peaks)[,1],", y = ", coordinates(peaks)[,2]) - + pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") #else maldi_data <- importImzMl('infile.imzML', centroided = $centroids) - pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) + pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") #end if + coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) + #elif $infile.ext == 'tabular' - + print('tabular file') + #set $centroids = "TRUE" ## will be used in some if conditions peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE) peak_list = split(peak_tabular, f = peak_tabular\$spectrum) ## will be ordered according to spectrum pixelnames = unique(peak_tabular\$spectrum) @@ -66,17 +71,55 @@ peaks = list() for (spectra in 1:length(peak_list)) { - single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr) - peaks[[spectra]] = single_peaks + single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr) + peaks[[spectra]] = single_peaks } + #else + print('rdata file') + loadRData <- function(fileName){ + #loads an RData file, and returns it + load(fileName) + get(ls()[ls() != "fileName"]) + } + msidata = loadRData('infile.RData') + centroided(msidata) = $centroids + pixelnames = gsub(", y = ", "_", names(Cardinal::pixels(msidata))) + pixelnames = gsub(" = ", "y_", pixelnames) + + cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2]) + + if (centroided(msidata) == FALSE){ + ## create mass spectrum object + cardinal_mzs = Cardinal::mz(msidata) + maldi_data = list() + for(number_spectra in 1:ncol(msidata)){ + maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) + coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))} + coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data))) + }else{ + peaks = list() + for (spectra in 1:ncol(msidata)) + { + single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata)))) + peaks[[spectra]] = single_peaks + }} #end if +#end if -#end if + + + + + + + + +## default summarized = FALSE +summarized_spectra = FALSE ## Quality control plots during peak detection - pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12) plot(0,type='n',axes=FALSE,ann=FALSE) @@ -86,19 +129,42 @@ title(main=paste("$filename")) ## plot input file spectrum: -#if $infile.ext == 'imzml' - - #if str($centroids) == "TRUE" - plot(peaks[[1]], main="First spectrum of input file") - #else - avgSpectra <- averageMassSpectra(maldi_data,method="mean") - plot(avgSpectra, main="Average spectrum of input file") - #end if -#elif $infile.ext == 'tabular' - plot(peaks[[1]], main="First spectrum of input file") +#if str($centroids) == "TRUE" + plot(peaks[[1]], main="First spectrum of input file") +#else + avgSpectra <- averageMassSpectra(maldi_data,method="mean") + plot(avgSpectra, main="Average spectrum of input file") #end if + + + + + + + +## QC numbers for input file +#if str($centroids) == "TRUE" + pixel_number = length(peaks) + minmz = round(min(unlist(lapply(peaks,mass))), digits=4) + maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) + maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) + medint = round(median(unlist(lapply(peaks,intensity))), digits=2) + inputdata = c(minmz, maxmz,maxfeatures, medint) + QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint)) + vectorofactions = "inputdata" +#else + pixel_number = length(maldi_data) + minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) + maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) + maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) + inputdata = c(minmz, maxmz,maxfeatures, medint) + QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint)) + vectorofactions = "inputdata" +#end if + #if str($tabular_annotation.load_annotation) == 'yes_annotation': ## read and extract x,y,annotation information @@ -107,10 +173,8 @@ colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" ## merge with coordinate information of MSI data - - coordinates_st = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) - colnames(coordinates_st)[3] = "pixel_index" - merged_annotation = merge(coordinates_st, annotation_input, by=c("x", "y"), all.x=TRUE) + colnames(coordinates_info)[3] = "pixel_index" + merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE) merged_annotation[is.na(merged_annotation)] = "NA" merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] samples = as.factor(merged_annotation\$annotation) @@ -151,14 +215,13 @@ #for $method in $methods: - #if str( $method.methods_conditional.method ) == 'Peak_detection': print('peak detection') ##peak detection #if $method.methods_conditional.use_annotations: maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking - pixelnames = merged_annotation\$annotation + pixelnames = levels(samples) summarized_spectra = TRUE #end if @@ -166,8 +229,16 @@ peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method", halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr) - ## QC plot + ## QC plot and numbers plot(peaks[[1]], main="First spectrum after peak detection") + pixel_number = length(peaks) + minmz = round(min(unlist(lapply(peaks,mass))), digits=4) + maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) + maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) + medint = round(median(unlist(lapply(peaks,intensity))), digits=2) + peaks_picked = c(minmz, maxmz,maxfeatures, medint) + QC_numbers= cbind(QC_numbers, peaks_picked) + vectorofactions = append(vectorofactions, "peaks_picked") if (length(peaks[!sapply(peaks, isEmpty)])>0){ #if $infile.ext == 'imzml' @@ -178,7 +249,7 @@ featureMatrix <- intensityMatrix(peaks) #end if featureMatrix2 =cbind(pixelnames, featureMatrix) - colnames(featureMatrix2)[1] = c("mz | spectra") + colnames(featureMatrix2)[1] = c("mz") featureMatrix2 = t(featureMatrix2) write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") }else{print("There are no spectra with peaks left")} @@ -191,8 +262,15 @@ peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size) - ## QC plot + ## QC plot and numbers plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection") + minmz = round(min(unlist(lapply(peaks,mass))), digits=4) + maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) + maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) + medint = round(median(unlist(lapply(peaks,intensity))), digits=2) + monoisotopes = c(minmz, maxmz,maxfeatures, medint) + QC_numbers= cbind(QC_numbers, monoisotopes) + vectorofactions = append(vectorofactions, "monoisotopes") if (length(peaks[!sapply(peaks, isEmpty)])>0){ #if $infile.ext == 'imzml' @@ -203,7 +281,7 @@ featureMatrix <- intensityMatrix(peaks) #end if featureMatrix2 =cbind(pixelnames, featureMatrix) - colnames(featureMatrix2)[1] = c("mz | spectra") + colnames(featureMatrix2)[1] = c("mz") featureMatrix2 = t(featureMatrix2) write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") }else{print("There are no spectra with peaks left")} @@ -214,8 +292,16 @@ ##m/z binning peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance) - ## QC plot + + ## QC plot and numbers plot(peaks[[1]], main="First spectrum after binning") + minmz = round(min(unlist(lapply(peaks,mass))), digits=4) + maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) + maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) + medint =round( median(unlist(lapply(peaks,intensity))), digits=2) + binned = c(minmz, maxmz,maxfeatures, medint) + QC_numbers= cbind(QC_numbers, binned) + vectorofactions = append(vectorofactions, "binned") if (length(peaks[!sapply(peaks, isEmpty)])>0){ #if $infile.ext == 'imzml' @@ -229,7 +315,7 @@ featureMatrix <- intensityMatrix(peaks) #end if featureMatrix2 =cbind(pixelnames, featureMatrix) - colnames(featureMatrix2)[1] = c("mz | spectra") + colnames(featureMatrix2)[1] = c("mz") featureMatrix2 = t(featureMatrix2) write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") }else{print("There are no spectra with peaks left")} @@ -256,8 +342,15 @@ mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples) #end if - ##QC plot + ##QC plot and numbers plot(peaks[[1]], main="First spectrum after m/z filtering") + minmz = round(min(unlist(lapply(peaks,mass))), digits=4) + maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) + maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) + medint = round(median(unlist(lapply(peaks,intensity))), digits=2) + filtered = c(minmz, maxmz,maxfeatures, medint) + QC_numbers= cbind(QC_numbers, filtered) + vectorofactions = append(vectorofactions, "filtered") if (length(peaks[!sapply(peaks, isEmpty)])>0){ #if $infile.ext == 'imzml' @@ -268,58 +361,67 @@ featureMatrix <- intensityMatrix(peaks) #end if featureMatrix2 =cbind(pixelnames, featureMatrix) - colnames(featureMatrix2)[1] = c("mz | spectra") + colnames(featureMatrix2)[1] = c("mz") featureMatrix2 = t(featureMatrix2) + }else{print("There are no spectra with peaks left") + featureMatrix2 = matrix(0, ncol=1, nrow=1)} write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") - }else{print("There are no spectra with peaks left")} - #end if #end for - if (length(peaks[!sapply(peaks, isEmpty)])>0){ - ## mass peaks output - mass_peaks = data.frame(matrix(,ncol=3, nrow=0)) - for (spectrum in 1:length(peaks)){ - spectrum_df = data.frame(peaks[[spectrum]]@snr, peaks[[spectrum]]@mass, peaks[[spectrum]]@intensity) - spectrum_df\$spectrum_id = rep(pixelnames[[spectrum]], length(peaks[[spectrum]]@mass)) - mass_peaks = rbind(mass_peaks,spectrum_df) - } - colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") - write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") - }else{print("There are no spectra with peaks left")} +if (length(peaks[!sapply(peaks, isEmpty)])>0){ + ## mass peaks output + mass_peaks = data.frame(matrix(,ncol=3, nrow=0)) + for (spectrum in 1:length(peaks)){ + spectrum_df = data.frame(peaks[[spectrum]]@snr, peaks[[spectrum]]@mass, peaks[[spectrum]]@intensity) + spectrum_df\$spectrum_id = rep(pixelnames[[spectrum]], length(peaks[[spectrum]]@mass)) + mass_peaks = rbind(mass_peaks,spectrum_df) + } + colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") + write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") +}else{print("There are no spectra with peaks left")} + +## print table with QC values +rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\nintensity") +plot(0,type='n',axes=FALSE,ann=FALSE) +grid.table(t(QC_numbers)) dev.off() if (summarized_spectra == FALSE){ #if $infile.ext == 'imzml' - exportImzMl(peaks, file="out.imzMl", processed=$export_processed) + MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed) #elif $infile.ext == 'tabular' - masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\,")), ncol=2, byrow=TRUE) + masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE) ## extract x and y values and create the coordinate matrix in case tabular was input - peaklist_coordinates = unique(cbind(as.numeric(substring(masspeaks_coordinates[,1], 5, last = 1000000L)), as.numeric(substring(masspeaks_coordinates[,2], 5, last = 1000000L)))) + peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3]))) exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates) + #elif $infile.ext == 'rdata' + MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates) #end if + } ]]> </configfile> </configfiles> <inputs> - <param name="infile" type="data" format="imzml,tabular" label="MS metadata" help="This file is in imzML or tabular format (peak list, peak detection cannot be run again)"/> - <param name="centroids" type="boolean" label="Is the imzML data centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> + <param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML or tabular format (peak list, peak detection cannot be run again) or Cardinal MSImageSet saved as RData"/> + <param name="centroids" type="boolean" label="Is the imzML/RData data centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> <conditional name="restriction_conditional"> - <param name="restriction" type="select" label="Restrict the preprocessing to coordinates of interest"> + <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> <option value="no_restriction" selected="True">Calculate on entire file</option> <option value="restrict">Restrict to coordinates of interest</option> </param> <when value="restrict"> - <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates which should be read" help="x-values in first column, y-values in second column"/> + <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> + <param name="coordinates_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> </when> <when value="no_restriction"/> </conditional> <conditional name="tabular_annotation"> - <param name="load_annotation" type="select" label="Use pixel annotation from tabular file - select in peak detection or filtering step where you want to apply the annotation information"> + <param name="load_annotation" type="select" label="Use pixel annotation from tabular file - select in peak detection or filtering step where annotation should be used"> <option value="no_annotation" selected="True">pixels belong into one group only</option> <option value="yes_annotation">use pixel annotation from a tabular file</option> </param> @@ -335,7 +437,7 @@ </conditional> <repeat name="methods" title="Method" min="1"> <conditional name="methods_conditional"> - <param name="method" type="select" label="Select the method you want to apply"> + <param name="method" type="select" label="Select a method"> <option value="Peak_detection">Peak detection</option> <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option> <option value="Binning">Binning</option> @@ -369,9 +471,9 @@ </when> <when value="Filtering"> <param name="minFrequency" type="float" value="0.25" - label="Remove all peaks which occur in less than minFrequency spectra" help="It is a relative threshold."/> + label="Removal of all peaks which occur in less than minFrequency spectra" help="It is a relative threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be sure."/> <param name="minNumber" type="float" value="1.0" - label="remove all peaks which occur in less than minNumber spectra" help="It is an absolute threshold."/> + label="Removal of all peaks which occur in less than minNumber spectra" help="It is an absolute threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be sure."/> <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations. If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/> <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="mergeWhitelists" help="if FALSE the filtering criteria are applied groupwise. If TRUE peaks that survive the filtering in one group (level of labels) these peaks are also kept in other groups even if their frequencies are below minFrequency"/> @@ -381,7 +483,7 @@ <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="true" truevalue="TRUE" falsevalue="FALSE"/> </inputs> <outputs> - <data format="imzml" name="outfile_imzml" label="$infile.display_name peaks" /> + <data format="imzml" name="outfile_imzml" label="$infile.display_name peaks"/> <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "$infile.display_name peakdetection QC"/> <data format="tabular" name="masspeaks" label="$infile.display_name mass_peaks"/> <data format="tabular" name="intensity_matrix" label="intensity_matrix"/> @@ -414,13 +516,11 @@ <output name="intensity_matrix" file="int1.tabular"/> </test> <test> - <param name="infile" value="masspeaks1_forinput.tabular"/> + <param name="infile" value="masspeaks3_forinput.tabular"/> <param name="centroids" value="TRUE"/> <repeat name="methods"> <conditional name="methods_conditional"> <param name="method" value="monoisotopic_peaks"/> - <param name="minCor" value="0.60"/> - <param name="tolerance" value="0.0001"/> </conditional> </repeat> <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> @@ -457,7 +557,6 @@ <repeat name="methods"> <conditional name="methods_conditional"> <param name="method" value="Filtering"/> - <param name="bin_tolerance" value="0.01"/> <param name="minFrequency" value="0.5"/> <param name="minNumber" value="3"/> <param name="filter_annot_groups" value="TRUE"/> @@ -472,30 +571,66 @@ <help> <