maldi_quant_peak_detection: maldi_quant_peakdetection.xml comparison

comparison maldi_quant_peakdetection.xml @ 3:36d38d2cf88c draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit f127be2141cf22e269c85282d226eb16fe14a9c1

author	galaxyp
date	Fri, 15 Feb 2019 10:26:45 -0500
parents	17c54820f3be
children	e9300ef37403

comparison

equal deleted inserted replaced

-:17c54820f3be
+:36d38d2cf88c
-<tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.2">
+<tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.3">
 <description>
 Peak detection, binning and filtering for mass-spectrometry imaging data
 </description>
 <macros>
 <import>maldi_macros.xml</import>
 print('Reading mask region')
 ## Import imzML file
 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2]
+coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)]
 maldi_data <- importImzMl('infile.imzML',
 coordinates = coordinate_matrix, centroided = $centroids)
 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
 if (centroided(msidata) == FALSE){
 ## create mass spectrum object
 cardinal_mzs = Cardinal::mz(msidata)
 maldi_data = list()
 for(number_spectra in 1:ncol(msidata)){
-maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])
+maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])}
-coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))}
+coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))
 }else{
 peaks = list()
 for (spectra in 1:ncol(msidata))
 {
 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata))))
 peaks[[spectra]] = single_peaks
-}}
+}
+coordinates_info = cbind(cardinal_coordinates, c(1:length(peaks)))}
 #end if
 #end if
 ## default summarized = FALSE
 summarized_spectra = FALSE
 title(main=paste("$filename"))
 ## plot input file spectrum:
 #if $centroids:
-plot(peaks[[1]], main="First spectrum of input file")
+## Choose random spectra for QC plots
+random_spectra = sample(1:length(peaks), 4, replace=FALSE)
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
+for (random_sample in random_spectra){
+plot(peaks[[random_sample]],sub="", main=paste0("spectrum ", random_sample))}
+title("Input spectra", outer=TRUE, line=0)
 #else
-avgSpectra <- averageMassSpectra(maldi_data,method="mean")
+## Choose random spectra for QC plots
-plot(avgSpectra, main="Average spectrum of input file")
+random_spectra = sample(1:length(maldi_data), 4, replace=FALSE)
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
+for (random_sample in random_spectra){
+plot(maldi_data[[random_sample]],sub="", main=paste0("spectrum ", random_sample))}
+title("Input spectra", outer=TRUE, line=0)
 #end if
 ## QC numbers for input file
 #if str($centroids) == "TRUE"
 ## read and extract x,y,annotation information
 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"
-## merge with coordinate information of MSI data
+## merge provided annotation with coordinate information of MSI data
 colnames(coordinates_info)[3] = "pixel_index"
 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE)
 merged_annotation[is.na(merged_annotation)] = "NA"
+## order coordinate information according to pixel index to make sure that the order stays the same
 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
 samples = as.factor(merged_annotation\$annotation)
 ## print annotation overview into PDF output
 #if $method.methods_conditional.use_annotations:
 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking
 pixelnames = levels(samples)
 summarized_spectra = TRUE
+random_spectra = sample(1:length(maldi_data), 4, replace=TRUE)
 #end if
 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method",
 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr)
 ## QC plot and numbers
-## plot old spectrum with baseline in blue and picked peaks in green
+## plot old spectra with baseline in blue and picked peaks in green
-noise = estimateNoise(maldi_data[[1]], method= "$method.methods_conditional.peak_method")
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
-plot(maldi_data[[1]], main="First spectrum with noise line (blue) and picked peaks (green)")
+for (random_sample in random_spectra){
-lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue")
+noise = estimateNoise(maldi_data[[random_sample]], method= "$method.methods_conditional.peak_method")
-points(peaks[[1]], col="green", pch=20)
+plot(maldi_data[[random_sample]], sub="", main=paste0("spectrum ", random_sample))
+lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue")
+points(peaks[[random_sample]], col="green", pch=20)}
+title("S/N in blue and picked peaks in green", outer=TRUE, line=0)
 ## plot new spectrum
-plot(peaks[[1]], main="First spectrum after peak detection")
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
+for (random_sample in random_spectra){
+plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
+title("Picked peaks", outer=TRUE, line=0)
 pixel_number = length(peaks)
 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
 #elif str( $method.methods_conditional.method ) == 'monoisotopic_peaks':
 print('monoisotopic peaks')
 ##monoisotopic peaks
-peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size)
+## keep peaks to plot them with monoisotopic peaks
+picked_peaks = peaks
-## QC plot and numbers
+peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor,
+tolerance=$method.methods_conditional.tolerance,
+distance=$method.methods_conditional.distance,
+size=$method.methods_conditional.size)
 ## plot old spectrum with picked isotopes as green dots
-plot(peaks[[1]], main="First spectrum with picked monoisotopic peaks (green)")
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
-points(peaks[[1]], col="green", pch=20)
+for (random_sample in random_spectra){
-## plot new spectrum
+plot(picked_peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))
-plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection")
+points(peaks[[random_sample]], col="green", pch=20)}
+title(paste0("Monoisotopic peaks in green"), outer=TRUE, line=0)
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
+for (random_sample in random_spectra){
+plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
+title("Monoisotopic peaks", outer=TRUE, line=0)
 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
 number_features = length(unique(unlist(lapply(peaks,mass))))
 colnames(featureMatrix2)[1] = c("mz")
 featureMatrix2 = t(featureMatrix2)
 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
 }else{print("There are no spectra with peaks left")}
+#elif str( $method.methods_conditional.method ) == 'Align':
+print('align')
+##align spectra with 2 separate functions
+#if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference':
+## 1) calculate warping:
+warping_function <- determineWarpingFunctions(peaks,
+tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
+allowNoMatches=$method.methods_conditional.allow_nomatch, minFrequency = $method.methods_conditional.reference_for_alignment.min_frequency)
+## 2) warp spectra:
+peaks = warpMassPeaks(peaks, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)
+#elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference':
+## create reference mass_vector from tabular file
+mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,$method.methods_conditional.reference_for_alignment.mz_column]
+int_vector = rep(1,length(mass_vector))
+mass_list = createMassPeaks(mass_vector, int_vector)
+#if str($method.methods_conditional.reference_for_alignment.separate_alignment) == "FALSE"
+print('default alignment')
+## 1) calculate warping:
+warping_function <- determineWarpingFunctions(peaks,
+tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
+allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list)
+## 2) warp spectra:
+peaks = warpMassPeaks(peaks, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)
+#elif str($method.methods_conditional.reference_for_alignment.separate_alignment) == "TRUE"
+print('spectra wise alignment')
+## Spectrum-wise alignment: every spectrum is warped on its own against the
+## external reference (mass_list), so spectra cannot influence each other.
+## One slot per input spectrum keeps the pixel order unchanged.
+peaks_new_list = vector("list", length(peaks))
+for (pixelnb in seq_along(peaks))
+{
+## 1) calculate warping (pass a one-element list of MassPeaks, same as for warpMassPeaks below):
+warping_function <- determineWarpingFunctions(list(peaks[[pixelnb]]),
+tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
+allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list)
+## 2) warp spectra: store the single warped spectrum at its own index
+## instead of overwriting the whole result list in every iteration
+peaks_new_list[[pixelnb]] = warpMassPeaks(list(peaks[[pixelnb]]), warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)[[1]]
+}
+peaks_new = peaks_new_list
+## hand the aligned spectra back, the QC plots and outputs below read from peaks
+peaks = peaks_new
+#end if
+#end if
+## QC plot and numbers
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
+for (random_sample in random_spectra){
+plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
+title("Aligned spectra", outer=TRUE, line=0)
+minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
+maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
+mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
+medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
+number_features = length(unique(unlist(lapply(peaks,mass))))
+aligned = c(minmz, maxmz,number_features,mean_features,  medint)
+QC_numbers= cbind(QC_numbers, aligned)
+vectorofactions = append(vectorofactions, "aligned")
+if (length(peaks[!sapply(peaks, isEmpty)])>0){
+featureMatrix <- intensityMatrix(peaks)
+## only for profile imzML file: featurematrix is overwritten:
+#if $infile.ext == 'imzml'
+#if str($centroids) == "FALSE"
+featureMatrix <- intensityMatrix(peaks, maldi_data)
+#end if
+#end if
+featureMatrix2 =cbind(pixelnames, featureMatrix)
+colnames(featureMatrix2)[1] = c("mz")
+featureMatrix2 = t(featureMatrix2)
+write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
+}else{print("There are no spectra with peaks left")}
 #elif str( $method.methods_conditional.method ) == 'Binning':
 print('binning')
 ##m/z binning
 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method")
 ## QC plot and numbers
-plot(peaks[[1]], main="First spectrum after binning")
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
+for (random_sample in random_spectra){
+plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
+title("Binned spectra", outer=TRUE, line=0)
 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
 medint =round( median(unlist(lapply(peaks,intensity))), digits=2)
 number_features = length(unique(unlist(lapply(peaks,mass))))
 minNumber=$method.methods_conditional.minNumber,
 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples)
 #end if
 ##QC plot and numbers
-plot(peaks[[1]], main="First spectrum after m/z filtering")
+par(mfrow = c(2, 2), oma=c(0,0,2,0))
+for (random_sample in random_spectra){
+plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
+title("Filtered spectra", outer=TRUE, line=0)
 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
 number_features = length(unique(unlist(lapply(peaks,mass))))
 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum")
 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
 }else{print("There are no spectra with peaks left")}
 ## print table with QC values
-rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# features", "median\nintensity")
+rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# peaks (int.>0)", "median\nintensity")
 plot(0,type='n',axes=FALSE,ann=FALSE)
 grid.table(t(QC_numbers))
 dev.off()
 if (summarized_spectra == FALSE){
 #if $infile.ext == 'imzml'
-MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed)
+MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE)
 #elif $infile.ext == 'tabular'
 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE)
 ## extract x and y values and create the coordinate matrix in case tabular was input
 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3])))
-exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates)
+exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=peaklist_coordinates)
 #elif $infile.ext == 'rdata'
-MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates)
+MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=cardinal_coordinates)
 #end if
 }
 ]]>
 </configfile>
 </configfiles>
 <inputs>
-<param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML, peaklist or Cardinal MSImageSet saved as RData" help="imzML or tabular format (peak list) or Cardinal MSImageSet saved as RData"/>
+<param name="infile" type="data" format="imzml,tabular,rdata" label="MSI data" help="Input file as imzML (composite upload), tabular peaklist or Cardinal MSImageSet saved as RData (regular upload)"/>
-<param name="centroids" type="boolean" label="Input data is centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/>
+<param name="centroids" type="boolean" label="Centroided input" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/>
 <conditional name="restriction_conditional">
-<param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files">
+<param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files">
-<option value="no_restriction" selected="True">Calculate on entire file</option>
+<option value="no_restriction" selected="True">No, calculate on entire file</option>
-<option value="restrict">Restrict to coordinates of interest</option>
+<option value="restrict">Yes, restrict to spectra of interest</option>
 </param>
 <when value="restrict">
-<param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/>
+<param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/>
+<param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/>
+<param name="column_y" data_ref="coordinates_file" label="Column with y values" type="data_column"/>
 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
 </when>
 <when value="no_restriction"/>
 </conditional>
 <conditional name="tabular_annotation">
-<param name="load_annotation" type="select" label="Pixels have annotations" help="Annotations can be used during peak detection or filteringfsplit">
+<param name="load_annotation" type="select" label="Spectra annotations" help="Annotations can be used for group wise peak detection or filtering">
-<option value="no_annotation" selected="True">pixels have no annotations</option>
+<option value="no_annotation" selected="True">No</option>
-<option value="yes_annotation">pixel annotation from file</option>
+<option value="yes_annotation">Yes</option>
 </param>
 <when value="yes_annotation">
 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
 help="Tabular file with three columns: x values, y values and pixel annotations"/>
 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
 <repeat name="methods" title="Method" min="1">
 <conditional name="methods_conditional">
 <param name="method" type="select" label="Select a method">
 <option value="Peak_detection">Peak detection</option>
 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option>
+<option value="Align">Align Spectra (warping/phase correction)</option>
 <option value="Binning">Binning</option>
 <option value="Filtering">Filtering</option>
 </param>
 <when value="Peak_detection">
 <param name="peak_method" type="select" label="Noise estimation function">
 <param name="tolerance" type="float" label="Tolerance" value="0.00005"
 help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 50ppm use 0.00005 or 50e-6" />
 <param name="distance" type="float" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides."/>
 <param name="size" type="integer" label="Size" value="3" help="Size (length) of isotopic pattern, longer patterns are preferred over shorter ones, min size is 2."/>
 </when>
+<when value="Align">
+<param name="warping_method" type="select" label="Warping methods">
+<option value="lowess" selected="True">Lowess</option>
+<option value="linear">Linear</option>
+<option value="quadratic">Quadratic</option>
+<option value="cubic">Cubic</option>
+</param>
+<param name="tolerance" type="float" value="0.00005"
+label="Tolerance = abs(mz1 - mz2)/mz2"
+help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" />
+<param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/>
+<param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/>
+<param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/>
+<conditional name="reference_for_alignment">
+<param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration">
+<option value="no_reference" selected="True">no reference</option>
+<option value="yes_reference">reference from tabular file</option>
+</param>
+<when value="no_reference">
+<param name="min_frequency" type="float" value="0.9" label = "minFrequency" help="Removal of all peaks which occur in less than minFrequency spectra to generate the reference m/z"/>
+</when>
+<when value="yes_reference">
+<param name="reference_file" type="data" format="tabular"
+label="Reference m/z values"
+help="Tabular file"/>
+<param name="mz_column" data_ref="reference_file" label="Column with m/z values" type="data_column"/>
+<param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
+<param name="separate_alignment" type="boolean" label="Spectrum wise alignment" help="Internal binning is omitted to avoid interaction between spectra" truevalue="TRUE" falsevalue="FALSE"/>
+</when>
+</conditional>
+</when>
 <when value="Binning">
-<param name="bin_tolerance" type="float" value="0.002" label="Binning tolerance"
+<param name="bin_tolerance" type="float" value="0.002" label="Tolerance"
 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/>
 <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin.">
 <option value="strict" selected="True" >strict</option>
 <option value="relaxed" >relaxed</option>
 </param>
 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE"
 label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/>
 </when>
 </conditional>
 </repeat>
-<param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="True" truevalue="TRUE" falsevalue="FALSE"/>
 </inputs>
 <outputs>
-<data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}">
+<data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzml">
 <!--filter>methods_conditional['method'] == 'Peak_detection'</filter-->
 </data>
 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "${tool.name} on ${on_string}: QC"/>
 <data format="tabular" name="masspeaks" label="${tool.name} on ${on_string}: peaklist"/>
 <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"/>
 </conditional>
 </repeat>
 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/>
 <output name="masspeaks" file="masspeaks2.tabular"/>
 <output name="intensity_matrix" file="int2.tabular"/>
+<output name="outfile_imzml" ftype="imzml" file="peak_detection2.imzml.txt" lines_diff="4">
+<extra_files type="file" file="peak_detection2.imzml" name="imzml" lines_diff="6"/>
+<extra_files type="file" file="peak_detection2.ibd" name="ibd" compare="sim_size"/>
+</output>
 </test>
 <test>
 <param name="infile" value="" ftype="imzml">
 <composite_data value="Example_Continuous.imzML"/>
 <composite_data value="Example_Continuous.ibd"/>
 </conditional>
 </repeat>
 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/>
 <output name="intensity_matrix" file="intensity_matrix3.tabular"/>
 <output name="masspeaks" file="masspeaks3.tabular"/>
+<output name="outfile_imzml" ftype="imzml" file="peak_detection3.imzml.txt" lines_diff="4">
+<extra_files type="file" file="peak_detection3.imzml" name="imzml" lines_diff="6"/>
+<extra_files type="file" file="peak_detection3.ibd" name="ibd" compare="sim_size"/>
+</output>
 </test>
 <test>
 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
 <param name="method" value="Peak_detection"/>
 <param name="peak_method" value="MAD"/>
 <param name="halfWindowSize" value="20"/>
 <param name="snr" value="2"/>
 <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/>
 <output name="intensity_matrix" file="intensity_matrix4.tabular"/>
 <output name="masspeaks" file="masspeaks4.tabular"/>
+<output name="outfile_imzml" ftype="imzml" file="peak_detection4.imzml.txt" lines_diff="4">
+<extra_files type="file" file="peak_detection4.imzml" name="imzml" lines_diff="6"/>
+<extra_files type="file" file="peak_detection4.ibd" name="ibd" compare="sim_size"/>
+</output>
 </test>
 </tests>
 <help>
 <![CDATA[
 6.80	306.25     0.133        xy_1_1
 ...
 ...
-- Optional:  Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second columns. Further columns are allowed. Tabular files with any header name or no header at all are supported.
+- Optional:  Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. Tabular files with any header name or no header at all are supported.
 ::
 x_coord     y_coord
 1            1
 - SNR is an abbreviation for signal-to-noise-ratio. A local maximum has to be higher than SNR*noise to be recognized as a peak.
 - 'halfWindowSize': The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]. A local maximum has to be the highest one in the given window to be recognized as peak.
 - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking.
-- Monoisotopic peaks: detection of monoisotopic peaks
+- Monoisotopic peaks: Keeps only the monoisotopic peaks
+- Spectra alignment (warping): alignment for (re)calibration of m/z values.
+- without external reference m/z: internal reference is obtained by filtering (default 90%) and binning the peaks to find landmark peaks and their average m/z
+- with external reference m/z: the m/z provided in a tabular file are used as a reference, at least 10 reference values are recommended
+- non linear warping (parametric time warping plus binning) to match the reference peaks (internal or external) to the peaks with the given tolerance. At least two m/z per spectrum are needed for the alignment. This requirement can be skipped by setting "Don't throw an error when less than 2 reference m/z were found in a spectrum" to yes. If spectra that could not be aligned should be set to zero, select yes in "If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero". In order to remove such empty spectra set "Should empty spectra be removed" to yes.
 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. The algorithm is based on the following workflow:
-1. Put all mass in a sorted vector.
+1. Put all m/z in a sorted vector.
 2. Calculate differences between each neighbor.
-3. Divide the mass vector at the largest gap (largest difference) and form a left and a right bin.
+3. Divide the m/z vector at the largest gap (largest difference) and form a left and a right bin.
 4. Rerun step 3 for the left and/or the right bin if they don't fulfill the following criteria:
 - All peaks in a bin are near to the mean (abs(mass-meanMass)/meanMass < tolerance).
 - method == "strict": The bin doesn't contain two or more peaks of the same sample.
 - To filter data that was averaged before peak detection: Filtering has to be done in the same tool run as the peak detection. The filtering criteria are automatically applied per annotation group (Group wise filtering can be 'No') and not per pixel. Example: to keep only m/z that were detected in at least half of all annotation groups set minFrequency to 0.5.
 **Output**
-- centroided imzML file (processed or continuous), imzML file is empty when 'Detect peaks on average mass spectra' is chosen.
+- centroided, processed imzML file, imzML file is empty when 'Detect peaks on average mass spectra' is chosen.
-- pdf with mass spectra plots after each preprocessing step and a table with key values after each preprocessing step
+- pdf with mass spectra plots of four random spectra and a table with key values after each preprocessing step
 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum"
 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs, values that were not picked are set to NA. For peak picking on the average of multiple spectra, each group of spectra is a column with mean intensities for each m/z.
 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/

Mercurial > repos > galaxyp > maldi_quant_peak_detection

comparison maldi_quant_peakdetection.xml @ 3:36d38d2cf88c draft