Mercurial > repos > galaxyp > msi_qualitycontrol
view msi_qualitycontrol.xml @ 1:c6bc77c4731d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_qualitycontrol commit 93af5acf54999bcfea4c803680ca5a9c2a92fee5
author | galaxyp |
---|---|
date | Thu, 02 Nov 2017 10:32:41 -0400 |
parents | 845073d506a8 |
children | 1ccbda92b76b |
line wrap: on
line source
<!--
    MSI Qualitycontrol

    Galaxy wrapper that renders an R script (configfile below) and runs it with
    Rscript. The script loads a mass spectrometry imaging dataset with Cardinal
    (imzML, Analyze 7.5 or an RData file) and writes "qualitycontrol.pdf", which
    is collected as the single output "plots".

    Both the command and the configfile are Cheetah templates: lines starting
    with "##" are template comments and are not part of the rendered text, and
    "\$" is an escaped dollar sign that reaches R as a plain "$".
-->
<tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0">
    <description>
        mass spectrometry imaging QC
    </description>
    <requirements>
        <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
        <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
        <requirement type="package" version="2.2.1">r-gridextra</requirement>
        <requirement type="package" version="2.23_15">r-kernsmooth</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[

        ## Stage the input under the fixed file names the R script reads.
        #if $infile.ext == 'imzml'
            cp '${infile.extra_files_path}/imzml' infile.imzML &&
            cp '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            cp '${infile.extra_files_path}/hdr' infile.hdr &&
            cp '${infile.extra_files_path}/img' infile.img &&
            cp '${infile.extra_files_path}/t2m' infile.t2m &&
        #else
            ln -s '$infile' infile.RData &&
        #end if
        ## Print the rendered script to stdout, then run it.
        cat '${cardinal_qualitycontrol_script}' &&
        Rscript '${cardinal_qualitycontrol_script}'

    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_qualitycontrol_script"><![CDATA[

################################# load libraries and read file #########################

library(Cardinal)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(KernSmooth)

## Read MALDI Imaging dataset
## (for RData input the loaded file has to provide the object "msidata")

#if $infile.ext == 'imzml'
    msidata <- readMSIData('infile.imzML')
#elif $infile.ext == 'analyze75'
    msidata <- readMSIData('infile.hdr')
#else
    load('infile.RData')
#end if

#if $inputpeptidefile:
    ## Read tabular file with peptide masses for plots and heatmap images:
    input_list = read.delim("$inputpeptidefile", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE)
#else
    ## Placeholder row; a mass of 0 only counts as valid if it lies inside the mz range
    input_list = data.frame(0, 0)
#end if

###################################### file properties in numbers ######################

## Number of features (mz)
maxfeatures = length(features(msidata))
## Range mz
minmz = round(min(mz(msidata)), digits=2)
maxmz = round(max(mz(msidata)), digits=2)
## Number of spectra (pixels)
pixelcount = length(pixels(msidata))
## Range x coordinates
minimumx = min(coord(msidata)[,1])
maximumx = max(coord(msidata)[,1])
## Range y coordinates
minimumy = min(coord(msidata)[,2])
maximumy = max(coord(msidata)[,2])
## Range of intensities
minint = round(min(spectra(msidata)[]), digits=2)
maxint = round(max(spectra(msidata)[]), digits=2)
medint = round(median(spectra(msidata)[]), digits=2)
## Number of intensities > 0
npeaks= sum(spectra(msidata)[]>0)
## Spectra multiplied with mz (potential number of peaks)
numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
## Percentage of intensities > 0
percpeaks = round(npeaks/numpeaks*100, digits=2)
## Number of empty TICs (column sums of the intensity matrix that are zero)
TICs = colSums(spectra(msidata)[])
NumemptyTIC = sum(TICs == 0)

## Processing information
processinginfo = processingData(msidata)
centroidedinfo = processinginfo@centroided # TRUE or FALSE

## For each processing step: report the stored value, or 'FALSE' if the slot is empty

## normalization
if (length(processinginfo@normalization) == 0) {
  normalizationinfo='FALSE'
} else {
  normalizationinfo=processinginfo@normalization
}
## smoothing
if (length(processinginfo@smoothing) == 0) {
  smoothinginfo='FALSE'
} else {
  smoothinginfo=processinginfo@smoothing
}
## baseline
if (length(processinginfo@baselineReduction) == 0) {
  baselinereductioninfo='FALSE'
} else {
  baselinereductioninfo=processinginfo@baselineReduction
}
## peak picking
if (length(processinginfo@peakPicking) == 0) {
  peakpickinginfo='FALSE'
} else {
  peakpickinginfo=processinginfo@peakPicking
}

## calculate how many input peptide masses are valid:
## which() drops rows whose mass is NA (na.strings above can produce them);
## a plain logical index would turn them into all-NA rows
inputpeptides = input_list[which(input_list[,1]>minmz & input_list[,1]<maxmz),]
inputmasses = inputpeptides[,1]
inputnames = inputpeptides[,2]

#############################################################################

## Two parallel vectors (label, value) that become the table on the first PDF page
properties = c("Number of mz features",
               "Range of mz values [Da]",
               "Number of pixels",
               "Range of x coordinates",
               "Range of y coordinates",
               "Range of intensities",
               "Median of intensities",
               "Intensities > 0",
               "Number of zero TICs",
               "Preprocessing",
               "Normalization",
               "Smoothing",
               "Baseline reduction",
               "Peak picking",
               "Centroided",
               "# valid peptidemasses")

values = c(paste0(maxfeatures),
           paste0(minmz, " - ", maxmz),
           paste0(pixelcount),
           paste0(minimumx, " - ", maximumx),
           paste0(minimumy, " - ", maximumy),
           paste0(minint, " - ", maxint),
           paste0(medint),
           paste0(percpeaks, " %"),
           paste0(NumemptyTIC),
           paste0(" "),
           paste0(normalizationinfo),
           paste0(smoothinginfo),
           paste0(baselinereductioninfo),
           paste0(peakpickinginfo),
           paste0(centroidedinfo),
           paste0(length(inputmasses)))

property_df = data.frame(properties, values)

## Variables for plots: coordinate range widened by one unit on each side
xrange = 1
yrange = 1
maxx = max(coord(msidata)[,1])+xrange
minx = min(coord(msidata)[,1])-xrange
maxy = max(coord(msidata)[,2])+yrange
miny = min(coord(msidata)[,2])-yrange

####################################### Preparation of images #########################

## Acquisitionorder
pixelnumber = 1:pixelcount
pixelxyarray=cbind(coord(msidata),pixelnumber)

## Number of peaks per pixel
peaksperpixel = colSums(spectra(msidata)[]> 0)
peakscoordarray=cbind(coord(msidata), peaksperpixel)

## Most abundant mz (mz value at the row index of the maximum intensity of each column)
highestmz = apply(spectra(msidata)[],2,which.max)
highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz])
colnames(highestmz_matrix)[3] = "highestmzinDa"

###################################### Preparation of plots ############################

## Scatterplot of x2 against x1 in which each point is coloured by local point density.
## function without xaxt for plots with automatic x axis
##   x1, x2      values for the x axis and the y axis
##   ylim, xlim  axis limits, default: range of the data
##   xlab, ylab, main  axis labels and title, default: empty
## densCols() assigns a grey level per point; the red channel of that grey (0 to 255)
## plus one is used as index into a 256 colour palette. Points are drawn in order of
## increasing density, so the densest points are plotted last.
plot_colorByDensity = function(x1,x2,
                               ylim=c(min(x2),max(x2)),
                               xlim=c(min(x1),max(x1)),
                               xlab="",ylab="",main="") {
  df <- data.frame(x1,x2)
  x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
  df\$dens <- col2rgb(x)[1,] + 1L
  cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
  df\$col <- cols[df\$dens]
  plot(x2~x1, data=df[order(df\$dens),],
       ylim=ylim,xlim=xlim,pch=20,col=col,
       cex=1,xlab=xlab,ylab=ylab,las=1,
       main=main)
}

## Number of peaks per mz - number across all pixel
peakspermz = rowSums(spectra(msidata)[] > 0 )

## Sum of all intensities for each mz (like TIC, but for mz instead of pixel)
mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz

######################################## PDF #############################################
##########################################################################################
##########################################################################################

pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
## Empty plot that only carries the title and the table of properties
plot(0,type='n',axes=FALSE,ann=FALSE)
## Without a user supplied title the dataset name is used
#if not $filename:
    #set $filename = $infile.display_name
#end if
title(main=paste("Quality control of MSI data\n\n", "Filename:", "$filename"))

############################# I) numbers ####################################
#############################################################################
grid.table(property_df, rows= NULL)

############################# II) ion images #################################
##############################################################################

## The ggplot calls are wrapped in parentheses so that the plot object is printed
## 1) Acquisition image
(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))
 + scale_y_reverse() + geom_tile() + coord_fixed()
 + ggtitle("1) Order of Acquisition")
 + theme_bw()
 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                        space = "Lab", na.value = "black", name = "Acq"))

## 2) Calibrant images:
## one image per valid input mass, titled 2A, 2B, ...
if (length(inputmasses) != 0) {
  for (mass in seq_along(inputmasses)) {
    image(msidata, mz=inputmasses[mass], plusminus=$plusminusinDalton,
          main= paste0("2",LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2), " Da)"),
          contrast.enhance = "histogram")
  }
} else {print("The inputpeptide masses were outside the mass range")}

## 3) Number of peaks per pixel - image
(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))
 + scale_y_reverse(limits=c(maxy,miny)) + geom_tile() + coord_fixed()
 + ggtitle("3) Number of peaks per pixel")
 + theme_bw()
 + theme(text=element_text(family="ArialMT", face="bold", size=12))
 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                        space = "Lab", na.value = "black", name = "# peaks"))

## 4) TIC image
TICcoordarray=cbind(coord(msidata), TICs)
(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs))
 + scale_y_reverse(limits=c(maxy,miny)) + geom_tile() + coord_fixed()
 + ggtitle("4) Total Ion Chromatogram")
 + theme_bw()
 + theme(text=element_text(family="ArialMT", face="bold", size=12))
 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                        space = "Lab", na.value = "black", name = "TIC"))

## 5) Most abundant mass image
(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))
 + scale_y_reverse(limits=c(maxy,miny)) + geom_tile() + coord_fixed()
 + ggtitle("5) Most abundant m/z in each pixel")
 + theme_bw()
 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z",
                        labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
                        breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)],
                        limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))
 + theme(text=element_text(family="ArialMT", face="bold", size=12)))

## which mz are highest
## most frequent and second most frequent rounded value, each with the first pixel showing it;
## with only one distinct rounded value the second entry and its pixel are NA
highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1])
highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1]

secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2])
secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1]

## 6) pca image for two components
pca <- PCA(msidata, ncomp=2)
par(mfrow = c(2,1))
plot(pca, col=c("black", "darkgrey"), main="6) PCA for two components")
image(pca, ylim = c(-1, maxy), col=c("black", "white"))

############################# III) properties over acquisition (spectra index)##########
##############################################################################

par(mfrow = c(2,1), mar=c(5,6,4,2))

## 7a) number of peaks per spectrum - scatterplot
plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="7a) Number of peaks per spectrum")
title(xlab="Spectra index \n (= Acquisition time)", line=3)
title(ylab="Number of peaks", line=4)

## 7b) number of peaks per spectrum - histogram
hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="")
title(main="7b) Number of peaks per spectrum", line=2)
title(ylab="Frequency = # spectra", line=4)
abline(v=median(peaksperpixel), col="blue")

## 8a) TIC per spectrum - density scatterplot
par(mfrow = c(2,1), mar=c(5,6,4,2))
plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="8a) TIC per pixel")
title(xlab="Spectra index \n (= Acquisition time)", line=3)
title(ylab = "Total ion chromatogram intensity", line=4)

## 8b) TIC per spectrum - histogram
hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
title(main= "8b) TIC per spectrum", line=2)
title(ylab="Frequency = # spectra", line=4)
## the median line only considers spectra with TIC > 0
abline(v=median(log(TICs[TICs>0])), col="blue")

## 9) intensity of chosen peptides over acquisition (pixel index)
if (length(inputmasses) != 0) {
  par(mfrow = c(3, 2))
  for (mass in seq_along(inputmasses)) {
    mznumber = features(msidata, mz = inputmasses[mass])
    intensityvector = spectra(msidata)[][mznumber,]
    plot(intensityvector, main=inputnames[mass], xlab="Spectra index \n (= Acquisition time)")
  }
} else {print("The inputpeptide masses were outside the mass range")}

################################## IV) changes over mz ############################
###################################################################################

## 10) Number of peaks per mz
par(mfrow = c(2,1), mar=c(5,6,4,4.5))

## 10a) Number of peaks per mz - scatterplot
plot_colorByDensity(mz(msidata),peakspermz, main= "10a) Number of peaks for each mz", ylab ="")
title(xlab="mz in Dalton", line=2.5)
title(ylab = "Number of peaks", line=4)
axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
mtext("Coverage of spectra [%]", 4, line=3, adj=1)
# make plot smaller to fit axis and labels, add second y axis with %

## 10b) Number of peaks per mz - histogram
hist(peakspermz, main="", las=1, ylab="", xlab="")
title(ylab = "Frequency", line=4)
title(main="10b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
abline(v=median(peakspermz), col="blue")

## 11) Sum of intensities per mz
par(mfrow = c(2,1), mar=c(5,6,4,2))

# 11a) sum of intensities per mz - scatterplot
plot_colorByDensity(mz(msidata),mzTIC, main= "11a) Sum of all peak intensities for each mz", ylab ="")
title(xlab="mz in Dalton", line=2.5)
title(ylab="Intensity sum", line=4)

# 11b) sum of intensities per mz - histogram
hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
title(main="11b) Sum of intensities per mz", line=2, ylab="")
title(xlab = "log (sum of intensities per mz)")
title(ylab = "Frequency", line=4)
abline(v=median(log(mzTIC[mzTIC>0])), col="blue")

################################## V) general plots ############################
###################################################################################

## 12) Intensity distribution
par(mfrow = c(2,1), mar=c(5,6,4,2))

## 12a) Intensity histogram:
hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
title(main="12a) Log2-transformed intensities", line=2)
title(xlab="log2 intensities")
title(ylab="Frequency", line=4)
abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")

## 12b) Median intensity over spectra
medianint_spectra = apply(spectra(msidata), 2, median)
plot(medianint_spectra, main="12b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
title(ylab="Median spectrum intensity", line=4)

## 13) Mass spectra
par(mfrow = c(2, 2))
plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition")
plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
## skipped when there is no second most frequent m/z (pixel index is NA then)
if (!is.na(secondhighestmz_pixel)) {
  plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
}

dev.off()

        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="imzml, rdata, analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
        <param name="filename" type="text" value="" optional="true" label="Title" help="will appear in the quality report. If nothing given it will take the dataset name."/>
        <param name="inputpeptidefile" type="data" optional="true" format="txt, csv" label="Text file with peptidemasses and names" help="first column peptide m/z, second column peptide name, tab separated file"/>
        <!-- float, not text: the value is written unquoted into the R script as the plusminus argument -->
        <param name="plusminusinDalton" value="0.25" type="float" label="Mass range" help="plusminus mass window in Dalton"/>
    </inputs>
    <outputs>
        <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label="${tool.name} on $infile.display_name"/>
    </outputs>
    <tests>
        <!-- one test per supported input type; PDFs are compared by size only -->
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML" ftype="imzml"/>
                <composite_data value="Example_Continuous.ibd" ftype="ibd"/>
            </param>
            <param name="inputpeptidefile" value="inputpeptides.csv" ftype="csv"/>
            <param name="plusminusinDalton" value="0.25"/>
            <param name="filename" value="Testfile_imzml"/>
            <output name="plots" file="Testfile_qualitycontrol_imzml.pdf" compare="sim_size" delta="20000"/>
        </test>
        <test>
            <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr" ftype="hdr"/>
                <composite_data value="Analyze75.img" ftype="img"/>
                <composite_data value="Analyze75.t2m" ftype="t2m"/>
            </param>
            <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/>
            <param name="plusminusinDalton" value="0.5"/>
            <param name="filename" value="Testfile_analyze75"/>
            <output name="plots" file="Testfile_qualitycontrol_analyze75.pdf" compare="sim_size" delta="20000"/>
        </test>
        <test>
            <param name="infile" value="example_continousS042.RData" ftype="rdata"/>
            <param name="inputpeptidefile" value="inputpeptides.csv" ftype="txt"/>
            <param name="plusminusinDalton" value="0.1"/>
            <param name="filename" value="Testfile_rdata"/>
            <output name="plots" file="Testfile_qualitycontrol_rdata.pdf" compare="sim_size" delta="20000"/>
        </test>
    </tests>
    <help>
        <![CDATA[
Quality control for maldi imaging mass spectrometry data.

Input data: 3 types of input data can be used:

- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)

Only for continuous imzML so far.

The output of this tool contains key values and plots of the imaging data as pdf.

        ]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv146</citation>
    </citations>
</tool>