Mercurial > repos > galaxyp > msi_qualitycontrol
diff msi_qualitycontrol.xml @ 0:845073d506a8 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_qualitycontrol commit fa798afa023eea1cb183c14d0242721b2c696c21
author | galaxyp |
---|---|
date | Tue, 31 Oct 2017 06:00:03 -0400 |
parents | |
children | c6bc77c4731d |
line wrap: on
line diff
<tool id="Mass_spectrometry_imaging_QC" name="MSI Qualitycontrol" version="1.7.0">
    <!--
        Galaxy tool wrapper for a quality control report on mass spectrometry imaging data.
        The command section stages the input under a fixed file name, the configfile
        section is rendered to an R script (Cheetah template) and run with Rscript,
        and the resulting qualitycontrol.pdf is collected as the only output.
    -->
    <description>mass spectrometry imaging QC</description>
    <requirements>
        <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
        <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
        <requirement type="package" version="2.2.1">r-gridextra</requirement>
        <requirement type="package" version="2.23_15">r-kernsmooth</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[

        ## Stage the input under the fixed names the R script reads.
        #if $infile.ext == 'imzml'
            cp '${infile.extra_files_path}/imzml' infile.imzML &&
            cp '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            cp '${infile.extra_files_path}/hdr' infile.hdr &&
            cp '${infile.extra_files_path}/img' infile.img &&
            cp '${infile.extra_files_path}/t2m' infile.t2m &&
        #else
            ln -s '$infile' infile.RData &&
        #end if
        ## Echo the rendered script into the job log, then run it.
        cat '${cardinal_qualitycontrol_script}' &&
        Rscript '${cardinal_qualitycontrol_script}'
    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_qualitycontrol_script"><![CDATA[

################################# load libraries and read file #########################
library(Cardinal)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(KernSmooth)

## Read MALDI Imaging dataset

#if $infile.ext == 'imzml'
    msidata <- readMSIData('infile.imzML')
#elif $infile.ext == 'analyze75'
    msidata <- readMSIData('infile.hdr')
#else
    ## The RData file is expected to define an object called msidata (see tool help).
    load('infile.RData')
#end if

#if $inputpeptidefile:
    ## Read tabular file with peptide masses for plots and heatmap images:
    input_list = read.delim("$inputpeptidefile", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE)
#else
    ## Placeholder row; mass 0 is filtered out by the mz range check below.
    input_list = data.frame(0, 0)
#end if

###################################### file properties in numbers ######################

## Number of features (mz)
maxfeatures = length(features(msidata))
## Range mz
minmz = round(min(mz(msidata)), digits=2)
maxmz = round(max(mz(msidata)), digits=2)
## Number of spectra (pixels)
pixelcount = length(pixels(msidata))
## Range x coordinates
minimumx = min(coord(msidata)[,1])
maximumx = max(coord(msidata)[,1])
## Range y coordinates
minimumy = min(coord(msidata)[,2])
maximumy = max(coord(msidata)[,2])
## Range of intensities
minint = round(min(spectra(msidata)[]), digits=2)
maxint = round(max(spectra(msidata)[]), digits=2)
medint = round(median(spectra(msidata)[]), digits=2)
## Number of intensities > 0
## colSums returns doubles, so the total cannot overflow the integer range
## the way sum() over a logical matrix can for large datasets.
npeaks = sum(colSums(spectra(msidata)[] > 0))
## Spectra multiplied with mz (potential number of peaks)
## prod() works in double precision; ncol*nrow is integer arithmetic and yields NA above 2^31-1.
numpeaks = prod(dim(spectra(msidata)[]))
## Percentage of intensities > 0
percpeaks = round(npeaks/numpeaks*100, digits=2)
## Number of empty TICs
TICs = colSums(spectra(msidata)[])
NumemptyTIC = sum(TICs == 0)

## Processing informations
processinginfo = processingData(msidata)
centroidedinfo = processinginfo@centroided # TRUE or FALSE

## if TRUE write processinginfo if no write FALSE

## normalization
if (length(processinginfo@normalization) == 0) {
    normalizationinfo='FALSE'
} else {
    normalizationinfo=processinginfo@normalization
}
## smoothing
if (length(processinginfo@smoothing) == 0) {
    smoothinginfo='FALSE'
} else {
    smoothinginfo=processinginfo@smoothing
}
## baseline
if (length(processinginfo@baselineReduction) == 0) {
    baselinereductioninfo='FALSE'
} else {
    baselinereductioninfo=processinginfo@baselineReduction
}
## peak picking
if (length(processinginfo@peakPicking) == 0) {
    peakpickinginfo='FALSE'
} else {
    peakpickinginfo=processinginfo@peakPicking
}


## calculate how many input peptide masses are valid:
## The first column is coerced to numeric (text or header cells become NA) and rows are
## selected with which(), so NA masses are dropped instead of turning into NA rows
## that would later be handed to image() and features().
peptide_mz = suppressWarnings(as.numeric(input_list[,1]))
valid_rows = which(peptide_mz > minmz & peptide_mz < maxmz)
inputpeptides = input_list[valid_rows,]
inputmasses = peptide_mz[valid_rows]
inputnames = inputpeptides[,2]

#############################################################################

properties = c("Number of mz features",
               "Range of mz values [Da]",
               "Number of pixels",
               "Range of x coordinates",
               "Range of y coordinates",
               "Range of intensities",
               "Median of intensities",
               "Intensities > 0",
               "Number of zero TICs",
               "Preprocessing",
               "Normalization",
               "Smoothing",
               "Baseline reduction",
               "Peak picking",
               "Centroided",
               "# valid peptidemasses")

values = c(paste0(maxfeatures),
           paste0(minmz, " - ", maxmz),
           paste0(pixelcount),
           paste0(minimumx, " - ", maximumx),
           paste0(minimumy, " - ", maximumy),
           paste0(minint, " - ", maxint),
           paste0(medint),
           paste0(percpeaks, " %"),
           paste0(NumemptyTIC),
           paste0(" "),
           paste0(normalizationinfo),
           paste0(smoothinginfo),
           paste0(baselinereductioninfo),
           paste0(peakpickinginfo),
           paste0(centroidedinfo),
           paste0(length(inputmasses)))


property_df = data.frame(properties, values)


## Variables for plots
## Image axes are padded by one pixel on every side.
xrange = 1
yrange = 1
maxx = max(coord(msidata)[,1])+xrange
minx = min(coord(msidata)[,1])-xrange
maxy = max(coord(msidata)[,2])+yrange
miny = min(coord(msidata)[,2])-yrange


####################################### Preparation of images #########################

## Acquisitionorder

pixelnumber = 1:pixelcount
pixelxyarray=cbind(coord(msidata),pixelnumber)


## Number of peaks per pixel
peaksperpixel = colSums(spectra(msidata)[]> 0)
peakscoordarray=cbind(coord(msidata), peaksperpixel)

## Most abundant mz

highestmz = apply(spectra(msidata)[],2,which.max)
highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz])
colnames(highestmz_matrix)[3] = "highestmzinDa"

###################################### Preparation of plots ############################

## function without xaxt for plots with automatic x axis
## Scatterplot of x2 against x1 in which each point is coloured by the local point
## density (densCols); points are drawn in order of increasing density.
plot_colorByDensity = function(x1,x2,
                               ylim=c(min(x2),max(x2)),
                               xlim=c(min(x1),max(x1)),
                               xlab="",ylab="",main="") {

    df <- data.frame(x1,x2)
    x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
    ## red channel of the grey ramp (0-255) + 1 is used as index into the 256 colour palette
    df\$dens <- col2rgb(x)[1,] + 1L
    cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
    df\$col <- cols[df\$dens]
    plot(x2~x1, data=df[order(df\$dens),],
         ylim=ylim,xlim=xlim,pch=20,col=col,
         cex=1,xlab=xlab,ylab=ylab,las=1,
         main=main)
}

## Number of peaks per mz - number across all pixel
peakspermz = rowSums(spectra(msidata)[] > 0 )

## Sum of all intensities for each mz (like TIC, but for mz instead of pixel)
mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz



######################################## PDF #############################################
##########################################################################################
##########################################################################################


pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
plot(0,type='n',axes=FALSE,ann=FALSE)
#if not $filename:
    #set $filename = $infile.display_name
#end if
title(main=paste("Quality control of MSI data\n\n", "Filename:", "$filename"))

############################# I) numbers ####################################
#############################################################################
grid.table(property_df, rows= NULL)

############################# II) ion images #################################
##############################################################################

## 1) Acquisition image
(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber)) +
    scale_y_reverse() + geom_tile() + coord_fixed() +
    ggtitle("1) Order of Acquisition") +
    theme_bw() +
    scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                         space = "Lab", na.value = "black", name = "Acq"))

## 2) Calibrant images:

if (length(inputmasses) != 0)
{
    for (mass in seq_along(inputmasses))
    {
        image(msidata, mz=inputmasses[mass], plusminus=$plusminusinDalton,
              main= paste0("2",LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2), " Da)"),
              contrast.enhance = "histogram")
    }
} else {print("The inputpeptide masses were outside the mass range")}

## 3) Number of peaks per pixel - image

(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel)) +
    scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() +
    ggtitle("3) Number of peaks per pixel") +
    theme_bw() +
    theme(text=element_text(family="ArialMT", face="bold", size=12)) +
    scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                         space = "Lab", na.value = "black", name = "# peaks"))


## 4) TIC image
TICcoordarray=cbind(coord(msidata), TICs)
(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs)) +
    scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() +
    ggtitle("4) Total Ion Chromatogram") +
    theme_bw() +
    theme(text=element_text(family="ArialMT", face="bold", size=12)) +
    scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                         space = "Lab", na.value = "black", name = "TIC"))

## 5) Most abundant mass image

(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa)) +
    scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() +
    ggtitle("5) Most abundant m/z in each pixel") +
    theme_bw() +
    scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z",
                         labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
                         breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)],
                         limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa))) +
    theme(text=element_text(family="ArialMT", face="bold", size=12)))

## which mz are highest
## Most frequent and second most frequent top m/z (rounded to integer Da) and the
## first pixel in which each of them occurs; used for the spectra plots in 13).
highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1])
highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1]

secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2])
secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1]



## 6) pca image for two components
pca <- PCA(msidata, ncomp=2)
par(mfrow = c(2,1))
plot(pca, col=c("black", "darkgrey"), main="6) PCA for two components")
image(pca, ylim = c(-1, maxy), col=c("black", "white"))


############################# III) properties over acquisition (spectra index)##########
##############################################################################

par(mfrow = c(2,1), mar=c(5,6,4,2))

## 7a) number of peaks per spectrum - scatterplot
plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="7a) Number of peaks per spectrum")
title(xlab="Spectra index \n (= Acquisition time)", line=3)
title(ylab="Number of peaks", line=4)

## 7b) number of peaks per spectrum - histogram
hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="")
title(main="7b) Number of peaks per spectrum", line=2)
title(ylab="Frequency = # spectra", line=4)
abline(v=median(peaksperpixel), col="blue")

## 8a) TIC per spectrum - density scatterplot
par(mfrow = c(2,1), mar=c(5,6,4,2))
plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="8a) TIC per pixel")
title(xlab="Spectra index \n (= Acquisition time)", line=3)
title(ylab = "Total ion chromatogram intensity", line=4)

## 8b) TIC per spectrum - histogram
hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
title(main= "8b) TIC per spectrum", line=2)
title(ylab="Frequency = # spectra", line=4)
abline(v=median(log(TICs[TICs>0])), col="blue")


## 9) intensity of chosen peptides over acquisition (pixel index)

if (length(inputmasses) != 0)
{
    par(mfrow = c(3, 2))
    for (mass in seq_along(inputmasses))
    {
        mznumber = features(msidata, mz = inputmasses[mass])
        intensityvector = spectra(msidata)[][mznumber,]
        plot(intensityvector, main=inputnames[mass], xlab="Spectra index \n (= Acquisition time)")
    }
} else {print("The inputpeptide masses were outside the mass range")}

################################## IV) changes over mz ############################
###################################################################################

## 10) Number of peaks per mz

par(mfrow = c(2,1), mar=c(5,6,4,4.5))
## 10a) Number of peaks per mz - scatterplot
plot_colorByDensity(mz(msidata),peakspermz, main= "10a) Number of peaks for each mz", ylab ="")
title(xlab="mz in Dalton", line=2.5)
title(ylab = "Number of peaks", line=4)
axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
mtext("Coverage of spectra [%]", 4, line=3, adj=1)

# make plot smaller to fit axis and labels, add second y axis with %
## 10b) Number of peaks per mz - histogram
hist(peakspermz, main="", las=1, ylab="", xlab="")
title(ylab = "Frequency", line=4)
title(main="10b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
abline(v=median(peakspermz), col="blue")


## 11) Sum of intensities per mz

par(mfrow = c(2,1), mar=c(5,6,4,2))
# 11a) sum of intensities per mz - scatterplot
plot_colorByDensity(mz(msidata),mzTIC, main= "11a) Sum of all peak intensities for each mz", ylab ="")
title(xlab="mz in Dalton", line=2.5)
title(ylab="Intensity sum", line=4)
# 11b) sum of intensities per mz - histogram
hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
title(main="11b) Sum of intensities per mz", line=2, ylab="")
title(xlab = "log (sum of intensities per mz)")
title(ylab = "Frequency", line=4)
abline(v=median(log(mzTIC[mzTIC>0])), col="blue")



################################## V) general plots ############################
###################################################################################


## 12) Intensity distribution

par(mfrow = c(2,1), mar=c(5,6,4,2))

## 12a) Intensity histogram:
hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
title(main="12a) Log2-transformed intensities", line=2)
title(xlab="log2 intensities")
title(ylab="Frequency", line=4)
abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")

## 12b) Median intensity over spectra
medianint_spectra = apply(spectra(msidata), 2, median)
plot(medianint_spectra, main="12b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
title(ylab="Median spectrum intensity", line=4)

## 13) Mass spectra

par(mfrow = c(2, 2))
plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition")
plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
## When every pixel has the same (rounded) most abundant mz there is no second
## candidate and the pixel index is NA; skip that panel instead of failing.
if (!is.na(secondhighestmz_pixel)) {
    plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
}

dev.off()

        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="imzml,rdata,analyze75"
               label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
               help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
        <param name="filename" type="text" value="" optional="true" label="Title"
               help="will appear in the quality report. If nothing given it will take the dataset name."/>
        <param name="inputpeptidefile" type="data" optional="true" format="txt,csv"
               label="Text file with peptidemasses and names"
               help="first column peptide m/z, second column peptide name, tab separated file"/>
        <!-- float, not text: the value is written unquoted into the R script as a number -->
        <param name="plusminusinDalton" value="0.25" type="float" label="Mass range"
               help="plusminus mass window in Dalton"/>
    </inputs>
    <outputs>
        <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label="${tool.name} on $infile.display_name"/>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML" ftype="imzml"/>
                <composite_data value="Example_Continuous.ibd" ftype="ibd"/>
            </param>
            <param name="inputpeptidefile" value="inputpeptides.csv" ftype="csv"/>
            <param name="plusminusinDalton" value="0.25"/>
            <param name="filename" value="Testfile_imzml"/>
            <output name="plots" file="Testfile_qualitycontrol_imzml.pdf" compare="sim_size" delta="20000"/>
        </test>
        <test>
            <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr" ftype="hdr"/>
                <composite_data value="Analyze75.img" ftype="img"/>
                <composite_data value="Analyze75.t2m" ftype="t2m"/>
            </param>
            <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/>
            <param name="plusminusinDalton" value="0.5"/>
            <param name="filename" value="Testfile_analyze75"/>
            <output name="plots" file="Testfile_qualitycontrol_analyze75.pdf" compare="sim_size" delta="20000"/>
        </test>
        <test>
            <param name="infile" value="example_continousS042.RData" ftype="rdata"/>
            <param name="inputpeptidefile" value="inputpeptides.csv" ftype="txt"/>
            <param name="plusminusinDalton" value="0.1"/>
            <param name="filename" value="Testfile_rdata"/>
            <output name="plots" file="Testfile_qualitycontrol_rdata.pdf" compare="sim_size" delta="20000"/>
        </test>
    </tests>
    <help>
    <![CDATA[
Quality control for maldi imaging mass spectrometry data.

Input data: 3 types of input data can be used:

- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)

Only for continuous imzML so far.

The output of this tool contains key values and plots of the imaging data as pdf.

    ]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv146</citation>
    </citations>
</tool>