diff msi_qualitycontrol.xml @ 0:845073d506a8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_qualitycontrol commit fa798afa023eea1cb183c14d0242721b2c696c21
author galaxyp
date Tue, 31 Oct 2017 06:00:03 -0400
parents
children c6bc77c4731d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/msi_qualitycontrol.xml	Tue Oct 31 06:00:03 2017 -0400
@@ -0,0 +1,482 @@
+<tool id="Mass_spectrometry_imaging_QC" name="MSI Qualitycontrol" version="1.7.0">
+    <description>
+        mass spectrometry imaging QC
+    </description>
+    <requirements>
+        <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
+        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
+        <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
+        <requirement type="package" version="2.2.1"> r-gridextra</requirement>
+        <requirement type="package" version="2.23_15">r-kernsmooth</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+        #if $infile.ext == 'imzml'
+            cp '${infile.extra_files_path}/imzml' infile.imzML &&
+            cp '${infile.extra_files_path}/ibd' infile.ibd &&
+        #elif $infile.ext == 'analyze75'
+            cp '${infile.extra_files_path}/hdr' infile.hdr &&
+            cp '${infile.extra_files_path}/img' infile.img &&
+            cp '${infile.extra_files_path}/t2m' infile.t2m &&
+        #else
+            ln -s '$infile' infile.RData &&
+        #end if
+        cat '${cardinal_qualitycontrol_script}' &&
+        Rscript '${cardinal_qualitycontrol_script}'
+    ]]>
+    </command>
+    <configfiles>
+        <configfile name="cardinal_qualitycontrol_script"><![CDATA[
+
+################################# load libraries and read file #########################
+library(Cardinal)
+library(ggplot2)
+library(RColorBrewer)
+library(gridExtra)
+library(KernSmooth)
+
+## Read MALDI Imagind dataset
+
+#if $infile.ext == 'imzml'
+    msidata <- readMSIData('infile.imzML')
+#elif $infile.ext == 'analyze75'
+    msidata <- readMSIData('infile.hdr')
+
+#else
+    load('infile.RData')
+#end if
+
+#if $inputpeptidefile:
+    ## Read tabular file with peptide masses for plots and heatmap images: 
+    input_list = read.delim("$inputpeptidefile", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE)
+#else
+    input_list = data.frame(0, 0)
+#end if
+
+###################################### file properties in numbers ######################
+
+## Number of features (mz)
+maxfeatures = length(features(msidata))
+## Range mz
+minmz = round(min(mz(msidata)), digits=2)
+maxmz = round(max(mz(msidata)), digits=2)
+## Number of spectra (pixels)
+pixelcount = length(pixels(msidata))
+## Range x coordinates
+minimumx = min(coord(msidata)[,1])
+maximumx = max(coord(msidata)[,1])
+## Range y coordinates
+minimumy = min(coord(msidata)[,2])
+maximumy = max(coord(msidata)[,2])
+## Range of intensities
+minint = round(min(spectra(msidata)[]), digits=2)
+maxint = round(max(spectra(msidata)[]), digits=2)
+medint = round(median(spectra(msidata)[]), digits=2)
+## Number of intensities > 0 
+npeaks= sum(spectra(msidata)[]>0)
+## Spectra multiplied with mz (potential number of peaks)
+numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
+## Percentage of intensities > 0
+percpeaks = round(npeaks/numpeaks*100, digits=2)
+## Number of empty TICs
+TICs = colSums(spectra(msidata)[]) 
+NumemptyTIC = sum(TICs == 0)
+
+## Processing informations
+processinginfo = processingData(msidata)
+centroidedinfo = processinginfo@centroided # TRUE or FALSE
+
+## if TRUE write processinginfo if no write FALSE
+
+## normalization
+if (length(processinginfo@normalization) == 0) {
+  normalizationinfo='FALSE'
+} else {
+  normalizationinfo=processinginfo@normalization
+}
+## smoothing
+if (length(processinginfo@smoothing) == 0) {
+  smoothinginfo='FALSE'
+} else {
+  smoothinginfo=processinginfo@smoothing
+}
+## baseline
+if (length(processinginfo@baselineReduction) == 0) {
+  baselinereductioninfo='FALSE'
+} else {
+  baselinereductioninfo=processinginfo@baselineReduction
+}
+## peak picking
+if (length(processinginfo@peakPicking) == 0) {
+  peakpickinginfo='FALSE'
+} else {
+  peakpickinginfo=processinginfo@peakPicking
+}
+
+
+## calculate how many input peptide masses are valid: 
+inputpeptides = input_list[input_list[,1]>minmz & input_list[,1]<maxmz,]
+inputmasses = inputpeptides[,1]
+inputnames = inputpeptides[,2]
+
+#############################################################################
+
+properties = c("Number of mz features",
+               "Range of mz values [Da]",
+               "Number of pixels", 
+               "Range of x coordinates", 
+               "Range of y coordinates",
+               "Range of intensities", 
+               "Median of intensities",
+               "Intensities > 0",
+               "Number of zero TICs",
+               "Preprocessing", 
+               "Normalization", 
+               "Smoothing",
+               "Baseline reduction",
+               "Peak picking",
+               "Centroided", 
+               "# valid peptidemasses")
+
+values = c(paste0(maxfeatures), 
+           paste0(minmz, " - ", maxmz), 
+           paste0(pixelcount), 
+           paste0(minimumx, " - ", maximumx),  
+           paste0(minimumy, " - ", maximumy), 
+           paste0(minint, " - ", maxint), 
+           paste0(medint),
+           paste0(percpeaks, " %"), 
+           paste0(NumemptyTIC), 
+           paste0(" "),
+           paste0(normalizationinfo),
+           paste0(smoothinginfo),
+           paste0(baselinereductioninfo),
+           paste0(peakpickinginfo),
+           paste0(centroidedinfo), 
+           paste0(length(inputmasses)))
+
+
+property_df = data.frame(properties, values)
+
+
+## Variables for plots
+xrange = 1
+yrange = 1
+maxx = max(coord(msidata)[,1])+xrange
+minx = min(coord(msidata)[,1])-xrange
+maxy = max(coord(msidata)[,2])+yrange
+miny = min(coord(msidata)[,2])-yrange
+
+
+####################################### Preparation of images #########################
+
+## Acquisitionorder
+
+pixelnumber = 1:pixelcount
+pixelxyarray=cbind(coord(msidata),pixelnumber)
+
+
+## Number of peaks per pixel
+peaksperpixel = colSums(spectra(msidata)[]> 0)
+peakscoordarray=cbind(coord(msidata), peaksperpixel)
+
+## Most abundant mz 
+
+highestmz = apply(spectra(msidata)[],2,which.max) 
+highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz])
+colnames(highestmz_matrix)[3] = "highestmzinDa"
+
+###################################### Preparation of plots ############################
+
+## function without xaxt for plots with automatic x axis
+plot_colorByDensity = function(x1,x2,
+                               ylim=c(min(x2),max(x2)),
+                               xlim=c(min(x1),max(x1)),
+                               xlab="",ylab="",main="") {
+  
+  df <- data.frame(x1,x2)
+  x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
+  df\$dens <- col2rgb(x)[1,] + 1L
+  cols <-  colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
+  df\$col <- cols[df\$dens]
+  plot(x2~x1, data=df[order(df\$dens),], 
+       ylim=ylim,xlim=xlim,pch=20,col=col,
+       cex=1,xlab=xlab,ylab=ylab,las=1,
+       main=main)
+}
+
+## Number of peaks per mz - number across all pixel
+peakspermz = rowSums(spectra(msidata)[] > 0 )
+
+## Sum of all intensities for each mz (like TIC, but for mz instead of pixel)
+mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz
+
+
+
+######################################## PDF #############################################
+##########################################################################################
+##########################################################################################
+
+
+pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
+plot(0,type='n',axes=FALSE,ann=FALSE)
+#if not $filename:
+    #set $filename = $infile.display_name
+#end if
+title(main=paste("Quality control of MSI data\n\n", "Filename:", "$filename"))
+
+############################# I) numbers ####################################
+#############################################################################
+grid.table(property_df, rows= NULL)
+
+############################# II) ion images #################################
+##############################################################################
+
+## 1) Acquisition image
+(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))
+ +scale_y_reverse() + geom_tile() + coord_fixed()
+ + ggtitle("1) Order of Acquisition")
+ +theme_bw()
+ + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), 
+                        space = "Lab", na.value = "black", name = "Acq"))
+
+## 2) Calibrant images:
+
+
+
+if (length(inputmasses) != 0)
+{   for (mass in 1:length(inputmasses))
+
+    {
+      image(msidata, mz=inputmasses[mass], plusminus=$plusminusinDalton, 
+            main= paste0("2",LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2), " Da)"), 
+            contrast.enhance = "histogram") 
+    }
+} else {print("The inputpeptide masses were outside the mass range")}
+
+## 3) Number of peaks per pixel - image
+
+(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo)
+ +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() 
+ + ggtitle("3) Number of peaks per pixel")
+ + theme_bw() 
+ + theme(text=element_text(family="ArialMT", face="bold", size=12))
+ + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 
+                        ,space = "Lab", na.value = "black", name = "# peaks"))
+
+
+## 4) TIC image 
+TICcoordarray=cbind(coord(msidata), TICs)
+colo <- colorRampPalette(
+c('blue', 'cyan', 'green', 'yellow','red'))
+(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)
+ +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() 
+ + ggtitle("4) Total Ion Chromatogram")
+ + theme_bw() 
+ + theme(text=element_text(family="ArialMT", face="bold", size=12))
+ + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 
+                        ,space = "Lab", na.value = "black", name = "TIC"))
+
+## 5) Most abundant mass image 
+
+(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))
++scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() 
++ ggtitle("5) Most abundant m/z in each pixel")
++ theme_bw() 
++ scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", 
+                       labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
+                       breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)], limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))
++ theme(text=element_text(family="ArialMT", face="bold", size=12)))
+
+## which mz are highest
+highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1])
+highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1]
+
+secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2]) 
+secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1]
+
+
+
+## 6) pca image for two components
+pca <- PCA(msidata, ncomp=2) 
+par(mfrow = c(2,1))
+plot(pca, col=c("black", "darkgrey"), main="6) PCA for two components")
+image(pca, ylim = c(-1, maxy), col=c("black", "white"))
+
+
+############################# III) properties over acquisition (spectra index)##########
+##############################################################################
+
+par(mfrow = c(2,1), mar=c(5,6,4,2))
+
+## 7a) number of peaks per spectrum - scatterplot
+plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="7a) Number of peaks per spectrum")
+title(xlab="Spectra index \n (= Acquisition time)", line=3)
+title(ylab="Number of peaks", line=4)
+
+## 7b) number of peaks per spectrum - histogram
+hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") 
+title(main="7b) Number of peaks per spectrum", line=2)
+title(ylab="Frequency = # spectra", line=4)
+abline(v=median(peaksperpixel), col="blue")
+
+## 8a) TIC per spectrum -  density scatterplot
+zero=0
+par(mfrow = c(2,1), mar=c(5,6,4,2))
+plot_colorByDensity(pixels(msidata), TICs,  ylab = "", xlab = "", main="8a) TIC per pixel")
+title(xlab="Spectra index \n (= Acquisition time)", line=3)
+title(ylab = "Total ion chromatogram intensity", line=4)
+
+## 8b) TIC per spectrum -  histogram
+hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
+title(main= "8b) TIC per spectrum", line=2)
+title(ylab="Frequency = # spectra", line=4)
+abline(v=median(log(TICs[TICs>0])), col="blue") 
+
+
+## 9) intensity of chosen peptides over acquisition (pixel index)
+
+if (length(inputmasses) != 0)
+{   
+
+    par(mfrow = c(3, 2))
+    intensityvector = vector()
+    for (mass in 1:length(inputmasses))
+    {
+        mznumber = features(msidata, mz = inputmasses[mass])
+        intensityvector = spectra(msidata)[][mznumber,] 
+        plot(intensityvector, main=inputnames[mass], xlab="Spectra index \n (= Acquisition time)")
+    }
+} else {print("The inputpeptide masses were outside the mass range")}
+
+################################## IV) changes over mz ############################
+###################################################################################
+
+## 10) Number of peaks per mz
+
+par(mfrow = c(2,1), mar=c(5,6,4,4.5))
+## 10a) Number of peaks per mz - scatterplot
+plot_colorByDensity(mz(msidata),peakspermz, main= "10a) Number of peaks for each mz", ylab ="")
+title(xlab="mz in Dalton", line=2.5)
+title(ylab = "Number of peaks", line=4)
+axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
+mtext("Coverage of spectra [%]", 4, line=3, adj=1)
+
+# make plot smaller to fit axis and labels, add second y axis with %
+## 10b) Number of peaks per mz - histogram
+hist(peakspermz, main="", las=1, ylab="", xlab="")
+title(ylab = "Frequency", line=4)
+title(main="10b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
+abline(v=median(peakspermz), col="blue") 
+
+
+## 11) Sum of intensities per mz
+
+par(mfrow = c(2,1), mar=c(5,6,4,2))
+# 11a) sum of intensities per mz - scatterplot
+plot_colorByDensity(mz(msidata),mzTIC,  main= "11a) Sum of all peak intensities for each mz", ylab ="")
+title(xlab="mz in Dalton", line=2.5)
+title(ylab="Intensity sum", line=4)
+# 11b) sum of intensities per mz - histogram
+hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
+title(main="11b) Sum of intensities per mz", line=2, ylab="")
+title(xlab = "log (sum of intensities per mz)")
+title(ylab = "Frequency", line=4)
+abline(v=median(log(mzTIC[mzTIC>0])), col="blue")
+
+
+
+################################## V) general plots ############################
+###################################################################################
+
+
+## 12) Intensity distribution
+
+par(mfrow = c(2,1), mar=c(5,6,4,2))
+
+## 12a) Intensity histogram: 
+hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
+title(main="12a) Log2-transformed intensities", line=2)
+title(xlab="log2 intensities")
+title(ylab="Frequency", line=4)
+abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")
+
+## 12b) Median intensity over spectra
+medianint_spectra = apply(spectra(msidata), 2, median)
+plot(medianint_spectra, main="12b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
+title(ylab="Median spectrum intensity", line=4)
+
+## 13) Mass spectra 
+
+par(mfrow = c(2, 2))
+plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
+plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition")
+plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
+plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
+
+dev.off()
+
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <param name="infile" type="data" format="imzml, rdata, analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
+            help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
+        <param name="filename" type="text" value="" optional="true" label="Title" help="will appear in the quality report. If nothing given it will take the dataset name."/>
+        <param name="inputpeptidefile" type="data" optional="true" format="txt, csv" label="Text file with peptidemasses and names"
+            help="first column peptide m/z, second column peptide name, tab separated file"/>
+        <param name="plusminusinDalton" value="0.25" type="text" label="Mass range" help="plusminus mass window in Dalton"/>
+    </inputs>
+    <outputs>
+        <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label="${tool.name} on $infile.display_name"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="" ftype="imzml">
+                <composite_data value="Example_Continuous.imzML" ftype="imzml"/>
+                <composite_data value="Example_Continuous.ibd" ftype="ibd"/>
+            </param>
+            <param name="inputpeptidefile" value="inputpeptides.csv" ftype="csv"/>
+            <param name="plusminusinDalton" value="0.25"/>
+            <param name="filename" value="Testfile_imzml"/>
+            <output name="plots" file="Testfile_qualitycontrol_imzml.pdf" compare="sim_size" delta="20000"/>
+        </test>
+        <test>
+            <param name="infile" value="" ftype="analyze75">
+                <composite_data value="Analyze75.hdr" ftype="hdr"/>
+                <composite_data value="Analyze75.img" ftype="img"/>
+                <composite_data value="Analyze75.t2m" ftype="t2m"/>
+            </param>
+            <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/>
+            <param name="plusminusinDalton" value="0.5"/>
+            <param name="filename" value="Testfile_analyze75"/>
+            <output name="plots" file="Testfile_qualitycontrol_analyze75.pdf" compare="sim_size" delta="20000"/>
+        </test>
+        <test>
+            <param name="infile" value="example_continousS042.RData" ftype="rdata"/>
+            <param name="inputpeptidefile" value="inputpeptides.csv" ftype="txt"/>
+            <param name="plusminusinDalton" value="0.1"/>
+            <param name="filename" value="Testfile_rdata"/>
+            <output name="plots" file="Testfile_qualitycontrol_rdata.pdf" compare="sim_size" delta="20000"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+Quality control for maldi imaging mass spectrometry data. 
+
+Input data: 3 types of input data can be used:
+
+- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
+- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
+- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
+
+Only for continuous imzML so far. 
+
+The output of this tool contains key values and plots of the imaging data as pdf. 
+
+        ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv146</citation>
+    </citations>
+</tool>