view msi_qualitycontrol.xml @ 8:52ef77866de8 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_qualitycontrol commit 1c808d60243bb1eeda0cd26cb4b0a17ab05de2c0
author galaxyp
date Mon, 28 May 2018 12:38:50 -0400
parents b86a66dd1a16
children 963c7ec00141
line wrap: on
line source

<tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.10.0.0">
    <description>
        mass spectrometry imaging QC
    </description>
    <requirements>
        <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
        <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
        <requirement type="package" version="2.2.1">r-gridextra</requirement>
        <requirement type="package" version="2.23_15">r-kernsmooth</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[
        #if $infile.ext == 'imzml'
            ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
            ln -s '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            ln -s '${infile.extra_files_path}/hdr' infile.hdr &&
            ln -s '${infile.extra_files_path}/img' infile.img &&
            ln -s '${infile.extra_files_path}/t2m' infile.t2m &&
        #else
            ln -s '$infile' infile.RData &&
        #end if
        cat '${cardinal_qualitycontrol_script}' &&
        Rscript '${cardinal_qualitycontrol_script}'
    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_qualitycontrol_script"><![CDATA[

library(Cardinal)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(KernSmooth)

## Read MALDI Imaging dataset

#if $infile.ext == 'imzml'
    msidata = readImzML('infile')
#elif $infile.ext == 'analyze75'
    msidata = readAnalyze('infile')
#else
    load('infile.RData')
#end if



###################################### file properties in numbers ######################

## Number of features (mz)
maxfeatures = length(features(msidata))
## Range mz
minmz = round(min(mz(msidata)), digits=2)
maxmz = round(max(mz(msidata)), digits=2)
## Number of spectra (pixels)
pixelcount = length(pixels(msidata))
## Range x coordinates
minimumx = min(coord(msidata)[,1])
maximumx = max(coord(msidata)[,1])
## Range y coordinates
minimumy = min(coord(msidata)[,2])
maximumy = max(coord(msidata)[,2])
## Range of intensities
minint = round(min(spectra(msidata)[]), digits=2)
maxint = round(max(spectra(msidata)[]), digits=2)
medint = round(median(spectra(msidata)[]), digits=2)
## Number of intensities > 0
npeaks= sum(spectra(msidata)[]>0)
## Spectra multiplied with mz (potential number of peaks)
numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
## Percentage of intensities > 0
percpeaks = round(npeaks/numpeaks*100, digits=2)
## Number of empty TICs
TICs = colSums(spectra(msidata)[]) 
NumemptyTIC = sum(TICs == 0)

## Processing informations
processinginfo = processingData(msidata)
centroidedinfo = processinginfo@centroided # TRUE or FALSE

## if TRUE write processinginfo if no write FALSE

## normalization
if (length(processinginfo@normalization) == 0) {
  normalizationinfo='FALSE'
} else {
  normalizationinfo=processinginfo@normalization
}
## smoothing
if (length(processinginfo@smoothing) == 0) {
  smoothinginfo='FALSE'
} else {
  smoothinginfo=processinginfo@smoothing
}
## baseline
if (length(processinginfo@baselineReduction) == 0) {
  baselinereductioninfo='FALSE'
} else {
  baselinereductioninfo=processinginfo@baselineReduction
}
## peak picking
if (length(processinginfo@peakPicking) == 0) {
  peakpickinginfo='FALSE'
} else {
  peakpickinginfo=processinginfo@peakPicking
}

### Read tabular file with masses for plots and heatmap images: 

#if $peptide_file:

    input_list = read.delim("$peptide_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
        if (ncol(input_list) == 1)
        {
            input_list = cbind(input_list, input_list)
        }

        ### calculate how many input peptide masses are valid: 
        inputpeptides = input_list[input_list[,1]>minmz & input_list[,1]<maxmz,]
        number_peptides_in = length(input_list[,1])
        number_peptides_valid = length(inputpeptides[,1])

#else
    ###inputpeptides = data.frame(0,0)
    inputpeptides = as.data.frame(matrix(, nrow = 0, ncol = 2))
    number_peptides_in = 0
    number_peptides_valid = 0
#end if

colnames(inputpeptides) = c("mz", "name")

#if $calibrant_file:
    ### Read tabular file with calibrant masses: 
    calibrant_list = read.delim("$calibrant_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
        if (ncol(calibrant_list) == 1)
        {
            calibrant_list = cbind(calibrant_list, calibrant_list)
         }
        ### calculate how many input calibrant masses are valid: 
        inputcalibrants = calibrant_list[calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz,]
        number_calibrants_in = length(calibrant_list[,1])
        number_calibrants_valid = length(inputcalibrants[,1])
#else

    inputcalibrants = as.data.frame(matrix(, nrow = 0, ncol = 2))
    number_calibrants_in = 0
    number_calibrants_valid = 0
#end if

colnames(inputcalibrants) = c("mz", "name")

### bind inputcalibrants and inputpeptides together, to make heatmap on both lists

inputs_all = rbind(inputcalibrants[,1:2], inputpeptides[,1:2])
inputmasses = inputs_all[,1]
inputnames = inputs_all[,2]



properties = c("Number of mz features",
               "Range of mz values [Da]",
               "Number of pixels", 
               "Range of x coordinates", 
               "Range of y coordinates",
               "Range of intensities", 
               "Median of intensities",
               "Intensities > 0",
               "Number of zero TICs",
               "Preprocessing", 
               "Normalization", 
               "Smoothing",
               "Baseline reduction",
               "Peak picking",
               "Centroided", 
               paste0("# peptides in \n", "$peptide_file.display_name"), 
               paste0("# calibrants in \n", "$calibrant_file.display_name"))

values = c(paste0(maxfeatures), 
           paste0(minmz, " - ", maxmz), 
           paste0(pixelcount), 
           paste0(minimumx, " - ", maximumx),  
           paste0(minimumy, " - ", maximumy), 
           paste0(minint, " - ", maxint), 
           paste0(medint),
           paste0(percpeaks, " %"), 
           paste0(NumemptyTIC), 
           paste0(" "),
           paste0(normalizationinfo),
           paste0(smoothinginfo),
           paste0(baselinereductioninfo),
           paste0(peakpickinginfo),
           paste0(centroidedinfo), 
           paste0(number_peptides_valid, " / " , number_peptides_in),
           paste0(number_calibrants_valid, " / ", number_calibrants_in))


property_df = data.frame(properties, values)

######################################## PDF #############################################
##########################################################################################
##########################################################################################

pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
plot(0,type='n',axes=FALSE,ann=FALSE)
#if not $filename:
    #set $filename = $infile.display_name
#end if
title(main=paste("Quality control of MSI data\n\n", "Filename:", "$filename"))

############################# I) numbers ####################################
#############################################################################
grid.table(property_df, rows= NULL)

if (npeaks > 0)
{
    ############################# II) ion images #################################
    ##############################################################################

    ## function without xaxt for plots with automatic x axis
    plot_colorByDensity = function(x1,x2,
                                   ylim=c(min(x2),max(x2)),
                                   xlim=c(min(x1),max(x1)),
                                   xlab="",ylab="",main=""){
      
      df = data.frame(x1,x2)
      x = densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
      df\$dens = col2rgb(x)[1,] + 1L
      cols = colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
      df\$col = cols[df\$dens]
      plot(x2~x1, data=df[order(df\$dens),], 
           ylim=ylim,xlim=xlim,pch=20,col=col,
           cex=1,xlab=xlab,ylab=ylab,las=1,
           main=main)
    }


    ############################################################################

    ## 1) Acquisition image

    pixelnumber = 1:pixelcount
    pixelxyarray=cbind(coord(msidata)[,1:2],pixelnumber)

    print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))
     + geom_tile() + coord_fixed()
     + ggtitle("Order of Acquisition")
     +theme_bw()
     + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), 
                            space = "Lab", na.value = "black", name = "Acq"))

    ## 2) Number of calibrants per spectrum

    pixelmatrix = matrix(ncol=ncol(msidata), nrow=0)
    inputcalibrantmasses = inputcalibrants[,1]

    if (length(inputcalibrantmasses) != 0)
    {   for (calibrantnr in 1:length(inputcalibrantmasses))
        {
          calibrantmz = inputcalibrantmasses[calibrantnr]
          calibrantfeaturemin = features(msidata, mz=calibrantmz-$plusminus_dalton)
          calibrantfeaturemax = features(msidata, mz=calibrantmz+$plusminus_dalton)

            if (calibrantfeaturemin == calibrantfeaturemax)
            {
              
              calibrantintensity = spectra(msidata)[calibrantfeaturemin,] 
              
            }else{
              
              calibrantintensity = colSums(spectra(msidata)[calibrantfeaturemin:calibrantfeaturemax,] )
              
            }
          pixelmatrix = rbind(pixelmatrix, calibrantintensity)
        }

        countvector= as.factor(colSums(pixelmatrix>0))
        countdf= cbind(coord(msidata)[,1:2], countvector)
        mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen")

        print(ggplot(countdf, aes(x=x, y=y, fill=countvector))
           + geom_tile() + coord_fixed() 
          + ggtitle("Number of calibrants per pixel")
          + theme_bw() 
          + theme(text=element_text(family="ArialMT", face="bold", size=12))
          + scale_fill_manual(values = mycolours[1:length(countvector)], 
                                na.value = "black", name = "# calibrants"))
    }else{print("2) The inputcalibrant masses were not provided or outside the mass range")}


############# new 2b) image of foldchanges (log2 intensity ratios) between two masses in the same spectrum

    #if $calibrantratio:
        #for $foldchanges in $calibrantratio:
            mass1 = $foldchanges.mass1
            mass2 = $foldchanges.mass2
            distance = $foldchanges.distance

            #if not str($foldchanges.filenameratioplot).strip():
                #set $label = "Fold change %s Da / %s Da" % ($foldchanges.mass1, $foldchanges.mass2)
            #else:
                #set $label = $foldchanges.filenameratioplot
            #end if

            ### find rows which contain masses: 

            mzrowdown1 = features(msidata, mz = mass1-distance)
            mzrowup1 = features(msidata, mz = mass1+distance)
            mzrowdown2 = features(msidata, mz = mass2-distance)
            mzrowup2 = features(msidata, mz = mass2+distance)

            ### lower and upperlimit for the plot
            mzdown1 = features(msidata, mz = mass1-2)
            mzup1 = features(msidata, mz = mass1+3)
            mzdown2 = features(msidata, mz = mass2-2)
            mzup2 = features(msidata, mz = mass2+3)

            ### find mass in the given range with the highest intensity (will be plotted in red)

                if (mzrowdown1 == mzrowup1)
                {
                         maxmass1 = mz(msidata)[ mzrowdown1]
                }else{ ### for all masses in the massrange calculate mean intensity over all pixels and take mass which has highest mean
                        maxmassrow1 = rowMeans(spectra(msidata)[mzrowdown1:mzrowup1,])
                        maxmass1 = mz(msidata)[mzrowdown1:mzrowup1][which.max(maxmassrow1)] 
                }
                if (mzrowdown2 == mzrowup2)
                {
                    maxmass2 = mz(msidata)[mzrowup2]
                }else{
                    maxmassrow2 = rowMeans(spectra(msidata)[mzrowdown2:mzrowup2,])
                    maxmass2 = mz(msidata)[mzrowdown2:mzrowup2][which.max(maxmassrow2)]
                }

            ### plot the part which was chosen, with chosen value in blue, distance in blue, maxmass in red, xlim fixed to 5 Da window
            par(mfrow=c(2,1), oma=c(0,0,2,0))
            plot(msidata[mzdown1:mzup1,], pixel = 1:pixelcount, main=paste0("average spectrum ", mass1, " Da"))
            abline(v=c(mass1-distance, mass1, mass1+distance), col="blue",lty=c(3,5,3))
            abline(v=maxmass1, col="red", lty=5)

            plot(msidata[mzdown2:mzup2,], pixel = 1:pixelcount, main= paste0("average spectrum ", mass2, " Da"))
            abline(v=c(mass2-distance, mass2, mass2+distance), col="blue", lty=c(3,5,3))
            abline(v=maxmass2, col="red", lty=5)
            title("Control of fold change plot", outer=TRUE)

            ### filter spectra for maxmass to have two vectors, which can be divided

            mass1vector = spectra(msidata)[features(msidata, mz = maxmass1),]
            mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),]
            foldchange = log2(mass1vector/mass2vector)

            ratiomatrix = cbind(foldchange, coord(msidata)[,1:2])

            print(ggplot(ratiomatrix, aes(x=x, y=y, fill=foldchange), colour=colo)
             + geom_tile() + coord_fixed()
             + ggtitle("$label")
             + theme_bw()
             + theme(text=element_text(family="ArialMT", face="bold", size=12))
             + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
                                    ,space = "Lab", na.value = "black", name ="FC"))
        #end for
    #end if

    ## 3) Calibrant images:

    par(mfrow=c(1,1), mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0)
    if (length(inputmasses) != 0)
    {   for (mass in 1:length(inputmasses))
        {
          image(msidata, mz=inputmasses[mass], plusminus=$plusminus_dalton, 
          main= paste0(inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminus_dalton, " Da)"),
                contrast.enhance = "histogram", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy))
        }
    } else {print("3) The inputpeptide masses were not provided or outside the mass range")}


    ## 4) Number of peaks per pixel - image

    peaksperpixel = colSums(spectra(msidata)[]> 0)
    peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel)

    print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo)
    + geom_tile() + coord_fixed() 
     + ggtitle("Number of peaks per pixel")
     + theme_bw() 
     + theme(text=element_text(family="ArialMT", face="bold", size=12))
     + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 
                            ,space = "Lab", na.value = "black", name = "# peaks"))

    ## 5) TIC image 
    TICcoordarray=cbind(coord(msidata)[,1:2], TICs)
    colo = colorRampPalette(
    c("blue", "cyan", "green", "yellow","red"))
    print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)
     + geom_tile() + coord_fixed() 
     + ggtitle("Total Ion Chromatogram")
     + theme_bw() 
     + theme(text=element_text(family="ArialMT", face="bold", size=12))
     + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 
                            ,space = "Lab", na.value = "black", name = "TIC"))

    ## 6) Most abundant mass image 

    highestmz = apply(spectra(msidata)[],2,which.max) 
    highestmz_matrix = cbind(coord(msidata)[,1:2],mz(msidata)[highestmz])
    colnames(highestmz_matrix)[3] = "highestmzinDa"

    print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))
    + geom_tile() + coord_fixed() 
    + ggtitle("Most abundant m/z in each pixel")
    + theme_bw() 
    + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", 
                           labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
                           breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)], limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))
    + theme(text=element_text(family="ArialMT", face="bold", size=12)))

    ## which mz are highest
    highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1])
    highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1]

    secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2]) 
    secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1]

    ## 7) pca image for two components
    pca = PCA(msidata, ncomp=2) 
    par(mfrow = c(2,1))
    plot(pca, col=c("black", "darkgrey"), main="PCA for two components")
    image(pca, col=c("black", "white"),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),  strip=FALSE)


    ############################# III) properties over acquisition (spectra index)##########
    ##############################################################################

    par(mfrow = c(2,1), mar=c(5,6,4,2))

    ## 8a) number of peaks per spectrum - scatterplot
    plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum")
    title(xlab="Spectra index \n (= Acquisition time)", line=3)
    title(ylab="Number of peaks", line=4)

    ## 8b) number of peaks per spectrum - histogram
    hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") 
    title(main="Number of peaks per spectrum", line=2)
    title(ylab="Frequency = # spectra", line=4)
    abline(v=median(peaksperpixel), col="blue")

    ## 9a) TIC per spectrum - density scatterplot
    zero=0
    par(mfrow = c(2,1), mar=c(5,6,4,2))
    plot_colorByDensity(pixels(msidata), TICs,  ylab = "", xlab = "", main="TIC per spectrum")
    title(xlab="Spectra index \n (= Acquisition time)", line=3)
    title(ylab = "Total ion chromatogram intensity", line=4)

    ## 9b) TIC per spectrum -  histogram
    hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
    title(main= "TIC per spectrum", line=2)
    title(ylab="Frequency = # spectra", line=4)
    abline(v=median(log(TICs[TICs>0])), col="blue") 


    ################################## IV) changes over mz ############################
    ###################################################################################

    ## 11) Number of peaks per mz
    ## Number of peaks per mz - number across all pixel
    peakspermz = rowSums(spectra(msidata)[] > 0 )

    par(mfrow = c(2,1), mar=c(5,6,4,4.5))
    ## Number of peaks per mz - scatterplot
    plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per mz", ylab ="")
    title(xlab="mz in Dalton", line=2.5)
    title(ylab = "Number of peaks", line=4)
    axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
    mtext("Coverage of spectra [%]", 4, line=3, adj=1)

    # make plot smaller to fit axis and labels, add second y axis with %
    ## Number of peaks per mz - histogram
    hist(peakspermz, main="", las=1, ylab="", xlab="")
    title(ylab = "Frequency", line=4)
    title(main="Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
    abline(v=median(peakspermz), col="blue") 


    ## 12) Sum of intensities per mz

    ## Sum of all intensities for each mz (like TIC, but for mz instead of pixel)
    mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz

    par(mfrow = c(2,1), mar=c(5,6,4,2))
    # 12a) sum of intensities per mz - scatterplot
    plot_colorByDensity(mz(msidata),mzTIC,  main= "Sum of intensities per mz", ylab ="")
    title(xlab="mz in Dalton", line=2.5)
    title(ylab="Intensity sum", line=4)
    # 12b) sum of intensities per mz - histogram
    hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
    title(main="Sum of intensities per mz", line=2, ylab="")
    title(xlab = "log (sum of intensities per mz)")
    title(ylab = "Frequency", line=4)
    abline(v=median(log(mzTIC[mzTIC>0])), col="blue")

    ################################## V) general plots ############################
    ###################################################################################

    ## 13) Intensity distribution

    par(mfrow = c(2,1), mar=c(5,6,4,2))

    ## 13a) Intensity histogram: 
    hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
    title(main="Log2-transformed intensities", line=2)
    title(xlab="log2 intensities")
    title(ylab="Frequency", line=4)
    abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")

    ## 13b) Median intensity over spectra
    medianint_spectra = apply(spectra(msidata), 2, median)
    plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
    title(ylab="Median spectrum intensity", line=4)

    ## 13c) Histogram on mz values
    par(mfrow = c(1, 1))
    hist(mz(msidata), xlab = "mz in Dalton", main="Histogram of mz values")


    ## 14) Mass spectra 

    par(mfrow = c(2, 2))
    plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
    plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition")
    plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,1:2])))
    plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,1:2])))

    ## 15) Zoomed in mass spectra for calibrants
    plusminusvalue = $plusminus_dalton
    x = 1
    if (length(inputcalibrantmasses) != 0)
    {

        for (calibrant in inputcalibrantmasses)
        {
          minmasspixel = features(msidata, mz=calibrant-1)
          maxmasspixel = features(msidata, mz=calibrant+3)
          par(mfrow = c(2, 2), oma=c(0,0,2,0))
          plot(msidata[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main= "average spectrum")
          abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
          plot(msidata[minmasspixel:maxmasspixel,], pixel =round(length(pixelnumber)/2, digits=0), main="pixel in middle of acquisition")
          abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
          plot(msidata[minmasspixel:maxmasspixel,], pixel = highestmz_pixel,main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,1:2])))
          abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
          plot(msidata[minmasspixel:maxmasspixel,], pixel = secondhighestmz_pixel,  main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,1:2])))
          abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
          title(paste0(inputcalibrants[x,1]), outer=TRUE)
          x=x+1
        }

    }else{print("15) The inputcalibrant masses were not provided or outside the mass range")}

    ## 16) ppm accuracy measured vs. theoretical calibrant mass

    if (length(inputcalibrantmasses) != 0)
    {
        par(mfrow = c(1, 1))

        differencevector = vector()

        for (mass in 1:length(inputcalibrantmasses))
        {mznumber = features(msidata, mz = inputcalibrantmasses[mass]) ### this gives the featurenumber which is closest to given mz
        mzvalue = mz(msidata)[mznumber] ### gives the mz in Da which is closest to the given mz (using the featurenumber)
        mzdifference = inputcalibrantmasses[mass] - mzvalue ### difference in Da: theoretical value - closest mz value
        ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement
        differencevector[mass] = ppmdifference }
        differencevector = round(differencevector, digits=2)

        ### plot the ppm difference theor. mz value to closest mz value: 

        calibrant_names = as.character(inputcalibrants[,2])
        diff_df = data.frame(differencevector, calibrant_names)        
        diff_plot<-ggplot(data=diff_df, aes(x=calibrant_names, y=differencevector)) + geom_col() + theme_minimal() +
        labs(title="Theoretical calibrant mz vs. closest measured mz", x="calibrants", y = "Difference in ppm")+
        geom_text(aes(label=differencevector), vjust=-0.3, size=3.5, col="blue")

        print(diff_plot)

    }else{print("16) The inputcalibrant masses were not provided or outside the mass range")}


dev.off()

}else{
  print("inputfile has no intensities > 0")
dev.off()
}

    ]]></configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
            help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
        <param name="filename" type="text" value="" optional="true" label="Title" help="will appear in the quality report. If nothing given it will take the dataset name."/>
        <param name="peptide_file" type="data" optional="true" format="tabular" label="Text file with masses and names"
            help="first column m/z, second column name, tab separated file"/>
        <param name="calibrant_file" type="data" optional="true" format="tabular"
            label="Internal calibrants and names"
            help="Used for plot number of calibrant per spectrum and for zoomed in mass spectra"/>
        <param name="plusminus_dalton" value="0.25" type="text" label="Mass range in Dalton" help="Plusminus mass window in Dalton for calibrant and peptide plots"/>
        <repeat name="calibrantratio" title="Plot fold change of two masses for each spectrum" min="0" max="10">
            <param name="mass1" value="1111" type="float" label="Mass 1" help="First mass in Dalton"/>
            <param name="mass2" value="2222" type="float" label="Mass 2" help="Second mass in Dalton"/>
            <param name="distance" value="0.25" type="float" label="Distance in Dalton" help="Distance in Da used to find peak maximum from input masses in both directions"/>
            <param name="filenameratioplot" type="text" optional="true" label="Title" help="Optional title for fold change plot."/>
        </repeat>
    </inputs>
    <outputs>
        <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label = "${tool.name} ${on_string}"/>
    </outputs>

    <tests>
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML" />
                <composite_data value="Example_Continuous.ibd" />
            </param>
            <param name="peptide_file" value="inputpeptides.txt"/>
            <param name="calibrant_file" value="inputcalibrantfile1.txt"/>
            <param name="plusminus_dalton" value="0.25"/>
            <param name="filename" value="Testfile_imzml"/>
            <repeat name="calibrantratio">
                <param name="mass1" value="111"/>
                <param name="mass2" value="222"/>
                <param name="distance" value="0.25"/>
                <param name="filenameratioplot" value = "Ratio of mass1 (111) / mass2 (222)"/>
            </repeat>
            <output name="plots" file="QC_imzml.pdf" compare="sim_size" delta="20000"/>
        </test>
        <test>
            <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr"/>
                <composite_data value="Analyze75.img"/>
                <composite_data value="Analyze75.t2m"/>
            </param>
            <param name="peptide_file" value="inputpeptides.txt"/>
            <param name="calibrant_file" value="inputcalibrantfile2.txt"/>
            <param name="plusminus_dalton" value="0.5"/>
            <param name="filename" value="Testfile_analyze75"/>
            <output name="plots" file="QC_analyze75.pdf" compare="sim_size" delta="20000"/>
        </test>
        <test>
            <param name="infile" value="preprocessed.RData" ftype="rdata"/>
            <param name="plusminus_dalton" value="0"/>
            <param name="filename" value="Testfile_rdata"/>
            <output name="plots" file="QC_rdata.pdf" compare="sim_size" delta="20000"/>
        </test>
        <test>
            <param name="infile" value="empty_spectra.rdata" ftype="rdata"/>
            <param name="peptide_file" value="inputpeptides.txt"/>
            <param name="calibrant_file" value="inputcalibrantfile2.txt"/>
            <param name="plusminus_dalton" value="0.1"/>
            <param name="filename" value="Testfile_rdata"/>
            <output name="plots" file="QC_empty_spectra.pdf" compare="sim_size" delta="20000"/>
        </test>
    </tests>
    <help>
        <![CDATA[
Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_

This tool uses some Cardinal functions to create a quality control report with descriptive plots for mass-spectrometry imaging data. 

Input data: 3 types of input data can be used:

- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)

Options: 

- masses of interest as tabular file, used to generate heatmap images
- internal calibrants as tabular file, used for the following plots: Number of calibrant per spectrum, heatmap images, mass-spectrum plot zoomed in for calibrant region, ppm accuracy
- fold change plot: draws a heatmap of the fold change of two masses (log2(intensity ratio))

Output: 

- pdf with numbers and descriptive plots to check the quality of the mass-spectrometry imaging data

Tip: 

- For additional heatmap images use the MSI ion images tool and to plot more mass spectra use the MSI massspectra tool. 

        ]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv146</citation>
    </citations>
</tool>