diff quality_report.xml @ 18:d426a9107a6c draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author galaxyp
date Thu, 04 Jul 2024 13:35:30 +0000
parents 23d0394b5908
children
line wrap: on
line diff
--- a/quality_report.xml	Wed Apr 19 22:49:12 2023 +0000
+++ b/quality_report.xml	Thu Jul 04 13:35:30 2024 +0000
@@ -1,18 +1,11 @@
-<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.0">
+<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
     <description>
         mass spectrometry imaging QC
     </description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements">
-        <requirement type="package" version="2.3">r-gridextra</requirement>
-        <requirement type="package" version="3.3.5">r-ggplot2</requirement>
-        <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
-        <requirement type="package" version="2.23_20">r-kernsmooth</requirement>
-        <requirement type="package" version="1.1.1">r-scales</requirement>
-        <requirement type="package" version="1.0.12">r-pheatmap</requirement>
-    </expand>
+    <expand macro="requirements"/>
     <command detect_errors="exit_code">
     <![CDATA[
         @INPUT_LINKING@
@@ -41,9 +34,11 @@
     run(msidata) = "infile"
     }
 
+
 ## remove duplicated coordinates
 msidata <- msidata[,!duplicated(coord(msidata))]
 
+
 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)
 
 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
@@ -80,13 +75,14 @@
 medTIC = round(median(TICs), digits=1)
 sdTIC = round(sd(TICs), digits=0)
 ## Median and sd # peaks per spectrum
-medpeaks = round(median(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
-sdpeaks = round(sd(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
-##max window size 
+medpeaks = round(median(colSums(as.matrix(spectra(msidata))>0, na.rm=TRUE), na.rm=TRUE), digits=0)
+sdpeaks = round(sd(colSums(as.matrix(spectra(msidata))>0, na.rm=TRUE), na.rm=TRUE), digits=0)
+## max window size
 max_window = round(mz(msidata)[nrow(msidata)]-mz(msidata)[nrow(msidata)-1], digits=2)
 ## Processing informations
 centroidedinfo = centroided(msidata)
 
+
 ############## Read and filter tabular file with m/z ###########################
 
 ### reading m/z input (calibrant) file:
@@ -95,6 +91,7 @@
 
     calibrant_list = read.delim("$calibrant_file", header = $calibrant_header, na.strings=c(" ","","NA"), stringsAsFactors = FALSE)
     calibrant_list = calibrant_list[,c($mz_column, $name_column)]
+    calibrant_list = calibrant_list[order(calibrant_list[,1]),]
 
     ### calculate how many input calibrant m/z are valid: 
 
@@ -244,6 +241,7 @@
     pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0)
 
     ## plot only possible when there is at least one valid calibrant
+
     if (length(inputcalibrantmasses) != 0){
 
         ## calculate plusminus values in m/z for each calibrant
@@ -253,26 +251,26 @@
 
         for (mass in 1:length(inputcalibrantmasses)){
 
-            filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
+          filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
 
-           if (nrow(filtered_data) > 0 & sum(spectra(filtered_data),na.rm=TRUE) > 0){
+          if (nrow(filtered_data) > 0 & sum(as.matrix(spectra(filtered_data)),na.rm=TRUE) > 0){
 
-                ## intensity of all m/z > 0
-                intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0
+            ## intensity of all m/z > 0
+            intensity_sum = colSums(as.matrix(spectra(filtered_data)[1,]), na.rm=TRUE) > 0
 
-            ###}else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){
+          ###}else if(nrow(filtered_data) == 1 & sum(as.matrix(spectra(filtered_data)), na.rm=TRUE) > 0){
 
-                ## intensity of only m/z > 0
-                intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0
-
-            }else{
+            ## intensity of only m/z > 0
+            intensity_sum = colSums(as.matrix(spectra(filtered_data)[1,]), na.rm=TRUE) > 0
 
-                intensity_sum = rep(FALSE, ncol(filtered_data))}
+          }else{
+            intensity_sum = rep(FALSE, ncol(filtered_data))}
 
-            ## for each pixel add sum of intensities > 0 in the given m/z range
-            pixelmatrix = rbind(pixelmatrix, intensity_sum)
+          ## for each pixel add sum of intensities > 0 in the given m/z range
+          pixelmatrix = rbind(pixelmatrix, intensity_sum)
         }
 
+
         ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE)
         countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE))
         countdf= data.frame(coord(msidata)\$x, coord(msidata)\$y, countvector) ## add pixel coordinates to counts
@@ -292,7 +290,9 @@
             rm(countdf)
             gc()
 
-    }else{print("2) The inputcalibrant m/z were not provided or outside the m/z range")}
+    }else{plot.new()
+        text(0.5, 0.5, "The input calibrant m/z were not provided \n or outside the m/z range.", cex = 1.5)
+        print("2) The input calibrant m/z were not provided or outside the m/z range")}
 
     ########################## 3) fold change image ###########################
 
@@ -379,14 +379,16 @@
 				}
 				)    
 		}
-		} else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")}
+		} else {plot.new()
+        text(0.5, 0.5, "The input peptide and calibrant m/z were not \n provided or outside the m/z range.", cex = 1.5)
+		print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")}
 		
     #end if
 
     #################### 5) Number of peaks per pixel - image ##################
 
     ## here every intensity value > 0 counts as peak
-    peaksperpixel = colSums(spectra(msidata)> 0, na.rm=TRUE)
+    peaksperpixel = colSums(int_matrix> 0, na.rm=TRUE)
     peakscoordarray=data.frame(coord(msidata)\$x, coord(msidata)\$y, peaksperpixel)
     colnames(peakscoordarray) = c("x", "y", "peaksperpixel")
 
@@ -495,24 +497,24 @@
     #end if
 
     ########################## 8) optional pca image for two components #################
-
-    #if $do_pca:
+    ### PCA is disabled (code below is commented out) until the next update
+    ####if $do_pca:
 
         set.seed(1)
-        pca = PCA(msidata, ncomp=2)
+        ##pca = PCA(msidata, ncomp=2)
 
         ## plot overview image and plot and PC1 and 2 images
-        print(plot(pca, col=c("black", "darkgrey"), main="PCA for two components", layout=c(2,1), strip=FALSE))
-        print(image(pca, run="infile", col=c("black", "white"), strip=FALSE,  ylim= c(maximumy+0.2*maximumy,minimumy-1), layout=FALSE))
+        ##print(plot(pca, col=c("black", "darkgrey"), main="PCA for two components", layout=c(2,1), strip=FALSE))
+        ##print(image(pca, run="infile", col=c("black", "white"), strip=FALSE,  ylim= c(maximumy+0.2*maximumy,minimumy-1), layout=FALSE))
 
-        par(oma=c(0,0,0,1))## margin for image legend
-        print(image(pca, column = "PC1" , strip=FALSE, superpose = FALSE, main="PC1", col.regions = risk.colors(100), layout=c(2,1), ylim= c(maximumy+0.2*maximumy,minimumy-1)))
-        print(image(pca, column = "PC2" , strip=FALSE, superpose = FALSE, main="PC2", col.regions = risk.colors(100), layout=FALSE,  ylim= c(maximumy+0.2*maximumy,minimumy-1)))
+        ##par(oma=c(0,0,0,1))## margin for image legend
+        ##print(image(pca, column = "PC1" , strip=FALSE, superpose = FALSE, main="PC1", col.regions = risk.colors(100), layout=c(2,1), ylim= c(maximumy+0.2*maximumy,minimumy-1)))
+        ##print(image(pca, column = "PC2" , strip=FALSE, superpose = FALSE, main="PC2", col.regions = risk.colors(100), layout=FALSE,  ylim= c(maximumy+0.2*maximumy,minimumy-1)))
     	## remove pca to clean up space
-        rm(pca)
-        gc()
+        ##rm(pca)
+        ##gc()
 
-    #end if
+    ####end if
 
     ################## III) properties over spectra index ######################
     ############################################################################
@@ -612,7 +614,7 @@
 
     #if $report_depth:
         
-		peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE)
+		peakspermz = rowSums(int_matrix > 0, na.rm=TRUE)
 
 		par(mfrow = c(2,1), mar=c(5,6,4,4.5))
 		## 12a) scatterplot
@@ -665,7 +667,7 @@
 		title(main="Intensity histogram", line=2)
 		title(xlab="intensities")
 		title(ylab="Frequency", line=4)
-		abline(v=median(int_matrix)[(as.matrix(spectra(msidata))>0)], col="blue")
+		abline(v=median(int_matrix[int_matrix>0], na.rm=TRUE), col="blue") ## median of all intensities > 0 (subset first, then take the median)
 
     #end if
 
@@ -675,7 +677,7 @@
 
         df_13 = data.frame(matrix(,ncol=2, nrow=0))
         for (subsample in unique(msidata\$annotation)){
-            log2_int_subsample = spectra(msidata)[,msidata\$annotation==subsample]
+            log2_int_subsample = int_matrix[,msidata\$annotation==subsample]
             df_subsample = data.frame(as.numeric(log2_int_subsample))
             df_subsample\$annotation = subsample
             df_13 = rbind(df_13, df_subsample)}
@@ -698,7 +700,7 @@
 
         mean_matrix = matrix(,ncol=0, nrow = nrow(msidata))
         for (subsample in unique(msidata\$annotation)){
-            mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE)
+            mean_mz_sample = rowMeans(int_matrix[,msidata\$annotation==subsample],na.rm=TRUE)
             mean_matrix = cbind(mean_matrix, mean_mz_sample)}
             
         boxplot(log10(as.data.frame(mean_matrix)), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n")
@@ -707,6 +709,8 @@
         ## 14e) Heatmap of mean intensities of annotation groups
 
         colnames(mean_matrix) = unique(msidata\$annotation)
+        print(mean_matrix)
+
         mean_matrix[is.na(mean_matrix)] = 0
             heatmap.parameters <- list(mean_matrix, 
             show_rownames = T, show_colnames = T,
@@ -807,7 +811,7 @@
             ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17
             filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
 
-            if (nrow(filtered_data) > 0 & sum(spectra(filtered_data)) > 0){
+            if (nrow(filtered_data) > 0 & sum(as.matrix(spectra(filtered_data))) > 0){
                 maxmassrow = featureApply(filtered_data, mean) ## for each m/z average intensity is calculated
                 maxvalue = mz(filtered_data)[which.max(maxmassrow)] ### m/z with highest average intensity in m/z range
                 mzdifference = maxvalue - inputcalibrantmasses[mass] ### difference: theoretical value - closest m/z value
@@ -834,7 +838,7 @@
             abline(v=c(mzvalue), col="green2", lty=4)
             ## average plot including points per data point
             print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", layout=FALSE, strip=FALSE, main="Average spectrum with data points", col="black"))
-            points(mz(msidata_no_NA[minmasspixel1:maxmasspixel1,]), rowMeans(spectra(msidata_no_NA)[minmasspixel1:maxmasspixel1,,drop=FALSE]), col="blue", pch=20)
+            points(mz(msidata_no_NA[minmasspixel1:maxmasspixel1,]), rowMeans(as.matrix(spectra(msidata_no_NA))[minmasspixel1:maxmasspixel1,,drop=FALSE]), col="blue", pch=20)
             ## plot of third average plot
             print(plot(msidata_no_NA[minmasspixel2:maxmasspixel2,], run="infile", layout=FALSE, strip=FALSE, main= "Average spectrum", col="black"))
             abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3))
@@ -912,10 +916,12 @@
     #end if
 
         #################### 19) ppm difference over pixels #####################
+        print("ppm difference over pixels")
 
         par(mfrow = c(1,1))
         count = 1
         ppm_df = as.data.frame(matrix(,ncol=0, nrow = ncol(msidata)))
+
         for (calibrant in inputcalibrantmasses){
             ### find m/z with the highest mean intensity in m/z range, if no m/z in the range, ppm differences for this calibrant will be NA
             filtered_data = msidata[mz(msidata) >= calibrant-plusminusvalues[count] & mz(msidata) <= calibrant+plusminusvalues[count],]
@@ -978,9 +984,14 @@
 
     #end if
 
-    }else{print("plot 16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")}
+    }else{
+    plot.new()
+    text(0.5, 0.5, "plot 16+17+18+19) The input calibrant m/z were not provided \n or outside the m/z range", cex = 1.5)
+    print("plot 16+17+18+19) The input calibrant m/z were not provided or outside the m/z range")}
 }else{
-    print("inputfile has no intensities > 0")
+    plot.new()
+    text(0.5, 0.5, "The input file has no intensities > 0", cex = 1.5)
+    print("input file has no intensities > 0")
 }
     dev.off()
 
@@ -1002,7 +1013,7 @@
         <expand macro="pdf_filename"/>
         <expand macro="reading_2_column_mz_tabular" optional="true"/>
         <param name="plusminus_ppm" value="200" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z"/>
-        <param name="do_pca" type="boolean" label="PCA with 2 components"/>
+        <!--param name="do_pca" type="boolean" label="PCA with 2 components"/-->
         <param name="report_depth" type="boolean" label="Generate full QC report" truevalue="TRUE" falsevalue="FALSE" checked="True" help="No: does not generate all plots but only the most informatives"/>
         <repeat name="calibrantratio" title="Plot fold change of two m/z" min="0" max="10">
             <param name="mass1" value="1111" type="float" label="M/z 1" help="First m/z"/>
@@ -1040,7 +1051,7 @@
             <param name="name_column" value="1"/>
             <param name="plusminus_ppm" value="100"/>
             <param name="filename" value="Testfile_imzml"/>
-            <param name="do_pca" value="True"/>
+            <!--param name="do_pca" value="True"/-->
             <repeat name="calibrantratio">
                 <param name="mass1" value="328.9"/>
                 <param name="mass2" value="398.8"/>
@@ -1055,7 +1066,7 @@
                 <param name="load_annotation" value="no_annotation"/>
             </conditional>
             <param name="filename" value="Testfile_analyze75"/>
-            <param name="do_pca" value="True"/>
+            <!--param name="do_pca" value="True"/-->
             <output name="QC_report" file="QC_analyze75.pdf" compare="sim_size"/>
         </test>
         <test>
@@ -1073,8 +1084,12 @@
             <param name="name_column" value="1"/>
             <param name="plusminus_ppm" value="100"/>
             <param name="filename" value="Testfile_rdata"/>
-            <param name="do_pca" value="True"/>
-            <output name="QC_report" file="QC_rdata.pdf" compare="sim_size"/>
+            <!--param name="do_pca" value="True"/-->
+            <output name="QC_report" ftype="pdf">
+                <assert_contents>
+                    <has_size value="1276311" delta="100"/>
+                </assert_contents>
+            </output>
         </test>
         <test>
             <param name="infile" value="empty_spectra.rdata" ftype="rdata"/>
@@ -1085,7 +1100,7 @@
             <param name="mz_column" value="1"/>
             <param name="name_column" value="2"/>
             <param name="filename" value="Testfile_rdata"/>
-            <param name="do_pca" value="False"/>
+            <!--param name="do_pca" value="False"/-->
             <output name="QC_report" file="QC_empty_spectra.pdf" compare="sim_size"/>
         </test>
         <test>