Mercurial > repos > galaxyp > cardinal_filtering

diff filtering.xml @ 2:0c4579390f73 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1
author: galaxyp
date: Fri, 15 Feb 2019 10:15:15 -0500
parents: aac805a9d2ae
children: 58376f5a6319
--- a/filtering.xml	Thu Oct 25 07:25:13 2018 -0400
+++ b/filtering.xml	Fri Feb 15 10:15:15 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.1">
+<tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.2">
     <description>tool for filtering mass spectrometry imaging data</description>
     <macros>
         <import>macros.xml</import>
@@ -15,16 +15,13 @@
         cat '${MSI_subsetting}' &&
         Rscript '${MSI_subsetting}' &&
 
-        #if $imzml_output:
+        #if str($imzml_output) == "imzml_format":
             mkdir $outfile_imzml.files_path &&
-            ls -l &&
             mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
             mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
         #end if
         echo "imzML file:" > $outfile_imzml &&
         ls -l "$outfile_imzml.files_path" >> $outfile_imzml
-
-
     ]]>
     </command>
 
@@ -58,30 +55,13 @@
         ## Range y coordinates
         minimumy = min(coord(msidata)[,2])
         maximumy = max(coord(msidata)[,2])
-        ## Number of intensities > 0
-        npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE)
-        ## Spectra multiplied with m/z (potential number of peaks)
-        numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
-        ## Percentage of intensities > 0
-        percpeaks = round(npeaks/numpeaks*100, digits=2)
-        ## Number of empty TICs
-        TICs = colSums(spectra(msidata)[], na.rm=TRUE) 
-        NumemptyTIC = sum(TICs == 0)
-        ## median TIC
-        medint = round(median(TICs), digits=2)
         ## Store features for QC plot
         featuresinfile = mz(msidata)
 
-## Next steps will only run if there are more than 0 intensities/pixels/features in the file
-
-if (sum(spectra(msidata)[]>0, na.rm=TRUE) > 0)
-{
+## Next steps will only run if there are more than 0 pixels/features in the file
 
-
-        ## prepare dataframe for QC of pixel distribution (will be overwritten in filtering of pixels condition)
-        position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata)))
-        colnames(position_df)[3] = "annotation"
-
+if (ncol(msidata)>0 & nrow(msidata) >0)
+{
     ###################################### Filtering of pixels #####################
     ################################################################################
 
@@ -93,69 +73,90 @@
         ## read tabular file
         input_list = read.delim("$pixels_cond.annotation_file", header = $pixels_cond.tabular_header, 
         stringsAsFactors = FALSE)
-        numberpixels = nrow(input_list)
-        inputpixels = input_list[,c($pixels_cond.column_x, $pixels_cond.column_y, $pixels_cond.column_names)]
+        inputpixels = input_list[,c($pixels_cond.column_x, $pixels_cond.column_y)]
+        input_pixels = paste(inputpixels[,1], inputpixels[,2], sep="_")
+        dataset_pixels = paste(coord(msidata)\$x, coord(msidata)\$y, sep="_")
+        pixelsofinterest = dataset_pixels %in% input_pixels
 
-        ## rewrite into x = 1, y = 1 format and filter msidata, count validpixels
-        pixelvector = character()
-        for (pixel in 1:nrow(inputpixels)){
-            pixelvector[pixel] = paste0("x = ", inputpixels[pixel,1],", ", "y = ", inputpixels[pixel,2])}
-        pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
-        msidata = msidata[,pixelsofinterest]
+        tryCatch(
+                {
+                msidata = msidata[,pixelsofinterest]
+                if (ncol(msidata) == 0)
+                    {
+                    stop(call.=FALSE)
+                    }
+                },
+                error=function(cond) {
+                ## in case all coordinates were outside the dataset leading to zero pixels, tool is stopped to avoid continuing with wrong data
+                    message("Error during pixel filtering")
+                    message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, columns with coordinates contain empty fields or letters, all coordinates were outside the range of the dataset - this can be checked with the 'MSI qualitycontrol' tool)")
+                    stop(call.=FALSE)
+                }
+            )    
+
+        ## QC values:
+        numberpixels = nrow(input_list)
         validpixels=ncol(msidata)
 
-        ## in case some pixels are left print annotation plot
-        colnames(inputpixels) = c("x", "y", "annotation")
-        position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE)
-        colnames(position_df)[3] = "annotation"
-        position_df\$annotation = factor(position_df\$annotation)
-
-
-    ########### Pixels wihin x and y minima and maxima are kept ###################
+    ########### Pixels wihin x and y minima and maxima are kept #################
 
     #elif str($pixels_cond.pixel_filtering) == "pixel_range":
         print("pixel range")
 
+        ## QC values:
         numberpixels = "range"
         validpixels = "range"
 
-        ## only filter pixels if at least one pixel will be left
-        if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){
-
-            msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range]
-            msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range]
-        }else{
+        tryCatch(
+                {
+                    msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range]
+                    msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range]
+                    if (ncol(msidata) == 0)
+                        {
+                        stop(call.=FALSE)
+                        }
+                },
+                error=function(cond) {
+                ## in case one of the ranges was outside the dataset leading to zero pixels, tool is stopped to avoid continuing with wrong data
+                    message("Error during pixel filtering")
+                    message("Check that both x and y ranges were inside the dataset coordinates (can be checked with the 'MSI qualitycontrol' tool) or if any not numeric character was entered into the input fields")
+                    stop(call.=FALSE)
+                }
+            )    
 
-            print("no valid pixel found")
-            msidata = msidata[,0]}
-
-        ## update position_df for filtered pixels
-        position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata)))
-        colnames(position_df)[3] = "annotation"
-        position_df\$annotation = factor(position_df\$annotation)
+    ######################## no pixel filtering ################################
 
     #elif str($pixels_cond.pixel_filtering) == "none":
         print("no pixel filtering")
 
+        ## QC values:
         numberpixels = 0
         validpixels = 0
 
     #end if
 
+    ############################# QC data #####################################
+
+    ## dataframe for QC of pixel distribution
+    position_df = cbind(coord(msidata)[,1:2], rep("remaining pixels", times=ncol(msidata)))
+    colnames(position_df)[3] = "annotation"
+    position_df\$annotation = factor(position_df\$annotation)
+    gc()
+
 }else{
     print("Inputfile has no intensities > 0")
 }
 
-    ################################# filtering of features ######################
-    ##############################################################################
+################################# filtering of features ######################
+##############################################################################
 
-    ####################### Keep m/z from tabular file #########################
+####################### Keep m/z from tabular file #########################
 
 ## feature filtering only when pixels/features/intensities are left
 
+
 if (ncol(msidata) > 0){
-    npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE)
-    if (npeaks_before_filtering > 0)
+    if (nrow(msidata) > 0)
     {
 
         #if str($features_cond.features_filtering) == "features_list":
@@ -165,6 +166,7 @@
             input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE)
             extracted_features = input_features[,$features_cond.feature_column]
             numberfeatures = length(extracted_features)
+
             if (class(extracted_features) == "numeric"){
                 ### max digits given in the input file will be used to match m/z but the maximum is 4
                    max_digits = max(nchar(sapply(strsplit(as.character(extracted_features), "\\."),`[`,2)), na.rm=TRUE)
@@ -177,12 +179,31 @@
                 validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits)
                 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)]
                 validmz = length(unique(featuresofinterest))
+
             }else{
                     validmz = 0
                     featuresofinterest = 0}
 
             ### filter msidata for valid features
-            msidata = msidata[featuresofinterest,]
+
+                tryCatch(
+                        {
+                        msidata = msidata[featuresofinterest,]
+                        ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data
+                        if (nrow(msidata) == 0)
+                            {
+                            stop(call.=FALSE)
+                            }
+                        },
+                        error=function(cond) {
+                        ## in case all provided m/z values were outside the m/z range
+                        ## tool is stopped to avoid continuing with wrong data
+                            message("Error during m/z filtering")
+                            message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, column with m/z features contains empty fields or letters, all m/z features s were outside the range of the dataset  - this can be checked with the 'MSI qualitycontrol' tool) or did not match any m/z feature of the dataset (see help section for more information on that)")
+                            stop(call.=FALSE)
+                        }
+                    )    
+
 
         ############### features within a given range are kept #####################
 
@@ -192,11 +213,22 @@
             numberfeatures = "range"
             validmz = "range"
 
-            if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){
-                msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,]
-            }else{ 
-                msidata = msidata[0,]
-                print("no valid mz range")}
+            tryCatch(
+                    {
+                    msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,]
+                    ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data
+                    if (nrow(msidata) == 0)
+                        {
+                        stop(call.=FALSE)
+                        }
+                    },
+                    error=function(cond) {
+                    ## in case all m/z features were outside the dataset leading to zero m/z features, tool is stopped to avoid continuing with wrong data
+                        message("Error during m/z filtering")
+                        message("Check that the entered m/z range is inside the dataset coordinates (can be checked with the 'MSI qualitycontrol' tool) or if any not numeric character was entered into the input fields")
+                        stop(call.=FALSE)
+                    }
+                )    
 
         ############### Remove m/z from tabular file #########################
 
@@ -204,31 +236,45 @@
             print("remove features")
 
             ## read tabular file, define starting row, extract and count valid features
-            input_features = read.delim("$mz_tabular", header = $features_cond.removal_header, stringsAsFactors = FALSE) 
-            extracted_features = input_features[,$features_cond.removal_column]
+            input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE) 
+            extracted_features = input_features[,$features_cond.feature_column]
             numberfeatures = length(extracted_features)
             if (class(extracted_features) == "numeric"){
                 print("input is numeric")
                 featuresofinterest = extracted_features
-                validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata)))
-            }else{featuresofinterest = 0
-                    validmz = 0}
+            }else{featuresofinterest = 0}
 
         ### Here starts removal of features: 
             plusminus = $features_cond.removal_plusminus
 
-            mass_to_remove = numeric()
-            if (sum(featuresofinterest) > 0){
-                for (masses in featuresofinterest){
-                    #if str($features_cond.units_removal) == "ppm": 
-                        plusminus = masses * $features_cond.removal_plusminus/1000000
-                    #end if 
-                    current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus))
-                    mass_to_remove = append(mass_to_remove, current_mass)}
-                msidata= msidata[-mass_to_remove, ]
-            }else{print("No features were removed as they were not fitting to m/z values and/or range")}
+            tryCatch(
+                    {
+                    mass_to_remove = numeric()
+                    for (masses in featuresofinterest){
+                        #if str($features_cond.units_removal) == "ppm": 
+                            plusminus = masses * $features_cond.removal_plusminus/1000000
+                        #end if 
+                        current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus))
+                        mass_to_remove = append(mass_to_remove, current_mass)}
+                    msidata= msidata[-mass_to_remove, ]
+                    validmz = numberfeatures - nrow(msidata)
+                    ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data
+                    if (nrow(msidata) == 0)
+                        {
+                        stop(call.=FALSE)
+                        }
+                    },
+                    error=function(cond) {
+                        message("Error during removal of m/z features")
+                        stop(call.=FALSE)
+                    }
+                )    
 
 
+
+
+        ######################## No m/z filtering ##############################
+
         #elif str($features_cond.features_filtering) == "none":
 
             print("no feature filtering")
@@ -237,157 +283,115 @@
 
         #end if
 
-        ## save msidata as Rfile
-        save(msidata, file="$msidata_filtered")
-
-        ## Number of empty TICs
-        TICs2 = colSums(spectra(msidata)[], na.rm=TRUE)
-        ## Number of intensities > 0
-        npeaks2= sum(spectra(msidata)[]>0, na.rm=TRUE)
-        ## Spectra multiplied with m/z (potential number of peaks)
-        numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
-
-
-
     }else{
-        print("Inputfile or file filtered for pixels has no intensities > 0")
+        print("Inputfile has no m/z features")
         numberfeatures = NA
         validmz = NA
-        ## Number of empty TICs
-        TICs2 = 0
-        npeaks2 = 0
-        numpeaks2 = 0
     }
 }else{
-        print("Inputfile or file filtered for pixels has no pixels left")
+        print("Inputfile or file filtered for pixels has no pixels")
         numberfeatures = NA
         validmz = NA
-        ## Number of empty TICs
-        TICs2 = 0
-        npeaks2 = 0
-        numpeaks2 = 0
 }
-    #################### QC numbers #######################
+gc()
 
+#################### QC numbers #######################
 
-        ## Number of features (m/z)
-        maxfeatures2 = length(features(msidata))
-        ## Range m/z
-        minmz2 = round(min(mz(msidata)), digits=2)
-        maxmz2 = round(max(mz(msidata)), digits=2)
-        ## Number of spectra (pixels)
-        pixelcount2 = length(pixels(msidata))
-        ## Range x coordinates
-        minimumx2 = min(coord(msidata)[,1])
-        maximumx2 = max(coord(msidata)[,1])
-        ## Range y coordinates
-        minimumy2 = min(coord(msidata)[,2])
-        maximumy2 = max(coord(msidata)[,2])
+## Number of features (m/z)
+maxfeatures2 = length(features(msidata))
+## Range m/z
+minmz2 = round(min(mz(msidata)), digits=2)
+maxmz2 = round(max(mz(msidata)), digits=2)
+## Number of spectra (pixels)
+pixelcount2 = length(pixels(msidata))
+## Range x coordinates
+minimumx2 = min(coord(msidata)[,1])
+maximumx2 = max(coord(msidata)[,1])
+## Range y coordinates
+minimumy2 = min(coord(msidata)[,2])
+maximumy2 = max(coord(msidata)[,2])
 
-        ## Percentage of intensities > 0
-        percpeaks2 = round(npeaks2/numpeaks2*100, digits=2)
-        ## Number of empty TICs
-        NumemptyTIC2 = sum(TICs2 == 0)
-        ## median TIC
-        medint2 = round(median(TICs2), digits=2)
+properties = c("Number of m/z features",
+               "Range of m/z values",
+               "Number of pixels", 
+               "Range of x coordinates", 
+               "Range of y coordinates",
+               "pixel overview", 
+               "feature overview")
 
-        properties = c("Number of m/z features",
-                       "Range of m/z values",
-                       "Number of pixels", 
-                       "Range of x coordinates", 
-                       "Range of y coordinates",
-                       "Intensities > 0",
-                       "Median TIC per pixel",
-                       "Number of empty spectra", 
-                       "pixel overview", 
-                       "feature overview")
-
-        before = c(paste0(maxfeatures), 
-                   paste0(minmz, " - ", maxmz), 
-                   paste0(pixelcount), 
-                   paste0(minimumx, " - ", maximumx), 
-                   paste0(minimumy, " - ", maximumy), 
-                   paste0(percpeaks, " %"), 
-                   paste0(medint),
-                   paste0(NumemptyTIC), 
-                   paste0("input pixels: ", numberpixels),
-                   paste0("input mz: ", numberfeatures))
+before = c(paste0(maxfeatures), 
+           paste0(minmz, " - ", maxmz), 
+           paste0(pixelcount), 
+           paste0(minimumx, " - ", maximumx), 
+           paste0(minimumy, " - ", maximumy), 
+           paste0("input pixels: ", numberpixels),
+           paste0("input mz: ", numberfeatures))
 
-        filtered = c(paste0(maxfeatures2), 
-                   paste0(minmz2, " - ", maxmz2), 
-                   paste0(pixelcount2), 
-                   paste0(minimumx2, " - ", maximumx2),  
-                   paste0(minimumy2, " - ", maximumy2), 
-                   paste0(percpeaks2, " %"), 
-                   paste0(medint2),
-                   paste0(NumemptyTIC2), 
-                   paste0("valid pixels: ", validpixels),
-                   paste0("valid mz: ", validmz))
+filtered = c(paste0(maxfeatures2), 
+           paste0(minmz2, " - ", maxmz2), 
+           paste0(pixelcount2), 
+           paste0(minimumx2, " - ", maximumx2),  
+           paste0(minimumy2, " - ", maximumy2), 
+           paste0("valid pixels: ", validpixels),
+           paste0("valid mz: ", validmz))
+
+property_df = data.frame(properties, before, filtered)
 
-        property_df = data.frame(properties, before, filtered)
-print(property_df)
-
-    ########################### PDF QC and imzml output ###########################
+########################### PDF QC and MSI output ###########################
 
-        pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
-        plot(0,type='n',axes=FALSE,ann=FALSE)
-        title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))
-        grid.table(property_df, rows= NULL)
+pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
+plot(0,type='n',axes=FALSE,ann=FALSE)
+title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))
+grid.table(property_df, rows= NULL)
 
-## QC report with more than value-table: only when pixels/features/intensities are left
-if (npeaks2 > 0)
+## QC report only when pixels/features are left
+if (ncol(msidata)>0 & nrow(msidata) >0)
 {
 
-    ## save msidata as imzML file, will only work if there is at least 1 m/z left
-    #if $imzml_output:
-        if (maxfeatures2 > 0){
-        writeImzML(msidata, "out")}
+    ### visual pixel control
+
+    pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation))+
+           geom_tile(height = 1, width=1)+
+           coord_fixed()+
+           ggtitle("Spatial orientation of filtered pixels")+
+           theme_bw()+
+           theme(plot.title = element_text(hjust = 0.5))+
+           theme(legend.position="bottom",legend.direction="vertical")
+    print(pixel_image)
+
+    ### plot features which are removed
+    hist(mz(msidata), xlab="m/z", main="Kept m/z values")
+    #if str($features_cond.features_filtering) == "none":
+        print("no difference histogram as no m/z filtering took place")
+    #else:
+        if (isTRUE(all.equal(featuresinfile, mz(msidata)))){
+        print("No difference in m/z values before and after filtering, no histogram drawn")
+        }else{
+        hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")}
     #end if
 
-
-        ### visual pixel control
-
-            levels(position_df\$annotation) = factor(paste(1:length(levels(position_df\$annotation)), levels(position_df\$annotation), sep="_"))
+        dev.off()
 
-            pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation))+
-                   geom_tile(height = 1, width=1)+
-                   coord_fixed()+
-                   ggtitle("Spatial orientation of filtered pixels")+
-                   theme_bw()+
-                   theme(plot.title = element_text(hjust = 0.5))+
-                   theme(text=element_text(family="ArialMT", face="bold", size=12))+
-                   theme(legend.position="bottom",legend.direction="vertical")+
-                   theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 6))+
-                   guides(fill=guide_legend(ncol=4,byrow=TRUE))
-
-    coord_labels = aggregate(cbind(x,y)~annotation, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
-    coord_labels\$file_number = 1:length(levels(position_df\$annotation))
+    ## save msidata as imzML file, will only work if there is at least 1 m/z left
 
-    for(file_count in 1:nrow(coord_labels))
-    {pixel_image = pixel_image + annotate("text",x=coord_labels[file_count,"x"],
-    y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}
-
-            print(pixel_image)
-
-            ### control features which are removed
-            hist(mz(msidata), xlab="m/z", main="Kept m/z values")
-            #if str($features_cond.features_filtering) == "none":
-                print("no difference histogram as no m/z filtering took place")
-            #else:
-
-                if (isTRUE(all.equal(featuresinfile, mz(msidata)))){
-                print("No difference in m/z values before and after filtering, no histogram drawn")
-                }else{
-                hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")}
-            #end if
-
-        dev.off()
+    #if str($imzml_output) == "imzml_format":
+        if (maxfeatures2 > 0){
+            ## make sure that coordinates are integers
+            coord(msidata)\$y = as.integer(coord(msidata)\$y)
+            coord(msidata)\$x = as.integer(coord(msidata)\$x)
+        writeImzML(msidata, "out")}
+    #elif str($imzml_output) == "rdata_format":
+        ## save msidata as Rfile
+        iData(msidata) = iData(msidata)[]
+        save(msidata, file="$outfile_rdata")
+    #end if
 
 
 }else{
     print("Inputfile or filtered file has no intensities > 0")
     dev.off()
 }
+
     ]]></configfile>
     </configfiles>
     <inputs>
@@ -395,13 +399,16 @@
         <conditional name="pixels_cond">
             <param name="pixel_filtering" type="select" label="Select pixel filtering option">
                 <option value="none" selected="True">none</option>
-                <option value="two_columns">list of pixel coordinates (tabular file)</option>
+                <option value="two_columns">coordinates from tabular file</option>
                 <option value="pixel_range">ranges for x and y (manually)</option>
             </param>
             <when value="none"/>
             <when value="two_columns">
-                <expand macro="reading_pixel_annotations"/>
-
+                <param name="annotation_file" type="data" format="tabular" label="Tabular file with pixel coordinates"
+                help="Tabular file with two columns: x values and y values"/>
+                <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
+                <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
+                <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
             </when> 
             <when value="pixel_range">
                 <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/>
@@ -414,9 +421,9 @@
         <conditional name="features_cond">
             <param name="features_filtering" type="select" label="Select m/z feature filtering option">
                 <option value="none" selected="True">none</option>
-                <option value="features_list">keep a list of m/z (tabular file)</option>
+                <option value="features_list">keep m/z (tabular file)</option>
                 <option value="features_range">m/z range (manually)</option>
-                <option value="remove_features">remove a list of m/z (tabular file)</option>
+                <option value="remove_features">remove m/z (tabular file)</option>
             </param>
             <when value="none"/>
             <when value="features_list">
@@ -435,16 +442,21 @@
                 </param>
             </when>
         </conditional>
-        <param name="imzml_output" type="boolean" label="Output of imzML file" truevalue="TRUE" falsevalue="FALSE"/>
-
+        <param name="imzml_output" type="select" display = "radio" optional = "False"
+               label="Output format" help= "Choose the output format">
+                <option value="imzml_format" selected="True">imzML</option>
+                <option value="rdata_format">RData</option>
+        </param>
     </inputs>
 
     <outputs>
-        <data format="rdata" name="msidata_filtered" label="${tool.name} on ${on_string}"/>
+        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML">
+            <filter>imzml_output =='imzml_format'</filter>
+        </data>
+        <data format="rdata" name="outfile_rdata" label="${tool.name} on ${on_string}: RData">
+            <filter>imzml_output == 'rdata_format'</filter>
+        </data>
         <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "${tool.name} on ${on_string}: QC"/>
-        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML">
-            <filter>imzml_output</filter>
-       </data>
     </outputs>
     <tests>
         <test>
@@ -455,24 +467,27 @@
             <param name="min_y_range" value="2"/>
             <param name="max_y_range" value="2"/>
             <param name="features_filtering" value="features_range"/>
-            <param name="min_mz" value="350" />
+            <param name="min_mz" value="350"/>
             <param name="max_mz" value="500"/>
+            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size"/>
-            <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/>
+            <output name="outfile_imzml" ftype="imzml" file="out3.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="out3.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out3.ibd" name="ibd" compare="sim_size"/>
+            </output>
         </test>
         <test>
             <expand macro="infile_imzml"/>
             <param name="pixel_filtering" value="two_columns"/>
-            <param name="annotation_file" ftype="tabular" value = "inputpixels_2column.tabular"/>
+            <param name="annotation_file" ftype="tabular" value="inputpixels_2column.tabular"/>
             <param name="column_x" value="1"/>
             <param name="column_y" value="3"/>
-            <param name="column_names" value="2"/>
+            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size"/>
-            <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/>
-            <!--imzml output test not yet working: output name="outfile_imzml" file="filtering_imzmls/summary" compare="sim_size" delta="10000">
-                <extra_files type="file" name="imzml" value="filtering_imzmls/out4.imzML" compare="sim_size" delta="10000"/>
-                <extra_files type="file" name="ibd" value="filtering_imzmls/out4.ibd" compare="sim_size" delta="10000"/>
-            </output-->
+            <output name="outfile_imzml" ftype="imzml" file="out4.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="out4.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out4.ibd" name="ibd" compare="sim_size"/>
+            </output>
         </test>
         <test>
             <expand macro="infile_imzml"/>
@@ -485,21 +500,33 @@
             <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/>
             <param name="feature_column" value="1"/>
             <param name="feature_header" value="0"/>
+            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size"/>
-            <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" />
+            <output name="outfile_imzml" ftype="imzml" file="out5.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="out5.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out5.ibd" name="ibd" compare="sim_size"/>
+            </output>
         </test>
         <test>
             <expand macro="infile_analyze75"/>
+            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size"/>
-            <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" />
+            <output name="outfile_imzml" ftype="imzml" file="out6.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="out6.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out6.ibd" name="ibd" compare="sim_size"/>
+            </output>
         </test>
         <test>
             <param name="infile" value="preprocessed.RData" ftype="rdata"/>
             <conditional name="outputs">
                 <param name="outputs_select" value="no_quality_control"/>
             </conditional>
-            <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size"/>
+            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" />
+            <output name="outfile_imzml" ftype="imzml" file="out7.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="out7.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out7.ibd" name="ibd" compare="sim_size"/>
+            </output>
         </test>
     </tests>
     <help>
@@ -513,27 +540,59 @@
 
 @MSIDATA_INPUT_DESCRIPTION@
 
-@SPECTRA_TABULAR_INPUT_DESCRIPTION@
+        - Optional file with pixel coordinates and annotation: 
+
+            - Tabular file: One column with x values, one column with y values
+            - The file is allowed to have any column names as header (in this case set "Tabular file contains a header line" to yes)
+            - Pixel with coordinates outside the coordinates of the input file are ignored
+
+                ::
+
+                      x_coord     y_coord
+                        1            1   
+                        2            1   
+                        3            1   
+                        ...
+                        ...
 
 @MZ_TABULAR_INPUT_DESCRIPTION@
 
 **Options**
 
-- pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible). Pixel that are not present in the dataset are ignored. It is not possible to filter only for pixels that are not present in the dataset. 
-- m/z feature filtering: m/z values for filtering should be either imported as a tabular file containing containing m/z of interest or by defining a range for the m/z values. m/z that are not present in the dataset are ignored. It is not possible to filter only for m/z that are not present in the dataset. 
-- m/z feature removing: perturbing m/z features such as matrix contaminants can be removed by specifying their m/z in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed.
+- Pixel filtering/annotation: 
+
+    - Either with a tabular file containing x and y coordinates or by entering x-min, x-max, y-min, y-max manually
+    - Pixel that are not present in the dataset are ignored
+    - An error occurs if the input for filtering (tabular file, x-range or y-range) contains not a single coordinate that occurs in the input dataset
+
+
+- m/z feature filtering: 
+
+    - Either with a tabular file containing m/z values or by entering m/z-min and m/z-max manually
+    - m/z values that are not present in the dataset are ignored
+    - An error occurs if the input for filtering (tabular file or mz-range) contains not a single m/z feature that occurs in the dataset
+
+
+- m/z feature removing: 
+
+    - Perturbing m/z features such as matrix contaminants can be removed by specifying their m/z value in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed
 
 
 **Tips**
 
-- Numeric m/z features imported via a tabular file and m/z features of the dataset are rounded to 4 decimal points (or maximum number of decimal points of input m/z) and then matched. Therefore, it is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool "Join two files on column allowing a small difference" should be used to find corresponding m/z values, which can then be used for filtering. 
-- In case tabular file cannot be selected in drop-down menu: Datatype in Galaxy must be tabular otherwise file will not appear in selection window (if Galaxy auto-detection was wrong, datatype can be changed by pressing the pen button (edit attributes))
+- m/z feautre filtering with a tabular file: 
+
+    - For matching the m/z features of the input dataset are rounded to the number of decimal points of the m/z values from the tabular file. In case the input had more than 4 digits m/z values of dataset and tabular file are rounded to 4 digits. 
+    - Therefore, it is recommended to use the filtering tool only for m/z features which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool 'Join two files on column allowing a small difference' should be used to find corresponding m/z values, which can then be used for filtering. 
+
+- Problems to select tabular file: 
+
+    - In case tabular file cannot be selected in drop-down menu: Datatype in Galaxy might be different from 'tabular' - datatype can be changed by pressing the pen button of the dataset (edit attributes)
 
 
 **Output**
 
-- MSI data as .RData output (can be read with the Cardinal package in R)
-- optional: MSI data as imzML file
+- MSI data as imzML file or .RData (can be read with the Cardinal package in R)
 - pdf with heatmap showing the pixels that are left after filtering and histograms of kept and removed m/z
author	galaxyp
date	Fri, 15 Feb 2019 10:15:15 -0500
parents	aac805a9d2ae
children	58376f5a6319