changeset 9:28ac8199d4d5 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit e87eea03505ab1ba067e192bbbcdc6197dc4b42e
author galaxyp
date Tue, 04 Sep 2018 13:42:37 -0400
parents 262db9893c6f
children
files msi_filtering.xml test-data/analyze75_filtered2.pdf test-data/analyze_filtered.RData test-data/analyze_filtered.pdf test-data/analyze_filteredoutside.RData test-data/analyze_matrix.tabular test-data/imzml_filtered.RData test-data/imzml_filtered.pdf test-data/imzml_filtered2.pdf test-data/imzml_filtered3.RData test-data/imzml_filtered3.pdf test-data/imzml_filtered4.RData test-data/imzml_filtered4.pdf test-data/imzml_filtered5.RData test-data/imzml_filtered5.pdf test-data/imzml_matrix3.tabular test-data/rdata_matrix.tabular test-data/rdata_notfiltered.pdf
diffstat 18 files changed, 36 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/msi_filtering.xml	Wed Aug 22 13:42:05 2018 -0400
+++ b/msi_filtering.xml	Tue Sep 04 13:42:37 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.6">
+<tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.7">
     <description>tool for filtering mass spectrometry imaging data</description>
     <requirements>
         <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
@@ -134,27 +134,27 @@
     #elif str($pixels_cond.pixel_filtering) == "two_columns":
         print("two columns")
 
-        ## read tabular file, count number of rows (= number of pixels), extract dataframe with x,y,annotation (for QC), count number of valid pixels
+        ## read tabular file
         input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, 
         stringsAsFactors = FALSE)
         startingrow = $pixels_cond.pixel_header+1
         numberpixels = length(startingrow:nrow(input_list))
         inputpixels = input_list[startingrow:nrow(input_list),c($pixels_cond.pixel_column_x, $pixels_cond.pixel_column_y, $pixels_cond.annotation_column_xy)]
+
+        ## build pixel names in the "x = 1, y = 1" format, keep only the matching pixels of msidata and count the remaining (valid) pixels
+        pixelvector = character()
+        for (pixel in 1:nrow(inputpixels)){
+            pixelvector[pixel] = paste0("x = ", inputpixels[pixel,1],", ", "y = ", inputpixels[pixel,2])}
+        pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
+        msidata = msidata[,pixelsofinterest]
+        validpixels=ncol(msidata)
+
+        ## merge the annotations onto the coordinates of the remaining pixels (data frame for the annotation plot)
         colnames(inputpixels) = c("x", "y", "annotation")
         position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE)
-        validpixels = nrow(position_df)
         colnames(position_df)[3] = "annotation"
         position_df\$annotation = factor(position_df\$annotation)
 
-        ## for valid pixels: filter file for pixels
-        if (validpixels != 0){
-            pixelvector = character()
-            for (pixel in 1:nrow(position_df)){
-                pixelvector[pixel] = paste0("x = ", position_df[pixel,1],", ", "y = ", position_df[pixel,2])}
-            pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
-            msidata = msidata[,pixelsofinterest]
-        }else{
-            validpixels=0}
 
     ########### Pixels within x and y minima and maxima are kept ###################
 
@@ -199,7 +199,6 @@
     npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE)
 
 
-
 if (npeaks_before_filtering > 0)
 
 {
@@ -314,11 +313,14 @@
 
     ## save msidata as Rfile
     save(msidata, file="$msidata_filtered")
-
+        ## TIC (summed intensity) per spectrum; used below to count empty spectra and for the median TIC
+        TICs2 = colSums(spectra(msidata)[], na.rm=TRUE)
 }else{
     print("Inputfile or file filtered for pixels has no intensities > 0")
     numberfeatures = NA
     validmz = NA
+    ## no intensities > 0: TICs per spectrum are not available
+    TICs2 = NA
 }
 
     #################### QC numbers #######################
@@ -344,7 +346,6 @@
         ## Percentage of intensities > 0
         percpeaks2 = round(npeaks2/numpeaks2*100, digits=2)
         ## Number of empty TICs
-        TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) 
         NumemptyTIC2 = sum(TICs2 == 0)
         ## median TIC
         medint2 = round(median(TICs2), digits=2)
@@ -356,7 +357,7 @@
                        "Range of y coordinates",
                        "Intensities > 0",
                        "Median TIC per pixel",
-                       "Number of zero TICs", 
+                       "Number of empty spectra", 
                        "pixel overview", 
                        "feature overview")
 
@@ -439,8 +440,8 @@
 
         spectramatrix = spectra(msidata)[]
         spectramatrix = cbind(mz(msidata),spectramatrix)
-        newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix)
-        write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
+        newmatrix = rbind(c("mz", names(pixels(msidata))), spectramatrix)
+        write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
 
     #end if
 
@@ -533,8 +534,8 @@
 
     <outputs>
         <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/>
-        <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/>
-        <data format="tabular" name="matrixasoutput" label="$infile.display_name filtered_matrix">
+        <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/>
+        <data format="tabular" name="intensity_matrix" label="$infile.display_name filtered_matrix">
             <filter>output_matrix</filter>
         </data>
     </outputs>
@@ -552,7 +553,7 @@
             <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/>
             <param name="feature_column" value="2"/>
             <param name="feature_header" value="1"/>
-            <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/>
+            <output name="QC_overview" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/>
             <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size"/>
         </test>
         <test expect_num_outputs="2">
@@ -565,7 +566,7 @@
             <param name="max_x_range" value="20"/>
             <param name="min_y_range" value="2"/>
             <param name="max_y_range" value="2"/>
-            <output name="filtering_qc" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/>
+            <output name="QC_overview" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/>
             <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size"/>
         </test>
         <test expect_num_outputs="3">
@@ -582,9 +583,9 @@
             <param name="min_mz" value="350" />
             <param name="max_mz" value="500"/>
             <param name="output_matrix" value="True"/>
-            <output name="filtering_qc" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/>
+            <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/>
             <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/>
-            <output name="matrixasoutput" file="imzml_matrix3.tabular"/>
+            <output name="intensity_matrix" file="imzml_matrix3.tabular"/>
         </test>
         <test expect_num_outputs="2">
             <param name="infile" value="" ftype="imzml">
@@ -596,7 +597,7 @@
             <param name="pixel_column_x" value="1"/>
             <param name="pixel_column_y" value="3"/>
             <param name="annotation_column_xy" value="2"/>
-            <output name="filtering_qc" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/>
+            <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/>
             <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/>
         </test>
         <test expect_num_outputs="2">
@@ -613,7 +614,7 @@
             <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/>
             <param name="feature_column" value="1"/>
             <param name="feature_header" value="0"/>
-            <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/>
+            <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/>
             <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" />
         </test>
         <test expect_num_outputs="3">
@@ -629,9 +630,9 @@
             <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/>
             <param name="feature_column" value="1"/>
             <param name="output_matrix" value="True"/>
-            <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/>
+            <output name="QC_overview" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/>
             <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" />
-            <output name="matrixasoutput" file="analyze_matrix.tabular"/>
+            <output name="intensity_matrix" file="analyze_matrix.tabular"/>
         </test>
         <test expect_num_outputs="2">
            <param name="infile" value="" ftype="analyze75">
@@ -639,7 +640,7 @@
                 <composite_data value="Analyze75.img"/>
                 <composite_data value="Analyze75.t2m"/>
             </param>
-            <output name="filtering_qc" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/>
+            <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/>
             <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" />
         </test>
         <test expect_num_outputs="3">
@@ -648,9 +649,9 @@
                 <param name="outputs_select" value="no_quality_control"/>
             </conditional>
             <param name="output_matrix" value="True"/>
-            <output name="matrixasoutput" file="rdata_matrix.tabular"/>
+            <output name="intensity_matrix" file="rdata_matrix.tabular"/>
             <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size" />
-            <output name="filtering_qc" file="rdata_notfiltered.pdf" compare="sim_size" />
+            <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" />
         </test>
     </tests>
     <help>
@@ -683,7 +684,7 @@
 
 Tip: 
 
-- It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If you have m/z from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards use the corresponding feature m/z from dataset A to filter dataset B. 
+- It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool "Join two files on column allowing a small difference" should be used to find corresponding m/z values, which can then be used for filtering. 
 
 
         ]]>
Binary file test-data/analyze75_filtered2.pdf has changed
Binary file test-data/analyze_filtered.RData has changed
Binary file test-data/analyze_filtered.pdf has changed
Binary file test-data/analyze_filteredoutside.RData has changed
--- a/test-data/analyze_matrix.tabular	Wed Aug 22 13:42:05 2018 -0400
+++ b/test-data/analyze_matrix.tabular	Tue Sep 04 13:42:37 2018 -0400
@@ -1,4 +1,4 @@
-mz | spectra	x = 1, y = 1	x = 1, y = 2	x = 3, y = 2	x = 1, y = 3
+mz	x = 1, y = 1	x = 1, y = 2	x = 3, y = 2	x = 1, y = 3
 1201.3349609375	14	12	9	14
 1201.37634277344	17	21	11	20
 1201.45910644531	22	18	18	22
Binary file test-data/imzml_filtered.RData has changed
Binary file test-data/imzml_filtered.pdf has changed
Binary file test-data/imzml_filtered2.pdf has changed
Binary file test-data/imzml_filtered3.RData has changed
Binary file test-data/imzml_filtered3.pdf has changed
Binary file test-data/imzml_filtered4.RData has changed
Binary file test-data/imzml_filtered4.pdf has changed
Binary file test-data/imzml_filtered5.RData has changed
Binary file test-data/imzml_filtered5.pdf has changed
--- a/test-data/imzml_matrix3.tabular	Wed Aug 22 13:42:05 2018 -0400
+++ b/test-data/imzml_matrix3.tabular	Tue Sep 04 13:42:37 2018 -0400
@@ -1,4 +1,4 @@
-mz | spectra	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2
+mz	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2
 350	0	1.18586093356332e-26	7.10052307988494e-32
 350.083343505859	0	1.41173515299902e-27	0
 350.166687011719	5.94295388740686e-26	0	0
--- a/test-data/rdata_matrix.tabular	Wed Aug 22 13:42:05 2018 -0400
+++ b/test-data/rdata_matrix.tabular	Tue Sep 04 13:42:37 2018 -0400
@@ -1,4 +1,4 @@
-mz | spectra	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
+mz	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
 200.083343505859	46.3652739153013	0	9.17289559719717e-05	0	0	0	1.29693162341385	0	1.78496635304646e-05
 200.16667175293	22.4757921402152	0	0	5.8254927250654e-08	0	0	0	0	0
 200.25	38.2466047658708	0	0	3.59839441365526e-08	0	0	0	8.34774930605485e-08	0
Binary file test-data/rdata_notfiltered.pdf has changed