Mercurial > repos > galaxyp > msi_filtering
changeset 9:28ac8199d4d5 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit e87eea03505ab1ba067e192bbbcdc6197dc4b42e
author | galaxyp |
---|---|
date | Tue, 04 Sep 2018 13:42:37 -0400 |
parents | 262db9893c6f |
children | |
files | msi_filtering.xml test-data/analyze75_filtered2.pdf test-data/analyze_filtered.RData test-data/analyze_filtered.pdf test-data/analyze_filteredoutside.RData test-data/analyze_matrix.tabular test-data/imzml_filtered.RData test-data/imzml_filtered.pdf test-data/imzml_filtered2.pdf test-data/imzml_filtered3.RData test-data/imzml_filtered3.pdf test-data/imzml_filtered4.RData test-data/imzml_filtered4.pdf test-data/imzml_filtered5.RData test-data/imzml_filtered5.pdf test-data/imzml_matrix3.tabular test-data/rdata_matrix.tabular test-data/rdata_notfiltered.pdf |
diffstat | 18 files changed, 36 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/msi_filtering.xml Wed Aug 22 13:42:05 2018 -0400 +++ b/msi_filtering.xml Tue Sep 04 13:42:37 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.6"> +<tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.7"> <description>tool for filtering mass spectrometry imaging data</description> <requirements> <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> @@ -134,27 +134,27 @@ #elif str($pixels_cond.pixel_filtering) == "two_columns": print("two columns") - ## read tabular file, count number of rows (= number of pixels), extract dataframe with x,y,annotation (for QC), count number of valid pixels + ## read tabular file input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, stringsAsFactors = FALSE) startingrow = $pixels_cond.pixel_header+1 numberpixels = length(startingrow:nrow(input_list)) inputpixels = input_list[startingrow:nrow(input_list),c($pixels_cond.pixel_column_x, $pixels_cond.pixel_column_y, $pixels_cond.annotation_column_xy)] + + ## rewrite into x = 1, y = 1 format and filter msidata, count validpixels + pixelvector = character() + for (pixel in 1:nrow(inputpixels)){ + pixelvector[pixel] = paste0("x = ", inputpixels[pixel,1],", ", "y = ", inputpixels[pixel,2])} + pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] + msidata = msidata[,pixelsofinterest] + validpixels=ncol(msidata) + + ## in case some pixels are left print annotation plot colnames(inputpixels) = c("x", "y", "annotation") position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE) - validpixels = nrow(position_df) colnames(position_df)[3] = "annotation" position_df\$annotation = factor(position_df\$annotation) - ## for valid pixels: filter file for pixels - if (validpixels != 0){ - pixelvector = character() - for (pixel in 1:nrow(position_df)){ - pixelvector[pixel] = paste0("x = ", position_df[pixel,1],", ", "y = ", position_df[pixel,2])} - pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] - msidata = msidata[,pixelsofinterest] - }else{ - validpixels=0} ########### Pixels wihin x and y minima and maxima are kept ################### @@ -199,7 +199,6 @@ npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE) - if (npeaks_before_filtering > 0) { @@ -314,11 +313,14 @@ ## save msidata as Rfile save(msidata, file="$msidata_filtered") - + ## Number of empty TICs + TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) }else{ print("Inputfile or file filtered for pixels has no intensities > 0") numberfeatures = NA validmz = NA + ## Number of empty TICs + TICs2 = NA } #################### QC numbers ####################### @@ -344,7 +346,6 @@ ## Percentage of intensities > 0 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) ## Number of empty TICs - TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) NumemptyTIC2 = sum(TICs2 == 0) ## median TIC medint2 = round(median(TICs2), digits=2) @@ -356,7 +357,7 @@ "Range of y coordinates", "Intensities > 0", "Median TIC per pixel", - "Number of zero TICs", + "Number of empty spectra", "pixel overview", "feature overview") @@ -439,8 +440,8 @@ spectramatrix = spectra(msidata)[] spectramatrix = cbind(mz(msidata),spectramatrix) - newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) - write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + newmatrix = rbind(c("mz", names(pixels(msidata))), spectramatrix) + write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") #end if @@ -533,8 +534,8 @@ <outputs> <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/> - <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/> - <data format="tabular" name="matrixasoutput" label="$infile.display_name filtered_matrix"> + <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/> + <data format="tabular" name="intensity_matrix" label="$infile.display_name filtered_matrix"> <filter>output_matrix</filter> </data> </outputs> @@ -552,7 +553,7 @@ <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/> <param name="feature_column" value="2"/> <param name="feature_header" value="1"/> - <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/> + <output name="QC_overview" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size"/> </test> <test expect_num_outputs="2"> @@ -565,7 +566,7 @@ <param name="max_x_range" value="20"/> <param name="min_y_range" value="2"/> <param name="max_y_range" value="2"/> - <output name="filtering_qc" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/> + <output name="QC_overview" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size"/> </test> <test expect_num_outputs="3"> @@ -582,9 +583,9 @@ <param name="min_mz" value="350" /> <param name="max_mz" value="500"/> <param name="output_matrix" value="True"/> - <output name="filtering_qc" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/> + <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/> - <output name="matrixasoutput" file="imzml_matrix3.tabular"/> + <output name="intensity_matrix" file="imzml_matrix3.tabular"/> </test> <test expect_num_outputs="2"> <param name="infile" value="" ftype="imzml"> @@ -596,7 +597,7 @@ <param name="pixel_column_x" value="1"/> <param name="pixel_column_y" value="3"/> <param name="annotation_column_xy" value="2"/> - <output name="filtering_qc" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/> + <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/> </test> <test expect_num_outputs="2"> @@ -613,7 +614,7 @@ <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/> <param name="feature_column" value="1"/> <param name="feature_header" value="0"/> - <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> + <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> </test> <test expect_num_outputs="3"> @@ -629,9 +630,9 @@ <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/> <param name="feature_column" value="1"/> <param name="output_matrix" value="True"/> - <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/> + <output name="QC_overview" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" /> - <output name="matrixasoutput" file="analyze_matrix.tabular"/> + <output name="intensity_matrix" file="analyze_matrix.tabular"/> </test> <test expect_num_outputs="2"> <param name="infile" value="" ftype="analyze75"> @@ -639,7 +640,7 @@ <composite_data value="Analyze75.img"/> <composite_data value="Analyze75.t2m"/> </param> - <output name="filtering_qc" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/> + <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" /> </test> <test expect_num_outputs="3"> @@ -648,9 +649,9 @@ <param name="outputs_select" value="no_quality_control"/> </conditional> <param name="output_matrix" value="True"/> - <output name="matrixasoutput" file="rdata_matrix.tabular"/> + <output name="intensity_matrix" file="rdata_matrix.tabular"/> <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size" /> - <output name="filtering_qc" file="rdata_notfiltered.pdf" compare="sim_size" /> + <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" /> </test> </tests> <help> @@ -683,7 +684,7 @@ Tip: -- It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If you have m/z from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards use the corresponding feature m/z from dataset A to filter dataset B. +- It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool "Join two files on column allowing a small difference" should be used to find corresponding m/z values, which can then be used for filtering. ]]>
--- a/test-data/analyze_matrix.tabular Wed Aug 22 13:42:05 2018 -0400 +++ b/test-data/analyze_matrix.tabular Tue Sep 04 13:42:37 2018 -0400 @@ -1,4 +1,4 @@ -mz | spectra x = 1, y = 1 x = 1, y = 2 x = 3, y = 2 x = 1, y = 3 +mz x = 1, y = 1 x = 1, y = 2 x = 3, y = 2 x = 1, y = 3 1201.3349609375 14 12 9 14 1201.37634277344 17 21 11 20 1201.45910644531 22 18 18 22
--- a/test-data/imzml_matrix3.tabular Wed Aug 22 13:42:05 2018 -0400 +++ b/test-data/imzml_matrix3.tabular Tue Sep 04 13:42:37 2018 -0400 @@ -1,4 +1,4 @@ -mz | spectra x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 +mz x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 350 0 1.18586093356332e-26 7.10052307988494e-32 350.083343505859 0 1.41173515299902e-27 0 350.166687011719 5.94295388740686e-26 0 0
--- a/test-data/rdata_matrix.tabular Wed Aug 22 13:42:05 2018 -0400 +++ b/test-data/rdata_matrix.tabular Tue Sep 04 13:42:37 2018 -0400 @@ -1,4 +1,4 @@ -mz | spectra x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 +mz x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 200.083343505859 46.3652739153013 0 9.17289559719717e-05 0 0 0 1.29693162341385 0 1.78496635304646e-05 200.16667175293 22.4757921402152 0 0 5.8254927250654e-08 0 0 0 0 0 200.25 38.2466047658708 0 0 3.59839441365526e-08 0 0 0 8.34774930605485e-08 0