Mercurial > repos > galaxyp > cardinal_data_exporter
diff data_exporter.xml @ 0:28ba52c9548c draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
author | galaxyp |
---|---|
date | Mon, 01 Oct 2018 01:05:33 -0400 |
parents | |
children | e30d8b72415f |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_exporter.xml Mon Oct 01 01:05:33 2018 -0400 @@ -0,0 +1,359 @@ +<tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.0"> + <description> + exports imzML and Analyze7.5 to tabular files + </description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"> + <![CDATA[ + + @INPUT_LINKING@ + cat '${cardinal_imzml_exporter}' && + Rscript '${cardinal_imzml_exporter}' + + ]]> + </command> + <configfiles> + <configfile name="cardinal_imzml_exporter"><![CDATA[ + +################################# load libraries and read file ################# + +library(Cardinal) + +@READING_MSIDATA@ + +npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) + +if (npeaks > 0){ + + ###################### Intensity matrix output ################################ + + #if "int_matrix" in str($output_options).split(","): + print("intensity matrix output") + + spectramatrix = spectra(msidata)[] + mz_names = gsub(" = ", "_", names(features(msidata))) + mz_names = gsub("/", "", mz_names) + pixel_names = gsub(", y = ", "_", names(pixels(msidata))) + pixel_names = gsub(" = ", "y_", pixel_names) + + spectramatrix = cbind(mz_names,spectramatrix) + newmatrix = rbind(c("mz_name", pixel_names), spectramatrix) + write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + + #end if + + + ############################## m/z feature output ########################## + #if "mz_tabular" in str($output_options).split(","): + print("mz feature output") + + mz_names = gsub(" = ", "_", names(features(msidata))) + mz_names = gsub("/", "", mz_names) + + ## mean, median, sd and SEM intensity per file and mz + full_sample_mean = apply(spectra(msidata)[],1,mean, na.rm=TRUE) + full_sample_median = apply(spectra(msidata)[],1,median, na.rm=TRUE) + full_sample_sd = apply(spectra(msidata)[],1,sd, na.rm=TRUE) + full_sample_sem = full_sample_sd/full_sample_mean*100 + ## npeaks and sum of all intensities per spectrum and mz + mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z + peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) + + ## combine into dataframe, order is the same for all vectors + mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) + colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") + write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") + #end if + + ###################### summarized m/z feature output ####################### + + #if str($tabular_annotation.load_annotation) == 'yes_annotation': + print("summarized annotation output") + + ## read and extract x,y,annotation information + input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) + annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] + colnames(annotation_input) = c("x", "y", "annotation") + + ## merge with coordinate information of msidata + msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) + colnames(msidata_coordinates)[3] = "pixel_index" + merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) + merged_annotation[is.na(merged_annotation)] = "NA" + merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] + msidata\$annotation = as.factor(merged_annotation[,4]) + + ## create m/z feature name + mz_names = gsub(" = ", "_", names(features(msidata))) + mz_names = gsub("/", "", mz_names) + + #if "mean" in str($tabular_annotation.summary_type).split(","): + print("summarized mean") + + ## calculate mean per annotation group + sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) + count = 1 + for (subsample in levels(msidata\$annotation)){ + subsample_pixels = msidata[,msidata\$annotation == subsample] + subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) + sample_matrix = cbind(sample_matrix, subsample_calc) + count = count+1} + sample_matrix_mean = cbind(mz_names,sample_matrix) + sample_matrix_mean = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_mean) + write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + #end if + + #if "median" in str($tabular_annotation.summary_type).split(","): + print("summarized median") + + sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) + count = 1 + for (subsample in levels(msidata\$annotation)){ + subsample_pixels = msidata[,msidata\$annotation == subsample] + subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) + sample_matrix = cbind(sample_matrix, subsample_calc) + count = count+1} + sample_matrix_median = cbind(mz_names,sample_matrix) + sample_matrix_median = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_median) + write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + #end if + + #if "sd" in str($tabular_annotation.summary_type).split(","): + print("summarized sd") + + sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) + count = 1 + for (subsample in levels(msidata\$annotation)){ + subsample_pixels = msidata[,msidata\$annotation == subsample] + subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) + sample_matrix = cbind(sample_matrix, subsample_calc) + count = count+1} + sample_matrix_sd = cbind(mz_names,sample_matrix) + sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd) + write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + #end if + + #end if + + + ############################ spectra (pixel) output ############################ + #if "pixel_tabular" in str($output_options).split(","): + print("pixel output") + + ## coordinates + xycoordinates = coord(msidata)[,1:2] + + ## pixel name + pixel_names = gsub(", y = ", "_", names(pixels(msidata))) + pixel_names = gsub(" = ", "y_", pixel_names) + + ## pixel order + pixelxyarray=1:length(pixels(msidata)) + + ## number of pixels per spectrum: every intensity value > 0 counts as peak + peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE) + + ## Total ion chromatogram per spectrum + TICs = round(colSums(spectra(msidata)[], na.rm=TRUE), digits = 2) + + ## Highest m/z per spectrum + highestmz = apply(spectra(msidata)[],2,which.max) + highestmz_data = mz(msidata)[highestmz] + + ## Combine into dataframe; order is the same for all vectors + spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, TICs, highestmz_data) + colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz") + + #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": + + calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) + calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] + ### calculate how many input calibrant m/z are valid: + inputcalibrants = calibrant_list[calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata)),,drop = FALSE] + inputcalibrantmasses = inputcalibrants[,1] + + ##QC plot number 2) Number of calibrants per spectrum + + ## matrix with calibrants in columns and in rows if there is peak intensity in range or not + pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0) + + if (length(inputcalibrantmasses) != 0){ + + ## calculate plusminus values in m/z for each calibrant + plusminusvalues = rep($counting_calibrants.plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses + + ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0 + + for (mass in 1:length(inputcalibrantmasses)){ + filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] + if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ + ## intensity of all m/z > 0 + intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0 + }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ + ## intensity of only m/z > 0 + intensity_sum = spectra(filtered_data)[] > 0 + }else{ + intensity_sum = rep(FALSE, ncol(filtered_data))} + ## for each pixel add sum of intensities > 0 in the given m/z range + pixelmatrix = rbind(pixelmatrix, intensity_sum) + } + ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) + countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE)) + }else{countvector = rep(0,ncol(msidata))} + countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts + colnames(countdf) = c("x_values", "y_values", "input m/z count") + spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) + + ## sort columns to have spectra_names as rowname in first column + spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count")] + + #end if + #if str($tabular_annotation.load_annotation) == 'yes_annotation': + + colnames(annotation_input) = c("x_values", "y_values", "annotation") + spectra_df = merge(annotation_input,spectra_df, by=c("x_values", "y_values")) + + ## sort columns to have spectra_names as rowname in first column + #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": + spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count", "annotation")] + #else + spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "annotation")] + #end if + + #end if + ## sort rows according to original pixel order + spectra_df = spectra_df[match(pixel_names, spectra_df\$spectra_names),] + + ## Create list and output tabular + write.table(spectra_df, file="$pixel_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") + #end if + + +}else{ + print("file has no features or pixels left") +} + + + ]]></configfile> + </configfiles> + <inputs> + <expand macro="reading_msidata"/> + <param name="output_options" type="select" display="checkboxes" optional="False" multiple="true" label="Multiple output files can be selected"> + <option value="int_matrix" selected="True" >intensity matrix</option> + <option value="mz_tabular">mz feature output</option> + <option value="pixel_tabular">pixel output</option> + </param> + <conditional name="counting_calibrants"> + <param name="pixel_with_calibrants" type="select" label="Add number of m/z of interest per spectrum to pixel output"> + <option value="no_calibrants" selected="True">no</option> + <option value="yes_calibrants">yes</option> + </param> + <when value="no_calibrants"/> + <when value="yes_calibrants"> + <expand macro="reading_1_column_mz_tabular" label="For each spectrum the occurrence of the provided m/z values is counted"/> + <param name="plusminus_ppm" value="200" type="float" label="ppm range will be added in both directions to input m/z" help="The m/z window is used to search for peaks, if intensity > 0 found in the window the m/z is considered present, if all intensities are 0 the m/z is considered not present"/> + </when> + </conditional> + <conditional name="tabular_annotation"> + <param name="load_annotation" type="select" label="Pixel annotation can be used to summarize intensities per annotation group"> + <option value="no_annotation" selected="True">no</option> + <option value="yes_annotation">yes</option> + </param> + <when value="no_annotation"/> + <when value="yes_annotation"> + <expand macro="reading_pixel_annotations"/> + <param name="summary_type" type="select" display="checkboxes" optional="False" multiple="true" label="Calculation for each m/z and all pixels of a annotation group" help="This step will only work if pixel annotations are provided"> + <option value="mean">mean</option> + <option value="median">median</option> + <option value="sd">standard deviation</option> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"> + <filter>"int_matrix" in output_options</filter> + </data> + <data format="tabular" name="pixel_output" label="${tool.name} on ${on_string}: spectra"> + <filter>"pixel_tabular" in output_options</filter> + </data> + <data format="tabular" name="feature_output" label="${tool.name} on ${on_string}: features"> + <filter>"mz_tabular" in output_options</filter> + </data> + <data format="tabular" name="summarized_mean" label="${tool.name} on ${on_string}: group_mean"> + <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'mean' in tabular_annotation['summary_type']</filter> + </data> + <data format="tabular" name="summarized_median" label="${tool.name} on ${on_string}: group_median"> + <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'median' in tabular_annotation['summary_type']</filter> + </data> + <data format="tabular" name="summarized_sd" label="${tool.name} on ${on_string}: group_sd"> + <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'sd' in tabular_annotation['summary_type']</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="2"> + <expand macro="infile_imzml"/> + <param name="output_options" value="int_matrix,mz_tabular"/> + <output name="intensity_matrix" file="int_matrix1.tabular"/> + <output name="feature_output" file="features_out1.tabular"/> + </test> + <test expect_num_outputs="3"> + <expand macro="infile_analyze75"/> + <param name="output_options" value="pixel_tabular"/> + <conditional name="tabular_annotation"> + <param name="load_annotation" value="yes_annotation"/> + <param name="annotation_file" value="annotations.tabular"/> + <param name="column_x" value="1"/> + <param name="column_y" value="2"/> + <param name="column_names" value="4"/> + <param name="tabular_header" value="True"/> + <param name="summary_type" value="mean,sd"/> + </conditional> + <output name="pixel_output" file="pixel_out2.tabular"/> + <output name="summarized_mean" file="mean_out2.tabular"/> + <output name="summarized_sd" file="sd_out2.tabular"/> + </test> + <test expect_num_outputs="3"> + <expand macro="infile_imzml"/> + <param name="output_options" value="int_matrix,pixel_tabular,mz_tabular"/> + <conditional name="counting_calibrants"> + <param name="pixel_with_calibrants" value="yes_calibrants"/> + <param name="mz_tabular" value="inputcalibrantfile2.txt"/> + <param name="feature_column" value="1"/> + <param name="feature_header" value="False"/> + <param name="plusminus_ppm" value="200"/> + </conditional> + <output name="intensity_matrix" file="int_matrix3.tabular"/> + <output name="feature_output" file="features_out3.tabular"/> + <output name="pixel_output" file="pixel_out3.tabular"/> + </test> + </tests> + <help> + <![CDATA[ + +@CARDINAL_DESCRIPTION@ + +----- + +This tool provides multiple tabular output options for mass spectrometry imaging data files. + +@MSIDATA_INPUT_DESCRIPTION@ + +@SPECTRA_TABULAR_INPUT_DESCRIPTION@ + +@MZ_TABULAR_INPUT_DESCRIPTION@ + +**Output options** + +- intensity matrix: m/z in rows, spectra in columns, filled with intensity values +- spectra output: spectra in rows - for each spectrum: name, x and y coordinates,order, number of peaks (intensities > 0), total ion chromatogram (TIC), highest m/z feature per spectrum, optional count of input m/z per spectrum, optional spectrum annotation +- mz feature output: m/z in rows - for each m/z: name, m/z, mean, median, standard deviation (sd), standard error of the mean (sem), sum of all intensities per m/z, number of peaks (intensity > 0) per m/z +- summarized intensities: pixel annotations will be used to group spectra into annotation groups and calculate mean, median and sd of the intensities per group + + ]]> + </help> + <expand macro="citations"/> +</tool>