Mercurial > repos > galaxyp > msi_combine
view msi_combine.xml @ 4:d05bd881af3d draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_combine commit 37da74ed68228b16efbdbde776e7c38cc06eb5d5
author | galaxyp |
---|---|
date | Tue, 19 Jun 2018 18:07:04 -0400 |
parents | 91bba2486773 |
children | ff91e78b5c5c |
line wrap: on
line source
<tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.10.0.2">
    <!--
        Galaxy wrapper that combines several mass spectrometry imaging (MSI)
        datasets into a single Cardinal MSImageSet.

        Structure:
          * command    : links every input to a predictable file name
                         (infile_<index>.<ext>), prints the generated R script
                         to stdout and runs it with Rscript.
          * configfile : Cheetah template that is rendered into the R script.
                         Cheetah directives start with "#", Cheetah comments
                         with "##" (these never reach R), and every R "$" has
                         to be written as "\$".
          * two modes  : "automatic_combine" arranges the inputs row-wise in a
                         grid; "xy_shifts" moves each input by the offsets of a
                         tabular file and can optionally skip the combination
                         (QC plot only).
    -->
    <description>
        combine several mass spectrometry imaging datasets into one
    </description>
    <requirements>
        <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[

        ## link every input file to infile_<index>.<ext>; composite datatypes
        ## (imzml, analyze75) keep their parts in extra_files_path
        #for $i, $infile in enumerate($infiles):
            #if $infile.ext == 'imzml'
                ln -s '${infile.extra_files_path}/imzml' infile_${i}.imzML &&
                ln -s '${infile.extra_files_path}/ibd' infile_${i}.ibd &&
            #elif $infile.ext == 'analyze75'
                ln -s '${infile.extra_files_path}/hdr' infile_${i}.hdr &&
                ln -s '${infile.extra_files_path}/img' infile_${i}.img &&
                ln -s '${infile.extra_files_path}/t2m' infile_${i}.t2m &&
            #else
                ln -s '$infile' infile_${i}.RData &&
            #end if
        #end for

        ## print the rendered R script to stdout, then execute it
        cat '${msi_combine}' &&
        Rscript '${msi_combine}'

    ]]>
    </command>
    <configfiles>
        <configfile name="msi_combine"><![CDATA[

#import re

################ load libraries and some preparations #################

library(Cardinal)
library(ggplot2)

## read tabular file for xy_shift option
## (no header; row n belongs to the n-th input file)
#if str( $combine_conditional.combine_method ) == 'xy_shifts':
    input_list = read.delim("$combine_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE)
#end if

## load RData and store with new variable name
loadRData <- function(fileName){
    ## loads an RData file into the function environment and returns the
    ## object stored in it (everything except the argument itself)
    load(fileName)
    get(ls()[ls() != "fileName"])
}

## preparations for reading files one by one with for loop
pixel_vector = numeric()
x_shifts = 0
y_shifts = 0
max_y = numeric()
valid_dataset = logical()
#set $msidata = []
#set $pixelcoords = []
#set $num_infiles = len($infiles)
all_files = $num_infiles

############## reading files and changing pixel coordinates ###################

#for $i, $infile in enumerate($infiles):

    #if $infile.ext == 'imzml'
        msidata_$i <- readImzML('infile_${i}', mass.accuracy=$accuracy, units.accuracy = "$units")
    #elif $infile.ext == 'analyze75'
        msidata_$i <- readAnalyze('infile_${i}')
    #else
        msidata_$i = loadRData('infile_${i}.RData')
    #end if

    sampleNames(msidata_$i) = "msidata" ## same name necessary to combine data in one single coordinate system

    ################### preparation xy shifts ##########################

    #if str( $combine_conditional.combine_method ) == 'xy_shifts':

        coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1,$combine_conditional.column_x] ## shifts x coordinates according to tabular file
        coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1,$combine_conditional.column_y] ## shifts y coordinates according to tabular file
        pixel_vector = append(pixel_vector, rep(paste($i+1, input_list[$i+1,$combine_conditional.column_names], sep="_"),times=ncol(msidata_$i))) ## stores file name for each pixel
        pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i)))
        #silent $pixelcoords.append('pixelcoords_'+str($i))
        colnames(pixelcoords_$i)[3] = "file_number"

    ################### preparation automatic combination ##########################

    #elif str( $combine_conditional.combine_method ) == 'automatic_combine':

        names_vector = character()

        ## use name of inputfile from Galaxy; every character that is not a
        ## word character, whitespace or one of - [ / ] is replaced by "_" so
        ## that the name can be pasted safely into the R string literal below
        #set escaped_element_identifier = re.sub('[^\w\-\s\[/\]]', '_', str($infile.element_identifier))

        ## use only valid files
        if (sum(spectra(msidata_$i))>0) {
            ### if the file was not combined before use input file name, otherwise keep combined_sample name which was assigned before
            if (is.null(levels(msidata_$i\$combined_sample))) {
                names_vector = append(names_vector, rep(paste($i+1, "$escaped_element_identifier", sep="_"),ncol(msidata_$i)))
                msidata_$i\$combined_sample = as.factor(names_vector)
            }
        }

        ## Number of input files define grid which is row-wise filled with files
        files_per_row = ceiling(sqrt(all_files))

        ## files without pixels are not moved: min() and max() of an empty
        ## vector are Inf and -Inf and would corrupt the shifts of all
        ## following files
        if (ncol(msidata_$i) > 0) {
            coord(msidata_$i)\$x = coord(msidata_$i)\$x - (min(coord(msidata_$i)\$x-1)) + x_shifts
            coord(msidata_$i)\$y = coord(msidata_$i)\$y - (min(coord(msidata_$i)\$y-1)) + y_shifts
            x_shifts = max(coord(msidata_$i)\$x) + 5
            max_y = append(max_y, max(coord(msidata_$i)\$y))
        }

        if (($i+1) %% files_per_row == 0) {
            x_shifts = 0 ### when row is filled: x values start again at zero
            ### when row is filled: y value increases to start a new row
            ### (unchanged if the finished row contained no pixels at all)
            if (length(max_y) > 0) {
                y_shifts = max(max_y) + 5
            }
            max_y = numeric()
        }

    #end if

    ## store files to combine them later and for each file check if it is valid
    #silent $msidata.append('msidata_'+str($i))
    valid_dataset = append(valid_dataset, (ncol(msidata_$i)>0 && nrow(msidata_$i)>0 && sum(spectra(msidata_$i))>0))

#end for

###################### automatic combination ###################################
################################################################################

#if str( $combine_conditional.combine_method ) == 'automatic_combine':

    print("automatic_combine")

    ## combine only valid datasets
    valid_data = list(#echo ','.join($msidata)#)[valid_dataset]
    msidata_combined = do.call(combine, valid_data)
    print("Valid datasets in order of input bottom to top:")
    print(valid_dataset)

    ## create dataframe with x,y,sample_name and show all pixels in PDF as QC
    pdf("Combined_qc.pdf", width=15, height=15)
    position_df = cbind(coord(msidata_combined)[,1:2], msidata_combined\$combined_sample)
    colnames(position_df)[3] = "sample_name"

    combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
        geom_tile() +
        coord_fixed()+
        ggtitle("Spatial orientation of combined data")+
        theme_bw()+
        theme(text=element_text(family="ArialMT", face="bold", size=15))+
        theme(legend.position="bottom",legend.direction="vertical")+
        guides(fill=guide_legend(ncol=4,byrow=TRUE))

    ## label every sample at the mean position of its pixels with its file number
    coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
    coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
    for (file_count in seq_len(nrow(coord_labels))) {
        combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
        y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))
    }
    print(combine_plot)
    dev.off()

    ## save as (.RData)
    msidata = msidata_combined
    save(msidata, file="$msidata_combined")

################################## xy shifts ###################################
################################################################################

#elif str( $combine_conditional.combine_method ) == 'xy_shifts':

    print("xy_shifts")

    #if str($combine_conditional.combination_true) == "yes_combi":

        print("combination with xy shifts")
        msidata_combined = do.call(combine, list(#echo ','.join($msidata)#))
        sample_names = as.factor(pixel_vector) ## the sample names are assigned to each pixel
        msidata_combined\$combined_sample = sample_names ## sample names are stored in the combined_sample slot

        ## save as (.RData)
        msidata = msidata_combined
        save(msidata, file="$msidata_combined")

        ## create x,y,sample_name dataframe for QC pdf
        ## (only the first two coordinate columns, as in the automatic mode,
        ## so that the sample names always end up in column 3)
        position_df = cbind(coord(msidata)[,1:2], msidata\$combined_sample)
        colnames(position_df)[3] = "sample_name"

    #else:

        ## nothing is combined and nothing is saved here, only the shifted
        ## pixel positions are collected for the QC plot
        print("no combination, only testing xy shifts")
        position_df = do.call(rbind, list(#echo ','.join($pixelcoords)#))
        position_df\$sample_name = as.factor(pixel_vector)

    #end if

    ## create PDF to show all pixels in PDF as QC
    pdf("Combined_qc.pdf", width=15, height=15)
    combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
        geom_tile() +
        coord_fixed()+
        ggtitle("Spatial orientation of combined data")+
        theme_bw()+
        theme(text=element_text(family="ArialMT", face="bold", size=15))+
        theme(legend.position="bottom",legend.direction="vertical")+
        theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 6))+
        guides(fill=guide_legend(ncol=5,byrow=TRUE))

    ## label every sample at the mean position of its pixels with its file number
    coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
    coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
    for (file_count in seq_len(nrow(coord_labels))) {
        combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
        y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))
    }
    print(combine_plot)
    dev.off()

#end if

####################### optional matrix output #################################

#if $output_matrix:

    ## msidata_combined does not exist when xy shifts were only tested
    ## without combining the datasets
    if (!exists("msidata_combined")) {
        print("datasets were not combined, intensity matrix cannot be written")
    } else if (length(features(msidata_combined)) > 0 && length(pixels(msidata_combined)) > 0) {
        spectramatrix = spectra(msidata_combined)
        rownames(spectramatrix) = mz(msidata_combined)
        ## the pixel row is only bound to obtain column names and is dropped
        ## again when writing; drop = FALSE keeps a single m/z feature as a row
        newmatrix = rbind(pixels(msidata_combined), spectramatrix)
        write.table(newmatrix[2:nrow(newmatrix), , drop = FALSE], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
    } else {
        print("file has no features or pixels left")
    }

#end if

    ]]></configfile>
    </configfiles>
    <inputs>
        <param name="infiles" type="data" multiple="true" format="imzml,rdata,analyze75"
            label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
            help="load imzml and ibd file by uploading composite datatype imzml"/>
        <param name="accuracy" type="float" value="50" label="Only for processed imzML files: enter mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/>
        <param name="units" display="radio" type="select" label="Only for processed imzML files: unit of the mass accuracy" help="either m/z or ppm">
            <option value="mz" >mz</option>
            <option value="ppm" selected="True" >ppm</option>
        </param>
        <conditional name="combine_conditional">
            <param name="combine_method" type="select" label="Select the way you want to combine multiple files" help="More detailed help can be found in the help section at the bottom">
                <option value="automatic_combine" selected="True" >automatic combination</option>
                <option value="xy_shifts">xy shifts by hand</option>
            </param>
            <when value="automatic_combine"/>
            <when value="xy_shifts">
                <param name="coordinates_file" type="data" format="tabular" label="datasetnames, X and y values to shift data before combining"
                    help="Tabular file with three columns: 1 for the filename, 1 for the x-coordinate shift and 1 for the y-coordinate shift"/>
                <param name="column_x" data_ref="coordinates_file" label="Column with values for shift in x direction" type="data_column"/>
                <param name="column_y" data_ref="coordinates_file" label="Column with values for shift in y direction" type="data_column"/>
                <param name="column_names" data_ref="coordinates_file" label="Column with dataset names" type="data_column"/>
                <param name="combination_true" type="boolean" display="radio" truevalue="yes_combi" falsevalue="no_combi" label="Combine datasets"
                    help = "Combination only works if x and y-shifts lead to unique pixel positions. If this is unknown use the No option to get an idea about the pixel overlap"/>
            </when>
        </conditional>
        <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
    </inputs>
    <outputs>
        <data format="rdata" name="msidata_combined" label="MSI_data_combined"/>
        <data format="pdf" name="combining_qc" from_work_dir="Combined_qc.pdf" label = "Combined_QC"/>
        <data format="tabular" name="matrixasoutput" label="Combined_matrix">
            <filter>output_matrix</filter>
        </data>
    </outputs>
    <tests>
        <!-- xy shifts with combination and matrix output -->
        <test expect_num_outputs="3">
            <param name="infiles" value="msidata_1.RData,msidata_2.RData,msidata_3.RData" ftype="rdata"/>
            <param name="combine_method" value="xy_shifts"/>
            <param name="coordinates_file" ftype="tabular" value="xy_coordinates.tabular"/>
            <param name="column_x" value="1"/>
            <param name="column_y" value="2"/>
            <param name="column_names" value="3"/>
            <param name="combination_true" value="yes_combi"/>
            <param name="output_matrix" value="True"/>
            <output name="matrixasoutput" file="123_combined_matrix.tabular"/>
            <output name="msidata_combined" file="123_combined.RData" compare="sim_size" />
            <output name="combining_qc" file="123_combined_QC.pdf" compare="sim_size" delta="20000"/>
        </test>
        <!-- xy shifts without combination: only the QC plot is produced -->
        <test expect_num_outputs="2">
            <param name="infiles" value="msidata_1.RData,msidata_2.RData,msidata_3.RData" ftype="rdata"/>
            <param name="combine_method" value="xy_shifts"/>
            <param name="coordinates_file" ftype="tabular" value="xy_coordinates.tabular"/>
            <param name="column_x" value="1"/>
            <param name="column_y" value="2"/>
            <param name="column_names" value="3"/>
            <param name="combination_true" value="no_combi"/>
            <output name="msidata_combined" file="123_no_combi.RData" compare="sim_size" />
            <output name="combining_qc" file="123_no_combi_QC.pdf" compare="sim_size" delta="20000"/>
        </test>
        <!-- automatic combination with matrix output -->
        <test expect_num_outputs="3">
            <param name="infiles" value="msidata_1.RData,msidata_2.RData" ftype="rdata"/>
            <param name="combine_method" value="automatic_combine"/>
            <param name="output_matrix" value="True"/>
            <output name="matrixasoutput" file="12_combined_matrix.tabular"/>
            <output name="msidata_combined" file="12_combined.RData" compare="sim_size" />
            <output name="combining_qc" file="12_combined_QC.pdf" compare="sim_size" delta="20000"/>
        </test>
        <!-- automatic combination of a single file with an already combined file -->
        <test expect_num_outputs="2">
            <param name="infiles" value="msidata_1.RData,123_combined.RData" ftype="rdata"/>
            <param name="combine_method" value="automatic_combine"/>
            <param name="output_matrix" value="False"/>
            <output name="msidata_combined" file="112_auto_combined.RData" compare="sim_size" />
            <output name="combining_qc" file="112_auto_combined_QC.pdf" compare="sim_size" delta="20000"/>
        </test>
    </tests>
    <help>
        <![CDATA[

Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_

This tool uses the Cardinal combine function to combine several mass spectrometry imaging datasets.

Input data: 3 types of input data can be used:

- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)

Prerequisite:

- m/z values need to be the same across all datasets (before using this tool), this can be achieved with the filtering tool (use same m/z range) and the preprocessing tool (use same binning parameter)

Options:

- "automatic combination": files are automatically arranged in a grid, subfiles are named according to input file name
- "xy shifts by hand": each file can be moved in x and y direction according to the user's needs (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinate shifts in separate columns and the file name in a third column). To test if the pixels are correctly shifted use "combine datasets: No".

Output:

- Cardinal "MSImageSet" containing all combined subfiles (variable name "msidata", saved as .RData); this file stays empty when "combine datasets: No" is selected
- pdf that shows the pixel positions of the combined files
- optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns); only written when the datasets were combined

        ]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv146</citation>
    </citations>
</tool>