comparison msi_combine.xml @ 1:f3f6c32ab690 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_combine commit dd64f41874a56c4e2619bf58ae3681d806cf9b3f
author galaxyp
date Tue, 08 May 2018 02:36:26 -0400
parents 9cbcf48bf60a
children 00b6c61f5054
comparison
equal deleted inserted replaced
0:9cbcf48bf60a 1:f3f6c32ab690
1 <tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.7.0.0"> 1 <tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.10.0.0">
2 <description> 2 <description>
3 combine several mass spectrometry imaging datasets into one 3 combine several mass spectrometry imaging datasets into one
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
7 <requirement type="package" version="2.2.1">r-ggplot2</requirement>
7 </requirements> 8 </requirements>
8 <command detect_errors="exit_code"> 9 <command detect_errors="exit_code">
9 <![CDATA[ 10 <![CDATA[
10 #for $i, $infile in enumerate($infiles): 11 #for $i, $infile in enumerate($infiles):
11 #if $infile.ext == 'imzml' 12 #if $infile.ext == 'imzml'
12 cp '${infile.extra_files_path}/imzml' infile_$i.imzML && 13 ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
13 cp '${infile.extra_files_path}/ibd' infile_$i.ibd && 14 ln -s '${infile.extra_files_path}/ibd' infile.ibd &&
14 #elif $infile.ext == 'analyze75' 15 #elif $infile.ext == 'analyze75'
15 cp '${infile.extra_files_path}/hdr' infile_$i.hdr && 16 ln -s '${infile.extra_files_path}/hdr' infile.hdr &&
16 cp '${infile.extra_files_path}/img' infile_$i.img && 17 ln -s '${infile.extra_files_path}/img' infile.img &&
17 cp '${infile.extra_files_path}/t2m' infile_$i.t2m && 18 ln -s '${infile.extra_files_path}/t2m' infile.t2m &&
18 #else 19 #else
19 ln -s '$infile' infile_${i}.RData && 20 ln -s '$infile' infile_${i}.RData &&
20 #end if 21 #end if
21 #end for 22 #end for
22 cat '${msi_combine}' && 23 cat '${msi_combine}' &&
24 25
25 ]]> 26 ]]>
26 </command> 27 </command>
27 <configfiles> 28 <configfiles>
28 <configfile name="msi_combine"><![CDATA[ 29 <configfile name="msi_combine"><![CDATA[
30 #import re
31 ################ load libraries, read rename and combine files #################
32
29 library(Cardinal) 33 library(Cardinal)
30 34 library(ggplot2)
31 #if $coordinates_file: 35
32 input_list = read.delim("$coordinates_file", header = FALSE, 36 #if str( $combine_conditional.combine_method ) == 'xy_shifts':
37 input_list = read.delim("$combine_conditional.coordinates_file", header = FALSE,
33 stringsAsFactors = FALSE) 38 stringsAsFactors = FALSE)
34 #end if 39 #end if
35 40
36 pixel_vector = numeric() 41 pixel_vector = numeric()
42 names_vector = character()
43 x_shifts = 0
44 y_shifts = 0
45 max_y = numeric()
37 46
38 #set $msidata = [] 47 #set $msidata = []
48 #set $pixelcoords = []
49 #set $num_infiles = len($infiles)
50
39 #for $i, $infile in enumerate($infiles): 51 #for $i, $infile in enumerate($infiles):
40 52
41 #if $infile.ext == 'imzml' 53 #if $infile.ext == 'imzml'
42 msidata_$i <- readMSIData('infile_${i}.imzML') 54 msidata_$i <- readImzML('infile_${i}')
43 sampleNames(msidata_$i) = "msidata" 55 sampleNames(msidata_$i) = "msidata"
56 pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i)))
44 #elif $infile.ext == 'analyze75' 57 #elif $infile.ext == 'analyze75'
45 msidata_$i <- readMSIData('infile_${i}.hdr') 58 msidata_$i <- readAnalyze('infile_${i}')
46 sampleNames(msidata_$i) = "msidata" 59 sampleNames(msidata_$i) = "msidata"
60 pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i)))
47 #else 61 #else
48 loadRData <- function(fileName){ 62 loadRData <- function(fileName){
49 #loads an RData file, and returns it 63 #loads an RData file, and returns it
50 load(fileName) 64 load(fileName)
51 get(ls()[ls() != "fileName"]) 65 get(ls()[ls() != "fileName"])
52 } 66 }
53 msidata_$i = loadRData('infile_${i}.RData') 67 msidata_$i = loadRData('infile_${i}.RData')
54 sampleNames(msidata_$i) = "msidata" 68 sampleNames(msidata_$i) = "msidata"
69 pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i)))
55 #end if 70 #end if
56 71 colnames(pixelcoords_$i)[3] = "file_number"
57 #if $coordinates_file: 72
58 coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1+$coordinates_header,$column_x] 73 #if str( $combine_conditional.combine_method ) == 'xy_shifts':
59 coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1+$coordinates_header,$column_y] 74 coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1,$combine_conditional.column_x]
60 pixelnumber = ncol(msidata_$i) 75 coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1,$combine_conditional.column_y]
61 pixel_vector = append(pixel_vector, rep(input_list[$i+1+$coordinates_header,$column_names],times=pixelnumber)) 76 pixel_vector = append(pixel_vector, rep(input_list[$i+1,$combine_conditional.column_names],times=ncol(msidata_$i)))
77
78 #elif str( $combine_conditional.combine_method ) == 'automatic_combine':
79 #set escaped_element_identifier = re.sub('[^\w\-\s\[/]]', '_', str($infile.element_identifier))
80 names_vector = append(names_vector, rep(paste($i+1, "$escaped_element_identifier", sep="_"),ncol(msidata_$i)))
81 coord(msidata_$i)\$x = coord(msidata_$i)\$x - (min(coord(msidata_$i)\$x-1)) + x_shifts
82 coord(msidata_$i)\$y = coord(msidata_$i)\$y - (min(coord(msidata_$i)\$y-1)) + y_shifts
83 x_shifts = max(coord(msidata_$i)\$x) + $combine_conditional.x_distance
84 max_y = append(max_y, max(coord(msidata_$i)\$y))
85
86 all_files = $num_infiles
87 new_row = ($i+1)/ceiling(sqrt(all_files))
88 new_row%%1==0
89 if (new_row%%1==0)
90 {x_shifts = 0 ### x values start again at zero
91 y_shifts = max(max_y) + $combine_conditional.y_distance
92 max_y = numeric()}
93
62 #end if 94 #end if
63 #silent $msidata.append('msidata_'+str($i)) 95 #silent $msidata.append('msidata_'+str($i))
96 #silent $pixelcoords.append('pixelcoords_'+str($i))
64 #end for 97 #end for
65 98
66 msidata_combined = do.call(combine, list(#echo ','.join($msidata)#)) 99 ###################### automatic combination ###################################
67 100 ################################################################################
68 ### count NAs and replace by 0 101
69 spectra(msidata_combined)[is.na(spectra(msidata_combined))] <- 0 102 #if str( $combine_conditional.combine_method ) == 'automatic_combine':
70 print(paste0("Number of NAs which were replaced ",sum(is.na(msidata_combined)))) 103 print("automatic_combine")
71 104 msidata_combined = do.call(combine, list(#echo ','.join($msidata)#))
72 #if $coordinates_file: 105 sample_names = as.factor(names_vector)
73 ### rename pixels according to dataset 106 pData(msidata_combined)\$sample = sample_names
74 sample_names = as.factor(pixel_vector) 107
75 msidata_combined@pixelData@data\$sample = sample_names 108 ## create PDF to show pixels of each file
109 pdf("combining_qc.pdf", width=15, height=15)
110 position_df = cbind(coord(msidata_combined)[,1:2], pData(msidata_combined)\$sample)
111 colnames(position_df)[3] = "sample_name"
112
113 combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
114 geom_tile() +
115 coord_fixed()+
116 ggtitle("Spatial orientation of combined data")+
117 theme_bw()+
118 theme(text=element_text(family="ArialMT", face="bold", size=15))+
119 theme(legend.position="bottom",legend.direction="vertical")+
120 guides(fill=guide_legend(ncol=4,byrow=TRUE))
121
122
123 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
124 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
125 for(file_count in 1:nrow(coord_labels))
126 {
127 combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))
128 }
129
130 print(combine_plot)
131 dev.off()
132
133 ## save as (.RData)
134 msidata = msidata_combined
135 save(msidata, file="$msidata_combined")
136 ################################## xy shifts ###################################
137 ################################################################################
138 #elif str( $combine_conditional.combine_method ) == 'xy_shifts':
139 print("xy_shifts")
140 msidata_combined = do.call(combine, list(#echo ','.join($msidata)#))
141
142 ############# replace NA with 0 and rename pixels according to dataset #########
143
144 spectra(msidata_combined)[is.na(spectra(msidata_combined))] <- 0
145 print(paste0("Number of NAs which were replaced ",sum(is.na(spectra(msidata_combined)))))
146
147 sample_names = as.factor(pixel_vector)
148 pData(msidata_combined)\$sample = sample_names
149
150 ###################################### outputs #################################
151 ## save as (.RData)
152 msidata = msidata_combined
153 save(msidata, file="$msidata_combined")
154
155 ## create PDF to show pixels of each file
156 pdf("combining_qc.pdf")
157 position_df = cbind(coord(msidata), pData(msidata)\$sample)
158 colnames(position_df)[3] = "sample_name"
159
160 combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
161 geom_tile() +
162 coord_fixed()+
163 ggtitle("Spatial orientation of combined data")+
164 theme_bw()+
165 theme(text=element_text(family="ArialMT", face="bold", size=12))
166
167 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
168 for(file_count in 1:nrow(coord_labels))
169 {
170 combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,"sample_name"]))
171 }
172
173 print(combine_plot)
174
175 dev.off()
176
177
178 ################################## no shifts ###################################
179 ################################################################################
180 #elif str( $combine_conditional.combine_method ) == 'no_shifts':
181 print("no_shifts")
182 msidata_combined = do.call(combine, list(#echo ','.join($msidata)#))
183
184 ############# replace NA with 0 and rename pixels according to dataset #########
185
186 spectra(msidata_combined)[is.na(spectra(msidata_combined))] <- 0
187 print(paste0("Number of NAs which were replaced ",sum(is.na(spectra(msidata_combined)))))
188
189 ###################################### outputs #################################
190 ## save as (.RData)
191 msidata = msidata_combined
192 save(msidata, file="$msidata_combined")
193
194 ## create PDF to show pixels of each file
195 pdf("combining_qc.pdf")
196 position_df = cbind(coord(msidata), pData(msidata)\$sample)
197 colnames(position_df)[3] = "sample_name"
198
199 ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
200 geom_tile() +
201 coord_fixed()+
202 ggtitle("Spatial orientation of combined data")+
203 theme_bw()+
204 theme(text=element_text(family="ArialMT", face="bold", size=12))
205
206 dev.off()
207
208
209 ################################## no combination ##############################
210 ################################################################################
211 #elif str( $combine_conditional.combine_method ) == 'no_combine':
212 print("no_combine")
213
214 ## create PDF to show pixels of each file
215 pdf("combining_qc.pdf")
216 position_df = do.call(rbind, list(#echo ','.join($pixelcoords)#))
217 position_df[duplicated(position_df[,1:2]),3] = 0
218 position_df\$file_number = as.factor(position_df\$file_number)
219
220 combine_plot = ggplot(position_df, aes(x=x, y=y, fill=file_number))+
221 geom_tile() +
222 coord_fixed()+
223 ggtitle("Spatial orientation before combination")+
224 theme_bw()+
225 theme(text=element_text(family="ArialMT", face="bold", size=12))+
226 theme(panel.grid.major = element_line(colour = "black")) +
227 scale_x_continuous(minor_breaks = seq(min(position_df\$x-50), max(position_df\$x+50, 1))) +
228 scale_y_continuous(minor_breaks = seq(min(position_df\$y-50), max(position_df\$y+50, 1)))
229
230
231 coord_labels = aggregate(cbind(x,y)~file_number, data=position_df, mean)
232 for(file_count in 1:nrow(coord_labels))
233 {
234 combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,"file_number"]))
235 }
236
237 print(combine_plot)
238
239 dev.off()
76 #end if 240 #end if
77 241
78
79 ### outputs ###
80
81 ## save as (.RData)
82 msidata = msidata_combined
83 save(msidata, file="$msidata_combined")
84
85 pdf("combining_qc.pdf", fonts = "Times", pointsize = 12)
86 image(msidata_combined, mz=1, colorkey=FALSE)
87 dev.off()
88
89 ### optional: intensity matrix ###
90
91 #if $output_matrix: 242 #if $output_matrix:
92 243
93 if (length(features(msidata_combined))> 0 & length(pixels(msidata_combined)) > 0) 244 if (length(features(msidata_combined))> 0 & length(pixels(msidata_combined)) > 0)
94 { 245 {
95 spectramatrix = spectra(msidata_combined) 246 spectramatrix = spectra(msidata_combined)
96 rownames(spectramatrix) = mz(msidata_combined) 247 rownames(spectramatrix) = mz(msidata_combined)
97 newmatrix = rbind(pixels(msidata_combined), spectramatrix) 248 newmatrix = rbind(pixels(msidata_combined), spectramatrix)
98 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 249 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
99 }else{ 250 }else{
100 print("file has no features or pixels left") 251 print("file has no features or pixels left")
101 } 252 }
102 #end if 253 #end if
103 254
104 ]]></configfile> 255 ]]></configfile>
105 </configfiles> 256 </configfiles>
106 <inputs> 257 <inputs>
107 <param name="infiles" type="data" multiple="true" format="imzml,rdata,analyze75" 258 <param name="infiles" type="data" multiple="true" format="imzml,rdata,analyze75"
108 label="MSI rawdata as imzml, analyze7.5 or Cardinal MSImageSet saved as RData" 259 label="MSI rawdata as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
109 help="load imzml and ibd file by uploading composite datatype imzml"/> 260 help="load imzml and ibd file by uploading composite datatype imzml"/>
110 <param name="coordinates_file" type="data" optional="true" format="tabular" label="X and y values to shift data before combining" 261 <conditional name="combine_conditional">
111 help="tabular file with pixels of interest in two separate columns"/> 262 <param name="combine_method" type="select" label="Select the way you want to combine multiple files" help="More detailed help can be found in the help section at the bottom">
112 <param name="column_x" data_ref="coordinates_file" optional="true" label="Column with values for shift in x direction" type="data_column"/> 263 <option value="automatic_combine" selected="True" >automatic combination</option>
113 <param name="column_y" data_ref="coordinates_file" optional="true" label="Column with values for shift in y direction" type="data_column"/> 264 <option value="no_shifts" >no coordinates shift</option>
114 <param name="column_names" data_ref="coordinates_file" optional="true" label="Column with dataset names" type="data_column"/> 265 <option value="xy_shifts">xy shifts by hand</option>
115 <param name="coordinates_header" label="Number of header lines to skip" value="0" type="integer"/> 266 <option value="no_combine">check pixels before combination</option>
116 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> 267 </param>
268 <when value="no_shifts">
269 </when>
270 <when value="automatic_combine">
271 <param name="x_distance" type="integer" value="10" label="How many pixels in x direction should be between files?"/>
272 <param name="y_distance" type="integer" value="10" label="How many pixels in y direction should be between files?"/>
273 </when>
274 <when value="xy_shifts">
275 <param name="coordinates_file" type="data" format="tabular" label="datasetnames, X and y values to shift data before combining"
276 help="Tabular file with three columns: 1 for the filename, 1 for the x-coordinate shift and 1 for the y-coordinate shift"/>
277 <param name="column_x" data_ref="coordinates_file" label="Column with values for shift in x direction" type="data_column"/>
278 <param name="column_y" data_ref="coordinates_file" label="Column with values for shift in y direction" type="data_column"/>
279 <param name="column_names" data_ref="coordinates_file" label="Column with dataset names" type="data_column"/>
280 </when>
281 <when value="no_combine"/>
282 </conditional>
283 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
117 </inputs> 284 </inputs>
118 <outputs> 285 <outputs>
119 <data format="rdata" name="msidata_combined" label="Combined MSI data"/> 286 <data format="rdata" name="msidata_combined" label="Combined MSI data"/>
120 <data format="pdf" name="combining_qc" from_work_dir="combining_qc.pdf" label = "Combined image of pixels"/> 287 <data format="pdf" name="combining_qc" from_work_dir="combining_qc.pdf" label = "Combined image of pixels"/>
121 <data format="tabular" name="matrixasoutput" label="Combined matrix"> 288 <data format="tabular" name="matrixasoutput" label="Combined matrix">
123 </data> 290 </data>
124 </outputs> 291 </outputs>
125 <tests> 292 <tests>
126 <test expect_num_outputs="3"> 293 <test expect_num_outputs="3">
127 <param name="infiles" value="msidata_1.RData,msidata_2.RData,msidata_3.RData" ftype="rdata"/> 294 <param name="infiles" value="msidata_1.RData,msidata_2.RData,msidata_3.RData" ftype="rdata"/>
295 <param name="combine_method" value="xy_shifts"/>
128 <param name="coordinates_file" ftype="tabular" value="xy_coordinates.tabular"/> 296 <param name="coordinates_file" ftype="tabular" value="xy_coordinates.tabular"/>
129 <param name="column_x" value="1"/> 297 <param name="column_x" value="1"/>
130 <param name="column_y" value="2"/> 298 <param name="column_y" value="2"/>
131 <param name="column_names" value="3"/> 299 <param name="column_names" value="3"/>
132 <param name="output_matrix" value="True"/> 300 <param name="output_matrix" value="True"/>
134 <output name="msidata_combined" file="123_combined.RData" compare="sim_size" /> 302 <output name="msidata_combined" file="123_combined.RData" compare="sim_size" />
135 <output name="combining_qc" file="123_combined_QC.pdf" compare="sim_size" delta="20000"/> 303 <output name="combining_qc" file="123_combined_QC.pdf" compare="sim_size" delta="20000"/>
136 </test> 304 </test>
137 <test expect_num_outputs="3"> 305 <test expect_num_outputs="3">
138 <param name="infiles" value="msidata_1.RData,msidata_2.RData" ftype="rdata"/> 306 <param name="infiles" value="msidata_1.RData,msidata_2.RData" ftype="rdata"/>
307 <param name="combine_method" value="no_shifts"/>
139 <param name="output_matrix" value="True"/> 308 <param name="output_matrix" value="True"/>
140 <output name="matrixasoutput" file="12_combined_matrix.tabular"/> 309 <output name="matrixasoutput" file="12_combined_matrix.tabular"/>
141 <output name="msidata_combined" file="12_combined.RData" compare="sim_size" /> 310 <output name="msidata_combined" file="12_combined.RData" compare="sim_size" />
142 <output name="combining_qc" file="12_combined_QC.pdf" compare="sim_size" delta="20000"/> 311 <output name="combining_qc" file="12_combined_QC.pdf" compare="sim_size" delta="20000"/>
143 </test> 312 </test>
313 <test expect_num_outputs="3">
314 <param name="infiles" value="msidata_1.RData,msidata_2.RData" ftype="rdata"/>
315 <param name="combine_method" value="automatic_combine"/>
316 <param name="x_distance" value="1"/>
317 <param name="y_distance" value="1"/>
318 <param name="output_matrix" value="True"/>
319 <output name="matrixasoutput" file="12_auto_combined_matrix.tabular"/>
320 <output name="msidata_combined" file="12_auto_combined.RData" compare="sim_size" />
321 <output name="combining_qc" file="12_auto_combined_QC.pdf" compare="sim_size" delta="20000"/>
322 </test>
144 </tests> 323 </tests>
145 <help> 324 <help>
146 <![CDATA[ 325 <![CDATA[
147 This tool can combine several mass-spectrometry imaging files. A prerequisite for the combination is that the m/z values are the same across all datasets. To achieve this use the filtering tool to get all datasets to the same m/z range and then use the binning function in the preprocessing tool to obtain the same bins for all datasets. The pixels on the other hand must be unique, therefore you should provide a number for the shift of x and y coordinates so that pixels of different datasets do not overlap. 326 This tool can combine several mass-spectrometry imaging files.
327 1) m/z values need to be the same across all datasets
328 2) pixels (defined by x and y coordinates) must be unique
329
330 1) Same m/z values/axis can be achieved with the filtering tool to get all datasets to the same m/z range and afterwards binning in the preprocessing tool to obtain the same bins for all datasets.
331 2) The pixels (defined by x and y coordinates) must be unique across all datasets, therefore the option "Select the way you want to combine multiple files" is helpful:
332
333 - "automatic combination": files are arranged in a grid with a distance in x and y direction which can be given by the user
334 - "no coordinates shift": this option can only be used if all pixels are unique across datasets
335 - "xy shifts by hand": each file can be moved in x and y direction according to the user's needs (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinate shifts in separate columns)
336 - "check pixels before combination": no combination takes place. You will only get a pdf which shows the arrangement of the pixels (with or without additional xy shifts)
148 337
149 Input data: 3 types of input data can be used: 338 Input data: 3 types of input data can be used:
150 339
151 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_ 340 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
152 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) 341 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)