comparison msi_combine.xml @ 5:ff91e78b5c5c draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_combine commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
author galaxyp
date Fri, 06 Jul 2018 14:13:08 -0400
parents d05bd881af3d
children f4aafc565aa3
comparison
equal deleted inserted replaced
4:d05bd881af3d 5:ff91e78b5c5c
1 <tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.10.0.2"> 1 <tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.10.0.3">
2 <description> 2 <description>
3 combine several mass spectrometry imaging datasets into one 3 combine several mass spectrometry imaging datasets into one
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
65 65
66 ############## reading files and changing pixel coordinates ################### 66 ############## reading files and changing pixel coordinates ###################
67 67
68 #for $i, $infile in enumerate($infiles): 68 #for $i, $infile in enumerate($infiles):
69 69
70 #if $infile.ext == 'imzml' 70 #if $infile.ext == 'imzml'
71 msidata_$i <- readImzML('infile_${i}', mass.accuracy=$accuracy, units.accuracy = "$units") 71 #if str($processed_cond.processed_file) == "processed":
72 #elif $infile.ext == 'analyze75' 72 msidata_$i <- readImzML('infile_${i}', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
73 #else
74 msidata <- readImzML('infile')
75 #end if
76 #elif $infile.ext == 'analyze75'
73 msidata_$i <- readAnalyze('infile_${i}') 77 msidata_$i <- readAnalyze('infile_${i}')
74 #else 78 #else
75 msidata_$i = loadRData('infile_${i}.RData') 79 msidata_$i = loadRData('infile_${i}.RData')
76 #end if 80 #end if
81
77 82
78 sampleNames(msidata_$i) = "msidata" ## same name necessary to combine data in one single coordinate system 83 sampleNames(msidata_$i) = "msidata" ## same name necessary to combine data in one single coordinate system
79 84
80 ################### preparation xy shifts ########################## 85 ################### preparation xy shifts ##########################
81 86
82 #if str( $combine_conditional.combine_method ) == 'xy_shifts': 87 #if str( $combine_conditional.combine_method ) == 'xy_shifts':
83 88
84 coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1,$combine_conditional.column_x] ## shifts x coordinates according to tabular file 89 coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1,$combine_conditional.column_x] ## shifts x coordinates according to tabular file
85 coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1,$combine_conditional.column_y] ## shifts y coordinates according to tabular file 90 coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1,$combine_conditional.column_y] ## shifts y coordinates according to tabular file
86 pixel_vector = append(pixel_vector, rep(paste($i+1, input_list[$i+1,$combine_conditional.column_names], sep="_"),times=ncol(msidata_$i))) ## stores file name for each pixel 91 pixel_vector = append(pixel_vector, rep(paste($i+1, input_list[$i+1,$combine_conditional.column_names], sep="_"),times=ncol(msidata_$i))) ## stores file name for each pixel
87 92 msidata_$i\$combined_sample = rep(paste($i+1, input_list[$i+1,$combine_conditional.column_names], sep="_"),times=ncol(msidata_$i))
88 pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i))) 93 pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i)))
89 #silent $pixelcoords.append('pixelcoords_'+str($i)) 94 #silent $pixelcoords.append('pixelcoords_'+str($i))
90 colnames(pixelcoords_$i)[3] = "file_number" 95 colnames(pixelcoords_$i)[3] = "file_number"
91 96
92 ################### preparation automatic combination ########################## 97 ################### preparation automatic combination ##########################
93 98
94 #elif str( $combine_conditional.combine_method ) == 'automatic_combine': 99 #elif str( $combine_conditional.combine_method ) == 'automatic_combine':
95 names_vector = character() 100 names_vector = character()
96 #set escaped_element_identifier = re.sub('[^\w\-\s\[/]]', '_', str($infile.element_identifier)) ## use name of inputfile from Galaxy 101 #set escaped_element_identifier = re.sub('[^\w\-\s\[/]]', '_', str($infile.element_identifier)) ## use name of inputfile from Galaxy
97 if (sum(spectra(msidata_$i))>0) ## use only valid files 102 if (sum(spectra(msidata_$i)[],na.rm=TRUE)>0) ## use only valid files
98 { 103 {
99 if (is.null(levels(msidata_$i\$combined_sample))) ### if the file was not combined before use input file name, otherwise keep combined_sample name which was assigned before 104 if (is.null(levels(msidata_$i\$combined_sample))) ### if the file was not combined before use input file name, otherwise keep combined_sample name which was assigned before
100 { 105 {
101 names_vector = append(names_vector, rep(paste($i+1, "$escaped_element_identifier", sep="_"),ncol(msidata_$i))) 106 names_vector = append(names_vector, rep(paste($i+1, "$escaped_element_identifier", sep="_"),ncol(msidata_$i)))
102 msidata_$i\$combined_sample = as.factor(names_vector) 107 msidata_$i\$combined_sample = as.factor(names_vector)
121 126
122 ## store files to combine them later and for each file check if it is valid 127 ## store files to combine them later and for each file check if it is valid
123 128
124 #silent $msidata.append('msidata_'+str($i)) 129 #silent $msidata.append('msidata_'+str($i))
125 valid_dataset = append(valid_dataset, 130 valid_dataset = append(valid_dataset,
126 (ncol(msidata_$i)>0 & nrow(msidata_$i)>0 & sum(spectra(msidata_$i))>0)) 131 (ncol(msidata_$i)>0 & nrow(msidata_$i)>0 & sum(spectra(msidata_$i)[], na.rm=TRUE)>0))
127 132
128 #end for 133 #end for
129 134
130 135
131 ###################### automatic combination ################################### 136 ###################### automatic combination ###################################
152 ggtitle("Spatial orientation of combined data")+ 157 ggtitle("Spatial orientation of combined data")+
153 theme_bw()+ 158 theme_bw()+
154 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 159 theme(text=element_text(family="ArialMT", face="bold", size=15))+
155 theme(legend.position="bottom",legend.direction="vertical")+ 160 theme(legend.position="bottom",legend.direction="vertical")+
156 guides(fill=guide_legend(ncol=4,byrow=TRUE)) 161 guides(fill=guide_legend(ncol=4,byrow=TRUE))
157 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) 162 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
158 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) 163 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
159 for(file_count in 1:nrow(coord_labels)) 164 for(file_count in 1:nrow(coord_labels))
160 {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], 165 {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
161 y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} 166 y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}
162 print(combine_plot) 167 print(combine_plot)
175 print("xy_shifts") 180 print("xy_shifts")
176 181
177 #if str($combine_conditional.combination_true) == "yes_combi": 182 #if str($combine_conditional.combination_true) == "yes_combi":
178 print("combination with xy shifts") 183 print("combination with xy shifts")
179 184
180 msidata_combined = do.call(combine, list(#echo ','.join($msidata)#)) 185 ## find duplicated coordinates
181 sample_names = as.factor(pixel_vector) ## the sample names are assigned to each pixel 186 all_coordinates = do.call(rbind, list(#echo ','.join($pixelcoords)#))
182 msidata_combined\$combined_sample = sample_names ## sample names are stored in $combined_sample slot 187 duplicated_coordinates= duplicated(all_coordinates[,1:2])| duplicated(all_coordinates[,1:2], fromLast=TRUE)
188 print(paste0("Number of removed duplicated coordinates: ", sum(duplicated_coordinates)/2))
189 unique_coordinates = all_coordinates[!duplicated_coordinates,]
190
191 ## remove duplicated coordinates
192 datasetlist = list()
193 count = 1
194 for (usable_dataset in list(#echo ','.join($msidata)#)){
195 pixelsofinterest = pixels(usable_dataset)[names(pixels(usable_dataset)) %in% rownames(unique_coordinates)]
196 filtered_dataset = usable_dataset[,pixelsofinterest]
197 if (ncol(filtered_dataset) > 0 ){
198 datasetlist[[count]] = filtered_dataset}
199 count = count +1}
200
201 msidata_combined = do.call(combine, datasetlist)
183 202
184 ## save as (.RData) 203 ## save as (.RData)
185 204
186 msidata = msidata_combined 205 msidata = msidata_combined
187 save(msidata, file="$msidata_combined") 206 save(msidata, file="$msidata_combined")
194 #else: 213 #else:
195 print("no combination, only testing xy shifts") 214 print("no combination, only testing xy shifts")
196 215
197 position_df = do.call(rbind, list(#echo ','.join($pixelcoords)#)) 216 position_df = do.call(rbind, list(#echo ','.join($pixelcoords)#))
198 position_df\$sample_name = as.factor(pixel_vector) 217 position_df\$sample_name = as.factor(pixel_vector)
218
219 print(paste0("Number of duplicated coordinates: ", sum(duplicated(position_df[,1:2]))))
199 220
200 #end if 221 #end if
201 222
202 ## create PDF to show all pixels in PDF as QC 223 ## create PDF to show all pixels in PDF as QC
203 224
207 coord_fixed()+ 228 coord_fixed()+
208 ggtitle("Spatial orientation of combined data")+ 229 ggtitle("Spatial orientation of combined data")+
209 theme_bw()+ 230 theme_bw()+
210 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 231 theme(text=element_text(family="ArialMT", face="bold", size=15))+
211 theme(legend.position="bottom",legend.direction="vertical")+ 232 theme(legend.position="bottom",legend.direction="vertical")+
212 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 6))+
213 guides(fill=guide_legend(ncol=5,byrow=TRUE)) 233 guides(fill=guide_legend(ncol=5,byrow=TRUE))
214 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) 234 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
215 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) 235 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
216 for(file_count in 1:nrow(coord_labels)) 236 for(file_count in 1:nrow(coord_labels))
217 {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], 237 {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
223 243
224 ####################### optional matrix output ################################# 244 ####################### optional matrix output #################################
225 245
226 #if $output_matrix: 246 #if $output_matrix:
227 247
228 if (length(features(msidata_combined))> 0 & length(pixels(msidata_combined)) > 0) 248 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){
229 { 249 spectramatrix = spectra(msidata)[]
230 spectramatrix = spectra(msidata_combined) 250 spectramatrix = cbind(mz(msidata),spectramatrix)
231 rownames(spectramatrix) = mz(msidata_combined) 251 newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix)
232 newmatrix = rbind(pixels(msidata_combined), spectramatrix) 252 write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
233 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
234 }else{ 253 }else{
235 print("file has no features or pixels left") 254 print("file has no features or pixels left")
236 } 255 }
256
237 #end if 257 #end if
238 258
239 ]]></configfile> 259 ]]></configfile>
240 </configfiles> 260 </configfiles>
241 <inputs> 261 <inputs>
242 <param name="infiles" type="data" multiple="true" format="imzml,rdata,analyze75" 262 <param name="infiles" type="data" multiple="true" format="imzml,rdata,analyze75"
243 label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData" 263 label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
244 help="load imzml and ibd file by uploading composite datatype imzml"/> 264 help="load imzml and ibd file by uploading composite datatype imzml"/>
245 <param name="accuracy" type="float" value="50" label="Only for processed imzML files: enter mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/> 265 <conditional name="processed_cond">
246 <param name="units" display="radio" type="select" label="Only for processed imzML files: unit of the mass accuracy" help="either m/z or ppm"> 266 <param name="processed_file" type="select" label="Is the input file a processed imzML file ">
247 <option value="mz" >mz</option> 267 <option value="no_processed" selected="True">not a processed imzML</option>
248 <option value="ppm" selected="True" >ppm</option> 268 <option value="processed">processed imzML</option>
249 </param> 269 </param>
270 <when value="no_processed"/>
271 <when value="processed">
272 <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/>
273 <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm">
274 <option value="mz" >mz</option>
275 <option value="ppm" selected="True" >ppm</option>
276 </param>
277 </when>
278 </conditional>
250 <conditional name="combine_conditional"> 279 <conditional name="combine_conditional">
251 <param name="combine_method" type="select" label="Select the way you want to combine multiple files" help="More detailed help can be found in the help section at the bottom"> 280 <param name="combine_method" type="select" label="Select the way you want to combine multiple files" help="More detailed help can be found in the help section at the bottom">
252 <option value="automatic_combine" selected="True" >automatic combination</option> 281 <option value="automatic_combine" selected="True" >automatic combination</option>
253 <option value="xy_shifts">xy shifts by hand</option> 282 <option value="xy_shifts">xy shifts by hand</option>
254 </param> 283 </param>
257 <param name="coordinates_file" type="data" format="tabular" label="datasetnames, X and y values to shift data before combining" 286 <param name="coordinates_file" type="data" format="tabular" label="datasetnames, X and y values to shift data before combining"
258 help="Tabular file with three columns: 1 for the filename, 1 for the x-coordinate shift and 1 for the y-coordinate shift"/> 287 help="Tabular file with three columns: 1 for the filename, 1 for the x-coordinate shift and 1 for the y-coordinate shift"/>
259 <param name="column_x" data_ref="coordinates_file" label="Column with values for shift in x direction" type="data_column"/> 288 <param name="column_x" data_ref="coordinates_file" label="Column with values for shift in x direction" type="data_column"/>
260 <param name="column_y" data_ref="coordinates_file" label="Column with values for shift in y direction" type="data_column"/> 289 <param name="column_y" data_ref="coordinates_file" label="Column with values for shift in y direction" type="data_column"/>
261 <param name="column_names" data_ref="coordinates_file" label="Column with dataset names" type="data_column"/> 290 <param name="column_names" data_ref="coordinates_file" label="Column with dataset names" type="data_column"/>
262 <param name="combination_true" type="boolean" display="radio" truevalue="yes_combi" falsevalue="no_combi" label="Combine datasets" help = "Combination only works if x and y-shifts lead to unique pixel positions. If this is unknown use the No option to get an idea about the pixel overlap"/> 291 <param name="combination_true" type="boolean" display="radio" truevalue="yes_combi" falsevalue="no_combi" label="Combine datasets" help = "If there are duplicated pixels they will be deleted. If it is not clear if there are duplicated pixels, select No to get an idea about the pixel overlap"/>
263 </when> 292 </when>
264 </conditional> 293 </conditional>
265 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> 294 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
266 </inputs> 295 </inputs>
267 <outputs> 296 <outputs>
331 360
332 Options: 361 Options:
333 362
334 - "automatic combination": files are automatically arranged in a grid, subfiles are named according to input file name 363 - "automatic combination": files are automatically arranged in a grid, subfiles are named according to input file name
335 - "xy shifts by hand": each file can be moved in x and y direction according to the users need (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinates shifts in separate columns and the file name in a third column). To test if the pixels are correctly shifted use "combine datasets: No". 364 - "xy shifts by hand": each file can be moved in x and y direction according to the users need (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinates shifts in separate columns and the file name in a third column). To test if the pixels are correctly shifted use "combine datasets: No".
336 365 Combine datasets: Yes - Combines all datasets and removes all duplicated pixels (same x and y coordinates).
337 366
338 Output: 367 Output:
339 368
340 - imzML file containing multiple subfiles 369 - imzML file containing multiple subfiles
341 - pdf that shows the pixel positions of the combined files 370 - pdf that shows the pixel positions of the combined files