Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_Differential_Gene_Expression.xml @ 3:12fb0d4b1e93 draft
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit c7e4f2dfb8c35144b787850b60e116edfbaaa20f
author | yhoogstrate |
---|---|
date | Thu, 03 Sep 2015 09:42:12 -0400 |
parents | ec951a5017f8 |
children | 5d38abf7e4b6 |
comparison
equal
deleted
inserted
replaced
2:ec951a5017f8 | 3:12fb0d4b1e93 |
---|---|
13 <stdio> | 13 <stdio> |
14 <regex match="Error in[^a-z]+contrasts" | 14 <regex match="Error in[^a-z]+contrasts" |
15 source="both" | 15 source="both" |
16 level="fatal" | 16 level="fatal" |
17 description="Have the design- and expression-matrix been swapped?" /> | 17 description="Have the design- and expression-matrix been swapped?" /> |
18 <regex match="Error in eval\(expr, envir, enclos\)" | |
19 source="both" | |
20 level="fatal" | |
21 description="You have most likely used a condition in the contrast, that is not present in the Design matrix" /> | |
18 <regex match="Execution halted" | 22 <regex match="Execution halted" |
19 source="both" | 23 source="both" |
20 level="fatal" /> | 24 level="fatal" /> |
21 <regex match="Calculating library sizes from column" | 25 <regex match="Calculating library sizes from column" |
22 source="stderr" | 26 source="stderr" |
55 $output_MDSplot_logFC | 59 $output_MDSplot_logFC |
56 #else: | 60 #else: |
57 /dev/null | 61 /dev/null |
58 #end if | 62 #end if |
59 | 63 |
64 #if $output_MDSplot_logFC_coordinates: | |
65 $output_MDSplot_logFC_coordinates | |
66 #else: | |
67 /dev/null | |
68 #end if | |
69 | |
60 #if $output_MDSplot_bcv: | 70 #if $output_MDSplot_bcv: |
61 $output_MDSplot_bcv | 71 $output_MDSplot_bcv |
72 #else: | |
73 /dev/null | |
74 #end if | |
75 | |
76 #if $output_MDSplot_bcv_coordinates: | |
77 $output_MDSplot_bcv_coordinates | |
62 #else: | 78 #else: |
63 /dev/null | 79 /dev/null |
64 #end if | 80 #end if |
65 | 81 |
66 #if $output_BCVplot: | 82 #if $output_BCVplot: |
124 output_cpm <- args[6] | 140 output_cpm <- args[6] |
125 | 141 |
126 output_xpkm <- args[7] ##FPKM file - to be implemented | 142 output_xpkm <- args[7] ##FPKM file - to be implemented |
127 | 143 |
128 output_raw_counts <- args[8] | 144 output_raw_counts <- args[8] |
145 | |
129 output_MDSplot_logFC <- args[9] | 146 output_MDSplot_logFC <- args[9] |
130 output_MDSplot_bcv <- args[10] | 147 output_MDSplot_logFC_coordinates <- args[10] |
131 output_BCVplot <- args[11] | 148 |
132 output_MAplot <- args[12] | 149 output_MDSplot_bcv <- args[11] |
133 output_PValue_distribution_plot <- args[13] | 150 output_MDSplot_bcv_coordinates <- args[12] |
134 output_hierarchical_clustering_plot <- args[14] | 151 |
135 output_heatmap_plot <- args[15] | 152 output_BCVplot <- args[13] |
136 output_RData_obj <- args[16] | 153 output_MAplot <- args[14] |
137 output_format_images <- args[17] | 154 output_PValue_distribution_plot <- args[15] |
155 output_hierarchical_clustering_plot <- args[16] | |
156 output_heatmap_plot <- args[17] | |
157 output_RData_obj <- args[18] | |
158 output_format_images <- args[19] | |
138 | 159 |
139 | 160 |
140 ## Obtain read-counts | 161 ## Obtain read-counts |
141 expression_matrix <- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c("")) | 162 expression_matrix <- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c("")) |
142 design_matrix <- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c("")) | 163 design_matrix <- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c("")) |
212 write("Estimating tagwise dispersion...",stdout()) | 233 write("Estimating tagwise dispersion...",stdout()) |
213 dge <- estimateGLMTagwiseDisp(dge,design) | 234 dge <- estimateGLMTagwiseDisp(dge,design) |
214 | 235 |
215 | 236 |
216 # hierarchical clustering makes use of the distance of the MDS | 237 # hierarchical clustering makes use of the distance of the MDS |
217 if(output_MDSplot_logFC != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") { | 238 if(output_MDSplot_logFC != "/dev/null" || output_MDSplot_logFC_coordinates != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") { |
218 write("Calculating MDS plot (logFC method)",stdout()) | 239 write("Calculating MDS plot (logFC method)",stdout()) |
219 mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot | 240 n_dim <- nrow(dge\$samples) |
241 mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)),dim.plot=c(n_dim-2,n_dim-1)) | |
220 dev.off()# Kill it | 242 dev.off()# Kill it |
243 | |
244 # Reset to primary dimensions | |
245 mds_distance_logFC\$x = mds_distance_logFC\$cmdscale.out[,1] | |
246 mds_distance_logFC\$y = mds_distance_logFC\$cmdscale.out[,2] | |
221 | 247 |
222 if(output_MDSplot_logFC != "/dev/null") { | 248 if(output_MDSplot_logFC != "/dev/null") { |
223 write("Creating MDS plot (logFC method)",stdout()) | 249 write("Creating MDS plot (logFC method)",stdout()) |
224 if(output_format_images == "pdf") { | 250 if(output_format_images == "pdf") { |
225 pdf(output_MDSplot_logFC,height=14,width=14) | 251 pdf(output_MDSplot_logFC,height=14,width=14) |
234 | 260 |
235 diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x)) | 261 diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x)) |
236 diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y)) | 262 diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y)) |
237 plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") | 263 plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") |
238 points(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20) | 264 points(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20) |
239 text(mds_distance_logFC\$x, mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) | 265 text(mds_distance_logFC\$x,mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) |
240 rm(diff_x,diff_y) | 266 rm(diff_x,diff_y) |
241 | 267 |
242 dev.off() | 268 dev.off() |
269 } | |
270 | |
271 if(output_MDSplot_logFC_coordinates != "/dev/null") { | |
272 n_dim <- ncol(mds_distance_logFC\$cmdscale.out) | |
273 colnames(mds_distance_logFC\$cmdscale.out) <- paste(rep("Dim",n_dim),(1:n_dim),sep="_") | |
274 export <- data.frame(samples=rownames(mds_distance_logFC\$cmdscale.out),mds_distance_logFC\$cmdscale.out) | |
275 row.names(export) <- NULL | |
276 write.table(file=output_MDSplot_logFC_coordinates,export,sep="\t",row.names=FALSE,col.names=TRUE) | |
277 } | |
278 } | |
279 | |
280 | |
281 if(output_MDSplot_bcv != "/dev/null" || output_MDSplot_bcv_coordinates != "/dev/null" ) { | |
282 write("Creating MDS plot (bcv method)",stdout()) | |
283 | |
284 ## 1. First create a virtual plot to obtain the desired coordinates | |
285 n_dim <- nrow(dge\$samples) | |
286 mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)),dim.plot=c(n_dim-2,n_dim-1)) | |
287 dev.off() | |
288 | |
289 if(output_MDSplot_logFC != "/dev/null") { | |
290 ## 2. Re-plot the coordinates in a new figure with the size and settings. | |
291 if(output_format_images == "pdf") { | |
292 pdf(output_MDSplot_bcv,height=14,width=14) | |
293 } else if(output_format_images == "svg") { | |
294 svg(output_MDSplot_bcv,height=14,width=14) | |
295 } else { | |
296 ## png(output_MDSplot_bcv) | |
297 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
298 | |
299 bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3) | |
243 } | 300 } |
244 } | 301 |
245 | 302 diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x)) |
246 | 303 diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y)) |
247 if(output_MDSplot_bcv != "/dev/null") { | 304 plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") |
248 write("Creating MDS plot (bcv method)",stdout()) | 305 points(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20) |
249 | 306 text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) |
250 ## 1. First create a virtual plot to obtain the desired coordinates | 307 rm(diff_x,diff_y) |
251 pdf("bcvmds.pdf") | 308 |
252 mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) | 309 dev.off() |
253 dev.off()# Kill it | 310 } |
254 | 311 |
255 ## 2. Re-plot the coordinates in a new figure with the size and settings. | 312 if(output_MDSplot_bcv_coordinates != "/dev/null") { |
256 if(output_format_images == "pdf") { | 313 n_dim <- ncol(mds_distance_BCV\$cmdscale.out) |
257 pdf(output_MDSplot_bcv,height=14,width=14) | 314 colnames(mds_distance_BCV\$cmdscale.out) <- paste(rep("Dim",n_dim),(1:n_dim),sep="_") |
258 } else if(output_format_images == "svg") { | 315 export <- data.frame(samples=rownames(mds_distance_BCV\$cmdscale.out),mds_distance_BCV\$cmdscale.out) |
259 svg(output_MDSplot_bcv,height=14,width=14) | 316 row.names(export) <- NULL |
260 } else { | 317 write.table(file=output_MDSplot_bcv_coordinates,export,sep="\t",row.names=FALSE,col.names=TRUE) |
261 ## png(output_MDSplot_bcv) | 318 } |
262 ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ | |
263 | |
264 bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3) | |
265 } | |
266 | |
267 diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x)) | |
268 diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y)) | |
269 plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") | |
270 points(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20) | |
271 text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) | |
272 rm(diff_x,diff_y) | |
273 | |
274 dev.off() | |
275 } | 319 } |
276 | 320 |
277 | 321 |
278 if(output_BCVplot != "/dev/null") { | 322 if(output_BCVplot != "/dev/null") { |
279 write("Creating Biological coefficient of variation plot",stdout()) | 323 write("Creating Biological coefficient of variation plot",stdout()) |
421 <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" /> | 465 <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" /> |
422 | 466 |
423 <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes"> | 467 <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes"> |
424 <option value="make_output_raw_counts">Raw counts table</option> | 468 <option value="make_output_raw_counts">Raw counts table</option> |
425 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> | 469 <option value="make_output_MDSplot_logFC">MDS-plot (logFC-method)</option> |
426 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; much slower)</option> | 470 <option value="make_output_MDSplot_logFC_coordinates">MDS-plot coordinates table (logFC-method)</option> |
471 <option value="make_output_MDSplot_bcv">MDS-plot (BCV-method; slow)</option> | |
472 <option value="make_output_MDSplot_bcv_coordinates">MDS-plot coordinates table (BCV-method; slow)</option> | |
427 <option value="make_output_BCVplot">BCV-plot</option> | 473 <option value="make_output_BCVplot">BCV-plot</option> |
428 <option value="make_output_MAplot">MA-plot</option> | 474 <option value="make_output_MAplot">MA-plot</option> |
429 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> | 475 <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> |
430 <option value="make_output_hierarchical_clustering_plot">Hierarchical custering</option> | 476 <option value="make_output_hierarchical_clustering_plot">Hierarchical custering</option> |
431 <option value="make_output_heatmap_plot">Heatmap</option> | 477 <option value="make_output_heatmap_plot">Heatmap</option> |
455 <when input="output_format_images" value="pdf" format="pdf" /> | 501 <when input="output_format_images" value="pdf" format="pdf" /> |
456 <when input="output_format_images" value="svg" format="svg" /> | 502 <when input="output_format_images" value="svg" format="svg" /> |
457 </change_format> | 503 </change_format> |
458 </data> | 504 </data> |
459 | 505 |
506 <data format="tabular" name="output_MDSplot_logFC_coordinates" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot coordinates table (logFC method)"> | |
507 <filter>outputs and ("make_output_MDSplot_logFC_coordinates" in outputs)</filter> | |
508 </data> | |
509 | |
460 <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (bcv method)"> | 510 <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (bcv method)"> |
461 <filter>outputs and ("make_output_MDSplot_bcv" in outputs)</filter> | 511 <filter>outputs and ("make_output_MDSplot_bcv" in outputs)</filter> |
462 | 512 |
463 <change_format> | 513 <change_format> |
464 <when input="output_format_images" value="png" format="png" /> | 514 <when input="output_format_images" value="png" format="png" /> |
465 <when input="output_format_images" value="pdf" format="pdf" /> | 515 <when input="output_format_images" value="pdf" format="pdf" /> |
466 <when input="output_format_images" value="svg" format="svg" /> | 516 <when input="output_format_images" value="svg" format="svg" /> |
467 </change_format> | 517 </change_format> |
518 </data> | |
519 | |
520 <data format="tabular" name="output_MDSplot_bcv_coordinates" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot coordinates table (BCV method)"> | |
521 <filter>outputs and ("make_output_MDSplot_bcv_coordinates" in outputs)</filter> | |
468 </data> | 522 </data> |
469 | 523 |
470 <data format="png" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot"> | 524 <data format="png" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot"> |
471 <filter>outputs and ("make_output_BCVplot" in outputs)</filter> | 525 <filter>outputs and ("make_output_BCVplot" in outputs)</filter> |
472 | 526 |