Mercurial > repos > yhoogstrate > edger_with_design_matrix
changeset 2:ec951a5017f8 draft
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit a6cf3ec153ca4a3846258a223d287ca125eea7be
author | yhoogstrate |
---|---|
date | Tue, 01 Sep 2015 09:15:07 -0400 |
parents | a4a4c88783ea |
children | 12fb0d4b1e93 |
files | edgeR_Concatenate_Expression_Matrices.xml edgeR_Convert_DGE_Table_to_Bedgraph.xml edgeR_Design_from_Expression_Matrix.xml edgeR_Differential_Gene_Expression.xml test-data/Differential_Gene_Expression/differentially_expressed_genes.tabular.txt tool_dependencies.xml |
diffstat | 6 files changed, 92 insertions(+), 108 deletions(-) [+] |
line wrap: on
line diff
--- a/edgeR_Concatenate_Expression_Matrices.xml Tue Sep 01 04:59:05 2015 -0400 +++ b/edgeR_Concatenate_Expression_Matrices.xml Tue Sep 01 09:15:07 2015 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="concatenate_expression_matrices" name="edgeR: Concatenate Expression Matrices" version="1.0.0"> +<tool id="concatenate_expression_matrices" name="edgeR: Concatenate Expression Matrices" version="1.0.0.b"> <description>Create a full expression matrix by selecting the desired columns from specific count tables</description> <macros>
--- a/edgeR_Convert_DGE_Table_to_Bedgraph.xml Tue Sep 01 04:59:05 2015 -0400 +++ b/edgeR_Convert_DGE_Table_to_Bedgraph.xml Tue Sep 01 09:15:07 2015 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> - <tool id="edger_dge_table_to_bedgraph" name="edgeR: Convert 'differentially expressed genes'-table to bedgraph(s)" version="1.0.0"> + <tool id="edger_dge_table_to_bedgraph" name="edgeR: Convert 'differentially expressed genes'-table to bedgraph(s)" version="1.0.0.b"> <description>EdgeR's "differentially expressed genes" table to bedgraph(s)</description> <macros> @@ -10,7 +10,7 @@ <requirement type="package" version="1.0.0">edger_dge_table_to_bedgraph</requirement> </requirements> - <command interpreter="python"> + <command> edger_dge_table_to_bedgraph -t $cpm_table -g $geneset
--- a/edgeR_Design_from_Expression_Matrix.xml Tue Sep 01 04:59:05 2015 -0400 +++ b/edgeR_Design_from_Expression_Matrix.xml Tue Sep 01 09:15:07 2015 -0400 @@ -10,7 +10,7 @@ <requirement type="package" version="1.0.0">design_matrix_creator</requirement> </requirements> - <command interpreter="python"> + <command> design_matrix_creator -c $expression_matrix @@ -123,17 +123,11 @@ Overview -------- -Create a design matrix by selecting the desired patients from an -expression matrix. +Create a design matrix by selecting the desired patients from an expression matrix. Input ----- -**References** - -The test data is coming from: doi: 10.1093/bioinformatics/btt688. -http://www.ncbi.nlm.nih.gov/pubmed/24319002 - @CONTACT@ </help>
--- a/edgeR_Differential_Gene_Expression.xml Tue Sep 01 04:59:05 2015 -0400 +++ b/edgeR_Differential_Gene_Expression.xml Tue Sep 01 09:15:07 2015 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.a"> +<tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.b"> <description>RNA-Seq gene expression analysis using edgeR (R package)</description> <macros> @@ -30,7 +30,7 @@ description="LOCALE has not been set correctly" /> </stdio> - <version_command>echo $(R --version | grep version | grep -v GNU) ", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> + <version_command>echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> <command> R --vanilla --slave -f $R_script '--args @@ -101,11 +101,6 @@ $output_format_images ' - #if $output_R: - > $output_R - #else: - > /dev/null - #end if </command> <configfiles> @@ -218,39 +213,43 @@ dge <- estimateGLMTagwiseDisp(dge,design) - if(output_MDSplot_logFC != "/dev/null") { - write("Creating MDS plot (logFC method)",stdout()) - points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot + # hierarchical clustering makes use of the distance of the MDS + if(output_MDSplot_logFC != "/dev/null" || output_hierarchical_clustering_plot != "/dev/null") { + write("Calculating MDS plot (logFC method)",stdout()) + mds_distance_logFC <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot dev.off()# Kill it - if(output_format_images == "pdf") { - pdf(output_MDSplot_logFC,height=14,width=14) - } else if(output_format_images == "svg") { - svg(output_MDSplot_logFC,height=14,width=14) - } else { - ## png(output_MDSplot_logFC) - ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ + if(output_MDSplot_logFC != "/dev/null") { + write("Creating MDS plot (logFC method)",stdout()) + if(output_format_images == "pdf") { + pdf(output_MDSplot_logFC,height=14,width=14) + } else if(output_format_images == "svg") { + svg(output_MDSplot_logFC,height=14,width=14) + } else { + ## png(output_MDSplot_logFC) + ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ + + bitmap(output_MDSplot_logFC,type="png16m",height=7*3,width=7*3) + } - bitmap(output_MDSplot_logFC,type="png16m",height=14,width=14) - } - - - diff_x <- abs(max(points\$x)-min(points\$x)) - diff_y <-(max(points\$y)-min(points\$y)) - plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") - points(points\$x,points\$y,pch=20) - text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) - rm(diff_x,diff_y) - - dev.off() + diff_x <- abs(max(mds_distance_logFC\$x)-min(mds_distance_logFC\$x)) + diff_y <-(max(mds_distance_logFC\$y)-min(mds_distance_logFC\$y)) + plot(c(min(mds_distance_logFC\$x),max(mds_distance_logFC\$x) + 0.45 * diff_x), c(min(mds_distance_logFC\$y) - 0.05 * diff_y,max(mds_distance_logFC\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2") + points(mds_distance_logFC\$x,mds_distance_logFC\$y,pch=20) + text(mds_distance_logFC\$x, mds_distance_logFC\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) + rm(diff_x,diff_y) + + dev.off() + } } + if(output_MDSplot_bcv != "/dev/null") { write("Creating MDS plot (bcv method)",stdout()) ## 1. First create a virtual plot to obtain the desired coordinates pdf("bcvmds.pdf") - points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) + mds_distance_BCV <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples))) dev.off()# Kill it ## 2. Re-plot the coordinates in a new figure with the size and settings. @@ -262,14 +261,14 @@ ## png(output_MDSplot_bcv) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14) + bitmap(output_MDSplot_bcv,type="png16m",height=7*3,width=7*3) } - diff_x <- abs(max(points\$x)-min(points\$x)) - diff_y <- (max(points\$y)-min(points\$y)) - plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") - points(points\$x,points\$y,pch=20) - text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) + diff_x <- abs(max(mds_distance_BCV\$x)-min(mds_distance_BCV\$x)) + diff_y <- (max(mds_distance_BCV\$y)-min(mds_distance_BCV\$y)) + plot(c(min(mds_distance_BCV\$x),max(mds_distance_BCV\$x) + 0.45 * diff_x), c(min(mds_distance_BCV\$y) - 0.05 * diff_y,max(mds_distance_BCV\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2") + points(mds_distance_BCV\$x,mds_distance_BCV\$y,pch=20) + text(mds_distance_BCV\$x, mds_distance_BCV\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4) rm(diff_x,diff_y) dev.off() @@ -287,7 +286,7 @@ ## png(output_BCVplot) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_BCVplot,type="png16m") + bitmap(output_BCVplot,type="png16m",width=10.5*3,height=7*3) } plotBCV(dge, cex=0.4, main="edgeR: Biological coefficient of variation (BCV) vs abundance") @@ -303,7 +302,7 @@ cont <- makeContrasts(contrasts=cont, levels=design) lrt <- glmLRT(fit, contrast=cont[,1]) - write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout()) + write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout()) write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) @@ -325,7 +324,7 @@ ## png(output_MAplot) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_MAplot,type="png16m") + bitmap(output_MAplot,type="png16m",width=10.5*3,height=7*3) } with(etable, plot(logCPM, logFC, pch=20, main="edgeR: Fold change vs abundance")) @@ -345,7 +344,7 @@ ## png(output_PValue_distribution_plot) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14) + bitmap(output_PValue_distribution_plot,type="png16m",width=7*3,height=7*3) } expressed_genes <- subset(etable, PValue < 0.99) @@ -374,7 +373,7 @@ ## png(output_heatmap_plot) ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ - bitmap(output_heatmap_plot,type="png16m",width=10.5) + bitmap(output_heatmap_plot,type="png16m",width=10.5*3,height=7*3) } etable2 <- topTags(lrt, n=100)\$table @@ -384,7 +383,24 @@ dev.off() } - ##output_hierarchical_clustering_plot = args[13] + if(output_hierarchical_clustering_plot != "/dev/null") { + if(output_hierarchical_clustering_plot == "pdf") { + pdf(output_hierarchical_clustering_plot,width=10.5) + } else if(output_hierarchical_clustering_plot == "svg") { + svg(output_hierarchical_clustering_plot,width=10.5) + } else { + ## png(output_hierarchical_clustering_plot) + ## png does not work out of the box in the Galaxy Toolshed Version of R due to its compile settings: https://biostar.usegalaxy.org/p/9170/ + + bitmap(output_hierarchical_clustering_plot,type="png16m",width=10.5*3,height=7*3) + } + + mds_distance = as.dist(mds_distance_logFC\$distance.matrix) + clustering = hclust(mds_distance) + plot(clustering,main=paste("Cluster Dendogram on the ",mds_distance_logFC\$top," TopTags",sep="",sub="\ncomplete linkage on logFC MDS distance")) + + dev.off() + } if(output_RData_obj != "/dev/null") { save.image(output_RData_obj) @@ -411,10 +427,8 @@ <option value="make_output_BCVplot">BCV-plot</option> <option value="make_output_MAplot">MA-plot</option> <option value="make_output_PValue_distribution_plot">P-Value distribution plot</option> - <option value="make_output_hierarchical_clustering_plot">Hierarchical custering (under contstruction)</option> + <option value="make_output_hierarchical_clustering_plot">Hierarchical custering</option> <option value="make_output_heatmap_plot">Heatmap</option> - - <option value="make_output_R_stdout">R stdout</option> <option value="make_output_RData_obj">R Data object</option> </param> @@ -595,7 +609,7 @@ Installation ------------ -This tool requires no specific configurations. The following dependencies are installed automatically: +This tool requires no specific configuration. The following dependencies will installed automatically: - R - limma @@ -610,28 +624,6 @@ - edgeR - GPL (>=2) -References ----------- - -EdgeR -^^^^^ -**[1] edgeR: a Bioconductor package for differential expression analysis of digital gene expression data.** - -*Mark D. Robinson, Davis J. McCarthy and Gordon K. Smyth* - Bioinformatics (2010) 26 (1): 139-140. - -- http://www.bioconductor.org/packages/2.12/bioc/html/edgeR.html -- http://dx.doi.org/10.1093/bioinformatics/btp616 -- http://www.bioconductor.org/packages/release/bioc/html/edgeR.html - -Test-data (MCF7) -^^^^^^^^^^^^^^^^ -**[2] RNA-seq differential expression studies: more sequence or more replication?** - -*Yuwen Liu, Jie Zhou and Kevin P. White* - Bioinformatics (2014) 30 (3): 301-304. - -- http://www.ncbi.nlm.nih.gov/pubmed/24319002 -- http://dx.doi.org/10.1093/bioinformatics/btt688 - @CONTACT@ </help>
--- a/test-data/Differential_Gene_Expression/differentially_expressed_genes.tabular.txt Tue Sep 01 04:59:05 2015 -0400 +++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.tabular.txt Tue Sep 01 09:15:07 2015 -0400 @@ -1,29 +1,29 @@ "" "genes" "logFC" "logCPM" "LR" "PValue" "FDR" -"15" "RET" 1.94897640107287 13.2940435307943 77.654599543179 1.22730171836821e-18 3.55917498326782e-17 -"24" "HSPA8" 0.607138087178611 18.9380827005326 16.8408380175314 4.06490891367457e-05 0.000589411792482813 -"12" "IFI44L" -0.665544707287885 11.7020333673755 13.7144720204488 0.000212808307971688 0.00205714697705965 -"10" "MYO18A" -0.608389235629078 14.1586814058554 10.0030349278008 0.00156282461000764 0.0113304784225554 -"4" "DDX11" 0.719283453206407 12.4597575302041 9.12036988131984 0.00252778847255745 0.0146611731408332 -"26" "NR2C2AP" 0.538719097450498 12.9331552590697 7.88314604327009 0.00498976028659176 0.0241171747185268 -"19" "RASSF3" -0.322613484306053 12.4964626371138 4.62652086564883 0.0314813398355935 0.130422693604602 -"3" "DDX17" -0.202898462175599 16.1804174471336 2.58563106106719 0.107837421983493 0.390910654690163 -"2" "USP26" -2.18026172235612 4.27316802151059 2.23964914988384 0.134511497925546 0.433425937760093 -"27" "INADL" -0.171517188407227 15.2186072712825 1.67097950802946 0.19612738879992 0.532259040389235 -"21" "LYSMD3" -0.195688416211883 13.32535006408 1.62863926869131 0.201891360147641 0.532259040389235 -"16" "IQCG" -0.204602031239979 10.1622224199572 1.27895197461613 0.258093988838365 0.623727139692716 -"14" "KCNAB3" 0.235558773538239 7.85434240586327 0.626910369343036 0.428490880664559 0.920477060517754 -"6" "SLC35D3" 0.732791849378157 4.62274850327991 0.571072605743573 0.449833005996972 0.920477060517754 -"20" "FUT8" -0.101052861033366 15.3006920075591 0.507763042822674 0.476108824405735 0.920477060517754 -"1" "COMMD10" 0.124183380838805 12.8850203875481 0.427219144496277 0.513356635936627 0.927280503730139 -"9" "TXLNB" -0.182677549934781 7.2825391341052 0.332700921323453 0.56407299924751 0.927280503730139 -"28" "TMEM31" -0.242966946412951 6.16897757110336 0.313478542290647 0.575553416108362 0.927280503730139 -"7" "GLOD4" -0.0680179216917625 13.8710260882794 0.180025725971987 0.671351132981887 0.998117639327786 -"17" "C20orf118" 0.14522927187254 6.73904801103973 0.160869778108134 0.688356992639853 0.998117639327786 -"22" "LMOD3" 0.0653797517936461 7.60390982671528 0.0485282387333417 0.825644327378613 1 -"8" "GIMAP7" 0.278076976843342 4.26623062002702 0.0410800206726094 0.839383539966141 1 -"23" "HIPK1" -0.00503549836369412 17.7957744498389 0.00154544013400937 0.968641570543933 1 -"13" "KHSRP" -0.00239769805103114 14.7206397592923 0.000263977505255752 0.987037033025321 1 -"11" "ATG4B" 0.00130641853526229 13.5252482941211 3.53338139831294e-05 0.995257222366413 1 +"15" "RET" 1.94897640107286 13.2940435307943 77.6545995415986 1.22730171935022e-18 3.55917498611563e-17 +"24" "HSPA8" 0.607138087178614 18.9380827005326 16.8408380186893 4.06490891119454e-05 0.000589411792123208 +"12" "IFI44L" -0.665544707287881 11.7020333673755 13.7144720195324 0.000212808308075529 0.00205714697806344 +"10" "MYO18A" -0.608389235629078 14.1586814058554 10.0030349277278 0.00156282461006963 0.0113304784230048 +"4" "DDX11" 0.719283453206409 12.4597575302041 9.1203698809081 0.00252778847312638 0.014661173144133 +"26" "NR2C2AP" 0.538719097450497 12.9331552590697 7.88314604309164 0.00498976028708414 0.0241171747209067 +"19" "RASSF3" -0.322613484306052 12.4964626371138 4.62652086549247 0.031481339838463 0.130422693616489 +"3" "DDX17" -0.202898462175601 16.1804174471336 2.58563106107022 0.107837421983287 0.390910654689415 +"2" "USP26" -2.18026172235612 4.27316802151059 2.23964990963176 0.134511431832489 0.433425724793576 +"27" "INADL" -0.171517188407228 15.2186072712825 1.67097950803399 0.196127388799313 0.532259040398981 +"21" "LYSMD3" -0.195688416211876 13.32535006408 1.62863926866461 0.201891360151338 0.532259040398981 +"16" "IQCG" -0.204602031239968 10.1622224199572 1.27895197435303 0.258093988887329 0.623727139811046 +"14" "KCNAB3" 0.235558773538239 7.85434240586327 0.626910368891663 0.42849088083079 0.920477060511131 +"6" "SLC35D3" 0.732791849378157 4.62274850327991 0.571073743098566 0.449832554709518 0.920477060511131 +"20" "FUT8" -0.10105286103336 15.3006920075591 0.507763042830561 0.476108824402309 0.920477060511131 +"1" "COMMD10" 0.124183380838807 12.8850203875481 0.427219144492929 0.513356635938278 0.927280503918676 +"9" "TXLNB" -0.182677549934781 7.2825391341052 0.332700920948795 0.564072999466928 0.927280503918676 +"28" "TMEM31" -0.242966946412956 6.16897757110336 0.313478542098543 0.575553416225385 0.927280503918676 +"7" "GLOD4" -0.0680179216917574 13.8710260882794 0.180025725971447 0.671351132982351 0.998117639846733 +"17" "C20orf118" 0.14522927187254 6.73904801103973 0.16086977771818 0.688356992997747 0.998117639846733 +"22" "LMOD3" 0.065379751793664 7.60390982671528 0.0485282385969317 0.825644327619727 1 +"8" "GIMAP7" 0.278076976843342 4.26623062002702 0.0410821671265458 0.839379401017925 1 +"23" "HIPK1" -0.0050354983636938 17.7957744498389 0.00154544012787028 0.968641570606185 1 +"13" "KHSRP" -0.00239769805103114 14.7206397592923 0.000263977502370949 0.987037033096146 1 +"11" "ATG4B" 0.00130641853526229 13.5252482941211 3.5333815787908e-05 0.995257222245289 1 "5" "PTPN20B" 0 4.09631395702755 0 1 1 "18" "GPIHBP1" 0 4.09631395702755 0 1 1 "25" "TAS2R39" 0 4.09631395702755 0 1 1
--- a/tool_dependencies.xml Tue Sep 01 04:59:05 2015 -0400 +++ b/tool_dependencies.xml Tue Sep 01 09:15:07 2015 -0400 @@ -7,13 +7,12 @@ <package name="design_matrix_creator" version="1.0.0"> <install version="1.0"> <actions> - <action type="shell_command">mkdir $INSTALL_DIR/bin ; cp $REPOSITORY_INSTALL_DIR/bin/design_matrix_creator $INSTALL_DIR/bin/</action> + <action type="shell_command">mkdir $INSTALL_DIR/bin ; cp $REPOSITORY_INSTALL_DIR/bin/design_matrix_creator $INSTALL_DIR/bin/design_matrix_creator</action> <action type="chmod"> <file mode="755">$INSTALL_DIR/bin/design_matrix_creator</file> </action> <action type="set_environment"> <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - <environment_variable action="prepend_to" name="PATH">$REPOSITORY_INSTALL_DIR</environment_variable> </action> </actions> </install> @@ -22,13 +21,12 @@ <package name="edger_dge_table_to_bedgraph" version="1.0.0"> <install version="1.0"> <actions> - <action type="shell_command">mkdir $INSTALL_DIR/bin ; cp $REPOSITORY_INSTALL_DIR/bin/edger_dge_table_to_bedgraph $INSTALL_DIR/bin/</action> + <action type="shell_command">mkdir $INSTALL_DIR/bin ; cp $REPOSITORY_INSTALL_DIR/bin/edger_dge_table_to_bedgraph $INSTALL_DIR/bin/edger_dge_table_to_bedgraph</action> <action type="chmod"> <file mode="755">$INSTALL_DIR/bin/edger_dge_table_to_bedgraph</file> </action> <action type="set_environment"> <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - <environment_variable action="prepend_to" name="PATH">$REPOSITORY_INSTALL_DIR</environment_variable> </action> </actions> </install>