# HG changeset patch # User yhoogstrate # Date 1450108898 18000 # Node ID bde663b872d9e1f8b52a91240db0f30697542f44 # Parent 5d38abf7e4b6b491fb9a551680e2daa3e3ea7d31 planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 275a72ec0424e4e5d658d1bc8227077ea46f0fdc diff -r 5d38abf7e4b6 -r bde663b872d9 README.rst --- a/README.rst Wed Dec 09 10:43:03 2015 -0500 +++ b/README.rst Mon Dec 14 11:01:38 2015 -0500 @@ -1,6 +1,13 @@ EdgeR wrapper for Galaxy ======================== +This is a wrapper for the RNA-Seq differentially gene expression analysis tool EdgeR. +This wrapper contains 2 flavours of tests, a classical 2 group analysis and a more +sophistiacted multi-factor analysis. + +Input data can be generated using so called count tools. The wrapper has been written +to be compatible with at least featureCounts (by yhoogstrate) and HTSeq-count (by iuc). + http://www.bioconductor.org/packages/release/bioc/html/edgeR.html Implementation of EdgeR supporting quite advanced experimental @@ -42,17 +49,4 @@ **This wrapper**: - Copyright (C) 2013-2015 Youri Hoogstrate - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . +GPL (>=2) diff -r 5d38abf7e4b6 -r bde663b872d9 edgeR_Differential_Gene_Expression.xml --- a/edgeR_Differential_Gene_Expression.xml Wed Dec 09 10:43:03 2015 -0500 +++ b/edgeR_Differential_Gene_Expression.xml Mon Dec 14 11:01:38 2015 -0500 @@ -36,18 +36,58 @@ echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ") - + gene_ids.column.txt && + #for $file in $analysis_type.countsFile_control: + cut -f 2 "${file}" > "${file}.expression_column.txt" && + #end for + #for $file in $analysis_type.countsFile_condition: + cut -f 2 "${file}" > "${file}.expression_column.txt" && + #end for + + paste + gene_ids.column.txt + #for $file in $analysis_type.countsFile_control: + "${file}.expression_column.txt" + #end for + #for $file in $analysis_type.countsFile_condition: + "${file}.expression_column.txt" + #end for + > "${expression_matrix}" && + + ## -- Create design matrix matrix + echo "sample-name Condition" >> ${design_matrix} && + #for $file in $analysis_type.countsFile_control: + echo "${file.name} ${analysis_type.factorLevel_control}" >> ${design_matrix} && + #end for + #for $file in $analysis_type.countsFile_condition: + echo "${file.name} ${analysis_type.factorLevel_condition}" >> ${design_matrix} && + #end for + #end if + R --vanilla --slave -f $R_script '--args $expression_matrix $design_matrix $contrast + $analysis_report_genes $fdr $output_count_edgeR $output_cpm - /dev/null + /dev/null ### Calculation of FPKM/RPKM should come here #if $output_raw_counts: $output_raw_counts @@ -117,6 +157,7 @@ $output_format_images ' + ]]> @@ -134,28 +175,29 @@ design_matrix_file <- args[2] contrast <- args[3] -fdr <- args[4] +truncate_table_by_fdr <- args[4] +fdr <- as.double(args[5]) -output_count_edgeR <- args[5] -output_cpm <- args[6] +output_count_edgeR <- args[6] +output_cpm <- args[7] -output_xpkm <- args[7] ##FPKM file - to be implemented +output_xpkm <- args[8] ##FPKM file - to be implemented -output_raw_counts <- args[8] +output_raw_counts <- args[9] -output_MDSplot_logFC <- args[9] -output_MDSplot_logFC_coordinates <- args[10] +output_MDSplot_logFC <- args[10] +output_MDSplot_logFC_coordinates <- args[11] -output_MDSplot_bcv <- args[11] -output_MDSplot_bcv_coordinates <- args[12] +output_MDSplot_bcv <- args[12] +output_MDSplot_bcv_coordinates <- args[13] -output_BCVplot <- args[13] -output_MAplot <- args[14] -output_PValue_distribution_plot <- args[15] -output_hierarchical_clustering_plot <- args[16] -output_heatmap_plot <- args[17] -output_RData_obj <- args[18] -output_format_images <- args[19] +output_BCVplot <- args[14] +output_MAplot <- args[15] +output_PValue_distribution_plot <- args[16] +output_hierarchical_clustering_plot <- args[17] +output_heatmap_plot <- args[18] +output_RData_obj <- args[19] +output_format_images <- args[20] ## Obtain read-counts @@ -166,15 +208,22 @@ for(i in 1:ncol(design_matrix)) { old <- design_matrix[,i] - design_matrix[,i] <- make.names(design_matrix[,i]) - if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) { - print("Renaming of factors:") - print(old) - print("To:") - print(design_matrix[,i]) + + if(any(grepl("^[0-9]+$", old, perl=TRUE) == FALSE)){ + # Convert invalid names + design_matrix[,i] <- make.names(design_matrix[,i]) + + # Print if names have been converted + if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) { + print("Renamed of factors:") + print(old) + print("To:") + print(design_matrix[,i]) + } + } else { + # Only numerical factors: these are blocking / pairing factors + design_matrix[,i] <- as.numeric(design_matrix[,i]) } - ## The following line seems to malfunction the script: - ##design_matrix[,i] <- as.factor(design_matrix[,i]) } ## 1) In the expression matrix, you only want to have the samples described in the design matrix @@ -348,7 +397,13 @@ lrt <- glmLRT(fit, contrast=cont[,1]) write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout()) - write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) + + if(truncate_table_by_fdr =="all") { + write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA) + } + else { + write.table(file=output_count_edgeR,subset(topTags(lrt,n=nrow(read_counts))\$table, FDR < fdr),sep="\t",row.names=TRUE,col.names=NA) + } write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA) ## todo EXPORT FPKM @@ -458,12 +513,44 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + - + @@ -482,19 +569,23 @@ - + - - + + + + + + - + outputs and ("make_output_raw_counts" in outputs) - + outputs and ("make_output_MDSplot_logFC" in outputs) @@ -504,11 +595,11 @@ - + outputs and ("make_output_MDSplot_logFC_coordinates" in outputs) - + outputs and ("make_output_MDSplot_bcv" in outputs) @@ -518,11 +609,11 @@ - + outputs and ("make_output_MDSplot_bcv_coordinates" in outputs) - + outputs and ("make_output_BCVplot" in outputs) @@ -532,7 +623,7 @@ - + outputs and ("make_output_MAplot" in outputs) @@ -542,7 +633,7 @@ - + outputs and ("make_output_PValue_distribution_plot" in outputs) @@ -552,7 +643,7 @@ - + outputs and ("make_output_hierarchical_clustering_plot" in outputs) @@ -562,7 +653,7 @@ - + outputs and ("make_output_heatmap_plot" in outputs) @@ -572,28 +663,83 @@ - + outputs and ("make_output_RData_obj" in outputs) - + outputs and ("make_output_R_stdout" in outputs) + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -661,24 +807,6 @@ - African-European - 0.5*(Control+Placebo) / Treated -Installation ------------- - -This tool requires no specific configuration. The following dependencies will installed automatically: - -- R -- limma -- edgeR - -License -------- -- R - - GPL 2 & GPL 3 -- limma - - GPL (>=2) -- edgeR - - GPL (>=2) - @CONTACT@ diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/C1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/C1 Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +Geneid C1 +COMMD10 966 +USP26 1 +DDX17 8544 +DDX11 329 +PTPN20B 0 +SLC35D3 1 +GLOD4 1614 +GIMAP7 0 +TXLNB 15 +MYO18A 1775 +ATG4B 936 +IFI44L 347 +KHSRP 2557 +KCNAB3 20 +RET 331 +IQCG 125 +C20orf118 9 +GPIHBP1 0 +RASSF3 658 +FUT8 4834 +LYSMD3 1333 +LMOD3 12 +HIPK1 24218 +HSPA8 44244 +TAS2R39 0 +NR2C2AP 606 +INADL 4315 +TMEM31 5 +GC 0 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/C2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/C2 Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +Geneid C2 +COMMD10 1067 +USP26 0 +DDX17 13335 +DDX11 970 +PTPN20B 0 +SLC35D3 0 +GLOD4 2596 +GIMAP7 1 +TXLNB 29 +MYO18A 4666 +ATG4B 2602 +IFI44L 678 +KHSRP 5001 +KCNAB3 42 +RET 695 +IQCG 193 +C20orf118 20 +GPIHBP1 0 +RASSF3 1060 +FUT8 6459 +LYSMD3 1679 +LMOD3 31 +HIPK1 35223 +HSPA8 58864 +TAS2R39 0 +NR2C2AP 1162 +INADL 6418 +TMEM31 10 +GC 0 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/C3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/C3 Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +Geneid C3 +COMMD10 438 +USP26 1 +DDX17 4579 +DDX11 221 +PTPN20B 0 +SLC35D3 0 +GLOD4 965 +GIMAP7 0 +TXLNB 9 +MYO18A 1193 +ATG4B 638 +IFI44L 307 +KHSRP 1593 +KCNAB3 10 +RET 361 +IQCG 84 +C20orf118 3 +GPIHBP1 0 +RASSF3 405 +FUT8 2599 +LYSMD3 666 +LMOD3 7 +HIPK1 14147 +HSPA8 26628 +TAS2R39 0 +NR2C2AP 403 +INADL 2421 +TMEM31 3 +GC 0 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/C4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/C4 Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +Geneid C4 +COMMD10 1231 +USP26 0 +DDX17 16358 +DDX11 867 +PTPN20B 0 +SLC35D3 2 +GLOD4 2912 +GIMAP7 0 +TXLNB 25 +MYO18A 4741 +ATG4B 2394 +IFI44L 784 +KHSRP 5513 +KCNAB3 34 +RET 669 +IQCG 229 +C20orf118 14 +GPIHBP1 0 +RASSF3 1277 +FUT8 7977 +LYSMD3 2029 +LMOD3 48 +HIPK1 47991 +HSPA8 76924 +TAS2R39 0 +NR2C2AP 1223 +INADL 8507 +TMEM31 14 +GC 0 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/E1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/E1 Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +Geneid E1 +COMMD10 964 +USP26 0 +DDX17 6995 +DDX11 916 +PTPN20B 0 +SLC35D3 1 +GLOD4 1807 +GIMAP7 1 +TXLNB 14 +MYO18A 1669 +ATG4B 1605 +IFI44L 268 +KHSRP 3162 +KCNAB3 28 +RET 2077 +IQCG 118 +C20orf118 6 +GPIHBP1 0 +RASSF3 507 +FUT8 4291 +LYSMD3 868 +LMOD3 19 +HIPK1 19201 +HSPA8 72195 +TAS2R39 0 +NR2C2AP 1293 +INADL 3443 +TMEM31 6 +GC 0 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/E2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/E2 Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +Geneid E2 +COMMD10 812 +USP26 0 +DDX17 8079 +DDX11 632 +PTPN20B 0 +SLC35D3 0 +GLOD4 1448 +GIMAP7 0 +TXLNB 15 +MYO18A 1457 +ATG4B 953 +IFI44L 302 +KHSRP 2624 +KCNAB3 34 +RET 1431 +IQCG 116 +C20orf118 13 +GPIHBP1 0 +RASSF3 575 +FUT8 4187 +LYSMD3 1141 +LMOD3 26 +HIPK1 28435 +HSPA8 61132 +TAS2R39 0 +NR2C2AP 761 +INADL 4415 +TMEM31 5 +GC 0 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/E3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/E3 Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +Geneid E3 +COMMD10 528 +USP26 0 +DDX17 5994 +DDX11 706 +PTPN20B 0 +SLC35D3 2 +GLOD4 1039 +GIMAP7 0 +TXLNB 6 +MYO18A 1497 +ATG4B 1185 +IFI44L 191 +KHSRP 2434 +KCNAB3 22 +RET 1490 +IQCG 79 +C20orf118 10 +GPIHBP1 0 +RASSF3 401 +FUT8 2974 +LYSMD3 749 +LMOD3 9 +HIPK1 20715 +HSPA8 42728 +TAS2R39 0 +NR2C2AP 726 +INADL 3094 +TMEM31 6 +GC 0 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/E4 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/E4 Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +Geneid E4 +COMMD10 860 +USP26 0 +DDX17 6596 +DDX11 518 +PTPN20B 0 +SLC35D3 1 +GLOD4 1564 +GIMAP7 0 +TXLNB 17 +MYO18A 1121 +ATG4B 911 +IFI44L 269 +KHSRP 2509 +KCNAB3 10 +RET 1327 +IQCG 107 +C20orf118 9 +GPIHBP1 0 +RASSF3 568 +FUT8 4154 +LYSMD3 1076 +LMOD3 20 +HIPK1 22614 +HSPA8 67106 +TAS2R39 0 +NR2C2AP 902 +INADL 3441 +TMEM31 3 +GC 0 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/design_matrix.tabular.batch-effects.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/design_matrix.tabular.batch-effects.txt Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,9 @@ +sample-name Condition Batch +C1 C 1 +C2 C 1 +C3 C 2 +C4 C 2 +E1 E 1 +E2 E 1 +E3 E 2 +E4 E 2 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/differentially_expressed_genes.batch-effects.tabular.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.batch-effects.tabular.txt Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,30 @@ +"" "genes" "logFC" "logCPM" "LR" "PValue" "FDR" +"15" "RET" 1.95351498277649 13.2940435307943 70.7884298827703 3.9766237329402e-17 1.15322088255266e-15 +"24" "HSPA8" 0.607097086193171 18.9380827005326 18.3388522248215 1.84897696413517e-05 0.000268101659799599 +"12" "IFI44L" -0.663271215486821 11.7020333673755 10.9083408215917 0.00095732231526494 0.00925411571422775 +"10" "MYO18A" -0.607030110998538 14.1586814058554 8.85972925087567 0.0029153081194653 0.0208760330354099 +"4" "DDX11" 0.724160415576466 12.4597575302041 8.47565074268945 0.00359931604058791 0.0208760330354099 +"26" "NR2C2AP" 0.538450796715875 12.9331552590697 6.96854404518889 0.00829549660040106 0.0400949002352718 +"19" "RASSF3" -0.323616221214522 12.4964626371138 3.7019217136962 0.05434983717189 0.225163611140687 +"3" "DDX17" -0.20201533346913 16.1804174471336 2.4240349611365 0.119486805704499 0.43313967067881 +"2" "USP26" -2.19888949506255 4.27316802151059 2.23930233943077 0.1345416719394 0.433523165138066 +"27" "INADL" -0.171504044976009 15.2186072712825 1.49672630947915 0.221175760548511 0.641409705590681 +"21" "LYSMD3" -0.194806526906794 13.32535006408 1.33073961519612 0.248673682102714 0.655594252816247 +"16" "IQCG" -0.208260226810614 10.1622224199572 0.967867000668835 0.3252127194599 0.785930738694759 +"6" "SLC35D3" 0.789817686242913 4.62274850327991 0.638997348362095 0.424074105189856 0.922823270583707 +"14" "KCNAB3" 0.224776816841962 7.85434240586327 0.477050454902194 0.489761385678098 0.922823270583707 +"20" "FUT8" -0.100266057923686 15.3006920075591 0.448528599960923 0.503034528215111 0.922823270583707 +"1" "COMMD10" 0.129868119349192 12.8850203875481 0.43582621570193 0.509143873425494 0.922823270583707 +"9" "TXLNB" -0.183590716507963 7.2825391341052 0.269612199658876 0.603592030691037 0.990834288521076 +"28" "TMEM31" -0.231195333650393 6.16897757110336 0.252957033365721 0.615000592875151 0.990834288521076 +"17" "C20orf118" 0.163495741844413 6.73904801103973 0.171566008065263 0.678723780532467 1 +"7" "GLOD4" -0.0679141042839315 13.8710260882794 0.154827011727691 0.693964522467115 1 +"22" "LMOD3" 0.0791372844747401 7.60390982671528 0.0540610465986004 0.816141676483626 1 +"8" "GIMAP7" 0.195152071961945 4.26623062002702 0.0194047958154413 0.889212222514442 1 +"23" "HIPK1" -0.00677640818725696 17.7957744498389 0.00305975315578166 0.955887483884316 1 +"11" "ATG4B" 0.00473988666702576 13.5252482941211 0.000418258796557325 0.983683299966982 1 +"13" "KHSRP" -0.00184247000571017 14.7206397592923 0.000136964665301775 0.990662418229392 1 +"5" "PTPN20B" 0 4.09631395702755 0 1 1 +"18" "GPIHBP1" 0 4.09631395702755 0 1 1 +"25" "TAS2R39" 0 4.09631395702755 0 1 1 +"29" "GC" 0 4.09631395702755 0 1 1 diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt Mon Dec 14 11:01:38 2015 -0500 @@ -0,0 +1,7 @@ +"" "genes" "logFC" "logCPM" "LR" "PValue" "FDR" +"15" "RET" 1.94897640107286 13.2940435307943 77.6545995415986 1.22730171935022e-18 3.55917498611563e-17 +"24" "HSPA8" 0.607138087178614 18.9380827005326 16.8408380186893 4.06490891119454e-05 0.000589411792123208 +"12" "IFI44L" -0.665544707287881 11.7020333673755 13.7144720195324 0.000212808308075529 0.00205714697806344 +"10" "MYO18A" -0.608389235629078 14.1586814058554 10.0030349277278 0.00156282461006963 0.0113304784230048 +"4" "DDX11" 0.719283453206409 12.4597575302041 9.1203698809081 0.00252778847312638 0.014661173144133 +"26" "NR2C2AP" 0.538719097450497 12.9331552590697 7.88314604309164 0.00498976028708414 0.0241171747209067