# HG changeset patch
# User yhoogstrate
# Date 1450108898 18000
# Node ID bde663b872d9e1f8b52a91240db0f30697542f44
# Parent 5d38abf7e4b6b491fb9a551680e2daa3e3ea7d31
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 275a72ec0424e4e5d658d1bc8227077ea46f0fdc
diff -r 5d38abf7e4b6 -r bde663b872d9 README.rst
--- a/README.rst Wed Dec 09 10:43:03 2015 -0500
+++ b/README.rst Mon Dec 14 11:01:38 2015 -0500
@@ -1,6 +1,13 @@
EdgeR wrapper for Galaxy
========================
+This is a wrapper for the RNA-Seq differential gene expression analysis tool EdgeR.
+This wrapper contains 2 flavours of tests, a classical 2 group analysis and a more
+sophisticated multi-factor analysis.
+
+Input data can be generated using so-called count tools. The wrapper has been written
+to be compatible with at least featureCounts (by yhoogstrate) and HTSeq-count (by iuc).
+
http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
Implementation of EdgeR supporting quite advanced experimental
@@ -42,17 +49,4 @@
**This wrapper**:
- Copyright (C) 2013-2015 Youri Hoogstrate
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see .
+GPL (>=2)
diff -r 5d38abf7e4b6 -r bde663b872d9 edgeR_Differential_Gene_Expression.xml
--- a/edgeR_Differential_Gene_Expression.xml Wed Dec 09 10:43:03 2015 -0500
+++ b/edgeR_Differential_Gene_Expression.xml Mon Dec 14 11:01:38 2015 -0500
@@ -36,18 +36,58 @@
echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
-
+ gene_ids.column.txt &&
+ #for $file in $analysis_type.countsFile_control:
+ cut -f 2 "${file}" > "${file}.expression_column.txt" &&
+ #end for
+ #for $file in $analysis_type.countsFile_condition:
+ cut -f 2 "${file}" > "${file}.expression_column.txt" &&
+ #end for
+
+ paste
+ gene_ids.column.txt
+ #for $file in $analysis_type.countsFile_control:
+ "${file}.expression_column.txt"
+ #end for
+ #for $file in $analysis_type.countsFile_condition:
+ "${file}.expression_column.txt"
+ #end for
+ > "${expression_matrix}" &&
+
+ ## -- Create design matrix
+ echo "sample-name Condition" >> ${design_matrix} &&
+ #for $file in $analysis_type.countsFile_control:
+ echo "${file.name} ${analysis_type.factorLevel_control}" >> ${design_matrix} &&
+ #end for
+ #for $file in $analysis_type.countsFile_condition:
+ echo "${file.name} ${analysis_type.factorLevel_condition}" >> ${design_matrix} &&
+ #end for
+ #end if
+
R --vanilla --slave -f $R_script '--args
$expression_matrix
$design_matrix
$contrast
+ $analysis_report_genes
$fdr
$output_count_edgeR
$output_cpm
- /dev/null
+ /dev/null ### Calculation of FPKM/RPKM should come here
#if $output_raw_counts:
$output_raw_counts
@@ -117,6 +157,7 @@
$output_format_images
'
+ ]]>
@@ -134,28 +175,29 @@
design_matrix_file <- args[2]
contrast <- args[3]
-fdr <- args[4]
+truncate_table_by_fdr <- args[4]
+fdr <- as.double(args[5])
-output_count_edgeR <- args[5]
-output_cpm <- args[6]
+output_count_edgeR <- args[6]
+output_cpm <- args[7]
-output_xpkm <- args[7] ##FPKM file - to be implemented
+output_xpkm <- args[8] ##FPKM file - to be implemented
-output_raw_counts <- args[8]
+output_raw_counts <- args[9]
-output_MDSplot_logFC <- args[9]
-output_MDSplot_logFC_coordinates <- args[10]
+output_MDSplot_logFC <- args[10]
+output_MDSplot_logFC_coordinates <- args[11]
-output_MDSplot_bcv <- args[11]
-output_MDSplot_bcv_coordinates <- args[12]
+output_MDSplot_bcv <- args[12]
+output_MDSplot_bcv_coordinates <- args[13]
-output_BCVplot <- args[13]
-output_MAplot <- args[14]
-output_PValue_distribution_plot <- args[15]
-output_hierarchical_clustering_plot <- args[16]
-output_heatmap_plot <- args[17]
-output_RData_obj <- args[18]
-output_format_images <- args[19]
+output_BCVplot <- args[14]
+output_MAplot <- args[15]
+output_PValue_distribution_plot <- args[16]
+output_hierarchical_clustering_plot <- args[17]
+output_heatmap_plot <- args[18]
+output_RData_obj <- args[19]
+output_format_images <- args[20]
## Obtain read-counts
@@ -166,15 +208,22 @@
for(i in 1:ncol(design_matrix)) {
old <- design_matrix[,i]
- design_matrix[,i] <- make.names(design_matrix[,i])
- if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
- print("Renaming of factors:")
- print(old)
- print("To:")
- print(design_matrix[,i])
+
+ if(any(grepl("^[0-9]+$", old, perl=TRUE) == FALSE)){
+ # Convert invalid names
+ design_matrix[,i] <- make.names(design_matrix[,i])
+
+ # Print if names have been converted
+ if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
+ print("Renamed of factors:")
+ print(old)
+ print("To:")
+ print(design_matrix[,i])
+ }
+ } else {
+ # Only numerical factors: these are blocking / pairing factors
+ design_matrix[,i] <- as.numeric(design_matrix[,i])
}
- ## The following line seems to malfunction the script:
- ##design_matrix[,i] <- as.factor(design_matrix[,i])
}
## 1) In the expression matrix, you only want to have the samples described in the design matrix
@@ -348,7 +397,13 @@
lrt <- glmLRT(fit, contrast=cont[,1])
write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout())
- write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
+
+ if(truncate_table_by_fdr =="all") {
+ write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
+ }
+ else {
+ write.table(file=output_count_edgeR,subset(topTags(lrt,n=nrow(read_counts))\$table, FDR < fdr),sep="\t",row.names=TRUE,col.names=NA)
+ }
write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
## todo EXPORT FPKM
@@ -458,12 +513,44 @@
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
-
+
@@ -482,19 +569,23 @@
-
+
-
-
+
+
+
+
+
+
-
+
outputs and ("make_output_raw_counts" in outputs)
-
+
outputs and ("make_output_MDSplot_logFC" in outputs)
@@ -504,11 +595,11 @@
-
+
outputs and ("make_output_MDSplot_logFC_coordinates" in outputs)
-
+
outputs and ("make_output_MDSplot_bcv" in outputs)
@@ -518,11 +609,11 @@
-
+
outputs and ("make_output_MDSplot_bcv_coordinates" in outputs)
-
+
outputs and ("make_output_BCVplot" in outputs)
@@ -532,7 +623,7 @@
-
+
outputs and ("make_output_MAplot" in outputs)
@@ -542,7 +633,7 @@
-
+
outputs and ("make_output_PValue_distribution_plot" in outputs)
@@ -552,7 +643,7 @@
-
+
outputs and ("make_output_hierarchical_clustering_plot" in outputs)
@@ -562,7 +653,7 @@
-
+
outputs and ("make_output_heatmap_plot" in outputs)
@@ -572,28 +663,83 @@
-
+
outputs and ("make_output_RData_obj" in outputs)
-
+
outputs and ("make_output_R_stdout" in outputs)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -661,24 +807,6 @@
- African-European
- 0.5*(Control+Placebo) / Treated
-Installation
-------------
-
-This tool requires no specific configuration. The following dependencies will installed automatically:
-
-- R
-- limma
-- edgeR
-
-License
--------
-- R
- - GPL 2 & GPL 3
-- limma
- - GPL (>=2)
-- edgeR
- - GPL (>=2)
-
@CONTACT@
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/C1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/C1 Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid C1
+COMMD10 966
+USP26 1
+DDX17 8544
+DDX11 329
+PTPN20B 0
+SLC35D3 1
+GLOD4 1614
+GIMAP7 0
+TXLNB 15
+MYO18A 1775
+ATG4B 936
+IFI44L 347
+KHSRP 2557
+KCNAB3 20
+RET 331
+IQCG 125
+C20orf118 9
+GPIHBP1 0
+RASSF3 658
+FUT8 4834
+LYSMD3 1333
+LMOD3 12
+HIPK1 24218
+HSPA8 44244
+TAS2R39 0
+NR2C2AP 606
+INADL 4315
+TMEM31 5
+GC 0
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/C2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/C2 Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid C2
+COMMD10 1067
+USP26 0
+DDX17 13335
+DDX11 970
+PTPN20B 0
+SLC35D3 0
+GLOD4 2596
+GIMAP7 1
+TXLNB 29
+MYO18A 4666
+ATG4B 2602
+IFI44L 678
+KHSRP 5001
+KCNAB3 42
+RET 695
+IQCG 193
+C20orf118 20
+GPIHBP1 0
+RASSF3 1060
+FUT8 6459
+LYSMD3 1679
+LMOD3 31
+HIPK1 35223
+HSPA8 58864
+TAS2R39 0
+NR2C2AP 1162
+INADL 6418
+TMEM31 10
+GC 0
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/C3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/C3 Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid C3
+COMMD10 438
+USP26 1
+DDX17 4579
+DDX11 221
+PTPN20B 0
+SLC35D3 0
+GLOD4 965
+GIMAP7 0
+TXLNB 9
+MYO18A 1193
+ATG4B 638
+IFI44L 307
+KHSRP 1593
+KCNAB3 10
+RET 361
+IQCG 84
+C20orf118 3
+GPIHBP1 0
+RASSF3 405
+FUT8 2599
+LYSMD3 666
+LMOD3 7
+HIPK1 14147
+HSPA8 26628
+TAS2R39 0
+NR2C2AP 403
+INADL 2421
+TMEM31 3
+GC 0
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/C4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/C4 Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid C4
+COMMD10 1231
+USP26 0
+DDX17 16358
+DDX11 867
+PTPN20B 0
+SLC35D3 2
+GLOD4 2912
+GIMAP7 0
+TXLNB 25
+MYO18A 4741
+ATG4B 2394
+IFI44L 784
+KHSRP 5513
+KCNAB3 34
+RET 669
+IQCG 229
+C20orf118 14
+GPIHBP1 0
+RASSF3 1277
+FUT8 7977
+LYSMD3 2029
+LMOD3 48
+HIPK1 47991
+HSPA8 76924
+TAS2R39 0
+NR2C2AP 1223
+INADL 8507
+TMEM31 14
+GC 0
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/E1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/E1 Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid E1
+COMMD10 964
+USP26 0
+DDX17 6995
+DDX11 916
+PTPN20B 0
+SLC35D3 1
+GLOD4 1807
+GIMAP7 1
+TXLNB 14
+MYO18A 1669
+ATG4B 1605
+IFI44L 268
+KHSRP 3162
+KCNAB3 28
+RET 2077
+IQCG 118
+C20orf118 6
+GPIHBP1 0
+RASSF3 507
+FUT8 4291
+LYSMD3 868
+LMOD3 19
+HIPK1 19201
+HSPA8 72195
+TAS2R39 0
+NR2C2AP 1293
+INADL 3443
+TMEM31 6
+GC 0
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/E2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/E2 Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid E2
+COMMD10 812
+USP26 0
+DDX17 8079
+DDX11 632
+PTPN20B 0
+SLC35D3 0
+GLOD4 1448
+GIMAP7 0
+TXLNB 15
+MYO18A 1457
+ATG4B 953
+IFI44L 302
+KHSRP 2624
+KCNAB3 34
+RET 1431
+IQCG 116
+C20orf118 13
+GPIHBP1 0
+RASSF3 575
+FUT8 4187
+LYSMD3 1141
+LMOD3 26
+HIPK1 28435
+HSPA8 61132
+TAS2R39 0
+NR2C2AP 761
+INADL 4415
+TMEM31 5
+GC 0
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/E3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/E3 Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid E3
+COMMD10 528
+USP26 0
+DDX17 5994
+DDX11 706
+PTPN20B 0
+SLC35D3 2
+GLOD4 1039
+GIMAP7 0
+TXLNB 6
+MYO18A 1497
+ATG4B 1185
+IFI44L 191
+KHSRP 2434
+KCNAB3 22
+RET 1490
+IQCG 79
+C20orf118 10
+GPIHBP1 0
+RASSF3 401
+FUT8 2974
+LYSMD3 749
+LMOD3 9
+HIPK1 20715
+HSPA8 42728
+TAS2R39 0
+NR2C2AP 726
+INADL 3094
+TMEM31 6
+GC 0
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/E4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/E4 Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+Geneid E4
+COMMD10 860
+USP26 0
+DDX17 6596
+DDX11 518
+PTPN20B 0
+SLC35D3 1
+GLOD4 1564
+GIMAP7 0
+TXLNB 17
+MYO18A 1121
+ATG4B 911
+IFI44L 269
+KHSRP 2509
+KCNAB3 10
+RET 1327
+IQCG 107
+C20orf118 9
+GPIHBP1 0
+RASSF3 568
+FUT8 4154
+LYSMD3 1076
+LMOD3 20
+HIPK1 22614
+HSPA8 67106
+TAS2R39 0
+NR2C2AP 902
+INADL 3441
+TMEM31 3
+GC 0
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/design_matrix.tabular.batch-effects.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/design_matrix.tabular.batch-effects.txt Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,9 @@
+sample-name Condition Batch
+C1 C 1
+C2 C 1
+C3 C 2
+C4 C 2
+E1 E 1
+E2 E 1
+E3 E 2
+E4 E 2
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/differentially_expressed_genes.batch-effects.tabular.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.batch-effects.tabular.txt Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,30 @@
+"" "genes" "logFC" "logCPM" "LR" "PValue" "FDR"
+"15" "RET" 1.95351498277649 13.2940435307943 70.7884298827703 3.9766237329402e-17 1.15322088255266e-15
+"24" "HSPA8" 0.607097086193171 18.9380827005326 18.3388522248215 1.84897696413517e-05 0.000268101659799599
+"12" "IFI44L" -0.663271215486821 11.7020333673755 10.9083408215917 0.00095732231526494 0.00925411571422775
+"10" "MYO18A" -0.607030110998538 14.1586814058554 8.85972925087567 0.0029153081194653 0.0208760330354099
+"4" "DDX11" 0.724160415576466 12.4597575302041 8.47565074268945 0.00359931604058791 0.0208760330354099
+"26" "NR2C2AP" 0.538450796715875 12.9331552590697 6.96854404518889 0.00829549660040106 0.0400949002352718
+"19" "RASSF3" -0.323616221214522 12.4964626371138 3.7019217136962 0.05434983717189 0.225163611140687
+"3" "DDX17" -0.20201533346913 16.1804174471336 2.4240349611365 0.119486805704499 0.43313967067881
+"2" "USP26" -2.19888949506255 4.27316802151059 2.23930233943077 0.1345416719394 0.433523165138066
+"27" "INADL" -0.171504044976009 15.2186072712825 1.49672630947915 0.221175760548511 0.641409705590681
+"21" "LYSMD3" -0.194806526906794 13.32535006408 1.33073961519612 0.248673682102714 0.655594252816247
+"16" "IQCG" -0.208260226810614 10.1622224199572 0.967867000668835 0.3252127194599 0.785930738694759
+"6" "SLC35D3" 0.789817686242913 4.62274850327991 0.638997348362095 0.424074105189856 0.922823270583707
+"14" "KCNAB3" 0.224776816841962 7.85434240586327 0.477050454902194 0.489761385678098 0.922823270583707
+"20" "FUT8" -0.100266057923686 15.3006920075591 0.448528599960923 0.503034528215111 0.922823270583707
+"1" "COMMD10" 0.129868119349192 12.8850203875481 0.43582621570193 0.509143873425494 0.922823270583707
+"9" "TXLNB" -0.183590716507963 7.2825391341052 0.269612199658876 0.603592030691037 0.990834288521076
+"28" "TMEM31" -0.231195333650393 6.16897757110336 0.252957033365721 0.615000592875151 0.990834288521076
+"17" "C20orf118" 0.163495741844413 6.73904801103973 0.171566008065263 0.678723780532467 1
+"7" "GLOD4" -0.0679141042839315 13.8710260882794 0.154827011727691 0.693964522467115 1
+"22" "LMOD3" 0.0791372844747401 7.60390982671528 0.0540610465986004 0.816141676483626 1
+"8" "GIMAP7" 0.195152071961945 4.26623062002702 0.0194047958154413 0.889212222514442 1
+"23" "HIPK1" -0.00677640818725696 17.7957744498389 0.00305975315578166 0.955887483884316 1
+"11" "ATG4B" 0.00473988666702576 13.5252482941211 0.000418258796557325 0.983683299966982 1
+"13" "KHSRP" -0.00184247000571017 14.7206397592923 0.000136964665301775 0.990662418229392 1
+"5" "PTPN20B" 0 4.09631395702755 0 1 1
+"18" "GPIHBP1" 0 4.09631395702755 0 1 1
+"25" "TAS2R39" 0 4.09631395702755 0 1 1
+"29" "GC" 0 4.09631395702755 0 1 1
diff -r 5d38abf7e4b6 -r bde663b872d9 test-data/Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt Mon Dec 14 11:01:38 2015 -0500
@@ -0,0 +1,7 @@
+"" "genes" "logFC" "logCPM" "LR" "PValue" "FDR"
+"15" "RET" 1.94897640107286 13.2940435307943 77.6545995415986 1.22730171935022e-18 3.55917498611563e-17
+"24" "HSPA8" 0.607138087178614 18.9380827005326 16.8408380186893 4.06490891119454e-05 0.000589411792123208
+"12" "IFI44L" -0.665544707287881 11.7020333673755 13.7144720195324 0.000212808308075529 0.00205714697806344
+"10" "MYO18A" -0.608389235629078 14.1586814058554 10.0030349277278 0.00156282461006963 0.0113304784230048
+"4" "DDX11" 0.719283453206409 12.4597575302041 9.1203698809081 0.00252778847312638 0.014661173144133
+"26" "NR2C2AP" 0.538719097450497 12.9331552590697 7.88314604309164 0.00498976028708414 0.0241171747209067