annotate edgeR.pl @ 4:a8a56766694e draft default tip

Uploaded
author amawla
date Mon, 24 Aug 2015 18:50:49 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
a8a56766694e Uploaded
amawla
parents:
diff changeset
1 #!/bin/perl
a8a56766694e Uploaded
amawla
parents:
diff changeset
2
a8a56766694e Uploaded
amawla
parents:
diff changeset
3 #EdgeR.pl Version 0.0.3
a8a56766694e Uploaded
amawla
parents:
diff changeset
4 #Contributors: Monica Britton, Blythe Durbin-Johnson, Joseph Fass, Nikhil Joshi, Alex Mawla
a8a56766694e Uploaded
amawla
parents:
diff changeset
5
a8a56766694e Uploaded
amawla
parents:
diff changeset
6 use strict;
a8a56766694e Uploaded
amawla
parents:
diff changeset
7 use warnings;
a8a56766694e Uploaded
amawla
parents:
diff changeset
8 use Getopt::Std;
a8a56766694e Uploaded
amawla
parents:
diff changeset
9 use File::Basename;
a8a56766694e Uploaded
amawla
parents:
diff changeset
10 use File::Path qw(make_path remove_tree);
a8a56766694e Uploaded
amawla
parents:
diff changeset
11
a8a56766694e Uploaded
amawla
parents:
diff changeset
# Turn on autoflush for the currently selected output handle.
$| = 1;

# Defaults for the options that have one; getopts() below overwrites them with
# whatever is supplied on the command line.
my %OPTIONS = (a => "glm", d => "tag", f => "BH", r => 5, u => "movingave");

# Switches followed by ':' take a value; l, m and t are boolean flags.
getopts('a:d:e:f:h:lmn:o:r:tu:', \%OPTIONS);

# With no positional arguments left there is no matrix to work on, so print the
# usage text (showing the effective defaults) and stop.
die qq(
Usage: edgeR.pl [OPTIONS] factor::factor1::levels [factor::factor2::levels ...] cp::cont_pred1::values [cp::cont_pred2::values ...] cnt::contrast1 [cnt::contrast2] matrix

OPTIONS: -a STR Type Of Analysis [glm, pw, limma] (default: $OPTIONS{a})
         -d STR The dispersion estimate to use for GLM analysis [tag] (default: $OPTIONS{d})
         -e STR Path to place additional output files
         -f STR False discovery rate adjustment method [BH] (default: $OPTIONS{f})
         -h STR Name of html file for additional files
         -l     Output the normalised digital gene expression matrix in log2 format (only applicable when using limma and -n is also specified)
         -m     Perform all pairwise comparisons
         -n STR File name to output the normalised digital gene expression matrix (only applicable when using glm or limma model)
         -o STR File name to output csv file with results
         -r INT Common Dispersion Rowsum Filter, only applicable when 1 factor analysis selected (default: $OPTIONS{r})
         -t     Estimate Tagwise Disp when performing 1 factor analysis
         -u STR Method for allowing the prior distribution for the dispersion to be abundance-dependent ["movingave"] (default: $OPTIONS{u})

) if(!@ARGV);
a8a56766694e Uploaded
amawla
parents:
diff changeset
36
a8a56766694e Uploaded
amawla
parents:
diff changeset
# The last positional argument is the count matrix; whatever remains in @ARGV
# is a factor::, cp:: or cnt:: specification that is parsed later.
my $matrix = pop @ARGV;

# Every generated file (R script, logs, PDFs) is written below the -e
# directory, so refuse to continue without it instead of writing to "/".
die "No output directory specified, use -e to set the path for additional output files\n"
    if(!defined $OPTIONS{e} || $OPTIONS{e} eq "");

make_path($OPTIONS{e});
die "Cannot create directory $OPTIONS{e}\n" if(!-d $OPTIONS{e});

# The bareword handle Rcmd is shared with the rest of the script, which keeps
# appending R code to it until it is closed just before R is run.
open(Rcmd, ">", "$OPTIONS{e}/r_script.R") or die "Cannot open $OPTIONS{e}/r_script.R: $!\n\n";

# R preamble, part 1: send R's output and messages to r_script.err and load
# the two analysis libraries.
print Rcmd "
zz <- file(\"$OPTIONS{e}/r_script.err\", open=\"wt\")
sink(zz)
sink(zz, type=\"message\")

library(edgeR)
library(limma)
";

# R preamble, part 2: read the matrix. Row 1 holds ':'-separated entries whose
# second element becomes the group name, row 2 holds the column names and
# column 1 the tag names; the remaining cells are converted to numeric counts.
print Rcmd "
toc <- read.table(\"$matrix\", sep=\"\\t\", comment=\"\", as.is=T)
groups <- sapply(toc[1, -1], strsplit, \":\")
for(i in 1:length(groups)) { g <- make.names(groups[[i]][2]); names(groups)[i] <- g; groups[[i]] <- groups[[i]][-2] }
colnames(toc) <- make.names(toc[2,])
toc[,1] <- gsub(\",\", \".\", toc[,1])
tagnames <- toc[-(1:2), 1]
rownames(toc) <- toc[,1]
toc <- toc[-(1:2), -1]
for(i in colnames(toc)) toc[, i] <- as.numeric(toc[,i])
norm_factors <- calcNormFactors(as.matrix(toc))
";

# R preamble, part 3: list every pair of distinct groups, build the DGEList,
# and draw the MA plots (raw and normalised, one page per pair) and MDS plot.
# 'tested' is left as an empty list for the analysis code to fill.
print Rcmd "
pw_tests <- list()
uniq_groups <- unique(names(groups))
for(i in 1:(length(uniq_groups)-1)) for(j in (i+1):length(uniq_groups)) pw_tests[[length(pw_tests)+1]] <- c(uniq_groups[i], uniq_groups[j])
DGE <- DGEList(toc, lib.size=norm_factors*colSums(toc), group=names(groups))
pdf(\"$OPTIONS{e}/MA_plots_normalisation.pdf\", width=14)
for(i in 1:length(pw_tests)) {
  j <- c(which(names(groups) == pw_tests[[i]][1])[1], which(names(groups) == pw_tests[[i]][2])[1])
  par(mfrow = c(1, 2))
  maPlot(toc[, j[1]], toc[, j[2]], normalize = TRUE, pch = 19, cex = 0.2, ylim = c(-10, 10), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]]))
  grid(col = \"blue\")
  abline(h = log2(norm_factors[j[2]]), col = \"red\", lwd = 4)
  maPlot(DGE\$counts[, j[1]]/DGE\$samples\$lib.size[j[1]], DGE\$counts[, j[2]]/DGE\$samples\$lib.size[j[2]], normalize = FALSE, pch = 19, cex = 0.2, ylim = c(-8, 8), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]], \"Normalised\"))
  grid(col = \"blue\")
}
dev.off()
pdf(file=\"$OPTIONS{e}/MDSplot.pdf\")
plotMDS(DGE, main=\"MDS Plot\", col=as.numeric(factor(names(groups)))+1, xlim=c(-3,3))
dev.off()
tested <- list()
";
a8a56766694e Uploaded
amawla
parents:
diff changeset
80
a8a56766694e Uploaded
amawla
parents:
diff changeset
# State collected from the positional arguments and used when the analysis
# specific R code is generated.
my $all_cont;      # quoted, comma-separated contrast list spliced into the R script
my @add_cont;      # contrasts supplied as cnt::<contrast>
my @fact;          # per factor: its levels as a quoted, comma-separated R list
my @fact_names;    # per factor: its name, sanitised for use as an R variable
my @cp;            # per continuous predictor: its values, comma-separated
my @cp_names;      # per continuous predictor: its sanitised name

# Characters replaced by '.' in factor and predictor names before the names
# are written into the R script as variable names.
my $unsafe_in_name = qr/[ \?\(\)\[\]\/\\=+<>:;\"\',\*\^\|\&-]/;

# Each remaining argument has the form kind::name[::values]; values are
# ':'-separated. An empty @ARGV simply leaves all of the lists empty.
foreach my $input (@ARGV) {
    my ($kind, $name, $values) = split "::", $input;
    $kind = "" if(!defined $kind);

    if($kind eq "factor" || $kind eq "cp") {
        # Both kinds need a name and a value list; without them the generated
        # R assignment would be syntactically broken.
        die("Malformed Input: $input\n")
            if(!defined $name || $name eq "" || !defined $values || $values eq "");
        $name =~ s/$unsafe_in_name/./g;
        if($kind eq "factor") {
            push @fact_names, $name;
            # Levels become quoted R strings: a:b -> "a", "b"
            $values =~ s/:/\", \"/g;
            push @fact, "\"".$values."\"";
        } else {
            push @cp_names, $name;
            # Values stay unquoted: 1:2 -> 1, 2
            $values =~ s/:/, /g;
            push @cp, $values;
        }
    } elsif($kind eq "cnt") {
        die("Malformed Input: $input\n") if(!defined $name || $name eq "");
        push @add_cont, $name;
    } else {
        die("Unknown Input: $input\n");
    }
}
a8a56766694e Uploaded
amawla
parents:
diff changeset
108
a8a56766694e Uploaded
amawla
parents:
diff changeset
# Write the R code shared by the glm and limma analyses: one vector per factor
# and per continuous predictor, then a design matrix without intercept built
# from the group factor plus those predictors, with the "group_fact" and
# "factor(<name>)" prefixes stripped from its column names.
sub _print_design_setup {
    for my $idx (0 .. $#fact_names) {
        print Rcmd "
$fact_names[$idx] <- c($fact[$idx])
";
    }
    for my $idx (0 .. $#cp_names) {
        print Rcmd "
$cp_names[$idx] <- c($cp[$idx])
";
    }
    my $all_fact = join("", map { " + factor($_)" } @fact_names);
    my $all_cp = @cp_names ? " + ".join(" + ", @cp_names) : "";
    print Rcmd "
group_fact <- factor(names(groups))
design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp})
colnames(design) <- sub(\"group_fact\", \"\", colnames(design))
";
    foreach my $fct (@fact_names) {
        print Rcmd "
colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design)))
";
    }
    return;
}

# Write the R code that fills 'cont' with contrast strings: the cnt::
# contrasts (with the labels stored in 'groups' rewritten to the group names),
# followed by all pairwise contrasts when -m is set. Dies when neither source
# of contrasts was requested.
sub _print_contrast_setup {
    if(!defined $OPTIONS{m} && !@add_cont) {
        die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n");
    }
    if(@add_cont) {
        $all_cont = "\"".join("\", \"", @add_cont)."\"";
        print Rcmd "
cont <- c(${all_cont})
for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont)
for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont)
";
    } else {
        print Rcmd "
cont <- NULL
";
    }
    if(defined $OPTIONS{m}) {
        print Rcmd "
for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\"))
";
    }
    return;
}

# Generate the analysis specific part of the R script for the -a mode.
if($OPTIONS{a} eq "pw") {
    # Pairwise exact tests: common dispersion first, optionally refined to
    # trended and tagwise dispersion (with a BCV plot) when -t is given.
    print Rcmd "
disp <- estimateCommonDisp(DGE, rowsum.filter=$OPTIONS{r})
";
    if(defined $OPTIONS{t}) {
        print Rcmd "
disp <- estimateTrendedDisp (disp)
disp <- estimateTagwiseDisp(disp, trend=\"$OPTIONS{u}\")
pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\")
plotBCV(disp, cex=0.4)
abline(h=disp\$common.dispersion, col=\"firebrick\", lwd=3)
dev.off()
";
    }
    # One exact test and one smear plot per group pair. The test result holds
    # -1/0/1 per tag, so significant tags are counted with (dt != 0); a plain
    # sum would let up and down calls cancel each other out. The point size
    # chosen from the number of tags is passed on to plotSmear.
    print Rcmd "
for(i in 1:length(pw_tests)) {
  tested[[i]] <- exactTest(disp, pair=pw_tests[[i]])
  names(tested)[i] <- paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\")
}
pdf(file=\"$OPTIONS{e}/Smear_Plots.pdf\")
for(i in 1:length(pw_tests)) {
  dt <- decideTestsDGE(tested[[i]], p.value=0.05, adjust.method=\"$OPTIONS{f}\")
  if(sum(dt != 0) > 0) {
    de_tags <- rownames(disp)[which(dt != 0)]
    ttl <- \"Diff. Exp. Genes With adj. Pvalue < 0.05\"
  } else {
    de_tags <- rownames(topTags(tested[[i]], n=100)\$table)
    ttl <- \"Top 100 tags\"
  }

  if(length(dt) < 5000) {
    pointcex = 0.5
  } else {
    pointcex = 0.2
  }
  plotSmear(disp, pair=pw_tests[[i]], de.tags = de_tags, main = paste(\"Smear Plot\", names(tested)[i]), cex=pointcex)
  abline(h = c(-1, 1), col = \"blue\")
  legend(\"topright\", c(\"2 Fold Change\", ttl) , lty=c(1, NA), pch=c(NA, 19), pt.cex=0.5, col=c(\"blue\", \"red\"), bty=\"n\")
}
dev.off()
";
}
elsif($OPTIONS{a} eq "glm") {
    _print_design_setup();

    # NOTE(review): 'disp' is only created for -d tag, yet the code emitted
    # below always uses it; any other -d value yields an R script that fails.
    if($OPTIONS{d} eq "tag") {
        print Rcmd "
disp <- estimateGLMCommonDisp(DGE, design)
disp <- estimateGLMTrendedDisp(disp, design)
disp <- estimateGLMTagwiseDisp(disp, design)
pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\")
plotBCV(disp, cex=0.4)
dev.off()
";
    }

    _print_contrast_setup();

    # Fit the model once, run a likelihood ratio test per contrast and draw a
    # smear plot for each; significant tags are counted with (dt != 0).
    print Rcmd "
fit <- glmFit(disp, design)
cont <- makeContrasts(contrasts=cont, levels=design)
for(i in colnames(cont)) tested[[i]] <- glmLRT(fit, contrast=cont[,i])
pdf(file=\"$OPTIONS{e}/Smear_Plots.pdf\")
for(i in colnames(cont)) {
  dt <- decideTestsDGE(tested[[i]], p.value=0.05, adjust.method=\"$OPTIONS{f}\")
  if(sum(dt != 0) > 0) {
    de_tags <- rownames(disp)[which(dt != 0)]
    ttl <- \"Diff. Exp. Genes With adj. Pvalue < 0.05\"
  } else {
    de_tags <- rownames(topTags(tested[[i]], n=100)\$table)
    ttl <- \"Top 100 tags\"
  }

  if(length(dt) < 5000) {
    pointcex = 0.5
  } else {
    pointcex = 0.2
  }
  plotSmear(disp, de.tags = de_tags, main = paste(\"Smear Plot\", i), cex=pointcex)
  abline(h = c(-1, 1), col = \"blue\")
  legend(\"topright\", c(\"2 Fold Change\", ttl) , lty=c(1, NA), pch=c(NA, 19), pt.cex=0.5, col=c(\"blue\", \"red\"), bty=\"n\")
}
dev.off()

";
    # -n: write the fitted values of the GLM to the requested file.
    if(defined $OPTIONS{n}) {
        print Rcmd "
tab <- data.frame(ID=rownames(fit\$fitted.values), fit\$fitted.values, stringsAsFactors=F)
write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F)
";
    }
} elsif($OPTIONS{a} eq "limma") {
    _print_design_setup();

    # Keep rows with cpm > 1 in at least one column, run voom, draw the
    # voom and MDS plots and fit the linear model.
    print Rcmd "
isexpr <- rowSums(cpm(toc)>1) >= 1
toc <- toc[isexpr, ]
pdf(file=\"$OPTIONS{e}/LIMMA_voom.pdf\")
y <- voom(toc, design, plot=TRUE, lib.size=colSums(toc)*norm_factors)
dev.off()

pdf(file=\"$OPTIONS{e}/LIMMA_MDS_plot.pdf\")
plotMDS(y, labels=colnames(toc), col=as.numeric(factor(names(groups)))+1, gene.selection=\"common\")
dev.off()
fit <- lmFit(y, design)
";
    # -n: write the voom expression values, as they are with -l and as 2^value
    # otherwise.
    if(defined $OPTIONS{n}) {
        my $expr_values = defined $OPTIONS{l} ? "y\$E" : "2^y\$E";
        print Rcmd "
tab <- data.frame(ID=rownames(y\$E), $expr_values, stringsAsFactors=F)
";
        print Rcmd "
write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F)
";
    }

    _print_contrast_setup();

    print Rcmd "
cont <- makeContrasts(contrasts=cont, levels=design)
fit2 <- contrasts.fit(fit, cont)
fit2 <- eBayes(fit2)
";
} else {
    die("Analysis type $OPTIONS{a} not found\n");
}
a8a56766694e Uploaded
amawla
parents:
diff changeset
# Emit the R code that assembles the results table 'tab': one group of columns
# per tested contrast, each column name prefixed with "<contrast>:".
if($OPTIONS{a} ne "limma") {
    # edgeR modes: take the full topTags table of every entry in 'tested' and
    # reorder its rows to the order of 'tagnames' before binding the columns.
    print Rcmd "
options(digits = 6)
tab <- NULL
for(i in names(tested)) {
  tab_tmp <- topTags(tested[[i]], n=Inf, adjust.method=\"$OPTIONS{f}\")[[1]]
  colnames(tab_tmp) <- paste(i, colnames(tab_tmp), sep=\":\")
  tab_tmp <- tab_tmp[tagnames,]
  if(is.null(tab)) {
    tab <- tab_tmp
  } else tab <- cbind(tab, tab_tmp)
}
tab <- cbind(Feature=rownames(tab), tab)
";
} else {
    # limma mode: one unsorted topTable per coefficient of fit2.
    print Rcmd "
tab <- NULL
options(digits = 6)
for(i in colnames(fit2)) {
  tab_tmp <- topTable(fit2, coef=i, n=Inf, sort.by=\"none\", adjust.method=\"$OPTIONS{f}\")
  colnames(tab_tmp)[-1] <- paste(i, colnames(tab_tmp)[-1], sep=\":\")
  if(is.null(tab)) {
    tab <- tab_tmp
  } else tab <- cbind(tab, tab_tmp)
}
tab <- cbind(Feature=rownames(tab), tab)
";
}

# Write the table to the -o file and undo the output redirection that was set
# up at the top of the R script.
print Rcmd "
write.table(tab, \"$OPTIONS{o}\", quote=F, sep=\"\\t\", row.names=F)
sink(type=\"message\")
sink()
";

# Buffered write errors only surface at close, so check it before running R.
close(Rcmd) or die "Cannot close $OPTIONS{e}/r_script.R: $!\n";

# The redirections need a shell, so single-quote both paths (escaping embedded
# single quotes) to keep directories with spaces or metacharacters intact.
my ($r_script, $r_stdout) = map {
    (my $path = "$OPTIONS{e}/$_") =~ s/'/'\\''/g;
    "'$path'";
} ("r_script.R", "r_script.out");

# Only warn when R fails: the rest of the script still writes the HTML index
# that links to r_script.err, where R's messages were redirected.
my $r_status = system("R --no-restore --no-save --no-readline < $r_script > $r_stdout");
if($r_status != 0) {
    warn "R did not finish successfully (wait status $r_status), see $OPTIONS{e}/r_script.err\n";
}
a8a56766694e Uploaded
amawla
parents:
diff changeset
368
a8a56766694e Uploaded
amawla
parents:
diff changeset
# Write the HTML index that links to the additional output files. The hrefs
# are bare file names, i.e. relative to wherever the page is served from.
# Without -h there is no file to write, so the index is skipped.
if(defined $OPTIONS{h}) {
    open(HTML, ">", $OPTIONS{h}) or die "Cannot open $OPTIONS{h}: $!\n";

    # Plots produced for every analysis type.
    my @linked_files = ("MA_plots_normalisation.pdf", "MDSplot.pdf");

    # Plots that depend on the analysis type and its options.
    if($OPTIONS{a} eq "pw") {
        push @linked_files, "Tagwise_Dispersion_vs_Abundance.pdf" if(defined $OPTIONS{t});
        push @linked_files, "Smear_Plots.pdf";
    } elsif($OPTIONS{a} eq "glm" && $OPTIONS{d} eq "tag") {
        push @linked_files, "Tagwise_Dispersion_vs_Abundance.pdf", "Smear_Plots.pdf";
    } elsif($OPTIONS{a} eq "limma") {
        push @linked_files, "LIMMA_MDS_plot.pdf", "LIMMA_voom.pdf";
    }

    # The generated R script and its two log files are always listed last.
    push @linked_files, "r_script.R", "r_script.out", "r_script.err";

    print HTML "<html><head><title>EdgeR: Empirical analysis of digital gene expression data</title></head><body><h3>EdgeR Additional Files:</h3><p><ul>\n";
    foreach my $file (@linked_files) {
        print HTML "<li><a href=$file>$file</a></li>\n";
    }
    # Close the body and html elements opened in the header line above.
    print HTML "</ul></p></body></html>\n";

    close(HTML) or die "Cannot close $OPTIONS{h}: $!\n";
}
a8a56766694e Uploaded
amawla
parents:
diff changeset
390