Mercurial > repos > fcaramia > edger
comparison edgeR.pl @ 11:e5fcbabbdea7 draft
Uploaded
author | fcaramia |
---|---|
date | Wed, 21 Aug 2013 22:13:03 -0400 |
parents | 674c75219f15 |
children |
comparison
equal
deleted
inserted
replaced
10:86292c2b0ba9 | 11:e5fcbabbdea7 |
---|---|
6 use File::Basename; | 6 use File::Basename; |
7 use File::Path qw(make_path remove_tree); | 7 use File::Path qw(make_path remove_tree); |
8 $| = 1; | 8 $| = 1; |
9 | 9 |
10 # Grab and set all options | 10 # Grab and set all options |
11 my %OPTIONS = (a => "glm", d => "tag", f => "BH", p => 0.3, r => 5, u => "movingave"); | 11 my %OPTIONS = (a => "glm", d => "tag", f => "BH", r => 5, u => "movingave"); |
12 | 12 |
13 getopts('a:d:e:f:h:lmn:o:p:r:tu:', \%OPTIONS); | 13 getopts('a:d:e:f:h:lmn:o:r:tu:', \%OPTIONS); |
14 | 14 |
15 die qq( | 15 die qq( |
16 Usage: edgeR.pl [OPTIONS] factor::factor1::levels [factor::factor2::levels ...] cp::cont_pred1::values [cp::cont_pred2::values ...] cnt::contrast1 [cnt::contrast2] matrix | 16 Usage: edgeR.pl [OPTIONS] factor::factor1::levels [factor::factor2::levels ...] cp::cont_pred1::values [cp::cont_pred2::values ...] cnt::contrast1 [cnt::contrast2] matrix |
17 | 17 |
18 OPTIONS: -a STR Type Of Analysis [glm, pw, limma] (default: $OPTIONS{a}) | 18 OPTIONS: -a STR Type Of Analysis [glm, pw, limma] (default: $OPTIONS{a}) |
22 -h STR Name of html file for additional files | 22 -h STR Name of html file for additional files |
23 -l Output the normalised digital gene expression matrix in log2 format (only applicable when using limma and -n is also specified) | 23 -l Output the normalised digital gene expression matrix in log2 format (only applicable when using limma and -n is also specified) |
24 -m Perform all pairwise comparisons | 24 -m Perform all pairwise comparisons |
25 -n STR File name to output the normalised digital gene expression matrix (only applicable when usinf glm or limma model) | 25 -n STR File name to output the normalised digital gene expression matrix (only applicable when usinf glm or limma model) |
26 -o STR File name to output csv file with results | 26 -o STR File name to output csv file with results |
27 -p FLT The proportion of all tags/genes to be used for the locally weighted estimation of the tagwise dispersion, ony applicable when 1 factor analysis selected (default: $OPTIONS{p}) | |
28 -r INT Common Dispersion Rowsum Filter, ony applicable when 1 factor analysis selected (default: $OPTIONS{r}) | 27 -r INT Common Dispersion Rowsum Filter, ony applicable when 1 factor analysis selected (default: $OPTIONS{r}) |
29 -t Estimate Tagwise Disp when performing 1 factor analysis | 28 -t Estimate Tagwise Disp when performing 1 factor analysis |
30 -u STR Method for allowing the prior distribution for the dispersion to be abundance- dependent ["movingave", "tricube", "none"] (default: $OPTIONS{u}) | 29 -u STR Method for allowing the prior distribution for the dispersion to be abundance- dependent ["movingave", "tricube", "none"] (default: $OPTIONS{u}) |
31 | 30 |
32 ) if(!@ARGV); | 31 ) if(!@ARGV); |
33 | 32 |
34 my $matrix = pop @ARGV; | 33 my $matrix = pop @ARGV; |
35 | 34 |
36 make_path($OPTIONS{e}); | 35 make_path($OPTIONS{e}); |
37 open(Rcmd,">$OPTIONS{e}/r_script.R") or die "Cannot open $OPTIONS{e}/r_script.R\n\n"; | 36 open(Rcmd,">$OPTIONS{e}/r_script.R") or die "Cannot open $OPTIONS{e}/r_script.R\n\n"; |
38 print Rcmd " | 37 print Rcmd " |
39 zz <- file(\"$OPTIONS{e}/r_script.err\", open=\"wt\") | 38 zz <- file(\"$OPTIONS{e}/r_script.err\", open=\"wt\") |
40 sink(zz) | 39 sink(zz) |
41 sink(zz, type=\"message\") | 40 sink(zz, type=\"message\") |
42 | 41 |
43 library(edgeR) | 42 library(edgeR) |
44 library(limma) | 43 library(limma) |
45 | 44 |
46 # read in matrix and groups | 45 # read in matrix and groups |
47 toc <- read.table(\"$matrix\", sep=\"\\t\", comment=\"\", as.is=T) | 46 toc <- read.table(\"$matrix\", sep=\"\\t\", comment=\"\", as.is=T) |
48 groups <- sapply(toc[1, -1], strsplit, \":\") | 47 groups <- sapply(toc[1, -1], strsplit, \":\") |
49 for(i in 1:length(groups)) { g <- make.names(groups[[i]][2]); names(groups)[i] <- g; groups[[i]] <- groups[[i]][-2] } | 48 for(i in 1:length(groups)) { g <- make.names(groups[[i]][2]); names(groups)[i] <- g; groups[[i]] <- groups[[i]][-2] } |
50 colnames(toc) <- make.names(toc[2,]) | 49 colnames(toc) <- make.names(toc[2,]) |
51 toc[,1] <- gsub(\",\", \".\", toc[,1]) | 50 toc[,1] <- gsub(\",\", \".\", toc[,1]) |
52 tagnames <- toc[-(1:2), 1] | 51 tagnames <- toc[-(1:2), 1] |
53 rownames(toc) <- toc[,1] | 52 rownames(toc) <- toc[,1] |
54 toc <- toc[-(1:2), -1] | 53 toc <- toc[-(1:2), -1] |
55 for(i in colnames(toc)) toc[, i] <- as.numeric(toc[,i]) | 54 for(i in colnames(toc)) toc[, i] <- as.numeric(toc[,i]) |
56 norm_factors <- calcNormFactors(as.matrix(toc)) | 55 norm_factors <- calcNormFactors(as.matrix(toc)) |
57 | 56 |
58 pw_tests <- list() | 57 pw_tests <- list() |
59 uniq_groups <- unique(names(groups)) | 58 uniq_groups <- unique(names(groups)) |
60 for(i in 1:(length(uniq_groups)-1)) for(j in (i+1):length(uniq_groups)) pw_tests[[length(pw_tests)+1]] <- c(uniq_groups[i], uniq_groups[j]) | 59 for(i in 1:(length(uniq_groups)-1)) for(j in (i+1):length(uniq_groups)) pw_tests[[length(pw_tests)+1]] <- c(uniq_groups[i], uniq_groups[j]) |
61 DGE <- DGEList(toc, lib.size=norm_factors*colSums(toc), group=names(groups)) | 60 DGE <- DGEList(toc, lib.size=norm_factors*colSums(toc), group=names(groups)) |
62 pdf(\"$OPTIONS{e}/MA_plots_normalisation.pdf\", width=14) | 61 pdf(\"$OPTIONS{e}/MA_plots_normalisation.pdf\", width=14) |
63 for(i in 1:length(pw_tests)) { | 62 for(i in 1:length(pw_tests)) { |
64 j <- c(which(names(groups) == pw_tests[[i]][1])[1], which(names(groups) == pw_tests[[i]][2])[1]) | 63 j <- c(which(names(groups) == pw_tests[[i]][1])[1], which(names(groups) == pw_tests[[i]][2])[1]) |
65 par(mfrow = c(1, 2)) | 64 par(mfrow = c(1, 2)) |
66 maPlot(toc[, j[1]], toc[, j[2]], normalize = TRUE, pch = 19, cex = 0.2, ylim = c(-10, 10), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]])) | 65 maPlot(toc[, j[1]], toc[, j[2]], normalize = TRUE, pch = 19, cex = 0.2, ylim = c(-10, 10), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]])) |
67 grid(col = \"blue\") | 66 grid(col = \"blue\") |
68 abline(h = log2(norm_factors[j[2]]), col = \"red\", lwd = 4) | 67 abline(h = log2(norm_factors[j[2]]), col = \"red\", lwd = 4) |
69 maPlot(DGE\$counts[, j[1]]/DGE\$samples\$lib.size[j[1]], DGE\$counts[, j[2]]/DGE\$samples\$lib.size[j[2]], normalize = FALSE, pch = 19, cex = 0.2, ylim = c(-8, 8), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]], \"Normalised\")) | 68 maPlot(DGE\$counts[, j[1]]/DGE\$samples\$lib.size[j[1]], DGE\$counts[, j[2]]/DGE\$samples\$lib.size[j[2]], normalize = FALSE, pch = 19, cex = 0.2, ylim = c(-8, 8), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]], \"Normalised\")) |
70 grid(col = \"blue\") | 69 grid(col = \"blue\") |
71 } | 70 } |
72 dev.off() | 71 dev.off() |
73 pdf(file=\"$OPTIONS{e}/MDSplot.pdf\") | 72 pdf(file=\"$OPTIONS{e}/MDSplot.pdf\") |
74 plotMDS(DGE, main=\"MDS Plot\", col=as.numeric(factor(names(groups)))+1, xlim=c(-3,3)) | 73 plotMDS(DGE, main=\"MDS Plot\", col=as.numeric(factor(names(groups)))+1, xlim=c(-3,3)) |
75 dev.off() | 74 dev.off() |
76 tested <- list() | 75 tested <- list() |
77 "; | 76 "; |
78 | 77 |
79 my $all_cont; | 78 my $all_cont; |
80 my @add_cont; | 79 my @add_cont; |
81 my @fact; | 80 my @fact; |
104 } | 103 } |
105 } | 104 } |
106 | 105 |
107 if($OPTIONS{a} eq "pw") { | 106 if($OPTIONS{a} eq "pw") { |
108 print Rcmd " | 107 print Rcmd " |
109 disp <- estimateCommonDisp(DGE, rowsum.filter=$OPTIONS{r}) | 108 disp <- estimateCommonDisp(DGE, rowsum.filter=$OPTIONS{r}) |
110 "; | 109 "; |
111 if(defined $OPTIONS{t}) { | 110 if(defined $OPTIONS{t}) { |
112 print Rcmd " | 111 print Rcmd " |
113 disp <- estimateTagwiseDisp(disp, trend=\"$OPTIONS{u}\", prop.used=$OPTIONS{p}) | 112 disp <- estimateTagwiseDisp(disp, trend=\"$OPTIONS{u}\") |
114 pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\") | 113 pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\") |
115 plot(log2(1e06*disp\$conc\$conc.common), disp\$tagwise.dispersion, xlab=\"Counts per million (log2 scale)\", ylab=\"Tagwise dispersion\") | 114 plotBCV(disp, cex=0.4) |
116 abline(h=disp\$common.dispersion, col=\"firebrick\", lwd=3) | 115 abline(h=disp\$common.dispersion, col=\"firebrick\", lwd=3) |
117 dev.off() | 116 dev.off() |
118 " | 117 "; |
119 } | 118 } |
120 print Rcmd " | 119 print Rcmd " |
121 for(i in 1:length(pw_tests)) { | 120 for(i in 1:length(pw_tests)) { |
122 tested[[i]] <- exactTest(disp, pair=pw_tests[[i]]) | 121 tested[[i]] <- exactTest(disp, pair=pw_tests[[i]]) |
123 names(tested)[i] <- paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\") | 122 names(tested)[i] <- paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\") |
124 } | 123 } |
125 pdf(file=\"$OPTIONS{e}/Smear_Plots.pdf\") | 124 pdf(file=\"$OPTIONS{e}/Smear_Plots.pdf\") |
126 for(i in 1:length(pw_tests)) { | 125 for(i in 1:length(pw_tests)) { |
127 if(nrow(decideTestsDGE(tested[[i]] , p.value=0.05)) > 0) { | 126 dt <- decideTestsDGE(tested[[i]], p.value=0.05, adjust.method=\"$OPTIONS{f}\") |
128 de_tags <- rownames(decideTestsDGE(tested[[i]] , p.value=0.05, adjust.method=\"$OPTIONS{f}\")) | 127 if(sum(dt) > 0) { |
129 ttl <- \"(Diff. Exp. Genes With adj. Pvalue < 0.05 highlighted)\" | 128 de_tags <- rownames(disp)[which(dt != 0)] |
130 } else { | 129 ttl <- \"Diff. Exp. Genes With adj. Pvalue < 0.05\" |
131 de_tags <- rownames(topTags(tested[[i]], n=100)\$table) | 130 } else { |
132 ttl <- \"(Top 100 tags highlighted)\" | 131 de_tags <- rownames(topTags(tested[[i]], n=100)\$table) |
133 } | 132 ttl <- \"Top 100 tags\" |
134 | 133 } |
135 plotSmear(disp, pair=pw_tests[[i]], de.tags = de_tags, main = paste(\"FC plot\", ttl)) | 134 |
136 abline(h = c(-2, 2), col = \"dodgerblue\") | 135 if(length(dt) < 5000) { |
137 } | 136 pointcex = 0.5 |
138 dev.off() | 137 } else { |
139 "; | 138 pointcex = 0.2 |
139 } | |
140 plotSmear(disp, pair=pw_tests[[i]], de.tags = de_tags, main = paste(\"Smear Plot\", names(tested)[i]), cex=0.5) | |
141 abline(h = c(-1, 1), col = \"blue\") | |
142 legend(\"topright\", c(\"2 Fold Change\", ttl) , lty=c(1, NA), pch=c(NA, 19), pt.cex=0.5, col=c(\"blue\", \"red\"), bty=\"n\") | |
143 } | |
144 dev.off() | |
145 "; | |
140 } elsif($OPTIONS{a} eq "glm") { | 146 } elsif($OPTIONS{a} eq "glm") { |
141 for(my $fct = 0; $fct <= $#fact_names; $fct++) { | 147 for(my $fct = 0; $fct <= $#fact_names; $fct++) { |
142 print Rcmd " | 148 print Rcmd " |
143 $fact_names[$fct] <- c($fact[$fct]) | 149 $fact_names[$fct] <- c($fact[$fct]) |
144 "; | 150 "; |
145 } | 151 } |
146 for(my $fct = 0; $fct <= $#cp_names; $fct++) { | 152 for(my $fct = 0; $fct <= $#cp_names; $fct++) { |
147 print Rcmd " | 153 print Rcmd " |
148 $cp_names[$fct] <- c($cp[$fct]) | 154 $cp_names[$fct] <- c($cp[$fct]) |
149 "; | 155 "; |
150 } | 156 } |
151 my $all_fact = ""; | 157 my $all_fact = ""; |
152 if(@fact_names) { | 158 if(@fact_names) { |
153 foreach (@fact_names) { | 159 foreach (@fact_names) { |
154 $all_fact .= " + factor($_)"; | 160 $all_fact .= " + factor($_)"; |
157 my $all_cp = ""; | 163 my $all_cp = ""; |
158 if(@cp_names) { | 164 if(@cp_names) { |
159 $all_cp = " + ".join(" + ", @cp_names); | 165 $all_cp = " + ".join(" + ", @cp_names); |
160 } | 166 } |
161 print Rcmd " | 167 print Rcmd " |
162 group_fact <- factor(names(groups)) | 168 group_fact <- factor(names(groups)) |
163 design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp}) | 169 design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp}) |
164 colnames(design) <- sub(\"group_fact\", \"\", colnames(design)) | 170 colnames(design) <- sub(\"group_fact\", \"\", colnames(design)) |
165 "; | 171 "; |
166 foreach my $fct (@fact_names) { | 172 foreach my $fct (@fact_names) { |
167 print Rcmd " | 173 print Rcmd " |
168 colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design))) | 174 colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design))) |
169 "; | 175 "; |
170 } | 176 } |
171 print Rcmd " | 177 print Rcmd " |
172 disp <- estimateGLMCommonDisp(DGE, design) | 178 disp <- estimateGLMCommonDisp(DGE, design) |
173 "; | 179 "; |
174 if($OPTIONS{d} eq "tag" || $OPTIONS{d} eq "trend") { | 180 if($OPTIONS{d} eq "tag" || $OPTIONS{d} eq "trend") { |
175 print Rcmd " | 181 print Rcmd " |
176 disp <- estimateGLMTrendedDisp(disp, design) | 182 disp <- estimateGLMTrendedDisp(disp, design) |
177 "; | 183 "; |
178 } | 184 } |
179 if($OPTIONS{d} eq "tag") { | 185 if($OPTIONS{d} eq "tag") { |
180 print Rcmd " | 186 print Rcmd " |
181 disp <- estimateGLMTagwiseDisp(disp, design) | 187 disp <- estimateGLMTagwiseDisp(disp, design) |
182 fit <- glmFit(disp, design) | 188 fit <- glmFit(disp, design) |
183 pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\") | 189 pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\") |
184 plot(fit\$abund+log(1e06), sqrt(disp\$tagwise.dispersion), xlab=\"Counts per million (log2 scale)\", ylab=\"Tagwise dispersion\") | 190 plotBCV(disp, cex=0.4) |
185 oo <- order(disp\$abundance) | 191 dev.off() |
186 lines(fit\$abundance[oo]+log(1e06), sqrt(disp\$trended.dispersion[oo]), col=\"dodgerblue\", lwd=3) | 192 "; |
187 abline(h=sqrt(disp\$common.dispersion), col=\"firebrick\", lwd=3) | |
188 dev.off() | |
189 "; | |
190 } | 193 } |
191 if(@add_cont) { | 194 if(@add_cont) { |
192 $all_cont = "\"".join("\", \"", @add_cont)."\""; | 195 $all_cont = "\"".join("\", \"", @add_cont)."\""; |
193 print Rcmd " | 196 print Rcmd " |
194 cont <- c(${all_cont}) | 197 cont <- c(${all_cont}) |
195 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont) | 198 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont) |
196 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont) | 199 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont) |
197 "; | 200 "; |
198 } else { | 201 } else { |
199 print Rcmd " | 202 print Rcmd " |
200 cont <- NULL | 203 cont <- NULL |
201 "; | 204 "; |
202 } | 205 } |
203 if(defined $OPTIONS{m}) { | 206 if(defined $OPTIONS{m}) { |
204 print Rcmd " | 207 print Rcmd " |
205 for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\")) | 208 for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\")) |
206 "; | 209 "; |
207 } | 210 } |
208 if(!defined $OPTIONS{m} && !@add_cont){ | 211 if(!defined $OPTIONS{m} && !@add_cont){ |
209 die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n"); | 212 die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n"); |
210 } | 213 } |
211 print Rcmd " | 214 print Rcmd " |
212 fit <- glmFit(disp, design) | 215 fit <- glmFit(disp, design) |
213 cont <- makeContrasts(contrasts=cont, levels=design) | 216 cont <- makeContrasts(contrasts=cont, levels=design) |
214 for(i in colnames(cont)) tested[[i]] <- glmLRT(disp, fit, contrast=cont[,i]) | 217 for(i in colnames(cont)) tested[[i]] <- glmLRT(fit, contrast=cont[,i]) |
215 "; | 218 pdf(file=\"$OPTIONS{e}/Smear_Plots.pdf\") |
219 for(i in colnames(cont)) { | |
220 dt <- decideTestsDGE(tested[[i]], p.value=0.05, adjust.method=\"$OPTIONS{f}\") | |
221 if(sum(dt) > 0) { | |
222 de_tags <- rownames(disp)[which(dt != 0)] | |
223 ttl <- \"Diff. Exp. Genes With adj. Pvalue < 0.05\" | |
224 } else { | |
225 de_tags <- rownames(topTags(tested[[i]], n=100)\$table) | |
226 ttl <- \"Top 100 tags\" | |
227 } | |
228 | |
229 if(length(dt) < 5000) { | |
230 pointcex = 0.5 | |
231 } else { | |
232 pointcex = 0.2 | |
233 } | |
234 plotSmear(disp, de.tags = de_tags, main = paste(\"Smear Plot\", i), cex=pointcex) | |
235 abline(h = c(-1, 1), col = \"blue\") | |
236 legend(\"topright\", c(\"2 Fold Change\", ttl) , lty=c(1, NA), pch=c(NA, 19), pt.cex=0.5, col=c(\"blue\", \"red\"), bty=\"n\") | |
237 } | |
238 dev.off() | |
239 "; | |
216 if(defined $OPTIONS{n}) { | 240 if(defined $OPTIONS{n}) { |
217 print Rcmd " | 241 print Rcmd " |
218 tab <- data.frame(ID=rownames(fit\$fitted.values), fit\$fitted.values, stringsAsFactors=F) | 242 tab <- data.frame(ID=rownames(fit\$fitted.values), fit\$fitted.values, stringsAsFactors=F) |
219 write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F) | 243 write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F) |
220 "; | 244 "; |
221 } | 245 } |
222 } elsif($OPTIONS{a} eq "limma") { | 246 } elsif($OPTIONS{a} eq "limma") { |
223 for(my $fct = 0; $fct <= $#fact_names; $fct++) { | 247 for(my $fct = 0; $fct <= $#fact_names; $fct++) { |
224 print Rcmd " | 248 print Rcmd " |
225 $fact_names[$fct] <- c($fact[$fct]) | 249 $fact_names[$fct] <- c($fact[$fct]) |
226 "; | 250 "; |
227 } | 251 } |
228 for(my $fct = 0; $fct <= $#cp_names; $fct++) { | 252 for(my $fct = 0; $fct <= $#cp_names; $fct++) { |
229 print Rcmd " | 253 print Rcmd " |
230 $cp_names[$fct] <- c($cp[$fct]) | 254 $cp_names[$fct] <- c($cp[$fct]) |
231 "; | 255 "; |
232 } | 256 } |
233 my $all_fact = ""; | 257 my $all_fact = ""; |
234 if(@fact_names) { | 258 if(@fact_names) { |
235 foreach (@fact_names) { | 259 foreach (@fact_names) { |
236 $all_fact .= " + factor($_)"; | 260 $all_fact .= " + factor($_)"; |
239 my $all_cp = ""; | 263 my $all_cp = ""; |
240 if(@cp_names) { | 264 if(@cp_names) { |
241 $all_cp = " + ".join(" + ", @cp_names); | 265 $all_cp = " + ".join(" + ", @cp_names); |
242 } | 266 } |
243 print Rcmd " | 267 print Rcmd " |
244 group_fact <- factor(names(groups)) | 268 group_fact <- factor(names(groups)) |
245 design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp}) | 269 design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp}) |
246 colnames(design) <- sub(\"group_fact\", \"\", colnames(design)) | 270 colnames(design) <- sub(\"group_fact\", \"\", colnames(design)) |
247 "; | 271 "; |
248 foreach my $fct (@fact_names) { | 272 foreach my $fct (@fact_names) { |
249 print Rcmd " | 273 print Rcmd " |
250 colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design))) | 274 colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design))) |
251 "; | 275 "; |
252 } | 276 } |
253 print Rcmd " | 277 print Rcmd " |
254 isexpr <- rowSums(cpm(toc)>1) >= 2 | 278 isexpr <- rowSums(cpm(toc)>1) >= 2 |
255 toc <- toc[isexpr, ] | 279 toc <- toc[isexpr, ] |
256 pdf(file=\"$OPTIONS{e}/LIMMA_voom.pdf\") | 280 pdf(file=\"$OPTIONS{e}/LIMMA_voom.pdf\") |
257 y <- voom(toc, design, plot=TRUE, lib.size=colSums(toc)*norm_factors) | 281 y <- voom(toc, design, plot=TRUE, lib.size=colSums(toc)*norm_factors) |
258 dev.off() | 282 dev.off() |
259 | 283 |
260 pdf(file=\"$OPTIONS{e}/LIMMA_MDS_plot.pdf\") | 284 pdf(file=\"$OPTIONS{e}/LIMMA_MDS_plot.pdf\") |
261 plotMDS(y, labels=colnames(toc), col=as.numeric(factor(names(groups)))+1, gene.selection=\"common\") | 285 plotMDS(y, labels=colnames(toc), col=as.numeric(factor(names(groups)))+1, gene.selection=\"common\") |
262 dev.off() | 286 dev.off() |
263 fit <- lmFit(y, design) | 287 fit <- lmFit(y, design) |
264 "; | 288 "; |
265 if(defined $OPTIONS{n}) { | 289 if(defined $OPTIONS{n}) { |
266 if(defined $OPTIONS{l}) { | 290 if(defined $OPTIONS{l}) { |
267 print Rcmd " | 291 print Rcmd " |
268 tab <- data.frame(ID=rownames(y\$E), y\$E, stringsAsFactors=F) | 292 tab <- data.frame(ID=rownames(y\$E), y\$E, stringsAsFactors=F) |
269 "; | 293 "; |
270 } else { | 294 } else { |
271 print Rcmd " | 295 print Rcmd " |
272 tab <- data.frame(ID=rownames(y\$E), 2^y\$E, stringsAsFactors=F) | 296 tab <- data.frame(ID=rownames(y\$E), 2^y\$E, stringsAsFactors=F) |
273 "; | 297 "; |
274 } | 298 } |
275 print Rcmd " | 299 print Rcmd " |
276 write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F) | 300 write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F) |
277 "; | 301 "; |
278 } | 302 } |
279 if(@add_cont) { | 303 if(@add_cont) { |
280 $all_cont = "\"".join("\", \"", @add_cont)."\""; | 304 $all_cont = "\"".join("\", \"", @add_cont)."\""; |
281 print Rcmd " | 305 print Rcmd " |
282 cont <- c(${all_cont}) | 306 cont <- c(${all_cont}) |
283 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont) | 307 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont) |
284 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont) | 308 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont) |
285 "; | 309 "; |
286 } else { | 310 } else { |
287 print Rcmd " | 311 print Rcmd " |
288 cont <- NULL | 312 cont <- NULL |
289 "; | 313 "; |
290 } | 314 } |
291 if(defined $OPTIONS{m}) { | 315 if(defined $OPTIONS{m}) { |
292 print Rcmd " | 316 print Rcmd " |
293 for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\")) | 317 for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\")) |
294 "; | 318 "; |
295 } | 319 } |
296 if(!defined $OPTIONS{m} && !@add_cont){ | 320 if(!defined $OPTIONS{m} && !@add_cont){ |
297 die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n"); | 321 die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n"); |
298 } | 322 } |
299 print Rcmd " | 323 print Rcmd " |
300 cont <- makeContrasts(contrasts=cont, levels=design) | 324 cont <- makeContrasts(contrasts=cont, levels=design) |
301 fit2 <- contrasts.fit(fit, cont) | 325 fit2 <- contrasts.fit(fit, cont) |
302 fit2 <- eBayes(fit2) | 326 fit2 <- eBayes(fit2) |
303 "; | 327 "; |
304 } else { | 328 } else { |
305 die("Anaysis type $OPTIONS{a} not found\n"); | 329 die("Anaysis type $OPTIONS{a} not found\n"); |
306 | 330 |
307 } | 331 } |
308 | 332 |
309 if($OPTIONS{a} ne "limma") { | 333 if($OPTIONS{a} ne "limma") { |
310 print Rcmd " | 334 print Rcmd " |
311 options(digits = 6) | 335 options(digits = 6) |
312 tab <- NULL | 336 tab <- NULL |
313 for(i in names(tested)) { | 337 for(i in names(tested)) { |
314 tab_tmp <- topTags(tested[[i]], n=Inf, adjust.method=\"$OPTIONS{f}\")[[1]] | 338 tab_tmp <- topTags(tested[[i]], n=Inf, adjust.method=\"$OPTIONS{f}\")[[1]] |
315 colnames(tab_tmp) <- paste(i, colnames(tab_tmp), sep=\":\") | 339 colnames(tab_tmp) <- paste(i, colnames(tab_tmp), sep=\":\") |
316 tab_tmp <- tab_tmp[tagnames,] | 340 tab_tmp <- tab_tmp[tagnames,] |
317 if(is.null(tab)) { | 341 if(is.null(tab)) { |
318 tab <- tab_tmp | 342 tab <- tab_tmp |
319 } else tab <- cbind(tab, tab_tmp) | 343 } else tab <- cbind(tab, tab_tmp) |
320 } | 344 } |
321 tab <- cbind(Feature=rownames(tab), tab) | 345 tab <- cbind(Feature=rownames(tab), tab) |
322 "; | 346 "; |
323 } else { | 347 } else { |
324 print Rcmd " | 348 print Rcmd " |
325 tab <- NULL | 349 tab <- NULL |
326 options(digits = 6) | 350 options(digits = 6) |
327 for(i in colnames(fit2)) { | 351 for(i in colnames(fit2)) { |
328 tab_tmp <- topTable(fit2, coef=i, n=Inf, sort.by=\"none\", adjust.method=\"$OPTIONS{f}\") | 352 tab_tmp <- topTable(fit2, coef=i, n=Inf, sort.by=\"none\", adjust.method=\"$OPTIONS{f}\") |
329 colnames(tab_tmp)[-1] <- paste(i, colnames(tab_tmp)[-1], sep=\":\") | 353 colnames(tab_tmp)[-1] <- paste(i, colnames(tab_tmp)[-1], sep=\":\") |
330 if(is.null(tab)) { | 354 if(is.null(tab)) { |
331 tab <- tab_tmp | 355 tab <- tab_tmp |
332 } else tab <- cbind(tab, tab_tmp[,-1]) | 356 } else tab <- cbind(tab, tab_tmp[,-1]) |
333 } | 357 } |
334 "; | 358 "; |
335 } | 359 } |
336 print Rcmd " | 360 print Rcmd " |
337 write.table(tab, \"$OPTIONS{o}\", quote=F, sep=\"\\t\", row.names=F) | 361 write.table(tab, \"$OPTIONS{o}\", quote=F, sep=\"\\t\", row.names=F) |
338 sink(type=\"message\") | 362 sink(type=\"message\") |
339 sink() | 363 sink() |
340 "; | 364 "; |
341 close(Rcmd); | 365 close(Rcmd); |
342 system("R --no-restore --no-save --no-readline < $OPTIONS{e}/r_script.R > $OPTIONS{e}/r_script.out"); | 366 system("R --no-restore --no-save --no-readline < $OPTIONS{e}/r_script.R > $OPTIONS{e}/r_script.out"); |
343 | 367 |
344 open(HTML, ">$OPTIONS{h}"); | 368 open(HTML, ">$OPTIONS{h}"); |
350 print HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\n"; | 374 print HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\n"; |
351 } | 375 } |
352 print HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\n"; | 376 print HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\n"; |
353 } elsif($OPTIONS{a} eq "glm" && $OPTIONS{d} eq "tag") { | 377 } elsif($OPTIONS{a} eq "glm" && $OPTIONS{d} eq "tag") { |
354 print HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\n"; | 378 print HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\n"; |
379 print HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\n"; | |
380 } elsif($OPTIONS{a} eq "glm" && ($OPTIONS{d} eq "trend" || $OPTIONS{d} eq "common")) { | |
381 print HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\n"; | |
355 } elsif($OPTIONS{a} eq "limma") { | 382 } elsif($OPTIONS{a} eq "limma") { |
356 print HTML "<li><a href=LIMMA_MDS_plot.pdf>LIMMA_MDS_plot.pdf</a></li>\n"; | 383 print HTML "<li><a href=LIMMA_MDS_plot.pdf>LIMMA_MDS_plot.pdf</a></li>\n"; |
357 print HTML "<li><a href=LIMMA_voom.pdf>LIMMA_voom.pdf</a></li>\n"; | 384 print HTML "<li><a href=LIMMA_voom.pdf>LIMMA_voom.pdf</a></li>\n"; |
358 } | 385 } |
359 print HTML "<li><a href=r_script.R>r_script.R</a></li>\n"; | 386 print HTML "<li><a href=r_script.R>r_script.R</a></li>\n"; |