annotate abims_anova.r @ 0:8dd2a438bfba draft

Uploaded
author lecorguille
date Tue, 30 Jun 2015 06:02:46 -0400
parents
children b147b17759a6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
1 #!/usr/local/public/bin/Rscript
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
2 # version="1.1"
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
3
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
4 # date: 06-06-2012
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
5 # update: 18-02-2014
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
6 # **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
7
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
8 # abims_anova.r version 20140218
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
9
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
10 library(batch)
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
11
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
12
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
13 # function anova
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
# Per-variable ANOVA with multiple-testing correction and variable filtering.
#
# Reads a data matrix and a sample-information table, fits one linear model
# per variable (via manova() + summary.aov()) against the requested sample
# factors, adjusts the p-values of every model term across all variables,
# and writes two tables: the data matrix augmented with the adjusted
# p-values, and the data matrix restricted to the selected variables.
# The function terminates the R session through quit(): status 10 when the
# sample names cannot be matched, status 0 on success.
#
# Arguments:
#   file               path of the data matrix (first column = row names).
#   sampleinfo         path of the sample information table
#                      (first column = sample names).
#   mode               "row" when the data file has variables in rows and
#                      samples in columns (the matrix is transposed after
#                      reading); any other value keeps samples in rows.
#   condition          column(s) of the sample information table (name or
#                      index) used as factors in the model.
#   interaction        when true, factors are combined with "*" instead of "+".
#   method             correction method forwarded to p.adjust().
#   threshold          cut-off applied to the adjusted p-values (<=).
#   selection_method   "intersection": every term must pass the threshold;
#                      any other value: at least one term must pass.
#   sep                field separator, or one of the keywords "tabulation",
#                      "semicolon", "comma".
#   dec                decimal mark of the data matrix.
#   outputdatapvalue   output path of the data + adjusted p-values table.
#   outputdatafiltered output path of the filtered data table.
anova <- function(file, sampleinfo, mode = "column", condition = 1,
                  interaction = FALSE, method = "BH", threshold = 0.01,
                  selection_method = "intersection", sep = ";", dec = ".",
                  outputdatapvalue = "anova.data.output",
                  outputdatafiltered = "anova.datafiltered.output") {

  # Translate separator keywords into the actual character.
  if (sep == "tabulation") {
    sep <- "\t"
  } else if (sep == "semicolon") {
    sep <- ";"
  } else if (sep == "comma") {
    sep <- ","
  }

  if (length(condition) == 0) {
    stop("at least one condition is required", call. = FALSE)
  }

  # Defensive: guarantees a numeric comparison below even if the caller
  # handed the threshold over as text.
  threshold <- as.numeric(threshold)

  anova_formula_operator <- "+"
  if (interaction) anova_formula_operator <- "*"

  # -- import --
  data <- read.table(file, header = TRUE, row.names = 1, sep = sep,
                     quote = "\"", dec = dec, fill = TRUE,
                     comment.char = "", na.strings = "NA")

  # From here on `data` is a matrix with samples in rows and variables in
  # columns. t() already returns a matrix; the other branch is coerced
  # explicitly because the model response must be a matrix, not a data.frame.
  if (mode == "row") {
    data <- t(data)
  } else {
    data <- as.matrix(data)
  }

  sampleinfoTab <- read.table(sampleinfo, header = TRUE, row.names = 1,
                              sep = sep, quote = "\"")
  rownames(sampleinfoTab) <- make.names(rownames(sampleinfoTab))

  # -- group --
  # Sample names of the data matrix get the same make.names() treatment as
  # those of the sample info so that both modes are matched consistently.
  match_data_sampleinfoTab <- match(make.names(rownames(data)),
                                    rownames(sampleinfoTab))
  if (any(is.na(match_data_sampleinfoTab))) {
    write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr())
    write("You may need to use change the mode (column/row)", stderr())
    write("10 first sample names in the data matrix:", stderr())
    # The names that were actually matched are the row names of `data`.
    write(head(rownames(data), 10), stderr())
    write("10 first sample names in the sample info:", stderr())
    write(head(rownames(sampleinfoTab), 10), stderr())
    quit("no", status = 10)
  }

  # -- anova --

  # formula: one factor per requested condition, reordered like the samples
  # of the data matrix. The formula refers to the local objects `data` and
  # `grps`, so these two names must not be changed.
  grps <- vector("list", length(condition))
  cat("\ncontrasts:\n")
  for (i in seq_along(condition)) {
    grps[[i]] <- factor(sampleinfoTab[, condition[i]][match_data_sampleinfoTab])
    cat(condition[i], "\t", levels(grps[[i]]), "\n")
  }
  grp_terms <- paste0("grps[[", seq_along(condition), "]]")
  anova_formula_s <- paste0("data ~ ",
                            paste(grp_terms, collapse = anova_formula_operator))
  anova_formula <- as.formula(anova_formula_s)

  # anova: summary.aov() is called explicitly to obtain one univariate
  # ANOVA table per variable instead of the multivariate summary.
  manovaObjectList <- manova(anova_formula)
  manovaList <- summary.aov(manovaObjectList)

  # condition renaming: the last row of every table is the residuals row,
  # all rows before it are model terms carrying a p-value.
  manovaRownames <- gsub(" ", "", rownames(manovaList[[1]]))
  manovaNbrPvalue <- length(manovaRownames) - 1
  manovaRownames <- manovaRownames[seq_len(manovaNbrPvalue)]

  for (i in seq_along(condition)) {
    label <- as.character(condition[i])
    manovaRownames <- sub(grp_terms[i], label, manovaRownames, fixed = TRUE)
    anova_formula_s <- sub(grp_terms[i], label, anova_formula_s, fixed = TRUE)
  }

  # log
  cat("\nanova_formula", anova_formula_s, "\n")

  # p-value: always a (terms x variables) matrix, whatever the number of
  # terms; column 5 of each table holds the p-values.
  aovPValue <- matrix(
    vapply(manovaList,
           function(x) as.numeric(x[seq_len(manovaNbrPvalue), 5]),
           numeric(manovaNbrPvalue)),
    nrow = manovaNbrPvalue
  )
  rownames(aovPValue) <- paste0("pvalue_", manovaRownames)

  # p-value adjusted: the correction is applied term by term across all
  # variables (one family of tests per model term).
  aovAdjPValue <- aovPValue
  for (k in seq_len(nrow(aovPValue))) {
    aovAdjPValue[k, ] <- p.adjust(aovPValue[k, ], method = method)
  }
  rownames(aovAdjPValue) <- paste0("pvalueadjusted.", method, ".",
                                   manovaRownames)

  # selection: number of terms passing the threshold for each variable;
  # undefined p-values (e.g. constant variables) never count as passing.
  colSumThreshold <- colSums(aovAdjPValue <= threshold, na.rm = TRUE)
  if (selection_method == "intersection") {
    keep <- colSumThreshold == nrow(aovAdjPValue)
  } else {
    keep <- colSumThreshold != 0
  }
  # drop = FALSE keeps the matrix shape (and the variable name) when a
  # single variable is selected.
  datafiltered <- data[, keep, drop = FALSE]

  # Counts for the log, taken while variables are still in columns.
  nbr_selected <- ncol(datafiltered)
  nbr_variables <- ncol(data)

  data <- rbind(data, aovAdjPValue)

  # Restore the orientation of the input file.
  if (mode == "row") {
    data <- t(data)
    datafiltered <- t(datafiltered)
  }

  # -- output / return --
  write.table(data, outputdatapvalue, sep = sep, quote = FALSE,
              col.names = NA)
  write.table(datafiltered, outputdatafiltered, sep = sep, quote = FALSE,
              col.names = NA)

  # log
  cat("\nthreshold:", threshold, "\n")
  cat("result:", nbr_selected, "/", nbr_variables, "\n")

  quit("no", status = 0)
}
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
121
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
# log: announce the tool and echo the raw trailing command-line arguments
cat("ANOVA\n\n", "Arguments\n", sep = "")
args <- commandArgs(trailingOnly = TRUE)
print(args)

# Collect the command-line arguments as a named list (parseCommandArgs comes
# from the 'batch' package loaded at the top of the script) and forward them
# to anova() as named arguments.
listArguments <- parseCommandArgs(evaluate = FALSE)
do.call(anova, listArguments)
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
130
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
131
8dd2a438bfba Uploaded
lecorguille
parents:
diff changeset
132