Mercurial > repos > lecorguille > anova
comparison galaxy/stat_anova/abims_anova.r @ 8:c2be3b890724 draft
planemo upload commit 00a7f9aadf30a7d728eb4df72ca38ebe5dd7be03
author | lecorguille |
---|---|
date | Tue, 06 Jun 2017 09:43:23 -0400 |
parents | b6298c38e53f |
children |
comparison
equal
deleted
inserted
replaced
7:bcd05315efc5 | 8:c2be3b890724 |
---|---|
1 #!/usr/local/public/bin/Rscript | |
2 # version="1.1" | |
3 | |
4 # date: 06-06-2012 | |
5 # update: 18-02-2014 | |
6 # **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr | |
7 | |
8 # abims_anova.r version 20140218 | |
9 | |
10 library(batch) | |
11 | |
12 | |
13 # function avova | |
14 anova = function (file, sampleinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatafiltered="anova.datafiltered.output") { | |
15 | |
16 | |
17 if (sep=="tabulation") sep="\t" | |
18 if (sep=="semicolon") sep=";" | |
19 if (sep=="comma") sep="," | |
20 | |
21 anova_formula_operator = "+" | |
22 if (interaction) anova_formula_operator = "*" | |
23 | |
24 # -- import -- | |
25 data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA") | |
26 | |
27 if (mode == "row") data=t(data) | |
28 | |
29 sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"") | |
30 rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab)) | |
31 | |
32 | |
33 # -- group -- | |
34 match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab)) | |
35 if (sum(is.na(match_data_sampleinfoTab)) > 0) { | |
36 write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr()) | |
37 write("You may need to use change the mode (column/row)", stderr()) | |
38 write("10 first sample names in the data matrix:", stderr()) | |
39 write(head(colnames(data)), stderr()) | |
40 write("10 first sample names in the sample info:", stderr()) | |
41 write(head(rownames(sampleinfoTab)), stderr()) | |
42 quit("no",status=10) | |
43 } | |
44 | |
45 | |
46 # -- anova -- | |
47 | |
48 # formula | |
49 grps=list() | |
50 anova_formula_s = "data ~ " | |
51 cat("\ncontrasts:\n") | |
52 for (i in 1:length(condition)) { | |
53 grps[[i]] = factor(sampleinfoTab[,condition[i]][match_data_sampleinfoTab]) | |
54 anova_formula_s = paste(anova_formula_s, "grps[[",i,"]]",anova_formula_operator, sep="") | |
55 cat(condition[i],"\t",levels(grps[[i]]),"\n") | |
56 # write("Current groups: ", stderr()) | |
57 # write(grp[[i]], stderr()) | |
58 } | |
59 anova_formula_s = substr(anova_formula_s, 1, nchar(anova_formula_s)-1) | |
60 anova_formula = as.formula(anova_formula_s) | |
61 | |
62 | |
63 | |
64 # anova | |
65 manovaObjectList = manova(anova_formula) | |
66 manovaList = summary.aov(manovaObjectList) | |
67 | |
68 # condition renaming | |
69 manovaRownames = gsub(" ","",rownames(manovaList[[1]])) | |
70 manovaNbrPvalue = length(manovaRownames)-1 | |
71 manovaRownames = manovaRownames[-(manovaNbrPvalue+1)] | |
72 | |
73 for (i in 1:length(condition)) { | |
74 manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames) | |
75 anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s) | |
76 } | |
77 | |
78 # log | |
79 cat("\nanova_formula",anova_formula_s,"\n") | |
80 | |
81 # p-value | |
82 aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]}) | |
83 if(length(condition) == 1) aovPValue = t(aovPValue) | |
84 rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="") | |
85 | |
86 # p-value adjusted | |
87 if(length(condition) == 1) { | |
88 aovAdjPValue = t(p.adjust(aovPValue,method=method)) | |
89 } else { | |
90 aovAdjPValue = apply(aovPValue,2,p.adjust, method=method) | |
91 } | |
92 rownames(aovAdjPValue) = paste("pvalueadjusted.",method,".",manovaRownames,sep="") | |
93 | |
94 # selection | |
95 colSumThreshold = colSums(aovAdjPValue <= threshold) | |
96 if (selection_method == "intersection") { | |
97 datafiltered = data[,colSumThreshold == nrow(aovAdjPValue )] | |
98 } else { | |
99 datafiltered = data[,colSumThreshold != 0] | |
100 } | |
101 | |
102 #data=rbind(data, aovPValue, aovAdjPValue) | |
103 data=rbind(data, aovAdjPValue) | |
104 | |
105 | |
106 if (mode == "row") { | |
107 data=t(data) | |
108 datafiltered=t(datafiltered) | |
109 } | |
110 | |
111 # -- output / return -- | |
112 write.table(data, outputdatapvalue, sep=sep, quote=F, col.names = NA) | |
113 write.table(datafiltered, outputdatafiltered, sep=sep, quote=F, col.names = NA) | |
114 | |
115 # log | |
116 cat("\nthreshold:",threshold,"\n") | |
117 cat("result:",nrow(datafiltered),"/",nrow(data),"\n") | |
118 | |
119 quit("no",status=0) | |
120 } | |
121 | |
122 # log | |
123 cat("ANOVA\n\n") | |
124 cat("Arguments\n") | |
125 args <- commandArgs(trailingOnly = TRUE) | |
126 print(args) | |
127 | |
128 listArguments = parseCommandArgs(evaluate=FALSE) | |
129 do.call(anova, listArguments) | |
130 | |
131 | |
132 |