Mercurial > repos > cristian > rbgoa
annotate gomwu.functions.R @ 1:f7287f82602f draft
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
author | cristian |
---|---|
date | Tue, 19 Apr 2022 08:28:43 +0000 |
parents | 91261b42c07e |
children | 5acf9dfdfa27 |
rev | line source |
---|---|
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
1 clusteringGOs=function(gen2go,div,cutHeight) { |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
2 nn <- strsplit(gen2go, "[/.]") |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
3 if (length(nn[[1]]) == 3) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
4 dir <- nn[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
5 name <- nn[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
6 ext <- nn[[1]][3] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
7 } else if (length(nn[[1]]) == 2) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
8 dir <- "." |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
9 name <- nn[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
10 ext <- nn[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
11 } |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
12 inname=paste(dir,"/","dissim0_",div,"_",name,".",ext,sep="") |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
13 |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
14 outname=paste(dir,"/","cl_dissim0_",div,"_",name,".",ext,sep="") |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
15 if (!file.exists(outname)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
16 diss=read.table(inname,sep="\t",header=T,check.names=F) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
17 row.names(diss)=names(diss) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
18 hc=hclust(as.dist(diss),method="complete") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
19 cc=cutree(hc,h=cutHeight) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
20 write.csv(cc,file=outname,quote=F) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
21 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
22 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
23 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
24 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
25 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
26 #--------------- |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
27 gomwuStats=function(input,goDatabase,goAnnotations, goDivision, scriptdir, Module=FALSE, Alternative="t", adjust.multcomp="BH", clusterCutHeight=0.25,largest=0.1,smallest=5,perlPath="perl", shuffle.reps=20){ |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
28 |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
29 nn <- strsplit(input, "[/.]") |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
30 if (length(nn[[1]]) == 3) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
31 dir <- nn[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
32 name <- nn[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
33 ext <- nn[[1]][3] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
34 } else if (length(nn[[1]]) == 2) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
35 dir <- "." |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
36 name <- nn[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
37 ext <- nn[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
38 } |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
39 |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
40 extraOptions=paste("largest=",largest," smallest=",smallest," cutHeight=",clusterCutHeight,sep="") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
41 if (Module==TRUE) { adjust.multcomp="shuffle" } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
42 gomwu_a = paste(scriptdir, "gomwu_a.pl", sep="/") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
43 gomwu_b = paste(scriptdir, "gomwu_b.pl", sep="/") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
44 system(paste(perlPath, gomwu_a,goDatabase,goAnnotations,input,goDivision,extraOptions)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
45 clusteringGOs(goAnnotations,goDivision,clusterCutHeight) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
46 system(paste(perlPath, gomwu_b,goAnnotations,input,goDivision)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
47 |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
48 inname=paste(dir,"/",name,"_",goDivision,".tsv",sep="") |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
49 rsq=read.table(inname,sep="\t",header=T) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
50 rsq$term=as.factor(rsq$term) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
51 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
52 mwut.t=TRUE |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
53 if (length(levels(as.factor(rsq$value)))==2) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
54 cat("Binary classification detected; will perform Fisher's test\n"); |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
55 mwut.t=F |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
56 rr=fisherTest(rsq) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
57 } else { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
58 if (Module==TRUE) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
59 rsq.f=rsq |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
60 rsq.f$value=as.numeric(rsq.f$value>0) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
61 rf=fisherTest(rsq.f) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
62 rsq.m=rsq[rsq$value>0,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
63 rsq.m$term=factor(rsq.m$term,levels=unique(rsq.m$term)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
64 rrm=mwuTest(rsq.m,"g") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
65 rr0=rf[rf$term %in% rrm$term,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
66 rr1=rf[!(rf$term %in% rrm$term),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
67 rr0=rr0[order(rr0$term),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
68 rrm=rrm[order(rrm$term),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
69 rr0$pval=rr0$pval*rrm$pval |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
70 rr=rbind(rr0,rr1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
71 } else { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
72 cat("Continuous measure of interest: will perform MWU test\n"); |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
73 rr=mwuTest(rsq,Alternative) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
74 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
75 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
76 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
77 if (adjust.multcomp=="shuffle"){ |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
78 message("shuffling values to calculate FDR, ",shuffle.reps," reps") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
79 reps=shuffle.reps |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
80 spv=c() |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
81 for (i in 1:reps) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
82 message("replicate ",i) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
83 rsqq=rsq |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
84 rsqq$value=sample(rsq$value) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
85 if (Module==TRUE) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
86 rsq.f=rsqq |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
87 rsq.f$value=as.numeric(rsq.f$value>0) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
88 rf=fisherTest(rsq.f) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
89 rsq.m=rsqq[rsqq$value>0,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
90 rsq.m$term=factor(rsq.m$term,levels=unique(rsq.m$term)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
91 rrm=mwuTest(rsq.m,"g") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
92 rr0=rf[rf$term %in% rrm$term,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
93 rr1=rf[!(rf$term %in% rrm$term),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
94 rr0=rr0[order(rr0$term),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
95 rrm=rrm[order(rrm$term),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
96 rr0$pval=rr0$pval*rrm$pval |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
97 rs=rbind(rr0,rr1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
98 } else { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
99 if (mwut.t==TRUE) { rs=mwuTest(rsqq,Alternative) } else { rs=fisherTest(rsqq) } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
100 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
101 spv=append(spv,rs$pval) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
102 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
103 fdr=c() |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
104 for (p in rr$pval){ |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
105 fdr=append(fdr,(sum(spv<=p)/reps)/sum(rr$pval<=p)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
106 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
107 fdr[fdr>1]=1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
108 } else { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
109 fdr=p.adjust(rr$pval,method=adjust.multcomp) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
110 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
111 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
112 message(sum(fdr<0.1)," GO terms at 10% FDR") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
113 rr$p.adj=fdr |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
114 fname=paste(dir,"/","MWU_",goDivision,"_",name,".",ext,sep="") |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
115 write.table(rr,fname,row.names=F) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
116 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
117 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
118 #--------------------- |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
119 mwuTest=function(gotable,Alternative) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
120 gos=gotable |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
121 terms=levels(gos$term) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
122 gos$seq=as.character(gos$seq) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
123 nrg=gos[!duplicated(gos$seq),5] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
124 names(nrg)=gos[!duplicated(gos$seq),4] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
125 # nrg=nrg+rnorm(nrg,sd=0.01) # to be able to do exact wilcox test |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
126 rnk=rank(nrg) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
127 names(rnk)=names(nrg) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
128 pvals=c();drs=c();nams=c();levs=c();nseqs=c() |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
129 for (t in terms){ |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
130 got=gos[gos$term==t,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
131 got=got[!duplicated(got$seq),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
132 ngot=gos[gos$term!=t,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
133 ngot=ngot[!duplicated(ngot$seq),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
134 ngot=ngot[!(ngot$seq %in% got$seq),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
135 sgo.yes=got$seq |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
136 n1=length(sgo.yes) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
137 sgo.no=ngot$seq |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
138 n2=length(sgo.no) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
139 #message(t," wilcox:",n1," ",n2) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
140 wi=wilcox.test(nrg[sgo.yes],nrg[sgo.no],alternative=Alternative) # removed correct=FALSE |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
141 r1=sum(rnk[sgo.yes])/n1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
142 r0=sum(rnk[sgo.no])/n2 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
143 dr=r1-r0 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
144 drs=append(drs,round(dr,0)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
145 levs=append(levs,got$lev[1]) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
146 nams=append(nams,as.character(got$name[1])) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
147 pvals=append(pvals,wi$p.value) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
148 nseqs=append(nseqs,n1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
149 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
150 res=data.frame(cbind("delta.rank"=drs,"pval"=pvals,"level"=levs,nseqs)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
151 res=cbind(res,"term"=as.character(terms),"name"=nams) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
152 res$pval=as.numeric(as.character(res$pval)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
153 res$delta.rank=as.numeric(as.character(res$delta.rank)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
154 res$level=as.numeric(as.character(res$level)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
155 res$nseqs=as.numeric(as.character(res$nseqs)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
156 return(res) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
157 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
158 #------------------------ |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
159 fisherTest=function(gotable) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
160 gos=gotable |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
161 terms=levels(gos$term) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
162 gos$seq=as.character(gos$seq) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
163 pft=c();nam=c();lev=c();nseqs=c() |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
164 for (t in terms) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
165 got=gos[gos$term==t,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
166 got=got[!duplicated(got$seq),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
167 ngot=gos[gos$term!=t,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
168 ngot=ngot[!duplicated(ngot$seq),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
169 ngot=ngot[!(ngot$seq %in% got$seq),] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
170 go.sig=sum(got$value) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
171 go.ns=length(got[,1])-go.sig |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
172 ngo.sig=sum(ngot$value) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
173 ngo.ns=length(ngot[,1])-ngo.sig |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
174 sig=c(go.sig,ngo.sig) # number of significant genes belonging and not belonging to the tested GO category |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
175 ns=c(go.ns,ngo.ns) # number of not-significant genes belonging and not belonging to the tested GO category |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
176 mm=matrix(c(sig,ns),nrow=2,dimnames=list(ns=c("go","notgo"),sig=c("go","notgo"))) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
177 ff=fisher.test(mm,alternative="greater") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
178 pft=append(pft,ff$p.value) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
179 nam=append(nam,as.character(got$name[1])) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
180 lev=append(lev,got$lev[1]) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
181 nseqs=append(nseqs,length(got[,1])) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
182 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
183 res=data.frame(cbind("delta.rank"=rep(0),"pval"=pft,"level"=lev,nseqs,"term"=terms,"name"=nam)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
184 res[,1]=as.numeric(as.character(res[,1])) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
185 res[,2]=as.numeric(as.character(res[,2])) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
186 res[,3]=as.numeric(as.character(res[,3])) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
187 res$nseqs=as.numeric(as.character(res$nseqs)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
188 return(res) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
189 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
190 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
191 #------------------------- |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
192 gomwuPlot=function(inFile,goAnnotations,goDivision,level1=0.1,level2=0.05,level3=0.01,absValue=-log(0.05,10),adjusted=TRUE,txtsize=1,font.family="sans",treeHeight=0.5,colors=NULL) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
193 require(ape) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
194 |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
195 nn <- strsplit(inFile, "[/.]") |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
196 if (length(nn[[1]]) == 3) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
197 dir <- nn[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
198 name <- nn[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
199 ext <- nn[[1]][3] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
200 } else if (length(nn[[1]]) == 2) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
201 dir <- "." |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
202 name <- nn[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
203 ext <- nn[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
204 } |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
205 aa <- strsplit(goAnnotations, "[/.]") |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
206 if (length(aa[[1]]) == 3) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
207 adir <- aa[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
208 aname <- aa[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
209 aext <- aa[[1]][3] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
210 } else if (length(aa[[1]]) == 2) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
211 adir <- "." |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
212 aname <- aa[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
213 aext <- aa[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
214 } |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
215 # input=inFile |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
216 in.mwu=paste(dir,"/",paste("MWU",goDivision,name,sep="_"), ".", ext,sep="") |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
217 in.dissim=paste(dir, "/", paste("dissim",goDivision,name,aname,sep="_"), ".", aext, sep="") |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
218 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
219 cutoff=-log(level1,10) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
220 pv=read.table(in.mwu,header=T) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
221 row.names(pv)=pv$term |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
222 in.raw=paste(dir,"/",paste(name,goDivision,sep="_"), ".tsv", sep="") |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
223 rsq=read.table(in.raw,sep="\t",header=T) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
224 rsq$term=as.factor(rsq$term) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
225 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
226 if (adjusted==TRUE) { pvals=pv$p.adj } else { pvals=pv$pval } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
227 heat=data.frame(cbind("pval"=pvals)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
228 row.names(heat)=pv$term |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
229 heat$pval=-log(heat$pval+1e-15,10) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
230 heat$direction=0 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
231 heat$direction[pv$delta.rank>0]=1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
232 if (cutoff>0) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
233 goods=subset(heat,pval>=cutoff) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
234 } else { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
235 goods.names=unique(rsq$term[abs(rsq$value)>=absValue]) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
236 goods=heat[row.names(heat) %in% goods.names,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
237 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
238 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
239 if (is.null(colors) | length(colors)<4 ) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
240 colors=c("dodgerblue2","firebrick1","skyblue2","lightcoral") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
241 if (sum(goods$direction)==nrow(goods) | sum(goods$direction)==0) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
242 colors=c("black","black","grey50","grey50") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
243 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
244 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
245 goods.names=row.names(goods) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
246 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
247 # reading and subsetting dissimilarity matrix |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
248 diss=read.table(in.dissim,sep="\t",header=T,check.names=F) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
249 row.names(diss)=names(diss) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
250 diss.goods=diss[goods.names,goods.names] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
251 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
252 # how many genes out of what we started with we account for with our best categories? |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
253 good.len=c();good.genes=c() |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
254 for (g in goods.names) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
255 sel=rsq[rsq$term==g,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
256 pass=abs(sel$value)>=absValue |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
257 sel=sel[pass,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
258 good.genes=append(good.genes,as.character(sel$seq)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
259 good.len=append(good.len,nrow(sel)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
260 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
261 ngenes=length(unique(good.genes)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
262 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
263 #hist(rsq$value) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
264 totSum=length(unique(rsq$seq[abs(rsq$value)>=absValue])) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
265 row.names(goods)=paste(good.len,"/",pv[pv$term %in% goods.names,]$nseqs," ",pv[pv$term %in% goods.names,]$name,sep="") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
266 row.names(heat)=paste(good.len,"/",pv$nseqs," ",pv$name,sep="") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
267 row.names(diss.goods)=paste(good.len,"/",pv[pv$term %in% goods.names,]$nseqs," ",pv[pv$term %in% goods.names,]$name,sep="") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
268 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
269 # clustering terms better than cutoff |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
270 GO.categories=as.dist(diss.goods) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
271 cl.goods=hclust(GO.categories,method="average") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
272 labs=cl.goods$labels[cl.goods$order] # saving the labels to order the plot |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
273 goods=goods[labs,] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
274 labs=sub(" activity","",labs) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
275 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
276 old.par <- par( no.readonly = TRUE ) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
277 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
278 plots=layout(matrix(c(1,2,3),1,3,byrow=T),c(treeHeight,3,1),TRUE) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
279 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
280 par(mar = c(2,2,0.85,0)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
281 plot(as.phylo(cl.goods),show.tip.label=FALSE,cex=0.0000001) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
282 step=100 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
283 left=1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
284 top=step*(2+length(labs)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
285 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
286 par(mar = c(0,0,0.3,0)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
287 plot(c(1:top)~c(1:top),type="n",axes=F,xlab="",ylab="") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
288 ii=1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
289 goods$color=1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
290 goods$color[goods$direction==1 & goods$pval>cutoff]=colors[4] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
291 goods$color[goods$direction==0 & goods$pval>cutoff]=colors[3] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
292 goods$color[goods$direction==1 & goods$pval>(-log(level2,10))]=colors[2] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
293 goods$color[goods$direction==0 & goods$pval>(-log(level2,10))]=colors[1] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
294 goods$color[goods$direction==1 & goods$pval>(-log(level3,10))]=colors[2] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
295 goods$color[goods$direction==0 & goods$pval>(-log(level3,10))]=colors[1] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
296 for (i in length(labs):1) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
297 ypos=top-step*ii |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
298 ii=ii+1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
299 if (goods$pval[i]> -log(level3,10)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
300 text(left,ypos,labs[i],font=2,cex=1*txtsize,col=goods$color[i],adj=c(0,0),family=font.family) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
301 } else { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
302 if (goods$pval[i]>-log(level2,10)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
303 text(left,ypos,labs[i],font=1,cex=0.8* txtsize,col=goods$color[i],adj=c(0,0),family=font.family) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
304 } else { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
305 # if (goods$pval[i]>cutoff) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
306 # text(left,ypos,labs[i],font=3,cex=0.8* txtsize,col=goods$color[i],adj=c(0,0),family=font.family) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
307 # } else { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
308 text(left,ypos,labs[i],font=3,cex=0.8* txtsize,col=goods$color[i],adj=c(0,0),family=font.family) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
309 #} |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
310 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
311 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
312 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
313 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
314 par(mar = c(3,1,1,0)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
315 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
316 plot(c(1:top)~c(1:top),type="n",axes=F,xlab="",ylab="") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
317 text(left,top-step*2,paste("p < ",level3,sep=""),font=2,cex=1* txtsize,adj=c(0,0),family=font.family) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
318 text(left,top-step*3,paste("p < ",level2,sep=""),font=1,cex=0.8* txtsize,adj=c(0,0),family=font.family) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
319 text(left,top-step*4,paste("p < ",10^(-cutoff),sep=""),font=3,col="grey50",cex=0.8* txtsize,adj=c(0,0),family=font.family) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
320 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
321 message("GO terms dispayed: ",length(goods.names)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
322 message("\"Good genes\" accounted for: ", ngenes," out of ",totSum, " ( ",round(100*ngenes/totSum,0), "% )") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
323 par(old.par) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
324 goods$pval=10^(-1*goods$pval) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
325 return(list(goods,cl.goods)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
326 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
327 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
328 #------------------ |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
329 # returns non-overlapping GO categories based on dissimilarity table |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
330 indepGO=function(dissim.table,min.similarity=1) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
331 tt=read.table(dissim.table,sep="\t", header=TRUE) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
332 tt=as.matrix(tt) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
333 diag(tt)=1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
334 for (i in 1:ncol(tt)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
335 mins=apply(tt,2,min) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
336 if (min(mins)>=min.similarity) { break } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
337 sums=apply(tt,2,sum) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
338 worst=which(sums==min(sums))[1] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
339 # cat("\n",worsts,"\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
340 # gw=c() |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
341 # for(w in worsts) { gw=append(gw,sum(tt[,w]==1)) } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
342 # cat(gw) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
343 # worst=worsts[gw==min(gw)] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
344 # cat("\n",i," removing",worst,"; sum =",sums[worst]) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
345 tt=tt[-worst,-worst] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
346 mins=mins[-worst] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
347 # cat(" new min =",min(mins)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
348 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
349 goods=colnames(tt) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
350 goods=gsub("GO\\.","GO:",goods) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
351 goods=gsub("\\.GO",";GO",goods) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
352 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
353 |