annotate gomwu.functions.R @ 1:f7287f82602f draft

"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
author cristian
date Tue, 19 Apr 2022 08:28:43 +0000
parents 91261b42c07e
children 5acf9dfdfa27
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
1 clusteringGOs=function(gen2go,div,cutHeight) {
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
2 nn <- strsplit(gen2go, "[/.]")
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
3 if (length(nn[[1]]) == 3) {
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
4 dir <- nn[[1]][1]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
5 name <- nn[[1]][2]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
6 ext <- nn[[1]][3]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
7 } else if (length(nn[[1]]) == 2) {
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
8 dir <- "."
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
9 name <- nn[[1]][1]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
10 ext <- nn[[1]][2]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
11 }
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
12 inname=paste(dir,"/","dissim0_",div,"_",name,".",ext,sep="")
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
13
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
14 outname=paste(dir,"/","cl_dissim0_",div,"_",name,".",ext,sep="")
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
15 if (!file.exists(outname)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
16 diss=read.table(inname,sep="\t",header=T,check.names=F)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
17 row.names(diss)=names(diss)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
18 hc=hclust(as.dist(diss),method="complete")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
19 cc=cutree(hc,h=cutHeight)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
20 write.csv(cc,file=outname,quote=F)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
21 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
22 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
23
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
24
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
25
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
26 #---------------
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
27 gomwuStats=function(input,goDatabase,goAnnotations, goDivision, scriptdir, Module=FALSE, Alternative="t", adjust.multcomp="BH", clusterCutHeight=0.25,largest=0.1,smallest=5,perlPath="perl", shuffle.reps=20){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
28
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
29 nn <- strsplit(input, "[/.]")
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
30 if (length(nn[[1]]) == 3) {
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
31 dir <- nn[[1]][1]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
32 name <- nn[[1]][2]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
33 ext <- nn[[1]][3]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
34 } else if (length(nn[[1]]) == 2) {
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
35 dir <- "."
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
36 name <- nn[[1]][1]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
37 ext <- nn[[1]][2]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
38 }
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
39
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
40 extraOptions=paste("largest=",largest," smallest=",smallest," cutHeight=",clusterCutHeight,sep="")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
41 if (Module==TRUE) { adjust.multcomp="shuffle" }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
42 gomwu_a = paste(scriptdir, "gomwu_a.pl", sep="/")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
43 gomwu_b = paste(scriptdir, "gomwu_b.pl", sep="/")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
44 system(paste(perlPath, gomwu_a,goDatabase,goAnnotations,input,goDivision,extraOptions))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
45 clusteringGOs(goAnnotations,goDivision,clusterCutHeight)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
46 system(paste(perlPath, gomwu_b,goAnnotations,input,goDivision))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
47
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
48 inname=paste(dir,"/",name,"_",goDivision,".tsv",sep="")
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
49 rsq=read.table(inname,sep="\t",header=T)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
50 rsq$term=as.factor(rsq$term)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
51
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
52 mwut.t=TRUE
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
53 if (length(levels(as.factor(rsq$value)))==2) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
54 cat("Binary classification detected; will perform Fisher's test\n");
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
55 mwut.t=F
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
56 rr=fisherTest(rsq)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
57 } else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
58 if (Module==TRUE) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
59 rsq.f=rsq
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
60 rsq.f$value=as.numeric(rsq.f$value>0)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
61 rf=fisherTest(rsq.f)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
62 rsq.m=rsq[rsq$value>0,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
63 rsq.m$term=factor(rsq.m$term,levels=unique(rsq.m$term))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
64 rrm=mwuTest(rsq.m,"g")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
65 rr0=rf[rf$term %in% rrm$term,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
66 rr1=rf[!(rf$term %in% rrm$term),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
67 rr0=rr0[order(rr0$term),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
68 rrm=rrm[order(rrm$term),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
69 rr0$pval=rr0$pval*rrm$pval
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
70 rr=rbind(rr0,rr1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
71 } else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
72 cat("Continuous measure of interest: will perform MWU test\n");
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
73 rr=mwuTest(rsq,Alternative)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
74 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
75 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
76
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
77 if (adjust.multcomp=="shuffle"){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
78 message("shuffling values to calculate FDR, ",shuffle.reps," reps")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
79 reps=shuffle.reps
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
80 spv=c()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
81 for (i in 1:reps) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
82 message("replicate ",i)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
83 rsqq=rsq
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
84 rsqq$value=sample(rsq$value)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
85 if (Module==TRUE) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
86 rsq.f=rsqq
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
87 rsq.f$value=as.numeric(rsq.f$value>0)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
88 rf=fisherTest(rsq.f)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
89 rsq.m=rsqq[rsqq$value>0,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
90 rsq.m$term=factor(rsq.m$term,levels=unique(rsq.m$term))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
91 rrm=mwuTest(rsq.m,"g")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
92 rr0=rf[rf$term %in% rrm$term,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
93 rr1=rf[!(rf$term %in% rrm$term),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
94 rr0=rr0[order(rr0$term),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
95 rrm=rrm[order(rrm$term),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
96 rr0$pval=rr0$pval*rrm$pval
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
97 rs=rbind(rr0,rr1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
98 } else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
99 if (mwut.t==TRUE) { rs=mwuTest(rsqq,Alternative) } else { rs=fisherTest(rsqq) }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
100 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
101 spv=append(spv,rs$pval)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
102 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
103 fdr=c()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
104 for (p in rr$pval){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
105 fdr=append(fdr,(sum(spv<=p)/reps)/sum(rr$pval<=p))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
106 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
107 fdr[fdr>1]=1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
108 } else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
109 fdr=p.adjust(rr$pval,method=adjust.multcomp)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
110 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
111
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
112 message(sum(fdr<0.1)," GO terms at 10% FDR")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
113 rr$p.adj=fdr
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
114 fname=paste(dir,"/","MWU_",goDivision,"_",name,".",ext,sep="")
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
115 write.table(rr,fname,row.names=F)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
116 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
117
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
118 #---------------------
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
119 mwuTest=function(gotable,Alternative) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
120 gos=gotable
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
121 terms=levels(gos$term)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
122 gos$seq=as.character(gos$seq)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
123 nrg=gos[!duplicated(gos$seq),5]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
124 names(nrg)=gos[!duplicated(gos$seq),4]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
125 # nrg=nrg+rnorm(nrg,sd=0.01) # to be able to do exact wilcox test
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
126 rnk=rank(nrg)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
127 names(rnk)=names(nrg)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
128 pvals=c();drs=c();nams=c();levs=c();nseqs=c()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
129 for (t in terms){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
130 got=gos[gos$term==t,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
131 got=got[!duplicated(got$seq),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
132 ngot=gos[gos$term!=t,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
133 ngot=ngot[!duplicated(ngot$seq),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
134 ngot=ngot[!(ngot$seq %in% got$seq),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
135 sgo.yes=got$seq
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
136 n1=length(sgo.yes)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
137 sgo.no=ngot$seq
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
138 n2=length(sgo.no)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
139 #message(t," wilcox:",n1," ",n2)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
140 wi=wilcox.test(nrg[sgo.yes],nrg[sgo.no],alternative=Alternative) # removed correct=FALSE
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
141 r1=sum(rnk[sgo.yes])/n1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
142 r0=sum(rnk[sgo.no])/n2
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
143 dr=r1-r0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
144 drs=append(drs,round(dr,0))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
145 levs=append(levs,got$lev[1])
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
146 nams=append(nams,as.character(got$name[1]))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
147 pvals=append(pvals,wi$p.value)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
148 nseqs=append(nseqs,n1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
149 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
150 res=data.frame(cbind("delta.rank"=drs,"pval"=pvals,"level"=levs,nseqs))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
151 res=cbind(res,"term"=as.character(terms),"name"=nams)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
152 res$pval=as.numeric(as.character(res$pval))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
153 res$delta.rank=as.numeric(as.character(res$delta.rank))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
154 res$level=as.numeric(as.character(res$level))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
155 res$nseqs=as.numeric(as.character(res$nseqs))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
156 return(res)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
157 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
158 #------------------------
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
159 fisherTest=function(gotable) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
160 gos=gotable
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
161 terms=levels(gos$term)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
162 gos$seq=as.character(gos$seq)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
163 pft=c();nam=c();lev=c();nseqs=c()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
164 for (t in terms) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
165 got=gos[gos$term==t,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
166 got=got[!duplicated(got$seq),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
167 ngot=gos[gos$term!=t,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
168 ngot=ngot[!duplicated(ngot$seq),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
169 ngot=ngot[!(ngot$seq %in% got$seq),]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
170 go.sig=sum(got$value)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
171 go.ns=length(got[,1])-go.sig
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
172 ngo.sig=sum(ngot$value)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
173 ngo.ns=length(ngot[,1])-ngo.sig
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
174 sig=c(go.sig,ngo.sig) # number of significant genes belonging and not belonging to the tested GO category
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
175 ns=c(go.ns,ngo.ns) # number of not-significant genes belonging and not belonging to the tested GO category
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
176 mm=matrix(c(sig,ns),nrow=2,dimnames=list(ns=c("go","notgo"),sig=c("go","notgo")))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
177 ff=fisher.test(mm,alternative="greater")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
178 pft=append(pft,ff$p.value)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
179 nam=append(nam,as.character(got$name[1]))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
180 lev=append(lev,got$lev[1])
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
181 nseqs=append(nseqs,length(got[,1]))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
182 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
183 res=data.frame(cbind("delta.rank"=rep(0),"pval"=pft,"level"=lev,nseqs,"term"=terms,"name"=nam))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
184 res[,1]=as.numeric(as.character(res[,1]))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
185 res[,2]=as.numeric(as.character(res[,2]))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
186 res[,3]=as.numeric(as.character(res[,3]))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
187 res$nseqs=as.numeric(as.character(res$nseqs))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
188 return(res)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
189 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
190
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
191 #-------------------------
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
192 gomwuPlot=function(inFile,goAnnotations,goDivision,level1=0.1,level2=0.05,level3=0.01,absValue=-log(0.05,10),adjusted=TRUE,txtsize=1,font.family="sans",treeHeight=0.5,colors=NULL) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
193 require(ape)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
194
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
195 nn <- strsplit(inFile, "[/.]")
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
196 if (length(nn[[1]]) == 3) {
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
197 dir <- nn[[1]][1]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
198 name <- nn[[1]][2]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
199 ext <- nn[[1]][3]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
200 } else if (length(nn[[1]]) == 2) {
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
201 dir <- "."
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
202 name <- nn[[1]][1]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
203 ext <- nn[[1]][2]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
204 }
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
205 aa <- strsplit(goAnnotations, "[/.]")
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
206 if (length(aa[[1]]) == 3) {
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
207 adir <- aa[[1]][1]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
208 aname <- aa[[1]][2]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
209 aext <- aa[[1]][3]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
210 } else if (length(aa[[1]]) == 2) {
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
211 adir <- "."
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
212 aname <- aa[[1]][1]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
213 aext <- aa[[1]][2]
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
214 }
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
215 # input=inFile
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
216 in.mwu=paste(dir,"/",paste("MWU",goDivision,name,sep="_"), ".", ext,sep="")
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
217 in.dissim=paste(dir, "/", paste("dissim",goDivision,name,aname,sep="_"), ".", aext, sep="")
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
218
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
219 cutoff=-log(level1,10)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
220 pv=read.table(in.mwu,header=T)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
221 row.names(pv)=pv$term
1
f7287f82602f "planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents: 0
diff changeset
222 in.raw=paste(dir,"/",paste(name,goDivision,sep="_"), ".tsv", sep="")
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
223 rsq=read.table(in.raw,sep="\t",header=T)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
224 rsq$term=as.factor(rsq$term)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
225
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
226 if (adjusted==TRUE) { pvals=pv$p.adj } else { pvals=pv$pval }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
227 heat=data.frame(cbind("pval"=pvals))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
228 row.names(heat)=pv$term
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
229 heat$pval=-log(heat$pval+1e-15,10)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
230 heat$direction=0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
231 heat$direction[pv$delta.rank>0]=1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
232 if (cutoff>0) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
233 goods=subset(heat,pval>=cutoff)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
234 } else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
235 goods.names=unique(rsq$term[abs(rsq$value)>=absValue])
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
236 goods=heat[row.names(heat) %in% goods.names,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
237 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
238
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
239 if (is.null(colors) | length(colors)<4 ) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
240 colors=c("dodgerblue2","firebrick1","skyblue2","lightcoral")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
241 if (sum(goods$direction)==nrow(goods) | sum(goods$direction)==0) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
242 colors=c("black","black","grey50","grey50")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
243 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
244 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
245 goods.names=row.names(goods)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
246
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
247 # reading and subsetting dissimilarity matrix
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
248 diss=read.table(in.dissim,sep="\t",header=T,check.names=F)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
249 row.names(diss)=names(diss)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
250 diss.goods=diss[goods.names,goods.names]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
251
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
252 # how many genes out of what we started with we account for with our best categories?
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
253 good.len=c();good.genes=c()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
254 for (g in goods.names) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
255 sel=rsq[rsq$term==g,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
256 pass=abs(sel$value)>=absValue
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
257 sel=sel[pass,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
258 good.genes=append(good.genes,as.character(sel$seq))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
259 good.len=append(good.len,nrow(sel))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
260 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
261 ngenes=length(unique(good.genes))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
262
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
263 #hist(rsq$value)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
264 totSum=length(unique(rsq$seq[abs(rsq$value)>=absValue]))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
265 row.names(goods)=paste(good.len,"/",pv[pv$term %in% goods.names,]$nseqs," ",pv[pv$term %in% goods.names,]$name,sep="")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
266 row.names(heat)=paste(good.len,"/",pv$nseqs," ",pv$name,sep="")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
267 row.names(diss.goods)=paste(good.len,"/",pv[pv$term %in% goods.names,]$nseqs," ",pv[pv$term %in% goods.names,]$name,sep="")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
268
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
269 # clustering terms better than cutoff
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
270 GO.categories=as.dist(diss.goods)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
271 cl.goods=hclust(GO.categories,method="average")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
272 labs=cl.goods$labels[cl.goods$order] # saving the labels to order the plot
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
273 goods=goods[labs,]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
274 labs=sub(" activity","",labs)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
275
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
276 old.par <- par( no.readonly = TRUE )
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
277
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
278 plots=layout(matrix(c(1,2,3),1,3,byrow=T),c(treeHeight,3,1),TRUE)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
279
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
280 par(mar = c(2,2,0.85,0))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
281 plot(as.phylo(cl.goods),show.tip.label=FALSE,cex=0.0000001)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
282 step=100
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
283 left=1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
284 top=step*(2+length(labs))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
285
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
286 par(mar = c(0,0,0.3,0))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
287 plot(c(1:top)~c(1:top),type="n",axes=F,xlab="",ylab="")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
288 ii=1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
289 goods$color=1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
290 goods$color[goods$direction==1 & goods$pval>cutoff]=colors[4]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
291 goods$color[goods$direction==0 & goods$pval>cutoff]=colors[3]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
292 goods$color[goods$direction==1 & goods$pval>(-log(level2,10))]=colors[2]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
293 goods$color[goods$direction==0 & goods$pval>(-log(level2,10))]=colors[1]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
294 goods$color[goods$direction==1 & goods$pval>(-log(level3,10))]=colors[2]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
295 goods$color[goods$direction==0 & goods$pval>(-log(level3,10))]=colors[1]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
296 for (i in length(labs):1) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
297 ypos=top-step*ii
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
298 ii=ii+1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
299 if (goods$pval[i]> -log(level3,10)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
300 text(left,ypos,labs[i],font=2,cex=1*txtsize,col=goods$color[i],adj=c(0,0),family=font.family)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
301 } else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
302 if (goods$pval[i]>-log(level2,10)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
303 text(left,ypos,labs[i],font=1,cex=0.8* txtsize,col=goods$color[i],adj=c(0,0),family=font.family)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
304 } else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
305 # if (goods$pval[i]>cutoff) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
306 # text(left,ypos,labs[i],font=3,cex=0.8* txtsize,col=goods$color[i],adj=c(0,0),family=font.family)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
307 # } else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
308 text(left,ypos,labs[i],font=3,cex=0.8* txtsize,col=goods$color[i],adj=c(0,0),family=font.family)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
309 #}
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
310 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
311 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
312 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
313
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
314 par(mar = c(3,1,1,0))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
315
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
316 plot(c(1:top)~c(1:top),type="n",axes=F,xlab="",ylab="")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
317 text(left,top-step*2,paste("p < ",level3,sep=""),font=2,cex=1* txtsize,adj=c(0,0),family=font.family)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
318 text(left,top-step*3,paste("p < ",level2,sep=""),font=1,cex=0.8* txtsize,adj=c(0,0),family=font.family)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
319 text(left,top-step*4,paste("p < ",10^(-cutoff),sep=""),font=3,col="grey50",cex=0.8* txtsize,adj=c(0,0),family=font.family)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
320
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
321 message("GO terms dispayed: ",length(goods.names))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
322 message("\"Good genes\" accounted for: ", ngenes," out of ",totSum, " ( ",round(100*ngenes/totSum,0), "% )")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
323 par(old.par)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
324 goods$pval=10^(-1*goods$pval)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
325 return(list(goods,cl.goods))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
326 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
327
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
328 #------------------
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
329 # returns non-overlapping GO categories based on dissimilarity table
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
330 indepGO=function(dissim.table,min.similarity=1) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
331 tt=read.table(dissim.table,sep="\t", header=TRUE)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
332 tt=as.matrix(tt)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
333 diag(tt)=1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
334 for (i in 1:ncol(tt)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
335 mins=apply(tt,2,min)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
336 if (min(mins)>=min.similarity) { break }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
337 sums=apply(tt,2,sum)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
338 worst=which(sums==min(sums))[1]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
339 # cat("\n",worsts,"\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
340 # gw=c()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
341 # for(w in worsts) { gw=append(gw,sum(tt[,w]==1)) }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
342 # cat(gw)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
343 # worst=worsts[gw==min(gw)]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
344 # cat("\n",i," removing",worst,"; sum =",sums[worst])
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
345 tt=tt[-worst,-worst]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
346 mins=mins[-worst]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
347 # cat(" new min =",min(mins))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
348 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
349 goods=colnames(tt)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
350 goods=gsub("GO\\.","GO:",goods)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
351 goods=gsub("\\.GO",";GO",goods)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
352 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
353