annotate Dotplot_Release/Step2_data_filtering.R @ 17:296e40bfe2d7 draft

Uploaded
author bornea
date Wed, 16 Mar 2016 12:11:39 -0400
parents bc752a05f16d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
bc752a05f16d Uploaded
bornea
parents:
diff changeset
1 #!/usr/bin/env Rscript
bc752a05f16d Uploaded
bornea
parents:
diff changeset
2
bc752a05f16d Uploaded
bornea
parents:
diff changeset
3 args <- commandArgs(trailingOnly = TRUE)
bc752a05f16d Uploaded
bornea
parents:
diff changeset
4
bc752a05f16d Uploaded
bornea
parents:
diff changeset
5 d = read.delim(args[1], header=T, as.is=T)
bc752a05f16d Uploaded
bornea
parents:
diff changeset
6
bc752a05f16d Uploaded
bornea
parents:
diff changeset
7 d2 = d
bc752a05f16d Uploaded
bornea
parents:
diff changeset
8 d2s = d
bc752a05f16d Uploaded
bornea
parents:
diff changeset
9
bc752a05f16d Uploaded
bornea
parents:
diff changeset
10 ss_cutoff <- as.numeric(args[2])
bc752a05f16d Uploaded
bornea
parents:
diff changeset
11 ### Keep only the preys that appear in at least 2 baits with counts greater than the cutoff given in args[2]
bc752a05f16d Uploaded
bornea
parents:
diff changeset
12 id = apply(d, 1, function(x) sum(x>ss_cutoff) >= 2)
bc752a05f16d Uploaded
bornea
parents:
diff changeset
13 id2 = apply(d, 1, function(x) sum(x>ss_cutoff) < 2)
bc752a05f16d Uploaded
bornea
parents:
diff changeset
14 d2 = d2[id, ]
bc752a05f16d Uploaded
bornea
parents:
diff changeset
15 d2s = d2s[id2, 0]
bc752a05f16d Uploaded
bornea
parents:
diff changeset
16 max.d2 = max(as.numeric(as.matrix(d2)))
bc752a05f16d Uploaded
bornea
parents:
diff changeset
17 d2 = d2 / max.d2 * 10
bc752a05f16d Uploaded
bornea
parents:
diff changeset
18
bc752a05f16d Uploaded
bornea
parents:
diff changeset
19 d3 = data.frame(PROT = rownames(d2), d2)
bc752a05f16d Uploaded
bornea
parents:
diff changeset
20
bc752a05f16d Uploaded
bornea
parents:
diff changeset
21 outfile <- paste(c(args[3]), "dat", sep=".")
bc752a05f16d Uploaded
bornea
parents:
diff changeset
22
bc752a05f16d Uploaded
bornea
parents:
diff changeset
23 ### The following file is the outcome of running this step.
bc752a05f16d Uploaded
bornea
parents:
diff changeset
24 write.table(d3, outfile, sep="\t", quote=F, row.names=F)
bc752a05f16d Uploaded
bornea
parents:
diff changeset
25 ### This is the final input file for the nested cluster algorithm
bc752a05f16d Uploaded
bornea
parents:
diff changeset
26
bc752a05f16d Uploaded
bornea
parents:
diff changeset
27 write.table(d2s, "singletons.txt", quote=F)
bc752a05f16d Uploaded
bornea
parents:
diff changeset
28