annotate mtls_analyze/cluster_peaks.R @ 0:a903c48f05b4

Added non-integrated scripts to galaxy
author kmace
date Thu, 22 Mar 2012 15:21:00 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
1 ## .-.-. .-.-. .-.-. .-.-. .-.-. .-.-. .-.-. .-.-.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
2 ## /|/ \|\ /|/ \|\ /|/ \|\ /|/ \|\ /|/ \|\ /|/ \|\ / / \ \ / / \ \
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
3 ##`-' `-`-' `-`-' `-`-' `-`-' `-`-' `-`-' `-`-' ' '
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
4 ## Feb 2012 th17
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
5 ## Bonneau lab - "Aviv Madar" <am2654@nyu.edu>,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
6 ## NYU - Center for Genomics and Systems Biology
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
7 ## .-.-. .-.-. .-.-. .-.-. .-.-. .-.-. .-.-. .-.-.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
8 ## /|/ \|\ /|/ \|\ /|/ \|\ /|/ \|\ /|/ \|\ /|/ \|\ / / \ \ / / \ \
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
9 ##`-' `-`-' `-`-' `-`-' `-`-' `-`-' `-`-' `-`-' ' '
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
10
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
11 rm(list=ls())
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
12 debug=F
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
13 script.version=0.1
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
14 print.error <- function(){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
15 cat("
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
16 DESCRIPTIION:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
17 cluster_peaks.R takes MACS tab delimited files as input and produces one tab delimeted file (named mtls.xls) where
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
18 each row corresponds to a Multi TF Loci (MTL) in which peaks from different experiments (input MACS files)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
19 fall within a certain distance between summits from eachother.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
20
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
21 INPUT:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
22 1.path_input=path to MACS files '::' delim [path_input=f1::f2::f3::...::fk]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
23 2.path_output=path to save generated MTL cluster file (where to save mtls.xls)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
24 3.expt_names=user specified names for MACS files '::' delim [expt_names=n1::n2::n3::...::nk]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
25 4.dist.summits=maximum distance between summits belonging to the same MTL
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
26 5.n.autosome.chr=19 for mouse, 22 for human
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
27
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
28 EXAMPLE RUN:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
29 cluster_peaks.R
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
30 input_macs_files=SL2870_SL2871_peak.xls::SL2872_SL2876_peak.xls::SL3032_SL2871_peak.xls::SL3037_SL3036_peak.xls::SL3315_SL3319_peak.xls
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
31 path_output=~/Desktop/
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
32 expt_names=RORC_Th17::IRF4_Th17::MAF_Th17::BATF_Th17::STAT3_Th17
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
33 dist_summits=100
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
34 n_autosome_chr=19
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
35
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
36 Please cite us if you used this script:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
37 The transcription factor network regulating Th17 lineage specification and function.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
38 Maria Ciofani, Aviv Madar, Carolina Galan, Kieran Mace, Agarwal, Kim Newberry, Richard M. Myers,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
39 Richard Bonneau and Dan R. Littman et. al. (in preperation)\n\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
40 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
41
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
42 ############# helper functions:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
43 ## split.macs.list.to.chrmosomes
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
44 # input:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
45 # - macs.list: a list of macs expts: here are a few lines of one expt
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
46 ## chr start end length summit tags #NAME? fold_enrichment FDR(%)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
47 ## chr1 4322210 4323069 860 494 55 158.95 6.03 0.05
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
48 ## chr1 4797749 4798368 620 211 29 119.82 3.47 0.09
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
49 ## chr1 4848182 4849113 932 494 46 105.42 2.9 0.09
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
50 # - expts: a list of the expts names from macs.list that you want to process
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
51 # - chr: chrmosomes names
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
52 # output:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
53 # - x: a list with as many elements as chr specified by input.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
54 # -x[[i]]:macs list per chr, with peak.id column added (these are the row numbers of MACS)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
55 split.macs.list.to.chrmosomes.no.pml <- function(macs.list,expts,chr="chr1"){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
56 x <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
57 n <- length(expts)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
58 for(i in 1:n){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
59 e <- expts[i] #experiment name
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
60 cat("wroking on expt", e,"\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
61 x[[e]] <- lapply(chr,split.one.macs.expt.by.chromosome.no.pml,m=macs.list[[e]])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
62 names(x[[e]]) <- chr
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
63 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
64 return(x)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
65 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
66 # a helper function for spliat.macs.list.to.chrmosomes, gives for one chromosome the macs rows for expt MACS matrix m
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
67 # input:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
68 # - r is chromosome
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
69 # - m is macs matrix for expt e from above function
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
70 split.one.macs.expt.by.chromosome.no.pml <- function(r,m){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
71 ix.chr.i <- which(m[,"chr"]==r)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
72 # cat("working on",r,"\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
73 o <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
74 o[[r]] <- m[ix.chr.i,]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
75 o[[r]]$peak.id <- ix.chr.i
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
76 return(o[[r]])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
77 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
78
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
79 ## make.ruler makes a ruler: a line per chromosome with the locations of all tf binding sites (sorted from start of chromosome to end)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
80 # also match these summit locations with corresponding:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
81 # pvals, tfs, peak start and peak end trgts
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
82 # input:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
83 # - chr: a list of chromosome names
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
84 # - macs.list.per.chrom: a list of macs peaks for each chromosome
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
85 # output:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
86 # - o: a list each chormosome ruler as an element
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
87 make.ruler.no.pml <- function(chr,macs.list.per.chrom){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
88 x <- macs.list.per.chrom
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
89 o <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
90 for(j in 1:length(chr)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
91 r <- chr[j] # chrmosome we go over
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
92 s <- numeric()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
93 pval <- numeric()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
94 tf.to.s <- character()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
95 trgt.prox <- character()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
96 trgt.dist <- character()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
97 dtss.prox <- character()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
98 dtss.dist <- character()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
99 start <- numeric()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
100 end <- numeric()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
101 trtmnts <- names(x) # the treatments name (expt names)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
102 ## debug parameters ###
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
103 ## which experiment peaks come from
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
104 expt <- character()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
105 ## what was the peak id in that experiment
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
106 peak.ids <- numeric()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
107 ## this will allow us to always back track from a cluster to the actual peaks in it
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
108 ## debug params end ###
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
109 for(i in 1:length(trtmnts)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
110 o[[r]] <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
111 e <- trtmnts[i] #experiment name
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
112 tf <- strsplit(e,"_")[[1]][1]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
113 s <- c(s,x[[e]][[r]][,"start"]+x[[e]][[r]][,"summit"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
114 pval <- c(pval,x[[e]][[r]][,7]) # the name of 7th column is X.10.log10.pvalue. this is pval
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
115 # tf.to.s <- c(tf.to.s,rep(tf,length(x[[e]][[r]][,7]))) # all summits belong to tf
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
116 start <- c(start,x[[e]][[r]][,"start"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
117 end <- c(end,x[[e]][[r]][,"end"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
118 expt <- c(expt,rep(e,length(x[[e]][[r]][,"end"])))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
119 peak.ids <- c(peak.ids,x[[e]][[r]][,"peak.id"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
120 trgt.prox <- c(trgt.prox,x[[e]][[r]][,"trgt.prox"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
121 trgt.dist <- c(trgt.dist,x[[e]][[r]][,"trgt.dist"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
122 dtss.prox <- c(dtss.prox,x[[e]][[r]][,"dtss.prox"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
123 dtss.dist <- c(dtss.dist,x[[e]][[r]][,"dtss.dist"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
124 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
125 ix <- sort(s,index.return=TRUE)$ix
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
126 o[[r]] <- list(s=s[ix],pval=pval[ix],expt=expt[ix],start=start[ix],end=end[ix],peak.ids=peak.ids[ix],
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
127 trgt.prox=trgt.prox[ix],trgt.dist=trgt.dist[ix],dtss.prox=dtss.prox[ix],dtss.dist=dtss.dist[ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
128 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
129 return(o)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
130 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
131
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
132 ## add cluster memberships based on ruler
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
133 ## require no more than d.cut distance between tf summits
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
134 ## cur.l is the current loci number (or cluster number)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
135 assign.clusters.ids.to.peaks <- function(ruler,d.cut){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
136 cur.l <- 0
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
137 for(j in 1:length(ruler)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
138 s <- ruler[[j]][["s"]]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
139 l <- numeric(length=length(s))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
140 if(length(l)>0){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
141 cur.l <- cur.l+1
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
142 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
143 if(length(l)==1){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
144 l[1] <- cur.l
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
145 } else if(length(l)>1) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
146 l[1] <- cur.l
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
147 for(i in 2:length(l)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
148 d <- s[i]-s[i-1] # assumes s is sorted increasingly
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
149 if(d>d.cut){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
150 cur.l <- cur.l+1
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
151 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
152 l[i] <- cur.l
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
153 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
154 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
155 ruler[[j]][["l"]] <- l
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
156 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
157 return(ruler)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
158 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
159
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
160 ## here we create a list of TF enriched loci (clusters)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
161 ## input:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
162 ## - ruler: a line per chromosome with the locations of all tf binding sites (sorted from start of chromosome to end)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
163 ## output:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
164 ## -ll: a list of clusters ll where each cluster (elem in ll holds):
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
165 ## - l: the current loci (elem in ll)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
166 ## - s: summits vector as before
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
167 ## - pval: pvals vector matching the summits
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
168 ## - tfs: a vector of tfs matching s, the summits in loci l
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
169 ## - spans: a vector of spans matching s, the summits in loci l, where spans is the dist between start and end of a peak
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
170 ## - trgts: genes that are targeted by the cluster (10kb upstream, in gene, 10kb downstream)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
171 create.cluster.list.no.pml <- function(ruler){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
172 tmp <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
173 ll <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
174 for(j in 1:length(ruler)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
175 r <- names(ruler)[j]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
176 # cat("working on ",r,"\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
177 x <- ruler[[j]] # for short typing let x stand for ruler[[chr[j]]]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
178 l.vec <- unique(x[["l"]]) # the clusters ids on j chr (only unique)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
179 n <- length(l.vec) # iterate over n clusters
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
180 tmp[[j]] <- lapply(1:n,get.cluster.params.no.pml,x=x,l.vec=l.vec,r=r)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
181 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
182 ## concatenate tmp into one list
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
183 command <- paste(sep="","c(",paste(sep="","tmp[[",1:length(tmp),"]]",collapse=","),")")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
184 ll=eval(parse(text=command))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
185 return(ll)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
186 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
187 get.cluster.params.no.pml <- function(i,x,l.vec,r){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
188 ix <- which(x[["l"]]==l.vec[i])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
189 l <- l.vec[i]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
190 start <- min(x[["start"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
191 end <- max(x[["end"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
192 s <- x[["s"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
193 # tf.to.s <- x[["tf.to.s"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
194 pval <- x[["pval"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
195 pval.mean <- mean(pval)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
196 span.tfs <- x[["end"]][ix]-x[["start"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
197 span.l <- end-start
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
198 peak.ids <- x[["peak.ids"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
199 expt <- x[["expt"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
200 expt.alphanum.sorted <- sort(x[["expt"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
201 trgt.prox <- unique(x[["trgt.prox"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
202 trgt.dist <- unique(x[["trgt.dist"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
203 dtss.prox <- unique(x[["dtss.prox"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
204 dtss.dist <- unique(x[["dtss.dist"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
205 chr <- rep(r,length(ix))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
206 return(list(l=l,chr=chr,expt=expt,expt.alphanum.sorted=expt.alphanum.sorted,start=start,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
207 end=end,s=s,pval=pval,pval.mean=pval.mean,span.tfs=span.tfs,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
208 span.l=span.l,peak.ids=peak.ids,trgt.prox=trgt.prox,trgt.dist=trgt.dist,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
209 dtss.prox=dtss.prox,dtss.dist=dtss.dist))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
210 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
211
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
212 ## here we create a list of TF enriched loci (clusters)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
213 ## input:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
214 ## - ruler: a line per chromosome with the locations of all tf binding sites (sorted from start of chromosome to end)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
215 ## output:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
216 ## -ll: a list of clusters ll where each cluster (elem in ll holds):
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
217 ## - l: the current loci (elem in ll)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
218 ## - s: summits vector as before
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
219 ## - pval: pvals vector matching the summits
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
220 ## - tfs: a vector of tfs matching s, the summits in loci l
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
221 ## - spans: a vector of spans matching s, the summits in loci l, where spans is the dist between start and end of a peak
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
222 ## - trgts: genes that are targeted by the cluster (10kb upstream, in gene, 10kb downstream)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
223 create.cluster.list.no.pml <- function(ruler){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
224 tmp <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
225 ll <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
226 for(j in 1:length(ruler)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
227 r <- names(ruler)[j]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
228 # cat("working on ",r,"\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
229 x <- ruler[[j]] # for short typing let x stand for ruler[[chr[j]]]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
230 l.vec <- unique(x[["l"]]) # the clusters ids on j chr (only unique)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
231 n <- length(l.vec) # iterate over n clusters
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
232 tmp[[j]] <- lapply(1:n,get.cluster.params.no.pml,x=x,l.vec=l.vec,r=r)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
233 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
234 ## concatenate tmp into one list
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
235 command <- paste(sep="","c(",paste(sep="","tmp[[",1:length(tmp),"]]",collapse=","),")")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
236 ll=eval(parse(text=command))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
237 return(ll)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
238 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
239 get.cluster.params.no.pml <- function(i,x,l.vec,r){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
240 ix <- which(x[["l"]]==l.vec[i])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
241 l <- l.vec[i]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
242 start <- min(x[["start"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
243 end <- max(x[["end"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
244 s <- x[["s"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
245 # tf.to.s <- x[["tf.to.s"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
246 pval <- x[["pval"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
247 pval.mean <- mean(pval)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
248 span.tfs <- x[["end"]][ix]-x[["start"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
249 span.l <- end-start
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
250 peak.ids <- x[["peak.ids"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
251 expt <- x[["expt"]][ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
252 expt.alphanum.sorted <- sort(x[["expt"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
253 trgt.prox <- unique(x[["trgt.prox"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
254 trgt.dist <- unique(x[["trgt.dist"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
255 dtss.prox <- unique(x[["dtss.prox"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
256 dtss.dist <- unique(x[["dtss.dist"]][ix])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
257 chr <- rep(r,length(ix))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
258 return(list(l=l,chr=chr,expt=expt,expt.alphanum.sorted=expt.alphanum.sorted,start=start,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
259 end=end,s=s,pval=pval,pval.mean=pval.mean,span.tfs=span.tfs,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
260 span.l=span.l,peak.ids=peak.ids,trgt.prox=trgt.prox,trgt.dist=trgt.dist,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
261 dtss.prox=dtss.prox,dtss.dist=dtss.dist))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
262 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
263 ## pretty print (tab deim) to file requested elements out of chip cluster list, ll.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
264 ## input:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
265 ## - ll: a cluster list
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
266 ## - f.nm: a file name (include path) to where you want files to print
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
267 ## - tfs: a list of the tfs we want to print the file for (the same as the tfs used for the peak clustering)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
268 ## output
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
269 ## - a tab delim file with clusers as rows and elems tab delim for each cluster
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
270 print.ll.verbose.all <- function(ll,f.nm="ll.xls"){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
271 options(digits=5)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
272 cat(file=f.nm,names(ll[[1]]),sep="\t")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
273 cat(file=f.nm,"\n",append=TRUE)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
274 for(i in 1:length(ll)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
275 line <- ll[[i]][[1]] # put MTL number
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
276 line <- paste(line,ll[[i]][[2]][1],sep="\t")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
277 for(j in 3:length(ll[[i]])){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
278 val <- ll[[i]][[j]]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
279 if(length(val) == 1){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
280 line <- paste(line,val,sep="\t")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
281 } else {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
282 line <- paste(line,paste(sep="",unlist(ll[[i]][j]),collapse="_"),sep="\t")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
283 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
284 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
285 cat(file=f.nm,line,"\n",append=TRUE)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
286 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
287 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
288 ############# Code:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
289 # retrieve args
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
290 if(debug==T){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
291 cmd.args <- c(
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
292 "input_macs_files=SL2870_SL2871_peak.xls::SL2872_SL2876_peak.xls::SL3032_SL2871_peak.xls::SL3037_SL3036_peak.xls::SL3315_SL3319_peak.xls",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
293 "path_output=~/Desktop/",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
294 "expt_names=RORC_Th17::IRF4_Th17::MAF_Th17::BATF_Th17::STAT3_Th17",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
295 "dist_summits=100",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
296 "n_autosome_chr=19"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
297 )
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
298 } else {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
299 cmd.args <- commandArgs();
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
300 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
301
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
302 if(length(grep("--version",cmd.args))){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
303 cat("version",script.version,"\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
304 q()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
305 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
306
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
307 arg.names.cmd.line <- sapply(strsplit(cmd.args,"="),function(i) i[1])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
308 args.val.cmd.line <- sapply(strsplit(cmd.args,"="),function(i) i[2])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
309
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
310 arg.nms <- c("input_macs_files","path_output","expt_names","dist_summits","n_autosome_chr")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
311 arg.val <- character(length=length(arg.nms))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
312 for(i in 1:length(arg.nms)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
313 ix <- which(arg.names.cmd.line==arg.nms[i])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
314 if(length(ix)==1){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
315 arg.val[i] <- args.val.cmd.line[ix]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
316 } else {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
317 stop("######could not find ",arg.nms[i]," arg######\n\n",print.error())
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
318
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
319 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
320 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
321 if(debug==T){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
322 print(paste(arg.nms,"=",arg.val))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
323 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
324 # the files here adhere to tab delim format
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
325 input.macs.files <- strsplit(arg.val[1],"::")[[1]]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
326 if(length(input.macs.files)==1){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
327 cat("only provided one MACS file to cluster.")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
328 print.error()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
329 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
330 path.output <- arg.val[2]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
331 expt.names <- strsplit(arg.val[3],"::")[[1]]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
332 dist.summits <- as.numeric(arg.val[4])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
333 n.autosome.chr <- as.numeric(arg.val[5])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
334
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
335 # source("~/Documents/nyu/littmanLab/th17_used_for_paper/r_scripts/th17/used_for_paper/cluster_peaks_util.R")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
336 chr <- c(paste(sep="","chr",1:n.autosome.chr),paste(sep="","chr",c("X","Y")))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
337
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
338 # read MACS files
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
339 macs.list <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
340 for(i in 1:length(input.macs.files)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
341 e <- expt.names[i]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
342 macs.list[[ e ]] <- read.delim(file=input.macs.files[i])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
343 macs.list[[ e ]][,"chr"] <- as.character(macs.list[[ e ]][,"chr"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
344 macs.list[[ e ]][,"trgt.prox"] <- as.character(macs.list[[ e ]][,"trgt.prox"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
345 macs.list[[ e ]][,"trgt.dist"] <- as.character(macs.list[[ e ]][,"trgt.dist"])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
346 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
347
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
348 # take all macs files and put peaks together on each chromosome
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
349 # (as if each chromosome is a ruler and we specify where in the ruler each peak summit falls)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
350 cat("splitting macs files per chromosome\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
351 x <- split.macs.list.to.chrmosomes.no.pml(macs.list=macs.list,expts=expt.names,chr=chr)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
352 cat("adding peaks from all macs files into chromosome rulers\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
353 ruler <- make.ruler.no.pml(chr,macs.list.per.chrom=x)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
354 cat("add MTL membership to the ruler\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
355 ruler <- assign.clusters.ids.to.peaks(ruler,d.cut=dist.summits)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
356 for(i in 1:length(ruler)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
357 ix <- which(is.na(ruler[[i]][["dtss.prox"]]))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
358 ruler[[i]][["dtss.prox"]][ix] <- ""
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
359 ix <- which(is.na(ruler[[i]][["dtss.dist"]]))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
360 ruler[[i]][["dtss.dist"]][ix] <- ""
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
361 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
362 cat("creating MTL list\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
363 ll <- create.cluster.list.no.pml(ruler=ruler)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
364 cat("writing MTL table\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
365 # f.nm <- paste(sep="",path.output,paste(expt.names,collapse="_"),"_MTLs.xls")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
366 f.nm <- paste(sep="",path.output,"mtls",".xls")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
367 print.ll.verbose.all(ll=ll,f.nm=f.nm)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
368
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
369
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
370
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
371
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
372
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
373
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
374
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
375
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
376