comparison Contra/scripts/large_region_cbs.R @ 0:7564f3b1e675

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:31:43 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7564f3b1e675
1 # ----------------------------------------------------------------------#
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
3 #
4 # > Source License <
5 # This file is part of CONTRA.
6 #
7 # CONTRA is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # CONTRA is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 #-----------------------------------------------------------------------#
22 # Last Updated : 12 October 2011 16:43PM
23
24 library(DNAcopy)
25 options <- commandArgs(trailingOnly = T)
26 logcov.file = options[1]
27 param.small.seg = as.numeric(options[2])
28 param.large.seg = as.numeric(options[3])
29 param.pval.cutoff = as.numeric(options[4])
30 param.pvals.size = as.numeric(options[5])
31 param.call.low = as.numeric(options[6])
32 param.call.high = as.numeric(options[7])
33 bufloc = options[8]
34
35 #param.small.seg = 1
36 #param.large.seg = 25
37 #param.pval.cutoff = 0.1
38 #param.pvals.size = 0.35
39
40 #param.call.low = -0.3
41 #param.call.high= 0.3
42
43
44 dat = read.delim(logcov.file, as.is=F, header=T)
45 t.id = dat$Targeted.Region.ID
46 t.mean = dat$Adjusted.Mean.of.LogRatio
47 t.chr = dat$Chr
48
49 cna.dat <- CNA(t.mean, t.chr, t.id, data.type="logratio")
50 smooth.cna.dat = smooth.CNA(cna.dat)
51
52 kmax.range = c(param.large.seg, param.small.seg)
53 for (i in kmax.range){
54 out.f = paste(bufloc, "/CBS", i ,".txt", sep="")
55 cna.segment = segment(smooth.cna.dat, kmax = i, verbose = 1)
56 #pdf(paste("segment_", "kmax", i, ".pdf", sep=""))
57 #for (chr.list in unique(t.chr)){
58 # plotSample(cna.segment, chromlist=chr.list)
59 #}
60 #dev.off()
61
62 #Get Data
63 cna.segment.out <- cna.segment$output
64 cna.segment.start = c()
65 cna.segment.end = c()
66 cna.segment.mean = c()
67 cna.segment.pvals.size = c()
68 cna.segment.calls = c()
69 for (n in 1:nrow(cna.segment.out)){
70 cna.start.id = cna.segment.out[n,]$loc.start
71 cna.end.id = cna.segment.out[n,]$loc.end
72 cna.start.coord = dat[dat$Targeted.Region.ID==(cna.start.id),]$OriStCoordinate
73 cna.end.coord = dat[dat$Targeted.Region.ID==(cna.end.id), ]$OriEndCoordinate
74
75 dat.inrange = dat[dat$Targeted.Region.ID<=cna.end.id&dat$Targeted.Region.ID>=cna.start.id,]
76 cna.segment.logratios = dat.inrange$Adjusted.Mean.of.LogRatio
77 cna.segment.pvalues = dat.inrange$Adjusted.P.Value
78 segment.pvals.above = dat.inrange[dat.inrange$Adjusted.P.Value<=param.pval.cutoff,]$Adjusted.P.Value
79
80 segment.pvals.size = length(segment.pvals.above)/length(cna.segment.pvalues)
81 cna.segment.mean = c(cna.segment.mean, mean(cna.segment.logratios))
82 cna.segment.start = c(cna.segment.start, cna.start.coord)
83 cna.segment.end = c(cna.segment.end , cna.end.coord)
84 cna.segment.pvals.size = c(cna.segment.pvals.size, segment.pvals.size)
85
86 if (segment.pvals.size < param.pvals.size){
87 #No-Call
88 cna.segment.calls = c(cna.segment.calls, "No")
89 } else {
90 m = mean(cna.segment.logratios)
91 if ((m > param.call.low) && (m < param.call.high)){
92 #No-Call
93 cna.segment.calls = c(cna.segment.calls, "No")
94 } else {
95 #Call
96 cna.segment.calls = c(cna.segment.calls, "CNV")
97 }
98 }
99 }
100
101
102 outdf = data.frame(Chr=cna.segment.out$chrom, Target.Start=cna.segment.out$loc.start, Target.End=cna.segment.out$loc.end, NumberOfTargets=cna.segment.out$num.mark, OriStCoordinate=cna.segment.start, OriEndCoordinate=cna.segment.end, CBS.Mean = cna.segment.out$seg.mean, LogRatios = cna.segment.mean, Above.PValues.Cutoff=cna.segment.pvals.size, Calls = cna.segment.calls)
103
104 write.table(outdf, out.f, sep="\t", quote=F, row.names=F, col.names=T)
105
106 }
107
108 small.dat.f = paste(bufloc, "/CBS", param.small.seg ,".txt", sep="")
109 large.dat.f = paste(bufloc, "/CBS", param.large.seg ,".txt", sep="")
110
111 small.dat = read.delim(small.dat.f, as.is=F, header=T)
112 large.dat = read.delim(large.dat.f, as.is=F, header=T)
113
114 large.nocall = large.dat[large.dat$Calls=="No",]
115 small.call = small.dat[small.dat$Calls!="No",]
116 included.segment= large.dat[large.dat$Calls!="No",]
117
118 for (i in 1:nrow(small.call)){
119 small.segment = small.call[i, ]
120 chr.small = small.segment$Chr
121 start.small = small.segment$Target.Start
122 end.small = small.segment$Target.End
123 match.large.nocall = large.nocall[large.nocall$Chr==chr.small,]
124
125 match.large.nocall = match.large.nocall[match.large.nocall$Target.End>=start.small,]
126 match.large.nocall = match.large.nocall[match.large.nocall$Target.Start<=start.small,]
127 merged.hole = match.large.nocall[match.large.nocall$Target.End>=start.small,]
128
129 if (nrow(merged.hole) > 0){
130 included.segment = merge(included.segment, small.segment, all=T)
131 }
132 }
133
134 out.f = paste(logcov.file, ".LargeDeletion.txt", sep="")
135 write.table(included.segment, out.f, sep="\t", quote=F, row.names=F)
136
137
138
139
140
141
142
143
144
145
146
147
148