annotate annotate_peak/.Rhistory @ 18:6a9b9694acbf draft

Uploaded
author testtool
date Mon, 20 Mar 2017 06:51:22 -0400
parents 53df7871db21
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
53df7871db21 Uploaded
testtool
parents:
diff changeset
1 TAB <- read.csv("input.csv")
53df7871db21 Uploaded
testtool
parents:
diff changeset
2 mysamples <- lapply(TAB$ID,function(x)getGEO(x))
53df7871db21 Uploaded
testtool
parents:
diff changeset
3 input <- function(TAB) { if(is(TAB, "csv")){
53df7871db21 Uploaded
testtool
parents:
diff changeset
4 TAB <- read.csv("input.csv")}
53df7871db21 Uploaded
testtool
parents:
diff changeset
5 else{
53df7871db21 Uploaded
testtool
parents:
diff changeset
6 print("error in data file")
53df7871db21 Uploaded
testtool
parents:
diff changeset
7 }}
53df7871db21 Uploaded
testtool
parents:
diff changeset
8 input()
53df7871db21 Uploaded
testtool
parents:
diff changeset
9 TAB <- read.csv("input.csv")}
53df7871db21 Uploaded
testtool
parents:
diff changeset
10 TAB <- read.csv("input.csv")
53df7871db21 Uploaded
testtool
parents:
diff changeset
11 test_func <- function(
53df7871db21 Uploaded
testtool
parents:
diff changeset
12 clusterSize=2,
53df7871db21 Uploaded
testtool
parents:
diff changeset
13 cutoff=0.2,
53df7871db21 Uploaded
testtool
parents:
diff changeset
14 platform_id='HM450',
53df7871db21 Uploaded
testtool
parents:
diff changeset
15 genome_id='hg19')
53df7871db21 Uploaded
testtool
parents:
diff changeset
16 {
53df7871db21 Uploaded
testtool
parents:
diff changeset
17 args = commandArgs(trailingOnly=TRUE)
53df7871db21 Uploaded
testtool
parents:
diff changeset
18 methyl_file = args[1]
53df7871db21 Uploaded
testtool
parents:
diff changeset
19 ChiPseq_file = args[2]
53df7871db21 Uploaded
testtool
parents:
diff changeset
20 output_file = args[3]
53df7871db21 Uploaded
testtool
parents:
diff changeset
21 options(warn=-1)
53df7871db21 Uploaded
testtool
parents:
diff changeset
22 TAB=read.csv(methyl_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
23 ChiPseq=import(ChiPseq_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
24 if(is.null(TAB)){
53df7871db21 Uploaded
testtool
parents:
diff changeset
25 stop("Must specify input files")
53df7871db21 Uploaded
testtool
parents:
diff changeset
26 }else{
53df7871db21 Uploaded
testtool
parents:
diff changeset
27 mysamples <- lapply(TAB$ID,function(x) getGEO(x))
53df7871db21 Uploaded
testtool
parents:
diff changeset
28 }
53df7871db21 Uploaded
testtool
parents:
diff changeset
29 wrappedFunction <- function(test_func)
53df7871db21 Uploaded
testtool
parents:
diff changeset
30 s0 <- lapply(mysamples,Table)
53df7871db21 Uploaded
testtool
parents:
diff changeset
31 id_ref<-lapply(s0,function(x)x$ID_REF)
53df7871db21 Uploaded
testtool
parents:
diff changeset
32 if(length(unique(id_ref)) != 1) {
53df7871db21 Uploaded
testtool
parents:
diff changeset
33 stop("Error different ID_REF for samples")
53df7871db21 Uploaded
testtool
parents:
diff changeset
34 } else if (is.null((unlist(unique(id_ref))))) {
53df7871db21 Uploaded
testtool
parents:
diff changeset
35 stop("NO GSM data avaliable")
53df7871db21 Uploaded
testtool
parents:
diff changeset
36 } else {
53df7871db21 Uploaded
testtool
parents:
diff changeset
37 values<-do.call("cbind",lapply(s0,function(x)x$VALUE))
53df7871db21 Uploaded
testtool
parents:
diff changeset
38 colnames(values)=TAB$ID
53df7871db21 Uploaded
testtool
parents:
diff changeset
39 rownames(values)=id_ref[[1]]
53df7871db21 Uploaded
testtool
parents:
diff changeset
40 cg <- rownames(values)
53df7871db21 Uploaded
testtool
parents:
diff changeset
41 probe <- c(cg)
53df7871db21 Uploaded
testtool
parents:
diff changeset
42 hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id)
53df7871db21 Uploaded
testtool
parents:
diff changeset
43 probe.info <- hm450.hg19[probe]
53df7871db21 Uploaded
testtool
parents:
diff changeset
44 f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value,
53df7871db21 Uploaded
testtool
parents:
diff changeset
45 BP=as.numeric(probe.info@elementMetadata$probeStart))
53df7871db21 Uploaded
testtool
parents:
diff changeset
46 designMatrix <- model.matrix(~ TAB$Phenotype)
53df7871db21 Uploaded
testtool
parents:
diff changeset
47 DMR <- bumphunter(values, design = designMatrix,
53df7871db21 Uploaded
testtool
parents:
diff changeset
48 pos=f$BP,cutoff=cutoff,chr=f$CHR)
53df7871db21 Uploaded
testtool
parents:
diff changeset
49 MAT <- DMR$table[which(DMR$table$L>=clusterSize),]
53df7871db21 Uploaded
testtool
parents:
diff changeset
50 METH <- GRanges(seqnames=MAT$chr,
53df7871db21 Uploaded
testtool
parents:
diff changeset
51 ranges=IRanges
53df7871db21 Uploaded
testtool
parents:
diff changeset
52 (start=MAT$start,
53df7871db21 Uploaded
testtool
parents:
diff changeset
53 end=MAT$end),
53df7871db21 Uploaded
testtool
parents:
diff changeset
54 value_pos=MAT$value)
53df7871db21 Uploaded
testtool
parents:
diff changeset
55 peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000)
53df7871db21 Uploaded
testtool
parents:
diff changeset
56 p <- peaks$peaklist$`probe.info///ChiPseq`
53df7871db21 Uploaded
testtool
parents:
diff changeset
57 peakAnno <- annotatePeak(p, file=peakAnno)
53df7871db21 Uploaded
testtool
parents:
diff changeset
58 output_file <- annotatePeak(p, file=output_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
59 }}
53df7871db21 Uploaded
testtool
parents:
diff changeset
60 test_func <- function(
53df7871db21 Uploaded
testtool
parents:
diff changeset
61 clusterSize=2,
53df7871db21 Uploaded
testtool
parents:
diff changeset
62 cutoff=0.2,
53df7871db21 Uploaded
testtool
parents:
diff changeset
63 platform_id='HM450',
53df7871db21 Uploaded
testtool
parents:
diff changeset
64 genome_id='hg19')
53df7871db21 Uploaded
testtool
parents:
diff changeset
65 {
53df7871db21 Uploaded
testtool
parents:
diff changeset
66 args = commandArgs(trailingOnly=TRUE)
53df7871db21 Uploaded
testtool
parents:
diff changeset
67 methyl_file = args[1]
53df7871db21 Uploaded
testtool
parents:
diff changeset
68 ChiPseq_file = args[2]
53df7871db21 Uploaded
testtool
parents:
diff changeset
69 output_file = args[3]
53df7871db21 Uploaded
testtool
parents:
diff changeset
70 options(warn=-1)
53df7871db21 Uploaded
testtool
parents:
diff changeset
71 TAB=read.csv(methyl_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
72 ChiPseq=import(ChiPseq_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
73 if(is.null(TAB)){
53df7871db21 Uploaded
testtool
parents:
diff changeset
74 stop("Must specify input files")
53df7871db21 Uploaded
testtool
parents:
diff changeset
75 }else{
53df7871db21 Uploaded
testtool
parents:
diff changeset
76 mysamples <- lapply(TAB$ID,function(x) getGEO(x))
53df7871db21 Uploaded
testtool
parents:
diff changeset
77 }
53df7871db21 Uploaded
testtool
parents:
diff changeset
78 wrappedFunction <- function(test_func)
53df7871db21 Uploaded
testtool
parents:
diff changeset
79 s0 <- lapply(mysamples,Table)
53df7871db21 Uploaded
testtool
parents:
diff changeset
80 id_ref<-lapply(s0,function(x)x$ID_REF)
53df7871db21 Uploaded
testtool
parents:
diff changeset
81 if(length(unique(id_ref)) != 1) {
53df7871db21 Uploaded
testtool
parents:
diff changeset
82 stop("Error different ID_REF for samples")
53df7871db21 Uploaded
testtool
parents:
diff changeset
83 } else if (is.null((unlist(unique(id_ref))))) {
53df7871db21 Uploaded
testtool
parents:
diff changeset
84 stop("NO GSM data avaliable")
53df7871db21 Uploaded
testtool
parents:
diff changeset
85 } else {
53df7871db21 Uploaded
testtool
parents:
diff changeset
86 values<-do.call("cbind",lapply(s0,function(x)x$VALUE))
53df7871db21 Uploaded
testtool
parents:
diff changeset
87 colnames(values)=TAB$ID
53df7871db21 Uploaded
testtool
parents:
diff changeset
88 rownames(values)=id_ref[[1]]
53df7871db21 Uploaded
testtool
parents:
diff changeset
89 cg <- rownames(values)
53df7871db21 Uploaded
testtool
parents:
diff changeset
90 probe <- c(cg)
53df7871db21 Uploaded
testtool
parents:
diff changeset
91 hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id)
53df7871db21 Uploaded
testtool
parents:
diff changeset
92 probe.info <- hm450.hg19[probe]
53df7871db21 Uploaded
testtool
parents:
diff changeset
93 f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value,
53df7871db21 Uploaded
testtool
parents:
diff changeset
94 BP=as.numeric(probe.info@elementMetadata$probeStart))
53df7871db21 Uploaded
testtool
parents:
diff changeset
95 designMatrix <- model.matrix(~ TAB$Phenotype)
53df7871db21 Uploaded
testtool
parents:
diff changeset
96 DMR <- bumphunter(values, design = designMatrix,
53df7871db21 Uploaded
testtool
parents:
diff changeset
97 pos=f$BP,cutoff=cutoff,chr=f$CHR)
53df7871db21 Uploaded
testtool
parents:
diff changeset
98 MAT <- DMR$table[which(DMR$table$L>=clusterSize),]
53df7871db21 Uploaded
testtool
parents:
diff changeset
99 METH <- GRanges(seqnames=MAT$chr,
53df7871db21 Uploaded
testtool
parents:
diff changeset
100 ranges=IRanges
53df7871db21 Uploaded
testtool
parents:
diff changeset
101 (start=MAT$start,
53df7871db21 Uploaded
testtool
parents:
diff changeset
102 end=MAT$end),
53df7871db21 Uploaded
testtool
parents:
diff changeset
103 value_pos=MAT$value)
53df7871db21 Uploaded
testtool
parents:
diff changeset
104 peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000)
53df7871db21 Uploaded
testtool
parents:
diff changeset
105 p <- peaks$peaklist$`probe.info///ChiPseq`
53df7871db21 Uploaded
testtool
parents:
diff changeset
106 peakAnno <- annotatePeak(p, file=peakAnno)
53df7871db21 Uploaded
testtool
parents:
diff changeset
107 output_file <- annotatePeak(p, file=output_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
108 }}
53df7871db21 Uploaded
testtool
parents:
diff changeset
109 )
53df7871db21 Uploaded
testtool
parents:
diff changeset
110 a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5)
53df7871db21 Uploaded
testtool
parents:
diff changeset
111 a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5)
53df7871db21 Uploaded
testtool
parents:
diff changeset
112 b<-c(2.9,2.7,2,6,2.7,2.5,2.5,3.2,3.1,3.3,2.8)
53df7871db21 Uploaded
testtool
parents:
diff changeset
113 a-b
53df7871db21 Uploaded
testtool
parents:
diff changeset
114 file<-read.csv("~/Documents/SS2.csv")
53df7871db21 Uploaded
testtool
parents:
diff changeset
115 head(file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
116 e
53df7871db21 Uploaded
testtool
parents:
diff changeset
117 file
53df7871db21 Uploaded
testtool
parents:
diff changeset
118 t.test(file$WBT,file$WBA)
53df7871db21 Uploaded
testtool
parents:
diff changeset
119 t.test(file$WBT,file$WBA,paired=TRUE)
53df7871db21 Uploaded
testtool
parents:
diff changeset
120 mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x))
53df7871db21 Uploaded
testtool
parents:
diff changeset
121 require("GEOquery",quietly = TRUE)
53df7871db21 Uploaded
testtool
parents:
diff changeset
122 require("BiocGenerics",quietly = TRUE)
53df7871db21 Uploaded
testtool
parents:
diff changeset
123 args <- commandArgs(trailingOnly = TRUE)
53df7871db21 Uploaded
testtool
parents:
diff changeset
124 csv_file = args[1]
53df7871db21 Uploaded
testtool
parents:
diff changeset
125 #csv_file <- ("test-data/input.csv")
53df7871db21 Uploaded
testtool
parents:
diff changeset
126 TAB=read.csv(csv_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
127 if(is.null(TAB)){
53df7871db21 Uploaded
testtool
parents:
diff changeset
128 stop("Must specify input files")
53df7871db21 Uploaded
testtool
parents:
diff changeset
129 }else{
53df7871db21 Uploaded
testtool
parents:
diff changeset
130 mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x))
53df7871db21 Uploaded
testtool
parents:
diff changeset
131 }
53df7871db21 Uploaded
testtool
parents:
diff changeset
132 csv_file <- ("test-data/input.csv")
53df7871db21 Uploaded
testtool
parents:
diff changeset
133 TAB=read.csv(csv_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
134 TAB
53df7871db21 Uploaded
testtool
parents:
diff changeset
135 csv_file<-("test-data/input.csv")
53df7871db21 Uploaded
testtool
parents:
diff changeset
136 TAB = read.csv(csv_file)
53df7871db21 Uploaded
testtool
parents:
diff changeset
137 TAB
53df7871db21 Uploaded
testtool
parents:
diff changeset
138 ??bumphunter
18
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
139 ??toGranges
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
140 require("ChIPseeker", quietly = TRUE)
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
141 require("ChIPpeakAnno", quietly = TRUE)
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
142 options(warn = -1)
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
143 args <- commandArgs(trailingOnly = TRUE)
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
144 DMR = args[1]
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
145 annoPeakTable = args[2]
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
146 DMRInfo = read.table(
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
147 DMR,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
148 header = FALSE,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
149 sep = "\t",
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
150 stringsAsFactors = FALSE,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
151 quote = ""
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
152 )
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
153 peaks <- GRanges(seqnames = DMRInfo[, 1],
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
154 ranges = IRanges
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
155 (start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
156 peaks[1:2]
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
157 DMR <- ("test-data/DMR.bed")
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
158 DMRInfo = read.table(
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
159 DMR,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
160 header = FALSE,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
161 sep = "\t",
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
162 stringsAsFactors = FALSE,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
163 quote = ""
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
164 )
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
165 peaks <- GRanges(seqnames = DMRInfo[, 1],
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
166 ranges = IRanges
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
167 (start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
168 source("https://bioconductor.org/biocLite.R")
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
169 biocLite("EnsDb.Hsapiens.v75")
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
170 peaks <- toGRanges(seqnames = DMRInfo[, 1],
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
171 ranges = IRanges
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
172 (start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
173 peaks <- toGRanges(DMRInfo)
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
174 peaks <- toGRanges(seqnames = DMRInfo[, 1],
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
175 ranges = IRanges
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
176 (start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
177 DMR <- ("test-data/DMR.bed")
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
178 DMRInfo = read.table(
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
179 DMR,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
180 header = FALSE,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
181 sep = "\t",
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
182 stringsAsFactors = FALSE,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
183 quote = ""
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
184 )
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
185 peaks <- toGRanges(seqnames = DMRInfo[, 1],
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
186 ranges = IRanges
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
187 (start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
188 peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"), feature=c("gene"), header=FALSE,
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
189 (start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
190 peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED")
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
191 feature=c("gene") header=FALSE,(start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
192 peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"(start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
193 peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"(start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
194 class(DMRInfo)
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
195 toGRanges(DMRInfo, seqnames = DMRInfo[, 1],(start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
196 toGRanges(DMRInfo, seqnames = DMRInfo[, 1],(start = DMRInfo[, 2] end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
197 toGRanges(DMRInfo, seqnames = DMRInfo[, 1],start = DMRInfo[, 2], end = DMRInfo[, 3])
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
198 toGRanges(DMRInfo, seqnames = DMRInfo[, 1](start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
199 toGRanges(DMRInfo, colNames = DMRInfo[, 1](start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
200 peaks <- toGRanges(DMRInfo)
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
201 peaks <- GRanges(seqnames = DMRInfo[, 1],
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
202 ranges = IRanges
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
203 (start = DMRInfo[, 2], end = DMRInfo[, 3]))
6a9b9694acbf Uploaded
testtool
parents: 15
diff changeset
204 ??fread