Mercurial > repos > testtool > annotate_peak

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/annotate_peak/.Rhistory	Thu Mar 16 11:03:24 2017 -0400
@@ -0,0 +1,138 @@
+TAB <- read.csv("input.csv")
+mysamples <- lapply(TAB$ID,function(x)getGEO(x))
+input <- function(TAB) { if(is(TAB, "csv")){
+TAB <- read.csv("input.csv")}
+else{
+print("error in data file")
+}}
+input()
+TAB <- read.csv("input.csv")}
+TAB <- read.csv("input.csv")
+test_func <- function(
+clusterSize=2,
+cutoff=0.2,
+platform_id='HM450',
+genome_id='hg19')
+{
+args = commandArgs(trailingOnly=TRUE)
+methyl_file = args[1]
+ChiPseq_file = args[2]
+output_file = args[3]
+options(warn=-1)
+TAB=read.csv(methyl_file)
+ChiPseq=import(ChiPseq_file)
+if(is.null(TAB)){
+stop("Must specify input files")
+}else{
+mysamples <- lapply(TAB$ID,function(x) getGEO(x))
+}
+wrappedFunction <- function(test_func)
+s0 <- lapply(mysamples,Table)
+id_ref<-lapply(s0,function(x)x$ID_REF)
+if(length(unique(id_ref)) != 1) {
+stop("Error different ID_REF for samples")
+} else if (is.null((unlist(unique(id_ref))))) {
+stop("NO GSM data avaliable")
+} else {
+values<-do.call("cbind",lapply(s0,function(x)x$VALUE))
+colnames(values)=TAB$ID
+rownames(values)=id_ref[[1]]
+cg <-  rownames(values)
+probe <- c(cg)
+hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id)
+probe.info <- hm450.hg19[probe]
+f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value,
+BP=as.numeric(probe.info@elementMetadata$probeStart))
+designMatrix <- model.matrix(~ TAB$Phenotype)
+DMR <- bumphunter(values, design = designMatrix,
+pos=f$BP,cutoff=cutoff,chr=f$CHR)
+MAT <- DMR$table[which(DMR$table$L>=clusterSize),]
+METH <- GRanges(seqnames=MAT$chr,
+ranges=IRanges
+(start=MAT$start,
+end=MAT$end),
+value_pos=MAT$value)
+peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000)
+p <- peaks$peaklist$`probe.info///ChiPseq`
+peakAnno <- annotatePeak(p, file=peakAnno)
+output_file <- annotatePeak(p, file=output_file)
+}}
+test_func <- function(
+clusterSize=2,
+cutoff=0.2,
+platform_id='HM450',
+genome_id='hg19')
+{
+args = commandArgs(trailingOnly=TRUE)
+methyl_file = args[1]
+ChiPseq_file = args[2]
+output_file = args[3]
+options(warn=-1)
+TAB=read.csv(methyl_file)
+ChiPseq=import(ChiPseq_file)
+if(is.null(TAB)){
+stop("Must specify input files")
+}else{
+mysamples <- lapply(TAB$ID,function(x) getGEO(x))
+}
+wrappedFunction <- function(test_func)
+s0 <- lapply(mysamples,Table)
+id_ref<-lapply(s0,function(x)x$ID_REF)
+if(length(unique(id_ref)) != 1) {
+stop("Error different ID_REF for samples")
+} else if (is.null((unlist(unique(id_ref))))) {
+stop("NO GSM data avaliable")
+} else {
+values<-do.call("cbind",lapply(s0,function(x)x$VALUE))
+colnames(values)=TAB$ID
+rownames(values)=id_ref[[1]]
+cg <-  rownames(values)
+probe <- c(cg)
+hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id)
+probe.info <- hm450.hg19[probe]
+f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value,
+BP=as.numeric(probe.info@elementMetadata$probeStart))
+designMatrix <- model.matrix(~ TAB$Phenotype)
+DMR <- bumphunter(values, design = designMatrix,
+pos=f$BP,cutoff=cutoff,chr=f$CHR)
+MAT <- DMR$table[which(DMR$table$L>=clusterSize),]
+METH <- GRanges(seqnames=MAT$chr,
+ranges=IRanges
+(start=MAT$start,
+end=MAT$end),
+value_pos=MAT$value)
+peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000)
+p <- peaks$peaklist$`probe.info///ChiPseq`
+peakAnno <- annotatePeak(p, file=peakAnno)
+output_file <- annotatePeak(p, file=output_file)
+}}
+)
+a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5)
+a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5)
+b<-c(2.9,2.7,2,6,2.7,2.5,2.5,3.2,3.1,3.3,2.8)
+a-b
+file<-read.csv("~/Documents/SS2.csv")
+head(file)
+e
+file
+t.test(file$WBT,file$WBA)
+t.test(file$WBT,file$WBA,paired=TRUE)
+mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x))
+require("GEOquery",quietly = TRUE)
+require("BiocGenerics",quietly = TRUE)
+args <- commandArgs(trailingOnly = TRUE)
+csv_file = args[1]
+#csv_file <- ("test-data/input.csv")
+TAB=read.csv(csv_file)
+if(is.null(TAB)){
+stop("Must specify input files")
+}else{
+mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x))
+}
+csv_file <- ("test-data/input.csv")
+TAB=read.csv(csv_file)
+TAB
+csv_file<-("test-data/input.csv")
+TAB = read.csv(csv_file)
+TAB
+??bumphunter
--- a/annotate_peak/annotatePeak.R	Thu Mar 16 10:59:08 2017 -0400
+++ b/annotate_peak/annotatePeak.R	Thu Mar 16 11:03:24 2017 -0400
@@ -3,7 +3,7 @@
 DMR = args[1]
 annoPeakTable = args[2]

-DMR <- ("test-data/DMR.bed")
+#DMR <- ("test-data/DMR.bed")
 DMRInfo = read.table(
   DMR,
   header = FALSE,
@@ -28,7 +28,7 @@
 ## do annotation by nearest TSS
 anno <- annotatePeakInBatch(peaks, AnnotationData=annoData)
 anno[1:2]
-annoPeakTable <- ('test-data/ChIPPeak.csv')
+#annoPeakTable <- ('test-data/ChIPPeak.csv')
 write.csv(anno, annoPeakTable, row.names = FALSE)