Mercurial > repos > testtool > annotate_peak
changeset 15:53df7871db21 draft
Uploaded
author | testtool |
---|---|
date | Thu, 16 Mar 2017 11:03:24 -0400 |
parents | 4a489b0d247f |
children | ee3fc24b2108 |
files | annotate_peak/.Rhistory annotate_peak/annotatePeak.R |
diffstat | 2 files changed, 140 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/annotate_peak/.Rhistory Thu Mar 16 11:03:24 2017 -0400 @@ -0,0 +1,138 @@ +TAB <- read.csv("input.csv") +mysamples <- lapply(TAB$ID,function(x)getGEO(x)) +input <- function(TAB) { if(is(TAB, "csv")){ +TAB <- read.csv("input.csv")} +else{ +print("error in data file") +}} +input() +TAB <- read.csv("input.csv")} +TAB <- read.csv("input.csv") +test_func <- function( +clusterSize=2, +cutoff=0.2, +platform_id='HM450', +genome_id='hg19') +{ +args = commandArgs(trailingOnly=TRUE) +methyl_file = args[1] +ChiPseq_file = args[2] +output_file = args[3] +options(warn=-1) +TAB=read.csv(methyl_file) +ChiPseq=import(ChiPseq_file) +if(is.null(TAB)){ +stop("Must specify input files") +}else{ +mysamples <- lapply(TAB$ID,function(x) getGEO(x)) +} +wrappedFunction <- function(test_func) +s0 <- lapply(mysamples,Table) +id_ref<-lapply(s0,function(x)x$ID_REF) +if(length(unique(id_ref)) != 1) { +stop("Error different ID_REF for samples") +} else if (is.null((unlist(unique(id_ref))))) { +stop("NO GSM data avaliable") +} else { +values<-do.call("cbind",lapply(s0,function(x)x$VALUE)) +colnames(values)=TAB$ID +rownames(values)=id_ref[[1]] +cg <- rownames(values) +probe <- c(cg) +hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id) +probe.info <- hm450.hg19[probe] +f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value, +BP=as.numeric(probe.info@elementMetadata$probeStart)) +designMatrix <- model.matrix(~ TAB$Phenotype) +DMR <- bumphunter(values, design = designMatrix, +pos=f$BP,cutoff=cutoff,chr=f$CHR) +MAT <- DMR$table[which(DMR$table$L>=clusterSize),] +METH <- GRanges(seqnames=MAT$chr, +ranges=IRanges +(start=MAT$start, +end=MAT$end), +value_pos=MAT$value) +peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000) +p <- peaks$peaklist$`probe.info///ChiPseq` +peakAnno <- annotatePeak(p, file=peakAnno) +output_file <- annotatePeak(p, file=output_file) +}} +test_func <- function( +clusterSize=2, +cutoff=0.2, +platform_id='HM450', +genome_id='hg19') +{ +args = commandArgs(trailingOnly=TRUE) +methyl_file = args[1] +ChiPseq_file = args[2] +output_file = args[3] +options(warn=-1) +TAB=read.csv(methyl_file) +ChiPseq=import(ChiPseq_file) +if(is.null(TAB)){ +stop("Must specify input files") +}else{ +mysamples <- lapply(TAB$ID,function(x) getGEO(x)) +} +wrappedFunction <- function(test_func) +s0 <- lapply(mysamples,Table) +id_ref<-lapply(s0,function(x)x$ID_REF) +if(length(unique(id_ref)) != 1) { +stop("Error different ID_REF for samples") +} else if (is.null((unlist(unique(id_ref))))) { +stop("NO GSM data avaliable") +} else { +values<-do.call("cbind",lapply(s0,function(x)x$VALUE)) +colnames(values)=TAB$ID +rownames(values)=id_ref[[1]] +cg <- rownames(values) +probe <- c(cg) +hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id) +probe.info <- hm450.hg19[probe] +f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value, +BP=as.numeric(probe.info@elementMetadata$probeStart)) +designMatrix <- model.matrix(~ TAB$Phenotype) +DMR <- bumphunter(values, design = designMatrix, +pos=f$BP,cutoff=cutoff,chr=f$CHR) +MAT <- DMR$table[which(DMR$table$L>=clusterSize),] +METH <- GRanges(seqnames=MAT$chr, +ranges=IRanges +(start=MAT$start, +end=MAT$end), +value_pos=MAT$value) +peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000) +p <- peaks$peaklist$`probe.info///ChiPseq` +peakAnno <- annotatePeak(p, file=peakAnno) +output_file <- annotatePeak(p, file=output_file) +}} +) +a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5) +a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5) +b<-c(2.9,2.7,2,6,2.7,2.5,2.5,3.2,3.1,3.3,2.8) +a-b +file<-read.csv("~/Documents/SS2.csv") +head(file) +e +file +t.test(file$WBT,file$WBA) +t.test(file$WBT,file$WBA,paired=TRUE) +mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x)) +require("GEOquery",quietly = TRUE) +require("BiocGenerics",quietly = TRUE) +args <- commandArgs(trailingOnly = TRUE) +csv_file = args[1] +#csv_file <- ("test-data/input.csv") +TAB=read.csv(csv_file) +if(is.null(TAB)){ +stop("Must specify input files") +}else{ +mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x)) +} +csv_file <- ("test-data/input.csv") +TAB=read.csv(csv_file) +TAB +csv_file<-("test-data/input.csv") +TAB = read.csv(csv_file) +TAB +??bumphunter
--- a/annotate_peak/annotatePeak.R Thu Mar 16 10:59:08 2017 -0400 +++ b/annotate_peak/annotatePeak.R Thu Mar 16 11:03:24 2017 -0400 @@ -3,7 +3,7 @@ DMR = args[1] annoPeakTable = args[2] -DMR <- ("test-data/DMR.bed") +#DMR <- ("test-data/DMR.bed") DMRInfo = read.table( DMR, header = FALSE, @@ -28,7 +28,7 @@ ## do annotation by nearest TSS anno <- annotatePeakInBatch(peaks, AnnotationData=annoData) anno[1:2] -annoPeakTable <- ('test-data/ChIPPeak.csv') +#annoPeakTable <- ('test-data/ChIPPeak.csv') write.csv(anno, annoPeakTable, row.names = FALSE)