comparison chipseeker.R @ 3:535321abf9a4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chipseeker commit 861db0d1f76bb320f49c2501f4e656cf88d389ce
author rnateam
date Fri, 01 Jun 2018 02:13:33 -0400
parents 95f779f4adb7
children 90fe78a19b55
comparison
equal deleted inserted replaced
2:cb133602cd9b 3:535321abf9a4
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
5 5
6 suppressPackageStartupMessages({ 6 suppressPackageStartupMessages({
7 library(ChIPseeker) 7 library(ChIPseeker)
8 library(GenomicFeatures) 8 library(GenomicFeatures)
9 library(rtracklayer)
9 library(optparse) 10 library(optparse)
10 }) 11 })
11 12
12 option_list <- list( 13 option_list <- list(
13 make_option(c("-i","--infile"), type="character", help="Peaks file to be annotated"), 14 make_option(c("-i","--infile"), type="character", help="Peaks file to be annotated"),
15 make_option(c("-u","--upstream"), type="integer", help="TSS upstream region"), 16 make_option(c("-u","--upstream"), type="integer", help="TSS upstream region"),
16 make_option(c("-d","--downstream"), type="integer", help="TSS downstream region"), 17 make_option(c("-d","--downstream"), type="integer", help="TSS downstream region"),
17 make_option(c("-F","--flankgeneinfo"), type="logical", help="Add flanking gene info"), 18 make_option(c("-F","--flankgeneinfo"), type="logical", help="Add flanking gene info"),
18 make_option(c("-D","--flankgenedist"), type="integer", help="Flanking gene distance"), 19 make_option(c("-D","--flankgenedist"), type="integer", help="Flanking gene distance"),
19 make_option(c("-f","--format"), type="character", help="Output format (interval or tabular)."), 20 make_option(c("-f","--format"), type="character", help="Output format (interval or tabular)."),
20 make_option(c("-p","--plots"), type="character", help="PDF of plots.") 21 make_option(c("-p","--plots"), type="logical", help="PDF of plots."),
22 make_option(c("-r","--rdata"), type="logical", help="Output RData file.")
21 ) 23 )
22 24
23 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) 25 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
24 args = parse_args(parser) 26 args = parse_args(parser)
25 27
26 peaks = args$infile 28 peaks = args$infile
27 gtf = args$gtf 29 gtf = args$gtf
28 up = args$upstream 30 up = args$upstream
29 down = args$downstream 31 down = args$downstream
30 format = args$format 32 format = args$format
31 plots = args$plots
32 33
33 peaks <- readPeakFile(peaks) 34 peaks <- readPeakFile(peaks)
34 35
35 # Make TxDb from GTF 36 # Make TxDb from GTF
36 txdb <- makeTxDbFromGFF(gtf, format="gtf") 37 txdb <- makeTxDbFromGFF(gtf, format="gtf")
38 peakAnno <- annotatePeak(peaks, TxDb=txdb, tssRegion=c(-up, down), addFlankGeneInfo=args$flankgeneinfo, flankDistance=args$flankgenedist) 39 peakAnno <- annotatePeak(peaks, TxDb=txdb, tssRegion=c(-up, down), addFlankGeneInfo=args$flankgeneinfo, flankDistance=args$flankgenedist)
39 } else { 40 } else {
40 peakAnno <- annotatePeak(peaks, TxDb=txdb, tssRegion=c(-up, down)) 41 peakAnno <- annotatePeak(peaks, TxDb=txdb, tssRegion=c(-up, down))
41 } 42 }
42 43
44 # Add gene name
45 features <- import(gtf, format="gtf")
46 ann <- unique(mcols(features)[, c("gene_id", "gene_name")])
47 res <- as.data.frame(peakAnno)
48 res <- merge(res, ann, by.x="geneId", by.y="gene_id")
49 names(res)[names(res) == "gene_name"] <- "geneName"
50
51 #Extract metadata cols, 1st is geneId, rest should be from col 7 to end
52 metacols <- res[, c(7:ncol(res), 1)]
43 # Convert from 1-based to 0-based format 53 # Convert from 1-based to 0-based format
44 res <- as.GRanges(peakAnno)
45 metacols <- mcols(res)
46 if (format == "interval") { 54 if (format == "interval") {
47 metacols <- apply(as.data.frame(metacols), 1, function(col) paste(col, collapse="|")) 55 metacols <- apply(as.data.frame(metacols), 1, function(col) paste(col, collapse="|"))
48 resout <- data.frame(Chrom=seqnames(res), 56 resout <- data.frame(Chrom=res$seqnames,
49 Start=start(res) - 1, 57 Start=res$start - 1,
50 End=end(res), 58 End=res$end,
51 Comment=metacols) 59 Comment=metacols)
52 } else { 60 } else {
53 resout <- data.frame(Chrom=seqnames(res), 61 resout <- data.frame(Chrom=res$seqnames,
54 Start=start(res) - 1, 62 Start=res$start - 1,
55 End=end(res), 63 End=res$end,
56 metacols) 64 metacols)
57 } 65 }
58 66
59 write.table(resout, file="out.tab", sep="\t", row.names=FALSE, quote=FALSE) 67 write.table(resout, file="out.tab", sep="\t", row.names=FALSE, quote=FALSE)
60 68
61 if (!is.null(plots)) { 69 if (!is.null(args$plots)) {
62 pdf("out.pdf", width=14) 70 pdf("out.pdf", width=14)
63 plotAnnoPie(peakAnno) 71 plotAnnoPie(peakAnno)
64 plotAnnoBar(peakAnno) 72 plotAnnoBar(peakAnno)
65 vennpie(peakAnno) 73 vennpie(peakAnno)
66 upsetplot(peakAnno) 74 upsetplot(peakAnno)
67 plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS") 75 plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS")
68 dev.off() 76 dev.off()
69 } 77 }
78
79 ## Output RData file
80
81 if (!is.null(args$rdata)) {
82 save.image(file = "ChIPseeker_analysis.RData")
83 }