diff chipseeker.R @ 3:535321abf9a4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chipseeker commit 861db0d1f76bb320f49c2501f4e656cf88d389ce
author rnateam
date Fri, 01 Jun 2018 02:13:33 -0400
parents 95f779f4adb7
children 90fe78a19b55
line wrap: on
line diff
--- a/chipseeker.R	Wed May 30 06:12:27 2018 -0400
+++ b/chipseeker.R	Fri Jun 01 02:13:33 2018 -0400
@@ -6,6 +6,7 @@
 suppressPackageStartupMessages({
     library(ChIPseeker)
     library(GenomicFeatures)
+    library(rtracklayer)
     library(optparse)
 })
 
@@ -17,7 +18,8 @@
     make_option(c("-F","--flankgeneinfo"), type="logical", help="Add flanking gene info"),
     make_option(c("-D","--flankgenedist"), type="integer", help="Flanking gene distance"),
     make_option(c("-f","--format"), type="character", help="Output format (interval or tabular)."),
-    make_option(c("-p","--plots"), type="character", help="PDF of plots.")
+    make_option(c("-p","--plots"), type="logical", help="PDF of plots."),
+    make_option(c("-r","--rdata"), type="logical", help="Output RData file.")
   )
 
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
@@ -28,7 +30,6 @@
 up = args$upstream
 down = args$downstream
 format = args$format
-plots = args$plots
 
 peaks <- readPeakFile(peaks)
 
@@ -40,25 +41,32 @@
     peakAnno <-  annotatePeak(peaks, TxDb=txdb, tssRegion=c(-up, down))
 }
 
+# Add gene name
+features <- import(gtf, format="gtf")
+ann <- unique(mcols(features)[, c("gene_id", "gene_name")])
+res <- as.data.frame(peakAnno)
+res <- merge(res, ann, by.x="geneId", by.y="gene_id")
+names(res)[names(res) == "gene_name"] <- "geneName"
+
+#Extract metadata cols, 1st is geneId, rest should be from col 7 to end
+metacols <- res[, c(7:ncol(res), 1)]
 # Convert from 1-based to 0-based format
-res <- as.GRanges(peakAnno)
-metacols <- mcols(res)
 if (format == "interval") {
     metacols <- apply(as.data.frame(metacols), 1, function(col) paste(col, collapse="|"))
-    resout  <- data.frame(Chrom=seqnames(res),
-                    Start=start(res) - 1,
-                    End=end(res),
+    resout <- data.frame(Chrom=res$seqnames,
+                    Start=res$start - 1,
+                    End=res$end,
                     Comment=metacols)
 } else {
-    resout <- data.frame(Chrom=seqnames(res),
-                    Start=start(res) - 1,
-                    End=end(res),
+    resout <- data.frame(Chrom=res$seqnames,
+                    Start=res$start - 1,
+                    End=res$end,
                     metacols)
 }
 
 write.table(resout, file="out.tab", sep="\t", row.names=FALSE, quote=FALSE)
 
-if (!is.null(plots)) {
+if (!is.null(args$plots)) {
     pdf("out.pdf", width=14)
     plotAnnoPie(peakAnno)
     plotAnnoBar(peakAnno)
@@ -66,4 +74,10 @@
     upsetplot(peakAnno)
     plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS")
     dev.off()
+}
+
+## Output RData file
+
+if (!is.null(args$rdata)) {
+  save.image(file = "ChIPseeker_analysis.RData")
 }
\ No newline at end of file