Mercurial > repos > rnateam > chipseeker
diff chipseeker.R @ 8:8bd92f2404dd draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/chipseeker commit d30c91c3b4f71ec45b72976f7c2f08ea7df1e376-dirty"
author | rnateam |
---|---|
date | Fri, 27 Aug 2021 10:49:39 +0000 |
parents | 1b9a9409831d |
children |
line wrap: on
line diff
--- a/chipseeker.R Sat Apr 27 11:04:35 2019 -0400 +++ b/chipseeker.R Fri Aug 27 10:49:39 2021 +0000 @@ -1,117 +1,140 @@ -options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options( + show.error.messages = F, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, F) + } +) # we need that to not crash galaxy with an UTF8 error on German LC settings. loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ - library(ChIPseeker) - library(GenomicFeatures) - library(rtracklayer) - library(optparse) + library(ChIPseeker) + library(GenomicFeatures) + library(rtracklayer) + library(optparse) + library(ggupset) }) option_list <- list( - make_option(c("-i","--infile"), type="character", help="Peaks file to be annotated"), - make_option(c("-H","--header"), type="logical", help="Peaks file contains header row"), - make_option(c("-G","--gtf"), type="character", help="GTF to create TxDb."), - make_option(c("-u","--upstream"), type="integer", help="TSS upstream region"), - make_option(c("-d","--downstream"), type="integer", help="TSS downstream region"), - make_option(c("-F","--flankgeneinfo"), type="logical", help="Add flanking gene info"), - make_option(c("-D","--flankgenedist"), type="integer", help="Flanking gene distance"), - make_option(c("-j","--ignoreUpstream"), type="logical", help="Ignore upstream"), - make_option(c("-k","--ignoreDownstream"), type="logical", help="Ignore downstream"), - make_option(c("-f","--format"), type="character", help="Output format (interval or tabular)."), - make_option(c("-p","--plots"), type="logical", help="PDF of plots."), - make_option(c("-r","--rdata"), type="logical", help="Output RData file.") - ) + make_option(c("-i", "--infile"), type = "character", help = "Peaks file to be annotated"), + make_option(c("-H", "--header"), type = "logical", help = "Peaks file contains header row"), + make_option(c("-G", "--gtf"), type = "character", help = "GTF to create TxDb."), + make_option(c("-u", "--upstream"), type = "integer", help = "TSS upstream region"), + make_option(c("-d", "--downstream"), type = "integer", help = "TSS downstream region"), + make_option(c("-F", "--flankgeneinfo"), type = "logical", help = "Add flanking gene info"), + make_option(c("-D", "--flankgenedist"), type = "integer", help = "Flanking gene distance"), + make_option(c("-j", "--ignore_upstream"), type = "logical", help = "Ignore upstream"), + make_option( + c("-k", "--ignore_downstream"), + type = "logical", + help = "Ignore downstream" + ), + make_option(c("-f", "--format"), type = "character", help = "Output format (interval or tabular)."), + make_option(c("-p", "--plots"), type = "logical", help = "PDF of plots."), + make_option(c("-r", "--rdata"), type = "logical", help = "Output RData file.") +) -parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) -args = parse_args(parser) +parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) +args <- parse_args(parser) -peaks = args$infile -gtf = args$gtf -up = args$upstream -down = args$downstream -format = args$format +peaks <- args$infile +gtf <- args$gtf +up <- args$upstream +down <- args$downstream +format <- args$format if (!is.null(args$flankgeneinfo)) { - flankgeneinfo <- TRUE + flankgeneinfo <- TRUE } else { - flankgeneinfo <- FALSE + flankgeneinfo <- FALSE } -if (!is.null(args$ignoreUpstream)) { - ignoreUpstream <- TRUE +if (!is.null(args$ignore_upstream)) { + ignore_upstream <- TRUE } else { - ignoreUpstream <- FALSE + ignore_upstream <- FALSE } -if (!is.null(args$ignoreDownstream)) { - ignoreDownstream <- TRUE +if (!is.null(args$ignore_downstream)) { + ignore_downstream <- TRUE } else { - ignoreDownstream <- FALSE + ignore_downstream <- FALSE } if (!is.null(args$header)) { - header <- TRUE + header <- TRUE } else { - header <- FALSE + header <- FALSE } -peaks <- readPeakFile(peaks, header=header) +peaks <- readPeakFile(peaks, header = header) # Make TxDb from GTF -txdb <- makeTxDbFromGFF(gtf, format="gtf") +txdb <- makeTxDbFromGFF(gtf, format = "gtf") # Annotate peaks -peakAnno <- annotatePeak(peaks, TxDb=txdb, - tssRegion=c(-up, down), - addFlankGeneInfo=flankgeneinfo, - flankDistance=args$flankgenedist, - ignoreUpstream=ignoreUpstream, - ignoreDownstream=ignoreDownstream) +peak_anno <- annotatePeak( + peaks, + TxDb = txdb, + tssRegion = c(-up, down), + addFlankGeneInfo = flankgeneinfo, + flankDistance = args$flankgenedist, + ignoreUpstream = ignore_upstream, + ignoreDownstream = ignore_downstream +) # Add gene name -features <- import(gtf, format="gtf") +features <- import(gtf, format = "gtf") ann <- unique(mcols(features)[, c("gene_id", "gene_name")]) -res <- as.data.frame(peakAnno) -res <- merge(res, ann, by.x="geneId", by.y="gene_id") +res <- as.data.frame(peak_anno) +res <- merge(res, ann, by.x = "geneId", by.y = "gene_id") names(res)[names(res) == "gene_name"] <- "geneName" #Extract metadata cols, 1st is geneId, rest should be from col 7 to end metacols <- res[, c(7:ncol(res), 1)] # Convert from 1-based to 0-based format if (format == "interval") { - metacols <- apply(as.data.frame(metacols), 1, function(col) paste(col, collapse="|")) - resout <- data.frame(res$seqnames, - res$start - 1, - res$end, - metacols) - colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment") + metacols <- + apply(as.data.frame(metacols), 1, function(col) + paste(col, collapse = "|")) + resout <- data.frame(res$seqnames, + res$start - 1, + res$end, + metacols) + colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment") } else { - resout <- data.frame(res$seqnames, - res$start - 1, - res$end, - metacols) - colnames(resout)[1:3] <- c("Chrom", "Start", "End") + resout <- data.frame(res$seqnames, + res$start - 1, + res$end, + metacols) + colnames(resout)[1:3] <- c("Chrom", "Start", "End") } -write.table(resout, file="out.tab", sep="\t", row.names=FALSE, quote=FALSE) +write.table( + resout, + file = "out.tab", + sep = "\t", + row.names = FALSE, + quote = FALSE +) if (!is.null(args$plots)) { - pdf("out.pdf", width=14) - plotAnnoPie(peakAnno) - p1 <- plotAnnoBar(peakAnno) - print(p1) - vennpie(peakAnno) - upsetplot(peakAnno) - p2 <- plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS") - print(p2) - dev.off() - rm(p1, p2) + pdf("out.pdf", width = 14) + plotAnnoPie(peak_anno) + p1 <- plotAnnoBar(peak_anno) + print(p1) + vennpie(peak_anno) + upsetplot(peak_anno) + p2 <- + plotDistToTSS(peak_anno, title = "Distribution of transcription factor-binding loci\nrelative to TSS") + print(p2) + dev.off() + rm(p1, p2) } ## Output RData file if (!is.null(args$rdata)) { save.image(file = "ChIPseeker_analysis.RData") -} \ No newline at end of file +}