view chipseeker.R @ 7:1b9a9409831d draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/chipseeker commit e07ee851144e07fde91877727d6a39a1906f7639
author rnateam
date Sat, 27 Apr 2019 11:04:35 -0400
parents b418a1d3585d
children 8bd92f2404dd
line wrap: on
line source

options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

suppressPackageStartupMessages({
    library(ChIPseeker)
    library(GenomicFeatures)
    library(rtracklayer)
    library(optparse)
})

option_list <- list(
    make_option(c("-i","--infile"), type="character", help="Peaks file to be annotated"),
    make_option(c("-H","--header"), type="logical", help="Peaks file contains header row"),
    make_option(c("-G","--gtf"), type="character", help="GTF to create TxDb."),
    make_option(c("-u","--upstream"), type="integer", help="TSS upstream region"),
    make_option(c("-d","--downstream"), type="integer", help="TSS downstream region"),
    make_option(c("-F","--flankgeneinfo"), type="logical", help="Add flanking gene info"),
    make_option(c("-D","--flankgenedist"), type="integer", help="Flanking gene distance"),
    make_option(c("-j","--ignoreUpstream"), type="logical", help="Ignore upstream"),
    make_option(c("-k","--ignoreDownstream"), type="logical", help="Ignore downstream"),
    make_option(c("-f","--format"), type="character", help="Output format (interval or tabular)."),
    make_option(c("-p","--plots"), type="logical", help="PDF of plots."),
    make_option(c("-r","--rdata"), type="logical", help="Output RData file.")
  )

parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
args = parse_args(parser)

peaks = args$infile
gtf = args$gtf
up = args$upstream
down = args$downstream
format = args$format

if (!is.null(args$flankgeneinfo)) {
    flankgeneinfo <- TRUE
} else {
    flankgeneinfo <- FALSE
}

if (!is.null(args$ignoreUpstream)) {
    ignoreUpstream <- TRUE
} else {
    ignoreUpstream <- FALSE
}

if (!is.null(args$ignoreDownstream)) {
    ignoreDownstream <- TRUE
} else {
    ignoreDownstream <- FALSE
}

if (!is.null(args$header)) {
    header <- TRUE
} else {
    header <- FALSE
}

peaks <- readPeakFile(peaks, header=header)

# Make TxDb from GTF
txdb <- makeTxDbFromGFF(gtf, format="gtf")

# Annotate peaks
peakAnno <-  annotatePeak(peaks, TxDb=txdb,
    tssRegion=c(-up, down),
    addFlankGeneInfo=flankgeneinfo,
    flankDistance=args$flankgenedist,
    ignoreUpstream=ignoreUpstream,
    ignoreDownstream=ignoreDownstream)

# Add gene name
features <- import(gtf, format="gtf")
ann <- unique(mcols(features)[, c("gene_id", "gene_name")])
res <- as.data.frame(peakAnno)
res <- merge(res, ann, by.x="geneId", by.y="gene_id")
names(res)[names(res) == "gene_name"] <- "geneName"

#Extract metadata cols, 1st is geneId, rest should be from col 7 to end
metacols <- res[, c(7:ncol(res), 1)]
# Convert from 1-based to 0-based format
if (format == "interval") {
    metacols <- apply(as.data.frame(metacols), 1, function(col) paste(col, collapse="|"))
    resout <- data.frame(res$seqnames,
                    res$start - 1,
                    res$end,
                    metacols)
    colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment")
} else {
    resout <- data.frame(res$seqnames,
                    res$start - 1,
                    res$end,
                    metacols)
    colnames(resout)[1:3] <- c("Chrom", "Start", "End")
}
write.table(resout, file="out.tab", sep="\t", row.names=FALSE, quote=FALSE)

if (!is.null(args$plots)) {
    pdf("out.pdf", width=14)
    plotAnnoPie(peakAnno)
    p1 <- plotAnnoBar(peakAnno)
    print(p1)
    vennpie(peakAnno)
    upsetplot(peakAnno)
    p2 <- plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS")
    print(p2)
    dev.off()
    rm(p1, p2)
}

## Output RData file

if (!is.null(args$rdata)) {
  save.image(file = "ChIPseeker_analysis.RData")
}