Mercurial > repos > rnateam > chipseeker
comparison chipseeker.R @ 8:8bd92f2404dd draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/chipseeker commit d30c91c3b4f71ec45b72976f7c2f08ea7df1e376-dirty"
author | rnateam |
---|---|
date | Fri, 27 Aug 2021 10:49:39 +0000 |
parents | 1b9a9409831d |
children |
comparison
equal
deleted
inserted
replaced
7:1b9a9409831d | 8:8bd92f2404dd |
---|---|
1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 1 options( |
2 show.error.messages = F, | |
3 error = function() { | |
4 cat(geterrmessage(), file = stderr()) | |
5 q("no", 1, F) | |
6 } | |
7 ) | |
2 | 8 |
3 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 9 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 10 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
5 | 11 |
6 suppressPackageStartupMessages({ | 12 suppressPackageStartupMessages({ |
7 library(ChIPseeker) | 13 library(ChIPseeker) |
8 library(GenomicFeatures) | 14 library(GenomicFeatures) |
9 library(rtracklayer) | 15 library(rtracklayer) |
10 library(optparse) | 16 library(optparse) |
17 library(ggupset) | |
11 }) | 18 }) |
12 | 19 |
13 option_list <- list( | 20 option_list <- list( |
14 make_option(c("-i","--infile"), type="character", help="Peaks file to be annotated"), | 21 make_option(c("-i", "--infile"), type = "character", help = "Peaks file to be annotated"), |
15 make_option(c("-H","--header"), type="logical", help="Peaks file contains header row"), | 22 make_option(c("-H", "--header"), type = "logical", help = "Peaks file contains header row"), |
16 make_option(c("-G","--gtf"), type="character", help="GTF to create TxDb."), | 23 make_option(c("-G", "--gtf"), type = "character", help = "GTF to create TxDb."), |
17 make_option(c("-u","--upstream"), type="integer", help="TSS upstream region"), | 24 make_option(c("-u", "--upstream"), type = "integer", help = "TSS upstream region"), |
18 make_option(c("-d","--downstream"), type="integer", help="TSS downstream region"), | 25 make_option(c("-d", "--downstream"), type = "integer", help = "TSS downstream region"), |
19 make_option(c("-F","--flankgeneinfo"), type="logical", help="Add flanking gene info"), | 26 make_option(c("-F", "--flankgeneinfo"), type = "logical", help = "Add flanking gene info"), |
20 make_option(c("-D","--flankgenedist"), type="integer", help="Flanking gene distance"), | 27 make_option(c("-D", "--flankgenedist"), type = "integer", help = "Flanking gene distance"), |
21 make_option(c("-j","--ignoreUpstream"), type="logical", help="Ignore upstream"), | 28 make_option(c("-j", "--ignore_upstream"), type = "logical", help = "Ignore upstream"), |
22 make_option(c("-k","--ignoreDownstream"), type="logical", help="Ignore downstream"), | 29 make_option( |
23 make_option(c("-f","--format"), type="character", help="Output format (interval or tabular)."), | 30 c("-k", "--ignore_downstream"), |
24 make_option(c("-p","--plots"), type="logical", help="PDF of plots."), | 31 type = "logical", |
25 make_option(c("-r","--rdata"), type="logical", help="Output RData file.") | 32 help = "Ignore downstream" |
26 ) | 33 ), |
34 make_option(c("-f", "--format"), type = "character", help = "Output format (interval or tabular)."), | |
35 make_option(c("-p", "--plots"), type = "logical", help = "PDF of plots."), | |
36 make_option(c("-r", "--rdata"), type = "logical", help = "Output RData file.") | |
37 ) | |
27 | 38 |
28 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | 39 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) |
29 args = parse_args(parser) | 40 args <- parse_args(parser) |
30 | 41 |
31 peaks = args$infile | 42 peaks <- args$infile |
32 gtf = args$gtf | 43 gtf <- args$gtf |
33 up = args$upstream | 44 up <- args$upstream |
34 down = args$downstream | 45 down <- args$downstream |
35 format = args$format | 46 format <- args$format |
36 | 47 |
37 if (!is.null(args$flankgeneinfo)) { | 48 if (!is.null(args$flankgeneinfo)) { |
38 flankgeneinfo <- TRUE | 49 flankgeneinfo <- TRUE |
39 } else { | 50 } else { |
40 flankgeneinfo <- FALSE | 51 flankgeneinfo <- FALSE |
41 } | 52 } |
42 | 53 |
43 if (!is.null(args$ignoreUpstream)) { | 54 if (!is.null(args$ignore_upstream)) { |
44 ignoreUpstream <- TRUE | 55 ignore_upstream <- TRUE |
45 } else { | 56 } else { |
46 ignoreUpstream <- FALSE | 57 ignore_upstream <- FALSE |
47 } | 58 } |
48 | 59 |
49 if (!is.null(args$ignoreDownstream)) { | 60 if (!is.null(args$ignore_downstream)) { |
50 ignoreDownstream <- TRUE | 61 ignore_downstream <- TRUE |
51 } else { | 62 } else { |
52 ignoreDownstream <- FALSE | 63 ignore_downstream <- FALSE |
53 } | 64 } |
54 | 65 |
55 if (!is.null(args$header)) { | 66 if (!is.null(args$header)) { |
56 header <- TRUE | 67 header <- TRUE |
57 } else { | 68 } else { |
58 header <- FALSE | 69 header <- FALSE |
59 } | 70 } |
60 | 71 |
61 peaks <- readPeakFile(peaks, header=header) | 72 peaks <- readPeakFile(peaks, header = header) |
62 | 73 |
63 # Make TxDb from GTF | 74 # Make TxDb from GTF |
64 txdb <- makeTxDbFromGFF(gtf, format="gtf") | 75 txdb <- makeTxDbFromGFF(gtf, format = "gtf") |
65 | 76 |
66 # Annotate peaks | 77 # Annotate peaks |
67 peakAnno <- annotatePeak(peaks, TxDb=txdb, | 78 peak_anno <- annotatePeak( |
68 tssRegion=c(-up, down), | 79 peaks, |
69 addFlankGeneInfo=flankgeneinfo, | 80 TxDb = txdb, |
70 flankDistance=args$flankgenedist, | 81 tssRegion = c(-up, down), |
71 ignoreUpstream=ignoreUpstream, | 82 addFlankGeneInfo = flankgeneinfo, |
72 ignoreDownstream=ignoreDownstream) | 83 flankDistance = args$flankgenedist, |
84 ignoreUpstream = ignore_upstream, | |
85 ignoreDownstream = ignore_downstream | |
86 ) | |
73 | 87 |
74 # Add gene name | 88 # Add gene name |
75 features <- import(gtf, format="gtf") | 89 features <- import(gtf, format = "gtf") |
76 ann <- unique(mcols(features)[, c("gene_id", "gene_name")]) | 90 ann <- unique(mcols(features)[, c("gene_id", "gene_name")]) |
77 res <- as.data.frame(peakAnno) | 91 res <- as.data.frame(peak_anno) |
78 res <- merge(res, ann, by.x="geneId", by.y="gene_id") | 92 res <- merge(res, ann, by.x = "geneId", by.y = "gene_id") |
79 names(res)[names(res) == "gene_name"] <- "geneName" | 93 names(res)[names(res) == "gene_name"] <- "geneName" |
80 | 94 |
81 #Extract metadata cols, 1st is geneId, rest should be from col 7 to end | 95 #Extract metadata cols, 1st is geneId, rest should be from col 7 to end |
82 metacols <- res[, c(7:ncol(res), 1)] | 96 metacols <- res[, c(7:ncol(res), 1)] |
83 # Convert from 1-based to 0-based format | 97 # Convert from 1-based to 0-based format |
84 if (format == "interval") { | 98 if (format == "interval") { |
85 metacols <- apply(as.data.frame(metacols), 1, function(col) paste(col, collapse="|")) | 99 metacols <- |
86 resout <- data.frame(res$seqnames, | 100 apply(as.data.frame(metacols), 1, function(col) |
87 res$start - 1, | 101 paste(col, collapse = "|")) |
88 res$end, | 102 resout <- data.frame(res$seqnames, |
89 metacols) | 103 res$start - 1, |
90 colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment") | 104 res$end, |
105 metacols) | |
106 colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment") | |
91 } else { | 107 } else { |
92 resout <- data.frame(res$seqnames, | 108 resout <- data.frame(res$seqnames, |
93 res$start - 1, | 109 res$start - 1, |
94 res$end, | 110 res$end, |
95 metacols) | 111 metacols) |
96 colnames(resout)[1:3] <- c("Chrom", "Start", "End") | 112 colnames(resout)[1:3] <- c("Chrom", "Start", "End") |
97 } | 113 } |
98 write.table(resout, file="out.tab", sep="\t", row.names=FALSE, quote=FALSE) | 114 write.table( |
115 resout, | |
116 file = "out.tab", | |
117 sep = "\t", | |
118 row.names = FALSE, | |
119 quote = FALSE | |
120 ) | |
99 | 121 |
100 if (!is.null(args$plots)) { | 122 if (!is.null(args$plots)) { |
101 pdf("out.pdf", width=14) | 123 pdf("out.pdf", width = 14) |
102 plotAnnoPie(peakAnno) | 124 plotAnnoPie(peak_anno) |
103 p1 <- plotAnnoBar(peakAnno) | 125 p1 <- plotAnnoBar(peak_anno) |
104 print(p1) | 126 print(p1) |
105 vennpie(peakAnno) | 127 vennpie(peak_anno) |
106 upsetplot(peakAnno) | 128 upsetplot(peak_anno) |
107 p2 <- plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS") | 129 p2 <- |
108 print(p2) | 130 plotDistToTSS(peak_anno, title = "Distribution of transcription factor-binding loci\nrelative to TSS") |
109 dev.off() | 131 print(p2) |
110 rm(p1, p2) | 132 dev.off() |
133 rm(p1, p2) | |
111 } | 134 } |
112 | 135 |
113 ## Output RData file | 136 ## Output RData file |
114 | 137 |
115 if (!is.null(args$rdata)) { | 138 if (!is.null(args$rdata)) { |