comparison brew3r.r_script.R @ 0:928a52b5c938 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/brew3r_r commit 3e3c47b732510a9ef0b2864b284aa14308e75ab0
author iuc
date Tue, 11 Jun 2024 08:26:37 +0000
parents
children 3198f52bffaa
comparison
equal deleted inserted replaced
-1:000000000000 0:928a52b5c938
1 library("getopt")
2 suppressPackageStartupMessages(library("rtracklayer"))
3 library(GenomicRanges)
4 library("BREW3R.r")
5
6 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
7 args <- commandArgs(trailingOnly = TRUE)
8 # - Column 1: the long flag name. A multi-character string.
9 # - Column 2: short flag alias of Column 1. A single-character string.
10 # - Column 3: Argument mask of the flag. An integer.
11 # Possible values: 0=no argument, 1=required argument, 2=optional argument.
12 # - Column 4: Data type to which the flag's argument shall be cast using
13 # storage.mode(). A multi-character string. This only considered for same-row
14 # Column 3 values of 1,2. Possible values: logical, integer, double, complex,
15 # character. If numeric is encountered then it will be converted to double.
16 # - Column 5 (optional): A brief description of the purpose of the option.
17 spec <- matrix(c(
18 "help", "h", 0, "logical", "display help",
19 "gtf_to_extend", "i", 1, "character", "input gtf file to be extended on 3'",
20 "gtf_to_overlap", "g", 1, "character",
21 "input gtf file that will be used to extend",
22 "output", "o", 1, "character", "output extended gtf",
23 "sup_output", "s", 1, "character",
24 "supplementary output file with resolution of overlaps",
25 "no_add", "n", 0, "logical", "do not add new exons",
26 "exclude_pattern", "e", 1, "character", "do not extend genes with names matching this pattern",
27 "filter_unstranded", "f", 0, "logical",
28 "remove unstranded intervals from gtf_to_overlap which overlap intervals from gtf_to_extend of both strands",
29 "quiet", "q", 0, "logical", "decrease verbosity",
30 "verbose", "v", 0, "logical", "increase verbosity"
31 ), byrow = TRUE, ncol = 5)
32 opt <- getopt(spec)
33
34 # if help was asked for print a friendly message
35 # and exit with a non-zero error code
36 if (!is.null(opt$help)) {
37 cat(getopt(spec, usage = TRUE))
38 q(status = 1)
39 }
40
41 # Check all required arguments
42 if (is.null(opt$gtf_to_extend)) {
43 stop("--gtf_to_extend is required")
44 }
45 if (is.null(opt$gtf_to_overlap)) {
46 stop("--gtf_to_overlap is required")
47 }
48 if (is.null(opt$output)) {
49 stop("--output is required")
50 }
51
52 # Check incompatible arguments
53 if (!is.null(opt$quiet) && !is.null(opt$verbose)) {
54 stop("quiet and verbose are mutually exclusive options")
55 }
56
57 # Adjust verbosity
58 if (!is.null(opt$quiet)) {
59 options(rlib_message_verbosity = "quiet")
60 }
61
62 if (!is.null(opt$verbose)) {
63 options(BREW3R.r.verbose = "progression")
64 }
65
66 # Load gtfs as GenomicRanges
67 input_gr_to_extend <- rtracklayer::import(opt$gtf_to_extend, format = "gtf")
68 input_gr_template <- rtracklayer::import(opt$gtf_to_overlap, format = "gtf")
69
70 # Save CDS info
71 input_gr_CDS <- subset(input_gr_to_extend, type == "CDS")
72
73 # Filter the template if needed
74 if (!is.null(opt$filter_unstranded)) {
75 # Find intervals without strand information in template
76 unstranded.intervals <- which(strand(input_gr_template) == "*")
77 if (length(unstranded.intervals) > 0) {
78 # Check if they overlap genes from input with different strands
79 # First compute the overlap
80 ov <- suppressWarnings(
81 as.data.frame(findOverlaps(
82 input_gr_template[unstranded.intervals],
83 input_gr_to_extend
84 ))
85 )
86 # Add the strand information
87 ov$strand <- as.factor(strand(input_gr_to_extend))[ov$subjectHits]
88 # Simplify the dataframe to get only the strand info
89 ov.simple <- unique(ov[, c("queryHits", "strand")])
90 # If the queryHits is duplicated it means there are different strands
91 multi.strand.query <- ov.simple$queryHits[duplicated(ov.simple$queryHits)]
92 to.remove <- unstranded.intervals[multi.strand.query]
93 # Remove these potentially error-prone intervals from the template
94 input_gr_template <- input_gr_template[-to.remove]
95 }
96 }
97
98 # Run BREW3R.r main function
99 new_gr_exons <- extend_granges(
100 input_gr_to_extend = input_gr_to_extend,
101 input_gr_to_overlap = input_gr_template,
102 add_new_exons = is.null(opt$no_add),
103 overlap_resolution_fn = opt$sup_output
104 )
105 # Prevent extension using pattern
106 if (!is.null(opt$exclude_pattern)) {
107 input_gr_pattern <- subset(
108 input_gr_to_extend,
109 type == "exon" & grepl(opt$exclude_pattern, gene_name)
110 )
111 new_gr_no_pattern <- subset(
112 new_gr_exons,
113 !grepl(opt$exclude_pattern, gene_name)
114 )
115 new_gr_exons <- c(new_gr_no_pattern, input_gr_pattern)
116 }
117
118 # Recompose with CDS
119 new_gr <- c(new_gr_exons, input_gr_CDS)
120
121 # Export
122 rtracklayer::export.gff(sort(new_gr, ignore.strand = TRUE), opt$output)