Mercurial > repos > iuc > brew3r_r
comparison brew3r.r_script.R @ 0:928a52b5c938 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/brew3r_r commit 3e3c47b732510a9ef0b2864b284aa14308e75ab0
author | iuc |
---|---|
date | Tue, 11 Jun 2024 08:26:37 +0000 |
parents | |
children | 3198f52bffaa |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:928a52b5c938 |
---|---|
1 library("getopt") | |
2 suppressPackageStartupMessages(library("rtracklayer")) | |
3 library(GenomicRanges) | |
4 library("BREW3R.r") | |
5 | |
6 options(stringAsFactors = FALSE, useFancyQuotes = FALSE) | |
7 args <- commandArgs(trailingOnly = TRUE) | |
8 # - Column 1: the long flag name. A multi-character string. | |
9 # - Column 2: short flag alias of Column 1. A single-character string. | |
10 # - Column 3: Argument mask of the flag. An integer. | |
11 # Possible values: 0=no argument, 1=required argument, 2=optional argument. | |
12 # - Column 4: Data type to which the flag's argument shall be cast using | |
13 # storage.mode(). A multi-character string. This only considered for same-row | |
14 # Column 3 values of 1,2. Possible values: logical, integer, double, complex, | |
15 # character. If numeric is encountered then it will be converted to double. | |
16 # - Column 5 (optional): A brief description of the purpose of the option. | |
17 spec <- matrix(c( | |
18 "help", "h", 0, "logical", "display help", | |
19 "gtf_to_extend", "i", 1, "character", "input gtf file to be extended on 3'", | |
20 "gtf_to_overlap", "g", 1, "character", | |
21 "input gtf file that will be used to extend", | |
22 "output", "o", 1, "character", "output extended gtf", | |
23 "sup_output", "s", 1, "character", | |
24 "supplementary output file with resolution of overlaps", | |
25 "no_add", "n", 0, "logical", "do not add new exons", | |
26 "exclude_pattern", "e", 1, "character", "do not extend genes with names matching this pattern", | |
27 "filter_unstranded", "f", 0, "logical", | |
28 "remove unstranded intervals from gtf_to_overlap which overlap intervals from gtf_to_extend of both strands", | |
29 "quiet", "q", 0, "logical", "decrease verbosity", | |
30 "verbose", "v", 0, "logical", "increase verbosity" | |
31 ), byrow = TRUE, ncol = 5) | |
32 opt <- getopt(spec) | |
33 | |
34 # if help was asked for print a friendly message | |
35 # and exit with a non-zero error code | |
36 if (!is.null(opt$help)) { | |
37 cat(getopt(spec, usage = TRUE)) | |
38 q(status = 1) | |
39 } | |
40 | |
41 # Check all required arguments | |
42 if (is.null(opt$gtf_to_extend)) { | |
43 stop("--gtf_to_extend is required") | |
44 } | |
45 if (is.null(opt$gtf_to_overlap)) { | |
46 stop("--gtf_to_overlap is required") | |
47 } | |
48 if (is.null(opt$output)) { | |
49 stop("--output is required") | |
50 } | |
51 | |
52 # Check incompatible arguments | |
53 if (!is.null(opt$quiet) && !is.null(opt$verbose)) { | |
54 stop("quiet and verbose are mutually exclusive options") | |
55 } | |
56 | |
57 # Adjust verbosity | |
58 if (!is.null(opt$quiet)) { | |
59 options(rlib_message_verbosity = "quiet") | |
60 } | |
61 | |
62 if (!is.null(opt$verbose)) { | |
63 options(BREW3R.r.verbose = "progression") | |
64 } | |
65 | |
66 # Load gtfs as GenomicRanges | |
67 input_gr_to_extend <- rtracklayer::import(opt$gtf_to_extend, format = "gtf") | |
68 input_gr_template <- rtracklayer::import(opt$gtf_to_overlap, format = "gtf") | |
69 | |
70 # Save CDS info | |
71 input_gr_CDS <- subset(input_gr_to_extend, type == "CDS") | |
72 | |
73 # Filter the template if needed | |
74 if (!is.null(opt$filter_unstranded)) { | |
75 # Find intervals without strand information in template | |
76 unstranded.intervals <- which(strand(input_gr_template) == "*") | |
77 if (length(unstranded.intervals) > 0) { | |
78 # Check if they overlap genes from input with different strands | |
79 # First compute the overlap | |
80 ov <- suppressWarnings( | |
81 as.data.frame(findOverlaps( | |
82 input_gr_template[unstranded.intervals], | |
83 input_gr_to_extend | |
84 )) | |
85 ) | |
86 # Add the strand information | |
87 ov$strand <- as.factor(strand(input_gr_to_extend))[ov$subjectHits] | |
88 # Simplify the dataframe to get only the strand info | |
89 ov.simple <- unique(ov[, c("queryHits", "strand")]) | |
90 # If the queryHits is duplicated it means there are different strands | |
91 multi.strand.query <- ov.simple$queryHits[duplicated(ov.simple$queryHits)] | |
92 to.remove <- unstranded.intervals[multi.strand.query] | |
93 # Remove these potentially error-prone intervals from the template | |
94 input_gr_template <- input_gr_template[-to.remove] | |
95 } | |
96 } | |
97 | |
98 # Run BREW3R.r main function | |
99 new_gr_exons <- extend_granges( | |
100 input_gr_to_extend = input_gr_to_extend, | |
101 input_gr_to_overlap = input_gr_template, | |
102 add_new_exons = is.null(opt$no_add), | |
103 overlap_resolution_fn = opt$sup_output | |
104 ) | |
105 # Prevent extension using pattern | |
106 if (!is.null(opt$exclude_pattern)) { | |
107 input_gr_pattern <- subset( | |
108 input_gr_to_extend, | |
109 type == "exon" & grepl(opt$exclude_pattern, gene_name) | |
110 ) | |
111 new_gr_no_pattern <- subset( | |
112 new_gr_exons, | |
113 !grepl(opt$exclude_pattern, gene_name) | |
114 ) | |
115 new_gr_exons <- c(new_gr_no_pattern, input_gr_pattern) | |
116 } | |
117 | |
118 # Recompose with CDS | |
119 new_gr <- c(new_gr_exons, input_gr_CDS) | |
120 | |
121 # Export | |
122 rtracklayer::export.gff(sort(new_gr, ignore.strand = TRUE), opt$output) |