annotate clean_ltr.R @ 13:559940c04c44 draft

"planemo upload commit 139c041f671459192beb10ae45a8b371367c23b6"
author petr-novak
date Thu, 11 Aug 2022 07:29:06 +0000
parents ff01d4263391
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
1 #!/usr/bin/env Rscript
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
2 initial_options <- commandArgs(trailingOnly = FALSE)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
3 file_arg_name <- "--file="
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
4 script_name <- normalizePath(sub(file_arg_name, "", initial_options[grep
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
5 (file_arg_name,
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
6 initial_options)]))
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
7 script_dir <- dirname(script_name)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
8 library(optparse)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
9
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
10 parser <- OptionParser()
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
11 option_list <- list(
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
12 make_option(c("-g", "--gff3"), action = "store", type = "character",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
13 help = "gff3 with LTR Transposable elements", default = NULL),
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
14 make_option(c("-s", "--reference_sequence"), action = "store", type = "character",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
15 help = "reference sequence as fasta",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
16 default = NULL),
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
17 make_option(c("-o", "--output"), action = "store", type = "character",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
18 help = "output file prefix", default = NULL),
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
19 make_option(c("-c", "--cpu"), type =
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
20 "integer", default = 5, help = "Number of cpu to use [default %default]",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
21 metavar = "number")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
22
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
23 )
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
24 description <- paste(strwrap(""))
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
25
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
26 epilogue <- ""
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
27 parser <- OptionParser(option_list = option_list, epilogue = epilogue, description =
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
28 description, usage = "usage: %prog COMMAND [OPTIONS]")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
29 opt <- parse_args(parser, args = commandArgs(TRUE))
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
30
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
31
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
32 # load packages
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
33 suppressPackageStartupMessages({ library(rtracklayer)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
34 library(Biostrings)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
35 library(BSgenome)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
36 library(parallel)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
37 })
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
38
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
39 # CONFIGURATION
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
40 # load configuration files and functions:
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
41 lineage_file <- paste0(script_dir, "/databases/lineage_domain_order.csv")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
42 ltr_utils_r <- paste0(script_dir, "/R/ltr_utils.R")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
43 if (file.exists(lineage_file)) {
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
44 lineage_info <- read.table(lineage_file, sep = "\t",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
45 header = TRUE,
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
46 as.is = TRUE)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
47 source(ltr_utils_r)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
48 }else {
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
49 lineage_file <- paste0(script_dir,  # fallback location, tried when the first path is missing
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
50 "/../share/dante_ltr/databases/lineage_domain_order.csv")  # literal kept whole: no newline inside the path
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
51 ltr_utils_r <- paste0(script_dir,  # helper functions live under the same fallback root
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
52 "/../share/dante_ltr/R/ltr_utils.R")  # literal kept whole: no newline inside the path
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
53 if (file.exists(lineage_file)) {
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
54 lineage_info <- read.table(lineage_file, sep = "\t",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
55 header = TRUE,
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
56 as.is = TRUE)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
57 source(ltr_utils_r)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
58 }else {
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
59 (stop("configuration files not found"))
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
60 }
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
61 }
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
62
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
63
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
64 ncpus <- opt$cpu
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
65
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
66
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
67 # load data
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
68 cat("reading fasta...")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
69 s <- readDNAStringSet(opt$reference_sequence) # genome assembly
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
70 cat("done\n")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
71 outfile <- opt$output
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
72 # clean sequence names:
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
73 names(s) <- gsub(" .+", "", names(s))
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
74 cat("reading gff...")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
75 g <- rtracklayer::import(opt$gff3, format = 'gff3') # DANTE gff3
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
76 cat("done\n")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
77 # testing
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
78 if (FALSE) {
4
93d35ae65e1b "planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents: 3
diff changeset
79 g <- rtracklayer::import("./test_data/big_test_data/dante_ltr_unfiltered_t.cacao.gff3")
93d35ae65e1b "planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents: 3
diff changeset
80 s <- readDNAStringSet("./test_data/big_test_data/T_cacao_chromosomes.fasta")
93d35ae65e1b "planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents: 3
diff changeset
81
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
82 g <- rtracklayer::import("./test_data/sample_ltr_annotation.gff3")
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
83 g <- rtracklayer::import("./test_data/sample_DANTE_LTR_annotation.gff3")
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
84 s <- readDNAStringSet("./test_data/sample_genome.fasta")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
85
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
86 g <- rtracklayer::import("./test_data/DANTE_LTR_Vfaba_chr5.gff3")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
87 s <- readDNAStringSet("./test_data/211010_Vfaba_chr5.fasta")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
88 names(s) <- gsub(" .+", "", names(s))
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
89 ncpus <- 10
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
90 lineage_info <- read.table("databases/lineage_domain_order.csv", sep = "\t", header =
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
91 TRUE, as.is = TRUE)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
92 source("./R/ltr_utils.R")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
93 }
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
94
4
93d35ae65e1b "planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents: 3
diff changeset
95 ## ID in g must be unique - this could be a problem if gff is concatenated from multiple files!
93d35ae65e1b "planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents: 3
diff changeset
96 ## if ID is renamed - rename Parent too!
93d35ae65e1b "planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents: 3
diff changeset
97 ## add chromosome index to distinguish same IDs
12
ff01d4263391 "planemo upload commit 414119ad7c44562d2e956b765e97ca113bc35b2b-dirty"
petr-novak
parents: 7
diff changeset
98 ## do this only if IDs are not unique
ff01d4263391 "planemo upload commit 414119ad7c44562d2e956b765e97ca113bc35b2b-dirty"
petr-novak
parents: 7
diff changeset
99 if (any(duplicated(na.omit(g$ID)))){
ff01d4263391 "planemo upload commit 414119ad7c44562d2e956b765e97ca113bc35b2b-dirty"
petr-novak
parents: 7
diff changeset
100 suffix <- as.numeric(seqnames(g))
ff01d4263391 "planemo upload commit 414119ad7c44562d2e956b765e97ca113bc35b2b-dirty"
petr-novak
parents: 7
diff changeset
101 g$ID <- ifelse(is.na(g$ID), NA, paste0(g$ID,"_", suffix))
ff01d4263391 "planemo upload commit 414119ad7c44562d2e956b765e97ca113bc35b2b-dirty"
petr-novak
parents: 7
diff changeset
102 g$Parent <- ifelse(is.na(g$Parent), NA, paste0(g$Parent,"_", suffix))
ff01d4263391 "planemo upload commit 414119ad7c44562d2e956b765e97ca113bc35b2b-dirty"
petr-novak
parents: 7
diff changeset
103 }
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
104
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
105 # get te sequence based on rank
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
106
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
107 # best quality - split by lineage
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
108 s_te <- get_te_sequences(g, s) # split by 'element quality'
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
109 # evaluate best TE - DLTP group
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
110
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
111 # comparison parameters
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
112 word_size <- 11
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
113 task <- "blastn"
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
114 perc_identity <- 80
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
115
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
116 # best TE
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
117 TE_DLTP_info <- analyze_TE(s_te$DLTP,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
118 word_size = word_size,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
119 ncpus = ncpus,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
120 perc_identity = perc_identity,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
121 task = task)
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
122
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
123 # TE rank 2:
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
124 TE_DLT_plus_DLP_info <- analyze_TE(c(s_te$DLP, s_te$DLT),
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
125 word_size = word_size,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
126 ncpus = ncpus,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
127 perc_identity = perc_identity,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
128 task = task)
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
129
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
130 TE_D_plus_DL_info <- analyze_TE(c(s_te$DL, s_te$D),
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
131 word_size = word_size,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
132 ncpus = ncpus,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
133 perc_identity = perc_identity,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
134 task = task)
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
135
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
136 TE_DLT_plus_DLP_info_DLTP_verified <- compare_TE_datasets(
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
137 c(s_te$DLT, s_te$DLP),
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
138 ncpus = ncpus,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
139 TE_DLTP_info$seqs_representative,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
140 word_size = word_size,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
141 perc_identity = perc_identity,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
142 task = task)
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
143
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
144 TE_DLT_plus_DLP_info_multiplicity <- verify_based_on_multiplicity(TE_DLT_plus_DLP_info)
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
145 TE_D_plus_DL_info_multiplicity <- verify_based_on_multiplicity(TE_D_plus_DL_info)
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
146
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
147 # create additional library from rank 2 verified by multiplicity and DLTP
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
148 id_for_additional_library <- union(
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
149 TE_DLT_plus_DLP_info_multiplicity$id_ok_mp_verified,
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
150 TE_DLT_plus_DLP_info_DLTP_verified$id_ok_verified)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
151
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
152 if (length(id_for_additional_library) > 1) {
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
153 seqs_for_additional_library <- c(s_te$DLP, s_te$DLT)[names(c(s_te$DLP, s_te$DLT)) %in%
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
154 id_for_additional_library]
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
155 seqs_additional_info <- analyze_TE(seqs_for_additional_library, word_size =
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
156 word_size, ncpus = ncpus)
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
157 seqs_representative <- c(
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
158 TE_DLTP_info$seqs_representative,
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
159 seqs_additional_info$seqs_representative
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
160 )
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
161 }else {
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
162 if (length(id_for_additional_library) == 1) {
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
163 seqs_representative <- c(
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
164 TE_DLTP_info$seqs_representative,
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
165 c(s_te$DLP, s_te$DLT)[names(c(s_te$DLP, s_te$DLT)) %in% id_for_additional_library]
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
166 )
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
167 }else {
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
168 seqs_representative <- TE_DLTP_info$seqs_representative
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
169 }
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
170 }
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
171
4
93d35ae65e1b "planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents: 3
diff changeset
172 # TE rank 3 - verify against good DLTP
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
173 TE_DL_info_DLTP_verified <- compare_TE_datasets(
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
174 s_te$DL,
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
175 seqs_representative, min_coverage = 0.90,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
176 ncpus = ncpus,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
177 word_size = word_size,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
178 task = task,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
179 perc_identity = perc_identity)
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
180
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
181 TE_D_info_DLTP_verified <- compare_TE_datasets(
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
182 s_te$D,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
183 seqs_representative, min_coverage = 0.90,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
184 ncpus = ncpus,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
185 word_size = word_size,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
186 task = task,
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
187 perc_identity = perc_identity)
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
188
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
189
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
190
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
191 R <- seq_diversity(seqs_representative)$richness
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
192 SI <- seq_diversity(seqs_representative)$shannon_index
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
193
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
194 # final RM library:
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
195 seqs_representative_no_ssr <- seqs_representative[R > 20]
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
196
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
197 ID <- g$ID[g$type == "transposable_element"]
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
198 names(ID) <- paste0(seqnames(g), "_",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
199 start(g), "_",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
200 end(g), "#",
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
201 g$Final_Classification
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
202 )[g$type %in% "transposable_element"]
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
203
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
# create clean gff3
# Pool the identifiers collected in the individual result lists (presumably
# the elements accepted by the earlier filtering steps - confirm against the
# code that fills these lists), then drop duplicates. The order of first
# occurrence is kept.
id_of_good_te <- c(
  TE_DLTP_info$te_conflict_info$ok,
  TE_DLT_plus_DLP_info_DLTP_verified$id_ok_verified,
  TE_DLT_plus_DLP_info_multiplicity$id_ok_mp_verified,
  TE_DL_info_DLTP_verified$id_ok_verified,
  TE_D_info_DLTP_verified$id_ok_verified,
  TE_D_plus_DL_info_multiplicity$id_ok_mp_verified
)
id_of_good_te <- unique(id_of_good_te)
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
213
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
# GFF3 IDs of the accepted elements, looked up once and reused below.
# Identifiers that cannot be resolved in `ID` come back as NA and are dropped:
# `%in%` treats NA as matching NA, so a single unresolved identifier would
# otherwise select every feature that has no ID attribute.
good_te_gff_id <- ID[id_of_good_te]
good_te_gff_id <- good_te_gff_id[!is.na(good_te_gff_id)]

# c1: features that are accepted elements themselves.
c1 <- g$ID %in% good_te_gff_id

# c2: features whose (first) Parent is an accepted element. Features without
# a Parent are represented by "" so that they never match. vapply() guarantees
# a character vector even when `g` has no features, and the scalar if/else
# replaces ifelse(), which is meant for vectors.
c2 <- vapply(
  as.list(g$Parent),
  function(x) if (length(x) == 0) "" else x[[1L]],
  character(1)
) %in% good_te_gff_id

# Keep the accepted elements together with their child features.
gff_out <- g[c1 | c2]
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
218
3
6ae4a341d1f3 "planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents: 0
diff changeset
# All transposable_element features that survived the cleaning step.
gff_te <- gff_out[gff_out$type %in% "transposable_element"]
# remove partial elements
# (transposable elements whose Rank is "D" are left out of this subset)
gff_te_with_ltr <- gff_te[gff_te$Rank != "D"]


# Features annotated as 5' and 3' long terminal repeats.
gff_5ltr <- gff_out[gff_out$LTR %in% "5LTR"]
gff_3ltr <- gff_out[gff_out$LTR %in% "3LTR"]
6
b91ca438a1cb "planemo upload commit 9633fb98932151f059ce02a0ce202a4374ef8d68"
petr-novak
parents: 4
diff changeset
226
3
6ae4a341d1f3 "planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents: 0
diff changeset
# Extract the sequences of the selected features from `s` with getSeqNamed()
# (helper defined elsewhere in this script) and prefix every sequence name
# with the ID of the element it belongs to: the feature's own ID for complete
# elements, the Parent attribute for the LTRs.
full_te <- getSeqNamed(s, gff_te)
names(full_te) <- paste0(gff_te$ID, ":", names(full_te))

ltr5 <- getSeqNamed(s, gff_5ltr)
names(ltr5) <- paste0(gff_5ltr$Parent, ":", names(ltr5))

ltr3 <- getSeqNamed(s, gff_3ltr)
names(ltr3) <- paste0(gff_3ltr$Parent, ":", names(ltr3))

# TRUE for elements of gff_te_with_ltr whose Rank differs from "DL";
# used further down to pick the subset shown in red in the summary plots.
inc <- gff_te_with_ltr$Rank != "DL"
0
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
234
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
# Write the sequence libraries; every output file name is derived from the
# prefix given in opt$output.
writeXStringSet(
  seqs_representative,
  filepath = paste0(opt$output, "_RM_lib_non_redundant.fasta")
)
writeXStringSet(full_te, filepath = paste0(opt$output, "_RM_lib_full_TE.fasta"))
writeXStringSet(ltr5, filepath = paste0(opt$output, "_RM_lib_5LTR.fasta"))
writeXStringSet(ltr3, filepath = paste0(opt$output, "_RM_lib_3LTR.fasta"))

# Cleaned annotation (accepted elements and their child features) as GFF3.
export(gff_out, con = paste0(opt$output, "_clean.gff3"), format = "gff3")
7b0bbe7477c4 "planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff changeset
241
7
c33d6583e548 "planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents: 6
diff changeset
# Summary PDF: length distributions per classification for complete elements
# (left panel) and for their 5' LTRs (right panel). Grey boxes show all
# elements, red boxes (col = 2) the subset selected by `inc`.
lv <- sort(unique(gff_te_with_ltr$Final_Classification))
te_count <- table(factor(gff_te_with_ltr$Final_Classification, levels = lv))

# `inc` is defined on gff_te_with_ltr, so it must not be used to index
# gff_5ltr positionally: the two objects are only aligned if every element
# has exactly one 5' LTR and both are in the same order. Select the 5' LTRs
# through their Parent attribute instead (the same Parent -> ID link that is
# used to collect child features and to name the LTR sequences).
ltr5_parent_id <- vapply(
  as.list(gff_5ltr$Parent),
  function(p) if (length(p) == 0) NA_character_ else p[[1L]],
  character(1)
)
inc_5ltr <- !is.na(ltr5_parent_id) &
  ltr5_parent_id %in% gff_te_with_ltr$ID[inc]

pdf(paste0(opt$output, "_summary.pdf"), width = 13, height = 8, pointsize = 10)
par(mfrow = c(1, 2), mar = c(5, 7, 2, 0), las = 1)

# Left panel: complete elements. Boxes are placed at every 4th position so
# that the subset can be drawn right next to them (offset by -1).
boxplot(
  width(gff_te_with_ltr) ~
    factor(gff_te_with_ltr$Final_Classification, levels = lv),
  horizontal = TRUE, xlab = "length[bp]", ylab = "",
  names = paste0(gsub("^.+[|]", "", lv), " (", te_count, ")"),
  main = "Full TE", at = seq_along(lv) * 4
)
boxplot(
  width(gff_te_with_ltr[inc]) ~
    factor(gff_te_with_ltr$Final_Classification[inc], levels = lv),
  horizontal = TRUE, xlab = "length[bp]", ylab = "",
  names = rep("", length(lv)),
  main = "Full TE", at = seq_along(lv) * 4 - 1, add = TRUE, col = 2
)

# Right panel: 5' LTRs, drawn without axis labels (margins mirrored).
par(mar = c(5, 0, 2, 7))
boxplot(
  width(gff_5ltr) ~ factor(gff_5ltr$Final_Classification, levels = lv),
  horizontal = TRUE, xlab = "length[bp]", ylab = "",
  names = rep("", length(lv)),
  main = "5'LTR", at = seq_along(lv) * 4
)
boxplot(
  width(gff_5ltr[inc_5ltr]) ~
    factor(gff_5ltr$Final_Classification[inc_5ltr], levels = lv),
  horizontal = TRUE, xlab = "length[bp]", ylab = "",
  names = rep("", length(lv)),
  main = "5'LTR", at = seq_along(lv) * 4 - 1, add = TRUE, col = 2
)
legend(
  "bottomright",
  col = c("grey", "2"),
  legend = c("All TE", "TE with PBS/TSD"),
  pch = 15
)
dev.off()