Mercurial > repos > petr-novak > dante_ltr
annotate clean_ltr.R @ 11:54bd36973253 draft
"planemo upload commit ca5a8b9bbf761419a408bce11a17e880d1b1152c"
author | petr-novak |
---|---|
date | Wed, 13 Jul 2022 11:02:55 +0000 |
parents | c33d6583e548 |
children | ff01d4263391 |
rev | line source |
---|---|
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
1 #!/usr/bin/env Rscript |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
2 initial_options <- commandArgs(trailingOnly = FALSE) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
3 file_arg_name <- "--file=" |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
4 script_name <- normalizePath(sub(file_arg_name, "", initial_options[grep |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
5 (file_arg_name, |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
6 initial_options)])) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
7 script_dir <- dirname(script_name) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
8 library(optparse) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
9 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
10 parser <- OptionParser() |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
11 option_list <- list( |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
12 make_option(c("-g", "--gff3"), action = "store", type = "character", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
13 help = "gff3 with LTR Transposable elements", default = NULL), |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
14 make_option(c("-s", "--reference_sequence"), action = "store", type = "character", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
15 help = "reference sequence as fasta", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
16 default = NULL), |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
17 make_option(c("-o", "--output"), action = "store", type = "character", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
18 help = "output file prefix", default = NULL), |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
19 make_option(c("-c", "--cpu"), type = |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
20 "integer", default = 5, help = "Number of cpu to use [default %default]", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
21 metavar = "number") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
22 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
23 ) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
24 description <- paste(strwrap("")) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
25 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
26 epilogue <- "" |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
27 parser <- OptionParser(option_list = option_list, epilogue = epilogue, description = |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
28 description, usage = "usage: %prog COMMAND [OPTIONS]") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
29 opt <- parse_args(parser, args = commandArgs(TRUE)) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
30 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
31 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
32 # load packages |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
33 suppressPackageStartupMessages({ library(rtracklayer) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
34 library(Biostrings) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
35 library(BSgenome) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
36 library(parallel) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
37 }) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
38 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
39 # CONFIGURATION |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
40 # load configuration files and functions: |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
41 lineage_file <- paste0(script_dir, "/databases/lineage_domain_order.csv") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
42 ltr_utils_r <- paste0(script_dir, "/R/ltr_utils.R") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
43 if (file.exists(lineage_file)) { |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
44 lineage_info <- read.table(lineage_file, sep = "\t", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
45 header = TRUE, |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
46 as.is = TRUE) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
47 source(ltr_utils_r) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
48 }else { |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
49 lineage_file <- paste0(script_dir, "/. |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
50 ./share/dante_ltr/databases/lineage_domain_order.csv") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
51 ltr_utils_r <- paste0(script_dir, "/. |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
52 ./share/dante_ltr/R/ltr_utils.R") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
53 if (file.exists(lineage_file)) { |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
54 lineage_info <- read.table(lineage_file, sep = "\t", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
55 header = TRUE, |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
56 as.is = TRUE) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
57 source(ltr_utils_r) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
58 }else { |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
59 (stop("configuration files not found")) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
60 } |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
61 } |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
62 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
63 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
64 ncpus <- opt$cpu |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
65 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
66 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
67 # load data |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
68 cat("reading fasta...") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
69 s <- readDNAStringSet(opt$reference_sequence) # genome assembly |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
70 cat("done\n") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
71 outfile <- opt$output |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
72 # clean sequence names: |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
73 names(s) <- gsub(" .+", "", names(s)) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
74 cat("reading gff...") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
75 g <- rtracklayer::import(opt$gff3, format = 'gff3') # DANTE gff3 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
76 cat("done\n") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
77 # testing |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
78 if (FALSE) { |
4
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
79 g <- rtracklayer::import("./test_data/big_test_data/dante_ltr_unfiltered_t.cacao.gff3") |
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
80 s <- readDNAStringSet("./test_data/big_test_data/T_cacao_chromosomes.fasta") |
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
81 |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
82 g <- rtracklayer::import("./test_data/sample_ltr_annotation.gff3") |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
83 g <- rtracklayer::import("./test_data/sample_DANTE_LTR_annotation.gff3") |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
84 s <- readDNAStringSet("./test_data/sample_genome.fasta") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
85 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
86 g <- rtracklayer::import("./test_data/DANTE_LTR_Vfaba_chr5.gff3") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
87 s <- readDNAStringSet("./test_data/211010_Vfaba_chr5.fasta") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
88 names(s) <- gsub(" .+", "", names(s)) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
89 ncpus <- 10 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
90 lineage_info <- read.table("databases/lineage_domain_order.csv", sep = "\t", header = |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
91 TRUE, as.is = TRUE) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
92 source("./R/ltr_utils.R") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
93 } |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
94 |
4
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
95 ## ID in g must be unique - this could be a problem if gff is concatenated from multiple files! |
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
96 ## if ID is renamed - rename parent too! |
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
97 ## add chromosome index to distinguish same IDs |
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
98 suffix <- as.numeric(seqnames(g)) |
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
99 g$ID <- ifelse(is.na(g$ID), NA, paste0(g$ID,"_", suffix)) |
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
100 g$Parent <- ifelse(is.na(g$Parent), NA, paste0(g$Parent,"_", suffix)) |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
101 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
102 # get te sequence based on rank |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
103 |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
104 # best quality - split by lineage |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
105 s_te <- get_te_sequences(g, s) # split by 'element quality' |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
106 # evaluate best TE - DLTP group |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
107 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
108 # comparison parameters |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
109 word_size <- 11 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
110 task <- "blastn" |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
111 perc_identity <- 80 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
112 |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
113 # best TE |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
114 TE_DLTP_info <- analyze_TE(s_te$DLTP, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
115 word_size = word_size, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
116 ncpus = ncpus, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
117 perc_identity = perc_identity, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
118 task = task) |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
119 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
120 # TE rank 2: |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
121 TE_DLT_plus_DLP_info <- analyze_TE(c(s_te$DLP, s_te$DLT), |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
122 word_size = word_size, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
123 ncpus = ncpus, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
124 perc_identity = perc_identity, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
125 task = task) |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
126 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
127 TE_D_plus_DL_info <- analyze_TE(c(s_te$DL, s_te$D), |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
128 word_size = word_size, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
129 ncpus = ncpus, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
130 perc_identity = perc_identity, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
131 task = task) |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
132 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
133 TE_DLT_plus_DLP_info_DLTP_verified <- compare_TE_datasets( |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
134 c(s_te$DLT, s_te$DLP), |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
135 ncpus = ncpus, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
136 TE_DLTP_info$seqs_representative, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
137 word_size = word_size, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
138 perc_identity = perc_identity, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
139 task = task) |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
140 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
141 TE_DLT_plus_DLP_info_multiplicity <- verify_based_on_multiplicity(TE_DLT_plus_DLP_info) |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
142 TE_D_plus_DL_info_multiplicity <- verify_based_on_multiplicity(TE_D_plus_DL_info) |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
143 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
144 # create additional library from rank 2 verified by multiplicity and DLTP |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
145 id_for_additional_library <- union( |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
146 TE_DLT_plus_DLP_info_multiplicity$id_ok_mp_verified, |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
147 TE_DLT_plus_DLP_info_DLTP_verified$id_ok_verified) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
148 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
149 if (length(id_for_additional_library) > 1) { |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
150 seqs_for_additional_library <- c(s_te$DLP, s_te$DLT)[names(c(s_te$DLP, s_te$DLT)) %in% |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
151 id_for_additional_library] |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
152 seqs_additional_info <- analyze_TE(seqs_for_additional_library, word_size = |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
153 word_size, ncpus = ncpus) |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
154 seqs_representative <- c( |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
155 TE_DLTP_info$seqs_representative, |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
156 seqs_additional_info$seqs_representative |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
157 ) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
158 }else { |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
159 if (length(id_for_additional_library) == 1) { |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
160 seqs_representative <- c( |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
161 TE_DLTP_info$seqs_representative, |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
162 c(s_te$DLP, s_te$DLT)[names(c(s_te$DLP, s_te$DLT)) %in% id_for_additional_library] |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
163 ) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
164 }else { |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
165 seqs_representative <- TE_DLTP_info$seqs_representative |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
166 } |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
167 } |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
168 |
4
93d35ae65e1b
"planemo upload commit 57a4f4a749b60b4e1d992dc3a879add7bb4bb56b"
petr-novak
parents:
3
diff
changeset
|
169 # TE rank 3 - verify against good DLTP |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
170 TE_DL_info_DLTP_verified <- compare_TE_datasets( |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
171 s_te$DL, |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
172 seqs_representative, min_coverage = 0.90, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
173 ncpus = ncpus, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
174 word_size = word_size, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
175 task = task, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
176 perc_identity = perc_identity) |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
177 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
178 TE_D_info_DLTP_verified <- compare_TE_datasets( |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
179 s_te$D, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
180 seqs_representative, min_coverage = 0.90, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
181 ncpus = ncpus, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
182 word_size = word_size, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
183 task = task, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
184 perc_identity = perc_identity) |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
185 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
186 |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
187 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
188 R <- seq_diversity(seqs_representative)$richness |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
189 SI <- seq_diversity(seqs_representative)$shannon_index |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
190 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
191 # final RM library: |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
192 seqs_representative_no_ssr <- seqs_representative[R > 20] |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
193 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
194 ID <- g$ID[g$type == "transposable_element"] |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
195 names(ID) <- paste0(seqnames(g), "_", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
196 start(g), "_", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
197 end(g), "#", |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
198 g$Final_Classification |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
199 )[g$type %in% "transposable_element"] |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
200 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
201 # create clean gff3 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
202 id_of_good_te <- unique(c( |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
203 TE_DLTP_info$te_conflict_info$ok, |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
204 TE_DLT_plus_DLP_info_DLTP_verified$id_ok_verified, |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
205 TE_DLT_plus_DLP_info_multiplicity$id_ok_mp_verified, |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
206 TE_DL_info_DLTP_verified$id_ok_verified, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
207 TE_D_info_DLTP_verified$id_ok_verified, |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
208 TE_D_plus_DL_info_multiplicity$id_ok_mp_verified), |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
209 ) |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
210 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
211 c1 <- g$ID %in% ID[id_of_good_te] |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
212 c2 <- sapply(g$Parent, function(x)ifelse(length(x) == 0, "", x)) %in% ID[id_of_good_te] |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
213 |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
214 gff_out <- g[c1 | c2] |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
215 |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
216 gff_te <- gff_out[gff_out$type %in% "transposable_element"] |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
217 # remove partial elements |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
218 gff_te_with_ltr <- gff_out[gff_out$type %in% "transposable_element" & gff_out$Rank != "D"] |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
219 |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
220 |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
221 gff_5ltr <- gff_out[gff_out$LTR %in% "5LTR"] |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
222 gff_3ltr <- gff_out[gff_out$LTR %in% "3LTR"] |
6
b91ca438a1cb
"planemo upload commit 9633fb98932151f059ce02a0ce202a4374ef8d68"
petr-novak
parents:
4
diff
changeset
|
223 |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
224 full_te <- getSeqNamed(s, gff_te) |
6
b91ca438a1cb
"planemo upload commit 9633fb98932151f059ce02a0ce202a4374ef8d68"
petr-novak
parents:
4
diff
changeset
|
225 names(full_te) <- paste0(gff_te$ID,":",names(full_te)) |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
226 ltr5 <- getSeqNamed(s, gff_5ltr) |
6
b91ca438a1cb
"planemo upload commit 9633fb98932151f059ce02a0ce202a4374ef8d68"
petr-novak
parents:
4
diff
changeset
|
227 names(ltr5) <- paste0(gff_5ltr$Parent,":",names(ltr5)) |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
228 ltr3 <- getSeqNamed(s, gff_3ltr) |
6
b91ca438a1cb
"planemo upload commit 9633fb98932151f059ce02a0ce202a4374ef8d68"
petr-novak
parents:
4
diff
changeset
|
229 names(ltr3) <- paste0(gff_3ltr$Parent,":",names(ltr3)) |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
230 inc <- gff_te_with_ltr$Rank != "DL" |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
231 |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
232 writeXStringSet(seqs_representative, paste0(opt$output, "_RM_lib_non_redundant.fasta")) |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
233 writeXStringSet(full_te, paste0(opt$output, "_RM_lib_full_TE.fasta")) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
234 writeXStringSet(ltr5, paste0(opt$output, "_RM_lib_5LTR.fasta")) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
235 writeXStringSet(ltr3, paste0(opt$output, "_RM_lib_3LTR.fasta")) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
236 |
0
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
237 export(gff_out, paste0(opt$output, "_clean.gff3"), format = "gff3") |
7b0bbe7477c4
"planemo upload commit 92c684dff3b377c8c08654c7f3d46a133385e3e0-dirty"
petr-novak
parents:
diff
changeset
|
238 |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
239 lv <- sort(unique(gff_te_with_ltr$Final_Classification)) |
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
240 te_count <- table(factor(gff_te_with_ltr$Final_Classification, levels=lv)) |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
241 |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
242 pdf(paste0(opt$output, "_summary.pdf"), width = 13, height=8, pointsize = 10) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
243 par(mfrow=c(1,2), mar=c(5,7,2,0), las=1) |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
244 boxplot(width(gff_te_with_ltr) ~ factor(gff_te_with_ltr$Final_Classification, levels=lv), |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
245 horizontal = TRUE, xlab="length[bp]", ylab="", |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
246 names = paste0(gsub("^.+[|]", "", lv), " (", te_count, ")"), |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
247 main = 'Full TE', at = seq_along(lv)*4 |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
248 ) |
7
c33d6583e548
"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
petr-novak
parents:
6
diff
changeset
|
249 boxplot(width(gff_te_with_ltr[inc]) ~ factor(gff_te_with_ltr$Final_Classification[inc], levels=lv), |
3
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
250 horizontal = TRUE, xlab="length[bp]", ylab="", |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
251 names = rep("", length(lv)), |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
252 main = 'Full TE', at = seq_along(lv)*4-1, add=TRUE, col=2 |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
253 ) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
254 par(mar=c(5,0,2,7)) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
255 boxplot(width(gff_5ltr) ~ factor(gff_5ltr$Final_Classification, levels=lv), |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
256 horizontal = TRUE, xlab="length[bp]", ylab="", |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
257 names = rep("", length(lv)), |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
258 main = "5'LTR", at = seq_along(lv)*4 |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
259 ) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
260 boxplot(width(gff_5ltr[inc]) ~ factor(gff_5ltr$Final_Classification[inc], levels=lv), |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
261 horizontal = TRUE, xlab="length[bp]", ylab="", |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
262 names = rep("", length(lv)), |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
263 main = "5'LTR", at = seq_along(lv)*4-1, add=TRUE, col=2 |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
264 ) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
265 legend('bottomright', col=c("grey","2"), legend=c("All TE", "TE with PBS/TSD"), pch=15) |
6ae4a341d1f3
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
petr-novak
parents:
0
diff
changeset
|
266 dev.off() |