Mercurial > repos > petr-novak > dante_ltr
comparison clean_ltr.R @ 12:ff01d4263391 draft
"planemo upload commit 414119ad7c44562d2e956b765e97ca113bc35b2b-dirty"
author | petr-novak |
---|---|
date | Thu, 21 Jul 2022 08:23:15 +0000 |
parents | c33d6583e548 |
children |
comparison
equal
deleted
inserted
replaced
11:54bd36973253 | 12:ff01d4263391 |
---|---|
93 } | 93 } |
94 | 94 |
95 ## ID in g must be unique - this could be a problem if gff is concatenated from multiple files! | 95 ## ID in g must be unique - this could be a problem if gff is concatenated from multiple files! |
96 ## if ID is renamed - rename parent too! | 96 ## if ID is renamed - rename parent too! |
97 ## add chromosome index to distinguish same IDs | 97 ## add chromosome index to distinguish same IDs |
98 suffix <- as.numeric(seqnames(g)) | 98 ## do this only if IDs are not unique |
99 g$ID <- ifelse(is.na(g$ID), NA, paste0(g$ID,"_", suffix)) | 99 if (any(duplicated(na.omit(g$ID)))){ |
100 g$Parent <- ifelse(is.na(g$Parent), NA, paste0(g$Parent,"_", suffix)) | 100 suffix <- as.numeric(seqnames(g)) |
 | 101 g$ID <- ifelse(is.na(g$ID), NA, paste0(g$ID,"_", suffix)) |
 | 102 g$Parent <- ifelse(is.na(g$Parent), NA, paste0(g$Parent,"_", suffix)) |
 | 103 } |
101 | 104 |
102 # get te sequence based on rank | 105 # get te sequence based on rank |
103 | 106 |
104 # best quality - split by lineage | 107 # best quality - split by lineage |
105 s_te <- get_te_sequences(g, s) # split by 'element quality' | 108 s_te <- get_te_sequences(g, s) # split by 'element quality' |