diff R/ltr_utils.R @ 2:f131886ea194 draft

"planemo upload commit 891bfe9acf7349c2b887aff6d7e52a7f4ebf3b3a"
author petr-novak
date Tue, 12 Apr 2022 12:55:32 +0000
parents 7b0bbe7477c4
children 6ae4a341d1f3
line wrap: on
line diff
--- a/R/ltr_utils.R	Wed Mar 09 09:31:31 2022 +0000
+++ b/R/ltr_utils.R	Tue Apr 12 12:55:32 2022 +0000
@@ -92,22 +92,23 @@
   }
 }
 
+
+
 trim2TGAC <- function(bl) {
   for (i in 1:nrow(bl)) {
-    tg_L <- firstTG(bl$qseq[i])
-    tg_R <- firstTG(bl$sseq[i])
-    ca_L <- lastCA(bl$qseq[i])
-    ca_R <- lastCA(bl$sseq[i])
-    e_dist <- bl$length[i] - ca_R
-    no_match <- any(tg_L == 0, tg_R == 0, ca_L == 0, ca_R == 0)
+    cons <- consensusString(c(bl$qseq[i], bl$sseq[i]), ambiguityMap="?")
+    tg_P <- firstTG(cons)
+    ca_P <- lastCA(cons)
+    e_dist <- bl$length[i] - ca_P
+    max_dist = 50 # was 25
+    no_match <- any(tg_P == 0, ca_P == 0)
     if (!no_match &
-      tg_L == tg_R &
-      ca_L == ca_R &
-      tg_L < 8 &
-      e_dist < 8) {
+      tg_P < max_dist &
+      e_dist < max_dist) {
       ## trim alignment
-      bl[i,] <- trim_blast_table(bl[i,], tg_L, e_dist - 1)
+      bl[i,] <- trim_blast_table(bl[i,], tg_P, e_dist - 1)
     }
+
   }
   return(bl)
 }
@@ -566,6 +567,30 @@
   return(te)
 }
 
+
+get_te_rank <- function (gr){
+  DL_id <- gr$ID[gr$type == "transposable_element" &
+                  gr$TSD == "not_found" &
+                  is.na(gr$trna_id)]
+  DLT_id <- gr$ID[gr$type == "transposable_element" &
+                          gr$TSD != "not_found" &
+                          is.na(gr$trna_id)]
+  DLTP_id <- gr$ID[gr$type == "transposable_element" &
+                              gr$TSD != "not_found" &
+                              !is.na(gr$trna_id)]
+  DLP_id <- gr$ID[gr$type == "transposable_element" &
+                          gr$TSD == "not_found" &
+                          !is.na(gr$trna_id)]
+  Rank <- character(length(gr))
+  ID <- unlist(ifelse(is.na(gr$ID), gr$Parent, gr$ID))
+
+  Rank[ID %in% DL_id] <- "DL"
+  Rank[ID %in% DLT_id] <- "DLT"
+  Rank[ID %in% DLP_id] <- "DLP"
+  Rank[ID %in% DLTP_id] <- "DLTP"
+  return(Rank)
+}
+
 get_te_statistics <- function(gr, RT) {
   DOMAINS_LTR <- gr[gr$type == "transposable_element" &
                       gr$TSD == "not_found" &