changeset 2:7f1032da7a0a draft

Uploaded
author petr-novak
date Mon, 21 Feb 2022 10:35:13 +0000
parents 814cba36e435
children 4ea506b39297
files README.org clean_rm_output.R repeat_annotate_custom.xml
diffstat 3 files changed, 21 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/README.org	Mon Feb 21 10:21:39 2022 +0000
+++ b/README.org	Mon Feb 21 10:35:13 2022 +0000
@@ -30,5 +30,6 @@
 
 #+begin_comment
 create tarball for toolshed:
-tar -czvf ../repeat_annotation_pipeline.tar.gz --exclude test_data --exclude .git  --exclude tmp  .
+tar -czvf ../repeat_annotation_pipeline.tar.gz --exclude test_data \
+--exclude .git  --exclude tmp  --exclude hg_repository --exclude .idea --exclude .gitignore .
 #+end_comment
--- a/clean_rm_output.R	Mon Feb 21 10:21:39 2022 +0000
+++ b/clean_rm_output.R	Mon Feb 21 10:35:13 2022 +0000
@@ -16,13 +16,13 @@
   new_annot_lca = lca_annot[new_annot] 
   #new_annot_lca = sapply(sapply(gff_names, unique), resolve_name)
   strand_attribute = sapply(sapply(gff_strands, unique), paste, collapse="|")
-  gff_disjoin$strands=strand_attribute
   gff_disjoin$source="RM"
   gff_disjoin$type="repeat"
   gff_disjoin$score=NA
   gff_disjoin$phase=NA
   gff_disjoin$Name=new_annot_lca
   gff_disjoin$Original_names=new_annot
+  gff_disjoin$strands=strand_attribute
   gff_disjoin$revmap=NULL
   return(gff_disjoin)
 }
@@ -45,11 +45,19 @@
   }
 }
 
+convert_names <- function(n, old_sep = "|" , new_sep = "\""){
+  # replace existing new_sep characters with "-", then convert old_sep to new_sep
+  n_new = gsub(old_sep, new_sep,
+               gsub(new_sep,"-", n, fixed = TRUE),
+               fixed = TRUE)
+  return(n_new)
+}
 
 
 infile = commandArgs(T)[1]
 outfile = commandArgs(T)[2]
 
+
 ## infile = "./test_data/raw_rm.out"
 
 rm_out = read.table(infile, as.is=TRUE, sep="", skip = 2, fill=TRUE, header=FALSE, col.names=paste0("V",1:16))
@@ -57,7 +65,15 @@
 gff = GRanges(seqnames = rm_out$V5, ranges = IRanges(start = rm_out$V6, end=rm_out$V7))
 
 # repeat class after # symbol - syntax 1
-gff$Name=rm_out$V11
+# detect separator
+# if "|" is present replace "|" -> "/" and "/" -> "-"
+if (any(grepl("|", rm_out$V11, fixed = TRUE))){
+  gff$Name <- convert_names(rm_out$V11, old_sep = "|", new_sep = "/")
+  message('replacing classification separator character "|" with "/"')
+  print(gff)
+}else{
+  gff$Name <- rm_out$V11
+}
 
 ## if repeat type is specified by double underscore:
 ## then rm_out$V11 is unspecified
--- a/repeat_annotate_custom.xml	Mon Feb 21 10:21:39 2022 +0000
+++ b/repeat_annotate_custom.xml	Mon Feb 21 10:35:13 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="repeat_annotate" name="RepeatExplorer Based Assembly Annotation" version="0.1.1" python_template_version="3.5">
+<tool id="repeat_annotate" name="RepeatExplorer Based Assembly Annotation" version="0.1.2" python_template_version="3.5">
     <requirements>
         <requirement type="package">repeatmasker</requirement>
         <requirement type="package">bioconductor-rtracklayer</requirement>