Mercurial > repos > petr-novak > repeat_annotation_pipeline3
changeset 2:7f1032da7a0a draft
Uploaded
author | petr-novak |
---|---|
date | Mon, 21 Feb 2022 10:35:13 +0000 |
parents | 814cba36e435 |
children | 4ea506b39297 |
files | README.org clean_rm_output.R repeat_annotate_custom.xml |
diffstat | 3 files changed, 21 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/README.org Mon Feb 21 10:21:39 2022 +0000 +++ b/README.org Mon Feb 21 10:35:13 2022 +0000 @@ -30,5 +30,6 @@ #+begin_comment create tarball for toolshed: -tar -czvf ../repeat_annotation_pipeline.tar.gz --exclude test_data --exclude .git --exclude tmp . +tar -czvf ../repeat_annotation_pipeline.tar.gz --exclude test_data \ +--exclude .git --exclude tmp --exclude hg_repository --exclude .idea --exclude .gitignore . #+end_comment
--- a/clean_rm_output.R Mon Feb 21 10:21:39 2022 +0000 +++ b/clean_rm_output.R Mon Feb 21 10:35:13 2022 +0000 @@ -16,13 +16,13 @@ new_annot_lca = lca_annot[new_annot] #new_annot_lca = sapply(sapply(gff_names, unique), resolve_name) strand_attribute = sapply(sapply(gff_strands, unique), paste, collapse="|") - gff_disjoin$strands=strand_attribute gff_disjoin$source="RM" gff_disjoin$type="repeat" gff_disjoin$score=NA gff_disjoin$phase=NA gff_disjoin$Name=new_annot_lca gff_disjoin$Original_names=new_annot + gff_disjoin$strands=strand_attribute gff_disjoin$revmap=NULL return(gff_disjoin) } @@ -45,11 +45,19 @@ } } +convert_names <- function(n, old_sep = "|" , new_sep = "\""){ + # remove all characters which are new_sep with - + n_new = gsub(old_sep, new_sep, + gsub(new_sep,"-", n, fixed = TRUE), + fixed = TRUE) + return(n_new) +} infile = commandArgs(T)[1] outfile = commandArgs(T)[2] + ## infile = "./test_data/raw_rm.out" rm_out = read.table(infile, as.is=TRUE, sep="", skip = 2, fill=TRUE, header=FALSE, col.names=paste0("V",1:16)) @@ -57,7 +65,15 @@ gff = GRanges(seqnames = rm_out$V5, ranges = IRanges(start = rm_out$V6, end=rm_out$V7)) # repeat class after # symbol - syntax 1 -gff$Name=rm_out$V11 +# detect separator +# if "|" is present replace "|" -> "/" and "/" -> "-" +if (any(grepl("|", rm_out$V11, fixed = TRUE))){ + gff$Name <- convert_names(rm_out$V11, old_sep = "|", new_sep = "/") + message('replacing classification separator character "|" with "/"') + print(gff) +}else{ + gff$Name <- rm_out$V11 +} ## is repeat type is specifies by double underscore: ## then rm_out$V11 is unspecified
--- a/repeat_annotate_custom.xml Mon Feb 21 10:21:39 2022 +0000 +++ b/repeat_annotate_custom.xml Mon Feb 21 10:35:13 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="repeat_annotate" name="RepeatExplorer Based Assembly Annotation" version="0.1.1" python_template_version="3.5"> +<tool id="repeat_annotate" name="RepeatExplorer Based Assembly Annotation" version="0.1.2" python_template_version="3.5"> <requirements> <requirement type="package">repeatmasker</requirement> <requirement type="package">bioconductor-rtracklayer</requirement>