changeset 6:b91ca438a1cb draft

"planemo upload commit 9633fb98932151f059ce02a0ce202a4374ef8d68"
author petr-novak
date Thu, 19 May 2022 08:21:55 +0000
parents 0c3111ab729b
children c33d6583e548
files clean_dante_ltr.xml clean_ltr.R dante_ltr_search.xml
diffstat 3 files changed, 25 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/clean_dante_ltr.xml	Mon May 16 07:50:41 2022 +0000
+++ b/clean_dante_ltr.xml	Thu May 19 08:21:55 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="clean_dante_ltr" name="DANTE_LTR transposamble elements filtering" version="0.1.5" python_template_version="3.5">
+<tool id="clean_dante_ltr" name="DANTE_LTR retrotransposons filtering" version="0.1.5" python_template_version="3.5">
     <requirements>
 
         <requirement type="package">r-optparse</requirement>
@@ -24,33 +24,30 @@
 
     ]]></command>
     <inputs>
-        <param type="data" name="dante_ltr" format="gff3" />
-        <param type="data" name="reference" format="fasta" />
+        <param type="data" name="dante_ltr" format="gff3"
+               label="GFF3 output from DANTE_LTR retrotransposon identification pipeline"/>
+        <param type="data" name="reference" format="fasta" label="Reference sequence matching input GFF3" />
     </inputs>
     <outputs>
-        <data name="dante_ltr_clean" format="gff3" label="Annotation of validated LTR transposable
-        elements based on annotation $dante_ltr.hid and reference $reference.hid"/>
-        <data name="rm_lib" format="fasta" label="Non-redundant library of LTR transposable
-        elements based on annotation $dante_ltr.hid and reference $reference.hid"/>
-
-        <data name="te_full" format="fasta" label="Full length LTR transposable
-        elements based on annotation $dante_ltr.hid and reference $reference.hid"/>
+        <data name="dante_ltr_clean" format="gff3"
+              label="Validated LTR retrotransposons annotation (GFF3) based on annotation
+               $dante_ltr.hid and reference $reference.hid"/>
+        <data name="rm_lib" format="fasta" label="Non-redundant library of LTR retrotransposons (FASTA) based on annotation $dante_ltr.hid and reference $reference.hid"/>
 
-        <data name="ltr5" format="fasta" label="5'LTR of transposable
-        elements based on annotation $dante_ltr.hid and reference $reference.hid"/>
+        <data name="te_full" format="fasta" label="Library of full length LTR retrotransposons (FASTA) based on annotation $dante_ltr.hid and reference $reference.hid"/>
+
+        <data name="ltr5" format="fasta" label="Library of 5'LTR of retrotransposons (FASTA) based on annotation $dante_ltr.hid and reference $reference.hid"/>
 
-        <data name="ltr3" format="fasta" label="3'LTR of transposable
-        elements based on annotation $dante_ltr.hid and reference $reference.hid"/>
+        <data name="ltr3" format="fasta" label="Library of 3'LTR of retrotransposons (FASTA) based on annotation $dante_ltr.hid and reference $reference.hid"/>
 
-        <data name="summary" format="pdf" label="Summary of TE and LTR lenghts based on
-         $dante_ltr.hid and reference $reference.hid"/>
+        <data name="summary" format="pdf" label="LTR retrotransposons lengths summary based on $dante_ltr.hid and reference $reference.hid"/>
 
     </outputs>
     <help><![CDATA[
-        This tool takes output from DANTE_LTR search identifies good quality transposable elements.
-         Good quality TE are considered those which does not have any cross-similarity between distinct lineages.
+        This tool takes output from DANTE_LTR search and identifies good quality retrotransposons.
+         Good quality retrotransposons are considered those which do not have any cross-similarity between distinct lineages.
          Output from this tool is a annotation in GFF3 format and
-         non-redundant library of elements for custom RepeatMasker search.
+         libraries of elements for custom RepeatMasker search.
 
     ]]></help>
 </tool>
\ No newline at end of file
--- a/clean_ltr.R	Mon May 16 07:50:41 2022 +0000
+++ b/clean_ltr.R	Thu May 19 08:21:55 2022 +0000
@@ -177,10 +177,14 @@
 gff_te <- gff_out[gff_out$type %in% "transposable_element"]
 gff_5ltr <- gff_out[gff_out$LTR %in% "5LTR"]
 gff_3ltr <- gff_out[gff_out$LTR %in% "3LTR"]
+
 full_te <- getSeqNamed(s, gff_te)
+names(full_te) <- paste0(gff_te$ID,":",names(full_te))
 ltr5 <-  getSeqNamed(s, gff_5ltr)
+names(ltr5) <-  paste0(gff_5ltr$Parent,":",names(ltr5))
 ltr3 <-  getSeqNamed(s, gff_3ltr)
-inc <-  gff_te$Rank != "DL"
+names(ltr3) <- paste0(gff_3ltr$Parent,":",names(ltr3))
+inc <- gff_te$Rank != "DL"
 
 writeXStringSet(seq_representative, paste0(opt$output, "_RM_lib_non_redundant.fasta"))
 writeXStringSet(full_te, paste0(opt$output, "_RM_lib_full_TE.fasta"))
--- a/dante_ltr_search.xml	Mon May 16 07:50:41 2022 +0000
+++ b/dante_ltr_search.xml	Thu May 19 08:21:55 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="dante_ltr_search" name="DANTE_LTR transposable element identification" version="0.1.5" python_template_version="3.5">
+<tool id="dante_ltr_search" name="DANTE_LTR retrotransposon identification" version="0.1.5" python_template_version="3.5">
     <requirements>
         <requirement type="package">blast</requirement>
         <requirement type="package">r-optparse</requirement>
@@ -13,12 +13,12 @@
         mv output.gff3 $te_ltr_gff
     ]]></command>
     <inputs>
-        <param type="data" name="dante" format="gff3" label="Filtered gff3 output from DANTE pipeline"/>
+        <param type="data" name="dante" format="gff3" label="Filtered GFF3 output from DANTE pipeline"/>
         <param type="data" name="reference" format="fasta" label="Reference sequence matching DANTE output" />
     </inputs>
     <outputs>
-        <data name="te_ltr_gff" format="gff3" label="Annotation of detected LTR transposable elements
-         based on the annotation $dante.hid and reference $reference.hid" />
+        <data name="te_ltr_gff" format="gff3" label="LTR retrotransposons annotation (GFF3)
+        based on DANTE annotation $dante.hid and reference $reference.hid" />
     </outputs>
     <help><![CDATA[
         This tool uses output from DANTE annotation pipeline to identify full length LTR