diff dante_ltr_search.xml @ 7:c33d6583e548 draft

"planemo upload commit 50884f7f0269a0bbde078f24fe5020975693bcd9"
author petr-novak
date Fri, 24 Jun 2022 14:19:48 +0000
parents b91ca438a1cb
children 9de392f2fc02
line wrap: on
line diff
--- a/dante_ltr_search.xml	Thu May 19 08:21:55 2022 +0000
+++ b/dante_ltr_search.xml	Fri Jun 24 14:19:48 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="dante_ltr_search" name="DANTE_LTR retrotransposon identification" version="0.1.5" python_template_version="3.5">
+<tool id="dante_ltr_search" name="DANTE_LTR retrotransposon identification" version="0.1.6" python_template_version="3.5">
     <requirements>
         <requirement type="package">blast</requirement>
         <requirement type="package">r-optparse</requirement>
@@ -8,31 +8,37 @@
 
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        Rscript ${__tool_directory__}/extract_putative_ltr.R --gff3 '$dante' --reference_sequence '$reference' --output output --cpu 32
+        Rscript ${__tool_directory__}/extract_putative_ltr.R --gff3 '$dante' --reference_sequence '$reference' -M $max_missing --output output --cpu 32
         &&
         mv output.gff3 $te_ltr_gff
+        &&
+        mv output_statistics.csv $statistics
     ]]></command>
     <inputs>
         <param type="data" name="dante" format="gff3" label="Filtered GFF3 output from DANTE pipeline"/>
         <param type="data" name="reference" format="fasta" label="Reference sequence matching DANTE output" />
+        <param type="integer" name="max_missing" min="0" max="3" value="1" label="Maximum number of missing protein domains to tolerate in full length retrotransposon" />
     </inputs>
     <outputs>
         <data name="te_ltr_gff" format="gff3" label="LTR retrotransposons annotation (GFF3)
         based on DANTE annotation $dante.hid and reference $reference.hid" />
+        <data name="statistics" format="tabular" label="LTR retrotransposons detection
+        summary based on $dante.hid and reference $reference.hid" />
     </outputs>
     <help><![CDATA[
         This tool uses output from DANTE annotation pipeline to identify full length LTR
         transposable elements. Output is in the GFF3 format and include annotation of
-        5' and 3' Longe Terminal Repeats, Target Site Duplication (TSD) and primer binding site (PBS).
+        5' and 3' Long Terminal Repeats, Target Site Duplication (TSD) and primer binding site (PBS).
+
+        All identified elements contain a set of protein domains as defined in
+        REXdb_. Based on the results of detection of structural features,
+        elements fall into five categories:
 
-        All identified elements contains complete set of protein domains as defined in
-        REXdb_. Based on the results detection  structural feature,
-        elements falls into four categories:
-
-        - elements with domains, 5'LTR, 3'LTR, TSD and PBS
-        - elements with domains, 5'LTR, 3'LTR and PBS (TSD was not found)
-        - elements with domains, 5' LTR, 3'LTR, TSD (PBS was not found)
-        - elements with protein domains, 5'LTR and 3'LTR (PBS and LDS were not found)
+        - elements with domains, 5'LTR, 3'LTR, TSD and PBS - rank DLTP
+        - elements with domains, 5'LTR, 3'LTR and PBS (TSD was not found) - rank DLP
+        - elements with domains, 5'LTR, 3'LTR, TSD (PBS was not found) - rank DLT
+        - elements with protein domains, 5'LTR and 3'LTR (PBS and TSD were not found) - rank DL
+        - elements as clusters of protein domains with the same classification, no LTRs - rank D
 
        .. _REXdb: https://doi.org/10.1186/s13100-018-0144-1