diff repeatmodeler.xml @ 1:dda44fd49bcd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmodeler commit a4bb321c4a8bd6e8d331df6ed840e00d1c4599f2"
author iuc
date Thu, 26 Aug 2021 13:25:32 +0000
parents 4f0c878b36d4
children 41bfbaf3c959
line wrap: on
line diff
--- a/repeatmodeler.xml	Tue Nov 24 04:14:46 2020 +0000
+++ b/repeatmodeler.xml	Thu Aug 26 13:25:32 2021 +0000
@@ -1,135 +1,50 @@
-<tool id="repeatmodeler" name="RepeatModeler - Model repetitive DNA" version="0.1.0" python_template_version="3.5">
+<tool id="repeatmodeler" name="RepeatModeler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
+    <description>Model repetitive DNA</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <requirements>
-        <requirement type="package" version="2.0.1">repeatmodeler</requirement>
+        <expand macro="requirements" />
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        BuildDatabase -name '$name' '$input_file' && RepeatModeler -database '$name' -pa '$pa' && cp '$name'-families.fa '$output'
+BuildDatabase -name 'rmdb' '$input_file'
+
+&&
+
+## Each RMBlast job uses 4 cores, so -pa is GALAXY_SLOTS/4 rounded up (integer ceiling; 1 when GALAXY_SLOTS is unset).
+pa=\$(( (\${GALAXY_SLOTS:-1}+3)/4 ))
+
+&&
+
+RepeatModeler -database 'rmdb' -pa \$pa
     ]]></command>
     <inputs>
         <param type="data" name="input_file" format="fasta" label="Input genome fasta"/>
-        <param argument="-name" type="text" value="" label="Title for building database" />
-        <param argument="-pa" type="text" value="" label="Numer of paralleled job: # of nodes" />
     </inputs>
     <outputs>
-      <!-- <data format="fasta" name="RepeatModels" from_work_dir="*-families.fa" label="${tool.name} on ${on_string}: RepeatModels::FASTA" /> -->
-      <!-- <data format="txt" name="StockholmFormat" from_work_dir="*-families.stk" label="${tool.name} on ${on_string}: RepeatModels::StockholmFormat" /> -->
-      <data format="fasta" name="output" label="${tool.name} on ${on_string}: RepeatModels::FASTA" />
+        <data format="fasta" name="sequences" from_work_dir="rmdb-families.fa" label="${tool.name} on ${on_string}: consensus sequences" />
+        <data format="stockholm" name="seeds" from_work_dir="rmdb-families.stk" label="${tool.name} on ${on_string}: seed alignments" />
     </outputs>
     <tests>
         <test>
-            <param name="input_file" value="eco.fasta" ftype="fasta"/>
-            <param name="name" value="eco" />
-            <param name="pa" value="4" />
+            <param name="input_file" value="eco.fasta.gz" ftype="fasta.gz"/>
-            <output name="output" file="consensi.fa.classified" compare="sim_size" delta_frac="0.1" />
+            <output name="sequences" ftype="fasta">
+                <assert_contents>
+                    <has_text text="( RepeatScout Family Size ="/>
+                    <has_text text="rnd-1_family-0"/>
+                </assert_contents>
+            </output>
+            <output name="seeds" ftype="stockholm">
+                <assert_contents>
+                    <has_text text="#=GF DE    RepeatModeler Generated"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
-      RepeatModeler - 2.0.1
-
-      NAME
-          RepeatModeler - Model repetitive DNA
-
-      SYNOPSIS
-            RepeatModeler [-options] -database <XDF Database>
-
-      DESCRIPTION
-          The options are:
-
-          -h(elp)
-              Detailed help
-
-          -database <DBNAME>
-              The name of the sequence database to run an analysis on. This is the
-              name that was provided to the BuildDatabase script using the "-name"
-              option.
-
-          -pa #
-              Specify the number of parallel search jobs to run. RMBlast jobs will
-              use 4 cores each and ABBlast jobs will use a single core each. i.e.
-              on a machine with 12 cores and running with RMBlast you would use
-              -pa 3 to fully utilize the machine.
-
-          -recoverDir <Previous Output Directory>
-              If a run fails in the middle of processing, it may be possible
-              recover some results and continue where the previous run left off.
-              Simply supply the output directory where the results of the failed
-              run were saved and the program will attempt to recover and continue
-              the run.
-
-          -srand #
-              Optionally set the seed of the random number generator to a known
-              value before the batches are randomly selected ( using Fisher Yates
-              Shuffling ). This is only useful if you need to reproduce the sample
-              choice between runs. This should be an integer number.
-
-          -LTRStruct [optional]
-              Run the LTR structural discovery pipeline ( LTR_Harvest and
-              LTR_retreiver ) and combine results with the RepeatScout/RECON
-              pipeline. [optional]
-
-          -genomeSampleSizeMax #
-              Optionally change the maximum bp of the genome to sample in all
-              rounds of RECON (default=243000000).
-
-      CONFIGURATION OVERRIDES
-          -ltr_retriever_dir <string>
-              The path to the installation of the LTR_Retriever structural LTR
-              analysis package.
+RepeatModeler is a de novo transposable element (TE) family identification and modeling package. At the heart of RepeatModeler are three de novo repeat-finding programs (RECON, RepeatScout and LTRharvest/LTR_retriever) which employ complementary computational methods for identifying repeat element boundaries and family relationships from sequence data.
 
-          -rmblast_dir <string>
-              The path to the installation of the RMBLAST sequence alignment
-              program.
-
-          -repeatmasker_dir <string>
-              The path to the installation of RepeatMasker.
-
-          -trf_prgm <string>
-              The full path including the name for the TRF program ( 4.0.9 or
-              higher )
-
-          -ninja_dir <string>
-              The path to the installation of the Ninja phylogenetic analysis
-              package.
-
-          -recon_dir <string>
-              The path to the installation of the RECON de-novo repeatfinding
-              program.
-
-          -genometools_dir <string>
-              The path to the installation of the GenomeTools package.
-
-          -abblast_dir <string>
-              The path to the installation of the ABBLAST sequence alignment
-              program.
-
-          -rscout_dir <string>
-              The path to the installation of the RepeatScout ( 1.0.6 or higher )
-              de-novo repeatfinding program.
-
-          -mafft_dir <string>
-              The path to the installation of the MAFFT multiple alignment
-              program.
-
-          -cdhit_dir <string>
-              The path to the installation of the CD-Hit sequence clustering
-              package.
-
-      SEE ALSO
-              RepeatMasker, RMBlast
-
-      COPYRIGHT
-           Copyright 2005-2019 Institute for Systems Biology
-
-      AUTHOR
-           RepeatModeler:
-             Robert Hubley <rhubley@systemsbiology.org>
-             Arian Smit <asmit@systemsbiology.org>
-
-           LTR Pipeline Extensions:
-             Jullien Michelle Flynn <jmf422@cornell.edu>
+RepeatModeler assists in automating the runs of the various algorithms given a genomic database, clustering redundant results, refining and classifying the families, and producing a high-quality library of TE families suitable for use with RepeatMasker and ultimately for submission to the Dfam database (http://dfam.org).
     ]]></help>
-    <citations>
-      <citation type="doi">10.1073/pnas.1921046117</citation>
-      <citation type="doi">10.1186/s13059-018-1577-z</citation>
-    </citations>
+    <expand macro="citations" />
 </tool>