Mercurial > repos > csbl > repeatmodeler
diff repeatmodeler.xml @ 1:dda44fd49bcd draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmodeler commit a4bb321c4a8bd6e8d331df6ed840e00d1c4599f2"
author | iuc |
---|---|
date | Thu, 26 Aug 2021 13:25:32 +0000 |
parents | 4f0c878b36d4 |
children | 41bfbaf3c959 |
line wrap: on
line diff
--- a/repeatmodeler.xml Tue Nov 24 04:14:46 2020 +0000 +++ b/repeatmodeler.xml Thu Aug 26 13:25:32 2021 +0000 @@ -1,135 +1,52 @@ -<tool id="repeatmodeler" name="RepeatModeler - Model repetitive DNA" version="0.1.0" python_template_version="3.5"> +<tool id="repeatmodeler" name="RepeatModeler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> + <description>Model repetitive DNA</description> + <macros> + <import>macros.xml</import> + </macros> <requirements> - <requirement type="package" version="2.0.1">repeatmodeler</requirement> + <expand macro="requirements" /> </requirements> <command detect_errors="exit_code"><![CDATA[ - BuildDatabase -name '$name' '$input_file' && RepeatModeler -database '$name' -pa '$pa' && cp '$name'-families.fa '$output' +BuildDatabase -name 'rmdb' '$input_file' + +&& + +## "RMBlast jobs will use 4 cores each" +pa=\$(( (\${GALAXY_SLOTS:-1}+3)/4 )) + +&& + +RepeatModeler -database 'rmdb' -pa \$pa ]]></command> <inputs> <param type="data" name="input_file" format="fasta" label="Input genome fasta"/> - <param argument="-name" type="text" value="" label="Title for building database" /> - <param argument="-pa" type="text" value="" label="Numer of paralleled job: # of nodes" /> </inputs> <outputs> - <!-- <data format="fasta" name="RepeatModels" from_work_dir="*-families.fa" label="${tool.name} on ${on_string}: RepeatModels::FASTA" /> --> - <!-- <data format="txt" name="StockholmFormat" from_work_dir="*-families.stk" label="${tool.name} on ${on_string}: RepeatModels::StockholmFormat" /> --> - <data format="fasta" name="output" label="${tool.name} on ${on_string}: RepeatModels::FASTA" /> + <data format="fasta" name="sequences" from_work_dir="rmdb-families.fa" label="${tool.name} on ${on_string}: consensus sequences" /> + <data format="stockholm" name="seeds" from_work_dir="rmdb-families.stk" label="${tool.name} on ${on_string}: seed alignments" /> </outputs> <tests> <test> - <param name="input_file" value="eco.fasta" ftype="fasta"/> + <param name="input_file" value="eco.fasta.gz" ftype="fasta.gz"/> <param name="name" value="eco" /> <param name="pa" value="4" /> - <output name="output" file="consensi.fa.classified" compare="sim_size" delta_frac="0.1" /> + <output name="sequences" ftype="fasta"> + <assert_contents> + <has_text text="( RepeatScout Family Size ="/> + <has_text text="rnd-1_family-0"/> + </assert_contents> + </output> + <output name="seeds" ftype="stockholm"> + <assert_contents> + <has_text text="#=GF DE RepeatModeler Generated"/> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ - RepeatModeler - 2.0.1 - - NAME - RepeatModeler - Model repetitive DNA - - SYNOPSIS - RepeatModeler [-options] -database <XDF Database> - - DESCRIPTION - The options are: - - -h(elp) - Detailed help - - -database <DBNAME> - The name of the sequence database to run an analysis on. This is the - name that was provided to the BuildDatabase script using the "-name" - option. - - -pa # - Specify the number of parallel search jobs to run. RMBlast jobs will - use 4 cores each and ABBlast jobs will use a single core each. i.e. - on a machine with 12 cores and running with RMBlast you would use - -pa 3 to fully utilize the machine. - - -recoverDir <Previous Output Directory> - If a run fails in the middle of processing, it may be possible - recover some results and continue where the previous run left off. - Simply supply the output directory where the results of the failed - run were saved and the program will attempt to recover and continue - the run. - - -srand # - Optionally set the seed of the random number generator to a known - value before the batches are randomly selected ( using Fisher Yates - Shuffling ). This is only useful if you need to reproduce the sample - choice between runs. This should be an integer number. - - -LTRStruct [optional] - Run the LTR structural discovery pipeline ( LTR_Harvest and - LTR_retreiver ) and combine results with the RepeatScout/RECON - pipeline. [optional] - - -genomeSampleSizeMax # - Optionally change the maximum bp of the genome to sample in all - rounds of RECON (default=243000000). - - CONFIGURATION OVERRIDES - -ltr_retriever_dir <string> - The path to the installation of the LTR_Retriever structural LTR - analysis package. +RepeatModeler is a de novo transposable element (TE) family identification and modeling package. At the heart of RepeatModeler are three de-novo repeat finding programs ( RECON, RepeatScout and LtrHarvest/Ltr_retriever ) which employ complementary computational methods for identifying repeat element boundaries and family relationships from sequence data. - -rmblast_dir <string> - The path to the installation of the RMBLAST sequence alignment - program. - - -repeatmasker_dir <string> - The path to the installation of RepeatMasker. - - -trf_prgm <string> - The full path including the name for the TRF program ( 4.0.9 or - higher ) - - -ninja_dir <string> - The path to the installation of the Ninja phylogenetic analysis - package. - - -recon_dir <string> - The path to the installation of the RECON de-novo repeatfinding - program. - - -genometools_dir <string> - The path to the installation of the GenomeTools package. - - -abblast_dir <string> - The path to the installation of the ABBLAST sequence alignment - program. - - -rscout_dir <string> - The path to the installation of the RepeatScout ( 1.0.6 or higher ) - de-novo repeatfinding program. - - -mafft_dir <string> - The path to the installation of the MAFFT multiple alignment - program. - - -cdhit_dir <string> - The path to the installation of the CD-Hit sequence clustering - package. - - SEE ALSO - RepeatMasker, RMBlast - - COPYRIGHT - Copyright 2005-2019 Institute for Systems Biology - - AUTHOR - RepeatModeler: - Robert Hubley <rhubley@systemsbiology.org> - Arian Smit <asmit@systemsbiology.org> - - LTR Pipeline Extensions: - Jullien Michelle Flynn <jmf422@cornell.edu> +RepeatModeler assists in automating the runs of the various algorithms given a genomic database, clustering redundant results, refining and classifying the families and producing a high quality library of TE families suitable for use with RepeatMasker and ultimately for submission to the Dfam database (http://dfam.org). ]]></help> - <citations> - <citation type="doi">10.1073/pnas.1921046117</citation> - <citation type="doi">10.1186/s13059-018-1577-z</citation> - </citations> + <expand macro="citations" /> </tool>