Repository 'repeatmodeler'
hg clone https://toolshed.g2.bx.psu.edu/repos/csbl/repeatmodeler

Changeset 1:dda44fd49bcd (2021-08-26)
Previous changeset 0:4f0c878b36d4 (2020-11-24) Next changeset 2:41bfbaf3c959 (2021-11-27)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmodeler commit a4bb321c4a8bd6e8d331df6ed840e00d1c4599f2"
modified:
repeatmodeler.xml
added:
macros.xml
test-data/consensi.fa.classified.gz
test-data/eco.fasta.gz
test-data/seeds.stk.gz
removed:
test-data/consensi.fa.classified
test-data/eco.fasta
b
diff -r 4f0c878b36d4 -r dda44fd49bcd macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Aug 26 13:25:32 2021 +0000
b
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Version of the wrapped repeatmodeler package and the Galaxy wrapper revision suffix -->
+    <token name="@TOOL_VERSION@">2.0.2a</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+
+    <!-- Package requirement, pinned to the tool version token -->
+    <xml name="requirements">
+        <requirement version="@TOOL_VERSION@" type="package">repeatmodeler</requirement>
+    </xml>
+
+    <!-- DOI citations attached to the tool -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1073/pnas.1921046117</citation>
+            <citation type="doi">10.1186/s13059-018-1577-z</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 4f0c878b36d4 -r dda44fd49bcd repeatmodeler.xml
--- a/repeatmodeler.xml Tue Nov 24 04:14:46 2020 +0000
+++ b/repeatmodeler.xml Thu Aug 26 13:25:32 2021 +0000
[
@@ -1,135 +1,52 @@
-<tool id="repeatmodeler" name="RepeatModeler - Model repetitive DNA" version="0.1.0" python_template_version="3.5">
+<tool id="repeatmodeler" name="RepeatModeler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
+    <description>Model repetitive DNA</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <requirements>
-        <requirement type="package" version="2.0.1">repeatmodeler</requirement>
+        <expand macro="requirements" />
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        BuildDatabase -name '$name' '$input_file' && RepeatModeler -database '$name' -pa '$pa' && cp '$name'-families.fa '$output'
+BuildDatabase -name 'rmdb' '$input_file'
+
+&&
+
+## "RMBlast jobs will use 4 cores each"
+## so request ceil(GALAXY_SLOTS / 4) parallel jobs; GALAXY_SLOTS defaults to 1, which still yields one job
+pa=\$(( (\${GALAXY_SLOTS:-1}+3)/4 ))
+
+&&
+
+RepeatModeler -database 'rmdb' -pa \$pa
     ]]></command>
     <inputs>
         <param type="data" name="input_file" format="fasta" label="Input genome fasta"/>
-        <param argument="-name" type="text" value="" label="Title for building database" />
-        <param argument="-pa" type="text" value="" label="Numer of paralleled job: # of nodes" />
     </inputs>
     <outputs>
-      <!-- <data format="fasta" name="RepeatModels" from_work_dir="*-families.fa" label="${tool.name} on ${on_string}: RepeatModels::FASTA" /> -->
-      <!-- <data format="txt" name="StockholmFormat" from_work_dir="*-families.stk" label="${tool.name} on ${on_string}: RepeatModels::StockholmFormat" /> -->
-      <data format="fasta" name="output" label="${tool.name} on ${on_string}: RepeatModels::FASTA" />
+        <!-- Collected from the job working directory; the 'rmdb' prefix matches the database name used in the command -->
+        <data format="fasta" name="sequences" from_work_dir="rmdb-families.fa" label="${tool.name} on ${on_string}: consensus sequences" />
+        <data format="stockholm" name="seeds" from_work_dir="rmdb-families.stk" label="${tool.name} on ${on_string}: seed alignments" />
     </outputs>
     <tests>
         <test>
-            <param name="input_file" value="eco.fasta" ftype="fasta"/>
-            <param name="name" value="eco" />
-            <param name="pa" value="4" />
-            <output name="output" file="consensi.fa.classified" compare="sim_size" delta_frac="0.1" />
+            <param name="input_file" value="eco.fasta.gz" ftype="fasta.gz"/>
+            <output name="sequences" ftype="fasta">
+                <assert_contents>
+                    <has_text text="( RepeatScout Family Size ="/>
+                    <has_text text="rnd-1_family-0"/>
+                </assert_contents>
+            </output>
+            <output name="seeds" ftype="stockholm">
+                <assert_contents>
+                    <has_text text="#=GF DE    RepeatModeler Generated"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
-      RepeatModeler - 2.0.1
-
-      NAME
-          RepeatModeler - Model repetitive DNA
-
-      SYNOPSIS
-            RepeatModeler [-options] -database <XDF Database>
-
-      DESCRIPTION
-          The options are:
-
-          -h(elp)
-              Detailed help
-
-          -database <DBNAME>
-              The name of the sequence database to run an analysis on. This is the
-              name that was provided to the BuildDatabase script using the "-name"
-              option.
-
-          -pa #
-              Specify the number of parallel search jobs to run. RMBlast jobs will
-              use 4 cores each and ABBlast jobs will use a single core each. i.e.
-              on a machine with 12 cores and running with RMBlast you would use
-              -pa 3 to fully utilize the machine.
-
-          -recoverDir <Previous Output Directory>
-              If a run fails in the middle of processing, it may be possible
-              recover some results and continue where the previous run left off.
-              Simply supply the output directory where the results of the failed
-              run were saved and the program will attempt to recover and continue
-              the run.
-
-          -srand #
-              Optionally set the seed of the random number generator to a known
-              value before the batches are randomly selected ( using Fisher Yates
-              Shuffling ). This is only useful if you need to reproduce the sample
-              choice between runs. This should be an integer number.
-
-          -LTRStruct [optional]
-              Run the LTR structural discovery pipeline ( LTR_Harvest and
-              LTR_retreiver ) and combine results with the RepeatScout/RECON
-              pipeline. [optional]
-
-          -genomeSampleSizeMax #
-              Optionally change the maximum bp of the genome to sample in all
-              rounds of RECON (default=243000000).
-
-      CONFIGURATION OVERRIDES
-          -ltr_retriever_dir <string>
-              The path to the installation of the LTR_Retriever structural LTR
-              analysis package.
+RepeatModeler is a de novo transposable element (TE) family identification and modeling package. At the heart of RepeatModeler are three de-novo repeat finding programs ( RECON, RepeatScout and LtrHarvest/Ltr_retriever ) which employ complementary computational methods for identifying repeat element boundaries and family relationships from sequence data.
 
-          -rmblast_dir <string>
-              The path to the installation of the RMBLAST sequence alignment
-              program.
-
-          -repeatmasker_dir <string>
-              The path to the installation of RepeatMasker.
-
-          -trf_prgm <string>
-              The full path including the name for the TRF program ( 4.0.9 or
-              higher )
-
-          -ninja_dir <string>
-              The path to the installation of the Ninja phylogenetic analysis
-              package.
-
-          -recon_dir <string>
-              The path to the installation of the RECON de-novo repeatfinding
-              program.
-
-          -genometools_dir <string>
-              The path to the installation of the GenomeTools package.
-
-          -abblast_dir <string>
-              The path to the installation of the ABBLAST sequence alignment
-              program.
-
-          -rscout_dir <string>
-              The path to the installation of the RepeatScout ( 1.0.6 or higher )
-              de-novo repeatfinding program.
-
-          -mafft_dir <string>
-              The path to the installation of the MAFFT multiple alignment
-              program.
-
-          -cdhit_dir <string>
-              The path to the installation of the CD-Hit sequence clustering
-              package.
-
-      SEE ALSO
-              RepeatMasker, RMBlast
-
-      COPYRIGHT
-           Copyright 2005-2019 Institute for Systems Biology
-
-      AUTHOR
-           RepeatModeler:
-             Robert Hubley <rhubley@systemsbiology.org>
-             Arian Smit <asmit@systemsbiology.org>
-
-           LTR Pipeline Extensions:
-             Jullien Michelle Flynn <jmf422@cornell.edu>
+RepeatModeler assists in automating the runs of the various algorithms given a genomic database, clustering redundant results, refining and classifying the families and producing a high quality library of TE families suitable for use with RepeatMasker and ultimately for submission to the Dfam database (http://dfam.org).
     ]]></help>
-    <citations>
-      <citation type="doi">10.1073/pnas.1921046117</citation>
-      <citation type="doi">10.1186/s13059-018-1577-z</citation>
-    </citations>
+    <expand macro="citations" />
 </tool>
b
diff -r 4f0c878b36d4 -r dda44fd49bcd test-data/consensi.fa.classified
--- a/test-data/consensi.fa.classified Tue Nov 24 04:14:46 2020 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,916 +0,0 @@\n->rnd-1_family-1#Unknown ( RepeatScout Family Size = 202, Final Multiple Alignment Size = 100, Localized to 1 out of 1 contigs )\n-ACAAACGCAAAATTGCCTGATGCGCTACGCTTATCAGGCCTTGTATTATC\n-CCTCCAGTGCAGAGAAAATCGGCCAGTTTTCTCTGCCTGCAGTCCGCATG\n-CCGTATCGGGCCTTGGGTTCTAACCTGTTGCGTAGATTTATGCAGCGGAC\n-TGCCTTTCTCCCAAAGTGATAAACCGGACAGTATCATGGACCGGTTTTCC\n-CGGTAATCCGTATTTGCAAGGTTGGTTTCACTATGGAACATGAACTTCAT\n-TATATCGGTATCGACACCGCTAAAGAGAAACTGGATGTCGATGTGTTGCG\n-TCCTGATGGTCGTCATCGCACCAAAAAATTCGCTAACACCACTAAAGGGC\n-ACGATGAGCTGGTGAGCTGGCTGAAAGGTCACAAGATTGACCATGCGCAT\n-ATCTGCATCGAAGCGACCGGCACCTATATGGAACCTGTCGCTGAGTGCCT\n-TTACGATGCTGGCTACATAGTGTCAGTCATTAATCCTGCGCTGGGTAAAG\n-CTTTCGCTCAGAGTGAAGGACTGCGTAACAAGACTGATACCGTGGATGCG\n-CGCATGCTGGCAGAGTTCTGTCGTCAGAAGCGCCCTGCAGCCTGGGAAGC\n-GCCTCACCCGCTTGAACGCGCGTTGCGTGCCCTGGTAGTCCGCCACCAGG\n-CGCTGACAGATATGCACACGCAGGAACTGAATCGCACTGAAACGGCGCGG\n-GAAGTCCAGAGACCGAGCATTGATGCTCACCTTCTGTGGCTTGAAGCAGA\n-GCTGAAGCGTCTTGAGAAGCAGATAAAAGACCTGACAGACGATGATCCGG\n-ATATGAAACACCGCAGGAAACTGCTGGAAAGCATCCCGGGTATCGGAGAG\n-AAAACATCTGCGGTATTGCTGGCTTATATCGGTCTGAAGGACCGCTTCGC\n-CCATGCCAGACAGTTCGCCGCTTTTGCGGGTCTGACACCACGGCGTTATG\n-AATCAGGTAGCAGTGTGAGAGGGGCGAGCCGGATGAGTAAGGCCGGACAT\n-GTGTCGCTTCGCAGGGCGTTGTATATGCCCGCAATGGTAGCCACCAGTAA\n-GACTGAGTGGGGACGGGCGTTCCGCGACCGTCTGGCGGCTAATGGCAAGA\n-AAGGAAAGGTGATTCTCGGCGCGATGATGCGCAAGCTGGCACAGGTGGCG\n-TATGGAGTGCTGAAGTCAGGCGTGCCGTTCGATGCGTCACGGCATAATCC\n-GGTAGCGGCGTAAAAATCGCGGAAGGGATGAAAAAAACAGCGCCTGACGG\n-CGCTGTGTCTGGCATGCCTGCAATCCGGGAAACCGGACCAGGAAAAAACT\n-TGCAGGCCATAACAGTATCTACGTAATCTCTGCAATATATTGAATTTGCA\n-CGATTTTGTAGGCCGGATAAGGCGTTCACGCCGCATCCGGCA\n->rnd-1_family-0#Unknown ( RepeatScout Family Size = 205, Final Multiple Alignment Size = 100, Localized to 1 out of 1 contigs )\n-CAAAGTGCGCTTTGTCATGCCGGATGCGGCGTGAACGCCTTATCCGGCCT\n-ACAAAATCGTGCAAATTCAATANATTGCAGAGATCNTGTAGGCCTGATAA\n-GCGTAGCGCATCAGGCAATTTTGCGTTTG\n->rnd-1_family-2#Unknown ( RepeatScout Family Size = 38, Final Multiple Alignment Size = 38, Localized 
to 1 out of 1 contigs )\n-GTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTTGTGAAATGTTGG\n-GTTAAGTCCCGCAACGAGCGCAACCCTTATCCTTTGTTGCCAGCGGTCCG\n-GCCGGGAACTCAAAGGAGACTGCCAGTGATAAACTGGAGGAAGGTGGGGA\n-TGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACACACGTGCTACA\n-ATGGCGCATACAAAGAGAAGCGACCTCGCGAGAGCAAGCGGACCTCATAA\n-AGTGCGTCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGG\n-AATCGCTAGTAATCGTGGATCAGAATGCCACGGTGAATACGTTCCCGGGC\n-CTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTAG\n-GTAGCTTAACCTTCGGGAGGGCGCTTACCACTTTGTGATTCATGACTGGG\n-GTGAAGTCGTAACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTC\n-CTTACCTTAAAGAATNCGTATCTTCGCAGTGCCCACACAGATTGTCTGAT\n-GAATTGTTAANGAGCAGTAAGGCGTCTACGGGCTTGTAGCTCAGTTGGTT\n-AGAGCGCGCCCCTNATAAGGGCGAGGTCGCCGGTTCGAGTCCAGTCAGGC\n-CCACCAAATTTGCACGGCAAATTTGAAGAGGTTTTAACTACGTGTTATGG\n-GGCGATAGCTCAGCTGGGAGAGCGCCTGCCTTGCAAGCAGGGGGTCGGCG\n-GTTCGATCCCGTCATCGCTCCACCATCTCTGTAGTGATTAAATAAAAAAT\n-ACTTCAGAGTGTACCTGCAAAGGTTCACTGCGAAGTTTTGCTCTTTAAAA\n-ATCTGGATCAAGCTGAAAATTGAAACACTGAACAACGAAAGTTGTTCGTG\n-AGTCTCTCAAATTTTCGCAAC\n->rnd-1_family-3#Unknown ( RepeatScout Family Size = 34, Final Multiple Alignment Size = 34, Localized to 1 out of 1 contigs 
)\n-CAGTAAGCGNNCAGCGAGAACCGTATTGACGGGGATGTGTTACTGAGCAG\n-CTCAGNGCNACGCGCCAGGGCAGCAGNTCGCNNACCCGGTTTGCCGGCCA\n-GTCCTGGCTATGACGTCAAGGACGTAGCGNAGGCAGCTTTCTGGCTCCAC\n-NTCGTTCAGTTTGCACGTNCCGATCAGGCTGTACAGCAGCGCCGCNCGCT\n-CACCACCGCGGTCGGAACCGAAGAACAGGCAGTTTTTNCGGCCCAGACGG\n-CCACTCCCCGCAAGGCGTTCTCNGCGATGTTGTTGTCGATTTCCGCCCAG\n-CCATCACTGCATAGTACGCGTTCAGCGCNNNCCACTGNTTCAGCGCGTAT\n-GCGAACGCTTTCGCCGTATCTGAGTGNCGCGACAGGGTTTTCATCTTTTC\n-ACGCANCCAGCCNTCCAGGGACTTCANCAGTGGCGCGGCTTCGGCTTTTC\n-GCTGACGTTCGGCAAGGCGCTGCTCCGCCGNACATTCCCCTTATCTCTGC\n-CTCGATGGCGTACAGTTCGCCGATCCGCTNCAGGGCTTCCGTCGGTGACG\n-TGGGTGGGCGNTCTTACGTGCACATCGTGGATTTTTCGGCGGGCGTGAGC\n-CCAACAGGCGGCTTCCGTTATCCCGCCGGATTCGTATAACGCCCGTTGAA\n-CCCGGCGTANGCATCCGCCTGCAGCACACCGCTGNANCCGGCAAGGTGAG\n-TCTGCGGATGGATGCCTTTCCTGTCCGGGCTGTAAGCGAACCACACCGCC\n-GGNGCCANCGNTGACCCGGCGTTACGGTCGTCACGGACGTAGGCCCACAG\n-CCGCCCGGTCCGCGTTTTCCCGCTGCCCGGCANCAGNACCGGGACGGGGG\n-TATCATCAGCATGGAGTTTACCGGGCATCAGCACATACTGGC'..b'ATGGCATTCGGTACAGAGCGTCCAGAGATTCGTCTCCT\n-CATTACCACCACCGAACTGAAGTGCAATTCGGTGATCGAGTTCACTGTCA\n-CAGAGGTCAACCACACGACCACAGAGACAGCACTGCCCGGCATCCCTGAG\n-CCAGATATGACGCTTGAGGGAAACACGTGCACTGCCACTGACACGACGCT\n-GTTCACCCTTCAGAATATTCACCCGTCGGGTATTCAGTGTTTTGATTCTG\n-CCCGGTAACGTACGAAGCACAGCCATGTAAAATCCTCGCCATATAGCTTG\n-TCACCAGAGGAAAGAAAATGTCANCGAAGAACAGGACCCGCAGAACAACA\n-ACCCGCAACATCCGCTTTCCNAACCANATGATTGAACAAATTAACATCGC\n-TCTTGAGCANAAAGGGTCCGGGAATTTCTCAGCGTGGGTCATTGAAGCCT\n-GCAG\n->rnd-1_family-25#Unknown ( RepeatScout Family Size = 16, Final Multiple Alignment Size = 16, Localized to 1 out of 1 contigs 
)\n-AGTAAATATCAGTATGCTCTCTCTCATGCTTAGCGCCGCTGAATGCCACC\n-GCCGGAATAACAATCTGCCGGTCAAACGGCTGATCGTCATAAACCCTGAC\n-GGTAATGGTCCCTGATGGCCACCGCTCCGGTGCACGGGAGTCCCGGGGGA\n-AAGCTTTGCCCACTGTTTTAACGAGATCGCCTTCAATCTGGTTCGCGGAC\n-AGTTTTCCCNGAACCCGACAGTTCTCGTTAATCGTGACGTTGTTGAGCGT\n-CCCGGAGTTCGCATTCACGCTACCGCTGATATCCGCATTTTTAGCGGTCA\n-GCTTTCCGTCCGGCGTCAGGGAAAACGCCGGAGGATTGCCGCCGCTGGTA\n-ATGGTGGGGGCCGTCAGGCGCTTCAGGAACACGTCGTTCATGAATATCTG\n-ATTGCCCTGCGCCACAAACATCGGCGTTNCGTTGCCGTTCGCCGGGTTAA\n-TCATCGCGATACGGTCAGCCGCCAGCAGNANCTGGCTCGCTGCCTTCCTG\n-CTGGCCCGTGTCCTCAATNCCGGCGCCAATACCCGCGACATAAGGCGTGC\n-CGTCTTTCGTCTGCTGCANCTTCGACGGCCCACATGCCGTTCAGGTTATC\n-GTTTGCGTCCTTCTGCACGCGCTGTATCTGCTGNATGGCGGCGCTCTGGT\n-CCTCCAGCGTTTTACTGACCGTCTGCGTGATTTCATTGCGGGCNTCCGTG\n-ATGGNGGTCCTCATCTCCGCCATCTCATCCGCAAGCTGGCCGTTATCNAT\n-CAGCGTCCAGCAGCTCCTGAGCCAGATGGGTTTTCCCTATCTCGCCTTTG\n-AAAAAATCCAGATANCCTTCCGCATCATCGCTCGCCCGNCCGACGGCCTC\n-CACGAACGCCGATTTGCCAACCGTGTTCACGCTGCGGACGTAAAAGTAAT\n-AATCATGGCCCGGCTTGATATTGATACTGGCCGGCTACCTCCAGTGACAG\n-CCCCGTGCCAAGATAGCGGGCTGNGGTTTCAACCTGCCTGATATGCGGCA\n-ATCCGCGTTTCCGAGAACCAGAACTCAAACTGTACCGTCGGGTCATAAAC\n-GGCAAGACGCGGCGCGGCGGTTATCTGAAAATAGCCCGGCGTCAGCTCAA\n-TCCGCGACGGGTGCTGCCGGTGCGGCAATCCGGAACGATGCCGGCGGCCG\n-GNTCGCCCTGCTGCCCCCACGCGTTTACCGCCCGGACTGTCAGCCTGTAG\n-TTCCCCGAGCGCCAGACCGCGTGAAGCGGTGCGCGGTNTCCGCCGTCCGG\n-GCCGTGCTGACCAGCCGCTCGCTGCCGTCGTCCGCTGCCGCTGGTCAGNC\n-GNAGCATGAAGCTCACGCCCTTCACCACCCGCGGCGTGTCCCATCGCGCC\n-NGCGCCNGATACTGNCCGCCGGCTGCGCTCACCTCCGCCGTCAGGTGCTG\n-CACTGCCGGCGGGATGACGCCGTTCAGGGTGCCTGACTGCGGCTCAAAGC\n-TGGCCCCGTTATCCACGATGGCTTCTTTTTCCGGTACGTGCTGCACCGCC\n-GTGATGGCAAAGGTGCCGTCCGTGTTTTCCCGGATGGAGACACAGCGGAA\n-CAGGCGACGACGCAGTGACGGCAGGGAGAGTCCCCACACACCGTATGTCG\n-CCACGCCATCAGGCAGGGTACTGACCTGTATCCGGTCCGGCGCGGGGTGT\n-GCGGTGATGTCCACGCTCACCGGCTTACCGCTGCCGTTAATCAGGTTCAC\n-CGTCGNTGTACCTGTCTCCGGCAGCGTCACCTCACGGTCCAGCGTCAGGG\n-TGCGGCTGGCGGCATCGATGGACAGGACACGTCCGCCGGTCATGGTCCCG\n-GCATAGTCGTTATCACAGATTTCAATAACGTCGCCGGGTGCGTGNCGCAG\n-CCCCTGTGACCCGAGCGTGAAATCCACCGTCTGCG
TTTCCAGCAGTCCGG\n-TCTTTATCACCCACAGCCCGGCACGGTGGGCCTGACCGCGACTGGTGCAG\n-CCGAACGCATCCATCTTCAGCAGGTTGCGCCCGTAGCGCAGTATGGCTTC\n-CGGGTCTTCCACCAGTTCCGTGGAGGTCTGCCAGCCGTTCTGCGGGTCGG\n-TGTAATTCACCTCCACCGCCGTGTGGCGGTCCTTCAGGGCGCTGAAGCTG\n-TAGCGGAACCCCACGCCGTTATCATCCACCACCACATCGCTGTTGGTGTA\n-CGGCCACACCACATCCGACGGGCGGTCCTGAACGAACGTCAGCGTCTGGC\n-CGTTCCATACCGGCATACAGCGCATCGCCGAGCAGAAATCACTGAGAACG\n-TCCCACGCCTTACGCTGTTGTGACAGGTACGCATTAAAGGTCATCCGCGG\n-CTCCGTGCCCCCGAAGCCGTCCGGGACCGTCTGGTCGCAGTACTGCGCGA\n-TGGCATACAGCGCCCATTTGTCCACGTCCGCCGCCCCCAGACGTTTTCCC\n-ATGCCGTAGCGCGGGTGAGTCAGCATGTCCCACAGGCACCAGGCCGGGTT\n-GTTGCTGTATGCCGGTTTCAGGCTGCCGTCCCAGATGCCGCTGTACGTGC\n-GTTTTTCCGGGTCATAGTTTGACGGCACCTGGATGATGCGACCGCGGATA\n-TGGTAGTTCACCGTCATCTGCTGGCCGCCGAACTGCTCCGCATCCACCTG\n-CAGCCCCACAATCGCCGTGTTCGGGTAGCACTGTTTCACATCGATGATTT\n-CGGTGTATGACGACCACAGCGTCTTGTTCTGCAGCTGGTCCGTGGTGCTG\n-TCCGCCGTCTCCCTGACCATCCGGATGTTAAAGGGGCGGGGAGGCAGATT\n-ATCCAGAATCACCGAGGCCAGGAACTGCGAGGTGGTCTTGCCGTTAATGG\n-TGACGTCCTTTTCCGTCACCCAGTTACCGTTACGCTGCAGCTGAATCAGC\n-AGGCGGACGGATGCCGGGTTACGGTCACCCTTTGAGGTGGTCTCCACCAG\n-TGACTGCACCCCGAAGGTGACCCGCAGGCGGTCAATGTTCGCGGACGTAA\n-TGGTGCGCGTCACCGGCTTTGCCTTCGTCACTTCCACGCCCAGCGCGGTT\n-TCGGAGCCGGAGGACTCAAAACCTTCCGGCGGTGTCTGCTCCTGCTCCCC\n-GGCGCGCCAGACCGCCGTCACACCGTGTATCACGGGATTACCGTCCGTGT\n-CCGTCAGCGGGGTTTTGTTCACCAGAATACTCTGCAGCCCCTTCACCGGA\n-CCTTCAATCGGCCCTTCACCAATGGCGTCAATCACGCTCATCATCTGCGT\n-GGACTTAAGATTGTCCTTCGCCTCTACCGGCGTGTGCGCCTTGCCGCCAC\n-CTTTACCCATTCTGTCCCCCTCTCCTGT\n'
b
diff -r 4f0c878b36d4 -r dda44fd49bcd test-data/consensi.fa.classified.gz
b
Binary file test-data/consensi.fa.classified.gz has changed
b
diff -r 4f0c878b36d4 -r dda44fd49bcd test-data/eco.fasta
--- a/test-data/eco.fasta Tue Nov 24 04:14:46 2020 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,84902 +0,0 @@\n->CP027599.1 Escherichia coli strain 97-3250 chromosome, complete genome\n-ATCCCGGCCCCGGCAGAACCGACCTATCGTTCTAACGTAAACGTCAAACACACGTTTGATAACTTCGTTG\n-AAGGTAAATCTAACCAACTGGCGCGCGCGGCGGCTCGCCAGGTGGCGGATAACCCTGGCGGTGCCTATAA\n-CCCGTTGTTCCTTTATGGCGGCACGGGTCTGGGTAAAACTCACCTGCTGCATGCGGTGGGTAACGGCATT\n-ATGGCGCGCAAGCCGAATGCCAAAGTGGTTTATATGCACTCCGAGCGCTTTGTTCAGGACATGGTTAAAG\n-CCCTGCAAAACAACGCGATCGAAGAGTTTAAACGCTACTACCGTTCCGTAGATGCACTGCTGATCGACGA\n-TATTCAGTTTTTTGCTAATAAAGAACGATCTCAGGAAGAGTTTTTCCATACCTTCAACGCCCTGCTGGAA\n-GGTAATCAACAGATCATTCTCACCTCGGATCGCTATCCGAAAGAGATCAACGGCGTTGAGGATCGTTTGA\n-AATCCCGCTTCGGTTGGGGACTGACTGTGGCAATCGAACCGCCAGAGCTGGAAACCCGTGTGGCGATCCT\n-GATGAAAAAGGCCGACGAAAACGACATTCGTTTGCCGGGCGAAGTGGCGTTCTTTATCGCCAAGCGTCTA\n-CGATCTAACGTACGTGAGCTGGAAGGGGCGCTGAACCGCGTCATTGCCAATGCCAACTTTACCGGACGGG\n-CGATCACCATCGACTTCGTGCGTGAGGCGCTGCGCGACTTGCTGGCATTGCAGGAAAAACTGGTCACCAT\n-CGACAATATTCAGAAGACGGTGGCGGAGTACTACAAGATCAAAGTCGCGGATCTCCTTTCCAAGCGTCGA\n-TCCCGCTCGGTGGCGCGTCCGCGCCAGATGGCGATGGCGCTGGCGAAAGAGCTGACTAACCACAGTCTGC\n-CGGAGATTGGCGATGCGTTTGGTGGTCGTGACCACACGACGGTGCTTCATGCCTGCCGTAAGATCGAGCA\n-GTTGCGTGAAGAGAGCCACGATATCAAAGAAGATTTTTCAAATTTAATCAGAACATTGTCATCGTAAACC\n-TATGAAATTTACCGTAGAACGTGAGCATTTATTAAAACCGCTACAACAGGTGAGCGGTCCGTTAGGTGGT\n-CGTCCTACGCTACCGATTCTCGGTAATCTGCTGTTACAGGTTGCTGACGGTACGTTGTCGCTGACCGGTA\n-CTGATCTCGAGATGGAAATGGTGGCACGTGTTGCGCTGGTTCAGCCACACGAGCCAGGAGCGACGACCGT\n-TCCGGCGCGCAAATTCTTTGATATCTGCCGTGGTCTGCCTGAAGGCGCGGAAATTGCCGTGCAGCTGGAA\n-GGTGAACGGATGCTGGTACGCTCCGGGCGTAGCCGTTTTTCGCTGTCTACCCTGCCAGCGGCGGATTTCC\n-CGAACCTCGATGACTGGCAGAGTGAAGTCGAATTTACCCTGCCGCAGGCAACGATGAAGCGTCTGATTGA\n-AGCGACCCAGTTTTCGATGGCGCATCAGGACGTTCGCTATTACTTAAATGGTATGCTGTTTGAAACCGAA\n-GGTGAAGAACTGCGCACCGTGGCAACCGACGGCCACCGTCTGGCGGTCTGTTCAATGCCAATTGGTCAAT\n-CTTTGCCAAGCCATTCGGTGATCGTACCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGCGG\n-CGACAATCCGCTGCGCGTGCAGATTGGCAGCAACAATATTCGCGCCCACGTTGGCGACTTTATCTTCACC\n-TCCAAACTGGTGGATGGTCGCTTCCCGGATTACCGCCGCGTTCTGCCGAAGAATCCGGACAAACATCTGG\n-AAGC
TGGCTGCGATCTGCTCAAGCAGGCGTTTGCCCGTGCGGCAATTCTCTCTAACGAGAAATTCCGCGG\n-CGTGCGCCTGTATGTCAGCGAAAACCAGCTGAAAATCACCGCCAACAACCCGGAACAGGAAGAAGCGGAA\n-GAGATCCTCGACGTTACCTATAGCGGTGCGGAGATGGAAATCGGCTTCAACGTCAGCTATGTGCTGGATG\n-TTCTGAACGCGCTGAAATGCGAAAACGTCCGCATGATGCTGACCGATTCGGTTTCCAGCGTGCAGATTGA\n-AGATGCCGCATCACAGTCGGCTGCCTATGTTGTCATGCCAATGAGACTGTAATGTCCCTCACCCGCTTGT\n-TGATCCGCGATTTCCGCAACATTGAAACCGCGGATCTCGCTTTATCTCCCGGCTTTAACTTTCTGGTAGG\n-TGCCAACGGCAGTGGCAAAACCAGCGTGCTGGAAGCCATCTATACGCTCGGCCATGGTCGGGCGTTTCGC\n-AGTTTGCAGATTGGTCGCGTCATTCGCCATGAGCAGGAGGCATTTGTTCTCCATGGGCGATTACAGGGCG\n-AAGAGCGCGAGACGGCGATTGGCTTAACCAAGGACAAACAGGGCGACAGCAAAGTCCGCATCGACGGTAC\n-TGACGGGCATAAAGTCGCGGAACTGGCGCACCTGATGCCAATGCAGCTGATAACGCCAGAAGGGTTTACT\n-TTACTCAACGGCGGCCCCAAATACAGAAGAGCATTCCTCGACTGGGGATGCTTTCACAACGAACCCGGAT\n-TTTTCACCGCCTGGAGCAATCTCAAGCGATTGCTCAAGCAGCGCAATGCGGCGCTGCGCCAGGTGACACG\n-TTACGAACAGCTACGCCCGTGGGATAAAGAACTGATCCCGCTGGCGGAGCAAATCAGCACCTGGCGCGCG\n-GAGTATAGCGCCGGTATCGCGGCCGATATGGCCGATACCTGTAAGCAATTTCTCCCTGAGTTTTCTCTGA\n-CTTTCTCTTTCCAGCGCGGCTGGGAGAAAGAGACAGAATATGCTGAGGTGCTGGAACGTAATTTTGAACG\n-CGATCGCCAGCTAACCTACACCGCGCATGGCCCGCATAAAGCGGACTTACGCATTCGCGCCGACGGTGCG\n-CCGGTGGAAGATACCTTATCGCGTGGGCAGCTTAAGCTGTTGATGTGCGCCTTACGTCTGGCGCAAGGAG\n-AGTTCCTCACCCGTGAAAGCGGGCGGCGGTGTCTCTACCTGATAGATGATTTTGCCTCTGAGCTTGATGA\n-TGAGCGTCGTGGGTTGCTTGCCAGCCGCTTAAAAGCGACGCAATCACAGGTCTTTGTCAGCGCGATCAGT\n-GCTGAACACGTTATAGACATGTCGGACGAAAATTCGAAGATGTTTACCGTGGAAAAGGGTAAAATAACGG\n-ATTAACCCAAGTATAAATGAGCGAGAAACGTTGATGTCGAATTCTTATGACTCCTCCAGTATCAAAGTCC\n-TGAAAGGGCTGGATGCGGTGCGTAAGCGCCCGGGTATGTATATCGGCGACACGGATGACGGCACCGGTCT\n-GCACCACATGGTATTCGAGGTGGTAGATAACGCTATCGACGAAGCGCTCGCGGGTCACTGTAAAGAAATT\n-ATCGTCACCATTCACGCCGACAACTCTGTCTCTGTACAGGATGACGGGCGCGGCATTCCGACCGGTATTC\n-ACCCGGAAGAGGGCGTATCGGCGGCGGAAGTGATCATGACCGTTCTGCACGCAGGCGGTAAATTCGACGA\n-TAACTCCTATAAAGTGTCCGGCGGTCTGCACGGCGTTGGTGTTTCGGTAGTAAACGCCCTGTCGCAAAAA\n-CTGGAGCTGGTTATCCAGCGCGAGGGTAAAATTCACCGTCAGATCTACGAACACGGTGTACCGCAGGCCC\n-CGCTGGCGGTTACCGGCGAGACTGAAAAAACCG
GCACCATGGTGCGTTTCTGGCCTAGCCTCGAAACTTT\n-CACCAATGTGACCGAGT'..b'GCCGAACGGGCCGCCTGTTCCGAGCTGGCGTCGATAAGATCGGCAATCGCCTCAGCCTGGGCTAAGT\n-CGAGTTTATCGTAAAGAAACGCGCGTTCGGAAAACTCACCAGGGCGAGCAATCCGCAGGCCGGGAATGGT\n-CAGAATGCGTTTTAACAGCAGGTCGAGGATCACCGGACCGCCATGACCCTGCAGTTCAAGCACATCTTCG\n-CCGGTGAACGAGTTCGGGCCAGGGAACCATAGCGCAATCCCCTGATCGAGCACGCTGCCGTCGGCGTCTT\n-TAAATGGCAAATAATCGGCGTAGCGCGGCTTAGGTAGTTTACCCAGCACGGTTTCGGCAACTTCACGGGC\n-TTTGAGGCCGGAGATGCGCAGGATGCCAACACCGCCACGTCCCGGAGGCGTGGCCTGGGCTACGATAGTG\n-TCATTATCGCTCATGATGTTCCTGTTGCTTTGTGTGGCGGATGCGCGGTGCTTATCCGCCCTACGAAAAG\n-AAAAAAGGCGGTCAACTGACCGCCCTTATTTTAGCGAAAACTCACCGAATCAGGATTTTTTCTTCTCGCG\n-GCTATGCAGGCCACGTTTTTCCAGACCACGGTAAATCAGCTGCTGCTGAATAATGGTTACCAGGTTGCTG\n-ACGATATAGTACAGCACCAGACCTGACGGGAACCACAGGAAGAACACGGTGAAGATGACCGGCATAAAGG\n-TCATGATCTTCTGCTGCATCGGGTCGGTCACTGTGGTCGGCGACATCTTCTGAATGAAGAACATCGTTAC\n-GCCCATCAGGATCGGCAGGATGTAGTACGGGTCCTGTGCCGACAGGTCGTGGATCCACAGTGCAAACGGT\n-GCCTGACGCAGTTCAACGGAACCCATCAGCATGTAGTACAACGCCAGGAAGATTGGCATCTGGATCAGCA\n-GCGGGAAGCAGCCGCCCAGCGGGTTAACCTTCTCAGCTTTGTACAGCGCCATCATTTCCTGGCTGATACG\n-CTGTTTGTCATCGCCCAGACGCTCACGCATTGCCTGAATCTTCGGCTGCAGCATACGCATCTTCGCCATG\n-GAGGTGTACTGCGCTTTGGTCAGCGGGTACATGATGCCACGAACGATAAAGGTGATGATGATAATGGAGA\n-AGCCCCAGTTACCCACAAAGCTATGGATCCATTTCAGCAGTTTGAACAGCGGCTGAGAGATGAACCACAA\n-CCAACCGTAATCAACGGTCAGATCCAGGTGCGGAGCAACAGCTGCCATTTTGTCCTGGATTTCCGGGCCA\n-ACCCACAGGGTGCTGTTCATCGCGCCAGTCTGACCAGGCTGAACCAGTACCGGCTGAGATTTATAGCCGA\n-TAGCGGCGATGCCGTTACCCAGATTAGCGGTATAGAAGTTGTTGGTACCGTCGTTATGCGGGATCCACGC\n-CGTCGCGAAATACTGTTGCAGCATCGCCACCCAACCACCTTTCGAAGAGATGTTCAGGTTTTCGTTATCG\n-GCAATGGTATCGAACTTGTATTTCTCATACTTCTCGTCAGGCGTGGAGTACGCCGCGCCACGGAAGGTGT\n-GCAGTGCGAAGTTGCTGCTTCCGGTATCGAGATGCGGTGGCAGAGTGATGGATTGCTTCAACTGACCAAA\n-GGTGGAGATTTCCAGCGGTTTCTCGCCAGCGTTCTGCACGTTGTAGTTGACGTTGACAGCGTAATCACCA\n-CGTTTCAGGACAAACGTTTTGGTAAACGTGTTGCCTGCCGCGTCGGTATACGTCATCGGCACCTGCAGTT\n-CGTTTTGACCTTCAGCCAGCACATAAGCGTCTTTTTCAACGTTATACAGCGGACGCGGGCCGTTAGCCGG\n-GTTATCCGGGCCATCACGACCGGTCAGACCGCTCTGTGCCTGA
TAAATAAACTGCGGTGAAGTTTCCAGC\n-AGCTGGAACGGCTGGGTAGAGTTCAGCTCTTTCGGGTAAGCAGGCAGCAGAGCTTGCTCAACATCACCAC\n-CACGGGTGTTGATGGTCAGATCAAGCACGTCGGTCTTAACCGAGATCAGTTTCCCCTGGCCACTGGCCGG\n-TACGCCCTGGTCGGCGGCGCTACCCGCTGCGGTGGTCGTTGTCTGCGTGGTCTGTTGGGCCTGAGGTTGC\n-GGGTTTTTATCCTGCTCCCAGGCTTGCCAGATCATGAAAGACACGAACAGCAAAGCGATGACTAAAAGAT\n-TGCGTTGCGAATCCATCGTTAGTGTTCTCTGGTATCAAATGGTCCGGGCGGGACGGGATCGTCACCACCA\n-GGGTGTAAAGGGTGGCATTTTAATACGCGTTTCACCGTCAACCAACTGCCTTTTATCACTCCAAACCTGC\n-GCAATGCCTCAATTCCGTAGCTTGAACAGGTTGGAGTGAAACGACAATGCGGCCCGAGTAGCGGACTAAT\n-CAGGCGTTGATAGACCCGAATGAGGGCTATCAGGACCCGCGAGCCAGGCGACAGTGGCGGCGCCATAATT\n-TTTCCAACGCTTCCGAGAGAGCACGGTTATCGAGGTCGGCAACCCCTTTTTTCGCCACCACCACGAAATC\n-CATAGCCGGGAGTTCATGTTGGCGCAGACGGAAGCTTTCACGCGTCAGACGTTTAATCCGATTGCGTTCA\n-TGGGCGCGTCGAACGTTTTTCTTGGCGACTGTAAGACCGATACGGGGATGCCCCAGCGAATTCAGGCGGC\n-CGAGAATGGTAATTTGCGGCGTGCCAGCCCGTTGTGGCTGCTGGAAGACGAATGTGAATTGACTGGGAGT\n-TAACAAGCGTAACTCCCTGGGAAATGCGAGCTTAACCACTCAGGGGTTAGCTTTATTACTTAGAAACGGT\n-CAGACGAGCGCGGCCTTTAGCACGACGACGTGCCAGAACCTGACGACCATTTTTAGTAGCCATACGAGCA\n-CGGAAGCCGTGAGAACGGTTGCGCTTCAGTACAGACGGTTGAAAAGTGCGTTTCATGGCGATTTCTACCT\n-AAACTTGAATAAATTCAATGGCTTTATTGGATATCCGCCGAAAAATGAAACGATGGACACCGAAGCCATG\n-GGTGATTAAAGAGGCCGGATTGTAATAATTGTACACTCCGGAGTCAATTCTCTTTCCTTATTTACCGCGC\n-TTTTCCGCACCTTTTCGCAGGGAAAATGTACGACCTCACACCAGTGAAAACCAGCATGGCGCGCCGGGTG\n-GAGGATTATACGGGCTGATGGGTAAAGCGCAAGGATCGTCCTGGATCTTTATTAGATCGATTAAGCCAAT\n-TTTTGTCTATGGTCATTAAATTTTCCAATATGCGGCGTAAATCGTGCCCGCCTCGCGGCAGGATCGTTTA\n-CACTTAGCGAGGTCTGGAAAGTCCTGTGGATAAATCGGGAAAATCTGTGAGAAACAGAAGATCTCTTGCG\n-CAGTTTAGGCTATGATCCGCGGTCCCGATCGTTTTGCAGGATCTTGATCGGGCATATAACCGCAGACAGC\n-GGTTCGTGCGTCACCCTCAAGCAGGGTCTTTTCGACGTACGTCAACAATCATGAATGTTTCAGCCTTAGT\n-CATTATCGACTTTTGTTCGAGTGGAGTCCGCCGTGTCACTTTCGCTTTGGCAGCAGTGTCTTGCCCGATT\n-GCAGGATGAGTTACCAGCCACAGAATTCAGTATGTGGATACGCCCATTGCAGGCGGAACTGAGCGATAAC\n-ACGCTGGCCCTGTACGCGCCAAACCGTTTTGTCCTCGATTGGGTACGGGACAAGTACCTTAATAATATCA\n-ATGGACTGCTAACCAGTTTCTGCGGAGCGGATGCCCCACAGCTGCGTTTTGAAGTCGGCACCAAACCGGT\n
-GACGCAAACGCCACAAGCGGCAGTGACGAGCAACGTCGCGGCCCCTGCACAGGTGGCGCAAACGCAGCCG\n-CAACGTGCTGCGCCTTCTACGCGCTCAGGTTGGGATAAC\n-\n'
b
diff -r 4f0c878b36d4 -r dda44fd49bcd test-data/eco.fasta.gz
b
Binary file test-data/eco.fasta.gz has changed
b
diff -r 4f0c878b36d4 -r dda44fd49bcd test-data/seeds.stk.gz
b
Binary file test-data/seeds.stk.gz has changed