Repository 'repeat_masker'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/repeat_masker

Changeset 3:bdfc22c1c3e3 (2018-05-02)
Previous changeset 2:5673e72241aa (2013-09-17) Next changeset 4:04f5c3d7448e (2018-05-04)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/repeat_masker commit 8dacb4321122d92df0983e0794cf23572be03224
added:
repeatmasker.xml
test-data/repeats.fasta
test-data/small.fasta
test-data/small.fasta.align
test-data/small.fasta.cat
test-data/small.fasta.gff
test-data/small.fasta.log
test-data/small.fasta.masked
test-data/small.fasta.poly
test-data/small.fasta.stats
removed:
RepeatMasker.xml
readme.rst
b
diff -r 5673e72241aa -r bdfc22c1c3e3 RepeatMasker.xml
--- a/RepeatMasker.xml Tue Sep 17 03:23:49 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,307 +0,0 @@\n-<tool id="repeatmasker_wrapper" name="RepeatMasker" version="0.1.2">\n-    <description>Masks different kind of repeats</description>\n-    <requirements>\n-        <requirement type="binary">RepeatMasker</requirement>\n-    </requirements>\n-    <command>\n-## The command is a Cheetah template which allows some Python based syntax.\n-## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n-\n-## create temp directory\n-#import tempfile, os\n-#set $dirname = os.path.abspath( tempfile.mkdtemp() )\n-#set $input_filename = os.path.split( str($query) )[-1]\n-#set $output_basename = os.path.join( $dirname, $input_filename )\n-\n-\n-RepeatMasker \n--parallel 8\n-\n-$nolow\n-$noint\n-$norna\n-\n-#if str($species)!="all":\n-    $species\n-#end if\n-\n-\n--dir $dirname\n-\n-#if $adv_opts.adv_opts_selector=="advanced":\n-\n-    #if str($adv_opts.gc)!="0":\n-        -gc $adv_opts.gc\n-    #end if\n-\n-    $adv_opts.gccalc\n-\n-    #set $output_files_list = str($adv_opts.output_files).split(\',\')\n-    #if "gff" in $output_files_list:\n-        -gff\n-    #end if\n-    #if "html" in $output_files_list:\n-        -html\n-    #end if\n-\n-    $adv_opts.slow_search\n-    $adv_opts.quick_search\n-    $adv_opts.rush_search\n-    $adv_opts.only_alus\n-    $adv_opts.is_only\n-\n-#else:\n-    ## Set defaults\n-    -gff\n-\n-## End of advanced options:\n-#end if\n-\n-$query\n-\n-2>&#38;1;\n-\n-## Copy the output files to galaxy\n-## AgR: if there are no repeats, the output files may not exist.\n-## This causes the job to fail, so touch files to ensure they exist.\n-#if $adv_opts.adv_opts_selector=="advanced":\n-\n-    #if "summary" in $output_files_list:\n-        ## Write out the summary file (default)\n-        #set $summary_file = $output_basename + \'.tbl\'\n-        touch $summary_file;\n-        cp $summary_file $output_summary;\n-    #end if\n-\n-    #if "gff" in $output_files_list:\n-        ## Write out the gff file (default)\n-  
      #set $gff_file = $output_basename + \'.out.gff\'\n-        touch $gff_file;\n-        cp $gff_file $output_gff;\n-    #end if\n-\n-    #if "html" in $output_files_list:\n-        ## Write out the html file\n-        #set $html_file = $output_basename + \'.out.html\'\n-        touch $html_file;\n-        cp $html_file $output_html;\n-    #end if\n-\n-#else:\n-\n-    ## Write out the summary file (default)\n-    #set $summary_file = $output_basename + \'.tbl\'\n-    touch $summary_file;\n-    cp $summary_file $output_summary;\n-\n-    ## Write out the gff file (default)\n-    #set $gff_file = $output_basename + \'.out.gff\'\n-    touch $gff_file;\n-    cp $gff_file $output_gff;\n-\n-\n-## End of advanced options:\n-#end if\n-\n-## Write out mask sequence file\n-#set $mask_sequence_file = $output_basename + \'.masked\'\n-touch $mask_sequence_file;\n-cp $mask_sequence_file $output_mask;\n-\n-## Write out standard file (default)\n-## The default \'.out\' file from RepeatMasker has a 3-line header and spaces rather\n-## than tabs. 
Remove the header and replace the whitespaces with tab\n-#set $standard_file = $output_basename + \'.out\'\n-tail -n +4 $standard_file | tr -s \' \' \'\\t\' > $output_std;\n-\n-## Delete all temporary files\n-rm $dirname -r\n-\n-    </command>\n-    <inputs>\n-        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n-\n-        <param name="nolow" type="boolean" label="No low complexity DNA" truevalue="-nolow" falsevalue="" checked="false" help="Does not mask low_complexity DNA or simple repeats."/>\n-        <param name="noint" type="boolean" label="No interspersed repeats" truevalue="-noint" falsevalue="" checked="false" help="Only masks low complex/simple repeats (no interspersed repeats)."/>\n-\n-        <param name="norna" type="boolean" label="No small RNA genes" truevalue="-norna" falsevalue="" checked="false" help="Does not mask small RNA (pseudo) genes."/>\n-\n-        <!--\n-            Specify the species or clade of the input sequence. The species name\n-            must be a valid NCBI Taxonomy Database species name and be contained\n-      '..b'   1\n-   12204 10.0      2.4       1.8       HSU08988   6782        7714      \\(21529)  C            TIGGER1         DNA/MER2_type       2418             1493           \\(0)    2\n-     279  3.0      0.0       0.0       HSU08988   7719        7751      \\(21492)  +            (TTTTA)n        Simple_repeat       1                33             \\(0)    3\n-    1765 13.4      6.5       1.8       HSU08988   7752        8022      \\(21221)  C            AluSx           SINE/Alu            289              1              \\(23)   4\n-   12204 10.0      2.4       1.8       HSU08988   8023        8694      \\(20549)  C            TIGGER1         DNA/MER2_type       1493             827            \\(925)  5\n-    1984 11.1      0.3       0.7       HSU08988   8695        9000      \\(20243)  C            AluSg           SINE/Alu            305              1              \\(5)  
  6\n-   12204 10.0      2.4       1.8       HSU08988   9001        9695      \\(19548)  C            TIGGER1         DNA/MER2_type       827              2              \\(1591) 7\n-     711 21.2      1.4       0.0       HSU08988   9696        9816      \\(19427)  C            MER7A           DNA/MER2_type       122              2              \\(224)  8\n-======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==\n-\n-This is a sequence in which a Tigger1 DNA transposon has integrated into a MER7 DNA transposon copy.\n-Subsequently two Alus integrated in the Tigger1 sequence. The simple repeat is derived from the\n-poly A of the Alu element. The first line is interpreted like this:\n-\n-:Table description:\n-\n-1. **1306** = Smith-Waterman score of the match, usually complexity adjusted\n-        The SW scores are not always directly comparable. Sometimes\n-        the complexity adjustment has been turned off, and a variety of\n-        scoring-matrices are used.\n-\n-#. **15.6** = % substitutions in matching region compared to the consensus\n-#. **6.2** = % of bases opposite a gap in the query sequence (deleted bp)\n-#. **0.0** = % of bases opposite a gap in the repeat consensus (inserted bp)\n-#. **HSU08988** = name of query sequence\n-#. **6563** = starting position of match in query sequence\n-#. **7714** = ending position of match in query sequence\n-#. **(22462)** = no. of bases in query sequence past the ending position of match\n-#. **C**       = match is with the Complement of the consensus sequence in the database\n-#. **MER7A**   = name of the matching interspersed repeat\n-#. **DNA/MER2_type** = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)\n-#. **2418**    = starting position of match in database sequence (using top-strand numbering)\n-#. 
**1465**    = ending position of match in database sequence\n-#. **(0)**     = no. of bases in (complement of) the repeat consensus sequence prior to beginning of the match (so 0 means that the match extended all the way to the end of the repeat consensus sequence)\n-#. **1**    = Identifier\n-\n-An asterisk (\\*) in the final column (no example shown) indicates that there is\n-a higher-scoring match whose domain partly (&lt;80%) includes the domain of this match. \n-\n-Note that the SW score and divergence numbers for the three Tigger1 lines are identical.\n-This is because the information is derived from a single alignment (the Alus were deleted\n-from the query before the alignment with the Tigger element was performed).\n-The program makes educated guesses about many fragments if they are derived from\n-the same element (e.g. it knows that the MER7A fragments represent one insert).\n-In a next version I can identify each element with a unique ID, if interest exists\n-(this could help to represent repeats cleaner in graphic displays). \n-\n-\n--------\n-\n-**References**\n-\n-Smit, AFA, Hubley, R and Green, P. RepeatMasker Open-3.0.\n-\n-http://www.repeatmasker.org/\n-\n-    </help>\n-</tool>\n'
b
diff -r 5673e72241aa -r bdfc22c1c3e3 readme.rst
--- a/readme.rst Tue Sep 17 03:23:49 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,63 +0,0 @@
-===============================
-Galaxy wrapper for RepeatMasker
-===============================
-
-This wrapper is copyright 2013 by Björn Grüning.
-
-This is a wrapper for the command line tool of RepeatMasker from the Institute for Systems Biology.
-http://www.repeatmasker.org/
-
-
-Smit, AFA, Hubley, R & Green, P. RepeatMasker Open-3.0.
-1996-2010 <http://www.repeatmasker.org>. 
-
-
-Additional Information:
-Using RepeatMasker to identify repetitive elements in genomic sequences.
-http://www.ncbi.nlm.nih.gov/pubmed/19274634
-
-============
-Installation
-============
-
-To install RepeatMasker, please use the following instructions:
-
-http://www.repeatmasker.org/RMDownload.html
-
-To install the wrapper, copy the file RepeatMasker.xml into the Galaxy tools
-folder and modify the tool_conf.xml file to make the tool available to Galaxy.
-
-Add the tool definition to your tool_conf.xml file under the Galaxy root,
-using a line like the following:
- <tool file="RepeatMasker/RepeatMasker.xml" />
-
-=======
-History
-=======
-
-- v0.1: Initial public release
-- v0.1.1: patch from Simon Guest, to create empty files if no repeat is found
-- v0.1.2: remove trailing semicolon, redirect all output to stdout
-
-===============================
-Wrapper Licence (MIT/BSD style)
-===============================
-
-Permission to use, copy, modify, and distribute this software and its
-documentation with or without modifications and for any purpose and
-without fee is hereby granted, provided that any copyright notices
-appear in all copies and that both those copyright notices and this
-permission notice appear in supporting documentation, and that the
-names of the contributors or copyright holders not be used in
-advertising or publicity pertaining to distribution of the software
-without specific prior permission.
-
-THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
-WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
-OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-OR PERFORMANCE OF THIS SOFTWARE.
-
b
diff -r 5673e72241aa -r bdfc22c1c3e3 repeatmasker.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repeatmasker.xml Wed May 02 20:18:11 2018 -0400
[
b'@@ -0,0 +1,230 @@\n+<tool id="repeatmasker_wrapper" name="RepeatMasker" version="4.0.7" profile="17.01">\n+  <description>RepeatMasker</description>\n+\n+  <requirements>\n+    <requirement type="package" version="4.0.7">repeatmasker</requirement>\n+  </requirements>\n+\n+  <command detect_errors="exit_code"><![CDATA[\n+    RM_LIB_PATH=\\$(dirname \\$(which RepeatMasker))/../share/RepeatMasker/Libraries &&\n+    mkdir lib &&\n+    export REPEATMASKER_LIB_DIR=\\$(pwd)/lib &&\n+      for file in \\$(ls \\$RM_LIB_PATH) ; do  ln -s \\$RM_LIB_PATH/\\$file lib/\\$file ; done &&\n+    #if $repeat_source.source_type == "repbase":\n+      cp \'${repeat_source.repbase_file}\' lib/RMRBSeqs.embl &&\n+    #end if\n+    ln -s \'${input_fasta}\' rm_input.fasta &&\n+    RepeatMasker -dir \\$(pwd)\n+    #if $repeat_source.source_type == "library":\n+      -lib \'${repeat_source.repeat_lib}\'\n+      -cutoff \'${repeat_source.cutoff}\'\n+    #else if $repeat_source.source_type == "repbase":\n+      #if $repeat_source.species_source.species_from_list == \'yes\':\n+        $repeat_source.species_source.species_list\n+      #else\n+        -species \'${repeat_source.species_source.species_name}\'\n+      #end if\n+    #end if\n+    -parallel \\${GALAXY_SLOTS:-1}\n+    \'${gff}\'\n+    \'${ignore_n_stretches}\'\n+    \'${advanced.is_only}\'\n+    \'${advanced.is_clip}\'\n+    \'${advanced.no_is}\'\n+    \'${advanced.rodspec}\'\n+    \'${advanced.primspec}\'\n+    \'${advanced.nolow}\'\n+    \'${advanced.noint}\'\n+    \'${advanced.norna}\'\n+    \'${advanced.alu}\'\n+    \'${advanced.div}\'\n+    \'${advanced.search_speed}\'\n+    \'${advanced.frag}\'\n+    \'${advanced.maxsize}\'\n+    #if $advanced.gc is not None:\n+      \'${advanced.gc}\'\n+    #end if\n+    \'${advanced.gccalc}\'\n+    \'${advanced.nocut}\'\n+    \'${advanced.keep_alignments}\'\n+    \'${advanced.invert_alignments}\'\n+    \'${advanced.xout}\'\n+    \'${advanced.xsmall}\'\n+    \'${advanced.poly}\'\n+    
rm_input.fasta &&\n+    #if $advanced.is_only != \'-is_only\':\n+      mv rm_input.fasta.masked \'${output_masked_genome}\' &&\n+      sed -r \'s/^ *// ; s/ *$//; s/\\+ //; s/ +/\\t/g ;  1,2c SW score\\t% div.\\t% del.\\t% ins.\\tquery sequence\\tpos in  query: begin\\tend\\t(left)\\trepeat\\tclass/family\\tpos in repeat: begin\\tend\\t(left)\\tID\' rm_input.fasta.out >\'${output_log}\' &&\n+      mv rm_input.fasta.tbl \'${output_table}\' &&\n+      #if $gff == \'-gff\':\n+        mv rm_input.fasta.out.gff \'${output_gff}\' &&\n+      #end if\n+      #if $advanced.keep_alignments == \'-ali\':\n+        mv rm_input.fasta.align \'${output_alignment}\' &&\n+      #end if\n+      #if $advanced.poly == \'-poly\':\n+        sed -r \'s/^ *// ; s/ *$//; s/\\+ //; s/ +/\\t/g\' rm_input.fasta.polyout >\'${output_polymorphic}\' &&\n+      #end if\n+    #end if\n+    mv rm_input.fasta.cat \'${output_repeat_catalog}\'\n+    ]]>\n+  </command>\n+\n+  <inputs>\n+    <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" />\n+    <conditional name="repeat_source">\n+      <param label="Repeat library source" name="source_type" type="select">\n+        <option selected="true" value="repbase">RepBase</option>\n+        <option value="library">Custom library of repeats</option>\n+      </param>\n+      <when value="repbase">\n+        <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />\n+        <conditional name="species_source">\n+          <param label="Select species name from a list?" 
name="species_from_list" type="select">\n+            <option value="yes" selected="true">Yes</option>\n+            <option value="no">No</option>\n+          </param>\n+          <when value="yes">\n+            <param name="species_list" type="select" label="Species">\n+              <option value="-species anopheles" selected="true">anopheles</option>\n+              <option value="-species arabidopsis">arabidopsis</option>\n+              <option value="-species artiodactyl">artiodactyl</option>\n+              <option value="-species aspergillus">aspergillus</option>\n+              <option value="-species carnivore">carnivore</option>\n+        '..b'nce, not the query sequence" />\n+      <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" />\n+      <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" />\n+    </section>\n+  </inputs>\n+  <outputs>\n+    <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}">\n+      <filter>not advanced[\'is_only\']</filter>\n+    </data>\n+    <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}">\n+      <filter>not advanced[\'is_only\']</filter>\n+    </data>\n+    <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}">\n+      <filter>not advanced[\'is_only\']</filter>\n+    </data>\n+    <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" />\n+    <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}">\n+      <filter>not advanced[\'is_only\'] and advanced[\'keep_alignments\']</filter>\n+    </data>\n+    <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic 
repeats on ${on_string}">\n+      <filter>not advanced[\'is_only\'] and advanced[\'poly\']</filter>\n+    </data>\n+    <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}">\n+      <filter>not advanced[\'is_only\'] and gff is True</filter>\n+    </data>\n+  </outputs>\n+  <tests>\n+    <test expect_num_outputs="4">\n+      <param name="input_fasta" value="small.fasta" ftype="fasta" />\n+      <param name="source_type" value="library" />\n+      <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />\n+      <output name="output_masked_genome" file="small.fasta.masked" />\n+      <output name="output_table" file="small.fasta.stats" lines_diff="2" />\n+      <output name="output_repeat_catalog" file="small.fasta.cat" />\n+      <output name="output_log" file="small.fasta.log" />\n+    </test>\n+    <test expect_num_outputs="7">\n+      <param name="input_fasta" value="small.fasta" ftype="fasta" />\n+      <param name="source_type" value="library" />\n+      <param name="gff" value="-gff" />\n+      <!-- <param name="show" value="yes" /> -->\n+      <param name="keep_alignments" value="-ali" />\n+      <param name="poly" value="-poly" />\n+      <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />\n+      <output name="output_masked_genome" file="small.fasta.masked" />\n+      <output name="output_table" file="small.fasta.stats" lines_diff="4" />\n+      <output name="output_repeat_catalog" file="small.fasta.cat" />\n+      <output name="output_log" file="small.fasta.log" />\n+      <output name="output_alignment" file="small.fasta.align" />\n+      <output name="output_polymorphic" file="small.fasta.poly" />\n+      <output name="output_gff" file="small.fasta.gff" lines_diff="4" />\n+    </test>\n+  </tests>\n+  <help><![CDATA[\n+RepeatMasker is a program that screens DNA for interspersed repeats and low\n+complexity DNA sequences. 
The database of repeats to screen for can be\n+provided as a FASTA file or downloaded from RepBase_. If the RepBase option is\n+chosen the RepBaseRepeatMaskerEdition file should be downloaded and\n+unpacked, and the enclosed EMBL format file (\'RMRBSeqs.embl\') should\n+be uploaded to Galaxy for use with this tool.\n+\n+Further documentation is available on the RepeatMasker homepage_.\n+\n+.. _RepBase: http://www.girinst.org/repbase/\n+.. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html\n+    ]]>\n+  </help>\n+  <citations>\n+    <citation type="bibtex">\n+      @misc{RepeatMasker,\n+        title = {RepeatMasker Open-4.0},\n+        howpublished = {\\url{http://www.repeatmasker.org}},\n+        author = {Smit, AFA and Hubley, R and Green, P.},\n+        year = {2013-2015}}\n+    </citation>\n+  </citations>\n+</tool>\n'
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/repeats.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/repeats.fasta Wed May 02 20:18:11 2018 -0400
b
@@ -0,0 +1,64 @@
+>Asian_seabass_ONSAT_SB_Concensus_Lenght_170_bp
+CGAAAAATTTAATAATTTAGGGGTCTTGAGCATGGGCGTGGTAAAATGCCCTCGGTAGCG
+CCACCTACATTTTTAAACGGAACAGCCCCTCAAGCCCGTTGCGCCTAAAAATCTGAAAAT
+CTGCACACATATGTAACATCCCATGACGCACCAAAAAGTCTCTTGGAGCCA
+>Asian_seabass_MOSAT_SB_T_34a_satellite_DNA
+TTTGTGACATCACTACATAGTTTGTTGAAAACGTAC
+>Asian_seabass_MOSAT_SB_T_34b_satellite_DNA
+TTTGTGACATCACACATAGTTGTGGGTCAGTAC 
+>Sat_38
+AAAAAATGTCATAGTATAGTATGGCGTCAAAAAACATG
+>Asian_seabass_Sat_217_Consensus_Length_217_bp
+AGTAAACAAGCATTATGGTTGAAACCATAATTTCCTGTCGGGAGAGCCTTTCCCTCTTTT
+GTGCACTGTATGCAATCCCAGAGTGTGAATAAGCGCTTTTCCAGCGTTTTGAGGCTTATT
+CAGCTCAGAATGGCTTAATACTGCACTATCTGACCAGGACAAAGACAAACTAAGAACTCA
+GCCACACGGACATGAAAGTTGTTTTACTTACAATATT
+>Asian_seabass_Sat_217_Consensus_Length_427_bp
+CCAGCGTTTTGAGGCTTATTCAGCTCAGAATGGCTTAATACTGCACTATCTGACCAGGAC
+AAGACAAACTAAGAACTCAACACACTGACATGAAAGTTTCTATATTTCTATTAAAATAAA
+CAAACATTATGGTTGAAACCATAATTTCATTTCGGGAGAGCCTTTCCCTCTTTGTGGCAC
+TGTATGTAATCTGAAGTGTGAATAACGCTTTTCCCGCGTTTGAGGCTTATTCAGCTCAGA
+ATGGCTTAATACTGCACTATCTGACCAGGACAAGACAAACTAAGAACTCAGCCACACGGA
+CATGAAAGTTGTTTACTTTACAATATTAGTAAACAAGCATTATGGTTGAAACCATAATTT
+CCTGTCGGGAAGAGCTTCCCTCTTTTTGTGCACTGTATGCAATCCCAGAGTGTGAATAAG
+CGCTTTT
+>Asian_seabass_Sat_LM_Consensus_Length_453_bp
+CCCAACAAAATAGCACTTACTGCTTTCCAAATTCAAGTCTGTGCCTGTGTTAGGGTAATG
+TTGGTGATAAATTGATGTACTTAGCTTATCCTGTGAAAATGGTGTAAAATGGCAGTCTCC
+ATGTTTGTTGAAGAAACTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAG
+AAATCATGAGTGGCTCTTATTGAAGTGATCAGAGTTGGACTCCATTAAGAAAATGGCTTT
+CAATGTAGTCAGTTGCTTTGTCACTTAAAACTGCCCCAACAAGTAGCACTTACTGCAATT
+TGGTAGTCAAAGTCAATGCTCAAGTTAAGGGTAATAGTAGTGACATAATAGAAAAATCTC
+TAAATATATTTGCTGGCAGCTTTAATATACAGAATGAGTGCCATGATGAATTCTTGATAC
+AGAACAGGGACTTCCAAAATCAGCCAACACTAA
+>Asian_seabass_Sat_LM_Consensus_Length_218_bp
+CCCAACAAAATAGCACTTACTGCTTTCCAAATTCAAGTCTGTGCCTGTGTTAGGGTAATG
+TTGGTGATAAATTGATGTACTTAGCTTATCCTGTGAAAATGGTGTAAAATGGCAGTCTCC
+TGTTTGTTGAAGAAACTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAGA
+AATCATGAGTGGCTCTTATTGAAGTGATCAGAGTTGG
+>Asian_seabass_Sat_LM Consensus_Length_150_bp 
+CTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAGAAATCATGAGTGGCTC
+TTATTGAAGTGATCAGAGTTGGACTCCATTAAGAAAATGGCTTTCAATGTAGTCAGTTGC
+TTTGTCACTTAAAACTGCCCCAACAAGTAGCA
+>Asian_seabass_Sat_Unk_Consensus_Length_341_bp
+GTTTGGGTAAAAATATTGTCTATTTACGAGCTATCCTCTATATATTTTTGATCTGATTAA
+TAAATACCTCTAGCCCACAAACTGTGGTATTTTGCTATGTGGCAGTTCACCCGAAAGTCC
+AGTAGCATTTATCGCATTTTCTAAAGATAGTCAGTGCCTGAAAGTTTGAGGCAGATAAAC
+AAATTGTTCAAGTAAGAACTATATCTTTCTTATGATTTTACCGCAATCATACAGGTTGTT
+TCTTGTGGTCTGCTGGGCATTGTATCCCTTTGTTGTATGGATTTTTCCTTTCTTTAATGA
+TCTCCTCCCTGGAGTTTGTAATCCCTGTTTGTAGTGGAATT
+>Asian_seabass_Sat_Unk_Consensus_Length_789_bp
+CAAAAAAATGGAAAAAAAAAAAGTGGCTCATTTGAAGTGAATCAGAGTTGGGTAACATTG
+TCCTTAATTACAGCTATGCTTATATACTTGATCTGATAATAATACTCTAGCCACAATCTG
+TGGGTATTCTGCTATTTGCAGTCCACCCAAAGTCATAGCATTTAATCCATTCTAGGAAGA
+TACGTCAGTGCTGAGTTGAGCCAATAACACCAAATTGTCATAAGAACTATTATTTTCTTT
+ATGATTTTTACGCATCAGTACAGGTGTTTTCTTGTGGTTTCTGCTGCATGTATCCCTGTT
+GTAAGTGGATTCTCCTTTGCTTATGATCTCCTCCGCTGCGTTGTATCCTGTTTGTAGTGG
+ATTTCCTTGCACTGATTCTGCCCGCTCTTAATACTGAATGAACGCCCTCCCACCAGTAGT
+ACTGCCACCTTTGTTTTTTCACAAAGTGTTCAATGGCCTTGATAATGACTTGGTAACTAC
+ATCACCACTTTTTTGTACGTAATCAACAGACAATCACCCATTAAATTCATGCGGCCATTA
+GGCAGCTTGATGAGCTGGACATTTGACCTCCCCAATGAATCTTTGCTGATGGGAAACTTA
+TTGCCACCATAAAGTGAACCAGCATTGCTATGTCCATGCTGTAATTTGAAAACCCAATAC
+AAAGAAGCCCTGCATATATACCTCCCAAATTGGCTTCTGAGACCCGGGCGTAATCACTGC
+ATGGCCATAGATAAACATTGAAATCTTCTGTTGCAGTCAGTTTGCTTTTTGTCAACTTTA
+AAAACTGCC
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta Wed May 02 20:18:11 2018 -0400
b
b'@@ -0,0 +1,238 @@\n+>scaffold_1\n+TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATCCAATATATTC\n+CTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGACATCCTCAGTGATTGAAGTGA\n+CAAAAAGTGGAGCGCACGCAGTGGTCATTACCCTCCAATGGTACTTCTAATTAAGGAAAG\n+ATTTTTTGCATTCACTGAGCAAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGG\n+GTCCATTAAACAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT\n+AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGCCCCAAATTGT\n+CTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATAATTGTGTTTCTCTCTAGATG\n+TGCGGTGTCCATCAAGACACTGGATGTCACGTGGTCACTCCTGTGAAGAGCGAACTGTGT\n+GGAACCCGAAGTACTGTGTGGTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGG\n+TGGTGAGAGCAGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC\n+TCTCATCTGCTCGTGTGTGTGTGTGTGTGTGTCCTTGTCAATGTCATTACACAATTTATG\n+TTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACATCTACTTTCTGTCACCTGATA\n+CCTCATACTTTACACTCTAATCCCTTTTTCCTTTCTAACTGCTGCTGTGTTAAGCTGTCA\n+TATTAATATACTAAATAATGATAATATTAATTCTAATAATGATAGTGAATGGAGATTCAC\n+AATGAAGAAACACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT\n+TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCAAGGCTGAGTC\n+TGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGGATTATGCTAGTTTGTGTGAG\n+ATTAGAGGTCCTGTTGTTACGAGCAAATCAGCAGTCAGCAGGGTAATCTAAATTACATTG\n+TTCTGTCGGCAATATCCTCTCCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCC\n+CCACAGTAGTGCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT\n+ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTGTGATTGTTGG\n+TGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGATCTCCTGGGATTTTCACACAC\n+GGCATTCTCTAGAGTTTACTCAGAACGGTGTGGAAAGCAAAAAAACATCCCGCGAGCTGC\n+AATCCTGCGGATGGAAACACCTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGAT\n+CAATCTGACAGAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA\n+AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTACAGCAGCAGAT\n+GAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACATCTCAGACTGCACAGGACACC\n+AAAACAAAAACACATCCTGTTTTCTACTGGTGGTAGAGTCACAATTTGGCAATAAGATAA\n+ATCCATGGACCCAACTTGCCTTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATG\n+TTTTCTTTACACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG\n+TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGATACCTGCGGC\n+AATACACCCCC
ACAAACGCAGCTCCCTGCATTTTAAGTATTATAGAGAGTAATTTCACTG\n+TCCATGTCTTTTTTGATCATAAAGCAGGTCTAGGTGCTGTATACAACACTGTATTGAAAT\n+GCTGAAGCAGGTCTAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCCAATATATC\n+AATCTATCTGTCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT\n+GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCACCACAGCATTT\n+ACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGTTACCTGTTATTGGCCTGGCT\n+CTATGGCATGCAGAAAACAGTCAGCCAATCAGAGGACAGACTCAGAGACAGACACAAAGT\n+GCCCTGTTCTTGTTAGAGCAGAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGA\n+TGGTTTTTTGGTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA\n+AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGTTTCGTAGTTC\n+TAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTAAAGCGGCTACATGTAGAATT\n+TGACCCACTTTGGTGCCCACATATGGTAACTAAAACACTACAGACAGTATGCACTCCAAC\n+CCTATATAATTTATGTCCTTCAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGAT\n+TAATACCTAGAATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG\n+AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAACCTGGTCATT\n+CCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCTCCTGCTGTGGGTGCCAATAG\n+CTCATGCAAGTTGGTGTCTTTCAACTAAGGCCACTATGTGAGGACCATCAGCAGCAAATT\n+AAATTAGAAGTGCCTTGGAGTTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGA\n+CAGAACAGTGCCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT\n+GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAAGGCATAGTTA\n+CTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGCTGGAACTTGGAACATGTAGC\n+CTAAGTTTCCCTTTGTCTGCAATTACTGAGGTATGCCATGTTGAAATAGAACAATAATTG\n+CAGGGAGGAATTATTCCAATCGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTT\n+CACAGCAACAAAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT\n+CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAAGCGATGATAC\n+AGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACATGGTTTAAACAAGCTCAGTT\n+AAATGGATCAGAGCATCAACTGCTGTGAACAAGCAATCACACGGTCAAGGTCAAACATTT\n+ACACTTCTGAGAGATCTGGAGAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTA\n+ACTGATTACGCCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA\n+TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCATGCAGCCTAAT\n+TTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATGTTTTAGGCAGTTTTAGGCAC\n+TAAAGGTGAACTGAGGATGCAGTCCCACGATTAATTTTTATTCATCAGTTAACCTCAT
GT\n+GAAGTGTAGTAAACAGAAAAAACCTAAATCAGATCAGTATTTCTTATGACCCCCCTCCCC\n'..b'TCATTGGCGTAGGCCCCACCTGCTCTTGTC\n+CTTGTGTGACACAGGCTCTAAATAAGCAGCATGATGAATAAAAATGACACTGAGATGAAT\n+AGGAATCCACAGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG\n+AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAACATGCCAGAGA\n+CAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTTAATTTTACTGAGGTGAAAGT\n+TTTTTTTTTTTTTCTGGATAAAGCTTTGGAAAAGTTCTCAACTGTTGCTTCTTTAAAGAT\n+GCTGGGGCATATTCTGCCCTATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAA\n+GCTGTAACATTTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT\n+ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTATCTTAATATT\n+CCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCATCTCTCTCCTCATAGTTTTT\n+CATTACAATTAGCTCTCATTAGATAGAATTGTATTTGTTGTCTTTGTGCCAGTCACTCCA\n+GTCTATTTTGCCAGCACACAACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACA\n+GTTTTAATGATGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG\n+GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCATTTGGCCACAA\n+AATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGTACAGGTTGCATATGAGCTGG\n+TAGGAAAGATATAGAATCATTATCAGCTGATAATCTAACAGTAGCAGTCAGTGTAGATGC\n+TGTGCTAACGCAAAGTTGTGAAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCT\n+GCCAAGCTTAAGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC\n+GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTTTTCAAGCAAA\n+AATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAGATTAAATATCTTTTGGGTTT\n+TGGCACAGGCTGGACAAAAAAACCTCTGAGACGCTGTGATAAGAATTTATTTTCACATTT\n+TTTTTACTTTTCAGGGACTACACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCC\n+CAAATGTTAACTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT\n+TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTGGTCAGGTTTT\n+TGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGGGAAGTTCCTAAATTCTTCTG\n+GCATGTTAATGTTTTCCAAACTGATGTTACCAAGTCCTCGTTATGATGAGAAAAAAATAT\n+GCTGAGAGTGAAATTGATCAAAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCA\n+TCTTTCTCCCTCTCTCTCCACCGCAACGTGGAAACTGCTCCTCTCTCTCTCTCCCTCTCC\n+CTCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCCTCCCTCCCA\n+TTCTGTGCTCCGGTATACTCGCTCTCTCACTCTCTCCCTCTCTCCCCACTCTCCAGAGAG\n+GGGTTCAGTCAGACAGATGTAACACAGCAGTAGAAGCCTGAGCTGAGCTGGCAGGCTGCG\n+GAGGC
CAGACCAGAGCCAGCAGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCA\n+GCGGCAGCACAGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT\n+TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCATCACCCTGTG\n+AATTTAACATGGACCCTGACACCAGCACCCATCCAGAGACACAACATGGTGAGTGAGATT\n+TGAAGGAGGAAAAGATTAGAAACAATGAGAGTGATACTGTGGGAAAAGTTGAGGAGCGTG\n+TTAGTGAACAAGGGAGTCAGGTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGG\n+CTCTCCATTTGGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC\n+AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCAATGTGATGAA\n+ATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTCTTGTCATTACTTTAACAGGG\n+TTGTAAAGTTTTTTTTGCTTTTTTGTAGCTAAGAAAGTTGAGAATTGTTTTGTCTGAACT\n+CTCTCTGGGATTTGTCTTGTCGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAG\n+GAAGCTCTCCTCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT\n+TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATATTTGTATGCTG\n+TGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACAGAGGGGCACCAAAGAATGAA\n+AGTGGGAGACAGAGGGAACGAGAAGGGGAGAGACCGAGAGAGAAGGACTTATGTACACAA\n+ATAAATCCAGGGGGATCTAGACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTT\n+CCCCCTCATTTTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA\n+CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGAAAAGACGATG\n+TTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTGCTGATGAAGAATGTGGTTAA\n+CAGAATGAGTCAACAGAGAACATATTTCCAAGAGACTGTAGTTTCTCTTCGCCAGCGTCT\n+AAGCCACATTGCTTTATGCACTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGA\n+GGGGAAAGAGAATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC\n+TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCACCTTCCCAAAA\n+TAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTCTCAACCAGTCAGCCAGCCTC\n+TTTCAGTGCATACAGCTTGAGGAGATCCCTTCTAAAGGTCCAATATAAATAGAAAAGTGG\n+GAGTAGAAAGGGCAATAATCTGATATCATCTGATTACATTCACACCTCAGGCTTGCACGC\n+TACAGGAAGAGTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT\n+AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAATCCTGGAGTA\n+GACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCATATATTACACATTGCTTTTTG\n+TACTTTGCATATAAAGTAGATGCTGATCTGCTATCTGCATATATAGTAGCTGCAGATAGC\n+ACTGTAACTACATCTACATATTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATG\n+CTGGACAACCTCTTCCGATATTCATTATTTTGGAAAAAAAAATATCAATGGC
ATTCCTAC\n+CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGTTCACCCAATA\n+GTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTACAAGCACCCATGCCTAAATCA\n'
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.align
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.align Wed May 02 20:18:11 2018 -0400
b
@@ -0,0 +1,104 @@
+18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0 1
+
+  scaffold_1           613 GTGTGTGTGTGTGTGTGTGT 632
+                                               
+  (GT)n#Simple_          1 GTGTGTGTGTGTGTGTGTGT 20
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (0/0)
+Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0)
+
+
+16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1 2
+
+  scaffold_1           780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824
+                              v    -   v -      i      v    vv       i   
+  (ATAATA)n#Sim          1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45
+
+Matrix = Unknown
+Transitions / transversions = 0.40 (2/5)
+Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2)
+
+
+12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2 3
+
+  scaffold_1          2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274
+                               - i    v   v  iv    -i       vv v         
+  (CAGA)n#Simpl          1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46
+
+Matrix = Unknown
+Transitions / transversions = 0.50 (3/6)
+Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2)
+
+
+15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3 4
+
+  scaffold_1          4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898
+                              v i i             -  vv  vv i     -   -    -   
+  (TC)n#Simple_          1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50
+
+  scaffold_1          4899 -CTC 4901
+                           -   
+  (TC)n#Simple_         51 TCTC 54
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.10 (5 / 48), avg. gap size = 1.00 (5 / 5)
+
+
+13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4 5
+
+  scaffold_1          6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278
+                               v i -    v            -i  - i  v  -    v  v   
+  (TAATTAA)n#Si          1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47
+
+  scaffold_1          6279 AAATAA 6284
+                           -     
+  (TAATTAA)n#Si         48 -AATAA 52
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5)
+
+
+15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5 6
+
+  scaffold_1          6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597
+                                 v   i i           viv i     vi    v -v i  - 
+  (GACA)n#Simpl          1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48
+
+  scaffold_1          6598 GACAGAGAG 6606
+                                 v  
+  (GACA)n#Simpl         49 GACAGACAG 57
+
+Matrix = Unknown
+Transitions / transversions = 0.86 (6/7)
+Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2)
+
+
+67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0 7
+
+  scaffold_1         11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029
+                                        i     i     -                        
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50
+
+  scaffold_1         12030 CTCTCTCTCTCTCTCTCTCTC 12050
+                                                
+  (CT)n#Simple_         51 CTCTCTCTCTCTCTCTCTCTC 71
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (2/0)
+Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1)
+
+
+19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6 8
+
+  scaffold_1         12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113
+                              v       v       i       - i v     
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37
+
+Matrix = Unknown
+Transitions / transversions = 0.67 (2/3)
+Gap_init rate = 0.03 (1 / 35), avg. gap size = 1.00 (1 / 1)
+
+
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.cat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.cat Wed May 02 20:18:11 2018 -0400
b
@@ -0,0 +1,103 @@
+18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0
+
+  scaffold_1           613 GTGTGTGTGTGTGTGTGTGT 632
+                                               
+  (GT)n#Simple_          1 GTGTGTGTGTGTGTGTGTGT 20
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (0/0)
+Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0)
+
+16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1
+
+  scaffold_1           780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824
+                              v    -   v -      i      v    vv       i   
+  (ATAATA)n#Sim          1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45
+
+Matrix = Unknown
+Transitions / transversions = 0.40 (2/5)
+Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2)
+
+12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2
+
+  scaffold_1          2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274
+                               - i    v   v  iv    -i       vv v         
+  (CAGA)n#Simpl          1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46
+
+Matrix = Unknown
+Transitions / transversions = 0.50 (3/6)
+Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2)
+
+15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3
+
+  scaffold_1          4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898
+                              v i i             -  vv  vv i     -   -    -   
+  (TC)n#Simple_          1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50
+
+  scaffold_1          4899 -CTC 4901
+                           -   
+  (TC)n#Simple_         51 TCTC 54
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.10 (5 / 48), avg. gap size = 1.00 (5 / 5)
+
+13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4
+
+  scaffold_1          6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278
+                               v i -    v            -i  - i  v  -    v  v   
+  (TAATTAA)n#Si          1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47
+
+  scaffold_1          6279 AAATAA 6284
+                           -     
+  (TAATTAA)n#Si         48 -AATAA 52
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5)
+
+15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5
+
+  scaffold_1          6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597
+                                 v   i i           viv i     vi    v -v i  - 
+  (GACA)n#Simpl          1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48
+
+  scaffold_1          6598 GACAGAGAG 6606
+                                 v  
+  (GACA)n#Simpl         49 GACAGACAG 57
+
+Matrix = Unknown
+Transitions / transversions = 0.86 (6/7)
+Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2)
+
+67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0
+
+  scaffold_1         11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029
+                                        i     i     -                        
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50
+
+  scaffold_1         12030 CTCTCTCTCTCTCTCTCTCTC 12050
+                                                
+  (CT)n#Simple_         51 CTCTCTCTCTCTCTCTCTCTC 71
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (2/0)
+Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1)
+
+19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6
+
+  scaffold_1         12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113
+                              v       v       i       - i v     
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37
+
+Matrix = Unknown
+Transitions / transversions = 0.67 (2/3)
+Gap_init rate = 0.03 (1 / 35), avg. gap size = 1.00 (1 / 1)
+
+## Total Sequences: 1
+## Total Length: 14220
+## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
+## Total NonSub ( excluding all non ACGT bases ):14220
+RepeatMasker version open-4.0.7 , default mode
+run with rmblastn version 2.2.27+
+RepeatMasker Combined Database: Dfam_Consensus-20170127
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.gff Wed May 02 20:18:11 2018 -0400
b
@@ -0,0 +1,11 @@
+##gff-version 2
+##date 2018-04-21
+##sequence-region dataset_12.dat
+scaffold_1 RepeatMasker similarity 613 632  0.0 + . Target "Motif:(GT)n" 1 20
+scaffold_1 RepeatMasker similarity 780 824 18.3 + . Target "Motif:(ATAATA)n" 1 45
+scaffold_1 RepeatMasker similarity 2231 2274 23.9 + . Target "Motif:(CAGA)n" 1 46
+scaffold_1 RepeatMasker similarity 4853 4901 18.4 + . Target "Motif:(TC)n" 1 54
+scaffold_1 RepeatMasker similarity 6230 6284 19.1 + . Target "Motif:(TAATTAA)n" 1 52
+scaffold_1 RepeatMasker similarity 6548 6606 28.3 + . Target "Motif:(GACA)n" 1 57
+scaffold_1 RepeatMasker similarity 11981 12050  2.9 + . Target "Motif:(CT)n" 1 71
+scaffold_1 RepeatMasker similarity 12078 12113 15.4 + . Target "Motif:(CT)n" 1 37
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.log Wed May 02 20:18:11 2018 -0400
b
@@ -0,0 +1,10 @@
+SW score % div. % del. % ins. query sequence pos in  query: begin end (left) repeat class/family pos in repeat: begin end (left) ID
+
+18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1
+16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
+12 23.9 4.5 0.0 scaffold_1 2231 2274 (11946) (CAGA)n Simple_repeat 1 46 (0) 3
+15 18.4 10.2 0.0 scaffold_1 4853 4901 (9319) (TC)n Simple_repeat 1 54 (0) 4
+13 19.1 1.8 7.7 scaffold_1 6230 6284 (7936) (TAATTAA)n Simple_repeat 1 52 (0) 5
+15 28.3 0.0 3.5 scaffold_1 6548 6606 (7614) (GACA)n Simple_repeat 1 57 (0) 6
+67 2.9 1.4 0.0 scaffold_1 11981 12050 (2170) (CT)n Simple_repeat 1 71 (0) 7
+19 15.4 2.8 0.0 scaffold_1 12078 12113 (2107) (CT)n Simple_repeat 1 37 (0) 8
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.masked
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.masked Wed May 02 20:18:11 2018 -0400
b
b'@@ -0,0 +1,286 @@\n+>scaffold_1\n+TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATC\n+CAATATATTCCTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGAC\n+ATCCTCAGTGATTGAAGTGACAAAAAGTGGAGCGCACGCAGTGGTCATTA\n+CCCTCCAATGGTACTTCTAATTAAGGAAAGATTTTTTGCATTCACTGAGC\n+AAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGGGTCCATTAAA\n+CAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT\n+AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGC\n+CCCAAATTGTCTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATA\n+ATTGTGTTTCTCTCTAGATGTGCGGTGTCCATCAAGACACTGGATGTCAC\n+GTGGTCACTCCTGTGAAGAGCGAACTGTGTGGAACCCGAAGTACTGTGTG\n+GTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGGTGGTGAGAGC\n+AGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC\n+TCTCATCTGCTCNNNNNNNNNNNNNNNNNNNNCCTTGTCAATGTCATTAC\n+ACAATTTATGTTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACAT\n+CTACTTTCTGTCACCTGATACCTCATACTTTACACTCTAATCCCTTTTTC\n+CTTTCTAACTGCTGCTGTGTTAAGCTGTCNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNGTGAATGGAGATTCACAATGAAGAAA\n+CACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT\n+TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCA\n+AGGCTGAGTCTGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGG\n+ATTATGCTAGTTTGTGTGAGATTAGAGGTCCTGTTGTTACGAGCAAATCA\n+GCAGTCAGCAGGGTAATCTAAATTACATTGTTCTGTCGGCAATATCCTCT\n+CCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCCCCACAGTAGT\n+GCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT\n+ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTG\n+TGATTGTTGGTGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGAT\n+CTCCTGGGATTTTCACACACGGCATTCTCTAGAGTTTACTCAGAACGGTG\n+TGGAAAGCAAAAAAACATCCCGCGAGCTGCAATCCTGCGGATGGAAACAC\n+CTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGATCAATCTGACA\n+GAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA\n+AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTAC\n+AGCAGCAGATGAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACAT\n+CTCAGACTGCACAGGACACCAAAACAAAAACACATCCTGTTTTCTACTGG\n+TGGTAGAGTCACAATTTGGCAATAAGATAAATCCATGGACCCAACTTGCC\n+TTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATGTTTTCTTTAC\n+ACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG\n+TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGA\n+TAC
CTGCGGCAATACACCCCCACAAACGCAGCTCCCTGCATTTTAAGTAT\n+TATAGAGAGTAATTTCACTGTCCATGTCTTTTTTGATCATAAAGCAGGTC\n+TAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCAGGTCTAGGTGC\n+TGTATACAACACTGTATTGAAATGCTGAAGCCAATATATCAATCTATCTG\n+TCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT\n+GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCAC\n+CACAGCATTTACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGT\n+TACCTGTTATTGGCCTGGCTCTATGGCATGNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNCAAAGTGCCCTGTTCTTGTTAGAGCA\n+GAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGATGGTTTTTTG\n+GTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA\n+AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGT\n+TTCGTAGTTCTAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTA\n+AAGCGGCTACATGTAGAATTTGACCCACTTTGGTGCCCACATATGGTAAC\n+TAAAACACTACAGACAGTATGCACTCCAACCCTATATAATTTATGTCCTT\n+CAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGATTAATACCTAG\n+AATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG\n+AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAA\n+CCTGGTCATTCCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCT\n+CCTGCTGTGGGTGCCAATAGCTCATGCAAGTTGGTGTCTTTCAACTAAGG\n+CCACTATGTGAGGACCATCAGCAGCAAATTAAATTAGAAGTGCCTTGGAG\n+TTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGACAGAACAGTG\n+CCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT\n+GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAA\n+GGCATAGTTACTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGC\n+TGGAACTTGGAACATGTAGCCTAAGTTTCCCTTTGTCTGCAATTACTGAG\n+GTATGCCATGTTGAAATAGAACAATAATTGCAGGGAGGAATTATTCCAAT\n+CGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTTCACAGCAACA\n+AAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT\n+CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAA\n+GCGATGATACAGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACA\n+TGGTTTAAACAAGCTCAGTTAAATGGATCAGAGCATCAACTGCTGTGAAC\n+AAGCAATCACACGGTCAAGGTCAAACATTTACACTTCTGAGAGATCTGGA\n+GAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTAACTGATTACG\n+CCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA\n+TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCAT\n+GCAGCCTAATTTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATG\n+TTTTAGGCAGTTTTAGGCACTAAAGGTGAACTGAGGATGCAG
TCCCACGA\n+TTAATTTTTATTCATCAGTTAACCTCATGTGAAGTGTAGTAAACAGAAAA\n+AACCTAAATCAGATC'..b'TGTCCTTGTGTGACACAGGCTCTA\n+AATAAGCAGCATGATGAATAAAAATGACACTGAGATGAATAGGAATCCAC\n+AGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG\n+AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAAC\n+ATGCCAGAGACAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTT\n+AATTTTACTGAGGTGAAAGTTTTTTTTTTTTTTCTGGATAAAGCTTTGGA\n+AAAGTTCTCAACTGTTGCTTCTTTAAAGATGCTGGGGCATATTCTGCCCT\n+ATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAAGCTGTAACAT\n+TTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT\n+ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTA\n+TCTTAATATTCCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCA\n+TCTCTCTCCTCATAGTTTTTCATTACAATTAGCTCTCATTAGATAGAATT\n+GTATTTGTTGTCTTTGTGCCAGTCACTCCAGTCTATTTTGCCAGCACACA\n+ACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACAGTTTTAATGA\n+TGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG\n+GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCAT\n+TTGGCCACAAAATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGT\n+ACAGGTTGCATATGAGCTGGTAGGAAAGATATAGAATCATTATCAGCTGA\n+TAATCTAACAGTAGCAGTCAGTGTAGATGCTGTGCTAACGCAAAGTTGTG\n+AAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCTGCCAAGCTTA\n+AGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC\n+GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTT\n+TTCAAGCAAAAATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAG\n+ATTAAATATCTTTTGGGTTTTGGCACAGGCTGGACAAAAAAACCTCTGAG\n+ACGCTGTGATAAGAATTTATTTTCACATTTTTTTTACTTTTCAGGGACTA\n+CACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCCCAAATGTTAA\n+CTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT\n+TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTG\n+GTCAGGTTTTTGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGG\n+GAAGTTCCTAAATTCTTCTGGCATGTTAATGTTTTCCAAACTGATGTTAC\n+CAAGTCCTCGTTATGATGAGAAAAAAATATGCTGAGAGTGAAATTGATCA\n+AAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCATCTTTCTCCC\n+TCTCTCTCCACCGCAACGTGGAAACTGCTCNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+CTCCCTCCCATTCTGTGCTCCGGTATANNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNCAGAGAGGGGTTCAGTCAGACAGATGTAACACAGCAG\n+TAGAAGCCTGAGCTGAGCTGGCAGGCTGCGGAGG
CCAGACCAGAGCCAGC\n+AGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCAGCGGCAGCAC\n+AGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT\n+TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCA\n+TCACCCTGTGAATTTAACATGGACCCTGACACCAGCACCCATCCAGAGAC\n+ACAACATGGTGAGTGAGATTTGAAGGAGGAAAAGATTAGAAACAATGAGA\n+GTGATACTGTGGGAAAAGTTGAGGAGCGTGTTAGTGAACAAGGGAGTCAG\n+GTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGGCTCTCCATTT\n+GGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC\n+AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCA\n+ATGTGATGAAATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTC\n+TTGTCATTACTTTAACAGGGTTGTAAAGTTTTTTTTGCTTTTTTGTAGCT\n+AAGAAAGTTGAGAATTGTTTTGTCTGAACTCTCTCTGGGATTTGTCTTGT\n+CGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAGGAAGCTCTCC\n+TCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT\n+TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATAT\n+TTGTATGCTGTGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACA\n+GAGGGGCACCAAAGAATGAAAGTGGGAGACAGAGGGAACGAGAAGGGGAG\n+AGACCGAGAGAGAAGGACTTATGTACACAAATAAATCCAGGGGGATCTAG\n+ACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTTCCCCCTCATT\n+TTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA\n+CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGA\n+AAAGACGATGTTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTG\n+CTGATGAAGAATGTGGTTAACAGAATGAGTCAACAGAGAACATATTTCCA\n+AGAGACTGTAGTTTCTCTTCGCCAGCGTCTAAGCCACATTGCTTTATGCA\n+CTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGAGGGGAAAGAG\n+AATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC\n+TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCAC\n+CTTCCCAAAATAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTC\n+TCAACCAGTCAGCCAGCCTCTTTCAGTGCATACAGCTTGAGGAGATCCCT\n+TCTAAAGGTCCAATATAAATAGAAAAGTGGGAGTAGAAAGGGCAATAATC\n+TGATATCATCTGATTACATTCACACCTCAGGCTTGCACGCTACAGGAAGA\n+GTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT\n+AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAA\n+TCCTGGAGTAGACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCAT\n+ATATTACACATTGCTTTTTGTACTTTGCATATAAAGTAGATGCTGATCTG\n+CTATCTGCATATATAGTAGCTGCAGATAGCACTGTAACTACATCTACATA\n+TTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATGCTGGACAACC\n+TCTTCCGATATTCATTATTT
TGGAAAAAAAAATATCAATGGCATTCCTAC\n+CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGT\n+TCACCCAATAGTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTAC\n+AAGCACCCATGCCTAAATCA\n'
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.poly
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.poly Wed May 02 20:18:11 2018 -0400
b
@@ -0,0 +1,2 @@
+18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0)
+67 2.9 1.4 0.0 scaffold_1 11981 12050 (2170) (CT)n Simple_repeat 1 71 (0)
b
diff -r 5673e72241aa -r bdfc22c1c3e3 test-data/small.fasta.stats
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.stats Wed May 02 20:18:11 2018 -0400
b
@@ -0,0 +1,51 @@
+==================================================
+file name: dataset_12.dat           
+sequences:             1
+total length:      14220 bp  (14220 bp excl N/X-runs) 
+GC level:         39.94 %
+bases masked:        378 bp ( 2.66 %)
+==================================================
+               number of      length   percentage
+               elements*    occupied  of sequence
+--------------------------------------------------
+SINEs:                0            0 bp    0.00 %
+      ALUs            0            0 bp    0.00 %
+      MIRs            0            0 bp    0.00 %
+
+LINEs:                0            0 bp    0.00 %
+      LINE1           0            0 bp    0.00 %
+      LINE2           0            0 bp    0.00 %
+      L3/CR1          0            0 bp    0.00 %
+
+LTR elements:         0            0 bp    0.00 %
+      ERVL            0            0 bp    0.00 %
+      ERVL-MaLRs      0            0 bp    0.00 %
+      ERV_classI      0            0 bp    0.00 %
+      ERV_classII     0            0 bp    0.00 %
+
+DNA elements:         0            0 bp    0.00 %
+     hAT-Charlie      0            0 bp    0.00 %
+     TcMar-Tigger     0            0 bp    0.00 %
+
+Unclassified:         0            0 bp    0.00 %
+
+Total interspersed repeats:        0 bp    0.00 %
+
+
+Small RNA:            0            0 bp    0.00 %
+
+Satellites:           0            0 bp    0.00 %
+Simple repeats:       8          378 bp    2.66 %
+Low complexity:       0            0 bp    0.00 %
+==================================================
+
+* most repeats fragmented by insertions or deletions
+  have been counted as one element
+                                                      
+
+The query species was assumed to be homo          
+RepeatMasker Combined Database: Dfam_Consensus-20170127
+                          
+run with rmblastn version 2.2.27+
+The query was compared to unclassified sequences in ".../dataset_2.dat"
+