changeset 3:bdfc22c1c3e3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/repeat_masker commit 8dacb4321122d92df0983e0794cf23572be03224
author iuc
date Wed, 02 May 2018 20:18:11 -0400
parents 5673e72241aa
children 04f5c3d7448e
files RepeatMasker.xml readme.rst repeatmasker.xml test-data/repeats.fasta test-data/small.fasta test-data/small.fasta.align test-data/small.fasta.cat test-data/small.fasta.gff test-data/small.fasta.log test-data/small.fasta.masked test-data/small.fasta.poly test-data/small.fasta.stats
diffstat 12 files changed, 1099 insertions(+), 370 deletions(-) [+]
line wrap: on
line diff
--- a/RepeatMasker.xml	Tue Sep 17 03:23:49 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,307 +0,0 @@
-<tool id="repeatmasker_wrapper" name="RepeatMasker" version="0.1.2">
-    <description>Masks different kind of repeats</description>
-    <requirements>
-        <requirement type="binary">RepeatMasker</requirement>
-    </requirements>
-    <command>
-## The command is a Cheetah template which allows some Python based syntax.
-## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
-
-## create temp directory
-#import tempfile, os
-#set $dirname = os.path.abspath( tempfile.mkdtemp() )
-#set $input_filename = os.path.split( str($query) )[-1]
-#set $output_basename = os.path.join( $dirname, $input_filename )
-
-
-RepeatMasker 
--parallel 8
-
-$nolow
-$noint
-$norna
-
-#if str($species)!="all":
-    $species
-#end if
-
-
--dir $dirname
-
-#if $adv_opts.adv_opts_selector=="advanced":
-
-    #if str($adv_opts.gc)!="0":
-        -gc $adv_opts.gc
-    #end if
-
-    $adv_opts.gccalc
-
-    #set $output_files_list = str($adv_opts.output_files).split(',')
-    #if "gff" in $output_files_list:
-        -gff
-    #end if
-    #if "html" in $output_files_list:
-        -html
-    #end if
-
-    $adv_opts.slow_search
-    $adv_opts.quick_search
-    $adv_opts.rush_search
-    $adv_opts.only_alus
-    $adv_opts.is_only
-
-#else:
-    ## Set defaults
-    -gff
-
-## End of advanced options:
-#end if
-
-$query
-
-2>&#38;1;
-
-## Copy the output files to galaxy
-## AgR: if there are no repeats, the output files may not exist.
-## This causes the job to fail, so touch files to ensure they exist.
-#if $adv_opts.adv_opts_selector=="advanced":
-
-    #if "summary" in $output_files_list:
-        ## Write out the summary file (default)
-        #set $summary_file = $output_basename + '.tbl'
-        touch $summary_file;
-        cp $summary_file $output_summary;
-    #end if
-
-    #if "gff" in $output_files_list:
-        ## Write out the gff file (default)
-        #set $gff_file = $output_basename + '.out.gff'
-        touch $gff_file;
-        cp $gff_file $output_gff;
-    #end if
-
-    #if "html" in $output_files_list:
-        ## Write out the html file
-        #set $html_file = $output_basename + '.out.html'
-        touch $html_file;
-        cp $html_file $output_html;
-    #end if
-
-#else:
-
-    ## Write out the summary file (default)
-    #set $summary_file = $output_basename + '.tbl'
-    touch $summary_file;
-    cp $summary_file $output_summary;
-
-    ## Write out the gff file (default)
-    #set $gff_file = $output_basename + '.out.gff'
-    touch $gff_file;
-    cp $gff_file $output_gff;
-
-
-## End of advanced options:
-#end if
-
-## Write out mask sequence file
-#set $mask_sequence_file = $output_basename + '.masked'
-touch $mask_sequence_file;
-cp $mask_sequence_file $output_mask;
-
-## Write out standard file (default)
-## The default '.out' file from RepeatMasker has a 3-line header and spaces rather
-## than tabs. Remove the header and replace the whitespaces with tab
-#set $standard_file = $output_basename + '.out'
-tail -n +4 $standard_file | tr -s ' ' '\t' > $output_std;
-
-## Delete all temporary files
-rm $dirname -r
-
-    </command>
-    <inputs>
-        <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> 
-
-        <param name="nolow" type="boolean" label="No low complexity DNA" truevalue="-nolow" falsevalue="" checked="false" help="Does not mask low_complexity DNA or simple repeats."/>
-        <param name="noint" type="boolean" label="No interspersed repeats" truevalue="-noint" falsevalue="" checked="false" help="Only masks low complex/simple repeats (no interspersed repeats)."/>
-
-        <param name="norna" type="boolean" label="No small RNA genes" truevalue="-norna" falsevalue="" checked="false" help="Does not mask small RNA (pseudo) genes."/>
-
-        <!--
-            Specify the species or clade of the input sequence. The species name
-            must be a valid NCBI Taxonomy Database species name and be contained
-            in the RepeatMasker repeat database. The following collection is not complete.
-        -->
-        <param name="species" type="select" label="Species" help="The list is not complete, if you need other species contact your administrator.">
-            <option value="-species anopheles">anopheles</option>
-            <option value="-species arabidopsis">arabidopsis</option>
-            <option value="-species artiodactyl">artiodactyl</option>
-            <option value="-species aspergillus">aspergillus</option>
-            <option value="-species carnivore">carnivore</option>
-            <option value="-species cat">cat</option>
-            <option value="-species chicken">chicken</option>
-            <option value="-species 'ciona intestinalis'">ciona intestinalis</option>
-            <option value="-species 'ciona savignyi'">ciona savignyi</option>
-            <option value="-species cow">cow</option>
-            <option value="-species danio">danio</option>
-            <option value="-species diatoaea">diatoaea</option>
-            <option value="-species dog">dog</option>
-            <option value="-species drosophila">drosophila</option>
-            <option value="-species elegans">elegans</option>
-            <option value="-species fugu">fugu</option>
-            <option value="-species fungi" selected="true">fungi</option>
-            <option value="-species human">human</option>
-            <option value="-species maize">maize</option>
-            <option value="-species mammal">mammal</option>
-            <option value="-species mouse">mouse</option>
-            <option value="-species pig">pig</option>
-            <option value="-species rat">rat</option>
-            <option value="-species rice">rice</option>
-            <option value="-species rodentia">rodentia</option>
-            <option value="-species ruminantia">ruminantia</option>
-            <option value="-species wheat">wheat</option>
-        </param>
-
-        <conditional name="adv_opts">
-            <param name="adv_opts_selector" type="select" label="Advanced Options">
-              <option value="basic" selected="True">Hide Advanced Options</option>
-              <option value="advanced">Show Advanced Options</option>
-            </param>
-            <when value="basic" />
-            <when value="advanced">
-              <param name="is_only" type="boolean" label="Mask only E coli insertion elements" truevalue="-is_only" falsevalue="" checked="false" help="Only clips E coli insertion elements out of fasta and .qual files."/>
-              <param name="slow_search" type="boolean" label="Slow search" truevalue="-s" falsevalue="" checked="false" help="0-5% more sensitive, 2-3 times slower than default."/>
-              <param name="quick_search" type="boolean" label="Quick search" truevalue="-q" falsevalue="" checked="false" help="5-10% less sensitive, 2-5 times faster than default."/>
-              <param name="rush_search" type="boolean" label="Rush search" truevalue="-qq" falsevalue="" checked="false" help="about 10% less sensitive, 4->10 times faster than default."/>
-              <param name="only_alus" type="boolean" label="Only Alus" truevalue="-alu" falsevalue="" checked="false" help="Only masks Alus (and 7SLRNA, SVA and LTR5)(only for primate DNA)."/>
-              <param name="gccalc" type="boolean" label="Use GC depended matrices, automaticly" truevalue="-gccalc" falsevalue="" checked="true" help="RepeatMasker calculates the GC content even for batch files/small seqs"/>
-
-              <param name="output_files" type="select" multiple="true" label="Additional output files">
-                  <option selected="true" value="summary">Summary file</option>
-                  <option value="gff">GFF file</option>
-                  <option value="html">HTML file</option>
-                  <option value="mask">Mask FastA file</option>
-              </param>
-
-              <param name="gc" type="integer" value="0" label="Use GC depended matrices" help="Use matrices calculated for 'number' percentage background GC level">
-                    <validator type="in_range" min="0" />
-                    <validator type="in_range" max="100" />
-              </param>
-
-            </when>
-        </conditional>
-
-    </inputs>
-    <outputs>
-        <data name="output_std" format="tabular" label="${tool.name} on ${on_string}: Standard" />
-        <data name="output_mask" format="fasta" label="${tool.name} on ${on_string}: Mask sequence">
-            <filter>
-                    (adv_opts['adv_opts_selector'] == 'advanced' and 'mask' in adv_opts['output_files'])
-            </filter>
-        </data>
-        <data name="output_summary" format="txt" label="${tool.name} on ${on_string}: Summary">
-            <filter>(
-                    (adv_opts['adv_opts_selector'] == 'advanced' and 'summary' in adv_opts['output_files'])
-                    or
-                    (adv_opts['adv_opts_selector'] == 'basic')
-                    )
-            </filter>
-        </data>
-        <data name="output_html" format="html" label="${tool.name} on ${on_string}: HTML">
-            <filter>(adv_opts['adv_opts_selector'] == 'advanced' and 'html' in adv_opts['output_files'])</filter>
-        </data>
-        <data name="output_gff" format="gff" label="${tool.name} on ${on_string}: GFF">
-            <filter>
-                    (adv_opts['adv_opts_selector'] == 'advanced' and 'gff' in adv_opts['output_files'])
-            </filter>
-        </data>
-    </outputs>
-    <help>
-    
-.. class:: warningmark
-
-**What it does**
-
-RepeatMasker is a program that screens DNA sequences for *interspersed repeats*
-and *low complexity* DNA sequences. The output of the program is a detailed
-annotation of the repeats that are present in the query sequence as well as a
-modified version of the query sequence in which all the annotated repeats have
-been masked (default: replaced by Ns).
-
------
-
-**How to read the results**
-
-
-
-The annotation file contains the cross_match output lines. It lists all best matches
-(above a set minimum score) between the query sequence and any of the sequences in
-the repeat database or with low complexity DNA. The term "best matches" reflects
-that a match is not shown if its domain is over 80% contained within the domain
-of a higher scoring match, where the "domain" of a match is the region in
-the query sequence that is defined by the alignment start and stop. These domains
-have been masked in the returned masked sequence file. In the output, matches are
-ordered by query name, and for each query by position of the start of the alignment.
-
-Example:
-
-======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
-SW score perc div. perc del. perc ins. query seq. q-pos begin q-pos end (left)    w complement matching repeat repeat class/family repeat-pos begin repeat-pos end (left)  ID
-======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
-    1306 15.6      6.2       0.0       HSU08988   6563        6781      \(22462)  C            MER7A           DNA/MER2_type       336              103            \(0)    1
-   12204 10.0      2.4       1.8       HSU08988   6782        7714      \(21529)  C            TIGGER1         DNA/MER2_type       2418             1493           \(0)    2
-     279  3.0      0.0       0.0       HSU08988   7719        7751      \(21492)  +            (TTTTA)n        Simple_repeat       1                33             \(0)    3
-    1765 13.4      6.5       1.8       HSU08988   7752        8022      \(21221)  C            AluSx           SINE/Alu            289              1              \(23)   4
-   12204 10.0      2.4       1.8       HSU08988   8023        8694      \(20549)  C            TIGGER1         DNA/MER2_type       1493             827            \(925)  5
-    1984 11.1      0.3       0.7       HSU08988   8695        9000      \(20243)  C            AluSg           SINE/Alu            305              1              \(5)    6
-   12204 10.0      2.4       1.8       HSU08988   9001        9695      \(19548)  C            TIGGER1         DNA/MER2_type       827              2              \(1591) 7
-     711 21.2      1.4       0.0       HSU08988   9696        9816      \(19427)  C            MER7A           DNA/MER2_type       122              2              \(224)  8
-======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
-
-This is a sequence in which a Tigger1 DNA transposon has integrated into a MER7 DNA transposon copy.
-Subsequently two Alus integrated in the Tigger1 sequence. The simple repeat is derived from the
-poly A of the Alu element. The first line is interpreted like this:
-
-:Table description:
-
-1. **1306** = Smith-Waterman score of the match, usually complexity adjusted
-        The SW scores are not always directly comparable. Sometimes
-        the complexity adjustment has been turned off, and a variety of
-        scoring-matrices are used.
-
-#. **15.6** = % substitutions in matching region compared to the consensus
-#. **6.2** = % of bases opposite a gap in the query sequence (deleted bp)
-#. **0.0** = % of bases opposite a gap in the repeat consensus (inserted bp)
-#. **HSU08988** = name of query sequence
-#. **6563** = starting position of match in query sequence
-#. **7714** = ending position of match in query sequence
-#. **(22462)** = no. of bases in query sequence past the ending position of match
-#. **C**       = match is with the Complement of the consensus sequence in the database
-#. **MER7A**   = name of the matching interspersed repeat
-#. **DNA/MER2_type** = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)
-#. **2418**    = starting position of match in database sequence (using top-strand numbering)
-#. **1465**    = ending position of match in database sequence
-#. **(0)**     = no. of bases in (complement of) the repeat consensus sequence prior to beginning of the match (so 0 means that the match extended all the way to the end of the repeat consensus sequence)
-#. **1**    = Identifier
-
-An asterisk (\*) in the final column (no example shown) indicates that there is
-a higher-scoring match whose domain partly (&lt;80%) includes the domain of this match. 
-
-Note that the SW score and divergence numbers for the three Tigger1 lines are identical.
-This is because the information is derived from a single alignment (the Alus were deleted
-from the query before the alignment with the Tigger element was performed).
-The program makes educated guesses about many fragments if they are derived from
-the same element (e.g. it knows that the MER7A fragments represent one insert).
-In a next version I can identify each element with a unique ID, if interest exists
-(this could help to represent repeats cleaner in graphic displays). 
-
-
--------
-
-**References**
-
-Smit, AFA, Hubley, R and Green, P. RepeatMasker Open-3.0.
-
-http://www.repeatmasker.org/
-
-    </help>
-</tool>
--- a/readme.rst	Tue Sep 17 03:23:49 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-===============================
-Galaxy wrapper for RepeatMasker
-===============================
-
-This wrapper is copyright 2013 by Björn Grüning.
-
-This is a wrapper for the command line tool of RepeatMasker from the Institute for Systems Biology.
-http://www.repeatmasker.org/
-
-
-Smit, AFA, Hubley, R & Green, P. RepeatMasker Open-3.0.
-1996-2010 <http://www.repeatmasker.org>. 
-
-
-Additional Information:
-Using RepeatMasker to identify repetitive elements in genomic sequences.
-http://www.ncbi.nlm.nih.gov/pubmed/19274634
-
-============
-Installation
-============
-
-To install RepeatMasker, please use the following instructions:
-
-http://www.repeatmasker.org/RMDownload.html
-
-To install the wrapper copy the file RepeatMasker.xml in the galaxy tools
-folder and modify the tools_conf.xml file to make the tool available to Galaxy.
-Add a line like the following:
-
-Add the tool definition to your tool_conf.xml file under Galaxy root.
-	<tool file="RepeatMasker/RepeatMasker.xml" />
-
-=======
-History
-=======
-
-- v1.1: Initial public release
-- v0.1.1: patch from Simon Guest, to create empty files if no repeat is found
-- v0.1.2: remove trailing semicolon, redirect all output to stdout
-
-===============================
-Wrapper Licence (MIT/BSD style)
-===============================
-
-Permission to use, copy, modify, and distribute this software and its
-documentation with or without modifications and for any purpose and
-without fee is hereby granted, provided that any copyright notices
-appear in all copies and that both those copyright notices and this
-permission notice appear in supporting documentation, and that the
-names of the contributors or copyright holders not be used in
-advertising or publicity pertaining to distribution of the software
-without specific prior permission.
-
-THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
-WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
-OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-OR PERFORMANCE OF THIS SOFTWARE.
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repeatmasker.xml	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,230 @@
+<tool id="repeatmasker_wrapper" name="RepeatMasker" version="4.0.7" profile="17.01">
+  <description>RepeatMasker</description>
+
+  <requirements>
+    <requirement type="package" version="4.0.7">repeatmasker</requirement>
+  </requirements>
+
+  <command detect_errors="exit_code"><![CDATA[
+    ## Stage a private library directory: link the packaged libraries into it, then add the uploaded RepBase file if one is used.
+    RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries &&
+    mkdir lib && export REPEATMASKER_LIB_DIR=\$(pwd)/lib &&
+    for file in \$(ls \$RM_LIB_PATH) ; do  ln -s \$RM_LIB_PATH/\$file lib/\$file ; done &&
+    #if $repeat_source.source_type == "repbase":
+      cp '${repeat_source.repbase_file}' lib/RMRBSeqs.embl &&
+    #end if
+    ln -s '${input_fasta}' rm_input.fasta &&
+    RepeatMasker -dir \$(pwd)
+    #if $repeat_source.source_type == "library":
+      -lib '${repeat_source.repeat_lib}'
+      -cutoff '${repeat_source.cutoff}'
+    #else if $repeat_source.source_type == "repbase":
+      #if $repeat_source.species_source.species_from_list == 'yes':
+        $repeat_source.species_source.species_list
+      #else
+        -species '${repeat_source.species_source.species_name}'
+      #end if
+    #end if
+    -parallel \${GALAXY_SLOTS:-1}
+    ## Flags are emitted unquoted: a quoted unset flag would reach RepeatMasker as an empty '' argument.
+    ${gff}
+    #if $ignore_n_stretches:
+      -excln
+    #end if
+    ${advanced.is_only} ${advanced.is_clip} ${advanced.no_is}
+    ${advanced.rodspec} ${advanced.primspec}
+    ${advanced.nolow} ${advanced.noint} ${advanced.norna}
+    ${advanced.alu}
+    ${advanced.div}
+    ${advanced.search_speed}
+    ## Numeric options must be preceded by their option name, otherwise the bare number is read as an input file.
+    -frag '${advanced.frag}'
+    -maxsize '${advanced.maxsize}'
+    #if str($advanced.gc) != '':
+      -gc '${advanced.gc}'
+    #end if
+    #if $advanced.gccalc:
+      -gccalc
+    #end if
+    ${advanced.nocut}
+    ${advanced.keep_alignments} ${advanced.invert_alignments}
+    ${advanced.xout} ${advanced.xsmall}
+    ${advanced.poly}
+    rm_input.fasta &&
+    ## Unless only IS elements are clipped, move or reformat the result files into the Galaxy outputs.
+    #if $advanced.is_only != '-is_only':
+      mv rm_input.fasta.masked '${output_masked_genome}' &&
+      sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ;  1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in  query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' rm_input.fasta.out >'${output_log}' &&
+      mv rm_input.fasta.tbl '${output_table}' &&
+      #if $gff == '-gff':
+        mv rm_input.fasta.out.gff '${output_gff}' &&
+      #end if
+      #if $advanced.keep_alignments == '-ali':
+        mv rm_input.fasta.align '${output_alignment}' &&
+      #end if
+      #if $advanced.poly == '-poly':
+        sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' rm_input.fasta.polyout >'${output_polymorphic}' &&
+      #end if
+    #end if
+    mv rm_input.fasta.cat '${output_repeat_catalog}'
+    ]]>
+  </command>
+
+  <inputs>
+    <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" />
+    <conditional name="repeat_source">
+      <param label="Repeat library source" name="source_type" type="select">
+        <option selected="true" value="repbase">RepBase</option>
+        <option value="library">Custom library of repeats</option>
+      </param>
+      <when value="repbase">
+        <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />
+        <conditional name="species_source">
+          <param label="Select species name from a list?" name="species_from_list" type="select">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+          </param>
+          <when value="yes">
+            <param name="species_list" type="select" label="Species">
+              <option value="-species anopheles" selected="true">anopheles</option>
+              <option value="-species arabidopsis">arabidopsis</option>
+              <option value="-species artiodactyl">artiodactyl</option>
+              <option value="-species aspergillus">aspergillus</option>
+              <option value="-species carnivore">carnivore</option>
+              <option value="-species cat">cat</option>
+              <option value="-species chicken">chicken</option>
+              <option value="-species 'ciona intestinalis'">ciona intestinalis</option>
+              <option value="-species 'ciona savignyi'">ciona savignyi</option>
+              <option value="-species cow">cow</option>
+              <option value="-species danio">danio</option>
+              <option value="-species diatoaea">diatomea</option>
+              <option value="-species dog">dog</option>
+              <option value="-species drosophila">drosophila</option>
+              <option value="-species elegans">elegans</option>
+              <option value="-species fugu">fugu</option>
+              <option value="-species fungi">fungi</option>
+              <option value="-species human">human</option>
+              <option value="-species maize">maize</option>
+              <option value="-species mammal">mammal</option>
+              <option value="-species mouse">mouse</option>
+              <option value="-species pig">pig</option>
+              <option value="-species rat">rat</option>
+              <option value="-species rice">rice</option>
+              <option value="-species rodentia">rodentia</option>
+              <option value="-species ruminantia">ruminantia</option>
+              <option value="-species wheat">wheat</option>
+            </param>
+          </when>
+          <when value="no">
+            <param name="species_name" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" />
+          </when>
+        </conditional>
+      </when>
+      <when value="library">
+        <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" />
+        <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" />
+      </when>
+    </conditional>
+    <param type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" />
+    <param name="ignore_n_stretches" type="boolean" argument="-excln" truevalue="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" />
+    <section name="advanced" title="Advanced options" expanded="false">
+      <param argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" />
+      <param argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS element, with this option selected it will clip them before analysis" />
+      <param argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" />
+      <param argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is selected, a check for rodent specific repeats is done instead of a full RepeatMasker run" />
+      <param argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is selected, a check for primate specific repeats is done instead of a full RepeatMasker run" />
+      <param argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." />
+      <param argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." />
+      <param argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" />
+      <param argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" />
+      <param argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" />
+      <param type="select" name="search_speed" label="Search speed vs sensitivity trade-off">
+        <option value="">Default</option>
+        <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option>
+        <option value="-qq">Rush (10% less sensitive)</option>
+        <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option>
+      </param>
+      <param type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequence that is searched without fragmenting" />
+      <param type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" />
+      <param type="integer" argument="-gc" optional="True" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" />
+      <param type="boolean" argument="-gccalc" truevalue="-gccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" />
+      <param type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" />
+      <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" />
+      <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" checked="false" label="Output alignments file" />
+      <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" />
+      <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" />
+      <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" />
+    </section>
+  </inputs>
+  <outputs>
+    <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" />
+    <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}">
+      <filter>not advanced['is_only'] and advanced['keep_alignments']</filter>
+    </data>
+    <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}">
+      <filter>not advanced['is_only'] and advanced['poly']</filter>
+    </data>
+    <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}">
+      <filter>not advanced['is_only'] and gff is True</filter>
+    </data>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="4">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="library" />
+      <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
+      <output name="output_masked_genome" file="small.fasta.masked" />
+      <output name="output_table" file="small.fasta.stats" lines_diff="2" />
+      <output name="output_repeat_catalog" file="small.fasta.cat" />
+      <output name="output_log" file="small.fasta.log" />
+    </test>
+    <test expect_num_outputs="7">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="library" />
+      <param name="gff" value="-gff" />
+      <!-- <param name="show" value="yes" /> -->
+      <param name="keep_alignments" value="-ali" />
+      <param name="poly" value="-poly" />
+      <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
+      <output name="output_masked_genome" file="small.fasta.masked" />
+      <output name="output_table" file="small.fasta.stats" lines_diff="4" />
+      <output name="output_repeat_catalog" file="small.fasta.cat" />
+      <output name="output_log" file="small.fasta.log" />
+      <output name="output_alignment" file="small.fasta.align" />
+      <output name="output_polymorphic" file="small.fasta.poly" />
+      <output name="output_gff" file="small.fasta.gff" lines_diff="4" />
+    </test>
+  </tests>
+  <help><![CDATA[
+RepeatMasker is a program that screens DNA for interspersed repeats and low
+complexity DNA sequences. The database of repeats to screen for can be
+provided as a FASTA file or downloaded from RepBase_. If the RepBase option is
+chosen the RepBaseRepeatMaskerEdition file should be downloaded and
+unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should
+be uploaded to Galaxy for use with this tool.
+
+Further documentation is available on the RepeatMasker homepage_.
+
+.. _RepBase: http://www.girinst.org/repbase/
+.. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html
+    ]]>
+  </help>
+  <citations>
+    <citation type="bibtex">
+      @misc{RepeatMasker,
+        title = {RepeatMasker Open-4.0},
+        howpublished = {\url{http://www.repeatmasker.org}},
+        author = {Smit, AFA and Hubley, R and Green, P.},
+        year = {2013-2015}}
+    </citation>
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/repeats.fasta	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,64 @@
+>Asian_seabass_ONSAT_SB_Concensus_Lenght_170_bp
+CGAAAAATTTAATAATTTAGGGGTCTTGAGCATGGGCGTGGTAAAATGCCCTCGGTAGCG
+CCACCTACATTTTTAAACGGAACAGCCCCTCAAGCCCGTTGCGCCTAAAAATCTGAAAAT
+CTGCACACATATGTAACATCCCATGACGCACCAAAAAGTCTCTTGGAGCCA
+>Asian_seabass_MOSAT_SB_T_34a_satellite_DNA
+TTTGTGACATCACTACATAGTTTGTTGAAAACGTAC
+>Asian_seabass_MOSAT_SB_T_34b_satellite_DNA
+TTTGTGACATCACACATAGTTGTGGGTCAGTAC 
+>Sat_38
+AAAAAATGTCATAGTATAGTATGGCGTCAAAAAACATG
+>Asian_seabass_Sat_217_Consensus_Length_217_bp
+AGTAAACAAGCATTATGGTTGAAACCATAATTTCCTGTCGGGAGAGCCTTTCCCTCTTTT
+GTGCACTGTATGCAATCCCAGAGTGTGAATAAGCGCTTTTCCAGCGTTTTGAGGCTTATT
+CAGCTCAGAATGGCTTAATACTGCACTATCTGACCAGGACAAAGACAAACTAAGAACTCA
+GCCACACGGACATGAAAGTTGTTTTACTTACAATATT
+>Asian_seabass_Sat_217_Consensus_Length_427_bp
+CCAGCGTTTTGAGGCTTATTCAGCTCAGAATGGCTTAATACTGCACTATCTGACCAGGAC
+AAGACAAACTAAGAACTCAACACACTGACATGAAAGTTTCTATATTTCTATTAAAATAAA
+CAAACATTATGGTTGAAACCATAATTTCATTTCGGGAGAGCCTTTCCCTCTTTGTGGCAC
+TGTATGTAATCTGAAGTGTGAATAACGCTTTTCCCGCGTTTGAGGCTTATTCAGCTCAGA
+ATGGCTTAATACTGCACTATCTGACCAGGACAAGACAAACTAAGAACTCAGCCACACGGA
+CATGAAAGTTGTTTACTTTACAATATTAGTAAACAAGCATTATGGTTGAAACCATAATTT
+CCTGTCGGGAAGAGCTTCCCTCTTTTTGTGCACTGTATGCAATCCCAGAGTGTGAATAAG
+CGCTTTT
+>Asian_seabass_Sat_LM_Consensus_Length_453_bp
+CCCAACAAAATAGCACTTACTGCTTTCCAAATTCAAGTCTGTGCCTGTGTTAGGGTAATG
+TTGGTGATAAATTGATGTACTTAGCTTATCCTGTGAAAATGGTGTAAAATGGCAGTCTCC
+ATGTTTGTTGAAGAAACTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAG
+AAATCATGAGTGGCTCTTATTGAAGTGATCAGAGTTGGACTCCATTAAGAAAATGGCTTT
+CAATGTAGTCAGTTGCTTTGTCACTTAAAACTGCCCCAACAAGTAGCACTTACTGCAATT
+TGGTAGTCAAAGTCAATGCTCAAGTTAAGGGTAATAGTAGTGACATAATAGAAAAATCTC
+TAAATATATTTGCTGGCAGCTTTAATATACAGAATGAGTGCCATGATGAATTCTTGATAC
+AGAACAGGGACTTCCAAAATCAGCCAACACTAA
+>Asian_seabass_Sat_LM_Consensus_Length_218_bp
+CCCAACAAAATAGCACTTACTGCTTTCCAAATTCAAGTCTGTGCCTGTGTTAGGGTAATG
+TTGGTGATAAATTGATGTACTTAGCTTATCCTGTGAAAATGGTGTAAAATGGCAGTCTCC
+TGTTTGTTGAAGAAACTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAGA
+AATCATGAGTGGCTCTTATTGAAGTGATCAGAGTTGG
+>Asian_seabass_Sat_LM Consensus_Length_150_bp 
+CTCACAACCAAGACAACCCCAAAGAGAAGCACTTATGAAGACAGAAATCATGAGTGGCTC
+TTATTGAAGTGATCAGAGTTGGACTCCATTAAGAAAATGGCTTTCAATGTAGTCAGTTGC
+TTTGTCACTTAAAACTGCCCCAACAAGTAGCA
+>Asian_seabass_Sat_Unk_Consensus_Length_341_bp
+GTTTGGGTAAAAATATTGTCTATTTACGAGCTATCCTCTATATATTTTTGATCTGATTAA
+TAAATACCTCTAGCCCACAAACTGTGGTATTTTGCTATGTGGCAGTTCACCCGAAAGTCC
+AGTAGCATTTATCGCATTTTCTAAAGATAGTCAGTGCCTGAAAGTTTGAGGCAGATAAAC
+AAATTGTTCAAGTAAGAACTATATCTTTCTTATGATTTTACCGCAATCATACAGGTTGTT
+TCTTGTGGTCTGCTGGGCATTGTATCCCTTTGTTGTATGGATTTTTCCTTTCTTTAATGA
+TCTCCTCCCTGGAGTTTGTAATCCCTGTTTGTAGTGGAATT
+>Asian_seabass_Sat_Unk_Consensus_Length_789_bp
+CAAAAAAATGGAAAAAAAAAAAGTGGCTCATTTGAAGTGAATCAGAGTTGGGTAACATTG
+TCCTTAATTACAGCTATGCTTATATACTTGATCTGATAATAATACTCTAGCCACAATCTG
+TGGGTATTCTGCTATTTGCAGTCCACCCAAAGTCATAGCATTTAATCCATTCTAGGAAGA
+TACGTCAGTGCTGAGTTGAGCCAATAACACCAAATTGTCATAAGAACTATTATTTTCTTT
+ATGATTTTTACGCATCAGTACAGGTGTTTTCTTGTGGTTTCTGCTGCATGTATCCCTGTT
+GTAAGTGGATTCTCCTTTGCTTATGATCTCCTCCGCTGCGTTGTATCCTGTTTGTAGTGG
+ATTTCCTTGCACTGATTCTGCCCGCTCTTAATACTGAATGAACGCCCTCCCACCAGTAGT
+ACTGCCACCTTTGTTTTTTCACAAAGTGTTCAATGGCCTTGATAATGACTTGGTAACTAC
+ATCACCACTTTTTTGTACGTAATCAACAGACAATCACCCATTAAATTCATGCGGCCATTA
+GGCAGCTTGATGAGCTGGACATTTGACCTCCCCAATGAATCTTTGCTGATGGGAAACTTA
+TTGCCACCATAAAGTGAACCAGCATTGCTATGTCCATGCTGTAATTTGAAAACCCAATAC
+AAAGAAGCCCTGCATATATACCTCCCAAATTGGCTTCTGAGACCCGGGCGTAATCACTGC
+ATGGCCATAGATAAACATTGAAATCTTCTGTTGCAGTCAGTTTGCTTTTTGTCAACTTTA
+AAAACTGCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,238 @@
+>scaffold_1
+TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATCCAATATATTC
+CTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGACATCCTCAGTGATTGAAGTGA
+CAAAAAGTGGAGCGCACGCAGTGGTCATTACCCTCCAATGGTACTTCTAATTAAGGAAAG
+ATTTTTTGCATTCACTGAGCAAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGG
+GTCCATTAAACAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT
+AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGCCCCAAATTGT
+CTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATAATTGTGTTTCTCTCTAGATG
+TGCGGTGTCCATCAAGACACTGGATGTCACGTGGTCACTCCTGTGAAGAGCGAACTGTGT
+GGAACCCGAAGTACTGTGTGGTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGG
+TGGTGAGAGCAGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC
+TCTCATCTGCTCGTGTGTGTGTGTGTGTGTGTCCTTGTCAATGTCATTACACAATTTATG
+TTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACATCTACTTTCTGTCACCTGATA
+CCTCATACTTTACACTCTAATCCCTTTTTCCTTTCTAACTGCTGCTGTGTTAAGCTGTCA
+TATTAATATACTAAATAATGATAATATTAATTCTAATAATGATAGTGAATGGAGATTCAC
+AATGAAGAAACACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT
+TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCAAGGCTGAGTC
+TGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGGATTATGCTAGTTTGTGTGAG
+ATTAGAGGTCCTGTTGTTACGAGCAAATCAGCAGTCAGCAGGGTAATCTAAATTACATTG
+TTCTGTCGGCAATATCCTCTCCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCC
+CCACAGTAGTGCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT
+ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTGTGATTGTTGG
+TGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGATCTCCTGGGATTTTCACACAC
+GGCATTCTCTAGAGTTTACTCAGAACGGTGTGGAAAGCAAAAAAACATCCCGCGAGCTGC
+AATCCTGCGGATGGAAACACCTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGAT
+CAATCTGACAGAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA
+AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTACAGCAGCAGAT
+GAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACATCTCAGACTGCACAGGACACC
+AAAACAAAAACACATCCTGTTTTCTACTGGTGGTAGAGTCACAATTTGGCAATAAGATAA
+ATCCATGGACCCAACTTGCCTTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATG
+TTTTCTTTACACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG
+TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGATACCTGCGGC
+AATACACCCCCACAAACGCAGCTCCCTGCATTTTAAGTATTATAGAGAGTAATTTCACTG
+TCCATGTCTTTTTTGATCATAAAGCAGGTCTAGGTGCTGTATACAACACTGTATTGAAAT
+GCTGAAGCAGGTCTAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCCAATATATC
+AATCTATCTGTCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT
+GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCACCACAGCATTT
+ACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGTTACCTGTTATTGGCCTGGCT
+CTATGGCATGCAGAAAACAGTCAGCCAATCAGAGGACAGACTCAGAGACAGACACAAAGT
+GCCCTGTTCTTGTTAGAGCAGAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGA
+TGGTTTTTTGGTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA
+AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGTTTCGTAGTTC
+TAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTAAAGCGGCTACATGTAGAATT
+TGACCCACTTTGGTGCCCACATATGGTAACTAAAACACTACAGACAGTATGCACTCCAAC
+CCTATATAATTTATGTCCTTCAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGAT
+TAATACCTAGAATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG
+AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAACCTGGTCATT
+CCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCTCCTGCTGTGGGTGCCAATAG
+CTCATGCAAGTTGGTGTCTTTCAACTAAGGCCACTATGTGAGGACCATCAGCAGCAAATT
+AAATTAGAAGTGCCTTGGAGTTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGA
+CAGAACAGTGCCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT
+GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAAGGCATAGTTA
+CTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGCTGGAACTTGGAACATGTAGC
+CTAAGTTTCCCTTTGTCTGCAATTACTGAGGTATGCCATGTTGAAATAGAACAATAATTG
+CAGGGAGGAATTATTCCAATCGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTT
+CACAGCAACAAAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT
+CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAAGCGATGATAC
+AGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACATGGTTTAAACAAGCTCAGTT
+AAATGGATCAGAGCATCAACTGCTGTGAACAAGCAATCACACGGTCAAGGTCAAACATTT
+ACACTTCTGAGAGATCTGGAGAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTA
+ACTGATTACGCCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA
+TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCATGCAGCCTAAT
+TTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATGTTTTAGGCAGTTTTAGGCAC
+TAAAGGTGAACTGAGGATGCAGTCCCACGATTAATTTTTATTCATCAGTTAACCTCATGT
+GAAGTGTAGTAAACAGAAAAAACCTAAATCAGATCAGTATTTCTTATGACCCCCCTCCCC
+TACCACCCATCTTTAAAACAGCTCCAGTTCTCCTCAATCCCCCTGTACTTAGGTAGTTTT
+TTTCTAGCGTCTTGGAGAAGTTGCCTCAGTTCTTCTGTGGATTTTAGGCTGTCTCTGTGT
+CTTCTGTCTCCTCATGTATTCTCAGACTGACTCCATGATGTTGAGATCAGGGCTCTGTGG
+TGGGGGGGTCAGACCATCTGTTGCGGGACTCCTTGTTCTTCTTGTCTCTGAGGATAGTTC
+TTTATGATGCTGACTGTGTGATTGGACTCATTGTCCTGCTGCAGAATTAATCTGGGACTG
+ATCAGACATCTCCCTGATTGTACTGACAAAGGATAAGAGCAAAAACCTGTAGACTGCCTA
+AAACTTCTGCACAGTACTGCACAATACTGTATGTATGTGTGATAATACTCAGAAGTGATA
+TAGAGTGGTTCAGGAGAAGCCTTAACTTACTTGTTCTTCTTTTCCCTATGATGCAATTTC
+TCTCCAAGCAAACAGTACATAATGAAATATGTCTAAGAAAACACTAGCTGTTTGTCTTTT
+TGAAACACATTTAATAATTCAGATTTAGCCACAAATTAAAAATGAGCTTATACTTTGAAC
+TTGCCCATTGGTTTTGATGTATGAAGCTGACAGATTTAGTCCACTGCATACTGCACTTTT
+TTGGATAGTGAAATAAATTAATTTATTATGCAGCATTTAAGCATATTGCTGTTATCTAGA
+ATTTTATATATTAATAGGTATATATTATATTAACTCTTCATTTGCTTAAATTTGGCCTGT
+ATATTCCTCCATTTTATACAACCTTTAGAAAACACTGGAGTGAACAAAAATGTGAGGTTC
+AAAAGTGAGAGGAAAGAAAAATCAGTGTGTGCAGCAAGACTGTAAGAGTCCCTGAACAAA
+AGTGTTGATTTATTGTTGGCTGTGAAAGTTTTTGCTGCAGGGGTTTTTGTAAACAGTGAC
+TCTTCTTTATGTCTTTGTTTCTATCTTTTTGTGTTTTCTTTTTTCATCTTATTCTATTTT
+TCTCTCTCTCTCTTCGGTCAGTTTCTCTTCTTCTCCTCCTCATACTCATGCTTCTCTTTT
+ACTCTAATGGAAAACATAGAATTGAGGTGCAGTATCACTGAGAACAGCCAATGCTGATAC
+TGTCATTTGTTATAGGATGTTTACTTAAGAAATGATTGTGGCCTTATTTGCCAAAGACGA
+TTAGGCCCTGAGCCATTTGTACAGCCAATGTGACACCTCCGTAACCATAAGAAAAATTAT
+GCTTCCACAAACAGAAAGAGAGACAAATACAGACAGTGAAAGCAAAACTATGAGAGAAGG
+GATAGCCATCTCAGCATTCAGCCAGTGTGTCACATGTCCAGAGGGCTGCAATATGAAGCC
+AATAAACTGAGAGAAAGGTGTGATTCCAGAGGGCTTTCCGGACTTTAAACTTTTCTCATT
+TTCACCTTCAGCTCGTTGTGAAGCCTTCAATTGTTCCTTGGTCACATGAAGCCCCCTGTG
+TGTAAAATTGACATTCAGCTTATTACTCTCATCTACATCTCTAAAGTTGCACATGACTGC
+CAGTAGAGGGTGCTCATGATTTGAGACAAGTAACTTTTCTGTGCTTGTGTCAACAGTTAA
+AAATTCCCTATGGATACTTGAAAGAAATGCACCAGTGTTTAAATTTAGACTCAGTCCAAG
+TAGGCTTTGCCATTTGCTTTACTGTTGTAATCAAATCAGTGCTCACAGTACATCAGTGGC
+AAAAGCAATTAGCTTAATTGAGTTAGTAGGGAGTGAAAAGATAGATCACTATTTCCTGGC
+AGCGATATATTTTTCTCAGTTAACCTTCTCACAGAAAATTAGTTAGATGAGTTTCTTATC
+TCATTAGAACCATTGTTGTTTATTATTTAAACCTGACCTTTATCCAGAGAATGTTTTTCA
+CAGTAAGCATGTGTTTCACTCCAGCTCATACAGACACACCTTGTGGCCAGTCAGGGAATG
+TGCATTAAAACTAATGTTCTCTCTGGTAGAGCCATTCTGCCTATTCTGTTATTCACCAAA
+ACTTTCCTGCCCAGATTTTTCCATCTAGTCAGGGGACTTTTTCCTCAGCTGAACTGTCAG
+TAGCCAGCTGCTCTGACCTTTACTCACCTCAGAGATTACTCATTCAAGTTATTTTACCAA
+TGATGGAATAAAGTGAATACACACATCGTGACTTCATCATACTTTCCCTCCTTTACCGTA
+CTTTCCTAACATCAGTTAGCAAATTATACACTTAAATATGCTAAAATAGAGATTTGTTTC
+AAATTGAAGAGCAACACAGGACACATTTGCTTACACAAGTGACTCTATCTTATTTTTTAA
+GTGGCTGTCATCCTCAATATCATCAATATTTTAGGCATGTGAATGTAGGTAATGAGTATT
+ACTAATTAATAATTGGATGAGTTTATGAATTCATCATTAAATAAGATTATTATGATAAAG
+CTACACATTATACTGCAATTCCGGGCCAACACTATTGTTTCGCCCTTACAGTACAGGACT
+GTACATATCACTGTCACTACAAATTGTTTCAGTGAGGCTCAGATTACAGCTCATCTTGCT
+TAGTCATTACGGCTCAGACCACCAGACAATGTCTTTACGTCAGAGAAAGTCTGAGTGAGA
+GCTGTCCTTAAAACCAAGTCCCATGAGGCAAGTAAAGAGGTGACTCGCTGAGAGGATCAG
+GGAGAGGGACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAAGAC
+AGAGAGGGTGCTGCATTGAGTCGAGAGGTGTGAGGATGTTCGATAAGCCATTATCACTCT
+GCTCGGAGGACGTCAGGCCCTATCCTTTTTTCTTTGAGTCACCACTTTCTCACCACAGCC
+ATCCTTCTTCCTTTGTCTGTTTAATCTTTTGCAGCCAAACATTTGGTATGCAAACTATAG
+GAAGATATGGTCTGACACCGACTGATGTGATGCTAATGCCTTCTCTGTTTTATCCAATGA
+CTCCTCATTTATCAGGACCAATCCAGCTATTGTGTGCTATTGGCCCAACACAGCATTAAA
+CATGGTGACCAGAATAGCCACAGCAAAATAAATAACGGGCCTCTGTGACTATTGTATGTG
+CATTTCTTACAGCTGTCATTTTTCTTTTCTGTTAGCTTTCATTCTACTGTGCGTGCTCTC
+TATCTGCCTACGTTGTATCTGTGCCTTTTGTTAAATTCATTCAGCCTTTTATCTGTCTTG
+AATTTCTTTCTCTGTGTCCTCCTTTCCCTCTCTGTCTCAGCAGATCCGTCCCTTGTTTTT
+GCAGGAGAAGCGCTCTGCGTCATGCAAGGTCCATCTTCTGCGACGCACCATCAGCGTTCC
+AGTGGAAACACAGTTTCCAGAACTCCATAGCCAGCTGTCCACTGAGAGCGGTGAGTAGCC
+TGAGGACATGCCTCTGCTGTGATTAGCATGCATCATTATTTCCATACACACACTGGAAAT
+CTTTTAATCGTTTTTCTTGACACAATTTAATTAATTTAAATAATTAATAACATTGAAATC
+AACTGTTAAACACTTGTTCAACATGGTTGTGGACAAAATCATCATAATGACTGTGGAGAC
+TAAGGAGAAACAATGCTTTGTTAAAATCTTTGCTCTGGTTTTGGCTTTTTCTATATTCTA
+TTTGTGTGTGCAGTGCTGCTAAACATCATCTTAGTCAGAGTCAAACTTGAGCACTCATGC
+AGATGCTTGTTCTATTCAGTGGTCAAATGGGCAGTGTAGCTTAGGAGAAGCACTTCTTTC
+CGCTCCTCAGTGACATAACCAGTCACACACTCACACAGGCAAGTGAGCCTTTTAAAATTG
+CTGATACTATTTTTTTCTACTTCATATCCACTAGATGGAGAGCTTGGTGCTTTGTAGCAC
+TCCAGTGTACTTGGAATGTTTCATCATAACCACTAAGGCAGACATTGATAAAGGCTTCGG
+TCTCTTTCAAACTGCAGGTGTCATGTTGCACTCTCCCTATTGCTTTTATGAAAATGTAGT
+GCCGGTAATATAACACAGTTACTCTCTGCATATATCACTATGTACACTTACTTCTATGGA
+AAAGATGGAGCGCCACAGTGAAAACTGTTTTGAGTCTGTGAGGGGAAAACACAGCATCAG
+TCACAGTGAAACACTAGGTGGCACTCAGGTTTGACATTCAAGCATTTGTATCCCACAGTT
+ACTGTTGCTGGGTTGTTGGCTGGCATGCAACTTAATATGATCTATCTTTAAATCAGTGTG
+TGCAGTGGTTATTTAGTTTAAGTGCTTTTTAATGATGTCACAACATTATTTTGTCTACAC
+CCTGCATACAGCACAGTATATTAAATTTAGGTTTTATTAAGTTAAGTAATGTTCTGAGGT
+GGCATTGCCCTCAGGTATATATCCCTCAGGCAGTGTTACTGGACAGCATATAGATTGTAA
+TGTTGTGTAAGCAGTGTTGTGTAAGCTTTTTTAACCAAAATGCTCTCATGTTTCTTTGTT
+ACCACAGTGGTTTTAGTGATGTTTTGTGCTGTGAACAGAATCATGATTTCTGCAGACACT
+CCACATCTGTTTAAACAGTGATTTTCACATCTGTCTGTAACTGAGTCAGTCAGTTTGTGG
+TTGGATTGATGTGTAGTGGGATCTACAGATCATCAACCCTCGGGGAAGCCATTTAGTTCT
+GTGTTAAATAAAAATACAACTTTTGAGCACTGTTTTTTCATATTTTTCTTCATCCCTTAG
+TTGATATTAAAGGTGCTATATGTAGATTTTTGCTATCACTACATAGCCAATGTTAGGATT
+AAGAACCCCAGCAACCCGAATGTTAACTCGTACTTTGCTTCTATTTCTATTACATATTTT
+CTTCTATGGAAGTTAGGATGTTAACCAGCTAACCCCAGGTCATCTCATAATACCACTTGG
+CAATAGTCAGTCACTGTTGCATTCAGTCTGCCCCTCCAGCATGAAAGGATGAAGAAGTAC
+CGCTACCCAGAAGGCGTAGTCTAACCCCTTGTCTTGTAAACACAGCAATGGCTGAAGCTC
+TTGGTAAGTAAACAGCTGTTAATGTCAGTGTTGGCTACATAGCTAAAACTTACACATAGC
+ACCGTTAAAAAAATAAATGGTAAGTTTTTGTCCTTTTAGTTAAAGCATGTAAATGTCAGT
+TAAAACAATTTAGAAAATTTCTCATTCAAATAGTCATTTTATTTTGTGTGCTATAAATAC
+CTTCAGTTTTTTTTCTTGTACTTTATTTTAAACTCTTGTCTTTCTCTTATGCCCCTTATC
+TACCAGGAGAGGTCACACTCTTGTTGATAGCATGATAAGGAAATGAGCAGAGATGTCCTT
+ATTTCTCCTCAAAGTTATTCCCTGCATAGCACATATTTGACTTTAAAAATACAAAGACTA
+CAGGTGCTAAATCACACACACAACACCCAAAAGAATAGAGGACTTTTAAATATGTGCATC
+CCATTTTCTAATCTATATTCACCACTGATACAAGTGATGACACGGTGATGAGAAAATAAC
+GTAACATACATTATTATACAAGATTGGTATGATTGCTGTAGGTTGGTTTTGTTTATCTGA
+GAAGGGACAGAGGCTAAAAGAATAAACAAAGTCTGCCAGCTACTTCACTAATTACAGAGT
+ACCCCTGCAACTCTGCCAATGACATTCTGAAACTTTTCCATGACTATTATGTAAGATAAT
+TTTTGAATCACTGATTCAGATTTCCATAGCCAAGATACACTATTCTGCATGTTGGCTAGA
+ATACATTACAATACATTAATATATTACTGTAGTGGTTATTATGTCAGCCATTTAAACATA
+ATTTCATACTTTCAGCTGTTCGCAGGGCAAACTTTTTATTTTTGTCACCTGAATCCAGTT
+GCATTTGGATTTGAATTCAGTGGTGACTGCCAATGAAAAATAACTCACAGGGGCACTTAG
+AGATCTGAGGCGGCATTGACTGAACATCGAAGGGCTACTATTACAGAAATGCAAGTGGAA
+AAAGAATCTGATATGATACTTGCCAGTATGAAAAGCATAATTAGGACTTTAAATGCATGT
+TTTAAATGTTTTTGTGAACCATAATTTCAGTGTAAAAGTTCCAGTTATATTTTAAAAGGA
+ACTCCAAATATCCAGTGGTAGCATCAAGGTAACCTGAGATAAGTGAAAAGTAATTCTAAA
+TTAATCAGTATGTTGAAATATCAGACATTTGTGTGCATTATTGTTAGTGTGACCATGGTG
+GGATACTTCAAACACACTTGTTCATTGGCAAAATAATTGTCACATGAATTAACACAAGTG
+ACACTTTCAGTATCATACTGACAGAATGTTTGTTGTTAACTTTATCCCTGAAAACAAACA
+CTAATACATGTTCTTGTCGCCCCTTCAGTGCCTAACCTTTAAAATTGTGCTTGTGAGTGT
+GTGTATGTGAGCGCACCTGTATGGTTTTCAAATAATGTTCCACATCACAACACTACAAAC
+ACTCACATGGTCTATGTTTTACTAAATTATTCATTGGCGTAGGCCCCACCTGCTCTTGTC
+CTTGTGTGACACAGGCTCTAAATAAGCAGCATGATGAATAAAAATGACACTGAGATGAAT
+AGGAATCCACAGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG
+AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAACATGCCAGAGA
+CAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTTAATTTTACTGAGGTGAAAGT
+TTTTTTTTTTTTTCTGGATAAAGCTTTGGAAAAGTTCTCAACTGTTGCTTCTTTAAAGAT
+GCTGGGGCATATTCTGCCCTATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAA
+GCTGTAACATTTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT
+ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTATCTTAATATT
+CCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCATCTCTCTCCTCATAGTTTTT
+CATTACAATTAGCTCTCATTAGATAGAATTGTATTTGTTGTCTTTGTGCCAGTCACTCCA
+GTCTATTTTGCCAGCACACAACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACA
+GTTTTAATGATGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG
+GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCATTTGGCCACAA
+AATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGTACAGGTTGCATATGAGCTGG
+TAGGAAAGATATAGAATCATTATCAGCTGATAATCTAACAGTAGCAGTCAGTGTAGATGC
+TGTGCTAACGCAAAGTTGTGAAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCT
+GCCAAGCTTAAGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC
+GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTTTTCAAGCAAA
+AATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAGATTAAATATCTTTTGGGTTT
+TGGCACAGGCTGGACAAAAAAACCTCTGAGACGCTGTGATAAGAATTTATTTTCACATTT
+TTTTTACTTTTCAGGGACTACACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCC
+CAAATGTTAACTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT
+TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTGGTCAGGTTTT
+TGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGGGAAGTTCCTAAATTCTTCTG
+GCATGTTAATGTTTTCCAAACTGATGTTACCAAGTCCTCGTTATGATGAGAAAAAAATAT
+GCTGAGAGTGAAATTGATCAAAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCA
+TCTTTCTCCCTCTCTCTCCACCGCAACGTGGAAACTGCTCCTCTCTCTCTCTCCCTCTCC
+CTCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCCTCCCTCCCA
+TTCTGTGCTCCGGTATACTCGCTCTCTCACTCTCTCCCTCTCTCCCCACTCTCCAGAGAG
+GGGTTCAGTCAGACAGATGTAACACAGCAGTAGAAGCCTGAGCTGAGCTGGCAGGCTGCG
+GAGGCCAGACCAGAGCCAGCAGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCA
+GCGGCAGCACAGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT
+TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCATCACCCTGTG
+AATTTAACATGGACCCTGACACCAGCACCCATCCAGAGACACAACATGGTGAGTGAGATT
+TGAAGGAGGAAAAGATTAGAAACAATGAGAGTGATACTGTGGGAAAAGTTGAGGAGCGTG
+TTAGTGAACAAGGGAGTCAGGTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGG
+CTCTCCATTTGGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC
+AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCAATGTGATGAA
+ATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTCTTGTCATTACTTTAACAGGG
+TTGTAAAGTTTTTTTTGCTTTTTTGTAGCTAAGAAAGTTGAGAATTGTTTTGTCTGAACT
+CTCTCTGGGATTTGTCTTGTCGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAG
+GAAGCTCTCCTCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT
+TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATATTTGTATGCTG
+TGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACAGAGGGGCACCAAAGAATGAA
+AGTGGGAGACAGAGGGAACGAGAAGGGGAGAGACCGAGAGAGAAGGACTTATGTACACAA
+ATAAATCCAGGGGGATCTAGACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTT
+CCCCCTCATTTTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA
+CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGAAAAGACGATG
+TTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTGCTGATGAAGAATGTGGTTAA
+CAGAATGAGTCAACAGAGAACATATTTCCAAGAGACTGTAGTTTCTCTTCGCCAGCGTCT
+AAGCCACATTGCTTTATGCACTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGA
+GGGGAAAGAGAATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC
+TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCACCTTCCCAAAA
+TAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTCTCAACCAGTCAGCCAGCCTC
+TTTCAGTGCATACAGCTTGAGGAGATCCCTTCTAAAGGTCCAATATAAATAGAAAAGTGG
+GAGTAGAAAGGGCAATAATCTGATATCATCTGATTACATTCACACCTCAGGCTTGCACGC
+TACAGGAAGAGTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT
+AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAATCCTGGAGTA
+GACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCATATATTACACATTGCTTTTTG
+TACTTTGCATATAAAGTAGATGCTGATCTGCTATCTGCATATATAGTAGCTGCAGATAGC
+ACTGTAACTACATCTACATATTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATG
+CTGGACAACCTCTTCCGATATTCATTATTTTGGAAAAAAAAATATCAATGGCATTCCTAC
+CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGTTCACCCAATA
+GTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTACAAGCACCCATGCCTAAATCA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.align	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,104 @@
+18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0 1
+
+  scaffold_1           613 GTGTGTGTGTGTGTGTGTGT 632
+                                               
+  (GT)n#Simple_          1 GTGTGTGTGTGTGTGTGTGT 20
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (0/0)
+Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0)
+
+
+16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1 2
+
+  scaffold_1           780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824
+                              v    -   v -      i      v    vv       i   
+  (ATAATA)n#Sim          1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45
+
+Matrix = Unknown
+Transitions / transversions = 0.40 (2/5)
+Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2)
+
+
+12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2 3
+
+  scaffold_1          2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274
+                               - i    v   v  iv    -i       vv v         
+  (CAGA)n#Simpl          1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46
+
+Matrix = Unknown
+Transitions / transversions = 0.50 (3/6)
+Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2)
+
+
+15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3 4
+
+  scaffold_1          4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898
+                              v i i             -  vv  vv i     -   -    -   
+  (TC)n#Simple_          1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50
+
+  scaffold_1          4899 -CTC 4901
+                           -   
+  (TC)n#Simple_         51 TCTC 54
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.10 (5 / 48), avg. gap size = 1.00 (5 / 5)
+
+
+13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4 5
+
+  scaffold_1          6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278
+                               v i -    v            -i  - i  v  -    v  v   
+  (TAATTAA)n#Si          1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47
+
+  scaffold_1          6279 AAATAA 6284
+                           -     
+  (TAATTAA)n#Si         48 -AATAA 52
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5)
+
+
+15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5 6
+
+  scaffold_1          6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597
+                                 v   i i           viv i     vi    v -v i  - 
+  (GACA)n#Simpl          1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48
+
+  scaffold_1          6598 GACAGAGAG 6606
+                                 v  
+  (GACA)n#Simpl         49 GACAGACAG 57
+
+Matrix = Unknown
+Transitions / transversions = 0.86 (6/7)
+Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2)
+
+
+67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0 7
+
+  scaffold_1         11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029
+                                        i     i     -                        
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50
+
+  scaffold_1         12030 CTCTCTCTCTCTCTCTCTCTC 12050
+                                                
+  (CT)n#Simple_         51 CTCTCTCTCTCTCTCTCTCTC 71
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (2/0)
+Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1)
+
+
+19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6 8
+
+  scaffold_1         12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113
+                              v       v       i       - i v     
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37
+
+Matrix = Unknown
+Transitions / transversions = 0.67 (2/3)
+Gap_init rate = 0.03 (1 / 35), avg. gap size = 1.00 (1 / 1)
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.cat	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,103 @@
+18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0
+
+  scaffold_1           613 GTGTGTGTGTGTGTGTGTGT 632
+                                               
+  (GT)n#Simple_          1 GTGTGTGTGTGTGTGTGTGT 20
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (0/0)
+Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0)
+
+16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1
+
+  scaffold_1           780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824
+                              v    -   v -      i      v    vv       i   
+  (ATAATA)n#Sim          1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45
+
+Matrix = Unknown
+Transitions / transversions = 0.40 (2/5)
+Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2)
+
+12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2
+
+  scaffold_1          2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274
+                               - i    v   v  iv    -i       vv v         
+  (CAGA)n#Simpl          1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46
+
+Matrix = Unknown
+Transitions / transversions = 0.50 (3/6)
+Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2)
+
+15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3
+
+  scaffold_1          4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898
+                              v i i             -  vv  vv i     -   -    -   
+  (TC)n#Simple_          1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50
+
+  scaffold_1          4899 -CTC 4901
+                           -   
+  (TC)n#Simple_         51 TCTC 54
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.10 (5 / 48), avg. gap size = 1.00 (5 / 5)
+
+13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4
+
+  scaffold_1          6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278
+                               v i -    v            -i  - i  v  -    v  v   
+  (TAATTAA)n#Si          1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47
+
+  scaffold_1          6279 AAATAA 6284
+                           -     
+  (TAATTAA)n#Si         48 -AATAA 52
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5)
+
+15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5
+
+  scaffold_1          6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597
+                                 v   i i           viv i     vi    v -v i  - 
+  (GACA)n#Simpl          1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48
+
+  scaffold_1          6598 GACAGAGAG 6606
+                                 v  
+  (GACA)n#Simpl         49 GACAGACAG 57
+
+Matrix = Unknown
+Transitions / transversions = 0.86 (6/7)
+Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2)
+
+67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0
+
+  scaffold_1         11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029
+                                        i     i     -                        
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50
+
+  scaffold_1         12030 CTCTCTCTCTCTCTCTCTCTC 12050
+                                                
+  (CT)n#Simple_         51 CTCTCTCTCTCTCTCTCTCTC 71
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (2/0)
+Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1)
+
+19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6
+
+  scaffold_1         12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113
+                              v       v       i       - i v     
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37
+
+Matrix = Unknown
+Transitions / transversions = 0.67 (2/3)
+Gap_init rate = 0.03 (1 / 35), avg. gap size = 1.00 (1 / 1)
+
+## Total Sequences: 1
+## Total Length: 14220
+## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
+## Total NonSub ( excluding all non ACGT bases ):14220
+RepeatMasker version open-4.0.7 , default mode
+run with rmblastn version 2.2.27+
+RepeatMasker Combined Database: Dfam_Consensus-20170127
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.gff	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,11 @@
+##gff-version 2
+##date 2018-04-21
+##sequence-region dataset_12.dat
+scaffold_1	RepeatMasker	similarity	613	632	 0.0	+	.	Target "Motif:(GT)n" 1 20
+scaffold_1	RepeatMasker	similarity	780	824	18.3	+	.	Target "Motif:(ATAATA)n" 1 45
+scaffold_1	RepeatMasker	similarity	2231	2274	23.9	+	.	Target "Motif:(CAGA)n" 1 46
+scaffold_1	RepeatMasker	similarity	4853	4901	18.4	+	.	Target "Motif:(TC)n" 1 54
+scaffold_1	RepeatMasker	similarity	6230	6284	19.1	+	.	Target "Motif:(TAATTAA)n" 1 52
+scaffold_1	RepeatMasker	similarity	6548	6606	28.3	+	.	Target "Motif:(GACA)n" 1 57
+scaffold_1	RepeatMasker	similarity	11981	12050	 2.9	+	.	Target "Motif:(CT)n" 1 71
+scaffold_1	RepeatMasker	similarity	12078	12113	15.4	+	.	Target "Motif:(CT)n" 1 37
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.log	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,10 @@
+SW score	% div.	% del.	% ins.	query sequence	pos in  query: begin	end	(left)	repeat	class/family	pos in repeat: begin	end	(left)	ID
+
+18	0.0	0.0	0.0	scaffold_1	613	632	(13588)	(GT)n	Simple_repeat	1	20	(0)	1
+16	18.3	2.2	2.2	scaffold_1	780	824	(13396)	(ATAATA)n	Simple_repeat	1	45	(0)	2
+12	23.9	4.5	0.0	scaffold_1	2231	2274	(11946)	(CAGA)n	Simple_repeat	1	46	(0)	3
+15	18.4	10.2	0.0	scaffold_1	4853	4901	(9319)	(TC)n	Simple_repeat	1	54	(0)	4
+13	19.1	1.8	7.7	scaffold_1	6230	6284	(7936)	(TAATTAA)n	Simple_repeat	1	52	(0)	5
+15	28.3	0.0	3.5	scaffold_1	6548	6606	(7614)	(GACA)n	Simple_repeat	1	57	(0)	6
+67	2.9	1.4	0.0	scaffold_1	11981	12050	(2170)	(CT)n	Simple_repeat	1	71	(0)	7
+19	15.4	2.8	0.0	scaffold_1	12078	12113	(2107)	(CT)n	Simple_repeat	1	37	(0)	8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.masked	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,286 @@
+>scaffold_1
+TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATC
+CAATATATTCCTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGAC
+ATCCTCAGTGATTGAAGTGACAAAAAGTGGAGCGCACGCAGTGGTCATTA
+CCCTCCAATGGTACTTCTAATTAAGGAAAGATTTTTTGCATTCACTGAGC
+AAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGGGTCCATTAAA
+CAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT
+AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGC
+CCCAAATTGTCTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATA
+ATTGTGTTTCTCTCTAGATGTGCGGTGTCCATCAAGACACTGGATGTCAC
+GTGGTCACTCCTGTGAAGAGCGAACTGTGTGGAACCCGAAGTACTGTGTG
+GTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGGTGGTGAGAGC
+AGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC
+TCTCATCTGCTCNNNNNNNNNNNNNNNNNNNNCCTTGTCAATGTCATTAC
+ACAATTTATGTTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACAT
+CTACTTTCTGTCACCTGATACCTCATACTTTACACTCTAATCCCTTTTTC
+CTTTCTAACTGCTGCTGTGTTAAGCTGTCNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNGTGAATGGAGATTCACAATGAAGAAA
+CACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT
+TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCA
+AGGCTGAGTCTGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGG
+ATTATGCTAGTTTGTGTGAGATTAGAGGTCCTGTTGTTACGAGCAAATCA
+GCAGTCAGCAGGGTAATCTAAATTACATTGTTCTGTCGGCAATATCCTCT
+CCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCCCCACAGTAGT
+GCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT
+ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTG
+TGATTGTTGGTGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGAT
+CTCCTGGGATTTTCACACACGGCATTCTCTAGAGTTTACTCAGAACGGTG
+TGGAAAGCAAAAAAACATCCCGCGAGCTGCAATCCTGCGGATGGAAACAC
+CTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGATCAATCTGACA
+GAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA
+AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTAC
+AGCAGCAGATGAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACAT
+CTCAGACTGCACAGGACACCAAAACAAAAACACATCCTGTTTTCTACTGG
+TGGTAGAGTCACAATTTGGCAATAAGATAAATCCATGGACCCAACTTGCC
+TTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATGTTTTCTTTAC
+ACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG
+TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGA
+TACCTGCGGCAATACACCCCCACAAACGCAGCTCCCTGCATTTTAAGTAT
+TATAGAGAGTAATTTCACTGTCCATGTCTTTTTTGATCATAAAGCAGGTC
+TAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCAGGTCTAGGTGC
+TGTATACAACACTGTATTGAAATGCTGAAGCCAATATATCAATCTATCTG
+TCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT
+GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCAC
+CACAGCATTTACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGT
+TACCTGTTATTGGCCTGGCTCTATGGCATGNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNCAAAGTGCCCTGTTCTTGTTAGAGCA
+GAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGATGGTTTTTTG
+GTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA
+AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGT
+TTCGTAGTTCTAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTA
+AAGCGGCTACATGTAGAATTTGACCCACTTTGGTGCCCACATATGGTAAC
+TAAAACACTACAGACAGTATGCACTCCAACCCTATATAATTTATGTCCTT
+CAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGATTAATACCTAG
+AATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG
+AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAA
+CCTGGTCATTCCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCT
+CCTGCTGTGGGTGCCAATAGCTCATGCAAGTTGGTGTCTTTCAACTAAGG
+CCACTATGTGAGGACCATCAGCAGCAAATTAAATTAGAAGTGCCTTGGAG
+TTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGACAGAACAGTG
+CCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT
+GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAA
+GGCATAGTTACTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGC
+TGGAACTTGGAACATGTAGCCTAAGTTTCCCTTTGTCTGCAATTACTGAG
+GTATGCCATGTTGAAATAGAACAATAATTGCAGGGAGGAATTATTCCAAT
+CGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTTCACAGCAACA
+AAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT
+CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAA
+GCGATGATACAGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACA
+TGGTTTAAACAAGCTCAGTTAAATGGATCAGAGCATCAACTGCTGTGAAC
+AAGCAATCACACGGTCAAGGTCAAACATTTACACTTCTGAGAGATCTGGA
+GAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTAACTGATTACG
+CCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA
+TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCAT
+GCAGCCTAATTTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATG
+TTTTAGGCAGTTTTAGGCACTAAAGGTGAACTGAGGATGCAGTCCCACGA
+TTAATTTTTATTCATCAGTTAACCTCATGTGAAGTGTAGTAAACAGAAAA
+AACCTAAATCAGATCAGTATTTCTTATGACCCCCCTCCCCTACCACCCAT
+CTTTAAAACAGCTCCAGTTCTCCTCAATCCCCCTGTACTTAGGTAGTTTT
+TTTCTAGCGTCTTGGAGAAGTTGCCTCAGTTCTTCTGTGGATTTTAGGCT
+GTCTCTGTGTCTTCTGTCTCCTCATGTATTCTCAGACTGACTCCATGATG
+TTGAGATCAGGGCTCTGTGGTGGGGGGGTCAGACCATCTGTTGCGGGACT
+CCTTGTTCTTCTTGTCTCTGAGGATAGTTCTTTATGATGCTGACTGTGTG
+ATTGGACTCATTGTCCTGCTGCAGAATTAATCTGGGACTGATCAGACATC
+TCCCTGATTGTACTGACAAAGGATAAGAGCAAAAACCTGTAGACTGCCTA
+AAACTTCTGCACAGTACTGCACAATACTGTATGTATGTGTGATAATACTC
+AGAAGTGATATAGAGTGGTTCAGGAGAAGCCTTAACTTACTTGTTCTTCT
+TTTCCCTATGATGCAATTTCTCTCCAAGCAAACAGTACATAATGAAATAT
+GTCTAAGAAAACACTAGCTGTTTGTCTTTTTGAAACACATTTAATAATTC
+AGATTTAGCCACAAATTAAAAATGAGCTTATACTTTGAACTTGCCCATTG
+GTTTTGATGTATGAAGCTGACAGATTTAGTCCACTGCATACTGCACTTTT
+TTGGATAGTGAAATAAATTAATTTATTATGCAGCATTTAAGCATATTGCT
+GTTATCTAGAATTTTATATATTAATAGGTATATATTATATTAACTCTTCA
+TTTGCTTAAATTTGGCCTGTATATTCCTCCATTTTATACAACCTTTAGAA
+AACACTGGAGTGAACAAAAATGTGAGGTTCAAAAGTGAGAGGAAAGAAAA
+ATCAGTGTGTGCAGCAAGACTGTAAGAGTCCCTGAACAAAAGTGTTGATT
+TATTGTTGGCTGTGAAAGTTTTTGCTGCAGGGGTTTTTGTAAACAGTGAC
+TCTTCTTTATGTCTTTGTTTCTATCTTTTTGTGTTTTCTTTTTTCATCTT
+ATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NATACTCATGCTTCTCTTTTACTCTAATGGAAAACATAGAATTGAGGTGC
+AGTATCACTGAGAACAGCCAATGCTGATACTGTCATTTGTTATAGGATGT
+TTACTTAAGAAATGATTGTGGCCTTATTTGCCAAAGACGATTAGGCCCTG
+AGCCATTTGTACAGCCAATGTGACACCTCCGTAACCATAAGAAAAATTAT
+GCTTCCACAAACAGAAAGAGAGACAAATACAGACAGTGAAAGCAAAACTA
+TGAGAGAAGGGATAGCCATCTCAGCATTCAGCCAGTGTGTCACATGTCCA
+GAGGGCTGCAATATGAAGCCAATAAACTGAGAGAAAGGTGTGATTCCAGA
+GGGCTTTCCGGACTTTAAACTTTTCTCATTTTCACCTTCAGCTCGTTGTG
+AAGCCTTCAATTGTTCCTTGGTCACATGAAGCCCCCTGTGTGTAAAATTG
+ACATTCAGCTTATTACTCTCATCTACATCTCTAAAGTTGCACATGACTGC
+CAGTAGAGGGTGCTCATGATTTGAGACAAGTAACTTTTCTGTGCTTGTGT
+CAACAGTTAAAAATTCCCTATGGATACTTGAAAGAAATGCACCAGTGTTT
+AAATTTAGACTCAGTCCAAGTAGGCTTTGCCATTTGCTTTACTGTTGTAA
+TCAAATCAGTGCTCACAGTACATCAGTGGCAAAAGCAATTAGCTTAATTG
+AGTTAGTAGGGAGTGAAAAGATAGATCACTATTTCCTGGCAGCGATATAT
+TTTTCTCAGTTAACCTTCTCACAGAAAATTAGTTAGATGAGTTTCTTATC
+TCATTAGAACCATTGTTGTTTATTATTTAAACCTGACCTTTATCCAGAGA
+ATGTTTTTCACAGTAAGCATGTGTTTCACTCCAGCTCATACAGACACACC
+TTGTGGCCAGTCAGGGAATGTGCATTAAAACTAATGTTCTCTCTGGTAGA
+GCCATTCTGCCTATTCTGTTATTCACCAAAACTTTCCTGCCCAGATTTTT
+CCATCTAGTCAGGGGACTTTTTCCTCAGCTGAACTGTCAGTAGCCAGCTG
+CTCTGACCTTTACTCACCTCAGAGATTACTCATTCAAGTTATTTTACCAA
+TGATGGAATAAAGTGAATACACACATCGTGACTTCATCATACTTTCCCTC
+CTTTACCGTACTTTCCTAACATCAGTTAGCAAATTATACACTTAAATATG
+CTAAAATAGAGATTTGTTTCAAATTGAAGAGCAACACAGGACACATTTGC
+TTACACAAGTGACTCTATCTTATTTTTTAAGTGGCTGTCATCCTCAATAT
+CATCAATATTTTAGGCATGTGAATGTAGGNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATTATTATGATAAAG
+CTACACATTATACTGCAATTCCGGGCCAACACTATTGTTTCGCCCTTACA
+GTACAGGACTGTACATATCACTGTCACTACAAATTGTTTCAGTGAGGCTC
+AGATTACAGCTCATCTTGCTTAGTCATTACGGCTCAGACCACCAGACAAT
+GTCTTTACGTCAGAGAAAGTCTGAGTGAGAGCTGTCCTTAAAACCAAGTC
+CCATGAGGCAAGTAAAGAGGTGACTCGCTGAGAGGATCAGGGAGAGGNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNGGTGCTGCATTGAGTCGAGAGGTGTGAGGATGTTCGATAAGCCA
+TTATCACTCTGCTCGGAGGACGTCAGGCCCTATCCTTTTTTCTTTGAGTC
+ACCACTTTCTCACCACAGCCATCCTTCTTCCTTTGTCTGTTTAATCTTTT
+GCAGCCAAACATTTGGTATGCAAACTATAGGAAGATATGGTCTGACACCG
+ACTGATGTGATGCTAATGCCTTCTCTGTTTTATCCAATGACTCCTCATTT
+ATCAGGACCAATCCAGCTATTGTGTGCTATTGGCCCAACACAGCATTAAA
+CATGGTGACCAGAATAGCCACAGCAAAATAAATAACGGGCCTCTGTGACT
+ATTGTATGTGCATTTCTTACAGCTGTCATTTTTCTTTTCTGTTAGCTTTC
+ATTCTACTGTGCGTGCTCTCTATCTGCCTACGTTGTATCTGTGCCTTTTG
+TTAAATTCATTCAGCCTTTTATCTGTCTTGAATTTCTTTCTCTGTGTCCT
+CCTTTCCCTCTCTGTCTCAGCAGATCCGTCCCTTGTTTTTGCAGGAGAAG
+CGCTCTGCGTCATGCAAGGTCCATCTTCTGCGACGCACCATCAGCGTTCC
+AGTGGAAACACAGTTTCCAGAACTCCATAGCCAGCTGTCCACTGAGAGCG
+GTGAGTAGCCTGAGGACATGCCTCTGCTGTGATTAGCATGCATCATTATT
+TCCATACACACACTGGAAATCTTTTAATCGTTTTTCTTGACACAATTTAA
+TTAATTTAAATAATTAATAACATTGAAATCAACTGTTAAACACTTGTTCA
+ACATGGTTGTGGACAAAATCATCATAATGACTGTGGAGACTAAGGAGAAA
+CAATGCTTTGTTAAAATCTTTGCTCTGGTTTTGGCTTTTTCTATATTCTA
+TTTGTGTGTGCAGTGCTGCTAAACATCATCTTAGTCAGAGTCAAACTTGA
+GCACTCATGCAGATGCTTGTTCTATTCAGTGGTCAAATGGGCAGTGTAGC
+TTAGGAGAAGCACTTCTTTCCGCTCCTCAGTGACATAACCAGTCACACAC
+TCACACAGGCAAGTGAGCCTTTTAAAATTGCTGATACTATTTTTTTCTAC
+TTCATATCCACTAGATGGAGAGCTTGGTGCTTTGTAGCACTCCAGTGTAC
+TTGGAATGTTTCATCATAACCACTAAGGCAGACATTGATAAAGGCTTCGG
+TCTCTTTCAAACTGCAGGTGTCATGTTGCACTCTCCCTATTGCTTTTATG
+AAAATGTAGTGCCGGTAATATAACACAGTTACTCTCTGCATATATCACTA
+TGTACACTTACTTCTATGGAAAAGATGGAGCGCCACAGTGAAAACTGTTT
+TGAGTCTGTGAGGGGAAAACACAGCATCAGTCACAGTGAAACACTAGGTG
+GCACTCAGGTTTGACATTCAAGCATTTGTATCCCACAGTTACTGTTGCTG
+GGTTGTTGGCTGGCATGCAACTTAATATGATCTATCTTTAAATCAGTGTG
+TGCAGTGGTTATTTAGTTTAAGTGCTTTTTAATGATGTCACAACATTATT
+TTGTCTACACCCTGCATACAGCACAGTATATTAAATTTAGGTTTTATTAA
+GTTAAGTAATGTTCTGAGGTGGCATTGCCCTCAGGTATATATCCCTCAGG
+CAGTGTTACTGGACAGCATATAGATTGTAATGTTGTGTAAGCAGTGTTGT
+GTAAGCTTTTTTAACCAAAATGCTCTCATGTTTCTTTGTTACCACAGTGG
+TTTTAGTGATGTTTTGTGCTGTGAACAGAATCATGATTTCTGCAGACACT
+CCACATCTGTTTAAACAGTGATTTTCACATCTGTCTGTAACTGAGTCAGT
+CAGTTTGTGGTTGGATTGATGTGTAGTGGGATCTACAGATCATCAACCCT
+CGGGGAAGCCATTTAGTTCTGTGTTAAATAAAAATACAACTTTTGAGCAC
+TGTTTTTTCATATTTTTCTTCATCCCTTAGTTGATATTAAAGGTGCTATA
+TGTAGATTTTTGCTATCACTACATAGCCAATGTTAGGATTAAGAACCCCA
+GCAACCCGAATGTTAACTCGTACTTTGCTTCTATTTCTATTACATATTTT
+CTTCTATGGAAGTTAGGATGTTAACCAGCTAACCCCAGGTCATCTCATAA
+TACCACTTGGCAATAGTCAGTCACTGTTGCATTCAGTCTGCCCCTCCAGC
+ATGAAAGGATGAAGAAGTACCGCTACCCAGAAGGCGTAGTCTAACCCCTT
+GTCTTGTAAACACAGCAATGGCTGAAGCTCTTGGTAAGTAAACAGCTGTT
+AATGTCAGTGTTGGCTACATAGCTAAAACTTACACATAGCACCGTTAAAA
+AAATAAATGGTAAGTTTTTGTCCTTTTAGTTAAAGCATGTAAATGTCAGT
+TAAAACAATTTAGAAAATTTCTCATTCAAATAGTCATTTTATTTTGTGTG
+CTATAAATACCTTCAGTTTTTTTTCTTGTACTTTATTTTAAACTCTTGTC
+TTTCTCTTATGCCCCTTATCTACCAGGAGAGGTCACACTCTTGTTGATAG
+CATGATAAGGAAATGAGCAGAGATGTCCTTATTTCTCCTCAAAGTTATTC
+CCTGCATAGCACATATTTGACTTTAAAAATACAAAGACTACAGGTGCTAA
+ATCACACACACAACACCCAAAAGAATAGAGGACTTTTAAATATGTGCATC
+CCATTTTCTAATCTATATTCACCACTGATACAAGTGATGACACGGTGATG
+AGAAAATAACGTAACATACATTATTATACAAGATTGGTATGATTGCTGTA
+GGTTGGTTTTGTTTATCTGAGAAGGGACAGAGGCTAAAAGAATAAACAAA
+GTCTGCCAGCTACTTCACTAATTACAGAGTACCCCTGCAACTCTGCCAAT
+GACATTCTGAAACTTTTCCATGACTATTATGTAAGATAATTTTTGAATCA
+CTGATTCAGATTTCCATAGCCAAGATACACTATTCTGCATGTTGGCTAGA
+ATACATTACAATACATTAATATATTACTGTAGTGGTTATTATGTCAGCCA
+TTTAAACATAATTTCATACTTTCAGCTGTTCGCAGGGCAAACTTTTTATT
+TTTGTCACCTGAATCCAGTTGCATTTGGATTTGAATTCAGTGGTGACTGC
+CAATGAAAAATAACTCACAGGGGCACTTAGAGATCTGAGGCGGCATTGAC
+TGAACATCGAAGGGCTACTATTACAGAAATGCAAGTGGAAAAAGAATCTG
+ATATGATACTTGCCAGTATGAAAAGCATAATTAGGACTTTAAATGCATGT
+TTTAAATGTTTTTGTGAACCATAATTTCAGTGTAAAAGTTCCAGTTATAT
+TTTAAAAGGAACTCCAAATATCCAGTGGTAGCATCAAGGTAACCTGAGAT
+AAGTGAAAAGTAATTCTAAATTAATCAGTATGTTGAAATATCAGACATTT
+GTGTGCATTATTGTTAGTGTGACCATGGTGGGATACTTCAAACACACTTG
+TTCATTGGCAAAATAATTGTCACATGAATTAACACAAGTGACACTTTCAG
+TATCATACTGACAGAATGTTTGTTGTTAACTTTATCCCTGAAAACAAACA
+CTAATACATGTTCTTGTCGCCCCTTCAGTGCCTAACCTTTAAAATTGTGC
+TTGTGAGTGTGTGTATGTGAGCGCACCTGTATGGTTTTCAAATAATGTTC
+CACATCACAACACTACAAACACTCACATGGTCTATGTTTTACTAAATTAT
+TCATTGGCGTAGGCCCCACCTGCTCTTGTCCTTGTGTGACACAGGCTCTA
+AATAAGCAGCATGATGAATAAAAATGACACTGAGATGAATAGGAATCCAC
+AGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG
+AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAAC
+ATGCCAGAGACAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTT
+AATTTTACTGAGGTGAAAGTTTTTTTTTTTTTTCTGGATAAAGCTTTGGA
+AAAGTTCTCAACTGTTGCTTCTTTAAAGATGCTGGGGCATATTCTGCCCT
+ATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAAGCTGTAACAT
+TTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT
+ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTA
+TCTTAATATTCCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCA
+TCTCTCTCCTCATAGTTTTTCATTACAATTAGCTCTCATTAGATAGAATT
+GTATTTGTTGTCTTTGTGCCAGTCACTCCAGTCTATTTTGCCAGCACACA
+ACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACAGTTTTAATGA
+TGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG
+GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCAT
+TTGGCCACAAAATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGT
+ACAGGTTGCATATGAGCTGGTAGGAAAGATATAGAATCATTATCAGCTGA
+TAATCTAACAGTAGCAGTCAGTGTAGATGCTGTGCTAACGCAAAGTTGTG
+AAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCTGCCAAGCTTA
+AGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC
+GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTT
+TTCAAGCAAAAATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAG
+ATTAAATATCTTTTGGGTTTTGGCACAGGCTGGACAAAAAAACCTCTGAG
+ACGCTGTGATAAGAATTTATTTTCACATTTTTTTTACTTTTCAGGGACTA
+CACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCCCAAATGTTAA
+CTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT
+TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTG
+GTCAGGTTTTTGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGG
+GAAGTTCCTAAATTCTTCTGGCATGTTAATGTTTTCCAAACTGATGTTAC
+CAAGTCCTCGTTATGATGAGAAAAAAATATGCTGAGAGTGAAATTGATCA
+AAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCATCTTTCTCCC
+TCTCTCTCCACCGCAACGTGGAAACTGCTCNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+CTCCCTCCCATTCTGTGCTCCGGTATANNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNCAGAGAGGGGTTCAGTCAGACAGATGTAACACAGCAG
+TAGAAGCCTGAGCTGAGCTGGCAGGCTGCGGAGGCCAGACCAGAGCCAGC
+AGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCAGCGGCAGCAC
+AGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT
+TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCA
+TCACCCTGTGAATTTAACATGGACCCTGACACCAGCACCCATCCAGAGAC
+ACAACATGGTGAGTGAGATTTGAAGGAGGAAAAGATTAGAAACAATGAGA
+GTGATACTGTGGGAAAAGTTGAGGAGCGTGTTAGTGAACAAGGGAGTCAG
+GTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGGCTCTCCATTT
+GGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC
+AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCA
+ATGTGATGAAATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTC
+TTGTCATTACTTTAACAGGGTTGTAAAGTTTTTTTTGCTTTTTTGTAGCT
+AAGAAAGTTGAGAATTGTTTTGTCTGAACTCTCTCTGGGATTTGTCTTGT
+CGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAGGAAGCTCTCC
+TCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT
+TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATAT
+TTGTATGCTGTGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACA
+GAGGGGCACCAAAGAATGAAAGTGGGAGACAGAGGGAACGAGAAGGGGAG
+AGACCGAGAGAGAAGGACTTATGTACACAAATAAATCCAGGGGGATCTAG
+ACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTTCCCCCTCATT
+TTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA
+CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGA
+AAAGACGATGTTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTG
+CTGATGAAGAATGTGGTTAACAGAATGAGTCAACAGAGAACATATTTCCA
+AGAGACTGTAGTTTCTCTTCGCCAGCGTCTAAGCCACATTGCTTTATGCA
+CTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGAGGGGAAAGAG
+AATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC
+TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCAC
+CTTCCCAAAATAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTC
+TCAACCAGTCAGCCAGCCTCTTTCAGTGCATACAGCTTGAGGAGATCCCT
+TCTAAAGGTCCAATATAAATAGAAAAGTGGGAGTAGAAAGGGCAATAATC
+TGATATCATCTGATTACATTCACACCTCAGGCTTGCACGCTACAGGAAGA
+GTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT
+AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAA
+TCCTGGAGTAGACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCAT
+ATATTACACATTGCTTTTTGTACTTTGCATATAAAGTAGATGCTGATCTG
+CTATCTGCATATATAGTAGCTGCAGATAGCACTGTAACTACATCTACATA
+TTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATGCTGGACAACC
+TCTTCCGATATTCATTATTTTGGAAAAAAAAATATCAATGGCATTCCTAC
+CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGT
+TCACCCAATAGTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTAC
+AAGCACCCATGCCTAAATCA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.poly	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,2 @@
+18	0.0	0.0	0.0	scaffold_1	613	632	(13588)	(GT)n	Simple_repeat	1	20	(0)
+67	2.9	1.4	0.0	scaffold_1	11981	12050	(2170)	(CT)n	Simple_repeat	1	71	(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fasta.stats	Wed May 02 20:18:11 2018 -0400
@@ -0,0 +1,51 @@
+==================================================
+file name: dataset_12.dat           
+sequences:             1
+total length:      14220 bp  (14220 bp excl N/X-runs) 
+GC level:         39.94 %
+bases masked:        378 bp ( 2.66 %)
+==================================================
+               number of      length   percentage
+               elements*    occupied  of sequence
+--------------------------------------------------
+SINEs:                0            0 bp    0.00 %
+      ALUs            0            0 bp    0.00 %
+      MIRs            0            0 bp    0.00 %
+
+LINEs:                0            0 bp    0.00 %
+      LINE1           0            0 bp    0.00 %
+      LINE2           0            0 bp    0.00 %
+      L3/CR1          0            0 bp    0.00 %
+
+LTR elements:         0            0 bp    0.00 %
+      ERVL            0            0 bp    0.00 %
+      ERVL-MaLRs      0            0 bp    0.00 %
+      ERV_classI      0            0 bp    0.00 %
+      ERV_classII     0            0 bp    0.00 %
+
+DNA elements:         0            0 bp    0.00 %
+     hAT-Charlie      0            0 bp    0.00 %
+     TcMar-Tigger     0            0 bp    0.00 %
+
+Unclassified:         0            0 bp    0.00 %
+
+Total interspersed repeats:        0 bp    0.00 %
+
+
+Small RNA:            0            0 bp    0.00 %
+
+Satellites:           0            0 bp    0.00 %
+Simple repeats:       8          378 bp    2.66 %
+Low complexity:       0            0 bp    0.00 %
+==================================================
+
+* most repeats fragmented by insertions or deletions
+  have been counted as one element
+                                                      
+
+The query species was assumed to be homo          
+RepeatMasker Combined Database: Dfam_Consensus-20170127
+                          
+run with rmblastn version 2.2.27+
+The query was compared to unclassified sequences in ".../dataset_2.dat"
+