changeset 14:7563ea7a922d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmasker commit 7a5f368a5859e659aa36d0358bb96ca12574e2cc
author iuc
date Mon, 24 Apr 2023 10:29:31 +0000
parents 3f987772e283
children ba6d2c32f797
files macros.xml repeatmasker.xml.orig test-data/Dfam_partial_test.h5 test-data/README.md test-data/small.fasta.cat test-data/small.fasta.gff test-data/small.fasta.log test-data/small.fasta.stats test-data/small_dfam.fasta.cat test-data/small_dfam.fasta.log test-data/small_dfam.fasta.stats test-data/small_dfam_rattus.fasta.cat test-data/small_dfam_rattus.fasta.log test-data/small_dfam_rattus.fasta.stats test-data/small_dfam_up.fasta.cat test-data/small_dfam_up.fasta.log test-data/small_dfam_up.fasta.stats
diffstat 17 files changed, 312 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Thu Oct 21 15:49:31 2021 +0000
+++ b/macros.xml	Mon Apr 24 10:29:31 2023 +0000
@@ -1,6 +1,6 @@
 <macros>
-    <token name="@TOOL_VERSION@">4.1.2-p1</token>
-    <token name="@VERSION_SUFFIX@">galaxy1</token>
+    <token name="@TOOL_VERSION@">4.1.5</token>
+    <token name="@VERSION_SUFFIX@">galaxy0</token>
     <xml name="edam_ontology">
         <edam_topics>
             <edam_topic>topic_0157</edam_topic>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repeatmasker.xml.orig	Mon Apr 24 10:29:31 2023 +0000
@@ -0,0 +1,260 @@
+<tool id="repeatmasker_wrapper" name="RepeatMasker" version="@TOOL_VERSION@+@VERSION_SUFFIX@" profile="20.01">
+    <!--
+        Galaxy tool wrapper for RepeatMasker. Shared tokens and macros are
+        imported from macros.xml, which defines the tool-version and
+        version-suffix tokens used in the version attribute above and the
+        edam_ontology macro; the xrefs, requirements and citations macros
+        expanded in this file are presumably defined there too.
+    -->
+    <description>screen DNA sequences for interspersed repeats and low complexity regions</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro='xrefs'/>
+    <expand macro='edam_ontology' />
+    <expand macro='requirements' />
+    <version_command>repeatmasker --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+    ## Outline of the command below:
+    ##   1. locate the RepeatMasker executable and derive its bundled Libraries directory
+    ##   2. for an uploaded Dfam library, use a local lib/ directory holding a symlink to it
+    ##   3. symlink the input FASTA and run RepeatMasker on it in the working directory
+    ##   4. unless -is_only was requested, move/reformat the result files into the outputs
+    ##   5. write the (possibly gzipped) repeat catalogue to its output dataset
+    RM_PATH=\$(which RepeatMasker) &&
+    if [ -z "\$RM_PATH" ] ; then echo "Failed to find RepeatMasker in PATH (\$PATH)" >&2 ; exit 1 ; fi &&
+
+    RM_LIB_PATH=\$(dirname \$RM_PATH)/../share/RepeatMasker/Libraries &&
+    #if $repeat_source.source_type == "dfam_up":
+      mkdir lib/ &&
+      ln -s '${repeat_source.dfam_lib}' lib/RepeatMaskerLib.h5 &&
+      RM_LIB_PATH=\$(pwd)/lib &&
+    #end if
+
+    ln -s '${input_fasta}' rm_input.fasta &&
+
+    RepeatMasker -dir \$(pwd)
+    -libdir \$RM_LIB_PATH
+    #if $repeat_source.source_type == "library":
+      -lib '${repeat_source.repeat_lib}'
+      -cutoff '${repeat_source.cutoff}'
+    #else if $repeat_source.source_type == "dfam":
+      #if $repeat_source.species_source.species_from_list == 'yes':
+        -species $repeat_source.species_source.species_list
+      #else
+        -species '${repeat_source.species_source.species_name}'
+      #end if
+    #else if $repeat_source.source_type == "dfam_up":
+        -species '${repeat_source.species_name}'
+    #end if
+    -parallel \${GALAXY_SLOTS:-1}
+    ${gff}
+    ${excln}
+    ${advanced.is_only}
+    ${advanced.is_clip}
+    ${advanced.no_is}
+    ${advanced.rodspec}
+    ${advanced.primspec}
+    ${advanced.nolow}
+    ${advanced.noint}
+    ${advanced.norna}
+    ${advanced.alu}
+    ${advanced.div}
+    ${advanced.search_speed}
+    -frag ${advanced.frag}
+    ## -maxsize ${advanced.maxsize}
+    #if str($advanced.gc):
+      -gc ${advanced.gc}
+    #end if
+    ${advanced.gccalc}
+    ${advanced.nocut}
+    ${advanced.keep_alignments}
+    ${advanced.invert_alignments}
+    ${advanced.xout}
+    ${advanced.xsmall}
+    ${advanced.poly}
+    rm_input.fasta &&
+    #if $advanced.is_only != '-is_only':
+      mv rm_input.fasta.masked '${output_masked_genome}' &&
+      sed -E 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ;  1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in  query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' rm_input.fasta.out >'${output_log}' &&
+      mv rm_input.fasta.tbl '${output_table}' &&
+      #if $gff == '-gff':
+        mv rm_input.fasta.out.gff '${output_gff}' &&
+      #end if
+      #if $advanced.keep_alignments == '-ali':
+        mv rm_input.fasta.align '${output_alignment}' &&
+      #end if
+      #if $advanced.poly == '-poly':
+        sed -E 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' rm_input.fasta.polyout >'${output_polymorphic}' &&
+      #end if
+    #end if
+    if [ -f 'rm_input.fasta.cat.gz' ]; then
+      zcat 'rm_input.fasta.cat.gz' > '${output_repeat_catalog}';
+    else
+      mv rm_input.fasta.cat '${output_repeat_catalog}';
+    fi
+    ]]>
+  </command>
+
+  <inputs>
+    <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" />
+    <conditional name="repeat_source">
+      <param label="Repeat library source" name="source_type" type="select" help="To use RepBase, choose 'Custom library of repeats' and select a fasta version of this non-free database.">
+        <option selected="true" value="dfam">DFam (curated only, bundled with RepeatMasker)</option>
+        <option value="dfam_up">DFam (full/specific version)</option>
+        <option value="library">Custom library of repeats</option>
+      </param>
+      <when value="dfam">
+        <conditional name="species_source">
+          <param label="Select species name from a list?" name="species_from_list" type="select">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+          </param>
+          <when value="yes">
+            <param name="species_list" type="select" label="Species">
+              <option value="human" selected="true">Human (Homo sapiens)</option>
+              <option value="rodent">Rodent (Order Rodentia)</option>
+              <option value="mouse">Mouse (Mus musculus)</option>
+              <option value="rattus">Rat (Rattus sp.)</option>
+              <option value="danio">Danio (zebra fish)</option>
+              <option value="drosophila">Fruit fly (Drosophila melanogaster)</option>
+              <option value="elegans">Caenorhabditis elegans (nematode)</option>
+            </param>
+          </when>
+          <when value="no">
+            <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" />
+          </when>
+        </conditional>
+      </when>
+      <when value="dfam_up">
+          <param name="dfam_lib" type="data" format="h5" label="DFam library" help="The full DFam library can be downloaded from https://www.dfam.org/releases/current/families/Dfam.h5.gz" />
+          <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" />
+      </when>
+      <when value="library">
+        <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" />
+        <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" />
+      </when>
+    </conditional>
+    <param type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" />
+    <param argument="-excln" type="boolean" truevalue="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" />
+    <section name="advanced" title="Advanced options" expanded="false">
+      <param argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" />
+      <param argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS elements; with this option selected it will clip them before analysis" />
+      <param argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" />
+      <param argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is selected a check for rodent specific repeats is done instead of a full RepeatMasker run" />
+      <param argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is selected a check for primate specific repeats is done instead of a full RepeatMasker run" />
+      <param argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." />
+      <param argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." />
+      <param argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" />
+      <param argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" />
+      <param argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" />
+      <param type="select" name="search_speed" label="Search speed vs sensitivity trade-off">
+        <option value="">Default</option>
+        <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option>
+        <option value="-qq">Rush (10% less sensitive)</option>
+        <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option>
+      </param>
+      <param type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequence that is searched without fragmenting" />
+      <!-- -maxsize option is in the help, but not in the code of repeatmasker-->
+      <!--param type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" /-->
+      <param type="integer" argument="-gc" optional="true" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" />
+      <param type="boolean" argument="-gccalc" truevalue="-gccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" />
+      <param type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" />
+      <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" />
+      <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" checked="false" label="Output alignments file" />
+      <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" />
+      <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" />
+      <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" />
+    </section>
+  </inputs>
+  <outputs>
+    <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}">
+      <filter>not advanced['is_only']</filter>
+    </data>
+    <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" />
+    <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}">
+      <filter>not advanced['is_only'] and advanced['keep_alignments']</filter>
+    </data>
+    <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}">
+      <filter>not advanced['is_only'] and advanced['poly']</filter>
+    </data>
+    <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}">
+      <filter>not advanced['is_only'] and gff is True</filter>
+    </data>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="4">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="library" />
+      <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
+      <output name="output_masked_genome" file="small.fasta.masked" />
+      <output name="output_table" file="small.fasta.stats" lines_diff="6" />
+      <output name="output_repeat_catalog" file="small.fasta.cat" lines_diff="2" />
+      <output name="output_log" file="small.fasta.log" lines_diff="2"/>
+    </test>
+    <test expect_num_outputs="7">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="library" />
+      <param name="gff" value="-gff" />
+      <param name="keep_alignments" value="-ali" />
+      <param name="poly" value="-poly" />
+      <param name="repeat_lib" value="repeats.fasta" ftype="fasta" />
+      <output name="output_masked_genome" file="small.fasta.masked" />
+      <output name="output_table" file="small.fasta.stats" lines_diff="6" />
+      <output name="output_repeat_catalog" file="small.fasta.cat" lines_diff="2" />
+      <output name="output_log" file="small.fasta.log" lines_diff="2"/>
+      <output name="output_alignment" file="small.fasta.align" />
+      <output name="output_polymorphic" file="small.fasta.poly" />
+      <output name="output_gff" file="small.fasta.gff" lines_diff="4" />
+    </test>
+    <test expect_num_outputs="4">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="dfam" />
+      <param name="species_list" value="human" />
+      <output name="output_masked_genome" file="small_dfam.fasta.masked" />
+      <output name="output_table" file="small_dfam.fasta.stats" lines_diff="2" />
+      <output name="output_repeat_catalog" file="small_dfam.fasta.cat" lines_diff="2" />
+      <output name="output_log" file="small_dfam.fasta.log" lines_diff="2"/>
+    </test>
+    <test expect_num_outputs="4">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="dfam_up" />
+      <param name="dfam_lib" value="Dfam_partial_test.h5" ftype="h5" />
+      <param name="species_name" value="rodent" />
+      <output name="output_masked_genome" file="small_dfam_up.fasta.masked" />
+      <output name="output_table" file="small_dfam_up.fasta.stats" lines_diff="2" />
+      <output name="output_repeat_catalog" file="small_dfam_up.fasta.cat" lines_diff="2" />
+      <output name="output_log" file="small_dfam_up.fasta.log" lines_diff="2"/>
+    </test>
+    <test expect_num_outputs="4">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="dfam" />
+      <param name="species_list" value="rattus" />
+      <output name="output_masked_genome" file="small_dfam_rattus.fasta.masked" />
+      <output name="output_table" file="small_dfam_rattus.fasta.stats" lines_diff="2" />
+      <output name="output_repeat_catalog" file="small_dfam_rattus.fasta.cat" lines_diff="2" />
+      <output name="output_log" file="small_dfam_rattus.fasta.log" lines_diff="2"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+RepeatMasker is a program that screens DNA for interspersed repeats and low
+complexity DNA sequences. The database of repeats to screen for can be
+provided as a FASTA file or downloaded from RepBase_. If the RepBase option is
+chosen the RepBaseRepeatMaskerEdition file should be downloaded and
+unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should
+be uploaded to Galaxy for use with this tool.
+
+Further documentation is available on the RepeatMasker homepage_.
+
+.. _RepBase: http://www.girinst.org/repbase/
+.. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html
+    ]]>
+  </help>
+  <expand macro="citations" />
+</tool>
Binary file test-data/Dfam_partial_test.h5 has changed
--- a/test-data/README.md	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/README.md	Mon Apr 24 10:29:31 2023 +0000
@@ -1,1 +1,4 @@
-Dfam_partial_test.h5 was generated from the test dataset in https://github.com/Dfam-consortium/FamDB/ (commit: 6b28b66)
+Dfam_partial_test.h5 was generated from the test dataset in https://github.com/Dfam-consortium/FamDB/ (commit: 20c436d)
+
+`./export_dfam.py --from-tax-dump /path/to/taxonomy_dump/from_ncbi/ --from-hmm test_data/Dfam_partial.hmm --db-version 1.0 Dfam_partial_test.h5`
+
--- a/test-data/small.fasta.cat	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small.fasta.cat	Mon Apr 24 10:29:31 2023 +0000
@@ -98,6 +98,6 @@
 ## Total Length: 14220
 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
 ## Total NonSub ( excluding all non ACGT bases ):14220
-RepeatMasker version 4.1.2-p1 , default mode
-run with rmblastn version 2.10.0+
+RepeatMasker version 4.1.5 , default mode
+run with rmblastn version 2.13.0+
 RM Library: 
--- a/test-data/small.fasta.gff	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small.fasta.gff	Mon Apr 24 10:29:31 2023 +0000
@@ -1,11 +1,10 @@
-##gff-version 2
-##date 2021-05-20
-##sequence-region rm_input.fasta
-scaffold_1	RepeatMasker	similarity	613	632	 0.0	+	.	Target "Motif:(GT)n" 1 20
-scaffold_1	RepeatMasker	similarity	780	824	18.3	+	.	Target "Motif:(ATAATA)n" 1 45
-scaffold_1	RepeatMasker	similarity	2231	2274	23.9	+	.	Target "Motif:(CAGA)n" 1 46
-scaffold_1	RepeatMasker	similarity	4853	4901	18.4	+	.	Target "Motif:(TC)n" 1 54
-scaffold_1	RepeatMasker	similarity	6230	6284	19.1	+	.	Target "Motif:(TAATTAA)n" 1 52
-scaffold_1	RepeatMasker	similarity	6548	6606	28.3	+	.	Target "Motif:(GACA)n" 1 57
-scaffold_1	RepeatMasker	similarity	11981	12050	 2.9	+	.	Target "Motif:(CT)n" 1 71
-scaffold_1	RepeatMasker	similarity	12078	12113	15.4	+	.	Target "Motif:(CT)n" 1 37
+##gff-version 3
+##sequence-region scaffold_1 1 14220
+scaffold_1	RepeatMasker	dispersed_repeat	613	632	 0.0	+	.	ID=1;Target "Motif:(GT)n" 1 20
+scaffold_1	RepeatMasker	dispersed_repeat	780	824	18.3	+	.	ID=2;Target "Motif:(ATAATA)n" 1 45
+scaffold_1	RepeatMasker	dispersed_repeat	2231	2274	23.9	+	.	ID=3;Target "Motif:(CAGA)n" 1 46
+scaffold_1	RepeatMasker	dispersed_repeat	4853	4901	18.4	+	.	ID=4;Target "Motif:(TC)n" 1 54
+scaffold_1	RepeatMasker	dispersed_repeat	6230	6284	19.1	+	.	ID=5;Target "Motif:(TAATTAA)n" 1 52
+scaffold_1	RepeatMasker	dispersed_repeat	6548	6606	28.3	+	.	ID=6;Target "Motif:(GACA)n" 1 57
+scaffold_1	RepeatMasker	dispersed_repeat	11981	12050	 2.9	+	.	ID=7;Target "Motif:(CT)n" 1 71
+scaffold_1	RepeatMasker	dispersed_repeat	12078	12113	15.4	+	.	ID=8;Target "Motif:(CT)n" 1 37
--- a/test-data/small.fasta.log	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small.fasta.log	Mon Apr 24 10:29:31 2023 +0000
@@ -1,4 +1,4 @@
-SW scoret% div.t% del.t% ins.tquery sequencetpos in  query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID
+SW score	% div.	% del.	% ins.	query sequence	pos in  query: begin	end	(left)	repeat	class/family	pos in repeat: begin	end	(left)	ID
 
 18	0.0	0.0	0.0	scaffold_1	613	632	(13588)	(GT)n	Simple_repeat	1	20	(0)	1
 16	18.3	2.2	2.2	scaffold_1	780	824	(13396)	(ATAATA)n	Simple_repeat	1	45	(0)	2
--- a/test-data/small.fasta.stats	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small.fasta.stats	Mon Apr 24 10:29:31 2023 +0000
@@ -10,7 +10,7 @@
 --------------------------------------------------
 Retroelements            0            0 bp    0.00 %
    SINEs:                0            0 bp    0.00 %
-   Penelope              0            0 bp    0.00 %
+   Penelope:             0            0 bp    0.00 %
    LINEs:                0            0 bp    0.00 %
     CRE/SLACS            0            0 bp    0.00 %
      L2/CR1/Rex          0            0 bp    0.00 %
@@ -28,7 +28,7 @@
    hobo-Activator        0            0 bp    0.00 %
    Tc1-IS630-Pogo        0            0 bp    0.00 %
    En-Spm                0            0 bp    0.00 %
-   MuDR-IS905            0            0 bp    0.00 %
+   MULE-MuDR             0            0 bp    0.00 %
    PiggyBac              0            0 bp    0.00 %
    Tourist/Harbinger     0            0 bp    0.00 %
    Other (Mirage,        0            0 bp    0.00 %
@@ -53,8 +53,8 @@
   Runs of >=20 X/Ns in query were excluded in % calcs
 
 
-RepeatMasker version 4.1.2-p1 , default mode
-                                     
-run with rmblastn version 2.10.0+
-The query was compared to unclassified sequences in ".../dataset_a3b3078d-de09-4651-9e83-62019a3d45ba.dat"
+RepeatMasker version 4.1.5 , default mode
+                                        
+run with rmblastn version 2.13.0+
+The query was compared to unclassified sequences in ".../dataset_9e3ddbd2-0776-4c6d-bed6-0f4cd415796c.dat"
 FamDB: 
--- a/test-data/small_dfam.fasta.cat	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam.fasta.cat	Mon Apr 24 10:29:31 2023 +0000
@@ -82,6 +82,7 @@
 
 Matrix = 25p39g.matrix
 Kimura (with divCpGMod) = 29.45
+CpG sites = 10, Kimura (unadjusted) = 31.65
 Transitions / transversions = 1.43 (10/7)
 Gap_init rate = 0.07 (5 / 70), avg. gap size = 1.00 (5 / 5)
 
@@ -113,6 +114,6 @@
 ## Total Length: 14220
 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
 ## Total NonSub ( excluding all non ACGT bases ):14220
-RepeatMasker version 4.1.2-p1 , default mode
-run with rmblastn version 2.10.0+
-RM Library: CONS-Dfam_3.3
+RepeatMasker version 4.1.5 , default mode
+run with rmblastn version 2.13.0+
+RM Library: CONS-Dfam_3.7
--- a/test-data/small_dfam.fasta.log	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam.fasta.log	Mon Apr 24 10:29:31 2023 +0000
@@ -1,4 +1,4 @@
-SW scoret% div.t% del.t% ins.tquery sequencetpos in  query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID
+SW score	% div.	% del.	% ins.	query sequence	pos in  query: begin	end	(left)	repeat	class/family	pos in repeat: begin	end	(left)	ID
 
 18	0.0	0.0	0.0	scaffold_1	613	632	(13588)	(GT)n	Simple_repeat	1	20	(0)	1
 16	18.3	2.2	2.2	scaffold_1	780	824	(13396)	(ATAATA)n	Simple_repeat	1	45	(0)	2
--- a/test-data/small_dfam.fasta.stats	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam.fasta.stats	Mon Apr 24 10:29:31 2023 +0000
@@ -45,7 +45,7 @@
 
 
 The query species was assumed to be human         
-RepeatMasker version 4.1.2-p1 , default mode
-                                     
-run with rmblastn version 2.10.0+
-FamDB: CONS-Dfam_3.3
+RepeatMasker version 4.1.5 , default mode
+                                        
+run with rmblastn version 2.13.0+
+FamDB: CONS-Dfam_3.7
--- a/test-data/small_dfam_rattus.fasta.cat	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam_rattus.fasta.cat	Mon Apr 24 10:29:31 2023 +0000
@@ -98,6 +98,6 @@
 ## Total Length: 14220
 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
 ## Total NonSub ( excluding all non ACGT bases ):14220
-RepeatMasker version 4.1.2-p1 , default mode
-run with rmblastn version 2.10.0+
-RM Library: CONS-Dfam_3.3
+RepeatMasker version 4.1.5 , default mode
+run with rmblastn version 2.13.0+
+RM Library: CONS-Dfam_3.7
--- a/test-data/small_dfam_rattus.fasta.log	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam_rattus.fasta.log	Mon Apr 24 10:29:31 2023 +0000
@@ -1,4 +1,4 @@
-SW scoret% div.t% del.t% ins.tquery sequencetpos in  query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID
+SW score	% div.	% del.	% ins.	query sequence	pos in  query: begin	end	(left)	repeat	class/family	pos in repeat: begin	end	(left)	ID
 
 18	0.0	0.0	0.0	scaffold_1	613	632	(13588)	(GT)n	Simple_repeat	1	20	(0)	1
 16	18.3	2.2	2.2	scaffold_1	780	824	(13396)	(ATAATA)n	Simple_repeat	1	45	(0)	2
--- a/test-data/small_dfam_rattus.fasta.stats	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam_rattus.fasta.stats	Mon Apr 24 10:29:31 2023 +0000
@@ -14,6 +14,7 @@
       IDs              0            0 bp    0.00 %
       MIRs             0            0 bp    0.00 %
 
+
 LINEs:                 0            0 bp    0.00 %
       LINE1            0            0 bp    0.00 %
       LINE2            0            0 bp    0.00 %
@@ -47,7 +48,7 @@
 
 
 The query species was assumed to be rattus        
-RepeatMasker version 4.1.2-p1 , default mode
-                                     
-run with rmblastn version 2.10.0+
-FamDB: CONS-Dfam_3.3
+RepeatMasker version 4.1.5 , default mode
+                                        
+run with rmblastn version 2.13.0+
+FamDB: CONS-Dfam_3.7
--- a/test-data/small_dfam_up.fasta.cat	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam_up.fasta.cat	Mon Apr 24 10:29:31 2023 +0000
@@ -98,6 +98,6 @@
 ## Total Length: 14220
 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
 ## Total NonSub ( excluding all non ACGT bases ):14220
-RepeatMasker version 4.1.2-p1 , default mode
-run with rmblastn version 2.10.0+
-RM Library: CONS-_
+RepeatMasker version 4.1.5 , default mode
+run with rmblastn version 2.13.0+
+RM Library: CONS-Dfam_1.0
--- a/test-data/small_dfam_up.fasta.log	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam_up.fasta.log	Mon Apr 24 10:29:31 2023 +0000
@@ -1,4 +1,4 @@
-SW scoret% div.t% del.t% ins.tquery sequencetpos in  query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID
+SW score	% div.	% del.	% ins.	query sequence	pos in  query: begin	end	(left)	repeat	class/family	pos in repeat: begin	end	(left)	ID
 
 18	0.0	0.0	0.0	scaffold_1	613	632	(13588)	(GT)n	Simple_repeat	1	20	(0)	1
 16	18.3	2.2	2.2	scaffold_1	780	824	(13396)	(ATAATA)n	Simple_repeat	1	45	(0)	2
--- a/test-data/small_dfam_up.fasta.stats	Thu Oct 21 15:49:31 2021 +0000
+++ b/test-data/small_dfam_up.fasta.stats	Mon Apr 24 10:29:31 2023 +0000
@@ -14,6 +14,7 @@
       IDs              0            0 bp    0.00 %
       MIRs             0            0 bp    0.00 %
 
+
 LINEs:                 0            0 bp    0.00 %
       LINE1            0            0 bp    0.00 %
       LINE2            0            0 bp    0.00 %
@@ -47,7 +48,7 @@
 
 
 The query species was assumed to be rodent        
-RepeatMasker version 4.1.2-p1 , default mode
-                                     
-run with rmblastn version 2.10.0+
-FamDB: CONS-_
+RepeatMasker version 4.1.5 , default mode
+                                        
+run with rmblastn version 2.13.0+
+FamDB: CONS-Dfam_1.0