Repository 'repeat_masker'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/repeat_masker

Changeset 11:72aade318318 (2021-05-20)
Previous changeset 10:bfc70c8cc5ca (2020-12-11) Next changeset 12:39b40a9a6296 (2021-08-27)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmasker commit c4bf14b461856220df4b4bbdbb4b9bbad1ff8749"
modified:
repeatmasker.xml
test-data/small.fasta.cat
test-data/small.fasta.gff
test-data/small.fasta.log
test-data/small.fasta.stats
test-data/small_dfam.fasta.cat
test-data/small_dfam.fasta.log
test-data/small_dfam.fasta.stats
test-data/small_dfam_up.fasta.cat
test-data/small_dfam_up.fasta.log
test-data/small_dfam_up.fasta.stats
added:
macros.xml
test-data/small_dfam_rattus.fasta.cat
test-data/small_dfam_rattus.fasta.log
test-data/small_dfam_rattus.fasta.masked
test-data/small_dfam_rattus.fasta.stats
b
diff -r bfc70c8cc5ca -r 72aade318318 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu May 20 12:56:42 2021 +0000
b
@@ -0,0 +1,38 @@
+<macros>
+    <!-- Version strings shared by the package requirement and the tool's version attribute. -->
+    <token name="@TOOL_VERSION@">4.1.2-p1</token>
+    <token name="@GALAXY_TOOL_VERSION@">galaxy0</token>
+    <!-- EDAM topic and operation annotations for the tool. -->
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_0157</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_0237</edam_operation>
+        </edam_operations>
+    </xml>
+    <!-- Package dependency, pinned to the tool version token. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">repeatmasker</requirement>
+        </requirements>
+    </xml>
+    <!-- Cross-reference to the bio.tools registry entry. -->
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">repeatmasker</xref>
+        </xrefs>
+    </xml>
+    <!-- BibTeX citation for RepeatMasker. -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{RepeatMasker,
+                    title = {RepeatMasker Open-4.0},
+                    howpublished = {\url{http://www.repeatmasker.org}},
+                    author = {Smit, AFA and Hubley, R and Green, P.},
+                    year = {2013-2015}}
+            </citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r bfc70c8cc5ca -r 72aade318318 repeatmasker.xml
--- a/repeatmasker.xml Fri Dec 11 22:26:02 2020 +0000
+++ b/repeatmasker.xml Thu May 20 12:56:42 2021 +0000
[
@@ -1,11 +1,13 @@
-<tool id="repeatmasker_wrapper" name="RepeatMasker" version="4.1.1" profile="17.01">
-  <description>screen DNA sequences for interspersed repeats and low complexity regions</description>
-
-  <requirements>
-    <requirement type="package" version="4.1.1">repeatmasker</requirement>
-  </requirements>
-
-  <command detect_errors="exit_code"><![CDATA[
+<tool id="repeatmasker_wrapper" name="RepeatMasker" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@" profile="20.01">
+    <description>screen DNA sequences for interspersed repeats and low complexity regions</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro='xrefs'/>
+    <expand macro='edam_ontology' />
+    <expand macro='requirements' />
+    <version_command>RepeatMasker -v</version_command> <!-- executable name is case-sensitive; matches the `which RepeatMasker` lookup in the command block -->
+    <command detect_errors="exit_code"><![CDATA[
     RM_PATH=\$(which RepeatMasker) &&
     if [ -z "\$RM_PATH" ] ; then echo "Failed to find RepeatMasker in PATH (\$PATH)" >&2 ; exit 1 ; fi &&
 
@@ -97,12 +99,10 @@
           </param>
           <when value="yes">
             <param name="species_list" type="select" label="Species">
-              <option value="vertebrate">Vertebrate (other than below)</option>
-              <option value="mammal">Mammal (other than below)</option>
-              <option value="human" selected="true">Human</option>
-              <option value="rodent">Rodent</option>
-              <option value="mouse">Mouse</option>
-              <option value="rat">Rat</option>
+              <option value="human" selected="true">Human (Homo sapiens)</option>
+              <option value="rodent">Rodent (Order Rodentia)</option>
+              <option value="mouse">Mouse (Mus musculus)</option>
+              <option value="rattus">Rat (Rattus sp.)</option>
               <option value="danio">Danio (zebra fish)</option>
               <option value="drosophila">Fruit fly (Drosophila melanogaster)</option>
               <option value="elegans">Caenorhabditis elegans (nematode)</option>
@@ -219,6 +219,15 @@
       <output name="output_repeat_catalog" file="small_dfam_up.fasta.cat" lines_diff="2" />
       <output name="output_log" file="small_dfam_up.fasta.log" lines_diff="2"/>
     </test>
+    <test expect_num_outputs="4">
+      <param name="input_fasta" value="small.fasta" ftype="fasta" />
+      <param name="source_type" value="dfam" />
+      <param name="species_list" value="rattus" />
+      <output name="output_masked_genome" file="small_dfam_rattus.fasta.masked" />
+      <output name="output_table" file="small_dfam_rattus.fasta.stats" lines_diff="2" />
+      <output name="output_repeat_catalog" file="small_dfam_rattus.fasta.cat" lines_diff="2" />
+      <output name="output_log" file="small_dfam_rattus.fasta.log" lines_diff="2"/>
+    </test>
   </tests>
   <help><![CDATA[
 RepeatMasker is a program that screens DNA for interspersed repeats and low
@@ -234,13 +243,5 @@
 .. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html
     ]]>
   </help>
-  <citations>
-    <citation type="bibtex">
-      @misc{RepeatMasker,
-        title = {RepeatMasker Open-4.0},
-        howpublished = {\url{http://www.repeatmasker.org}},
-        author = {Smit, AFA and Hubley, R and Green, P.},
-        year = {2013-2015}}
-    </citation>
-  </citations>
+  <expand macro="citations" />
 </tool>
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small.fasta.cat
--- a/test-data/small.fasta.cat Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small.fasta.cat Thu May 20 12:56:42 2021 +0000
b
@@ -98,6 +98,6 @@
 ## Total Length: 14220
 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
 ## Total NonSub ( excluding all non ACGT bases ):14220
-RepeatMasker version 4.1.1 , default mode
+RepeatMasker version 4.1.2-p1 , default mode
 run with rmblastn version 2.10.0+
 RM Library: 
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small.fasta.gff
--- a/test-data/small.fasta.gff Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small.fasta.gff Thu May 20 12:56:42 2021 +0000
b
@@ -1,5 +1,5 @@
 ##gff-version 2
-##date 2020-12-11
+##date 2021-05-20
 ##sequence-region rm_input.fasta
 scaffold_1 RepeatMasker similarity 613 632  0.0 + . Target "Motif:(GT)n" 1 20
 scaffold_1 RepeatMasker similarity 780 824 18.3 + . Target "Motif:(ATAATA)n" 1 45
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small.fasta.log
--- a/test-data/small.fasta.log Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small.fasta.log Thu May 20 12:56:42 2021 +0000
b
@@ -1,4 +1,4 @@
-SW score % div. % del. % ins. query sequence pos in  query: begin end (left) repeat class/family pos in repeat: begin end (left) ID
+SW scoret% div.t% del.t% ins.tquery sequencetpos in  query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID
 
 18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1
 16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small.fasta.stats
--- a/test-data/small.fasta.stats Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small.fasta.stats Thu May 20 12:56:42 2021 +0000
b
@@ -53,8 +53,8 @@
   Runs of >=20 X/Ns in query were excluded in % calcs
 
 
-RepeatMasker version 4.1.1 , default mode
-                                        
+RepeatMasker version 4.1.2-p1 , default mode
+                                     
 run with rmblastn version 2.10.0+
-The query was compared to unclassified sequences in ".../dataset_a9c6a294-8dbb-4a71-ad9c-e36735923fbf.dat"
-
+The query was compared to unclassified sequences in ".../dataset_a3b3078d-de09-4651-9e83-62019a3d45ba.dat"
+FamDB: 
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam.fasta.cat
--- a/test-data/small_dfam.fasta.cat Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small_dfam.fasta.cat Thu May 20 12:56:42 2021 +0000
b
@@ -113,6 +113,6 @@
 ## Total Length: 14220
 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
 ## Total NonSub ( excluding all non ACGT bases ):14220
-RepeatMasker version 4.1.1 , default mode
+RepeatMasker version 4.1.2-p1 , default mode
 run with rmblastn version 2.10.0+
-RM Library: CONS-Dfam_3.2
+RM Library: CONS-Dfam_3.3
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam.fasta.log
--- a/test-data/small_dfam.fasta.log Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small_dfam.fasta.log Thu May 20 12:56:42 2021 +0000
b
@@ -1,4 +1,4 @@
-SW score % div. % del. % ins. query sequence pos in  query: begin end (left) repeat class/family pos in repeat: begin end (left) ID
+SW scoret% div.t% del.t% ins.tquery sequencetpos in  query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID
 
 18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1
 16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam.fasta.stats
--- a/test-data/small_dfam.fasta.stats Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small_dfam.fasta.stats Thu May 20 12:56:42 2021 +0000
b
@@ -45,7 +45,7 @@
 
 
 The query species was assumed to be human         
-RepeatMasker version 4.1.1 , default mode
-                                        
+RepeatMasker version 4.1.2-p1 , default mode
+                                     
 run with rmblastn version 2.10.0+
-
+FamDB: CONS-Dfam_3.3
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam_rattus.fasta.cat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small_dfam_rattus.fasta.cat Thu May 20 12:56:42 2021 +0000
b
@@ -0,0 +1,103 @@
+18 0.00 0.00 0.00 scaffold_1 613 632 (13519) (GT)n#Simple_repeat 1 20 (0) m_b1s252i0
+
+  scaffold_1           613 GTGTGTGTGTGTGTGTGTGT 632
+                                               
+  (GT)n#Simple_          1 GTGTGTGTGTGTGTGTGTGT 20
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (0/0)
+Gap_init rate = 0.00 (0 / 19), avg. gap size = 0.0 (0 / 0)
+
+16 18.30 2.22 2.22 scaffold_1 780 824 (13327) (ATAATA)n#Simple_repeat 1 45 (0) m_b1s252i1
+
+  scaffold_1           780 ATATTAAT-ATACTAAATAATGATAATATTAATTCTAATAATGATA 824
+                              v    -   v -      i      v    vv       i   
+  (ATAATA)n#Sim          1 ATAATAATAATAAT-AATAATAATAATAATAATAATAATAATAATA 45
+
+Matrix = Unknown
+Transitions / transversions = 0.40 (2/5)
+Gap_init rate = 0.05 (2 / 44), avg. gap size = 1.00 (2 / 2)
+
+12 23.88 4.55 0.00 scaffold_1 2231 2274 (11877) (CAGA)n#Simple_repeat 1 46 (0) m_b1s252i2
+
+  scaffold_1          2231 CAGA-AAACAGTCAGCCAATCAGA-GGACAGACTCAGAGACAGACA 2274
+                               - i    v   v  iv    -i       vv v         
+  (CAGA)n#Simpl          1 CAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGACA 46
+
+Matrix = Unknown
+Transitions / transversions = 0.50 (3/6)
+Gap_init rate = 0.05 (2 / 43), avg. gap size = 1.00 (2 / 2)
+
+15 18.42 10.20 0.00 scaffold_1 4853 4901 (9250) (TC)n#Simple_repeat 1 54 (0) m_b1s252i3
+
+  scaffold_1          4853 TCTATTTTTCTCTCTCTCTCT-TCGGTCAGTTTCTCT-TCT-TCTC-CTC 4898
+                              v i i             -  vv  vv i     -   -    -   
+  (TC)n#Simple_          1 TCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 50
+
+  scaffold_1          4899 -CTC 4901
+                           -   
+  (TC)n#Simple_         51 TCTC 54
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.10 (5 / 48), avg. gap size = 1.00 (5 / 5)
+
+13 19.07 1.82 7.69 scaffold_1 6230 6284 (7867) (TAATTAA)n#Simple_repeat 1 52 (0) m_b1s252i4
+
+  scaffold_1          6230 TAATGAGT-ATTACTAATTAATAATTGGATGAGTTTATGAATTCATCATT 6278
+                               v i -    v            -i  - i  v  -    v  v   
+  (TAATTAA)n#Si          1 TAATTAATAATTAATAATTAATAATT-AAT-AATTAAT-AATTAATAATT 47
+
+  scaffold_1          6279 AAATAA 6284
+                           -     
+  (TAATTAA)n#Si         48 -AATAA 52
+
+Matrix = Unknown
+Transitions / transversions = 0.60 (3/5)
+Gap_init rate = 0.09 (5 / 54), avg. gap size = 1.00 (5 / 5)
+
+15 28.31 0.00 3.51 scaffold_1 6548 6606 (7545) (GACA)n#Simple_repeat 1 57 (0) m_b1s252i5
+
+  scaffold_1          6548 GACAGAGAGATAAACAGACAGACATGGAAACAGAGGGACATAGAAAACAA 6597
+                                 v   i i           viv i     vi    v -v i  - 
+  (GACA)n#Simpl          1 GACAGACAGACAGACAGACAGACAGACAGACAGACAGACAGA-CAGAC-A 48
+
+  scaffold_1          6598 GACAGAGAG 6606
+                                 v  
+  (GACA)n#Simpl         49 GACAGACAG 57
+
+Matrix = Unknown
+Transitions / transversions = 0.86 (6/7)
+Gap_init rate = 0.03 (2 / 58), avg. gap size = 1.00 (2 / 2)
+
+67 2.94 1.43 0.00 scaffold_1 11981 12050 (2170) (CT)n#Simple_repeat 1 71 (0) c_b1s251i0
+
+  scaffold_1         11981 CTCTCTCTCTCTCCCTCTCCCTCTC-CTCTCTCTCTCTCTCTCTCTCTCT 12029
+                                        i     i     -                        
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT 50
+
+  scaffold_1         12030 CTCTCTCTCTCTCTCTCTCTC 12050
+                                                
+  (CT)n#Simple_         51 CTCTCTCTCTCTCTCTCTCTC 71
+
+Matrix = Unknown
+Transitions / transversions = 1.00 (2/0)
+Gap_init rate = 0.01 (1 / 69), avg. gap size = 1.00 (1 / 1)
+
+19 15.37 2.78 0.00 scaffold_1 12078 12113 (2107) (CT)n#Simple_repeat 1 37 (0) m_b1s252i6
+
+  scaffold_1         12078 CTCGCTCTCTCACTCTCTCCCTCTCTC-CCCACTCTC 12113
+                              v       v       i       - i v     
+  (CT)n#Simple_          1 CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC 37
+
+Matrix = Unknown
+Transitions / transversions = 0.67 (2/3)
+Gap_init rate = 0.03 (1 / 35), avg. gap size = 1.00 (1 / 1)
+
+## Total Sequences: 1
+## Total Length: 14220
+## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
+## Total NonSub ( excluding all non ACGT bases ):14220
+RepeatMasker version 4.1.2-p1 , default mode
+run with rmblastn version 2.10.0+
+RM Library: CONS-Dfam_3.3
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam_rattus.fasta.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small_dfam_rattus.fasta.log Thu May 20 12:56:42 2021 +0000
b
@@ -0,0 +1,10 @@
+SW scoret% div.t% del.t% ins.tquery sequencetpos in  query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID
+
+18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1
+16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
+12 23.9 4.5 0.0 scaffold_1 2231 2274 (11946) (CAGA)n Simple_repeat 1 46 (0) 3
+15 18.4 10.2 0.0 scaffold_1 4853 4901 (9319) (TC)n Simple_repeat 1 54 (0) 4
+13 19.1 1.8 7.7 scaffold_1 6230 6284 (7936) (TAATTAA)n Simple_repeat 1 52 (0) 5
+15 28.3 0.0 3.5 scaffold_1 6548 6606 (7614) (GACA)n Simple_repeat 1 57 (0) 6
+67 2.9 1.4 0.0 scaffold_1 11981 12050 (2170) (CT)n Simple_repeat 1 71 (0) 7
+19 15.4 2.8 0.0 scaffold_1 12078 12113 (2107) (CT)n Simple_repeat 1 37 (0) 7
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam_rattus.fasta.masked
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small_dfam_rattus.fasta.masked Thu May 20 12:56:42 2021 +0000
b
b'@@ -0,0 +1,286 @@\n+>scaffold_1\n+TGCTTTTCTGAGATTTATGTCAGCTCCCCATTGGGAGATACAGGAGAATC\n+CAATATATTCCTGTCTCGCTTCTTTTATATTTATCCTTTCTACACCTGAC\n+ATCCTCAGTGATTGAAGTGACAAAAAGTGGAGCGCACGCAGTGGTCATTA\n+CCCTCCAATGGTACTTCTAATTAAGGAAAGATTTTTTGCATTCACTGAGC\n+AAAACACTTATTTGCATGAATGGAAAATCATAAATGAGGGGTCCATTAAA\n+CAAATTTCCTAATTGTTTGCTTTTTTTTCTTCCTACTTCACCATCCCTCT\n+AAAGCTATTACTCTCTATTCTACCCCATTATCGAGTGCACAAACACCAGC\n+CCCAAATTGTCTTATCTCTGCTTTGATAAATGATATTTTTTTCTCTTATA\n+ATTGTGTTTCTCTCTAGATGTGCGGTGTCCATCAAGACACTGGATGTCAC\n+GTGGTCACTCCTGTGAAGAGCGAACTGTGTGGAACCCGAAGTACTGTGTG\n+GTTGCTGACTGTCAGATGCTGCTTCTGAATGAGGAGGAGGTGGTGAGAGC\n+AGACAGCACAATCTCTTGTCTGGCATGTTCCTCTGTGTCACCATTGTCTC\n+TCTCATCTGCTCNNNNNNNNNNNNNNNNNNNNCCTTGTCAATGTCATTAC\n+ACAATTTATGTTGGTGCACTGCATGTCAGAGTCTGAACTTTGTTGGACAT\n+CTACTTTCTGTCACCTGATACCTCATACTTTACACTCTAATCCCTTTTTC\n+CTTTCTAACTGCTGCTGTGTTAAGCTGTCNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNGTGAATGGAGATTCACAATGAAGAAA\n+CACAAGGTCCAGACTGAACCCCTGTGCTGTGTGAAGAGTTCACTCTACAT\n+TTACATTCATCCCACATCAGAGACCAGCAATAGCCAGGTTAAGCAGAGCA\n+AGGCTGAGTCTGGCTCTTTGCCTCTGTTGTTTTGATTAGTGTCACACTGG\n+ATTATGCTAGTTTGTGTGAGATTAGAGGTCCTGTTGTTACGAGCAAATCA\n+GCAGTCAGCAGGGTAATCTAAATTACATTGTTCTGTCGGCAATATCCTCT\n+CCTCGCTCAACAGCTTTGTGTGAGCTCATATTTAAAGCCCCCACAGTAGT\n+GCAGTGCATAAAATCATGCAGATACAGATCAGAAGCTTCAGGTAATGTTT\n+ACATCAAACATCAGGGGGAAAATGTGATCTCAGTGCCTCTGACCGTGGTG\n+TGATTGTTGGTGCCGGACAGGCTGGTTTGAGTATTTCTAAAACTGCTGAT\n+CTCCTGGGATTTTCACACACGGCATTCTCTAGAGTTTACTCAGAACGGTG\n+TGGAAAGCAAAAAAACATCCCGCGAGCTGCAATCCTGCGGATGGAAACAC\n+CTTGTTGATGGGATATGTCAGAGGAATGGCCAGGCTGGATCAATCTGACA\n+GAAAGGCGGTAACTCAGATAAGCACTCTCTACAACTGTGGTGAGCAGAAA\n+AGCGTTCTCAGAATGCACAACACGTTCAACCTTGAGGAAGATGTTGCTAC\n+AGCAGCAGATGAAGACCACGTCTGGTTCCACTCCTGTCAGCCAAGAACAT\n+CTCAGACTGCACAGGACACCAAAACAAAAACACATCCTGTTTTCTACTGG\n+TGGTAGAGTCACAATTTGGCAATAAGATAAATCCATGGACCCAACTTGCC\n+TTGTGTCAATAGACCAAGCTGCTGGTGGTCTTGGGGAATGTTTTCTTTAC\n+ACTTTCACACTATCGGCCTATATAAAATCCTATTATAAACTGCTGTACAG\n+TGCATTGGGTTTGATAAGCCCTCTAAGTCTGTATATCATGATCTCATTGA\n+TAC
CTGCGGCAATACACCCCCACAAACGCAGCTCCCTGCATTTTAAGTAT\n+TATAGAGAGTAATTTCACTGTCCATGTCTTTTTTGATCATAAAGCAGGTC\n+TAGGTGCTGTATACAACACTGTATTGAAATGCTGAAGCAGGTCTAGGTGC\n+TGTATACAACACTGTATTGAAATGCTGAAGCCAATATATCAATCTATCTG\n+TCTATATTTTATGAAATATCAGAGCATTTTAGTCCAAATTTGTCTGTTTT\n+GTTCATTGCAATGCTGACTGCTTAACATTTTCCTCAGTGGTGGCCATCAC\n+CACAGCATTTACTGGTGAGTTTTACCAAGCTACAGTGGGCCACAATGAGT\n+TACCTGTTATTGGCCTGGCTCTATGGCATGNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNCAAAGTGCCCTGTTCTTGTTAGAGCA\n+GAGATAAATGAGACAGAGGAGATGTAAAACTATACTGAGATGGTTTTTTG\n+GTTCTTAAAACCACAAATATATTGTATTACGGATATCAAAACTTCAAATA\n+AAACCAGTAAAGTGTAAAACATGGGACCTCTAAATGAAGGGCTGTTTTGT\n+TTCGTAGTTCTAGATTATGTCACTATGTGGCCTCGGTCTCTGTCTGCGTA\n+AAGCGGCTACATGTAGAATTTGACCCACTTTGGTGCCCACATATGGTAAC\n+TAAAACACTACAGACAGTATGCACTCCAACCCTATATAATTTATGTCCTT\n+CAGTGAAGCCTACAAAATGTGCAGCATGATTTGCTATGATTAATACCTAG\n+AATTATATTAAATTTTAGAAGTTTAGAGAGATGGCTTATACCTTTTTCTG\n+AGCATGGAGTCATCTCATGTAATTTGTCACATACCTTTGGCCAGCTTAAA\n+CCTGGTCATTCCAGTGAGAGCTGCTCACCCAAATGTTTCTGTATGCACCT\n+CCTGCTGTGGGTGCCAATAGCTCATGCAAGTTGGTGTCTTTCAACTAAGG\n+CCACTATGTGAGGACCATCAGCAGCAAATTAAATTAGAAGTGCCTTGGAG\n+TTTCTCAGGCCCTCTTCTATGATCATTAAGGTCTCAGTGACAGAACAGTG\n+CCCCTTTGAGGGACTATGGCATCTTGAGCACTTAAGTTGGCCGAATGTTT\n+GATCTCAAATATGAGTGCACTGTTTCAAAGTCACAAAAGCCTCTGGACAA\n+GGCATAGTTACTGGGCTTGGTTAACCTTTTTTTCATACCGGCGTGACTGC\n+TGGAACTTGGAACATGTAGCCTAAGTTTCCCTTTGTCTGCAATTACTGAG\n+GTATGCCATGTTGAAATAGAACAATAATTGCAGGGAGGAATTATTCCAAT\n+CGACCCATCGGCAGGCTTTCCAATACAGTGCTTTTTGTTTCACAGCAACA\n+AAGAACAAGAGCTTTTAAATGCATCCATCTGGACCCTGACTATGAAAGGT\n+CATTGGTAACGCTGCTGGATTTCTGTTATGTCAGCAAACTGCAGGTCAAA\n+GCGATGATACAGTCTTATAATGGCTTTCTCCAAGTGAAGGTGGGATAACA\n+TGGTTTAAACAAGCTCAGTTAAATGGATCAGAGCATCAACTGCTGTGAAC\n+AAGCAATCACACGGTCAAGGTCAAACATTTACACTTCTGAGAGATCTGGA\n+GAGTATGTCATGGCAGTATTGCTTTTCAGTTATTTCAGTAACTGATTACG\n+CCTTGTACTTAGTCTCAGAATTAGTGATTTGATTCAAAGTGTTTTATATA\n+TGTATATTTCAAAGCAAAAATACAGCTTAGGTAAAGCTCCTTAGACTCAT\n+GCAGCCTAATTTACTGTAGACATTCCTCCATGTACAGTACTGTGCAAATG\n+TTTTAGGCAGTTTTAGGCACTAAAGGTGAACTGAGGATGCAG
TCCCACGA\n+TTAATTTTTATTCATCAGTTAACCTCATGTGAAGTGTAGTAAACAGAAAA\n+AACCTAAATCAGATC'..b'TGTCCTTGTGTGACACAGGCTCTA\n+AATAAGCAGCATGATGAATAAAAATGACACTGAGATGAATAGGAATCCAC\n+AGCAAATCAGAGAGCACTCTCATCTCGTCTCATCCTCCGACTGAATAGCG\n+AGCGGCTGCCTCCTTTATTCTTTTGAACTCTTCGCGGTTTTGGCACAAAC\n+ATGCCAGAGACAGAGAGCGCTGTTACGCTCCCACCAGAGCAACTACGCTT\n+AATTTTACTGAGGTGAAAGTTTTTTTTTTTTTTCTGGATAAAGCTTTGGA\n+AAAGTTCTCAACTGTTGCTTCTTTAAAGATGCTGGGGCATATTCTGCCCT\n+ATTTTCCAGTTTTTTACATCTCCCATAAATATTTTCACAAGCTGTAACAT\n+TTTAGACAGGATTAAATTGAACTAGATAACTGACCAAAGGCTGCTAAATT\n+ATTTTTATGTGCCTTGTAGCACAAATACTGTCTCACATATATTTTAAGTA\n+TCTTAATATTCCCTCTGATGTTTAATTTAACAAAGCTGTCCTTCCCTCCA\n+TCTCTCTCCTCATAGTTTTTCATTACAATTAGCTCTCATTAGATAGAATT\n+GTATTTGTTGTCTTTGTGCCAGTCACTCCAGTCTATTTTGCCAGCACACA\n+ACTAAATAATTGTTTTGCCTTGTCAGTGTCTTGTTTTACAGTTTTAATGA\n+TGAGCCCAAAACATGTCAAATATGAGGAACTATAACTTATCAAGAAAGTG\n+GGGTAGTTGAGATAAAACTGTTTCCGAGGTGGAGGTTGGCTGAGCTCCAT\n+TTGGCCACAAAATGTAGCTGAAAGGGCAGAGAAACCCACTTTAATGGAGT\n+ACAGGTTGCATATGAGCTGGTAGGAAAGATATAGAATCATTATCAGCTGA\n+TAATCTAACAGTAGCAGTCAGTGTAGATGCTGTGCTAACGCAAAGTTGTG\n+AAACTACTGTCTGTAAGCAACTCATCCAATTGTTATTGCTGCCAAGCTTA\n+AGTATTTTAACAGCTTTTAGGTGTTGTTAGCACAGTACTCGTTTGGATCC\n+GGCCATGACAAGAAATCTTTATCGGCCACTAATTTAATTAATCATCTTTT\n+TTCAAGCAAAAATCACAAACTTTCTTTGGTTCCACTTTTTCTCATTGTAG\n+ATTAAATATCTTTTGGGTTTTGGCACAGGCTGGACAAAAAAACCTCTGAG\n+ACGCTGTGATAAGAATTTATTTTCACATTTTTTTTACTTTTCAGGGACTA\n+CACAATTATTTGGTGATGAAAATAATATTTTGTTGCAGCCCAAATGTTAA\n+CTTGCCACTCAAGCTGTCAATTCAAGAAGGCAAAAGCAACAAATACTGCT\n+TTGCCGAGCTGAAATGAGAGTAGAAACCATGCCCGGGCCAGCTCAGCCTG\n+GTCAGGTTTTTGAGTCCATCTATTGTTAACATTCAGGTTGCAGCACAAGG\n+GAAGTTCCTAAATTCTTCTGGCATGTTAATGTTTTCCAAACTGATGTTAC\n+CAAGTCCTCGTTATGATGAGAAAAAAATATGCTGAGAGTGAAATTGATCA\n+AAGTGGAAACATTAAGCACTGCCACAGTCCCCTCCACCCATCTTTCTCCC\n+TCTCTCTCCACCGCAACGTGGAAACTGCTCNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+CTCCCTCCCATTCTGTGCTCCGGTATANNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNCAGAGAGGGGTTCAGTCAGACAGATGTAACACAGCAG\n+TAGAAGCCTGAGCTGAGCTGGCAGGCTGCGGAGG
CCAGACCAGAGCCAGC\n+AGCTCTGACTGGAGAGAACAGCGAAGAGGTGACGGCAGCAGCGGCAGCAC\n+AGGGGATAAAACAGTAGTTGTAGTAGCAGTTTCAGTAGTTGTACTTCAGT\n+TACAGAGCTGACTTTACCTCAGGATATGGGTGTGTTGATCTGGGCGCTCA\n+TCACCCTGTGAATTTAACATGGACCCTGACACCAGCACCCATCCAGAGAC\n+ACAACATGGTGAGTGAGATTTGAAGGAGGAAAAGATTAGAAACAATGAGA\n+GTGATACTGTGGGAAAAGTTGAGGAGCGTGTTAGTGAACAAGGGAGTCAG\n+GTCACATGGTTTTCCCGTTAGGGTTGCTATTATGACTAGGCTCTCCATTT\n+GGCTCTAAAGCTGTTGACTAGGGCCTCTGTGGTCAAATGAATAGGACTTC\n+AGTCTGTTCTGTTGAGTAGGGGGTGAAGGGGTGAGGGGTGTTACATGGCA\n+ATGTGATGAAATCAACTGCCTGCTTAAGACATTTTGCTCTAATGACCCTC\n+TTGTCATTACTTTAACAGGGTTGTAAAGTTTTTTTTGCTTTTTTGTAGCT\n+AAGAAAGTTGAGAATTGTTTTGTCTGAACTCTCTCTGGGATTTGTCTTGT\n+CGGTTTTTGGCTGGTTTTTGGTGTGAGCTTCACAGAGCAGGAAGCTCTCC\n+TCCTGCTCTTTGAATGGAATAATTAACTGTGAGGGCCGTGGGTGTACCAT\n+TTCTGCTCAAAAGCAGCAAGATAATTTGATGGTTATGTGAATCTGTATAT\n+TTGTATGCTGTGTAGTGGTGCTGCATGTGGGTGTTTGTGTTAGAGACACA\n+GAGGGGCACCAAAGAATGAAAGTGGGAGACAGAGGGAACGAGAAGGGGAG\n+AGACCGAGAGAGAAGGACTTATGTACACAAATAAATCCAGGGGGATCTAG\n+ACTGCACTGTAGGCCAGTGCGAATGCTCACTCTTTCTCTTCCCCCTCATT\n+TTCCGCTGCCTCTTTTTTTTCCTCCACTGCTCCAAATGTTGGAGCCCAAA\n+CTAGATAGTTAAAAGGCAGAAAAAAATGGTGTGTTATTAACTGGGCCAGA\n+AAAGACGATGTTTTGTCTTCATGGCCGATGGGAAGGACTCTGCTGTGCTG\n+CTGATGAAGAATGTGGTTAACAGAATGAGTCAACAGAGAACATATTTCCA\n+AGAGACTGTAGTTTCTCTTCGCCAGCGTCTAAGCCACATTGCTTTATGCA\n+CTGGGCTCTCTGCCATGACGGCGAAGGAGAGACAAAGAGAGGGGAAAGAG\n+AATCGCTAAAGATAAAACTCTGTTTTTATCCATCTCCTGAGACACAAAAC\n+TTCTGTTAGCAATCTAACCAAATAAGCGAGAACATTAAGCTTCTTCTCAC\n+CTTCCCAAAATAAAATGTCAACTTCATGCAGTAATACTCACTCTCTTCTC\n+TCAACCAGTCAGCCAGCCTCTTTCAGTGCATACAGCTTGAGGAGATCCCT\n+TCTAAAGGTCCAATATAAATAGAAAAGTGGGAGTAGAAAGGGCAATAATC\n+TGATATCATCTGATTACATTCACACCTCAGGCTTGCACGCTACAGGAAGA\n+GTTCACGCTTCCCAAGGCTTGCAGGCATCACTTTTCACTCATTCTCAGAT\n+AGACACACAAACACATGCACAGAAATATCATTGCTGTTGTTTGCAACAAA\n+TCCTGGAGTAGACTACTTTGCATTGAATTTCTATATGCATGTGTTTTCAT\n+ATATTACACATTGCTTTTTGTACTTTGCATATAAAGTAGATGCTGATCTG\n+CTATCTGCATATATAGTAGCTGCAGATAGCACTGTAACTACATCTACATA\n+TTTCTTGTTTGTATCTACATCAAGCAGATAGTTGCTGATGCTGGACAACC\n+TCTTCCGATATTCATTATTT
TGGAAAAAAAAATATCAATGGCATTCCTAC\n+CTTACAACTTAAATTAAAAGTGGTCACTTGAGTAACTGCAGAACATGTGT\n+TCACCCAATAGTTTTTTTTAGAGAGTGTCACTCAAAGTATTCCTCCTTAC\n+AAGCACCCATGCCTAAATCA\n'
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam_rattus.fasta.stats
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small_dfam_rattus.fasta.stats Thu May 20 12:56:42 2021 +0000
b
@@ -0,0 +1,53 @@
+==================================================
+file name: rm_input.fasta           
+sequences:             1
+total length:      14220 bp  (14220 bp excl N/X-runs) 
+GC level:         39.94 %
+bases masked:        378 bp ( 2.66 %)
+==================================================
+               number of      length   percentage
+               elements*    occupied  of sequence
+--------------------------------------------------
+SINEs:                 0            0 bp    0.00 %
+      Alu/B1           0            0 bp    0.00 %
+      B2-B4            0            0 bp    0.00 %
+      IDs              0            0 bp    0.00 %
+      MIRs             0            0 bp    0.00 %
+
+LINEs:                 0            0 bp    0.00 %
+      LINE1            0            0 bp    0.00 %
+      LINE2            0            0 bp    0.00 %
+      L3/CR1           0            0 bp    0.00 %
+
+LTR elements:          0            0 bp    0.00 %
+      ERVL             0            0 bp    0.00 %
+      ERVL-MaLRs       0            0 bp    0.00 %
+      ERV_classI       0            0 bp    0.00 %
+      ERV_classII      0            0 bp    0.00 %
+
+DNA elements:          0             0bp    0.00 %
+      hAT-Charlie      0            0 bp    0.00 %
+      TcMar-Tigger     0            0 bp    0.00 %
+
+Unclassified:          0            0 bp    0.00 %
+
+Total interspersed repeats:         0 bp    0.00 %
+
+
+Small RNA:             0            0 bp    0.00 %
+
+Satellites:            0            0 bp    0.00 %
+Simple repeats:        7          378 bp    2.66 %
+Low complexity:        0            0 bp    0.00 %
+==================================================
+
+* most repeats fragmented by insertions or deletions
+  have been counted as one element
+  Runs of >=20 X/Ns in query were excluded in % calcs
+
+
+The query species was assumed to be rattus        
+RepeatMasker version 4.1.2-p1 , default mode
+                                     
+run with rmblastn version 2.10.0+
+FamDB: CONS-Dfam_3.3
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam_up.fasta.cat
--- a/test-data/small_dfam_up.fasta.cat Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small_dfam_up.fasta.cat Thu May 20 12:56:42 2021 +0000
b
@@ -98,6 +98,6 @@
 ## Total Length: 14220
 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220
 ## Total NonSub ( excluding all non ACGT bases ):14220
-RepeatMasker version 4.1.1 , default mode
+RepeatMasker version 4.1.2-p1 , default mode
 run with rmblastn version 2.10.0+
 RM Library: CONS-_
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam_up.fasta.log
--- a/test-data/small_dfam_up.fasta.log Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small_dfam_up.fasta.log Thu May 20 12:56:42 2021 +0000
b
@@ -1,4 +1,4 @@
-SW score % div. % del. % ins. query sequence pos in  query: begin end (left) repeat class/family pos in repeat: begin end (left) ID
+SW scoret% div.t% del.t% ins.tquery sequencetpos in  query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID
 
 18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1
 16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
b
diff -r bfc70c8cc5ca -r 72aade318318 test-data/small_dfam_up.fasta.stats
--- a/test-data/small_dfam_up.fasta.stats Fri Dec 11 22:26:02 2020 +0000
+++ b/test-data/small_dfam_up.fasta.stats Thu May 20 12:56:42 2021 +0000
b
@@ -47,7 +47,7 @@
 
 
 The query species was assumed to be rodent        
-RepeatMasker version 4.1.1 , default mode
-                                        
+RepeatMasker version 4.1.2-p1 , default mode
+                                     
 run with rmblastn version 2.10.0+
-
+FamDB: CONS-_