Repository 'meme_dreme'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/meme_dreme

Changeset 7:0c97e5c18468 (2025-07-14)
Previous changeset 6:fd05b142b3a3 (2024-08-29)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme commit 89ee0af6e955ff964b7984d77ad536e0a9154278
modified:
macros.xml
test-data/meme_output_test1.html
test-data/meme_output_test1.txt
test-data/meme_output_test1.xml
test-data/meme_output_test2.html
test-data/meme_output_test2.txt
test-data/meme_output_test2.xml
added:
test-data/alphabet.txt
test-data/meme_input_m.fasta
test-data/meme_output_custom.html
test-data/meme_output_custom.txt
test-data/meme_output_custom.xml
test-data/streme_input_neg2.fasta
test-data/streme_input_pos.fasta
test-data/streme_input_pos2.fasta
test-data/streme_output_test1.html
test-data/streme_output_test1.txt
test-data/streme_output_test1.xml
test-data/streme_output_test2.html
test-data/streme_output_test2.txt
test-data/streme_output_test2.xml
test-data/streme_output_test3.html
test-data/streme_output_test3.txt
test-data/streme_output_test3.xml
b
diff -r fd05b142b3a3 -r 0c97e5c18468 macros.xml
--- a/macros.xml Thu Aug 29 10:19:55 2024 +0000
+++ b/macros.xml Mon Jul 14 21:33:41 2025 +0000
b
@@ -1,6 +1,6 @@
 <macros>
     <token name="@VERSION_SUFFIX@">0</token>
-    <token name="@TOOL_VERSION@">5.5.6</token>
+    <token name="@TOOL_VERSION@">5.5.8</token>
     <token name="@PROFILE@">23.0</token>
     <xml name="requirements">
         <requirements>
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/alphabet.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alphabet.txt Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,13 @@
+ALPHABET "DNA with covalent modifications" DNA-LIKE
+
+# Core uppercase symbols
+A "Adenine" 8510A8 ~ T "Thymine" A89610
+C "Cytosine" A50026 ~ G "Guanine" 313695
+
+# Covalent modifications
+m "5-Methylcytosine" D73027 ~ 1 "Guanine:5-Methylcytosine" 4575B4
+h "5-Hydroxymethylcytosine" F46D43 ~ 2 "Guanine:5-Hydroxymethylcytosine" 74ADD1
+f "5-Formylcytosine" FDAE61 ~ 3 "Guanine:5-Formylcytosine" ABD9E9
+c "5-Carboxylcytosine" FEE090 ~ 4 "Guanine:5-Carboxylcytosine" E0F3F8
+a "N6-methyladenine" 8510A8 ~ t "Thymine_lowercase" 756BB1
+g "modified_g" 00897B ~ 5 "cytosine_lowercase" 53698A
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_input_m.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_input_m.fasta Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,66 @@
+>chr21_19617074_19617124_+
+AAAAATTATTACTAGGGAGGGGGCCGGAACCTCGGGACGTGGGTATATAA
+>chr21_26934381_26934431_+
+GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTATAAAAGGGTCGCACGTT
+>chr21_28217753_28217803_-
+CAAAGGGGAGGAGTGGGGTGGGGGTGGGGGTTTCACTGGTmCACTATAAA
+>chr21_31710037_31710087_-
+AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTT
+>chr21_31744582_31744632_-
+CCmAGGTCTAAGAGCATATATAACTTGGAGTCCAGACTATGACATTCAAA
+>chr21_31768316_31768366_+
+AACGTATATAAATGGTCCTGTCCAGATGTGGCATGCAAACTmAGAATCTT
+>chr21_31914206_31914256_-
+TGACACCCACTACTTAGAGTATAAAATCATTCTGAGAAGTTAGAGACACC
+>chr21_31933633_31933683_-
+TCAGAGTATATATAAATGTTCCTGTCCAGTCACAGTCACCAAACTGACCT
+>chr21_31962741_31962791_-
+ACATATAACTCAGGTTGGATAAAATAATTTGTACAAATCAGGAGAGTCAA
+>chr21_31964683_31964733_+
+TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAAGTGTCCATAC
+>chr21_31973364_31973414_+
+aaacttaaaactctataaacttaaaactCTAGAATCTGATCCTGCTATAC
+>chr21_31992870_31992920_+
+CTCATACACTATTGAAGATGTATAAAATTTCATTTGCAGATGGTGACATT
+>chr21_32185595_32185645_-
+TCACCACCCACCAGAGCTGGGATATATAAAGAAGGTTCTGAGACTAGGAA
+>chr21_32202076_32202126_-
+TGCCCACCAGCTTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT
+>chr21_32253899_32253949_-
+AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC
+>chr21_32410820_32410870_-
+TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG
+>chr21_36411748_36411798_-
+ATAGTTCTGTATAGTTTCAGTTGGCATCtaaaaattatataactttattt
+>chr21_37838750_37838800_-
+gatggttttataaggggcctcaccctcggctcagccctcattcttctcct
+>chr21_45705687_45705737_+
+CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGCGGGGGTATAACAGC
+>chr21_45971413_45971463_-
+CAGGCCCTGGGCATATAAAAGCCCCAGCAGCCAACAGGctcacacacaca
+>chr21_45978668_45978718_-
+CAGAGGGGTATAAAGGTTCCGACCACTCAGAGGCCTGGCACGAtcactca
+>chr21_45993530_45993580_+
+CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCTCACTCACTCGC
+>chr21_46020421_46020471_+
+GAGACATATAAAAGCCAACATCCCTGAGCACCTAACACACGGactcactc
+>chr21_46031920_46031970_+
+GGAAAATACCCAGGGAGGGTATAAAACCTCAGCAGCCAGGGCACACAAAC
+>chr21_46046964_46047014_+
+ACAAGGCCAGGAGGGGTATAAAAGCCTGAGAGCCCCAAGAACctcacaca
+>chr21_46057197_46057247_+
+ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCATATAAAAGCC
+>chr21_46086869_46086919_-
+GACAGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC
+>chr21_46102103_46102153_-
+AGGTGTGTGCTTCTGTGCTGTGGGGATGCCTGGGTCCAGGTATAAAGGCT
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGmGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGmTGCCGGTGAGCGTATAAAGGCCCTGGCG
+>chr21_47575506_47575556_-
+TGAGAAGCmGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_custom.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_custom.html Mon Jul 14 21:33:41 2025 +0000
[
b'@@ -0,0 +1,276 @@\n+        "options": {\n+          "mod": "zoops",\n+          "revcomp": false,\n+          "nmotifs": 1,\n+          "objfun": "E-value of product of p-values",\n+          "spfun": "E-value of product of p-values",\n+          "minw": 8,\n+          "maxw": 50,\n+          "minsites": 2,\n+          "maxsites": 30,\n+          "wnsites": 0.8,\n+          "spmap": "uni",\n+          "spfuzz": 0.5,\n+          "searchsize": 1500,\n+          "maxsize": 1000000,\n+          "norand": "no",\n+          "csites": 1000,\n+          "hsfrac": 0,\n+          "prior": "dirichlet",\n+          "b": 0.01,\n+          "maxiter": 50,\n+          "distance": 0.001,\n+          "wg": 11,\n+          "ws": 1,\n+          "noendgaps": false,\n+          "substring": true,\n+          "seed": 0,\n+          "brief": 1000\n+        },\n+        "alphabet": {\n+          "name": "DNA with covalent modifications",\n+          "like": "dna",\n+          "ncore": 16,\n+          "symbols": [\n+            {\n+              "symbol": "A",\n+              "name": "Adenine",\n+              "colour": "8510A8",\n+              "complement": "T"\n+            }, {\n+              "symbol": "C",\n+              "name": "Cytosine",\n+              "colour": "A50026",\n+              "complement": "G"\n+            }, {\n+              "symbol": "G",\n+              "name": "Guanine",\n+              "colour": "313695",\n+              "complement": "C"\n+            }, {\n+              "symbol": "T",\n+              "name": "Thymine",\n+              "colour": "A89610",\n+              "complement": "A"\n+            }, {\n+              "symbol": "a",\n+              "name": "N6-methyladenine",\n+              "colour": "8510A8",\n+              "complement": "t"\n+            }, {\n+              "symbol": "c",\n+              "name": "5-Carboxylcytosine",\n+              "colour": "FEE090",\n+              "complement": "4"\n+            }, {\n+              "symbol": "f",\n+              "name": "5-Formylcytosine",\n+              "colour": "FDAE61",\n+              "complement": "3"\n+            }, {\n+              "symbol": "g",\n+              "name": "modified_g",\n+              "colour": "00897B",\n+              "complement": "5"\n+            }, {\n+              "symbol": "h",\n+              "name": "5-Hydroxymethylcytosine",\n+              "colour": "F46D43",\n+              "complement": "2"\n+            }, {\n+              "symbol": "m",\n+              "name": "5-Methylcytosine",\n+              "colour": "D73027",\n+              "complement": "1"\n+            }, {\n+              "symbol": "t",\n+              "name": "Thymine_lowercase",\n+              "colour": "756BB1",\n+              "complement": "a"\n+            }, {\n+              "symbol": "1",\n+              "name": "Guanine:5-Methylcytosine",\n+              "colour": "4575B4",\n+              "complement": "m"\n+            }, {\n+              "symbol": "2",\n+              "name": "Guanine:5-Hydroxymethylcytosine",\n+              "colour": "74ADD1",\n+              "complement": "h"\n+            }, {\n+              "symbol": "3",\n+              "name": "Guanine:5-Formylcytosine",\n+              "colour": "ABD9E9",\n+              "complement": "f"\n+            }, {\n+              "symbol": "4",\n+              "name": "Guanine:5-Carboxylcytosine",\n+              "colour": "E0F3F8",\n+              "complement": "c"\n+            }, {\n+              "symbol": "5",\n+              "name": "cytosine_lowercase",\n+              "colour": "53698A",\n+              "complement": "g"\n+            }, {\n+              "symbol": "?",\n+              "equals": "ACGTacfghmt12345"\n+            }\n+          ]\n+        },\n+        "background": {\n+          "source": "--sequences--",\n+          "order": 0,\n+          "freqs": [\n+            0.263, 0.2, 0.249, 0.189, 0.0297, 0.027, 0.00066, 0.00726,\n+            0.00066, 0.00396, 0.0277, 0.00066, 0.00066, 0.00066, 0.00066,\n+            0.00066\n+          ]\n+        },\n+        "sequence_db": {\n+          "prim'..b'   "weight": 1.000000\n+            }, {\n+              "name": "chr21_31710037_31710087_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_31744582_31744632_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_31768316_31768366_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_31914206_31914256_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_31933633_31933683_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_31962741_31962791_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_31964683_31964733_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_31973364_31973414_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_31992870_31992920_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_32185595_32185645_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_32202076_32202126_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_32253899_32253949_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_32410820_32410870_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_36411748_36411798_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_37838750_37838800_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_45705687_45705737_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_45971413_45971463_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_45978668_45978718_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_45993530_45993580_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_46020421_46020471_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_46031920_46031970_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_46046964_46047014_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_46057197_46057247_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_46086869_46086919_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_46102103_46102153_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_47517957_47518007_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_47575506_47575556_-",\n+              "length": 50,\n+              "weight": 1.000000\n+            }\n+          ]\n+        },\n+        "motifs": [\n+          {\n+            "db": 0,\n+            "id": "taCaaACAGtataactctcac",\n+            "alt": "MEME-1",\n+            "len": 21,\n+            "nsites": 5,\n+            "evalue": "4.6e-028",\n+            "ic": 55.7,\n+            "re": 68.0,\n+            "llr": 236,\n+            "bt": 7.73664,\n\\ No newline at end of file\n'
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_custom.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_custom.txt Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,61 @@
+********************************************************************************
+MEME - Motif discovery tool
+********************************************************************************
+MEME version 5.5.8 (Release date: Thu May 15 15:01:46 2025 -0700)
+
+For further information on how to interpret these results please access https://meme-suite.org/meme.
+To get a copy of the MEME Suite software please access https://meme-suite.org.
+
+********************************************************************************
+
+
+********************************************************************************
+REFERENCE
+********************************************************************************
+If you use this program in your research, please cite:
+
+Timothy L. Bailey and Charles Elkan,
+"Fitting a mixture model by expectation maximization to
+discover motifs in biopolymers",
+Proceedings of the Second International Conference on Intelligent Systems
+for Molecular Biology, pp. 28-36, AAAI Press, Menlo Park, California, 1994.
+********************************************************************************
+
+
+********************************************************************************
+ALPHABET "DNA with covalent modifications" DNA-LIKE
+********************************************************************************
+A "Adenine" 8510A8 ~ T "Thymine" A89610
+C "Cytosine" A50026 ~ G "Guanine" 313695
+a "N6-methyladenine" 8510A8 ~ t "Thymine_lowercase" 756BB1
+c "5-Carboxylcytosine" FEE090 ~ 4 "Guanine:5-Carboxylcytosine" E0F3F8
+f "5-Formylcytosine" FDAE61 ~ 3 "Guanine:5-Formylcytosine" ABD9E9
+g "modified_g" 00897B ~ 5 "cytosine_lowercase" 53698A
+h "5-Hydroxymethylcytosine" F46D43 ~ 2 "Guanine:5-Hydroxymethylcytosine" 74ADD1
+m "5-Methylcytosine" D73027 ~ 1 "Guanine:5-Methylcytosine" 4575B4
+? = ACGTacfghmt12345
+********************************************************************************
+
+********************************************************************************
+TRAINING SET
+********************************************************************************
+PRIMARY SEQUENCES= Galaxy_FASTA_Input
+CONTROL SEQUENCES= --none--
+Sequence name            Weight Length  Sequence name            Weight Length  
+-------------            ------ ------  -------------            ------ ------  
+chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  
+chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  
+chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  
+chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  
+chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  
+chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  
+chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  
+chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  
+chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  
+chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  
+chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  
+chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  
+chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  
+chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  
+chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  
+********************************************************************************
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_custom.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_custom.xml Mon Jul 14 21:33:41 2025 +0000
b
b'@@ -0,0 +1,946 @@\n+<alphabet_matrix>\n+<alphabet_array>\n+<value letter_id="A">61</value>\n+<value letter_id="C">-897</value>\n+<value letter_id="G">-897</value>\n+<value letter_id="T">-897</value>\n+<value letter_id="a">-897</value>\n+<value letter_id="c">-897</value>\n+<value letter_id="f">-897</value>\n+<value letter_id="g">-897</value>\n+<value letter_id="h">-897</value>\n+<value letter_id="m">-897</value>\n+<value letter_id="t">443</value>\n+<value letter_id="n1">-897</value>\n+<value letter_id="n2">-897</value>\n+<value letter_id="n3">-897</value>\n+<value letter_id="n4">-897</value>\n+<value letter_id="n5">-897</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-897</value>\n+<value letter_id="C">0</value>\n+<value letter_id="G">-31</value>\n+<value letter_id="T">-897</value>\n+<value letter_id="a">375</value>\n+<value letter_id="c">-897</value>\n+<value letter_id="f">-897</value>\n+<value letter_id="g">-897</value>\n+<value letter_id="h">-897</value>\n+<value letter_id="m">-897</value>\n+<value letter_id="t">285</value>\n+<value letter_id="n1">-897</value>\n+<value letter_id="n2">-897</value>\n+<value letter_id="n3">-897</value>\n+<value letter_id="n4">-897</value>\n+<value letter_id="n5">-897</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-897</value>\n+<value letter_id="C">100</value>\n+<value letter_id="G">-897</value>\n+<value letter_id="T">-897</value>\n+<value letter_id="a">375</value>\n+<value letter_id="c">-897</value>\n+<value letter_id="f">-897</value>\n+<value letter_id="g">-897</value>\n+<value letter_id="h">-897</value>\n+<value letter_id="m">-897</value>\n+<value letter_id="t">285</value>\n+<value letter_id="n1">-897</value>\n+<value letter_id="n2">-897</value>\n+<value letter_id="n3">-897</value>\n+<value letter_id="n4">-897</value>\n+<value letter_id="n5">-897</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-897</value>\n+<value letter_id="C">0</value>\n+<value letter_id="G">-897</value>\n+<value letter_id="T">8</value>\n+<value letter_id="a">433</value>\n+<value letter_id="c">-897</value>\n+<value letter_id="f">-897</value>\n+<value letter_id="g">-897</value>\n+<value letter_id="h">-897</value>\n+<value letter_id="m">-897</value>\n+<value letter_id="t">-897</value>\n+<value letter_id="n1">-897</value>\n+<value letter_id="n2">-897</value>\n+<value letter_id="n3">-897</value>\n+<value letter_id="n4">-897</value>\n+<value letter_id="n5">-897</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">61</value>\n+<value letter_id="C">-897</value>\n+<value letter_id="G">-897</value>\n+<value letter_id="T">-897</value>\n+<value letter_id="a">433</value>\n+<value letter_id="c">-897</value>\n+<value letter_id="f">-897</value>\n+<value letter_id="g">-897</value>\n+<value letter_id="h">-897</value>\n+<value letter_id="m">-897</value>\n+<value letter_id="t">-897</value>\n+<value letter_id="n1">-897</value>\n+<value letter_id="n2">-897</value>\n+<value letter_id="n3">-897</value>\n+<value letter_id="n4">-897</value>\n+<value letter_id="n5">-897</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">61</value>\n+<value letter_id="C">-897</value>\n+<value letter_id="G">-897</value>\n+<value letter_id="T">-897</value>\n+<value letter_id="a">375</value>\n+<value letter_id="c">-897</value>\n+<value letter_id="f">-897</value>\n+<value letter_id="g">478</value>\n+<value letter_id="h">-897</value>\n+<value letter_id="m">-897</value>\n+<value letter_id="t">-897</value>\n+<value letter_id="n1">-897</value>\n+<value letter_id="n2">-897</value>\n+<value letter_id="n3">-897</value>\n+<value letter_id="n4">-897</value>\n+<value letter_id="n5">-897</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-897</value>\n+<value letter_id="C">100</value>\n+<value letter_id="G">-897</value>\n+<value letter_id="T">-897</value>\n+<value letter_id="a">-897</value>\n+<value letter_id="c">288</value>\n+<value letter_id="f">-897</value>\n+<value letter_id="g">478</value>\n+<value letter_id="h">-897</value>\n+<value letter_id="m">-897</value>\n+<value let'..b'd="A"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="a"/>\n+<letter_ref letter_id="c"/>\n+<letter_ref letter_id="t"/>\n+<letter_ref letter_id="c"/>\n+<letter_ref letter_id="a"/>\n+<letter_ref letter_id="c"/>\n+<letter_ref letter_id="t"/>\n+<letter_ref letter_id="c"/>\n+</site>\n+<right_flank></right_flank>\n+</contributing_site>\n+</contributing_sites>\n+</motif>\n+</motifs>\n+<scanned_sites_summary p_thresh="0.0001">\n+<scanned_sites sequence_id="sequence_0" pvalue="9.99e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_1" pvalue="9.78e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_2" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_3" pvalue="9.99e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_4" pvalue="9.99e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_5" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_6" pvalue="9.61e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_7" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_8" pvalue="9.65e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_9" pvalue="9.67e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_10" pvalue="6.20e-25" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="2.07e-26"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_11" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_12" pvalue="7.25e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_13" pvalue="9.59e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_14" pvalue="7.20e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_15" pvalue="9.99e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_16" pvalue="2.29e-26" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="28" pvalue="7.62e-28"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_17" pvalue="3.02e-32" num_sites="2"><scanned_site motif_id="motif_1" strand="plus" position="8" pvalue="1.01e-33"/>\n+<scanned_site motif_id="motif_1" strand="plus" position="29" pvalue="1.08e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_18" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_19" pvalue="2.46e-16" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="28" pvalue="8.19e-18"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_20" pvalue="1.27e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="28" pvalue="4.22e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_21" pvalue="9.58e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_22" pvalue="2.70e-14" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="29" pvalue="9.00e-16"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_23" pvalue="9.69e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_24" pvalue="2.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="28" pvalue="8.43e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_25" pvalue="9.67e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_26" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_27" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_28" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_29" pvalue="1.00e+00" num_sites="0"></scanned_sites>\n+</scanned_sites_summary>\n+</MEME>\n'
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_test1.html
--- a/test-data/meme_output_test1.html Thu Aug 29 10:19:55 2024 +0000
+++ b/test-data/meme_output_test1.html Mon Jul 14 21:33:41 2025 +0000
[
@@ -1,73 +1,124 @@
-      <script>
-      {
-        $("opt_mod").className = data.options.mod;
-        $("opt_objfun").textContent = data.options.objfun;
-        $("opt_spfun").textContent = data.options.spfun;
-        $("opt_strand").className = (current_alphabet.has_complement() ? (data.options.revcomp ? "both" : "given") : "none");
-        $("opt_nmotifs").textContent = data.options.nmotifs;
-        $("opt_evt").textContent = (typeof data.options.evt === "number" ? data.options.evt : "no limit");
-        $("opt_minw").textContent = data.options.minw;
-        $("opt_maxw").textContent = data.options.maxw;
-        $("opt_minsites").textContent = data.options.minsites;
-        $("opt_maxsites").textContent = data.options.maxsites;
-        $("opt_wnsites").textContent = data.options.wnsites;
-        $("opt_spmap").className = data.options.spmap;
-        $("opt_spfuzz").textContent = data.options.spfuzz;
-        $("opt_prior").className = data.options.prior;
-        if (data.options.prior == "dirichlet") {
-          $("opt_prior_source").textContent = make_background_source("Source", data.background.source, true);
-        } else {
-          $("opt_prior_source").textContent = (data.options.prior == "addone") ? "motif observed frequencies" : data.options.priors_source;
-        }
-        $("opt_b").textContent = (data.options.b < 0) ? "not applicable" : (data.options.b == 0) ? "intrinsic strength" : data.options.b;
-        $("opt_maxiter").textContent = data.options.maxiter;
-        $("opt_distance").textContent = data.options.distance;
-        $("opt_searchsize").textContent = data.options.searchsize;
-        if (typeof data.options.csites != "undefined") {
-          $("opt_csites").textContent = data.options.csites;
-        } else {
-          $("opt_csites").parentElement.style.display = "none"
-        }
-        if (typeof data.options.wg != "undefined") {
-          $("opt_wg").textContent = data.options.wg;
-        } else {
-          $("opt_wg").parentElement.style.display = "none"
-        }
-        if (typeof data.options.ws != "undefined") {
-          $("opt_ws").textContent = data.options.ws;
-        }
-        else {
-          $("opt_ws").parentElement.style.display = "none"
-        }
-        if (typeof data.options.noendgaps != "undefined") {
-          $("opt_noendgaps").className = (data.options.noendgaps ? "on" : "off");
-        }
-        else {
-          $("opt_noendgaps").parentElement.style.display = "none"
-        }
-        $("opt_substring").className = (data.options.substring ? "on" : "off");
-      }
-      </script>
-    </div>
-    <!-- list information on this program -->
-    <div id="info_sec" class="bar">
-      <div class="subsection">
-        <h5 id="version">MEME version</h5>
-        <span id="ins_version"></span> 
-        (Release date: <span id="ins_release"></span>)<br>
-      </div>
-      <script>
-        $("ins_version").innerHTML = data["version"];
-        $("ins_release").innerHTML = data["release"];
-      </script>
-      <div class="subsection" id="reference"> <script>print_citation("reference", "MEME");</script></div>
-      <div class="subsection">
-        <h5 id="command">Command line</h5>
-        <textarea id="cmd" rows="5" style="width:100%;" readonly="readonly">
-        </textarea>
-        <script>$("cmd").value = data["cmd"].join(" ");</script>
-      </div>
-    </div>
-    
-  </body>
-</html>
+          "sequences": [
+            {
+              "name": "chr21_19617074_19617124_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_26934381_26934431_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_28217753_28217803_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31710037_31710087_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31744582_31744632_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31768316_31768366_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31914206_31914256_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31933633_31933683_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31962741_31962791_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31964683_31964733_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31973364_31973414_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_31992870_31992920_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_32185595_32185645_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_32202076_32202126_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_32253899_32253949_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_32410820_32410870_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_36411748_36411798_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_37838750_37838800_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_45705687_45705737_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_45971413_45971463_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_45978668_45978718_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_45993530_45993580_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_46020421_46020471_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_46031920_46031970_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_46046964_46047014_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_46057197_46057247_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_46086869_46086919_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_46102103_46102153_-",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_47517957_47518007_+",
+              "length": 50,
+              "weight": 1.000000
+            }, {
+              "name": "chr21_47575506_47575556_-",
+              "length": 50,
+              "weight": 1.000000
+            }
+          ]
+        },
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_test1.txt
--- a/test-data/meme_output_test1.txt Thu Aug 29 10:19:55 2024 +0000
+++ b/test-data/meme_output_test1.txt Mon Jul 14 21:33:41 2025 +0000
[
b'@@ -1,39 +1,207 @@\n+model:  mod=         zoops    nmotifs=         1    evt=           inf\n+objective function:           em=       E-value of product of p-values\n+                              starts=   E-value of product of p-values\n+width:  minw=            8    maxw=           50\n+nsites: minsites=        2    maxsites=       30    wnsites=       0.8\n+theta:  spmap=         pam    spfuzz=        120\n+em:     prior=       megap    b=            7500    maxiter=        50\n+        distance=    1e-05\n+trim:   wg=             11    ws=              1    endgaps=       yes\n+data:   n=            1500    N=              30\n+sample: seed=            0    hsfrac=          0\n+        searchsize=   1500    norand=         no    csites=       1000\n+Dirichlet mixture priors file: prior30.plib\n+Letter frequencies in dataset:\n+A 0.294 C 0.231 D 0 E 0 F 0 G 0.257 H 0 I 0 K 0 \n+L 0 M 0 N 0 P 0 Q 0 R 0 S 0 T 0.217 V 0 \n+W 0 Y 0 \n+Background letter frequencies (from file dataset with add-one prior applied):\n+A 0.291 C 0.229 D 0.000658 E 0.000658 F 0.000658 G 0.255 H 0.000658 I 0.000658 K 0.000658 \n+L 0.000658 M 0.000658 N 0.000658 P 0.000658 Q 0.000658 R 0.000658 S 0.000658 T 0.215 V 0.000658 \n+W 0.000658 Y 0.000658 \n+Background model order: 0\n ********************************************************************************\n-SUMMARY OF MOTIFS\n+\n+\n+********************************************************************************\n+MOTIF GGGGTATAAAA MEME-1\twidth =  11  sites =  25  llr = 239  E-value = 2.4e-011\n ********************************************************************************\n+--------------------------------------------------------------------------------\n+\tMotif GGGGTATAAAA MEME-1 Description\n+--------------------------------------------------------------------------------\n+Simplified        A  2323:a:a8a8\n+pos.-specific     C  ::3::::::::\n+probability       D  :::::::::::\n+matrix            E  :::::::::::\n+                  F  :::::::::::\n+                  G  7746::::::1\n+                  H  :::::::::::\n+                  I  :::::::::::\n+                  K  :::::::::::\n+                  L  :::::::::::\n+                  M  :::::::::::\n+                  N  :::::::::::\n+                  P  :::::::::::\n+                  Q  :::::::::::\n+                  R  :::::::::::\n+                  S  :::::::::::\n+                  T  1:2:a:a:2::\n+                  V  :::::::::::\n+                  W  :::::::::::\n+                  Y  :::::::::::\n+\n+         bits   10.6            \n+                 9.5            \n+                 8.5            \n+                 7.4            \n+Relative         6.3            \n+Entropy          5.3            \n+(13.8 bits)      4.2            \n+                 3.2            \n+                 2.1     * **   \n+                 1.1 ** ********\n+                 0.0 -----------\n+\n+Multilevel           GGGGTATAAAA\n+consensus            AACA    T  \n+sequence                        \n+                                \n+                                \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGGGTATAAAA MEME-1 sites sorted by position p-value\n+--------------------------------------------------------------------------------\n+Sequence name             Start   P-value               Site  \n+-------------             ----- ---------            -----------\n+chr21_46046964_46047014_     13  1.06e-06 AAGGCCAGGA GGGGTATAAAA GCCTGAGAGC\n+chr21_46057197_46057247_     37  3.41e-06 ACAGGCCCTG GGCATATAAAA GCC       \n+chr21_45971413_45971463_     10  3.41e-06  CAGGCCCTG GGCATATAAAA GCCCCAGCAG\n+chr21_31964683_31964733_     14  3.41e-06 GATTCACTGA GGCATATAAAA GGCCCTCTGC\n+chr21_45993530_45993580_      8  4.00e-06    CCAAGGA GGAGTATAAAA GCCCCACAAA\n+chr21_32202076_32202126_     14  5.01e-06 CCACCAGCTT GAGGTATAAAA AGCCCTGTAC\n+chr21_46031920_46031970_     16  6.06e-06 ATACCCAGGG AGGGTATAAAA C'..b' -53   -267    -74     37     16     44    -37     98     31      9     19    319    212    127   -193    -95 \n+   165   -261     70    110     77   -521     -4    147     95    201     90    121    124     91    107    425   -527    314    -95      8 \n+  -838   -990    -89   -149   -151   -841   -161   -117   -113    -66   -209    -68    -69   -129    -91    111    221    -55   -255   -173 \n+   176   -858    -79   -103   -115   -717   -148    -95   -108    -17   -162    -61    -12    -95    -69    193   -737     52   -240   -153 \n+   134   -686      0     16    -12   -553    -68     44     -8     96     -9     88    124     41     36    384     11    216   -177    -71 \n+   165   -261     70    110     77   -521     -4    147     95    201     90    121    124     91    107    425   -527    314    -95      8 \n+   147   -614     89    129     93   -121     12    160    113    217    108    144    144    111    125    447   -241    332    -81     22 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGGGTATAAAA MEME-1 position-specific probability matrix\n+--------------------------------------------------------------------------------\n+letter-probability matrix: alength= 20 w= 11 nsites= 25 E= 2.4e-011 \n+ 0.240000  0.000000  0.000000  0.000000  0.000000  0.680000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.080000  0.000000  0.000000  0.000000 \n+ 0.280000  0.000000  0.000000  0.000000  0.000000  0.680000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.040000  0.000000  0.000000  0.000000 \n+ 0.160000  0.320000  0.000000  0.000000  0.000000  0.360000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.160000  0.000000  0.000000  0.000000 \n+ 0.320000  0.000000  0.000000  0.000000  0.000000  0.640000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.040000  0.000000  0.000000  0.000000 \n+ 0.000000  0.000000  0.000000  0.000000  0.000000  0.040000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.960000  0.000000  0.000000  0.000000 \n+ 0.960000  0.040000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n+ 0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  1.000000  0.000000  0.000000  0.000000 \n+ 1.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n+ 0.760000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.240000  0.000000  0.000000  0.000000 \n+ 0.960000  0.040000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n+ 0.840000  0.000000  0.000000  0.000000  0.000000  0.120000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.040000  0.000000  0.000000  0.000000 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGGGTATAAAA MEME-1 regular expression\n+--------------------------------------------------------------------------------\n+[GA][GA][GC][GA]TATA[AT]AA\n+--------------------------------------------------------------------------------\n\\ No newline at end of file\n'
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_test1.xml
--- a/test-data/meme_output_test1.xml Thu Aug 29 10:19:55 2024 +0000
+++ b/test-data/meme_output_test1.xml Mon Jul 14 21:33:41 2025 +0000
b
b'@@ -1,3 +1,924 @@\n+<alphabet_array>\n+<value letter_id="A">-32</value>\n+<value letter_id="C">-680</value>\n+<value letter_id="D">91</value>\n+<value letter_id="E">77</value>\n+<value letter_id="F">7</value>\n+<value letter_id="G">138</value>\n+<value letter_id="H">-20</value>\n+<value letter_id="I">55</value>\n+<value letter_id="K">64</value>\n+<value letter_id="L">107</value>\n+<value letter_id="M">11</value>\n+<value letter_id="N">150</value>\n+<value letter_id="P">142</value>\n+<value letter_id="Q">72</value>\n+<value letter_id="R">87</value>\n+<value letter_id="S">396</value>\n+<value letter_id="T">-148</value>\n+<value letter_id="V">221</value>\n+<value letter_id="W">-140</value>\n+<value letter_id="Y">-36</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-11</value>\n+<value letter_id="C">-680</value>\n+<value letter_id="D">89</value>\n+<value letter_id="E">76</value>\n+<value letter_id="F">7</value>\n+<value letter_id="G">137</value>\n+<value letter_id="H">-21</value>\n+<value letter_id="I">55</value>\n+<value letter_id="K">63</value>\n+<value letter_id="L">107</value>\n+<value letter_id="M">10</value>\n+<value letter_id="N">149</value>\n+<value letter_id="P">141</value>\n+<value letter_id="Q">71</value>\n+<value letter_id="R">87</value>\n+<value letter_id="S">396</value>\n+<value letter_id="T">-239</value>\n+<value letter_id="V">220</value>\n+<value letter_id="W">-140</value>\n+<value letter_id="Y">-36</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-79</value>\n+<value letter_id="C">41</value>\n+<value letter_id="D">4</value>\n+<value letter_id="E">21</value>\n+<value letter_id="F">-7</value>\n+<value letter_id="G">44</value>\n+<value letter_id="H">-62</value>\n+<value letter_id="I">42</value>\n+<value letter_id="K">-5</value>\n+<value letter_id="L">99</value>\n+<value letter_id="M">0</value>\n+<value letter_id="N">99</value>\n+<value letter_id="P">138</value>\n+<value letter_id="Q">52</value>\n+<value letter_id="R">42</value>\n+<value letter_id="S">399</value>\n+<value letter_id="T">-46</value>\n+<value letter_id="V">223</value>\n+<value letter_id="W">-173</value>\n+<value letter_id="Y">-68</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">11</value>\n+<value letter_id="C">-677</value>\n+<value letter_id="D">48</value>\n+<value letter_id="E">47</value>\n+<value letter_id="F">-2</value>\n+<value letter_id="G">127</value>\n+<value letter_id="H">-43</value>\n+<value letter_id="I">46</value>\n+<value letter_id="K">27</value>\n+<value letter_id="L">101</value>\n+<value letter_id="M">3</value>\n+<value letter_id="N">124</value>\n+<value letter_id="P">138</value>\n+<value letter_id="Q">60</value>\n+<value letter_id="R">62</value>\n+<value letter_id="S">397</value>\n+<value letter_id="T">-235</value>\n+<value letter_id="V">220</value>\n+<value letter_id="W">-160</value>\n+<value letter_id="Y">-55</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-596</value>\n+<value letter_id="C">-820</value>\n+<value letter_id="D">12</value>\n+<value letter_id="E">-21</value>\n+<value letter_id="F">-53</value>\n+<value letter_id="G">-267</value>\n+<value letter_id="H">-74</value>\n+<value letter_id="I">37</value>\n+<value letter_id="K">16</value>\n+<value letter_id="L">44</value>\n+<value letter_id="M">-37</value>\n+<value letter_id="N">98</value>\n+<value letter_id="P">31</value>\n+<value letter_id="Q">9</value>\n+<value letter_id="R">19</value>\n+<value letter_id="S">319</value>\n+<value letter_id="T">212</value>\n+<value letter_id="V">127</value>\n+<value letter_id="W">-193</value>\n+<value letter_id="Y">-95</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">165</value>\n+<value letter_id="C">-261</value>\n+<value letter_id="D">70</value>\n+<value letter_id="E">110</value>\n+<value letter_id="F">77</value>\n+<value letter_id="G">-521</value>\n+<value letter_id="H">-4</value>\n+<value letter_id="I">147</value>\n+<value letter_id="K">95</value>\n+<value letter_id="L">201</value>\n+<value letter_id="M">90</value>\n+<value letter_id="N">121</value>\n+<value le'..b'etter_ref letter_id="A"/>\n+</site>\n+<right_flank>GC</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_5" position="0" strand="none" pvalue="3.82e-05" >\n+<left_flank></left_flank>\n+<site>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>ATGGTCCTGT</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_29" position="30" strand="none" pvalue="4.02e-05" >\n+<left_flank>GCTGCCGGTG</left_flank>\n+<site>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+</site>\n+<right_flank>GCCCTGGCG</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_1" position="27" strand="none" pvalue="5.52e-05" >\n+<left_flank>AGTCACAAGT</left_flank>\n+<site>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>GGGTCGCACG</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_3" position="14" strand="none" pvalue="5.94e-05" >\n+<left_flank>CCCAGGTTTC</left_flank>\n+<site>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>TCGCCGCACC</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_16" position="22" strand="none" pvalue="6.78e-05" >\n+<left_flank>AGTTTCAGTT</left_flank>\n+<site>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>attatataac</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_7" position="2" strand="none" pvalue="2.08e-04" >\n+<left_flank>TC</left_flank>\n+<site>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+</site>\n+<right_flank>AAATGTTCCT</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_8" position="13" strand="none" pvalue="4.05e-04" >\n+<left_flank>TATAACTCAG</left_flank>\n+<site>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>TAATTTGTAC</right_flank>\n+</contributing_site>\n+</contributing_sites>\n+</motif>\n </motifs>\n <scanned_sites_summary p_thresh="0.0001">\n <scanned_sites sequence_id="sequence_0" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="39" pvalue="3.06e-05"/>\n'
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_test2.html
--- a/test-data/meme_output_test2.html Thu Aug 29 10:19:55 2024 +0000
+++ b/test-data/meme_output_test2.html Mon Jul 14 21:33:41 2025 +0000
[
b'@@ -1,3 +1,9508 @@\n+ * See http://stackoverflow.com/a/5450113/66387\n+ * Does string multiplication like the perl x operator.\n+ */\n+function string_mult(pattern, count) {\n+    if (count < 1) return \'\';\n+    var result = \'\';\n+    while (count > 1) {\n+        if (count & 1) result += pattern;\n+        count >>= 1, pattern += pattern;\n+    }\n+    return result + pattern;\n+}\n+\n+/*\n+ * See http://stackoverflow.com/questions/814613/how-to-read-get-data-from-a-url-using-javascript\n+ * Slightly modified with information from\n+ * https://developer.mozilla.org/en/DOM/window.location\n+ */\n+function parse_params() {\n+  "use strict";\n+  var search, queryStart, queryEnd, query, params, nvPairs, i, nv, n, v;\n+  search = window.location.search;\n+  queryStart = search.indexOf("?") + 1;\n+  queryEnd   = search.indexOf("#") + 1 || search.length + 1;\n+  query      = search.slice(queryStart, queryEnd - 1);\n+\n+  if (query === search || query === "") return {};\n+\n+  params  = {};\n+  nvPairs = query.replace(/\\+/g, " ").split("&");\n+\n+  for (i = 0; i < nvPairs.length; i++) {\n+    nv = nvPairs[i].split("=");\n+    n  = decodeURIComponent(nv[0]);\n+    v  = decodeURIComponent(nv[1]);\n+    // allow a name to be used multiple times\n+    // storing each value in the array\n+    if (!(n in params)) {\n+      params[n] = [];\n+    }\n+    params[n].push(nv.length === 2 ? v : null);\n+  }\n+  return params;\n+}\n+\n+/*\n+ * coords\n+ *\n+ * Calculates the x and y offset of an element.\n+ * From http://www.quirksmode.org/js/findpos.html\n+ * with alterations to take into account scrolling regions\n+ */\n+function coords(elem) {\n+  var myX = myY = 0;\n+  if (elem.getBoundingClientRect) {\n+    var rect;\n+    rect = elem.getBoundingClientRect();\n+    myX = rect.left + ((typeof window.pageXOffset !== "undefined") ?\n+        window.pageXOffset : document.body.scrollLeft);\n+    myY = rect.top + ((typeof window.pageYOffset !== "undefined") ?\n+        window.pageYOffset : document.body.scrollTop);\n+  } else {\n+    // this fall back doesn\'t properly handle absolutely positioned elements\n+    // inside a scrollable box\n+    var node;\n+    if (elem.offsetParent) {\n+      // subtract all scrolling\n+      node = elem;\n+      do {\n+        myX -= node.scrollLeft ? node.scrollLeft : 0;\n+        myY -= node.scrollTop ? node.scrollTop : 0;\n+      } while (node = node.parentNode);\n+      // this will include the page scrolling (which is unwanted) so add it back on\n+      myX += (typeof window.pageXOffset !== "undefined") ? window.pageXOffset : document.body.scrollLeft;\n+      myY += (typeof window.pageYOffset !== "undefined") ? window.pageYOffset : document.body.scrollTop;\n+      // sum up offsets\n+      node = elem;\n+      do {\n+        myX += node.offsetLeft;\n+        myY += node.offsetTop;\n+      } while (node = node.offsetParent);\n+    }\n+  }\n+  return [myX, myY];\n+}\n+\n+/*\n+ * position_popup\n+ *\n+ * Positions a popup relative to an anchor element.\n+ *\n+ * The available positions are:\n+ * 0 - Centered below the anchor.\n+ */\n+function position_popup(anchor, popup, position) {\n+  "use strict";\n+  var a_x, a_y, a_w, a_h, p_x, p_y, p_w, p_h;\n+  var a_xy, spacer, margin, scrollbar, page_w;\n+  // define constants\n+  spacer = 5;\n+  margin = 15;\n+  scrollbar = 15;\n+  // define the positions and widths\n+  a_xy = coords(anchor);\n+  a_x = a_xy[0];\n+  a_y = a_xy[1];\n+  a_w = anchor.offsetWidth;\n+  a_h = anchor.offsetHeight;\n+  p_w = popup.offsetWidth;\n+  p_h = popup.offsetHeight;\n+  page_w = null;\n+  if (window.innerWidth) {\n+    page_w = window.innerWidth;\n+  } else if (document.body) {\n+    page_w = document.body.clientWidth;\n+  }\n+  // check the position type is defined\n+  if (typeof position !== "number") {\n+    position = 0;\n+  }\n+  // calculate the popup position\n+  switch (position) {\n+    case 1:\n+      p_x = a_x + a_w + spacer;\n+      p_y = a_y + (a_h / 2) - (p_h / 2);\n+      break;\n+    case 0:\n+    default:\n+      p_x = a_x + (a_w / 2) - (p_w / 2);\n+      p_y = a_y + a_h + spacer;\n+      break;\n'..b'Site Strand Handling</th>\n+          <td id="opt_strand">\n+            <span class="strand_none">This alphabet only has one strand</span>\n+            <span class="strand_given">Sites must be on the given strand</span>\n+            <span class="strand_both">Sites may be on either strand</span>\n+          </td>\n+        </tr>\n+        <tr>\n+          <th>Maximum Number of Motifs</th>\n+          <td id="opt_nmotifs"></td>\n+        </tr>\n+        <tr>\n+          <th>Motif E-value Threshold</th>\n+          <td id="opt_evt"></td>\n+        </tr>\n+        <tr>\n+          <th>Minimum Motif Width</th>\n+          <td id="opt_minw"></td>\n+        </tr>\n+        <tr>\n+          <th>Maximum Motif Width</th>\n+          <td id="opt_maxw"></td>\n+        </tr>\n+        <tr>\n+          <th>Minimum Sites per Motif</th>\n+          <td id="opt_minsites"></td>\n+        </tr>\n+        <tr>\n+          <th>Maximum Sites per Motif</th>\n+          <td id="opt_maxsites"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Bias on Number of Sites</th>\n+          <td id="opt_wnsites"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Sequence Prior</th>\n+          <td id="opt_prior">\n+            <span class="prior_dirichlet">Simple Dirichlet</span>\n+            <span class="prior_dmix">Dirichlet Mixture</span>\n+            <span class="prior_mega">Mega-weight Dirichlet Mixture</span>\n+            <span class="prior_megap">Mega-weight Dirichlet Mixture Plus</span>\n+            <span class="prior_addone">Add One</span>\n+          </td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Sequence Prior Source</th>\n+\t  <td id="opt_prior_source"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Sequence Prior Strength</th>\n+          <td id="opt_b"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Starting Point Source</th>\n+          <td id="opt_substring">\n+            <span class="substring_on">From substrings in input sequences</span>\n+            <span class="substring_off">From strings on command line (-cons)</span>\n+          </td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Starting Point Map Type</th>\n+          <td id="opt_spmap">\n+            <span class="spmap_uni">Uniform</span>\n+            <span class="spmap_pam">Point Accepted Mutation</span>\n+          </td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Starting Point Fuzz</th>\n+          <td id="opt_spfuzz"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Maximum Iterations</th>\n+          <td id="opt_maxiter"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Improvement Threshold</th>\n+          <td id="opt_distance"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Maximum Search Size</th>\n+          <td id="opt_searchsize"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Maximum Number of Sites for E-values</th>\n+          <td id="opt_csites"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Trim Gap Open Cost</th>\n+          <td id="opt_wg"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Trim Gap Extend Cost</th>\n+          <td id="opt_ws"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>End Gap Treatment</th>\n+          <td id="opt_noendgaps">\n+            <span class="noendgaps_on">No cost</span>\n+            <span class="noendgaps_off">Same cost as other gaps</span>\n+          </td>\n+        </tr>\n+        <tr>\n+          <td colspan="2" style="text-align: center">\n+            <a href="javascript:toggle_class(document.getElementById(\'tbl_settings\'), \'hide_advanced\')">\n+              <span class="show_more">Show Advanced Settings</span>\n+              <span class="show_less">Hide Advanced Settings</span>\n+            </a>\n+          </td>\n+        </tr>\n+      </table>\n       <script>\n       {\n         $("opt_mod").className = data.options.mod;\n'
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_test2.txt
--- a/test-data/meme_output_test2.txt Thu Aug 29 10:19:55 2024 +0000
+++ b/test-data/meme_output_test2.txt Mon Jul 14 21:33:41 2025 +0000
[
b'@@ -1,40 +1,180 @@\n ********************************************************************************\n-SUMMARY OF MOTIFS\n+MOTIF GGSRTATAAAA MEME-1\twidth =  11  sites =  30  llr = 254  E-value = 5.1e-040\n ********************************************************************************\n+--------------------------------------------------------------------------------\n+\tMotif GGSRTATAAAA MEME-1 Description\n+--------------------------------------------------------------------------------\n+Simplified        A  3313:9:a798\n+pos.-specific     C  1:3::1:::1:\n+probability       G  6756::::::2\n+matrix            T  1:11a1a:3::\n+\n+         bits    2.2       *    \n+                 2.0     * *    \n+                 1.8     * *    \n+                 1.5     * ** * \n+Relative         1.3     * ** * \n+Entropy          1.1     ****** \n+(12.2 bits)      0.9  *  *******\n+                 0.7  *  *******\n+                 0.4 ** ********\n+                 0.2 ***********\n+                 0.0 -----------\n+\n+Multilevel           GGGGTATAAAA\n+consensus            AACA    T  \n+sequence                        \n+                                \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGSRTATAAAA MEME-1 sites sorted by position p-value\n+--------------------------------------------------------------------------------\n+Sequence name             Start   P-value               Site  \n+-------------             ----- ---------            -----------\n+chr21_46046964_46047014_     13  4.51e-07 AAGGCCAGGA GGGGTATAAAA GCCTGAGAGC\n+chr21_46031920_46031970_     16  2.22e-06 ATACCCAGGG AGGGTATAAAA CCTCAGCAGC\n+chr21_32202076_32202126_     14  2.74e-06 CCACCAGCTT GAGGTATAAAA AGCCCTGTAC\n+chr21_46057197_46057247_     37  4.86e-06 ACAGGCCCTG GGCATATAAAA GCC       \n+chr21_45993530_45993580_      8  4.86e-06    CCAAGGA GGAGTATAAAA GCCCCACAAA\n+chr21_45971413_45971463_     10  4.86e-06  CAGGCCCTG GGCATATAAAA GCCCCAGCAG\n+chr21_31964683_31964733_     14  4.86e-06 GATTCACTGA GGCATATAAAA GGCCCTCTGC\n+chr21_47517957_47518007_     33  6.48e-06 CCGGCGGGGC GGGGTATAAAG GGGGCGG   \n+chr21_45978668_45978718_      5  6.48e-06       CAGA GGGGTATAAAG GTTCCGACCA\n+chr21_32185595_32185645_     19  6.48e-06 CACCAGAGCT GGGATATATAA AGAAGGTTCT\n+chr21_32410820_32410870_     22  1.38e-05 AATCACTGAG GATGTATAAAA GTCCCAGGGA\n+chr21_31992870_31992920_     17  1.38e-05 CACTATTGAA GATGTATAAAA TTTCATTTGC\n+chr21_19617074_19617124_     40  1.41e-05 CCTCGGGACG TGGGTATATAA           \n+chr21_31914206_31914256_     16  1.61e-05 CCCACTACTT AGAGTATAAAA TCATTCTGAG\n+chr21_46020421_46020471_      3  1.95e-05         GA GACATATAAAA GCCAACATCC\n+chr21_32253899_32253949_     18  1.95e-05 CCCACCAGCA AGGATATATAA AAGCTCAGGA\n+chr21_45705687_45705737_     38  2.16e-05 CGTGGTCGCG GGGGTATAACA GC        \n+chr21_47575506_47575556_     31  3.04e-05 GCTGCCGGTG AGCGTATAAAG GCCCTGGCG \n+chr21_31744582_31744632_     13  3.04e-05 CAGGTCTAAG AGCATATATAA CTTGGAGTCC\n+chr21_31768316_31768366_      1  3.67e-05          . AACGTATATAA ATGGTCCTGT\n+chr21_26934381_26934431_     28  3.93e-05 AGTCACAAGT GAGTTATAAAA GGGTCGCACG\n+chr21_31933633_31933683_      5  5.65e-05       TCAG AGTATATATAA ATGTTCCTGT\n+chr21_31710037_31710087_     15  6.24e-05 CCCAGGTTTC TGAGTATATAA TCGCCGCACC\n+chr21_36411748_36411798_     23  7.15e-05 AGTTTCAGTT GGCATCtaaaa attatataac\n+chr21_46102103_46102153_     37  1.39e-04 TGCCTGGGTC CAGGTATAAAG GCT       \n+chr21_46086869_46086919_     38  1.39e-04 TGCCTGGGCC CAGGTATAAAG GC        \n+chr21_37838750_37838800_      3  4.81e-04         ga tggttttataa ggggcctcac\n+chr21_31962741_31962791_     14  8.57e-04 TATAACTCAG GTTGGATAAAA TAATTTGTAC\n+chr21_31973364_31973414_      8  1.47e-03    aaactta aaactctataa acttaaaact\n+chr21_28217753_28217803_     27  2.64e-03 GGTGGGGGTG GGGGTTTCACT GGTCCACTAT\n+--------------------------------------------------------------------------------\n \n -----------------------------'..b'8  2_[+1]_37\n+chr21_31962741_31962791_          0.00086  13_[+1]_26\n+chr21_31973364_31973414_           0.0015  7_[+1]_32\n+chr21_28217753_28217803_           0.0026  26_[+1]_13\n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGSRTATAAAA MEME-1 in BLOCKS format\n+--------------------------------------------------------------------------------\n+BL   MOTIF GGSRTATAAAA width=11 seqs=30\n+chr21_46046964_46047014_ (   13) GGGGTATAAAA  1 \n+chr21_46031920_46031970_ (   16) AGGGTATAAAA  1 \n+chr21_32202076_32202126_ (   14) GAGGTATAAAA  1 \n+chr21_46057197_46057247_ (   37) GGCATATAAAA  1 \n+chr21_45993530_45993580_ (    8) GGAGTATAAAA  1 \n+chr21_45971413_45971463_ (   10) GGCATATAAAA  1 \n+chr21_31964683_31964733_ (   14) GGCATATAAAA  1 \n+chr21_47517957_47518007_ (   33) GGGGTATAAAG  1 \n+chr21_45978668_45978718_ (    5) GGGGTATAAAG  1 \n+chr21_32185595_32185645_ (   19) GGGATATATAA  1 \n+chr21_32410820_32410870_ (   22) GATGTATAAAA  1 \n+chr21_31992870_31992920_ (   17) GATGTATAAAA  1 \n+chr21_19617074_19617124_ (   40) TGGGTATATAA  1 \n+chr21_31914206_31914256_ (   16) AGAGTATAAAA  1 \n+chr21_46020421_46020471_ (    3) GACATATAAAA  1 \n+chr21_32253899_32253949_ (   18) AGGATATATAA  1 \n+chr21_45705687_45705737_ (   38) GGGGTATAACA  1 \n+chr21_47575506_47575556_ (   31) AGCGTATAAAG  1 \n+chr21_31744582_31744632_ (   13) AGCATATATAA  1 \n+chr21_31768316_31768366_ (    1) AACGTATATAA  1 \n+chr21_26934381_26934431_ (   28) GAGTTATAAAA  1 \n+chr21_31933633_31933683_ (    5) AGTATATATAA  1 \n+chr21_31710037_31710087_ (   15) TGAGTATATAA  1 \n+chr21_36411748_36411798_ (   23) GGCATCTAAAA  1 \n+chr21_46102103_46102153_ (   37) CAGGTATAAAG  1 \n+chr21_46086869_46086919_ (   38) CAGGTATAAAG  1 \n+chr21_37838750_37838800_ (    3) TGGTTTTATAA  1 \n+chr21_31962741_31962791_ (   14) GTTGGATAAAA  1 \n+chr21_31973364_31973414_ (    8) AAACTCTATAA  1 \n+chr21_28217753_28217803_ (   27) GGGGTTTCACT  1 \n+//\n+\n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGSRTATAAAA MEME-1 position-specific scoring matrix\n+--------------------------------------------------------------------------------\n+log-odds matrix: alength= 4 w= 11 n= 1200 bayes= 5.2854 E= 5.1e-040 \n+   -14   -179    114   -112 \n+     3  -1155    137   -270 \n+  -114     20     86    -71 \n+     3   -279    122   -170 \n+ -1155  -1155   -295    215 \n+   156   -179  -1155   -170 \n+ -1155  -1155  -1155    220 \n+   172   -279  -1155  -1155 \n+   125  -1155  -1155     46 \n+   167   -179  -1155  -1155 \n+   144  -1155    -63   -270 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGSRTATAAAA MEME-1 position-specific probability matrix\n+--------------------------------------------------------------------------------\n+letter-probability matrix: alength= 4 w= 11 nsites= 30 E= 5.1e-040 \n+ 0.266667  0.066667  0.566667  0.100000 \n+ 0.300000  0.000000  0.666667  0.033333 \n+ 0.133333  0.266667  0.466667  0.133333 \n+ 0.300000  0.033333  0.600000  0.066667 \n+ 0.000000  0.000000  0.033333  0.966667 \n+ 0.866667  0.066667  0.000000  0.066667 \n+ 0.000000  0.000000  0.000000  1.000000 \n+ 0.966667  0.033333  0.000000  0.000000 \n+ 0.700000  0.000000  0.000000  0.300000 \n+ 0.933333  0.066667  0.000000  0.000000 \n+ 0.800000  0.000000  0.166667  0.033333 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGSRTATAAAA MEME-1 regular expression\n+--------------------------------------------------------------------------------\n+[GA][GA][GC][GA]TATA[AT]AA\n --------------------------------------------------------------------------------\n\\ No newline at end of file\n'
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/meme_output_test2.xml
--- a/test-data/meme_output_test2.xml Thu Aug 29 10:19:55 2024 +0000
+++ b/test-data/meme_output_test2.xml Mon Jul 14 21:33:41 2025 +0000
b
b'@@ -1,3 +1,658 @@\n+<alphabet_matrix>\n+<alphabet_array>\n+<value letter_id="A">-14</value>\n+<value letter_id="C">-179</value>\n+<value letter_id="G">114</value>\n+<value letter_id="T">-112</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">3</value>\n+<value letter_id="C">-1155</value>\n+<value letter_id="G">137</value>\n+<value letter_id="T">-270</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-114</value>\n+<value letter_id="C">20</value>\n+<value letter_id="G">86</value>\n+<value letter_id="T">-71</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">3</value>\n+<value letter_id="C">-279</value>\n+<value letter_id="G">122</value>\n+<value letter_id="T">-170</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-1155</value>\n+<value letter_id="C">-1155</value>\n+<value letter_id="G">-295</value>\n+<value letter_id="T">215</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">156</value>\n+<value letter_id="C">-179</value>\n+<value letter_id="G">-1155</value>\n+<value letter_id="T">-170</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">-1155</value>\n+<value letter_id="C">-1155</value>\n+<value letter_id="G">-1155</value>\n+<value letter_id="T">220</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">172</value>\n+<value letter_id="C">-279</value>\n+<value letter_id="G">-1155</value>\n+<value letter_id="T">-1155</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">125</value>\n+<value letter_id="C">-1155</value>\n+<value letter_id="G">-1155</value>\n+<value letter_id="T">46</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">167</value>\n+<value letter_id="C">-179</value>\n+<value letter_id="G">-1155</value>\n+<value letter_id="T">-1155</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">144</value>\n+<value letter_id="C">-1155</value>\n+<value letter_id="G">-63</value>\n+<value letter_id="T">-270</value>\n+</alphabet_array>\n+</alphabet_matrix>\n+</scores>\n+<probabilities>\n+<alphabet_matrix>\n+<alphabet_array>\n+<value letter_id="A">0.266667</value>\n+<value letter_id="C">0.066667</value>\n+<value letter_id="G">0.566667</value>\n+<value letter_id="T">0.100000</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.300000</value>\n+<value letter_id="C">0.000000</value>\n+<value letter_id="G">0.666667</value>\n+<value letter_id="T">0.033333</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.133333</value>\n+<value letter_id="C">0.266667</value>\n+<value letter_id="G">0.466667</value>\n+<value letter_id="T">0.133333</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.300000</value>\n+<value letter_id="C">0.033333</value>\n+<value letter_id="G">0.600000</value>\n+<value letter_id="T">0.066667</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.000000</value>\n+<value letter_id="C">0.000000</value>\n+<value letter_id="G">0.033333</value>\n+<value letter_id="T">0.966667</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.866667</value>\n+<value letter_id="C">0.066667</value>\n+<value letter_id="G">0.000000</value>\n+<value letter_id="T">0.066667</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.000000</value>\n+<value letter_id="C">0.000000</value>\n+<value letter_id="G">0.000000</value>\n+<value letter_id="T">1.000000</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.966667</value>\n+<value letter_id="C">0.033333</value>\n+<value letter_id="G">0.000000</value>\n+<value letter_id="T">0.000000</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.700000</value>\n+<value letter_id="C">0.000000</value>\n+<value letter_id="G">0.000000</value>\n+<value letter_id="T">0.300000</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value letter_id="A">0.933333</value>\n+<value letter_id="C">0.066667</value>\n+<value letter_id="G">0.000000</value>\n+<value letter_id="T">0.000000</value>\n+</alphabet_array>\n+<alphabet_array>\n+<value let'..b'r_ref letter_id="A"/>\n+</site>\n+<right_flank>TCGCCGCACC</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_16" position="22" strand="plus" pvalue="7.15e-05" >\n+<left_flank>AGTTTCAGTT</left_flank>\n+<site>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>attatataac</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_27" position="36" strand="plus" pvalue="1.39e-04" >\n+<left_flank>TGCCTGGGTC</left_flank>\n+<site>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+</site>\n+<right_flank>GCT</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_26" position="37" strand="plus" pvalue="1.39e-04" >\n+<left_flank>TGCCTGGGCC</left_flank>\n+<site>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="G"/>\n+</site>\n+<right_flank>GC</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_17" position="2" strand="plus" pvalue="4.81e-04" >\n+<left_flank>ga</left_flank>\n+<site>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>ggggcctcac</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_8" position="13" strand="plus" pvalue="8.57e-04" >\n+<left_flank>TATAACTCAG</left_flank>\n+<site>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>TAATTTGTAC</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_10" position="7" strand="plus" pvalue="1.47e-03" >\n+<left_flank>aaactta</left_flank>\n+<site>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="A"/>\n+</site>\n+<right_flank>acttaaaact</right_flank>\n+</contributing_site>\n+<contributing_site sequence_id="sequence_2" position="26" strand="plus" pvalue="2.64e-03" >\n+<left_flank>GGTGGGGGTG</left_flank>\n+<site>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="G"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="T"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="A"/>\n+<letter_ref letter_id="C"/>\n+<letter_ref letter_id="T"/>\n+</site>\n+<right_flank>GGTCCACTAT</right_flank>\n+</contributing_site>\n+</contributing_sites>\n+</motif>\n </motifs>\n <scanned_sites_summary p_thresh="0.0001">\n <scanned_sites sequence_id="sequence_0" pvalue="5.63e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="39" pvalue="1.41e-05"/>\n'
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_input_neg2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_input_neg2.fasta Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,66 @@
+>chr21_19617074_19617124_+
+AAAAATTATTACTAGGGAGGGGGCCGGAAAAACCTCGGGACGTGGGTATATAA
+>chr21_26934381_26934431_+
+GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTTTCCmATAAAAGGGTCGCACGTT
+>chr21_28217753_28217803_-
+CAAAGGGGAGGAGTGmGGGTGGGGGTGGGGGTTTCACTGGTCCACTATAAA
+>chr21_31710037_31710087_-
+AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTT
+>chr21_31744582_31744632_-
+CCCAGGTCTAAGAGCATATATAmACTTGGAGTCCAGACTATGACATTCAAA
+>chr21_31768316_31768366_+
+AACGTATATAAATGGTCCTGTCCAGATGTGGCATGCAmAACTCAGAATCTT
+>chr21_31914206_31914256_-
+TGACACCCACTACTTAGAGTATAAAmATCATTCTGAGAAGTTAGAGACACC
+>chr21_31933633_31933683_-
+TCAGAGTATATATAAATGTTCCTGTCCAGTCACAAAGTCACCAAACTGACCT
+>chr21_31962741_31962791_-
+ACATATAACTCAGGTTGGATAAAATAATTTGTATCAGmCAAATCAGGAGAGTCAA
+>chr21_31964683_31964733_+
+TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAmAGTGTCCATAC
+>chr21_31973364_31973414_+
+aaacttaaaactctataaacttaaaactCTAGAATCCTGCTATAC
+>chr21_31992870_31992920_+
+CTCATACACTATTGAAGATGTATAAAATTTCATTTGmGATGGTGACATT
+>chr21_32185595_32185645_-
+TCACCACCCACCAmAGCTGGGATATATAAAGAAGGTTCTGAGACTAGGAA
+>chr21_32202076_32202126_-
+TGCCCACCAGCTATmGTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT
+>chr21_32253899_32253949_-
+AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC
+>chr21_32410820_32410870_-
+TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG
+>chr21_36411748_36411798_-
+ATAGTTCTGTATAAmGAGTTTCAGTTGGCATCtaaaaattatataactttattt
+>chr21_37838750_37838800_-
+gatggttttataaggggcctcaccctcggctcagccctcattcttctcct
+>chr21_45705687_45705737_+
+CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGCGGGGGTATAACAGC
+>chr21_45971413_45971463_-
+CAGGCCCTGGGCATATAAAAGCCCCAGCAGmCCAACAGGctcacacacaca
+>chr21_45978668_45978718_-
+CAGAGGGGTATAAAGGTTCCGACCACTCAGAGGCCTGGCACGAtcactca
+>chr21_45993530_45993580_+
+CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCTCACTCTCGC
+>chr21_46020421_46020471_+
+GAGACATATAAAAGCCAACATCCCTGAGCACCTAAACGGactcactc
+>chr21_46031920_46031970_+
+GGAAAATACCCAGGGAGGGTATAAATCAGCAGCCAGGGCACACAAAC
+>chr21_46046964_46047014_+
+ACAAGGCCAGGAGGGGTATAAAACTGAGAGCCCCAAGAACctcacaca
+>chr21_46057197_46057247_+
+ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCATATAAAAGCC
+>chr21_46086869_46086919_-
+GACGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC
+>chr21_46102103_46102153_-
+AGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGTCCAGGTATAAAGGCT
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGCGGGGTATAAAGGGGGCGG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCGTGAGCGTATAAAGGCCCTGGCG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGAGmGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_input_pos.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_input_pos.fasta Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,66 @@
+>chr21_19617074_19617124_+
+AAAAATTATTACTAGGGAGGGGGCCGGAACCTCGGGACGTGGGTATATAA
+>chr21_26934381_26934431_+
+GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTATAAAAGGGTCGCACGTT
+>chr21_28217753_28217803_-
+CAAAGGGGAGGAGTGGGGTGGGGGTGGGGGTTTCACTGGTCCTATAAA
+>chr21_31710037_31710087_-
+AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTT
+>chr21_31744582_31744632_-
+CCCAGGTCTAAGAGCATATATAACTTGGAGTCCAGACTATGACATTCAAA
+>chr21_31768316_31768366_+
+AACGTATATAAATGGTCCTGTCCAGATGTG
+>chr21_31914206_31914256_-
+TGACACCCACTACTTAGAGTATAAAATCATTCTGAGAAGTTAGAGACACC
+>chr21_31933633_31933683_-
+TCAGAGTATATATAAATGTTCCTGTCCAGTCACAGTCACCAATGACCT
+>chr21_31962741_31962791_-
+ACATATAACTCAGGTTGGATAAAATAATTTGTACAAATCAGGAGAGTCAA
+>chr21_31964683_31964733_+
+TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAAGTTAC
+>chr21_31973364_31973414_+
+aaacttaaaactctataaacttaaaactCTAGAATCTGATCCTGCTATAC
+>chr21_31992870_31992920_+
+CTCATACACTATTGAAGATGTATAAAATTTCATTTGCAGATGGTGACATT
+>chr21_32185595_32185645_-
+TCACCACCCACCAGAGCTGGGATATATAAAGAAGGTTCTAGGAA
+>chr21_32202076_32202126_-
+TGCCCACCAGCTTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT
+>chr21_32253899_32253949_-
+AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC
+>chr21_32410820_32410870_-
+TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG
+>chr21_36411748_36411798_-
+ATAGTTCTGTATAGTTTCAGTTGGCATCtaaaaattatataactttattt
+>chr21_37838750_37838800_-
+gatggttttataaggggcctcaccctcggctcagccctcattcttctcct
+>chr21_45705687_45705737_+
+CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGGGGTATAACAGC
+>chr21_45971413_45971463_-
+CAGGCCCTGGGCATATAAAAGCCCCAGCAGCCAACAGGctcacacacaca
+>chr21_45978668_45978718_-
+CAGAGGGGTATAAAGGTTCCGACCACTCCTGGCACGAtcactca
+>chr21_45993530_45993580_+
+CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCTCACTCACTCGC
+>chr21_46020421_46020471_+
+GAGACATATAAAAGCCAACATCCCTGAGCACCTAACACACGGactcactc
+>chr21_46031920_46031970_+
+GGAAAATACCCAGGGAGGGTATAAAACCTCAGCAGCCAGGGCACACAAAC
+>chr21_46046964_46047014_+
+ACAAGGCCAGGAGGGGTATAAAAGCCTGAGAGCCCCAAGAACctcacaca
+>chr21_46057197_46057247_+
+ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCATATAAAAGCC
+>chr21_46086869_46086919_-
+GACAGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC
+>chr21_46102103_46102153_-
+AGGTGTGTGCTTCTGTGCTGTGGGGATGCCTGGGTCCAGGTATAAAGGCT
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGTATAAAGGGGGCGG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_input_pos2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_input_pos2.fasta Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,66 @@
+>chr21_19617074_19617124_+
+AAAAATTATTACTAGGGAGGGGGCmGGAACCTCGGGACGTGGGTmATATAA
+>chr21_26934381_26934431_+
+GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTATAAAAGGGTCGCACGTT
+>chr21_28217753_28217803_-
+CAAAGGGGAGGAGTGGGGTGGGGGTGGGGGTTTCACTGGTmCACTATAAA
+>chr21_31710037_31710087_-
+AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTTTT
+>chr21_31744582_31744632_-
+CCmAGGTCTAAGAGCATATATTAACTTGGAGTCCAGACTATGACATTCAAA
+>chr21_31768316_31768366_+
+AACGTATATAAATGGTCCTGTCCAGATGTGGCATGCAAACTmAGAATCTT
+>chr21_31914206_31914256_-
+TGACACCCACTACTTAGAGTATAAAATCATTCTGAGAAGTTAGAGA
+>chr21_31933633_31933683_-
+TCAGAGTATATATAAATGTTCCTGTCCAGTCACAGTCACCAAACTGACCT
+>chr21_31962741_31962791_-
+ACATATAACTCAGGTTGGATAAAATAATTTGTACAAATCAGG
+>chr21_31964683_31964733_+
+TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAAGTGTCCATAC
+>chr21_31973364_31973414_+
+aaacttaaaactctataaacttaaaactCTAGAATCTGATCCTGCTATAC
+>chr21_31992870_31992920_+
+CTCATACACTATTGAAGATGTATAAAATTTCATTTGCAGATGGTGACATT
+>chr21_32185595_32185645_-
+TCACCACCCACCAGAGCTGGGATATATAAAGAAGGTTCTGAGACTAGGAA
+>chr21_32202076_32202126_-
+TGCCCACCAGCTTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT
+>chr21_32253899_32253949_-
+AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC
+>chr21_32410820_32410870_-
+TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG
+>chr21_36411748_36411798_-
+ATAGTTCTGTATAGTTTCAGTTGGCATCtaaaaattatataactttattt
+>chr21_37838750_37838800_-
+gatggttttataaggggcctcaccctcggctcagccctcattcttctcct
+>chr21_45705687_45705737_+
+CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGCGGGmGGTATAACAGC
+>chr21_45971413_45971463_-
+CAGGCCCTGGGCATATAAAAGCCCCAGCAGCCAACAGGctcacacacaca
+>chr21_45978668_45978718_-
+CAGAGGGGTATAAAGGTTCCGACCACTCAGAGGCCTGGCACGAtcactca
+>chr21_45993530_45993580_+
+CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCmTCACTCACTCGC
+>chr21_46020421_46020471_+
+GAGACATATAAAAGCCAACATCCCTGAGCACCTAACACACGGactcactc
+>chr21_46031920_46031970_+
+GGAAAATACCCAGGGAGGGTATAAAACCTCAGCAGCCAGGGCACACAAAC
+>chr21_46046964_46047014_+
+ACAAGGCCAGGAGGGGTATAAAAGCCTGAGAGCCCCAAGAACctcacaca
+>chr21_46057197_46057247_+
+ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCAT
+>chr21_46086869_46086919_-
+GACAGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC
+>chr21_46102103_46102153_-
+AGGTGTGTGCTTCTGTGCTGTGGGGATGCCTCCAGGTATAAAGGCT
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGmGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGmGGGCGGGTCAGGCCGGCGGGmGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGmGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGmGCGG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGmTGCCGGTGAGCmGTATAAAGGCCCTGGCG
+>chr21_47575506_47575556_-
+TGAGAAGCmGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGmGCCCTGGCG
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test1.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test1.html Mon Jul 14 21:33:41 2025 +0000
[
@@ -0,0 +1,39 @@
+        "train_negatives": {
+          "from": "shuffled",
+          "count": 33,
+          "positions": 1597
+        },
+        "test_positives": {
+          "count": 0,
+          "positions": 0
+        },
+        "test_negatives": {
+          "count": 0,
+          "positions": 0
+        },
+        "sequence_db": {
+          "freqs": [0.245, 0.255, 0.255, 0.245]
+        },
+        "motifs": [
+          {
+            "db": 0,
+            "id": "1-CTTTTATAYRCCY",
+            "alt": "STREME-1",
+            "width": 13,
+            "initial_width": 8,
+            "seed": "CTTTTATATGCCT",
+            "score_threshold": 10.1927,
+            "npassing": 18,
+            "train_pos_count": 18,
+            "train_neg_count": 0,
+            "train_log_pvalue": -6.81978,
+            "train_pvalue": "1.5e-007",
+            "train_dtc": -1.0,
+            "train_bernoulli": -1,
+            "test_pos_count": 0,
+            "test_neg_count": 0,
+            "test_log_pvalue": 0,
+            "test_pvalue": "1.0e+000",
+            "test_log_evalue": 0.69897,
+            "test_evalue": "5.0e+000",
+            "test_dtc": -1.0,
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test1.txt Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,93 @@
+********************************************************************************
+STREME - Sensitive, Thorough, Rapid, Enriched Motif Elicitation
+********************************************************************************
+MEME version 5.5.8 (Release date: Thu May 15 15:01:46 2025 -0700)
+
+For further information on how to interpret these results please access https://meme-suite.org/meme.
+To get a copy of the MEME Suite software please access https://meme-suite.org.
+
+********************************************************************************
+
+
+********************************************************************************
+REFERENCE
+********************************************************************************
+If you use this program in your research, please cite:
+
+Timothy L. Bailey,
+"STREME: accurate and versatile sequence motif discovery",
+Bioinformatics, Mar. 24, 2021.
+********************************************************************************
+
+
+ALPHABET= ACGT
+
+strands: + -
+
+Background letter frequencies
+A 0.244 C 0.256 G 0.256 T 0.244 
+
+MOTIF 1-CTTTTATAYRCCY STREME-1
+letter-probability matrix: alength= 4 w= 13 nsites= 18 S= 1.5e-007
+ 0.072963 0.851476 0.002598 0.072963
+ 0.072963 0.145128 0.002598 0.779311
+ 0.002486 0.002598 0.073075 0.921841
+ 0.002486 0.002598 0.002598 0.992318
+ 0.002486 0.002598 0.002598 0.992318
+ 0.992318 0.002598 0.002598 0.002486
+ 0.002486 0.002598 0.073075 0.921841
+ 0.992318 0.002598 0.002598 0.002486
+ 0.002486 0.497514 0.002598 0.497402
+ 0.284394 0.143552 0.499090 0.072963
+ 0.002486 0.710522 0.002598 0.284394
+ 0.002486 0.779422 0.073075 0.145017
+ 0.072963 0.428613 0.073075 0.425349
+
+MOTIF 2-CCCCACCRSC STREME-2
+letter-probability matrix: alength= 4 w= 10 nsites= 12 S= 7.2e-005
+ 0.139001 0.787776 0.070814 0.002409
+ 0.070705 0.924368 0.002517 0.002409
+ 0.002409 0.924368 0.070814 0.002409
+ 0.002409 0.992664 0.002517 0.002409
+ 0.716267 0.002517 0.210510 0.070705
+ 0.002409 0.856072 0.070814 0.070705
+ 0.208825 0.786248 0.002517 0.002409
+ 0.343890 0.030359 0.555046 0.070705
+ 0.070705 0.376471 0.550414 0.002409
+ 0.208825 0.786248 0.002517 0.002409
+
+MOTIF 3-RCAGAAKCA STREME-3
+letter-probability matrix: alength= 4 w= 9 nsites= 10 S= 4.4e-004
+ 0.595835 0.003626 0.397069 0.003470
+ 0.003470 0.694352 0.298708 0.003470
+ 0.792557 0.003626 0.101987 0.101831
+ 0.003470 0.101987 0.891074 0.003470
+ 0.694196 0.003626 0.298708 0.003470
+ 0.694196 0.298708 0.003626 0.003470
+ 0.003470 0.101987 0.300909 0.593635
+ 0.101831 0.792713 0.003626 0.101831
+ 0.989279 0.003626 0.003626 0.003470
+
+MOTIF 4-RAGTTATAAA STREME-4
+letter-probability matrix: alength= 4 w= 10 nsites= 8 S= 2.4e-003
+ 0.333562 0.113112 0.440386 0.112939
+ 0.988110 0.004021 0.004021 0.003848
+ 0.003848 0.004021 0.879191 0.112939
+ 0.003848 0.004021 0.222204 0.769927
+ 0.112939 0.004021 0.004021 0.879018
+ 0.769927 0.004021 0.113112 0.112939
+ 0.003848 0.004021 0.113112 0.879018
+ 0.988110 0.004021 0.004021 0.003848
+ 0.549304 0.113112 0.113112 0.224471
+ 0.769927 0.004021 0.113112 0.112939
+
+MOTIF 5-AACCTCGG STREME-5
+letter-probability matrix: alength= 4 w= 8 nsites= 7 S= 5.5e-003
+ 0.973700 0.008894 0.008894 0.008511
+ 0.732403 0.250191 0.008894 0.008511
+ 0.008511 0.974083 0.008894 0.008511
+ 0.008511 0.974083 0.008894 0.008511
+ 0.008511 0.008894 0.008894 0.973700
+ 0.008511 0.732786 0.250191 0.008511
+ 0.249809 0.008894 0.732786 0.008511
+ 0.008511 0.008894 0.974083 0.008511
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test1.xml Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,26 @@
+      <letter id="A" symbol="A" complement="T" name="Adenine" colour="CC0000"/>
+      <letter id="C" symbol="C" complement="G" name="Cytosine" colour="0000CC"/>
+      <letter id="G" symbol="G" complement="C" name="Guanine" colour="FFB300"/>
+      <letter id="T" symbol="T" aliases="U" complement="A" name="Thymine" colour="008000"/>
+      <letter id="N" symbol="N" aliases="X." equals="ACGT" name="Any base"/>
+      <letter id="V" symbol="V" equals="ACG" name="Not T"/>
+      <letter id="H" symbol="H" equals="ACT" name="Not G"/>
+      <letter id="D" symbol="D" equals="AGT" name="Not C"/>
+      <letter id="B" symbol="B" equals="CGT" name="Not A"/>
+      <letter id="M" symbol="M" equals="AC" name="Amino"/>
+      <letter id="R" symbol="R" equals="AG" name="Purine"/>
+      <letter id="W" symbol="W" equals="AT" name="Weak"/>
+      <letter id="S" symbol="S" equals="CG" name="Strong"/>
+      <letter id="Y" symbol="Y" equals="CT" name="Pyrimidine"/>
+      <letter id="K" symbol="K" equals="GT" name="Keto"/>
+    </alphabet>
+    <strands>both</strands>
+    <sequence_db A="0.245" C="0.255" G="0.255" T="0.245"/>
+    <background_frequencies source="--negatives--" order="2">
+      <alphabet_array>
+        <value letter_id="A">0.244</value>
+        <value letter_id="C">0.256</value>
+        <value letter_id="G">0.256</value>
+        <value letter_id="T">0.244</value>
+      </alphabet_array>
+    </background_frequencies>
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test2.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test2.html Mon Jul 14 21:33:41 2025 +0000
[
@@ -0,0 +1,43 @@
+        "train_negatives": {
+          "from": "shuffled",
+          "count": 33,
+          "positions": 1597
+        },
+        "test_positives": {
+          "count": 0,
+          "positions": 0
+        },
+        "test_negatives": {
+          "count": 0,
+          "positions": 0
+        },
+        "sequence_db": {
+          "freqs": [
+            0.218, 0.24, 0.24, 0.218, 0.0266, 0.0125, 0, 0.00313, 0, 0,
+            0.0266, 0, 0, 0, 0.0125, 0.00313
+          ]
+        },
+        "motifs": [
+          {
+            "db": 0,
+            "id": "1-GTATAAAAGC",
+            "alt": "STREME-1",
+            "width": 10,
+            "initial_width": 5,
+            "seed": "GTATAAAGGC",
+            "score_threshold": 9.73039,
+            "npassing": 28,
+            "train_pos_count": 28,
+            "train_neg_count": 0,
+            "train_log_pvalue": -13.1578,
+            "train_pvalue": "7.0e-014",
+            "train_dtc": -1.0,
+            "train_bernoulli": -1,
+            "test_pos_count": 0,
+            "test_neg_count": 0,
+            "test_log_pvalue": 0,
+            "test_pvalue": "1.0e+000",
+            "test_log_evalue": 0.477121,
+            "test_evalue": "3.0e+000",
+            "test_dtc": -1.0,
+            "test_bernoulli": -1,
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test2.txt Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,78 @@
+********************************************************************************
+STREME - Sensitive, Thorough, Rapid, Enriched Motif Elicitation
+********************************************************************************
+MEME version 5.5.8 (Release date: Thu May 15 15:01:46 2025 -0700)
+
+For further information on how to interpret these results please access https://meme-suite.org/meme.
+To get a copy of the MEME Suite software please access https://meme-suite.org.
+
+********************************************************************************
+
+
+********************************************************************************
+REFERENCE
+********************************************************************************
+If you use this program in your research, please cite:
+
+Timothy L. Bailey,
+"STREME: accurate and versatile sequence motif discovery",
+Bioinformatics, Mar. 24, 2021.
+********************************************************************************
+
+
+********************************************************************************
+ALPHABET "DNA with covalent modifications" DNA-LIKE
+********************************************************************************
+A "Adenine" 8510A8 ~ T "Thymine" A89610
+C "Cytosine" A50026 ~ G "Guanine" 313695
+a "N6-methyladenine" 8510A8 ~ t "Thymine_lowercase" 756BB1
+c "5-Carboxylcytosine" FEE090 ~ 4 "Guanine:5-Carboxylcytosine" E0F3F8
+f "5-Formylcytosine" FDAE61 ~ 3 "Guanine:5-Formylcytosine" ABD9E9
+g "modified_g" 00897B ~ 5 "cytosine_lowercase" 53698A
+h "5-Hydroxymethylcytosine" F46D43 ~ 2 "Guanine:5-Hydroxymethylcytosine" 74ADD1
+m "5-Methylcytosine" D73027 ~ 1 "Guanine:5-Methylcytosine" 4575B4
+? = ACGTacfghmt12345
+********************************************************************************
+
+strands: + -
+
+Background letter frequencies
+A 0.218 C 0.24 G 0.24 T 0.218 a 0.0266 c 0.0125 f 1.96e-05 g 0.00315 h 1.96e-05 m 1.96e-05 t 0.0266 1 1.96e-05 2 1.96e-05 3 1.96e-05 4 0.0125 5 0.00315 
+
+MOTIF 1-GTATAAAAGC STREME-1
+letter-probability matrix: alength= 16 w= 10 nsites= 28 S= 7.0e-014
+ 0.344606 0.001172 0.618858 0.034950 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.001065 0.001172 0.001172 0.996177 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.996177 0.001172 0.001172 0.001065 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.001065 0.001172 0.001172 0.996177 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.996177 0.001172 0.001172 0.001065 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.928407 0.001172 0.035057 0.034950 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.860638 0.136711 0.001172 0.001065 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.548672 0.001172 0.313139 0.136603 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.068834 0.068941 0.725207 0.136603 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+ 0.068834 0.485705 0.308443 0.136603 0.000130 0.000061 0.000000 0.000015 0.000000 0.000000 0.000130 0.000000 0.000000 0.000000 0.000061 0.000015
+
+MOTIF 2-CCCACCAGA STREME-2
+letter-probability matrix: alength= 16 w= 9 nsites= 20 S= 2.2e-007
+ 0.001482 0.947685 0.048775 0.001482 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+ 0.143970 0.805197 0.048775 0.001482 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+ 0.095771 0.711962 0.143064 0.048626 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+ 0.947536 0.001631 0.048775 0.001482 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+ 0.095771 0.710908 0.191263 0.001482 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+ 0.001482 0.900540 0.001631 0.095771 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+ 0.663614 0.095920 0.096974 0.142915 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+ 0.095771 0.332697 0.569474 0.001482 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+ 0.473982 0.239462 0.143064 0.142915 0.000181 0.000085 0.000000 0.000021 0.000000 0.000000 0.000181 0.000000 0.000000 0.000000 0.000085 0.000021
+
+MOTIF 3-4a4t4a4a4T STREME-3
+letter-probability matrix: alength= 16 w= 10 nsites= 7 S= 5.5e-003
+ 0.004400 0.004842 0.004842 0.004400 0.140510 0.000253 0.000000 0.000064 0.000000 0.000000 0.000537 0.000000 0.000000 0.000000 0.840087 0.000064
+ 0.004400 0.004842 0.004842 0.004400 0.700399 0.000253 0.000000 0.000064 0.000000 0.000000 0.280482 0.000000 0.000000 0.000000 0.000253 0.000064
+ 0.004400 0.004842 0.004842 0.004400 0.000537 0.000253 0.000000 0.000064 0.000000 0.000000 0.000537 0.000000 0.000000 0.000000 0.980059 0.000064
+ 0.004400 0.004842 0.004842 0.004400 0.000537 0.000253 0.000000 0.000064 0.000000 0.000000 0.840371 0.000000 0.000000 0.000000 0.140226 0.000064
+ 0.004400 0.004842 0.004842 0.004400 0.000537 0.000253 0.000000 0.000064 0.000000 0.000000 0.280482 0.000000 0.000000 0.000000 0.700115 0.000064
+ 0.004400 0.004842 0.004842 0.004400 0.560427 0.000253 0.000000 0.000064 0.000000 0.000000 0.420454 0.000000 0.000000 0.000000 0.000253 0.000064
+ 0.004400 0.004842 0.004842 0.144372 0.000537 0.000253 0.000000 0.000064 0.000000 0.000000 0.280482 0.000000 0.000000 0.000000 0.560142 0.000064
+ 0.004400 0.144814 0.144814 0.004400 0.420454 0.000253 0.000000 0.000064 0.000000 0.000000 0.280482 0.000000 0.000000 0.000000 0.000253 0.000064
+ 0.004400 0.144814 0.144814 0.144372 0.140510 0.000253 0.000000 0.000064 0.000000 0.000000 0.140510 0.000000 0.000000 0.000000 0.280198 0.000064
+ 0.004400 0.144814 0.004842 0.284344 0.280482 0.000253 0.000000 0.000064 0.000000 0.000000 0.000537 0.000000 0.000000 0.000000 0.280198 0.000064
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test2.xml Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,39 @@
+    <alphabet name="DNA with covalent modifications" like="dna">
+      <letter id="A" symbol="A" complement="T" name="Adenine" colour="8510A8"/>
+      <letter id="C" symbol="C" complement="G" name="Cytosine" colour="A50026"/>
+      <letter id="G" symbol="G" complement="C" name="Guanine" colour="313695"/>
+      <letter id="T" symbol="T" complement="A" name="Thymine" colour="A89610"/>
+      <letter id="a" symbol="a" complement="t" name="N6-methyladenine" colour="8510A8"/>
+      <letter id="c" symbol="c" complement="4" name="5-Carboxylcytosine" colour="FEE090"/>
+      <letter id="f" symbol="f" complement="3" name="5-Formylcytosine" colour="FDAE61"/>
+      <letter id="g" symbol="g" complement="5" name="modified_g" colour="00897B"/>
+      <letter id="h" symbol="h" complement="2" name="5-Hydroxymethylcytosine" colour="F46D43"/>
+      <letter id="m" symbol="m" complement="1" name="5-Methylcytosine" colour="D73027"/>
+      <letter id="t" symbol="t" complement="a" name="Thymine_lowercase" colour="756BB1"/>
+      <letter id="n1" symbol="1" complement="m" name="Guanine:5-Methylcytosine" colour="4575B4"/>
+      <letter id="n2" symbol="2" complement="h" name="Guanine:5-Hydroxymethylcytosine" colour="74ADD1"/>
+      <letter id="n3" symbol="3" complement="f" name="Guanine:5-Formylcytosine" colour="ABD9E9"/>
+      <letter id="n4" symbol="4" complement="c" name="Guanine:5-Carboxylcytosine" colour="E0F3F8"/>
+      <letter id="n5" symbol="5" complement="g" name="cytosine_lowercase" colour="53698A"/>
+      <letter id="x3F" symbol="?" equals="ACGTacfghmt12345"/>
+    </alphabet>
+    <strands>both</strands>
+    <sequence_db A="0.218" C="0.24" G="0.24" T="0.218" a="0.0266" c="0.0125" f="0" g="0.00313" h="0" m="0" t="0.0266" n1="0" n2="0" n3="0" n4="0.0125" n5="0.00313"/>
+    <background_frequencies source="--negatives--" order="0">
+      <alphabet_array>
+        <value letter_id="A">0.218</value>
+        <value letter_id="C">0.24</value>
+        <value letter_id="G">0.24</value>
+        <value letter_id="T">0.218</value>
+        <value letter_id="a">0.0266</value>
+        <value letter_id="c">0.0125</value>
+        <value letter_id="f">1.96e-05</value>
+        <value letter_id="g">0.00315</value>
+        <value letter_id="h">1.96e-05</value>
+        <value letter_id="m">1.96e-05</value>
+        <value letter_id="t">0.0266</value>
+        <value letter_id="n1">1.96e-05</value>
+        <value letter_id="n2">1.96e-05</value>
+        <value letter_id="n3">1.96e-05</value>
+        <value letter_id="n4">0.0125</value>
+        <value letter_id="n5">0.00315</value>
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test3.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test3.html Mon Jul 14 21:33:41 2025 +0000
[
@@ -0,0 +1,38 @@
+            "max_sites": 2,
+            "site_hist": [0, 6, 1],
+            "len": 5,
+            "nsites": 7,
+            "evalue": "0",
+            "pwm": [
+              [
+                0.00775938, 0.00626088, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.979973, 8.41549e-07, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.000219631, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                8.41549e-07, 8.41549e-07, 0.00577435, 8.41549e-07,
+                8.41549e-07, 8.41549e-07
+              ], [
+                0.00775938, 0.588717, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.397517, 8.41549e-07, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.000219631, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                8.41549e-07, 8.41549e-07, 0.00577435, 8.41549e-07,
+                8.41549e-07, 8.41549e-07
+              ], [
+                0.00775938, 0.00626088, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.00777606, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                8.41549e-07, 0.972417, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                8.41549e-07, 8.41549e-07, 0.00577435, 8.41549e-07,
+                8.41549e-07, 8.41549e-07
+              ], [
+                0.00775938, 0.00626088, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.979973, 8.41549e-07, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.000219631, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                8.41549e-07, 8.41549e-07, 0.00577435, 8.41549e-07,
+                8.41549e-07, 8.41549e-07
+              ], [
+                0.00775938, 0.00626088, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.787258, 8.41549e-07, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                0.000219631, 8.41549e-07, 8.41549e-07, 8.41549e-07,
+                8.41549e-07, 8.41549e-07, 0.19849, 8.41549e-07, 8.41549e-07,
+                8.41549e-07
+              ]
+            ]
\ No newline at end of file
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test3.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test3.txt Mon Jul 14 21:33:41 2025 +0000
b
b'@@ -0,0 +1,79 @@\n+********************************************************************************\n+STREME - Sensitive, Thorough, Rapid, Enriched Motif Elicitation\n+********************************************************************************\n+MEME version 5.5.8 (Release date: Thu May 15 15:01:46 2025 -0700)\n+\n+For further information on how to interpret these results please access https://meme-suite.org/meme.\n+To get a copy of the MEME Suite software please access https://meme-suite.org.\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+REFERENCE\n+********************************************************************************\n+If you use this program in your research, please cite:\n+\n+Timothy L. Bailey,\n+"STREME: accurate and versatile sequence motif discovery",\n+Bioinformatics, Mar. 24, 2021.\n+********************************************************************************\n+\n+\n+ALPHABET= ACDEFGHIKLMNPQRSTVWY\n+\n+strands: +\n+\n+Background letter frequencies\n+A 0.279 C 0.225 D 3.03e-05 E 3.03e-05 F 3.03e-05 G 0.28 H 3.03e-05 I 3.03e-05 K 3.03e-05 L 3.03e-05 M 0.0079 N 3.03e-05 P 3.03e-05 Q 3.03e-05 R 3.03e-05 S 3.03e-05 T 0.208 V 3.03e-05 W 3.03e-05 Y 3.03e-05 \n+\n+MOTIF 1-GCMGG STREME-1\n+letter-probability matrix: alength= 20 w= 5 nsites= 7 S= 5.5e-003\n+ 0.007759 0.006261 0.000001 0.000001 0.000001 0.979973 0.000001 0.000001 0.000001 0.000001 0.000220 0.000001 0.000001 0.000001 0.000001 0.000001 0.005774 0.000001 0.000001 0.000001\n+ 0.007759 0.588717 0.000001 0.000001 0.000001 0.397517 0.000001 0.000001 0.000001 0.000001 0.000220 0.000001 0.000001 0.000001 0.000001 0.000001 0.005774 0.000001 0.000001 0.000001\n+ 0.007759 0.006261 0.000001 0.000001 0.000001 0.007776 0.000001 0.000001 0.000001 0.000001 0.972417 0.000001 0.000001 0.000001 0.000001 0.000001 0.005774 0.000001 0.000001 0.000001\n+ 0.007759 0.006261 0.000001 0.000001 0.000001 0.979973 0.000001 0.000001 0.000001 0.000001 0.000220 0.000001 0.000001 0.000001 0.000001 0.000001 0.005774 0.000001 0.000001 0.000001\n+ 0.007759 0.006261 0.000001 0.000001 0.000001 0.787258 0.000001 0.000001 0.000001 0.000001 0.000220 0.000001 0.000001 0.000001 0.000001 0.000001 0.198490 0.000001 0.000001 0.000001\n+\n+MOTIF 2-GGTMA STREME-2\n+letter-probability matrix: alength= 20 w= 5 nsites= 6 S= 5.2e-002\n+ 0.166242 0.005192 0.000001 0.000001 0.000001 0.823585 0.000001 0.000001 0.000001 0.000001 0.000182 0.000001 0.000001 0.000001 0.000001 0.000001 0.004788 0.000001 0.000001 0.000001\n+ 0.006434 0.164999 0.000001 0.000001 0.000001 0.823585 0.000001 0.000001 0.000001 0.000001 0.000182 0.000001 0.000001 0.000001 0.000001 0.000001 0.004788 0.000001 0.000001 0.000001\n+ 0.006434 0.005192 0.000001 0.000001 0.000001 0.006448 0.000001 0.000001 0.000001 0.000001 0.000182 0.000001 0.000001 0.000001 0.000001 0.000001 0.981733 0.000001 0.000001 0.000001\n+ 0.006434 0.277760 0.000001 0.000001 0.000001 0.166256 0.000001 0.000001 0.000001 0.000001 0.479605 0.000001 0.000001 0.000001 0.000001 0.000001 0.069935 0.000001 0.000001 0.000001\n+ 0.823572 0.164999 0.000001 0.000001 0.000001 0.006448 0.000001 0.000001 0.000001 0.000001 0.000182 0.000001 0.000001 0.000001 0.000001 0.000001 0.004788 0.000001 0.000001 0.000001\n+\n+MOTIF 3-GTATAAAATCA STREME-3\n+letter-probability matrix: alength= 20 w= 11 nsites= 8 S= 9.3e-002\n+ 0.008845 0.007137 0.000001 0.000001 0.000001 0.977170 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.006583 0.000001 0.000001 0.000001\n+ 0.008845 0.007137 0.000001 0.000001 0.000001 0.008864 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.974888 0.000001 0.000001 0.000001\n+ 0.977151 0.007137 0.000001 0.000001 0.000001 0.008864 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.006583 0.000001 0.000001 0.000001\n+ 0.008845 0.007137 0.000001 0.000001 0.00000'..b'07137 0.000001 0.000001 0.000001 0.228552 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.006583 0.000001 0.000001 0.000001\n+ 0.757464 0.007137 0.000001 0.000001 0.000001 0.008864 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.226270 0.000001 0.000001 0.000001\n+ 0.757464 0.007137 0.000001 0.000001 0.000001 0.008864 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.226270 0.000001 0.000001 0.000001\n+ 0.008845 0.226824 0.000001 0.000001 0.000001 0.228552 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.535514 0.000001 0.000001 0.000001\n+ 0.008845 0.885886 0.000001 0.000001 0.000001 0.008864 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.096140 0.000001 0.000001 0.000001\n+ 0.448220 0.226824 0.000001 0.000001 0.000001 0.008864 0.000001 0.000001 0.000001 0.000001 0.000250 0.000001 0.000001 0.000001 0.000001 0.000001 0.315827 0.000001 0.000001 0.000001\n+\n+MOTIF 4-GAGGTG STREME-4\n+letter-probability matrix: alength= 20 w= 6 nsites= 5 S= 9.8e-002\n+ 0.007829 0.200766 0.000001 0.000001 0.000001 0.785344 0.000001 0.000001 0.000001 0.000001 0.000222 0.000001 0.000001 0.000001 0.000001 0.000001 0.005826 0.000001 0.000001 0.000001\n+ 0.979776 0.006317 0.000001 0.000001 0.000001 0.007846 0.000001 0.000001 0.000001 0.000001 0.000222 0.000001 0.000001 0.000001 0.000001 0.000001 0.005826 0.000001 0.000001 0.000001\n+ 0.202278 0.041933 0.000001 0.000001 0.000001 0.670460 0.000001 0.000001 0.000001 0.000001 0.000222 0.000001 0.000001 0.000001 0.000001 0.000001 0.085095 0.000001 0.000001 0.000001\n+ 0.007829 0.200766 0.000001 0.000001 0.000001 0.785344 0.000001 0.000001 0.000001 0.000001 0.000222 0.000001 0.000001 0.000001 0.000001 0.000001 0.005826 0.000001 0.000001 0.000001\n+ 0.007829 0.006317 0.000001 0.000001 0.000001 0.007846 0.000001 0.000001 0.000001 0.000001 0.000222 0.000001 0.000001 0.000001 0.000001 0.000001 0.977773 0.000001 0.000001 0.000001\n+ 0.087098 0.006317 0.000001 0.000001 0.000001 0.900525 0.000001 0.000001 0.000001 0.000001 0.000222 0.000001 0.000001 0.000001 0.000001 0.000001 0.005826 0.000001 0.000001 0.000001\n+\n+MOTIF 5-TCACACACA STREME-5\n+letter-probability matrix: alength= 20 w= 9 nsites= 5 S= 9.8e-002\n+ 0.007502 0.192368 0.000001 0.000001 0.000001 0.007518 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.792388 0.000001 0.000001 0.000001\n+ 0.193817 0.716905 0.000001 0.000001 0.000001 0.083471 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.005583 0.000001 0.000001 0.000001\n+ 0.794307 0.006053 0.000001 0.000001 0.000001 0.007518 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.191898 0.000001 0.000001 0.000001\n+ 0.007502 0.903220 0.000001 0.000001 0.000001 0.007518 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.081536 0.000001 0.000001 0.000001\n+ 0.794307 0.006053 0.000001 0.000001 0.000001 0.007518 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.191898 0.000001 0.000001 0.000001\n+ 0.007502 0.792858 0.000001 0.000001 0.000001 0.193833 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.005583 0.000001 0.000001 0.000001\n+ 0.718354 0.082006 0.000001 0.000001 0.000001 0.007518 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.191898 0.000001 0.000001 0.000001\n+ 0.007502 0.979173 0.000001 0.000001 0.000001 0.007518 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.005583 0.000001 0.000001 0.000001\n+ 0.532038 0.006053 0.000001 0.000001 0.000001 0.193833 0.000001 0.000001 0.000001 0.000001 0.000212 0.000001 0.000001 0.000001 0.000001 0.000001 0.267851 0.000001 0.000001 0.000001\n'
b
diff -r fd05b142b3a3 -r 0c97e5c18468 test-data/streme_output_test3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/streme_output_test3.xml Mon Jul 14 21:33:41 2025 +0000
b
@@ -0,0 +1,73 @@
+      <letter id="A" symbol="A" name="Alanine" colour="0000CC"/>
+      <letter id="C" symbol="C" name="Cysteine" colour="0000CC"/>
+      <letter id="D" symbol="D" name="Aspartic acid" colour="FF00FF"/>
+      <letter id="E" symbol="E" name="Glutamic acid" colour="FF00FF"/>
+      <letter id="F" symbol="F" name="Phenylalanine" colour="0000CC"/>
+      <letter id="G" symbol="G" name="Glycine" colour="FFB300"/>
+      <letter id="H" symbol="H" name="Histidine" colour="FFCCCC"/>
+      <letter id="I" symbol="I" name="Isoleucine" colour="0000CC"/>
+      <letter id="K" symbol="K" name="Lysine" colour="CC0000"/>
+      <letter id="L" symbol="L" name="Leucine" colour="0000CC"/>
+      <letter id="M" symbol="M" name="Methionine" colour="0000CC"/>
+      <letter id="N" symbol="N" name="Asparagine" colour="008000"/>
+      <letter id="P" symbol="P" name="Proline" colour="FFFF00"/>
+      <letter id="Q" symbol="Q" name="Glutamine" colour="008000"/>
+      <letter id="R" symbol="R" name="Arginine" colour="CC0000"/>
+      <letter id="S" symbol="S" name="Serine" colour="008000"/>
+      <letter id="T" symbol="T" name="Threonine" colour="008000"/>
+      <letter id="V" symbol="V" name="Valine" colour="0000CC"/>
+      <letter id="W" symbol="W" name="Tryptophan" colour="0000CC"/>
+      <letter id="Y" symbol="Y" name="Tyrosine" colour="33E6CC"/>
+      <letter id="X" symbol="X" aliases="*." equals="ACDEFGHIKLMNPQRSTVWY" name="Any amino acid"/>
+      <letter id="B" symbol="B" equals="DN" name="Asparagine or Aspartic acid"/>
+      <letter id="Z" symbol="Z" equals="EQ" name="Glutamine or Glutamic acid"/>
+      <letter id="J" symbol="J" equals="IL" name="Leucine or Isoleucine"/>
+    </alphabet>
+    <strands>none</strands>
+    <sequence_db A="0.275" C="0.224" D="0" E="0" F="0" G="0.283" H="0" I="0" K="0" L="0" M="0.00977" N="0" P="0" Q="0" R="0" S="0" T="0.209" V="0" W="0" Y="0"/>
+    <background_frequencies source="--negatives--" order="0">
+      <alphabet_array>
+        <value letter_id="A">0.279</value>
+        <value letter_id="C">0.225</value>
+        <value letter_id="D">3.03e-05</value>
+        <value letter_id="E">3.03e-05</value>
+        <value letter_id="F">3.03e-05</value>
+        <value letter_id="G">0.28</value>
+        <value letter_id="H">3.03e-05</value>
+        <value letter_id="I">3.03e-05</value>
+        <value letter_id="K">3.03e-05</value>
+        <value letter_id="L">3.03e-05</value>
+        <value letter_id="M">0.0079</value>
+        <value letter_id="N">3.03e-05</value>
+        <value letter_id="P">3.03e-05</value>
+        <value letter_id="Q">3.03e-05</value>
+        <value letter_id="R">3.03e-05</value>
+        <value letter_id="S">3.03e-05</value>
+        <value letter_id="T">0.208</value>
+        <value letter_id="V">3.03e-05</value>
+        <value letter_id="W">3.03e-05</value>
+        <value letter_id="Y">3.03e-05</value>
+      </alphabet_array>
+    </background_frequencies>
+    <stop nmotifs="5"/>
+    <objfun>Differential Enrichment</objfun>
+    <test>Fisher Exact Test</test>
+    <minw>5</minw>
+    <maxw>20</maxw>
+    <kmer>1</kmer>
+    <hofract>0.1</hofract>
+    <neval>25</neval>
+    <nref>4</nref>
+    <niter>20</niter>
+    <patience>3</patience>
+    <seed>0</seed>
+    <notrim>no</notrim>
+    <useer>no</useer>
+    <minscore>0</minscore>
+    <ignore_depth>5</ignore_depth>
+    <nsubsets>1</nsubsets>
+    <min_pal_ratio>0.85</min_pal_ratio>
+    <max_pal_ed>5</max_pal_ed>
+    <cand>no</cand>
+    <experimental>no</experimental>
+    <totallength>0</totallength>
\ No newline at end of file