Repository 'meme_psp_gen'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/meme_psp_gen

Changeset 2:b48e673af4e8 (2018-05-17)
Previous changeset 1:793225b11202 (2018-04-25) Next changeset 3:ff2f53a32d0e (2019-12-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme commit e2cf796f991cbe8c96e0cc5a0056b7255ac3ad6b
modified:
macros.xml
meme_psp_gen.xml
test-data/meme_psp_protein_input.fasta
added:
test-data/dreme_fimo_input_1.xml
test-data/dreme_output_test1.html
test-data/dreme_output_test1.txt
test-data/dreme_output_test1.xml
test-data/dreme_output_test2.html
test-data/dreme_output_test2.txt
test-data/dreme_output_test2.xml
test-data/fimo_background_probs_hsa_chrM.txt
test-data/fimo_output_test1.gff
test-data/fimo_output_test1.html
test-data/fimo_output_test1.txt
test-data/fimo_output_test1.xml
test-data/fimo_output_test2.gff
test-data/fimo_output_test2.html
test-data/fimo_output_test2.txt
test-data/fimo_output_test2.xml
test-data/fimo_output_test3.html
test-data/fimo_output_test3.txt
test-data/fimo_output_test3.xml
test-data/hsa_chrM.fa
test-data/meme_fimo_input_1.xml
test-data/meme_output_test1.html
test-data/meme_output_test1.txt
test-data/meme_output_test1.xml
test-data/meme_output_test2.html
test-data/meme_output_test2.txt
test-data/meme_output_test2.xml
test-data/meme_psp_output_test1.memepsp
test-data/meme_psp_output_test1.tabular
removed:
fimo_wrapper.py
test-data/dreme1.html
test-data/dreme1.txt
test-data/dreme1.xml
test-data/dreme2.html
test-data/dreme2.txt
test-data/fimo_output_almost-gff_1.txt
test-data/fimo_output_almost-gff_2.txt
test-data/fimo_output_html_1.html
test-data/fimo_output_html_2.html
test-data/fimo_output_interval_1.txt
test-data/fimo_output_interval_2.txt
test-data/fimo_output_txt_1.txt
test-data/fimo_output_txt_2.txt
test-data/fimo_output_xml_1.xml
test-data/fimo_output_xml_2.xml
test-data/meme_output_html_1.html
test-data/meme_output_html_2.html
test-data/meme_output_txt_1.txt
test-data/meme_output_txt_2.txt
test-data/meme_output_xml_1.xml
test-data/meme_output_xml_2.xml
test-data/meme_psp_gen_reports_output.tabular
test-data/motif1.gff
test-data/output.memepsp
test-data/phiX.fasta
b
diff -r 793225b11202 -r b48e673af4e8 fimo_wrapper.py
--- a/fimo_wrapper.py Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,194 +0,0 @@\n-#!/usr/bin/env python\n-import argparse\n-import os\n-import shutil\n-import string\n-import subprocess\n-import sys\n-import tempfile\n-\n-BUFFSIZE = 1048576\n-# Translation table for reverse Complement, with ambiguity codes.\n-DNA_COMPLEMENT = string.maketrans("ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb")\n-\n-\n-def get_stderr(tmp_stderr):\n-    tmp_stderr.seek(0)\n-    stderr = \'\'\n-    try:\n-        while True:\n-            stderr += tmp_stderr.read(BUFFSIZE)\n-            if not stderr or len(stderr) % BUFFSIZE != 0:\n-                break\n-    except OverflowError:\n-        pass\n-    return stderr\n-\n-\n-def reverse(sequence):\n-    # Reverse sequence string.\n-    return sequence[::-1]\n-\n-\n-def dna_complement(sequence):\n-    # Complement DNA sequence string.\n-    return sequence.translate(DNA_COMPLEMENT)\n-\n-\n-def dna_reverse_complement(sequence):\n-    # Returns the reverse complement of the sequence.\n-    sequence = reverse(sequence)\n-    return dna_complement(sequence)\n-\n-\n-def stop_err(msg):\n-    sys.stderr.write(msg)\n-    sys.exit(1)\n-\n-\n-parser = argparse.ArgumentParser()\n-parser.add_argument(\'--input_motifs\', dest=\'input_motifs\', help=\'MEME output formatted files for input to fimo\')\n-parser.add_argument(\'--input_fasta\', dest=\'input_fasta\', help=\'Fassta sequence file\')\n-parser.add_argument(\'--options_type\', dest=\'options_type\', help=\'Basic or Advance options\')\n-parser.add_argument(\'--input_psp\', dest=\'input_psp\', default=None, help=\'File containing position specific priors\')\n-parser.add_argument(\'--input_prior_dist\', dest=\'input_prior_dist\', default=None, help=\'File containing binned distribution of priors\')\n-parser.add_argument(\'--alpha\', dest=\'alpha\', type=float, default=1.0, help=\'The alpha parameter for calculating position specific priors\')\n-parser.add_argument(\'--bgfile\', dest=\'bgfile\', default=None, help=\'Background file type, used only if not "default"\')\n-parser.add_argument(\'--max_strand\', action=\'store_true\', help=\'If matches on both strands at a given position satisfy the output threshold, only report the match for the strand with the higher score\')\n-parser.add_argument(\'--max_stored_scores\', dest=\'max_stored_scores\', type=int, help=\'Maximum score count to store\')\n-parser.add_argument(\'--motif\', dest=\'motifs\', action=\'append\', default=[], help=\'Specify motif by id\')\n-parser.add_argument(\'--output_separate_motifs\', dest=\'output_separate_motifs\', default=\'no\', help=\'Output one dataset per motif\')\n-parser.add_argument(\'--motif_pseudo\', dest=\'motif_pseudo\', type=float, default=0.1, help=\'Pseudocount to add to counts in motif matrix\')\n-parser.add_argument(\'--no_qvalue\', action=\'store_true\', help=\'Do not compute a q-value for each p-value\')\n-parser.add_argument(\'--norc\', action=\'store_true\', help=\'Do not score the reverse complement DNA strand\')\n-parser.add_argument(\'--output_path\', dest=\'output_path\', help=\'Output files directory\')\n-parser.add_argument(\'--parse_genomic_coord\', dest=\'parse_genomic_coord\', default=\'no\', help=\'Check each sequence header for UCSC style genomic coordinates\')\n-parser.add_argument(\'--remove_duplicate_coords\', dest=\'remove_duplicate_coords\', default=\'no\', help=\'Remove duplicate entries in unique GFF coordinates\')\n-parser.add_argument(\'--qv_thresh\', action=\'store_true\', help=\'Use q-values for the output threshold\')\n-parser.add_argument(\'--thresh\', dest=\'thresh\', type=float, help=\'p-value threshold\')\n-parser.add_argument(\'--gff_output\', dest=\'gff_output\', help=\'Gff output file\')\n-parser.add_argument(\'--html_output\', dest=\'html_output\', help=\'HTML output file\')\n-parser.add_argument(\'--interval_output\', dest=\'interval_output\', help=\'Interval output file\')\n-parser.add_argument(\'--txt_output\', dest=\'txt_output\', help=\'Text output file\')\n-parser.add_argument(\'--xml_output\', dest=\'xml_output\', help=\'XML output file\')\n-args = parser.parse_args()\n-\n-fimo_cmd_list = [\'fimo\']\n-if args.options_type == \'advanced\':\n-    fimo_cmd_list.append(\'--alpha %4f\' % args.alpha)\n-    if args.bgfile is'..b'.append(\'--thresh %4f\' % args.thresh)\n-    if args.input_psp is not None:\n-        fimo_cmd_list.append(\'--psp "%s"\' % args.input_psp)\n-    if args.input_prior_dist is not None:\n-        fimo_cmd_list.append(\'--prior-dist "%s"\' % args.input_prior_dist)\n-fimo_cmd_list.append(\'--o "%s"\' % (args.output_path))\n-fimo_cmd_list.append(\'--verbosity 1\')\n-fimo_cmd_list.append(args.input_motifs)\n-fimo_cmd_list.append(args.input_fasta)\n-\n-fimo_cmd = \' \'.join(fimo_cmd_list)\n-\n-try:\n-    tmp_stderr = tempfile.NamedTemporaryFile()\n-    proc = subprocess.Popen(args=fimo_cmd, shell=True, stderr=tmp_stderr)\n-    returncode = proc.wait()\n-    if returncode != 0:\n-        stderr = get_stderr(tmp_stderr)\n-        stop_err(stderr)\n-except Exception as e:\n-    stop_err(\'Error running FIMO:\\n%s\' % e)\n-\n-shutil.move(os.path.join(args.output_path, \'fimo.txt\'), args.txt_output)\n-\n-gff_file = os.path.join(args.output_path, \'fimo.gff\')\n-if args.remove_duplicate_coords == \'yes\':\n-    tmp_stderr = tempfile.NamedTemporaryFile()\n-    # Identify and eliminating identical motif occurrences.  These\n-    # are identical if the combination of chrom, start, end and\n-    # motif id are identical.\n-    cmd = \'sort -k1,1 -k4,4n -k5,5n -k9.1,9.6 -u -o %s %s\' % (gff_file, gff_file)\n-    proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True)\n-    returncode = proc.wait()\n-    if returncode != 0:\n-        stderr = get_stderr(tmp_stderr)\n-        stop_err(stderr)\n-    # Sort GFF output by a combination of chrom, score, start.\n-    cmd = \'sort -k1,1 -k4,4n -k6,6n -o %s %s\' % (gff_file, gff_file)\n-    proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True)\n-    returncode = proc.wait()\n-    if returncode != 0:\n-        stderr = get_stderr(tmp_stderr)\n-        stop_err(stderr)\n-if args.output_separate_motifs == \'yes\':\n-    # Create the collection output directory.\n-    collection_path = (os.path.join(os.getcwd(), \'output\'))\n-    # Keep track of motif occurrences.\n-    header_line = None\n-    motif_ids = []\n-    file_handles = []\n-    for line in open(gff_file, \'r\'):\n-        if line.startswith(\'#\'):\n-            if header_line is None:\n-                header_line = line\n-            continue\n-        items = line.split(\'\\t\')\n-        attribute = items[8]\n-        attributes = attribute.split(\';\')\n-        name = attributes[0]\n-        motif_id = name.split(\'=\')[1]\n-        file_name = os.path.join(collection_path, \'MOTIF%s.gff\' % motif_id)\n-        if motif_id in motif_ids:\n-            i = motif_ids.index(motif_id)\n-            fh = file_handles[i]\n-            fh.write(line)\n-        else:\n-            fh = open(file_name, \'wb\')\n-            if header_line is not None:\n-                fh.write(header_line)\n-            fh.write(line)\n-            motif_ids.append(motif_id)\n-            file_handles.append(fh)\n-    for file_handle in file_handles:\n-        file_handle.close()\n-else:\n-    shutil.move(gff_file, args.gff_output)\n-shutil.move(os.path.join(args.output_path, \'fimo.xml\'), args.xml_output)\n-shutil.move(os.path.join(args.output_path, \'fimo.html\'), args.html_output)\n-\n-out_file = open(args.interval_output, \'wb\')\n-out_file.write("#%s\\n" % "\\t".join(("chr", "start", "end", "pattern name", "score", "strand", "matched sequence", "p-value", "q-value")))\n-for line in open(args.txt_output):\n-    if line.startswith(\'#\'):\n-        continue\n-    fields = line.rstrip("\\n\\r").split("\\t")\n-    start, end = int(fields[2]), int(fields[3])\n-    sequence = fields[7]\n-    if start > end:\n-        # Flip start and end and set strand.\n-        start, end = end, start\n-        strand = "-"\n-        # We want sequences relative to strand; FIMO always provides + stranded sequence.\n-        sequence = dna_reverse_complement(sequence)\n-    else:\n-        strand = "+"\n-    # Make 0-based start position.\n-    start -= 1\n-    out_file.write("%s\\n" % "\\t".join([fields[1], str(start), str(end), fields[0], fields[4], strand, sequence, fields[5], fields[6]]))\n-out_file.close()\n'
b
diff -r 793225b11202 -r b48e673af4e8 macros.xml
--- a/macros.xml Wed Apr 25 12:13:08 2018 -0400
+++ b/macros.xml Thu May 17 14:11:15 2018 -0400
b
@@ -1,10 +1,11 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <macros>
-    <token name="@WRAPPER_VERSION@">4.11.2</token>
+    <token name="@WRAPPER_VERSION@">4.12.0</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="1.3.23">graphicsmagick</requirement>
-            <requirement type="package" version="4.11.2">meme</requirement>
+            <requirement type="package" version="4.12.0">meme</requirement>
+            <yield/>
         </requirements>
     </xml>
 </macros>
b
diff -r 793225b11202 -r b48e673af4e8 meme_psp_gen.xml
--- a/meme_psp_gen.xml Wed Apr 25 12:13:08 2018 -0400
+++ b/meme_psp_gen.xml Thu May 17 14:11:15 2018 -0400
[
@@ -5,9 +5,11 @@
     </macros>
     <expand macro="requirements" />
     <command detect_errors="exit_code"><![CDATA[
+ln -s '${primary_sequence}' meme_psp_input_pos.fa && 
+ln -s '${control_sequence}' meme_psp_input_neg.fa &&
 psp-gen
--pos '$primary_sequence'
--neg '$control_sequence'
+-pos meme_psp_input_pos.fa
+-neg meme_psp_input_neg.fa
 -minw $adv.minw
 -maxw $adv.maxw
 $adv.alphabet
@@ -84,8 +86,8 @@
             <param name="report_scores" value="yes"/>
             <param name="verbose" value="-verbose"/>
             <param name="non_commercial_use" value="NON_COMMERCIAL_USE"/>
-            <output name="output_psp" file="output.memepsp"/>
-            <output name="output_tabular" file="meme_psp_gen_reports_output.tabular" compare="contains"/>
+            <output name="output_psp" file="meme_psp_output_test1.memepsp"/>
+            <output name="output_tabular" file="meme_psp_output_test1.tabular"/>
         </test>
     </tests>
     <help>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme1.html
--- a/test-data/dreme1.html Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,6199 +0,0 @@\n-<!DOCTYPE HTML>\n-<html>\n-  <head>\n-    <meta charset="UTF-8">\n-    <title>DREME</title>\n-    <script>\n-      // @JSON_VAR data\n-      var data = {\n-        "program": "dreme",\n-        "version": "4.11.2",\n-        "release": "Thu May 05 14:58:55 2016 -0700",\n-        "cmd": [\n-          "dreme", "-o", "./output", "-p",\n-          "/tmp/tmpijN1y0/files/000/dataset_1.dat", "-norc", "-rna", "-s",\n-          "1"\n-        ],\n-        "options": {\n-          "revcomp": false,\n-          "ngen": 100,\n-          "add_pv_thresh": 0.01,\n-          "seed": 1,\n-          "stop": {\n-            "evalue": "0.05"\n-          }\n-        },\n-        "alphabet": {\n-          "name": "RNA",\n-          "like": "rna",\n-          "ncore": 4,\n-          "symbols": [\n-            {\n-              "symbol": "A",\n-              "name": "Adenine",\n-              "colour": "CC0000"\n-            }, {\n-              "symbol": "C",\n-              "name": "Cytosine",\n-              "colour": "0000CC"\n-            }, {\n-              "symbol": "G",\n-              "name": "Guanine",\n-              "colour": "FFB300"\n-            }, {\n-              "symbol": "U",\n-              "aliases": "T",\n-              "name": "Uracil",\n-              "colour": "008000"\n-            }, {\n-              "symbol": "N",\n-              "aliases": "X.",\n-              "name": "Any base",\n-              "equals": "ACGU"\n-            }, {\n-              "symbol": "V",\n-              "name": "Not U",\n-              "equals": "ACG"\n-            }, {\n-              "symbol": "H",\n-              "name": "Not G",\n-              "equals": "ACU"\n-            }, {\n-              "symbol": "D",\n-              "name": "Not C",\n-              "equals": "AGU"\n-            }, {\n-              "symbol": "B",\n-              "name": "Not A",\n-              "equals": "CGU"\n-            }, {\n-              "symbol": "M",\n-              "name": "Amino",\n-              "equals": "AC"\n-            }, {\n-              "symbol": "R",\n-              "name": "Purine",\n-              "equals": "AG"\n-            }, {\n-              "symbol": "W",\n-              "name": "Weak",\n-              "equals": "AU"\n-            }, {\n-              "symbol": "S",\n-              "name": "Strong",\n-              "equals": "CG"\n-            }, {\n-              "symbol": "Y",\n-              "name": "Pyrimidine",\n-              "equals": "CU"\n-            }, {\n-              "symbol": "K",\n-              "name": "Keto",\n-              "equals": "GU"\n-            }\n-          ]\n-        },\n-        "background": {\n-          "freqs": [0.221, 0.245, 0.221, 0.312]\n-        },\n-        "sequence_db": {\n-          "name": "dataset 1",\n-          "file": "/tmp/tmpijN1y0/files/000/dataset_1.dat",\n-          "lmod": "Tue Apr 24 13:55:48 CEST 2018",\n-          "count": 1000\n-        },\n-        "control_db": {\n-          "name": "shuffled positive sequences",\n-          "from": "shuffled",\n-          "count": 1000,\n-          "freqs": [0.221, 0.245, 0.221, 0.312]\n-        },\n-        "motifs": [\n-          {\n-            "db": 0,\n-            "id": "UUYUCY",\n-            "alt": "MEME",\n-            "len": 6,\n-            "nsites": 459,\n-            "evalue": "1.2e-013",\n-            "p": 387,\n-            "n": 210,\n-            "pvalue": "2.6e-018",\n-            "unerased_evalue": "1.2e-013",\n-            "pwm": [\n-              [0.000000, 0.000000, 0.000000, 1.000000], \n-              [0.000000, 0.000000, 0.000000, 1.000000], \n-              [0.000000, 0.294118, 0.000000, 0.705882], \n-              [0.000000, 0.000000, 0.000000, 1.000000], \n-              [0.000000, 1.000000, 0.000000, 0.000000], \n-              [0.000000, 0.474946, 0.000000, 0.525054]\n-            ],\n-            "matches": [\n-              {\n-                "seq": "UUUUCC",\n-                "p": 147,\n-                "n": 75,\n-                "pvalue": "1.8e-007",\n-                "evalue": "8.1e-003"\n-              }, {\n-              '..b'alphabet"></td>\n-          <td id="ins_seq_count"></td>\n-        </tr>\n-      </table>\n-      <script>\n-      {\n-        var db = data.sequence_db;\n-        $("ins_seq_source").innerHTML = db.file;\n-        $("ins_seq_alphabet").innerHTML = dreme_alphabet.get_alphabet_name();\n-        $("ins_seq_count").innerHTML = db.count;\n-      }\n-      </script>\n-      <h4>Control Sequences</h4>\n-      <table id="seq_info" class="inputs">\n-        <tr><th>Source <div class="help" data-topic="pop_seq_source"></div></th>\n-          <th>Sequence Count <div class="help" data-topic="pop_seq_count"></div></th>\n-        </tr>\n-        <tr>\n-          <td id="ins_cseq_source"></td>\n-          <td id="ins_cseq_count"></td>\n-        </tr>\n-      </table>\n-      <script>\n-      {\n-        var db = data.control_db;\n-        if (db.from == "shuffled") {\n-          $("ins_cseq_source").innerHTML = "Shuffled Sequences";\n-        } else {\n-          $("ins_cseq_source").innerHTML = db.file;\n-        }\n-        $("ins_cseq_count").innerHTML = db.count;\n-      }\n-      </script>\n-      <h4>Background</h4>\n-      <span id="alpha_bg"></span>\n-      <script>\n-      {\n-        $("alpha_bg").appendChild(make_alpha_bg(dreme_alphabet, data.control_db.freqs));\n-      }\n-      </script>\n-      <h4>Other Settings</h4>\n-      <table id="tbl_settings" class="inputs hide_advanced">\n-        <tr>\n-          <th>Strand Handling</th>\n-          <td id="opt_strand">\n-            <span class="strand_none">This alphabet only has one strand</span>\n-            <span class="strand_given">Only the given strand is processed</span>\n-            <span class="strand_both">Both the given and reverse complement strands are processed</span>\n-          </td>\n-        </tr>\n-        <tr><th># REs to Generalize</th><td id="opt_ngen"></td></tr>\n-        <tr><th>Shuffle Seed</th><td id="opt_seed"></td></tr>\n-        <tr><th>E-value Threshold</th><td id="opt_stop_evalue"></td></tr>\n-        <tr><th>Max Motif Count</th><td id="opt_stop_count"></td></tr>\n-        <tr><th>Max Run Time</th><td id="opt_stop_time"></td></tr>\n-      </table>\n-      <script>\n-      {\n-        $("opt_strand").className = (dreme_alphabet.has_complement() ? (data.options.revcomp ? "both" : "given") : "none");\n-        $("opt_ngen").innerHTML = data.options.ngen;\n-        $("opt_seed").innerHTML = data.options.seed;\n-        $("opt_stop_evalue").innerHTML = data.options.stop.evalue;\n-        $("opt_stop_count").innerHTML = (typeof data.options.stop.count == "number" ? data.options.stop.count : "No maximum motif count.");\n-        $("opt_stop_time").innerHTML = (typeof data.options.stop.time == "number" ? data.options.stop.time + " seconds." : "No maximum running time.");\n-      }\n-      </script>\n-    </div>\n-    <!-- list information on this program -->\n-    <div id="info_sec" class="bar" style="position:relative">\n-      <div style="position: absolute; right: 0;"><a href="#inputs_sec">Previous</a> <a href="#">Top</a></div>\n-      <div class="subsection">\n-        <h5 id="version">DREME version</h5>\n-        <span id="ins_version"></span> \n-        (Release date: <span id="ins_release"></span>)<br>\n-      </div>\n-      <script>\n-        $("ins_version").innerHTML = data["version"];\n-        $("ins_release").innerHTML = data["release"];\n-      </script>\n-      <div class="subsection">\n-        <h5 id="reference">Reference</h5>\n-        <span class="citation">\n-          Timothy L. Bailey, "DREME: Motif discovery in transcription factor ChIP-seq data", <i>Bioinformatics</i>, <b>27</b>(12):1653-1659, 2011.\n-          <a href="http://bioinformatics.oxfordjournals.org/content/27/12/1653">[full text]</a>\n-        </span>\n-      </div>\n-      <div class="subsection">\n-        <h5 id="command">Command line</h5>\n-        <textarea id="cmd" rows="3" style="width:100%;" readonly="readonly">\n-        </textarea>\n-        <script>$("cmd").value = data["cmd"].join(" ");</script>\n-      </div>\n-    </div>\n-    \n-  </body>\n-</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme1.txt
--- a/test-data/dreme1.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,102 +0,0 @@
-# DREME 4.12.0
-#     command: dreme -oc dreme_out -rna -norc -p dreme_test_sites.fa -e 0.05 -mink 3 -maxk 8
-#   positives: 1000 from dreme_test_sites.fa (Thu Apr 19 19:09:45 CEST 2018)
-#   negatives: 1000 from shuffled positives
-#        host: ThinkPad-T450s
-#        when: Thu Apr 19 19:11:17 CEST 2018
-
-MEME version 4.12.0
-
-ALPHABET "RNA" RNA-LIKE
-A "Adenine" CC0000
-C "Cytosine" 0000CC
-G "Guanine" FFB300
-U "Uracil" 008000
-N "Any base" = ACGU
-X = ACGU
-. = ACGU
-V "Not U" = ACG
-H "Not G" = ACU
-D "Not C" = AGU
-B "Not A" = CGU
-M "Amino" = AC
-R "Purine" = AG
-W "Weak" = AU
-S "Strong" = CG
-Y "Pyrimidine" = CU
-K "Keto" = GU
-T = U
-END ALPHABET
-
-Background letter frequencies (from dataset):
-A 0.221 C 0.245 G 0.221 U 0.312
-
-
-MOTIF UUYUCY DREME-1
-
-#             Word        Pos        Neg    P-value    E-value
-# BEST      UUYUCY        387        210   2.6e-018   1.2e-013
-#           UUUUCC        147         75   1.8e-007   8.1e-003
-#           UUUUCU        155         94   2.2e-005   1.0e+000
-#           UUCUCU         94         51   1.3e-004   6.1e+000
-#           UUCUCC         75         42   1.1e-003   5.0e+001
-
-letter-probability matrix: alength= 4 w= 6 nsites= 459 E= 1.2e-013
-0.000000 0.000000 0.000000 1.000000
-0.000000 0.000000 0.000000 1.000000
-0.000000 0.294118 0.000000 0.705882
-0.000000 0.000000 0.000000 1.000000
-0.000000 1.000000 0.000000 0.000000
-0.000000 0.474946 0.000000 0.525054
-
-
-MOTIF YAGG DREME-2
-
-#             Word        Pos        Neg    P-value    E-value
-# BEST        YAGG        600        416   1.1e-016   5.1e-012
-#             CAGG        441        304   1.5e-010   6.6e-006
-#             UAGG        232        165   1.1e-004   4.7e+000
-
-letter-probability matrix: alength= 4 w= 4 nsites= 793 E= 5.1e-012
-0.000000 0.692308 0.000000 0.307692
-1.000000 0.000000 0.000000 0.000000
-0.000000 0.000000 1.000000 0.000000
-0.000000 0.000000 1.000000 0.000000
-
-
-MOTIF GAAGAW DREME-3
-
-#             Word        Pos        Neg    P-value    E-value
-# BEST      GAAGAW         81         22   8.2e-010   3.4e-005
-#           GAAGAU         45          7   2.4e-008   9.9e-004
-#           GAAGAA         40         16   7.9e-004   3.3e+001
-
-letter-probability matrix: alength= 4 w= 6 nsites= 89 E= 3.4e-005
-0.000000 0.000000 1.000000 0.000000
-1.000000 0.000000 0.000000 0.000000
-1.000000 0.000000 0.000000 0.000000
-0.000000 0.000000 1.000000 0.000000
-1.000000 0.000000 0.000000 0.000000
-0.494382 0.000000 0.000000 0.505618
-
-
-MOTIF SMUGGA DREME-4
-
-#             Word        Pos        Neg    P-value    E-value
-# BEST      SMUGGA        110         47   9.1e-008   3.7e-003
-#           GAUGGA         22          6   1.7e-003   7.1e+001
-#           GCUGGA         33         14   3.6e-003   1.5e+002
-#           CCUGGA         32         15   8.6e-003   3.5e+002
-#           CAUGGA         29         13   9.1e-003   3.7e+002
-
-letter-probability matrix: alength= 4 w= 6 nsites= 119 E= 3.7e-003
-0.000000 0.529412 0.470588 0.000000
-0.428571 0.571429 0.000000 0.000000
-0.000000 0.000000 0.000000 1.000000
-0.000000 0.000000 1.000000 0.000000
-0.000000 0.000000 1.000000 0.000000
-1.000000 0.000000 0.000000 0.000000
-
-
-# Stopping reason: E-value threshold exceeded
-#    Running time: 13.45 seconds
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme1.xml
--- a/test-data/dreme1.xml Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,32 +0,0 @@
-<dreme version="4.12.0" release="Tue Jun 27 16:22:50 2017 -0700">
-  <model>
-    <command_line>dreme -oc dreme_out -rna -norc -p dreme_test_sites.fa -e 0</command_line>
-    <positives name="dreme test sites" count="1000" file="dreme_test_sites.fa" last_mod_date="Thu Apr 19 19:09:45 CEST 2018" />
-    <negatives name="shuffled positive sequences" count="1000" from="shuffled"/>
-    <alphabet name="RNA" like="rna">
-      <letter id="A" symbol="A" name="Adenine" colour="CC0000"/>
-      <letter id="C" symbol="C" name="Cytosine" colour="0000CC"/>
-      <letter id="G" symbol="G" name="Guanine" colour="FFB300"/>
-      <letter id="U" symbol="U" aliases="T" name="Uracil" colour="008000"/>
-      <letter id="N" symbol="N" aliases="X." equals="ACGU" name="Any base"/>
-      <letter id="V" symbol="V" equals="ACG" name="Not U"/>
-      <letter id="H" symbol="H" equals="ACU" name="Not G"/>
-      <letter id="D" symbol="D" equals="AGU" name="Not C"/>
-      <letter id="B" symbol="B" equals="CGU" name="Not A"/>
-      <letter id="M" symbol="M" equals="AC" name="Amino"/>
-      <letter id="R" symbol="R" equals="AG" name="Purine"/>
-      <letter id="W" symbol="W" equals="AU" name="Weak"/>
-      <letter id="S" symbol="S" equals="CG" name="Strong"/>
-      <letter id="Y" symbol="Y" equals="CU" name="Pyrimidine"/>
-      <letter id="K" symbol="K" equals="GU" name="Keto"/>
-    </alphabet>
-    <strands>none</strands>
-    <background A="0.221" C="0.245" G="0.221" U="0.312" from="dataset"/>
-    <stop evalue="0"/>
-    <ngen>100</ngen>
-    <add_pv_thresh>0.01</add_pv_thresh>
-    <seed>1</seed>
-    <host>ThinkPad-T450s</host>
-    <when>Thu Apr 19 19:40:08 CEST 2018</when>
-  </model>
-  <motifs>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme2.html
--- a/test-data/dreme2.html Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,6118 +0,0 @@\n-<!DOCTYPE HTML>\n-<html>\n-  <head>\n-    <meta charset="UTF-8">\n-    <title>DREME</title>\n-    <script>\n-      // @JSON_VAR data\n-      var data = {\n-        "program": "dreme",\n-        "version": "4.12.0",\n-        "release": "Tue Jun 27 16:22:50 2017 -0700",\n-        "cmd": [\n-          "dreme", "-oc", "dreme_out_adv", "-rna", "-norc", "-p",\n-          "dreme_test_sites.fa", "-e", "0.00001", "-mink", "4", "-maxk", "10"\n-        ],\n-        "options": {\n-          "revcomp": false,\n-          "ngen": 100,\n-          "add_pv_thresh": 0.01,\n-          "seed": 1,\n-          "stop": {\n-            "evalue": "1e-05"\n-          }\n-        },\n-        "alphabet": {\n-          "name": "RNA",\n-          "like": "rna",\n-          "ncore": 4,\n-          "symbols": [\n-            {\n-              "symbol": "A",\n-              "name": "Adenine",\n-              "colour": "CC0000"\n-            }, {\n-              "symbol": "C",\n-              "name": "Cytosine",\n-              "colour": "0000CC"\n-            }, {\n-              "symbol": "G",\n-              "name": "Guanine",\n-              "colour": "FFB300"\n-            }, {\n-              "symbol": "U",\n-              "aliases": "T",\n-              "name": "Uracil",\n-              "colour": "008000"\n-            }, {\n-              "symbol": "N",\n-              "aliases": "X.",\n-              "name": "Any base",\n-              "equals": "ACGU"\n-            }, {\n-              "symbol": "V",\n-              "name": "Not U",\n-              "equals": "ACG"\n-            }, {\n-              "symbol": "H",\n-              "name": "Not G",\n-              "equals": "ACU"\n-            }, {\n-              "symbol": "D",\n-              "name": "Not C",\n-              "equals": "AGU"\n-            }, {\n-              "symbol": "B",\n-              "name": "Not A",\n-              "equals": "CGU"\n-            }, {\n-              "symbol": "M",\n-              "name": "Amino",\n-              "equals": "AC"\n-            }, {\n-              "symbol": "R",\n-              "name": "Purine",\n-              "equals": "AG"\n-            }, {\n-              "symbol": "W",\n-              "name": "Weak",\n-              "equals": "AU"\n-            }, {\n-              "symbol": "S",\n-              "name": "Strong",\n-              "equals": "CG"\n-            }, {\n-              "symbol": "Y",\n-              "name": "Pyrimidine",\n-              "equals": "CU"\n-            }, {\n-              "symbol": "K",\n-              "name": "Keto",\n-              "equals": "GU"\n-            }\n-          ]\n-        },\n-        "background": {\n-          "freqs": [0.221, 0.245, 0.221, 0.312]\n-        },\n-        "sequence_db": {\n-          "name": "dreme test sites",\n-          "file": "dreme_test_sites.fa",\n-          "lmod": "Thu Apr 19 19:09:45 CEST 2018",\n-          "count": 1000\n-        },\n-        "control_db": {\n-          "name": "shuffled positive sequences",\n-          "from": "shuffled",\n-          "count": 1000,\n-          "freqs": [0.221, 0.245, 0.221, 0.312]\n-        },\n-        "motifs": [\n-          {\n-            "db": 0,\n-            "id": "UUYUCY",\n-            "alt": "DREME-1",\n-            "len": 6,\n-            "nsites": 459,\n-            "evalue": "3.3e-013",\n-            "p": 387,\n-            "n": 210,\n-            "pvalue": "2.6e-018",\n-            "unerased_evalue": "3.3e-013",\n-            "pwm": [\n-              [0.000000, 0.000000, 0.000000, 1.000000], \n-              [0.000000, 0.000000, 0.000000, 1.000000], \n-              [0.000000, 0.294118, 0.000000, 0.705882], \n-              [0.000000, 0.000000, 0.000000, 1.000000], \n-              [0.000000, 1.000000, 0.000000, 0.000000], \n-              [0.000000, 0.474946, 0.000000, 0.525054]\n-            ],\n-            "matches": [\n-              {\n-                "seq": "UUUUCC",\n-                "p": 147,\n-                "n": 75,\n-                "pvalue": "1.8e-007",\n-                "evalue": "2.2e-002"\n-              }, {\n-           '..b'alphabet"></td>\n-          <td id="ins_seq_count"></td>\n-        </tr>\n-      </table>\n-      <script>\n-      {\n-        var db = data.sequence_db;\n-        $("ins_seq_source").innerHTML = db.file;\n-        $("ins_seq_alphabet").innerHTML = dreme_alphabet.get_alphabet_name();\n-        $("ins_seq_count").innerHTML = db.count;\n-      }\n-      </script>\n-      <h4>Control Sequences</h4>\n-      <table id="seq_info" class="inputs">\n-        <tr><th>Source <div class="help" data-topic="pop_seq_source"></div></th>\n-          <th>Sequence Count <div class="help" data-topic="pop_seq_count"></div></th>\n-        </tr>\n-        <tr>\n-          <td id="ins_cseq_source"></td>\n-          <td id="ins_cseq_count"></td>\n-        </tr>\n-      </table>\n-      <script>\n-      {\n-        var db = data.control_db;\n-        if (db.from == "shuffled") {\n-          $("ins_cseq_source").innerHTML = "Shuffled Sequences";\n-        } else {\n-          $("ins_cseq_source").innerHTML = db.file;\n-        }\n-        $("ins_cseq_count").innerHTML = db.count;\n-      }\n-      </script>\n-      <h4>Background</h4>\n-      <span id="alpha_bg"></span>\n-      <script>\n-      {\n-        $("alpha_bg").appendChild(make_alpha_bg(dreme_alphabet, data.control_db.freqs));\n-      }\n-      </script>\n-      <h4>Other Settings</h4>\n-      <table id="tbl_settings" class="inputs hide_advanced">\n-        <tr>\n-          <th>Strand Handling</th>\n-          <td id="opt_strand">\n-            <span class="strand_none">This alphabet only has one strand</span>\n-            <span class="strand_given">Only the given strand is processed</span>\n-            <span class="strand_both">Both the given and reverse complement strands are processed</span>\n-          </td>\n-        </tr>\n-        <tr><th># REs to Generalize</th><td id="opt_ngen"></td></tr>\n-        <tr><th>Shuffle Seed</th><td id="opt_seed"></td></tr>\n-        <tr><th>E-value Threshold</th><td id="opt_stop_evalue"></td></tr>\n-        <tr><th>Max Motif Count</th><td id="opt_stop_count"></td></tr>\n-        <tr><th>Max Run Time</th><td id="opt_stop_time"></td></tr>\n-      </table>\n-      <script>\n-      {\n-        $("opt_strand").className = (dreme_alphabet.has_complement() ? (data.options.revcomp ? "both" : "given") : "none");\n-        $("opt_ngen").innerHTML = data.options.ngen;\n-        $("opt_seed").innerHTML = data.options.seed;\n-        $("opt_stop_evalue").innerHTML = data.options.stop.evalue;\n-        $("opt_stop_count").innerHTML = (typeof data.options.stop.count == "number" ? data.options.stop.count : "No maximum motif count.");\n-        $("opt_stop_time").innerHTML = (typeof data.options.stop.time == "number" ? data.options.stop.time + " seconds." : "No maximum running time.");\n-      }\n-      </script>\n-    </div>\n-    <!-- list information on this program -->\n-    <div id="info_sec" class="bar" style="position:relative">\n-      <div style="position: absolute; right: 0;"><a href="#inputs_sec">Previous</a> <a href="#">Top</a></div>\n-      <div class="subsection">\n-        <h5 id="version">DREME version</h5>\n-        <span id="ins_version"></span> \n-        (Release date: <span id="ins_release"></span>)<br>\n-      </div>\n-      <script>\n-        $("ins_version").innerHTML = data["version"];\n-        $("ins_release").innerHTML = data["release"];\n-      </script>\n-      <div class="subsection">\n-        <h5 id="reference">Reference</h5>\n-        <span class="citation">\n-          Timothy L. Bailey, "DREME: Motif discovery in transcription factor ChIP-seq data", <i>Bioinformatics</i>, <b>27</b>(12):1653-1659, 2011.\n-          <a href="http://bioinformatics.oxfordjournals.org/content/27/12/1653">[full text]</a>\n-        </span>\n-      </div>\n-      <div class="subsection">\n-        <h5 id="command">Command line</h5>\n-        <textarea id="cmd" rows="3" style="width:100%;" readonly="readonly">\n-        </textarea>\n-        <script>$("cmd").value = data["cmd"].join(" ");</script>\n-      </div>\n-    </div>\n-    \n-  </body>\n-</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme2.txt
--- a/test-data/dreme2.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,68 +0,0 @@
-# DREME 4.12.0
-#     command: dreme -oc dreme_out_adv -rna -norc -p dreme_test_sites.fa -e 0.00001 -mink 4 -maxk 10
-#   positives: 1000 from dreme_test_sites.fa (Thu Apr 19 19:09:45 CEST 2018)
-#   negatives: 1000 from shuffled positives
-#        host: ThinkPad-T450s
-#        when: Tue Apr 24 18:44:36 CEST 2018
-
-MEME version 4.12.0
-
-ALPHABET "RNA" RNA-LIKE
-A "Adenine" CC0000
-C "Cytosine" 0000CC
-G "Guanine" FFB300
-U "Uracil" 008000
-N "Any base" = ACGU
-X = ACGU
-. = ACGU
-V "Not U" = ACG
-H "Not G" = ACU
-D "Not C" = AGU
-B "Not A" = CGU
-M "Amino" = AC
-R "Purine" = AG
-W "Weak" = AU
-S "Strong" = CG
-Y "Pyrimidine" = CU
-K "Keto" = GU
-T = U
-END ALPHABET
-
-Background letter frequencies (from dataset):
-A 0.221 C 0.245 G 0.221 U 0.312
-
-
-MOTIF UUYUCY DREME-1
-
-#             Word        Pos        Neg    P-value    E-value
-# BEST      UUYUCY        387        210   2.6e-018   3.3e-013
-#           UUUUCC        147         75   1.8e-007   2.2e-002
-#           UUUUCU        155         94   2.2e-005   2.8e+000
-#           UUCUCU         94         51   1.3e-004   1.7e+001
-#           UUCUCC         75         42   1.1e-003   1.4e+002
-
-letter-probability matrix: alength= 4 w= 6 nsites= 459 E= 3.3e-013
-0.000000 0.000000 0.000000 1.000000
-0.000000 0.000000 0.000000 1.000000
-0.000000 0.294118 0.000000 0.705882
-0.000000 0.000000 0.000000 1.000000
-0.000000 1.000000 0.000000 0.000000
-0.000000 0.474946 0.000000 0.525054
-
-
-MOTIF YAGG DREME-2
-
-#             Word        Pos        Neg    P-value    E-value
-# BEST        YAGG        600        416   1.1e-016   1.4e-011
-#             CAGG        441        304   1.5e-010   1.8e-005
-#             UAGG        232        165   1.1e-004   1.3e+001
-
-letter-probability matrix: alength= 4 w= 4 nsites= 793 E= 1.4e-011
-0.000000 0.692308 0.000000 0.307692
-1.000000 0.000000 0.000000 0.000000
-0.000000 0.000000 1.000000 0.000000
-0.000000 0.000000 1.000000 0.000000
-
-
-# Stopping reason: E-value threshold exceeded
-#    Running time: 18.17 seconds
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme_fimo_input_1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dreme_fimo_input_1.xml Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,99 @@
+<dreme version="4.12.0" release="Tue Jun 27 16:22:50 2017 -0700">
+  <model>
+    <command_line>dreme -oc dreme_out -norc -p input.fa</command_line>
+    <positives name="input" count="1000" file="input.fa" last_mod_date="Wed May 02 16:37:00 CEST 2018" />
+    <negatives name="shuffled positive sequences" count="1000" from="shuffled"/>
+    <alphabet name="DNA" like="dna">
+      <letter id="A" symbol="A" complement="T" name="Adenine" colour="CC0000"/>
+      <letter id="C" symbol="C" complement="G" name="Cytosine" colour="0000CC"/>
+      <letter id="G" symbol="G" complement="C" name="Guanine" colour="FFB300"/>
+      <letter id="T" symbol="T" aliases="U" complement="A" name="Thymine" colour="008000"/>
+      <letter id="N" symbol="N" aliases="X." equals="ACGT" name="Any base"/>
+      <letter id="V" symbol="V" equals="ACG" name="Not T"/>
+      <letter id="H" symbol="H" equals="ACT" name="Not G"/>
+      <letter id="D" symbol="D" equals="AGT" name="Not C"/>
+      <letter id="B" symbol="B" equals="CGT" name="Not A"/>
+      <letter id="M" symbol="M" equals="AC" name="Amino"/>
+      <letter id="R" symbol="R" equals="AG" name="Purine"/>
+      <letter id="W" symbol="W" equals="AT" name="Weak"/>
+      <letter id="S" symbol="S" equals="CG" name="Strong"/>
+      <letter id="Y" symbol="Y" equals="CT" name="Pyrimidine"/>
+      <letter id="K" symbol="K" equals="GT" name="Keto"/>
+    </alphabet>
+    <strands>given</strands>
+    <background A="0.294" C="0.209" G="0.164" T="0.333" from="dataset"/>
+    <stop evalue="0.05"/>
+    <ngen>100</ngen>
+    <add_pv_thresh>0.01</add_pv_thresh>
+    <seed>1</seed>
+    <host>ThinkPad-T450s</host>
+    <when>Wed May 02 16:45:34 CEST 2018</when>
+  </model>
+  <motifs>
+    <motif id="m01" alt="DREME-1" seq="ACTAAYH" length="7" nsites="405" p="371" n="75" pvalue="9.7e-061" evalue="4.9e-056" unerased_evalue="4.9e-056">
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" T="1.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.600000" G="0.000000" T="0.400000"/>
+      <pos A="0.471605" C="0.244444" G="0.000000" T="0.283951"/>
+      <match seq="ACTAACA" p="108" n="10" pvalue="2.6e-023" evalue="1.3e-018"/>
+      <match seq="ACTAACC" p="62" n="7" pvalue="9.5e-013" evalue="4.8e-008"/>
+      <match seq="ACTAATA" p="77" n="18" pvalue="1.5e-010" evalue="7.4e-006"/>
+      <match seq="ACTAACT" p="62" n="16" pvalue="4.4e-008" evalue="2.2e-003"/>
+      <match seq="ACTAATC" p="35" n="8" pvalue="1.7e-005" evalue="8.8e-001"/>
+      <match seq="ACTAATT" p="48" n="20" pvalue="3.7e-004" evalue="1.9e+001"/>
+    </motif>
+    <motif id="m02" alt="DREME-2" seq="YTAACA" length="6" nsites="197" p="170" n="59" pvalue="1.8e-015" evalue="9.0e-011" unerased_evalue="6.8e-031">
+      <pos A="0.000000" C="0.365482" G="0.000000" T="0.634518"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" T="1.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" T="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <match seq="TTAACA" p="118" n="38" pvalue="8.6e-012" evalue="4.3e-007"/>
+      <match seq="CTAACA" p="63" n="21" pvalue="1.6e-006" evalue="7.8e-002"/>
+    </motif>
+    <motif id="m03" alt="DREME-3" seq="TCTGT" length="5" nsites="220" p="208" n="101" pvalue="1.9e-011" evalue="9.2e-007" unerased_evalue="4.4e-007">
+      <pos A="0.000000" C="0.000000" G="0.000000" T="1.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" T="1.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" T="1.000000"/>
+      <match seq="TCTGT" p="208" n="101" pvalue="1.9e-011" evalue="9.2e-007"/>
+    </motif>
+    <motif id="m04" alt="DREME-4" seq="SCCAGG" length="6" nsites="58" p="58" n="14" pvalue="5.0e-008" evalue="2.4e-003" unerased_evalue="1.5e-003">
+      <pos A="0.000000" C="0.620690" G="0.379310" T="0.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" T="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" T="0.000000"/>
+      <match seq="CCCAGG" p="36" n="9" pvalue="2.7e-005" evalue="1.3e+000"/>
+      <match seq="GCCAGG" p="22" n="5" pvalue="7.1e-004" evalue="3.4e+001"/>
+    </motif>
+    <motif id="m05" alt="DREME-5" seq="CCAGCAY" length="7" nsites="27" p="27" n="1" pvalue="9.2e-008" evalue="4.4e-003" unerased_evalue="2.3e-003">
+      <pos A="0.000000" C="1.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" T="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" T="0.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" T="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.518519" G="0.000000" T="0.481481"/>
+      <match seq="CCAGCAC" p="14" n="0" pvalue="5.8e-005" evalue="2.8e+000"/>
+      <match seq="CCAGCAT" p="13" n="1" pvalue="8.9e-004" evalue="4.3e+001"/>
+    </motif>
+    <motif id="m06" alt="DREME-6" seq="GMATGT" length="6" nsites="60" p="59" n="18" pvalue="9.9e-007" evalue="4.8e-002" unerased_evalue="4.8e-002">
+      <pos A="0.000000" C="0.000000" G="1.000000" T="0.000000"/>
+      <pos A="0.533333" C="0.466667" G="0.000000" T="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" T="1.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" T="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" T="1.000000"/>
+      <match seq="GCATGT" p="28" n="8" pvalue="5.4e-004" evalue="2.6e+001"/>
+      <match seq="GAATGT" p="32" n="11" pvalue="8.6e-004" evalue="4.1e+001"/>
+    </motif>
+  </motifs>
+  <run_time cpu="30.86" real="30.97" stop="evalue"/>
+</dreme>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme_output_test1.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dreme_output_test1.html Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,6198 @@\n+<!DOCTYPE HTML>\n+<html>\n+  <head>\n+    <meta charset="UTF-8">\n+    <title>DREME</title>\n+    <script>\n+      // @JSON_VAR data\n+      var data = {\n+        "program": "dreme",\n+        "version": "4.12.0",\n+        "release": "Tue Jun 27 16:22:50 2017 -0700",\n+        "cmd": [\n+          "dreme", "-o", "./dreme_test1_out", "-p", "dreme_test_sites.fa",\n+          "-norc", "-rna", "-s", "1"\n+        ],\n+        "options": {\n+          "revcomp": false,\n+          "ngen": 100,\n+          "add_pv_thresh": 0.01,\n+          "seed": 1,\n+          "stop": {\n+            "evalue": "0.05"\n+          }\n+        },\n+        "alphabet": {\n+          "name": "RNA",\n+          "like": "rna",\n+          "ncore": 4,\n+          "symbols": [\n+            {\n+              "symbol": "A",\n+              "name": "Adenine",\n+              "colour": "CC0000"\n+            }, {\n+              "symbol": "C",\n+              "name": "Cytosine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "G",\n+              "name": "Guanine",\n+              "colour": "FFB300"\n+            }, {\n+              "symbol": "U",\n+              "aliases": "T",\n+              "name": "Uracil",\n+              "colour": "008000"\n+            }, {\n+              "symbol": "N",\n+              "aliases": "X.",\n+              "name": "Any base",\n+              "equals": "ACGU"\n+            }, {\n+              "symbol": "V",\n+              "name": "Not U",\n+              "equals": "ACG"\n+            }, {\n+              "symbol": "H",\n+              "name": "Not G",\n+              "equals": "ACU"\n+            }, {\n+              "symbol": "D",\n+              "name": "Not C",\n+              "equals": "AGU"\n+            }, {\n+              "symbol": "B",\n+              "name": "Not A",\n+              "equals": "CGU"\n+            }, {\n+              "symbol": "M",\n+              "name": "Amino",\n+              "equals": "AC"\n+            }, {\n+              "symbol": "R",\n+              "name": "Purine",\n+              "equals": "AG"\n+            }, {\n+              "symbol": "W",\n+              "name": "Weak",\n+              "equals": "AU"\n+            }, {\n+              "symbol": "S",\n+              "name": "Strong",\n+              "equals": "CG"\n+            }, {\n+              "symbol": "Y",\n+              "name": "Pyrimidine",\n+              "equals": "CU"\n+            }, {\n+              "symbol": "K",\n+              "name": "Keto",\n+              "equals": "GU"\n+            }\n+          ]\n+        },\n+        "background": {\n+          "freqs": [0.221, 0.245, 0.221, 0.312]\n+        },\n+        "sequence_db": {\n+          "name": "dreme test sites",\n+          "file": "dreme_test_sites.fa",\n+          "lmod": "Thu Apr 26 15:09:03 CEST 2018",\n+          "count": 1000\n+        },\n+        "control_db": {\n+          "name": "shuffled positive sequences",\n+          "from": "shuffled",\n+          "count": 1000,\n+          "freqs": [0.221, 0.245, 0.221, 0.312]\n+        },\n+        "motifs": [\n+          {\n+            "db": 0,\n+            "id": "UUYUCY",\n+            "alt": "DREME-1",\n+            "len": 6,\n+            "nsites": 459,\n+            "evalue": "1.2e-013",\n+            "p": 387,\n+            "n": 210,\n+            "pvalue": "2.6e-018",\n+            "unerased_evalue": "1.2e-013",\n+            "pwm": [\n+              [0.000000, 0.000000, 0.000000, 1.000000], \n+              [0.000000, 0.000000, 0.000000, 1.000000], \n+              [0.000000, 0.294118, 0.000000, 0.705882], \n+              [0.000000, 0.000000, 0.000000, 1.000000], \n+              [0.000000, 1.000000, 0.000000, 0.000000], \n+              [0.000000, 0.474946, 0.000000, 0.525054]\n+            ],\n+            "matches": [\n+              {\n+                "seq": "UUUUCC",\n+                "p": 147,\n+                "n": 75,\n+                "pvalue": "1.8e-007",\n+                "evalue": "8.1e-003"\n+              }, {\n+                "seq": "UUUUCU",\n+          '..b'alphabet"></td>\n+          <td id="ins_seq_count"></td>\n+        </tr>\n+      </table>\n+      <script>\n+      {\n+        var db = data.sequence_db;\n+        $("ins_seq_source").innerHTML = db.file;\n+        $("ins_seq_alphabet").innerHTML = dreme_alphabet.get_alphabet_name();\n+        $("ins_seq_count").innerHTML = db.count;\n+      }\n+      </script>\n+      <h4>Control Sequences</h4>\n+      <table id="seq_info" class="inputs">\n+        <tr><th>Source <div class="help" data-topic="pop_seq_source"></div></th>\n+          <th>Sequence Count <div class="help" data-topic="pop_seq_count"></div></th>\n+        </tr>\n+        <tr>\n+          <td id="ins_cseq_source"></td>\n+          <td id="ins_cseq_count"></td>\n+        </tr>\n+      </table>\n+      <script>\n+      {\n+        var db = data.control_db;\n+        if (db.from == "shuffled") {\n+          $("ins_cseq_source").innerHTML = "Shuffled Sequences";\n+        } else {\n+          $("ins_cseq_source").innerHTML = db.file;\n+        }\n+        $("ins_cseq_count").innerHTML = db.count;\n+      }\n+      </script>\n+      <h4>Background</h4>\n+      <span id="alpha_bg"></span>\n+      <script>\n+      {\n+        $("alpha_bg").appendChild(make_alpha_bg(dreme_alphabet, data.control_db.freqs));\n+      }\n+      </script>\n+      <h4>Other Settings</h4>\n+      <table id="tbl_settings" class="inputs hide_advanced">\n+        <tr>\n+          <th>Strand Handling</th>\n+          <td id="opt_strand">\n+            <span class="strand_none">This alphabet only has one strand</span>\n+            <span class="strand_given">Only the given strand is processed</span>\n+            <span class="strand_both">Both the given and reverse complement strands are processed</span>\n+          </td>\n+        </tr>\n+        <tr><th># REs to Generalize</th><td id="opt_ngen"></td></tr>\n+        <tr><th>Shuffle Seed</th><td id="opt_seed"></td></tr>\n+        <tr><th>E-value Threshold</th><td id="opt_stop_evalue"></td></tr>\n+        <tr><th>Max Motif Count</th><td id="opt_stop_count"></td></tr>\n+        <tr><th>Max Run Time</th><td id="opt_stop_time"></td></tr>\n+      </table>\n+      <script>\n+      {\n+        $("opt_strand").className = (dreme_alphabet.has_complement() ? (data.options.revcomp ? "both" : "given") : "none");\n+        $("opt_ngen").innerHTML = data.options.ngen;\n+        $("opt_seed").innerHTML = data.options.seed;\n+        $("opt_stop_evalue").innerHTML = data.options.stop.evalue;\n+        $("opt_stop_count").innerHTML = (typeof data.options.stop.count == "number" ? data.options.stop.count : "No maximum motif count.");\n+        $("opt_stop_time").innerHTML = (typeof data.options.stop.time == "number" ? data.options.stop.time + " seconds." : "No maximum running time.");\n+      }\n+      </script>\n+    </div>\n+    <!-- list information on this program -->\n+    <div id="info_sec" class="bar" style="position:relative">\n+      <div style="position: absolute; right: 0;"><a href="#inputs_sec">Previous</a> <a href="#">Top</a></div>\n+      <div class="subsection">\n+        <h5 id="version">DREME version</h5>\n+        <span id="ins_version"></span> \n+        (Release date: <span id="ins_release"></span>)<br>\n+      </div>\n+      <script>\n+        $("ins_version").innerHTML = data["version"];\n+        $("ins_release").innerHTML = data["release"];\n+      </script>\n+      <div class="subsection">\n+        <h5 id="reference">Reference</h5>\n+        <span class="citation">\n+          Timothy L. Bailey, "DREME: Motif discovery in transcription factor ChIP-seq data", <i>Bioinformatics</i>, <b>27</b>(12):1653-1659, 2011.\n+          <a href="http://bioinformatics.oxfordjournals.org/content/27/12/1653">[full text]</a>\n+        </span>\n+      </div>\n+      <div class="subsection">\n+        <h5 id="command">Command line</h5>\n+        <textarea id="cmd" rows="3" style="width:100%;" readonly="readonly">\n+        </textarea>\n+        <script>$("cmd").value = data["cmd"].join(" ");</script>\n+      </div>\n+    </div>\n+    \n+  </body>\n+</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme_output_test1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dreme_output_test1.txt Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,102 @@
+# DREME 4.12.0
+#     command: dreme -o ./dreme_test1_out -p dreme_test_sites.fa -norc -rna -s 1
+#   positives: 1000 from dreme_test_sites.fa (Thu Apr 26 15:09:03 CEST 2018)
+#   negatives: 1000 from shuffled positives
+#        host: ThinkPad-T450s
+#        when: Thu May 03 13:22:29 CEST 2018
+
+MEME version 4.12.0
+
+ALPHABET "RNA" RNA-LIKE
+A "Adenine" CC0000
+C "Cytosine" 0000CC
+G "Guanine" FFB300
+U "Uracil" 008000
+N "Any base" = ACGU
+X = ACGU
+. = ACGU
+V "Not U" = ACG
+H "Not G" = ACU
+D "Not C" = AGU
+B "Not A" = CGU
+M "Amino" = AC
+R "Purine" = AG
+W "Weak" = AU
+S "Strong" = CG
+Y "Pyrimidine" = CU
+K "Keto" = GU
+T = U
+END ALPHABET
+
+Background letter frequencies (from dataset):
+A 0.221 C 0.245 G 0.221 U 0.312
+
+
+MOTIF UUYUCY DREME-1
+
+#             Word        Pos        Neg    P-value    E-value
+# BEST      UUYUCY        387        210   2.6e-018   1.2e-013
+#           UUUUCC        147         75   1.8e-007   8.1e-003
+#           UUUUCU        155         94   2.2e-005   1.0e+000
+#           UUCUCU         94         51   1.3e-004   6.1e+000
+#           UUCUCC         75         42   1.1e-003   5.0e+001
+
+letter-probability matrix: alength= 4 w= 6 nsites= 459 E= 1.2e-013
+0.000000 0.000000 0.000000 1.000000
+0.000000 0.000000 0.000000 1.000000
+0.000000 0.294118 0.000000 0.705882
+0.000000 0.000000 0.000000 1.000000
+0.000000 1.000000 0.000000 0.000000
+0.000000 0.474946 0.000000 0.525054
+
+
+MOTIF YAGG DREME-2
+
+#             Word        Pos        Neg    P-value    E-value
+# BEST        YAGG        600        416   1.1e-016   5.1e-012
+#             CAGG        441        304   1.5e-010   6.6e-006
+#             UAGG        232        165   1.1e-004   4.7e+000
+
+letter-probability matrix: alength= 4 w= 4 nsites= 793 E= 5.1e-012
+0.000000 0.692308 0.000000 0.307692
+1.000000 0.000000 0.000000 0.000000
+0.000000 0.000000 1.000000 0.000000
+0.000000 0.000000 1.000000 0.000000
+
+
+MOTIF GAAGAW DREME-3
+
+#             Word        Pos        Neg    P-value    E-value
+# BEST      GAAGAW         81         22   8.2e-010   3.4e-005
+#           GAAGAU         45          7   2.4e-008   9.9e-004
+#           GAAGAA         40         16   7.9e-004   3.3e+001
+
+letter-probability matrix: alength= 4 w= 6 nsites= 89 E= 3.4e-005
+0.000000 0.000000 1.000000 0.000000
+1.000000 0.000000 0.000000 0.000000
+1.000000 0.000000 0.000000 0.000000
+0.000000 0.000000 1.000000 0.000000
+1.000000 0.000000 0.000000 0.000000
+0.494382 0.000000 0.000000 0.505618
+
+
+MOTIF SMUGGA DREME-4
+
+#             Word        Pos        Neg    P-value    E-value
+# BEST      SMUGGA        110         47   9.1e-008   3.7e-003
+#           GAUGGA         22          6   1.7e-003   7.1e+001
+#           GCUGGA         33         14   3.6e-003   1.5e+002
+#           CCUGGA         32         15   8.6e-003   3.5e+002
+#           CAUGGA         29         13   9.1e-003   3.7e+002
+
+letter-probability matrix: alength= 4 w= 6 nsites= 119 E= 3.7e-003
+0.000000 0.529412 0.470588 0.000000
+0.428571 0.571429 0.000000 0.000000
+0.000000 0.000000 0.000000 1.000000
+0.000000 0.000000 1.000000 0.000000
+0.000000 0.000000 1.000000 0.000000
+1.000000 0.000000 0.000000 0.000000
+
+
+# Stopping reason: E-value threshold exceeded
+#    Running time: 13.95 seconds
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme_output_test1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dreme_output_test1.xml Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,77 @@
+<dreme version="4.12.0" release="Tue Jun 27 16:22:50 2017 -0700">
+  <model>
+    <command_line>dreme -o ./dreme_test1_out -p dreme_test_sites.fa -norc -rna -s 1</command_line>
+    <positives name="dreme test sites" count="1000" file="dreme_test_sites.fa" last_mod_date="Thu Apr 26 15:09:03 CEST 2018" />
+    <negatives name="shuffled positive sequences" count="1000" from="shuffled"/>
+    <alphabet name="RNA" like="rna">
+      <letter id="A" symbol="A" name="Adenine" colour="CC0000"/>
+      <letter id="C" symbol="C" name="Cytosine" colour="0000CC"/>
+      <letter id="G" symbol="G" name="Guanine" colour="FFB300"/>
+      <letter id="U" symbol="U" aliases="T" name="Uracil" colour="008000"/>
+      <letter id="N" symbol="N" aliases="X." equals="ACGU" name="Any base"/>
+      <letter id="V" symbol="V" equals="ACG" name="Not U"/>
+      <letter id="H" symbol="H" equals="ACU" name="Not G"/>
+      <letter id="D" symbol="D" equals="AGU" name="Not C"/>
+      <letter id="B" symbol="B" equals="CGU" name="Not A"/>
+      <letter id="M" symbol="M" equals="AC" name="Amino"/>
+      <letter id="R" symbol="R" equals="AG" name="Purine"/>
+      <letter id="W" symbol="W" equals="AU" name="Weak"/>
+      <letter id="S" symbol="S" equals="CG" name="Strong"/>
+      <letter id="Y" symbol="Y" equals="CU" name="Pyrimidine"/>
+      <letter id="K" symbol="K" equals="GU" name="Keto"/>
+    </alphabet>
+    <strands>none</strands>
+    <background A="0.221" C="0.245" G="0.221" U="0.312" from="dataset"/>
+    <stop evalue="0.05"/>
+    <ngen>100</ngen>
+    <add_pv_thresh>0.01</add_pv_thresh>
+    <seed>1</seed>
+    <host>ThinkPad-T450s</host>
+    <when>Thu May 03 13:22:29 CEST 2018</when>
+  </model>
+  <motifs>
+    <motif id="m01" alt="DREME-1" seq="UUYUCY" length="6" nsites="459" p="387" n="210" pvalue="2.6e-018" evalue="1.2e-013" unerased_evalue="1.2e-013">
+      <pos A="0.000000" C="0.000000" G="0.000000" U="1.000000"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" U="1.000000"/>
+      <pos A="0.000000" C="0.294118" G="0.000000" U="0.705882"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" U="1.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.474946" G="0.000000" U="0.525054"/>
+      <match seq="UUUUCC" p="147" n="75" pvalue="1.8e-007" evalue="8.1e-003"/>
+      <match seq="UUUUCU" p="155" n="94" pvalue="2.2e-005" evalue="1.0e+000"/>
+      <match seq="UUCUCU" p="94" n="51" pvalue="1.3e-004" evalue="6.1e+000"/>
+      <match seq="UUCUCC" p="75" n="42" pvalue="1.1e-003" evalue="5.0e+001"/>
+    </motif>
+    <motif id="m02" alt="DREME-2" seq="YAGG" length="4" nsites="793" p="600" n="416" pvalue="1.1e-016" evalue="5.1e-012" unerased_evalue="2.4e-012">
+      <pos A="0.000000" C="0.692308" G="0.000000" U="0.307692"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" U="0.000000"/>
+      <match seq="CAGG" p="441" n="304" pvalue="1.5e-010" evalue="6.6e-006"/>
+      <match seq="UAGG" p="232" n="165" pvalue="1.1e-004" evalue="4.7e+000"/>
+    </motif>
+    <motif id="m03" alt="DREME-3" seq="GAAGAW" length="6" nsites="89" p="81" n="22" pvalue="8.2e-010" evalue="3.4e-005" unerased_evalue="3.5e-004">
+      <pos A="0.000000" C="0.000000" G="1.000000" U="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" U="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" U="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" U="0.000000"/>
+      <pos A="0.494382" C="0.000000" G="0.000000" U="0.505618"/>
+      <match seq="GAAGAU" p="45" n="7" pvalue="2.4e-008" evalue="9.9e-004"/>
+      <match seq="GAAGAA" p="40" n="16" pvalue="7.9e-004" evalue="3.3e+001"/>
+    </motif>
+    <motif id="m04" alt="DREME-4" seq="SMUGGA" length="6" nsites="119" p="110" n="47" pvalue="9.1e-008" evalue="3.7e-003" unerased_evalue="2.6e-005">
+      <pos A="0.000000" C="0.529412" G="0.470588" U="0.000000"/>
+      <pos A="0.428571" C="0.571429" G="0.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" U="1.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" U="0.000000"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" U="0.000000"/>
+      <match seq="GAUGGA" p="22" n="6" pvalue="1.7e-003" evalue="7.1e+001"/>
+      <match seq="GCUGGA" p="33" n="14" pvalue="3.6e-003" evalue="1.5e+002"/>
+      <match seq="CCUGGA" p="32" n="15" pvalue="8.6e-003" evalue="3.5e+002"/>
+      <match seq="CAUGGA" p="29" n="13" pvalue="9.1e-003" evalue="3.7e+002"/>
+    </motif>
+  </motifs>
+  <run_time cpu="13.95" real="13.95" stop="evalue"/>
+</dreme>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme_output_test2.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dreme_output_test2.html Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,6119 @@\n+<!DOCTYPE HTML>\n+<html>\n+  <head>\n+    <meta charset="UTF-8">\n+    <title>DREME</title>\n+    <script>\n+      // @JSON_VAR data\n+      var data = {\n+        "program": "dreme",\n+        "version": "4.12.0",\n+        "release": "Tue Jun 27 16:22:50 2017 -0700",\n+        "cmd": [\n+          "dreme", "-o", "./dreme_test2_out", "-p", "dreme_test_sites.fa",\n+          "-norc", "-rna", "-s", "1", "-e", "1e-05", "-g", "100", "-mink",\n+          "4", "-maxk", "10"\n+        ],\n+        "options": {\n+          "revcomp": false,\n+          "ngen": 100,\n+          "add_pv_thresh": 0.01,\n+          "seed": 1,\n+          "stop": {\n+            "evalue": "1e-05"\n+          }\n+        },\n+        "alphabet": {\n+          "name": "RNA",\n+          "like": "rna",\n+          "ncore": 4,\n+          "symbols": [\n+            {\n+              "symbol": "A",\n+              "name": "Adenine",\n+              "colour": "CC0000"\n+            }, {\n+              "symbol": "C",\n+              "name": "Cytosine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "G",\n+              "name": "Guanine",\n+              "colour": "FFB300"\n+            }, {\n+              "symbol": "U",\n+              "aliases": "T",\n+              "name": "Uracil",\n+              "colour": "008000"\n+            }, {\n+              "symbol": "N",\n+              "aliases": "X.",\n+              "name": "Any base",\n+              "equals": "ACGU"\n+            }, {\n+              "symbol": "V",\n+              "name": "Not U",\n+              "equals": "ACG"\n+            }, {\n+              "symbol": "H",\n+              "name": "Not G",\n+              "equals": "ACU"\n+            }, {\n+              "symbol": "D",\n+              "name": "Not C",\n+              "equals": "AGU"\n+            }, {\n+              "symbol": "B",\n+              "name": "Not A",\n+              "equals": "CGU"\n+            }, {\n+              "symbol": "M",\n+              "name": "Amino",\n+              "equals": "AC"\n+            }, {\n+              "symbol": "R",\n+              "name": "Purine",\n+              "equals": "AG"\n+            }, {\n+              "symbol": "W",\n+              "name": "Weak",\n+              "equals": "AU"\n+            }, {\n+              "symbol": "S",\n+              "name": "Strong",\n+              "equals": "CG"\n+            }, {\n+              "symbol": "Y",\n+              "name": "Pyrimidine",\n+              "equals": "CU"\n+            }, {\n+              "symbol": "K",\n+              "name": "Keto",\n+              "equals": "GU"\n+            }\n+          ]\n+        },\n+        "background": {\n+          "freqs": [0.221, 0.245, 0.221, 0.312]\n+        },\n+        "sequence_db": {\n+          "name": "dreme test sites",\n+          "file": "dreme_test_sites.fa",\n+          "lmod": "Thu Apr 26 15:09:03 CEST 2018",\n+          "count": 1000\n+        },\n+        "control_db": {\n+          "name": "shuffled positive sequences",\n+          "from": "shuffled",\n+          "count": 1000,\n+          "freqs": [0.221, 0.245, 0.221, 0.312]\n+        },\n+        "motifs": [\n+          {\n+            "db": 0,\n+            "id": "UUYUCY",\n+            "alt": "DREME-1",\n+            "len": 6,\n+            "nsites": 459,\n+            "evalue": "3.3e-013",\n+            "p": 387,\n+            "n": 210,\n+            "pvalue": "2.6e-018",\n+            "unerased_evalue": "3.3e-013",\n+            "pwm": [\n+              [0.000000, 0.000000, 0.000000, 1.000000], \n+              [0.000000, 0.000000, 0.000000, 1.000000], \n+              [0.000000, 0.294118, 0.000000, 0.705882], \n+              [0.000000, 0.000000, 0.000000, 1.000000], \n+              [0.000000, 1.000000, 0.000000, 0.000000], \n+              [0.000000, 0.474946, 0.000000, 0.525054]\n+            ],\n+            "matches": [\n+              {\n+                "seq": "UUUUCC",\n+                "p": 147,\n+                "n": 75,\n+                "pvalue": "1.8e-007",\n+                "evalue": "2.2e-0'..b'alphabet"></td>\n+          <td id="ins_seq_count"></td>\n+        </tr>\n+      </table>\n+      <script>\n+      {\n+        var db = data.sequence_db;\n+        $("ins_seq_source").innerHTML = db.file;\n+        $("ins_seq_alphabet").innerHTML = dreme_alphabet.get_alphabet_name();\n+        $("ins_seq_count").innerHTML = db.count;\n+      }\n+      </script>\n+      <h4>Control Sequences</h4>\n+      <table id="seq_info" class="inputs">\n+        <tr><th>Source <div class="help" data-topic="pop_seq_source"></div></th>\n+          <th>Sequence Count <div class="help" data-topic="pop_seq_count"></div></th>\n+        </tr>\n+        <tr>\n+          <td id="ins_cseq_source"></td>\n+          <td id="ins_cseq_count"></td>\n+        </tr>\n+      </table>\n+      <script>\n+      {\n+        var db = data.control_db;\n+        if (db.from == "shuffled") {\n+          $("ins_cseq_source").innerHTML = "Shuffled Sequences";\n+        } else {\n+          $("ins_cseq_source").innerHTML = db.file;\n+        }\n+        $("ins_cseq_count").innerHTML = db.count;\n+      }\n+      </script>\n+      <h4>Background</h4>\n+      <span id="alpha_bg"></span>\n+      <script>\n+      {\n+        $("alpha_bg").appendChild(make_alpha_bg(dreme_alphabet, data.control_db.freqs));\n+      }\n+      </script>\n+      <h4>Other Settings</h4>\n+      <table id="tbl_settings" class="inputs hide_advanced">\n+        <tr>\n+          <th>Strand Handling</th>\n+          <td id="opt_strand">\n+            <span class="strand_none">This alphabet only has one strand</span>\n+            <span class="strand_given">Only the given strand is processed</span>\n+            <span class="strand_both">Both the given and reverse complement strands are processed</span>\n+          </td>\n+        </tr>\n+        <tr><th># REs to Generalize</th><td id="opt_ngen"></td></tr>\n+        <tr><th>Shuffle Seed</th><td id="opt_seed"></td></tr>\n+        <tr><th>E-value Threshold</th><td id="opt_stop_evalue"></td></tr>\n+        <tr><th>Max Motif Count</th><td id="opt_stop_count"></td></tr>\n+        <tr><th>Max Run Time</th><td id="opt_stop_time"></td></tr>\n+      </table>\n+      <script>\n+      {\n+        $("opt_strand").className = (dreme_alphabet.has_complement() ? (data.options.revcomp ? "both" : "given") : "none");\n+        $("opt_ngen").innerHTML = data.options.ngen;\n+        $("opt_seed").innerHTML = data.options.seed;\n+        $("opt_stop_evalue").innerHTML = data.options.stop.evalue;\n+        $("opt_stop_count").innerHTML = (typeof data.options.stop.count == "number" ? data.options.stop.count : "No maximum motif count.");\n+        $("opt_stop_time").innerHTML = (typeof data.options.stop.time == "number" ? data.options.stop.time + " seconds." : "No maximum running time.");\n+      }\n+      </script>\n+    </div>\n+    <!-- list information on this program -->\n+    <div id="info_sec" class="bar" style="position:relative">\n+      <div style="position: absolute; right: 0;"><a href="#inputs_sec">Previous</a> <a href="#">Top</a></div>\n+      <div class="subsection">\n+        <h5 id="version">DREME version</h5>\n+        <span id="ins_version"></span> \n+        (Release date: <span id="ins_release"></span>)<br>\n+      </div>\n+      <script>\n+        $("ins_version").innerHTML = data["version"];\n+        $("ins_release").innerHTML = data["release"];\n+      </script>\n+      <div class="subsection">\n+        <h5 id="reference">Reference</h5>\n+        <span class="citation">\n+          Timothy L. Bailey, "DREME: Motif discovery in transcription factor ChIP-seq data", <i>Bioinformatics</i>, <b>27</b>(12):1653-1659, 2011.\n+          <a href="http://bioinformatics.oxfordjournals.org/content/27/12/1653">[full text]</a>\n+        </span>\n+      </div>\n+      <div class="subsection">\n+        <h5 id="command">Command line</h5>\n+        <textarea id="cmd" rows="3" style="width:100%;" readonly="readonly">\n+        </textarea>\n+        <script>$("cmd").value = data["cmd"].join(" ");</script>\n+      </div>\n+    </div>\n+    \n+  </body>\n+</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme_output_test2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dreme_output_test2.txt Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,68 @@
+# DREME 4.12.0
+#     command: dreme -o ./dreme_test2_out -p dreme_test_sites.fa -norc -rna -s 1 -e 1e-05 -g 100 -mink 4 -maxk 10
+#   positives: 1000 from dreme_test_sites.fa (Thu Apr 26 15:09:03 CEST 2018)
+#   negatives: 1000 from shuffled positives
+#        host: ThinkPad-T450s
+#        when: Thu May 03 13:22:11 CEST 2018
+
+MEME version 4.12.0
+
+ALPHABET "RNA" RNA-LIKE
+A "Adenine" CC0000
+C "Cytosine" 0000CC
+G "Guanine" FFB300
+U "Uracil" 008000
+N "Any base" = ACGU
+X = ACGU
+. = ACGU
+V "Not U" = ACG
+H "Not G" = ACU
+D "Not C" = AGU
+B "Not A" = CGU
+M "Amino" = AC
+R "Purine" = AG
+W "Weak" = AU
+S "Strong" = CG
+Y "Pyrimidine" = CU
+K "Keto" = GU
+T = U
+END ALPHABET
+
+Background letter frequencies (from dataset):
+A 0.221 C 0.245 G 0.221 U 0.312
+
+
+MOTIF UUYUCY DREME-1
+
+#             Word        Pos        Neg    P-value    E-value
+# BEST      UUYUCY        387        210   2.6e-018   3.3e-013
+#           UUUUCC        147         75   1.8e-007   2.2e-002
+#           UUUUCU        155         94   2.2e-005   2.8e+000
+#           UUCUCU         94         51   1.3e-004   1.7e+001
+#           UUCUCC         75         42   1.1e-003   1.4e+002
+
+letter-probability matrix: alength= 4 w= 6 nsites= 459 E= 3.3e-013
+0.000000 0.000000 0.000000 1.000000
+0.000000 0.000000 0.000000 1.000000
+0.000000 0.294118 0.000000 0.705882
+0.000000 0.000000 0.000000 1.000000
+0.000000 1.000000 0.000000 0.000000
+0.000000 0.474946 0.000000 0.525054
+
+
+MOTIF YAGG DREME-2
+
+#             Word        Pos        Neg    P-value    E-value
+# BEST        YAGG        600        416   1.1e-016   1.4e-011
+#             CAGG        441        304   1.5e-010   1.8e-005
+#             UAGG        232        165   1.1e-004   1.3e+001
+
+letter-probability matrix: alength= 4 w= 4 nsites= 793 E= 1.4e-011
+0.000000 0.692308 0.000000 0.307692
+1.000000 0.000000 0.000000 0.000000
+0.000000 0.000000 1.000000 0.000000
+0.000000 0.000000 1.000000 0.000000
+
+
+# Stopping reason: E-value threshold exceeded
+#    Running time: 15.97 seconds
b
diff -r 793225b11202 -r b48e673af4e8 test-data/dreme_output_test2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dreme_output_test2.xml Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,55 @@
+<dreme version="4.12.0" release="Tue Jun 27 16:22:50 2017 -0700">
+  <model>
+    <command_line>dreme -o ./dreme_test2_out -p dreme_test_sites.fa -norc -rna -s 1 -e 1e-05 -g 100 -mink 4 -maxk 10</command_line>
+    <positives name="dreme test sites" count="1000" file="dreme_test_sites.fa" last_mod_date="Thu Apr 26 15:09:03 CEST 2018" />
+    <negatives name="shuffled positive sequences" count="1000" from="shuffled"/>
+    <alphabet name="RNA" like="rna">
+      <letter id="A" symbol="A" name="Adenine" colour="CC0000"/>
+      <letter id="C" symbol="C" name="Cytosine" colour="0000CC"/>
+      <letter id="G" symbol="G" name="Guanine" colour="FFB300"/>
+      <letter id="U" symbol="U" aliases="T" name="Uracil" colour="008000"/>
+      <letter id="N" symbol="N" aliases="X." equals="ACGU" name="Any base"/>
+      <letter id="V" symbol="V" equals="ACG" name="Not U"/>
+      <letter id="H" symbol="H" equals="ACU" name="Not G"/>
+      <letter id="D" symbol="D" equals="AGU" name="Not C"/>
+      <letter id="B" symbol="B" equals="CGU" name="Not A"/>
+      <letter id="M" symbol="M" equals="AC" name="Amino"/>
+      <letter id="R" symbol="R" equals="AG" name="Purine"/>
+      <letter id="W" symbol="W" equals="AU" name="Weak"/>
+      <letter id="S" symbol="S" equals="CG" name="Strong"/>
+      <letter id="Y" symbol="Y" equals="CU" name="Pyrimidine"/>
+      <letter id="K" symbol="K" equals="GU" name="Keto"/>
+    </alphabet>
+    <strands>none</strands>
+    <background A="0.221" C="0.245" G="0.221" U="0.312" from="dataset"/>
+    <stop evalue="1e-05"/>
+    <ngen>100</ngen>
+    <add_pv_thresh>0.01</add_pv_thresh>
+    <seed>1</seed>
+    <host>ThinkPad-T450s</host>
+    <when>Thu May 03 13:22:11 CEST 2018</when>
+  </model>
+  <motifs>
+    <motif id="m01" alt="DREME-1" seq="UUYUCY" length="6" nsites="459" p="387" n="210" pvalue="2.6e-018" evalue="3.3e-013" unerased_evalue="3.3e-013">
+      <pos A="0.000000" C="0.000000" G="0.000000" U="1.000000"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" U="1.000000"/>
+      <pos A="0.000000" C="0.294118" G="0.000000" U="0.705882"/>
+      <pos A="0.000000" C="0.000000" G="0.000000" U="1.000000"/>
+      <pos A="0.000000" C="1.000000" G="0.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.474946" G="0.000000" U="0.525054"/>
+      <match seq="UUUUCC" p="147" n="75" pvalue="1.8e-007" evalue="2.2e-002"/>
+      <match seq="UUUUCU" p="155" n="94" pvalue="2.2e-005" evalue="2.8e+000"/>
+      <match seq="UUCUCU" p="94" n="51" pvalue="1.3e-004" evalue="1.7e+001"/>
+      <match seq="UUCUCC" p="75" n="42" pvalue="1.1e-003" evalue="1.4e+002"/>
+    </motif>
+    <motif id="m02" alt="DREME-2" seq="YAGG" length="4" nsites="793" p="600" n="416" pvalue="1.1e-016" evalue="1.4e-011" unerased_evalue="6.3e-012">
+      <pos A="0.000000" C="0.692308" G="0.000000" U="0.307692"/>
+      <pos A="1.000000" C="0.000000" G="0.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" U="0.000000"/>
+      <pos A="0.000000" C="0.000000" G="1.000000" U="0.000000"/>
+      <match seq="CAGG" p="441" n="304" pvalue="1.5e-010" evalue="1.8e-005"/>
+      <match seq="UAGG" p="232" n="165" pvalue="1.1e-004" evalue="1.3e+001"/>
+    </motif>
+  </motifs>
+  <run_time cpu="15.97" real="15.97" stop="evalue"/>
+</dreme>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_background_probs_hsa_chrM.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_background_probs_hsa_chrM.txt Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,7 @@
+# 0-order Markov frequencies from file hsa_chrM.fa
+# seqs: 1    min: 16569    max: 16569    avg: 16569.0    sum: 16569    alph: DNA
+# order 0
+A 3.093e-01
+C 3.127e-01
+G 1.309e-01
+T 2.471e-01
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_almost-gff_1.txt
--- a/test-data/fimo_output_almost-gff_1.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,100 +0,0 @@\n-##gff-version 3\n-phiX174\tfimo\tpolypeptide_motif\t1388\t1398\t102\t+\t.\tName=1;ID=1-1-phiX174;pvalue=6.36e-11;qvalue= 1.25e-09;sequence=AATATCTATAA;\n-phiX174\tfimo\tpolypeptide_motif\t847\t857\t102\t+\t.\tName=1;ID=1-2-phiX174;pvalue=7.02e-11;qvalue= 1.25e-09;sequence=AATGTCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t2301\t2311\t99.6\t+\t.\tName=1;ID=1-3-phiX174;pvalue=1.08e-10;qvalue= 1.29e-09;sequence=AGGTTATAACG;\n-phiX174\tfimo\tpolypeptide_motif\t5063\t5073\t95.6\t+\t.\tName=1;ID=1-4-phiX174;pvalue=2.73e-10;qvalue= 2.25e-09;sequence=AGGAGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t989\t999\t 95\t+\t.\tName=1;ID=1-5-phiX174;pvalue=3.15e-10;qvalue= 2.25e-09;sequence=TGAGGATAAAT;\n-phiX174\tfimo\tpolypeptide_motif\t4713\t4723\t91.1\t+\t.\tName=1;ID=1-6-phiX174;pvalue=7.74e-10;qvalue= 3.48e-09;sequence=GACTGCTATCA;\n-phiX174\tfimo\tpolypeptide_motif\t5048\t5058\t90.7\t+\t.\tName=1;ID=1-7-phiX174;pvalue=8.51e-10;qvalue= 3.48e-09;sequence=TGCTGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t855\t865\t90.6\t+\t.\tName=1;ID=1-8-phiX174;pvalue=8.64e-10;qvalue= 3.48e-09;sequence=AAGGTAAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3155\t3165\t90.1\t+\t.\tName=1;ID=1-9-phiX174;pvalue=9.76e-10;qvalue= 3.48e-09;sequence=TATGGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t5009\t5019\t90.1\t+\t.\tName=1;ID=1-10-phiX174;pvalue=9.76e-10;qvalue= 3.48e-09;sequence=TGTGGCTAAAT;\n-phiX174\tfimo\tpolypeptide_motif\t814\t824\t88.9\t+\t.\tName=1;ID=1-11-phiX174;pvalue=1.28e-09;qvalue= 4.14e-09;sequence=TGCGTCAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t2832\t2842\t88.5\t+\t.\tName=1;ID=1-12-phiX174;pvalue=1.42e-09;qvalue= 4.23e-09;sequence=TTGGTCTAACT;\n-phiX174\tfimo\tpolypeptide_motif\t3830\t3840\t87.7\t+\t.\tName=1;ID=1-13-phiX174;pvalue=1.7e-09;qvalue= 4.68e-09;sequence=TATTGATAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t3560\t3570\t87.2\t+\t.\tName=1;ID=1-14-phiX174;pvalue=1.89e-09;qvalue= 4.82e-09;sequence=TGCGTCTATTA;\n-phiX174\tfimo\tpolypeptide_motif\t2882\t2892\t86.4\t+\t.\tName=1;ID=1-15-phiX174;pvalue=2.29e-09;qvalue= 5.46e-09;sequence=AGGTTATTAAA;\n-phiX174\tfimo\tpolypeptide_motif\t4453\t4463\t85.9\t+\t.\tName=1;ID=1-16-phiX174;pvalue=2.58e-09;qvalue= 5.75e-09;sequence=AAGGTATTAAG;\n-phiX174\tfimo\tpolypeptide_motif\t2493\t2503\t85.1\t+\t.\tName=1;ID=1-17-phiX174;pvalue=3.06e-09;qvalue= 5.79e-09;sequence=GACACCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t4104\t4114\t85.1\t+\t.\tName=1;ID=1-18-phiX174;pvalue=3.08e-09;qvalue= 5.79e-09;sequence=GGCTTCCATAA;\n-phiX174\tfimo\tpolypeptide_motif\t4955\t4965\t85.1\t+\t.\tName=1;ID=1-19-phiX174;pvalue=3.08e-09;qvalue= 5.79e-09;sequence=TGATGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t1885\t1895\t84.4\t+\t.\tName=1;ID=1-20-phiX174;pvalue=3.61e-09;qvalue= 6.45e-09;sequence=TGCGACTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t3376\t3386\t84.2\t+\t.\tName=1;ID=1-21-phiX174;pvalue=3.81e-09;qvalue= 6.48e-09;sequence=AGAATCAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t52\t62\t83.9\t+\t.\tName=1;ID=1-22-phiX174;pvalue=4.06e-09;qvalue= 6.58e-09;sequence=TGAGTCGAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t1390\t1400\t83.7\t+\t.\tName=1;ID=1-23-phiX174;pvalue=4.26e-09;qvalue= 6.61e-09;sequence=TATCTATAACA;\n-phiX174\tfimo\tpolypeptide_motif\t2017\t2027\t83.4\t+\t.\tName=1;ID=1-24-phiX174;pvalue=4.6e-09;qvalue= 6.85e-09;sequence=TTCGTCTAAGA;\n-phiX174\tfimo\tpolypeptide_motif\t1000\t1010\t83.1\t+\t.\tName=1;ID=1-25-phiX174;pvalue=4.88e-09;qvalue= 6.97e-09;sequence=TATGTCTAATA;\n-phiX174\tfimo\tpolypeptide_motif\t1555\t1565\t82.5\t+\t.\tName=1;ID=1-26-phiX174;pvalue=5.58e-09;qvalue= 7.37e-09;sequence=GACTTCTACCA;\n-phiX174\tfimo\tpolypeptide_motif\t4430\t4440\t82.5\t+\t.\tName=1;ID=1-27-phiX174;pvalue=5.62e-09;qvalue= 7.37e-09;sequence=TGAGTATAATT;\n-phiX174\tfimo\tpolypeptide_motif\t1927\t1937\t82.3\t+\t.\tName=1;ID=1-28-phiX174;pvalue=5.82e-09;qvalue= 7.37e-09;sequence=GACTTATACCG;\n-phiX174\tfimo\tpolypeptide_motif\t2981\t2991\t82.1\t+\t.\tName=1;ID=1-29-phiX174;pvalue=6.13e-09;qvalue= 7.37e-09;sequence=CATGTCTAAAT;\n-phiX174\tfimo\tpolypeptide_motif\t4203\t4213\t 82\t+\t.\tName=1;ID=1-30-phiX174;pvalue=6.34e-09;qvalue= 7.37e-09;sequence=GACGGCCATAA;\n-phiX174\tfimo\tpolypeptide_motif\t1669\t1679\t81.9\t+\t.\tName=1;ID=1-31-phiX174;pvalue=6.4e-09;qvalue= 7.37e-09;sequence=TGGAGG'..b'= 1.31e-08;sequence=AAATGAGAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t1491\t1501\t75.9\t+\t.\tName=1;ID=1-69-phiX174;pvalue=2.55e-08;qvalue= 1.32e-08;sequence=GCCATCTCAAA;\n-phiX174\tfimo\tpolypeptide_motif\t434\t444\t75.7\t+\t.\tName=1;ID=1-70-phiX174;pvalue=2.67e-08;qvalue= 1.36e-08;sequence=GGCCTCTATTA;\n-phiX174\tfimo\tpolypeptide_motif\t4565\t4575\t75.6\t+\t.\tName=1;ID=1-71-phiX174;pvalue=2.73e-08;qvalue= 1.36e-08;sequence=TTGGTTTATCG;\n-phiX174\tfimo\tpolypeptide_motif\t102\t112\t75.6\t+\t.\tName=1;ID=1-72-phiX174;pvalue=2.75e-08;qvalue= 1.36e-08;sequence=GAATTAAATCG;\n-phiX174\tfimo\tpolypeptide_motif\t903\t913\t75.5\t+\t.\tName=1;ID=1-73-phiX174;pvalue=2.82e-08;qvalue= 1.38e-08;sequence=GAGGTACTAAA;\n-phiX174\tfimo\tpolypeptide_motif\t4748\t4758\t75.2\t+\t.\tName=1;ID=1-74-phiX174;pvalue=3.01e-08;qvalue= 1.45e-08;sequence=TACAGCTAATG;\n-phiX174\tfimo\tpolypeptide_motif\t2622\t2632\t 75\t+\t.\tName=1;ID=1-75-phiX174;pvalue=3.16e-08;qvalue= 1.5e-08;sequence=TGCTGATATTG;\n-phiX174\tfimo\tpolypeptide_motif\t467\t477\t74.7\t+\t.\tName=1;ID=1-76-phiX174;pvalue=3.35e-08;qvalue= 1.57e-08;sequence=TTTGGATTTAA;\n-phiX174\tfimo\tpolypeptide_motif\t4033\t4043\t74.6\t+\t.\tName=1;ID=1-77-phiX174;pvalue=3.44e-08;qvalue= 1.58e-08;sequence=AGCGTATCGAG;\n-phiX174\tfimo\tpolypeptide_motif\t1348\t1358\t74.6\t+\t.\tName=1;ID=1-78-phiX174;pvalue=3.46e-08;qvalue= 1.58e-08;sequence=TACCAATAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t239\t249\t74.4\t+\t.\tName=1;ID=1-79-phiX174;pvalue=3.62e-08;qvalue= 1.64e-08;sequence=AGTGGCTTAAT;\n-phiX174\tfimo\tpolypeptide_motif\t500\t510\t74.1\t+\t.\tName=1;ID=1-80-phiX174;pvalue=3.84e-08;qvalue= 1.71e-08;sequence=GACGAGTAACA;\n-phiX174\tfimo\tpolypeptide_motif\t3001\t3011\t 74\t+\t.\tName=1;ID=1-81-phiX174;pvalue=3.93e-08;qvalue= 1.73e-08;sequence=GCGGTCAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3776\t3786\t 74\t+\t.\tName=1;ID=1-82-phiX174;pvalue=3.98e-08;qvalue= 1.73e-08;sequence=TATTTCTAATG;\n-phiX174\tfimo\tpolypeptide_motif\t2026\t2036\t73.9\t+\t.\tName=1;ID=1-83-phiX174;pvalue=4.06e-08;qvalue= 1.75e-08;sequence=GAAGTTTAAGA;\n-phiX174\tfimo\tpolypeptide_motif\t4237\t4247\t73.8\t+\t.\tName=1;ID=1-84-phiX174;pvalue=4.12e-08;qvalue= 1.75e-08;sequence=AGTTTGTATCT;\n-phiX174\tfimo\tpolypeptide_motif\t803\t813\t73.7\t+\t.\tName=1;ID=1-85-phiX174;pvalue=4.24e-08;qvalue= 1.78e-08;sequence=AGAAGAAAACG;\n-phiX174\tfimo\tpolypeptide_motif\t3770\t3780\t73.6\t+\t.\tName=1;ID=1-86-phiX174;pvalue=4.35e-08;qvalue= 1.81e-08;sequence=AAAGGATATTT;\n-phiX174\tfimo\tpolypeptide_motif\t3429\t3439\t73.5\t+\t.\tName=1;ID=1-87-phiX174;pvalue=4.45e-08;qvalue= 1.82e-08;sequence=GAGATGCAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t99\t109\t73.5\t+\t.\tName=1;ID=1-88-phiX174;pvalue=4.48e-08;qvalue= 1.82e-08;sequence=TACGAATTAAA;\n-phiX174\tfimo\tpolypeptide_motif\t67\t77\t73.2\t+\t.\tName=1;ID=1-89-phiX174;pvalue=4.78e-08;qvalue= 1.92e-08;sequence=TCTTGATAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t5332\t5342\t72.9\t+\t.\tName=1;ID=1-90-phiX174;pvalue=5.13e-08;qvalue= 2.01e-08;sequence=ATCTGCTCAAA;\n-phiX174\tfimo\tpolypeptide_motif\t277\t287\t72.9\t+\t.\tName=1;ID=1-91-phiX174;pvalue=5.14e-08;qvalue= 2.01e-08;sequence=TTTAGATATGA;\n-phiX174\tfimo\tpolypeptide_motif\t4338\t4348\t72.8\t+\t.\tName=1;ID=1-92-phiX174;pvalue=5.18e-08;qvalue= 2.01e-08;sequence=GGGGACGAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3812\t3822\t72.8\t+\t.\tName=1;ID=1-93-phiX174;pvalue=5.28e-08;qvalue= 2.03e-08;sequence=GGTTGATATTT;\n-phiX174\tfimo\tpolypeptide_motif\t1909\t1919\t72.6\t+\t.\tName=1;ID=1-94-phiX174;pvalue=5.51e-08;qvalue= 2.08e-08;sequence=TAACGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t3000\t3010\t72.6\t+\t.\tName=1;ID=1-95-phiX174;pvalue=5.54e-08;qvalue= 2.08e-08;sequence=GGCGGTCAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3891\t3901\t72.4\t+\t.\tName=1;ID=1-96-phiX174;pvalue=5.75e-08;qvalue= 2.11e-08;sequence=ATTGGCTCTAA;\n-phiX174\tfimo\tpolypeptide_motif\t3079\t3089\t72.4\t+\t.\tName=1;ID=1-97-phiX174;pvalue=5.76e-08;qvalue= 2.11e-08;sequence=CTGGTATTAAA;\n-phiX174\tfimo\tpolypeptide_motif\t37\t47\t72.4\t+\t.\tName=1;ID=1-98-phiX174;pvalue=5.79e-08;qvalue= 2.11e-08;sequence=TTCGGATATTT;\n-phiX174\tfimo\tpolypeptide_motif\t380\t390\t72.2\t+\t.\tName=1;ID=1-99-phiX174;pvalue=6.01e-08;qvalue= 2.17e-08;sequence=GTAAGAAATCA;\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_almost-gff_2.txt
--- a/test-data/fimo_output_almost-gff_2.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,100 +0,0 @@\n-##gff-version 3\n-phiX174\tfimo\tpolypeptide_motif\t1388\t1398\t102\t+\t.\tName=1;ID=1-1-phiX174;pvalue=6.36e-11;sequence=AATATCTATAA;\n-phiX174\tfimo\tpolypeptide_motif\t847\t857\t102\t+\t.\tName=1;ID=1-2-phiX174;pvalue=7.02e-11;sequence=AATGTCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t2301\t2311\t99.6\t+\t.\tName=1;ID=1-3-phiX174;pvalue=1.08e-10;sequence=AGGTTATAACG;\n-phiX174\tfimo\tpolypeptide_motif\t5063\t5073\t95.6\t+\t.\tName=1;ID=1-4-phiX174;pvalue=2.73e-10;sequence=AGGAGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t989\t999\t 95\t+\t.\tName=1;ID=1-5-phiX174;pvalue=3.15e-10;sequence=TGAGGATAAAT;\n-phiX174\tfimo\tpolypeptide_motif\t4713\t4723\t91.1\t+\t.\tName=1;ID=1-6-phiX174;pvalue=7.74e-10;sequence=GACTGCTATCA;\n-phiX174\tfimo\tpolypeptide_motif\t5048\t5058\t90.7\t+\t.\tName=1;ID=1-7-phiX174;pvalue=8.51e-10;sequence=TGCTGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t855\t865\t90.6\t+\t.\tName=1;ID=1-8-phiX174;pvalue=8.64e-10;sequence=AAGGTAAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3155\t3165\t90.1\t+\t.\tName=1;ID=1-9-phiX174;pvalue=9.76e-10;sequence=TATGGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t5009\t5019\t90.1\t+\t.\tName=1;ID=1-10-phiX174;pvalue=9.76e-10;sequence=TGTGGCTAAAT;\n-phiX174\tfimo\tpolypeptide_motif\t814\t824\t88.9\t+\t.\tName=1;ID=1-11-phiX174;pvalue=1.28e-09;sequence=TGCGTCAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t2832\t2842\t88.5\t+\t.\tName=1;ID=1-12-phiX174;pvalue=1.42e-09;sequence=TTGGTCTAACT;\n-phiX174\tfimo\tpolypeptide_motif\t3830\t3840\t87.7\t+\t.\tName=1;ID=1-13-phiX174;pvalue=1.7e-09;sequence=TATTGATAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t3560\t3570\t87.2\t+\t.\tName=1;ID=1-14-phiX174;pvalue=1.89e-09;sequence=TGCGTCTATTA;\n-phiX174\tfimo\tpolypeptide_motif\t2882\t2892\t86.4\t+\t.\tName=1;ID=1-15-phiX174;pvalue=2.29e-09;sequence=AGGTTATTAAA;\n-phiX174\tfimo\tpolypeptide_motif\t4453\t4463\t85.9\t+\t.\tName=1;ID=1-16-phiX174;pvalue=2.58e-09;sequence=AAGGTATTAAG;\n-phiX174\tfimo\tpolypeptide_motif\t2493\t2503\t85.1\t+\t.\tName=1;ID=1-17-phiX174;pvalue=3.06e-09;sequence=GACACCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t4104\t4114\t85.1\t+\t.\tName=1;ID=1-18-phiX174;pvalue=3.08e-09;sequence=GGCTTCCATAA;\n-phiX174\tfimo\tpolypeptide_motif\t4955\t4965\t85.1\t+\t.\tName=1;ID=1-19-phiX174;pvalue=3.08e-09;sequence=TGATGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t1885\t1895\t84.4\t+\t.\tName=1;ID=1-20-phiX174;pvalue=3.61e-09;sequence=TGCGACTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t3376\t3386\t84.2\t+\t.\tName=1;ID=1-21-phiX174;pvalue=3.81e-09;sequence=AGAATCAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t52\t62\t83.9\t+\t.\tName=1;ID=1-22-phiX174;pvalue=4.06e-09;sequence=TGAGTCGAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t1390\t1400\t83.7\t+\t.\tName=1;ID=1-23-phiX174;pvalue=4.26e-09;sequence=TATCTATAACA;\n-phiX174\tfimo\tpolypeptide_motif\t2017\t2027\t83.4\t+\t.\tName=1;ID=1-24-phiX174;pvalue=4.6e-09;sequence=TTCGTCTAAGA;\n-phiX174\tfimo\tpolypeptide_motif\t1000\t1010\t83.1\t+\t.\tName=1;ID=1-25-phiX174;pvalue=4.88e-09;sequence=TATGTCTAATA;\n-phiX174\tfimo\tpolypeptide_motif\t1555\t1565\t82.5\t+\t.\tName=1;ID=1-26-phiX174;pvalue=5.58e-09;sequence=GACTTCTACCA;\n-phiX174\tfimo\tpolypeptide_motif\t4430\t4440\t82.5\t+\t.\tName=1;ID=1-27-phiX174;pvalue=5.62e-09;sequence=TGAGTATAATT;\n-phiX174\tfimo\tpolypeptide_motif\t1927\t1937\t82.3\t+\t.\tName=1;ID=1-28-phiX174;pvalue=5.82e-09;sequence=GACTTATACCG;\n-phiX174\tfimo\tpolypeptide_motif\t2981\t2991\t82.1\t+\t.\tName=1;ID=1-29-phiX174;pvalue=6.13e-09;sequence=CATGTCTAAAT;\n-phiX174\tfimo\tpolypeptide_motif\t4203\t4213\t 82\t+\t.\tName=1;ID=1-30-phiX174;pvalue=6.34e-09;sequence=GACGGCCATAA;\n-phiX174\tfimo\tpolypeptide_motif\t1669\t1679\t81.9\t+\t.\tName=1;ID=1-31-phiX174;pvalue=6.4e-09;sequence=TGGAGGTAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3260\t3270\t81.5\t+\t.\tName=1;ID=1-32-phiX174;pvalue=7.01e-09;sequence=CGCTGATAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t3047\t3057\t81.3\t+\t.\tName=1;ID=1-33-phiX174;pvalue=7.4e-09;sequence=TACCGATAACA;\n-phiX174\tfimo\tpolypeptide_motif\t4176\t4186\t81.2\t+\t.\tName=1;ID=1-34-phiX174;pvalue=7.6e-09;sequence=GAGTTCGATAA;\n-phiX174\tfimo\tpolypeptide_motif\t4118\t4128\t81.1\t+\t.\tName=1;ID=1-35-phiX174;pvalue=7.7e-09;sequence=GATGGATAACC;\n-phiX174\tfimo\tpolypeptide_motif\t5370\t5380\t80.9\t+\t.\tName=1;ID=1-36-phiX174;p'..b'GT;\n-phiX174\tfimo\tpolypeptide_motif\t4217\t4227\t76.7\t+\t.\tName=1;ID=1-64-phiX174;pvalue=2.15e-08;sequence=TGCTTCTGACG;\n-phiX174\tfimo\tpolypeptide_motif\t4262\t4272\t76.6\t+\t.\tName=1;ID=1-65-phiX174;pvalue=2.18e-08;sequence=AATGGATGAAT;\n-phiX174\tfimo\tpolypeptide_motif\t3569\t3579\t76.5\t+\t.\tName=1;ID=1-66-phiX174;pvalue=2.26e-08;sequence=TATGGAAAACA;\n-phiX174\tfimo\tpolypeptide_motif\t194\t204\t76.4\t+\t.\tName=1;ID=1-67-phiX174;pvalue=2.29e-08;sequence=ATCAACTAACG;\n-phiX174\tfimo\tpolypeptide_motif\t131\t141\t 76\t+\t.\tName=1;ID=1-68-phiX174;pvalue=2.49e-08;sequence=AAATGAGAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t1491\t1501\t75.9\t+\t.\tName=1;ID=1-69-phiX174;pvalue=2.55e-08;sequence=GCCATCTCAAA;\n-phiX174\tfimo\tpolypeptide_motif\t434\t444\t75.7\t+\t.\tName=1;ID=1-70-phiX174;pvalue=2.67e-08;sequence=GGCCTCTATTA;\n-phiX174\tfimo\tpolypeptide_motif\t4565\t4575\t75.6\t+\t.\tName=1;ID=1-71-phiX174;pvalue=2.73e-08;sequence=TTGGTTTATCG;\n-phiX174\tfimo\tpolypeptide_motif\t102\t112\t75.6\t+\t.\tName=1;ID=1-72-phiX174;pvalue=2.75e-08;sequence=GAATTAAATCG;\n-phiX174\tfimo\tpolypeptide_motif\t903\t913\t75.5\t+\t.\tName=1;ID=1-73-phiX174;pvalue=2.82e-08;sequence=GAGGTACTAAA;\n-phiX174\tfimo\tpolypeptide_motif\t4748\t4758\t75.2\t+\t.\tName=1;ID=1-74-phiX174;pvalue=3.01e-08;sequence=TACAGCTAATG;\n-phiX174\tfimo\tpolypeptide_motif\t2622\t2632\t 75\t+\t.\tName=1;ID=1-75-phiX174;pvalue=3.16e-08;sequence=TGCTGATATTG;\n-phiX174\tfimo\tpolypeptide_motif\t467\t477\t74.7\t+\t.\tName=1;ID=1-76-phiX174;pvalue=3.35e-08;sequence=TTTGGATTTAA;\n-phiX174\tfimo\tpolypeptide_motif\t4033\t4043\t74.6\t+\t.\tName=1;ID=1-77-phiX174;pvalue=3.44e-08;sequence=AGCGTATCGAG;\n-phiX174\tfimo\tpolypeptide_motif\t1348\t1358\t74.6\t+\t.\tName=1;ID=1-78-phiX174;pvalue=3.46e-08;sequence=TACCAATAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t239\t249\t74.4\t+\t.\tName=1;ID=1-79-phiX174;pvalue=3.62e-08;sequence=AGTGGCTTAAT;\n-phiX174\tfimo\tpolypeptide_motif\t500\t510\t74.1\t+\t.\tName=1;ID=1-80-phiX174;pvalue=3.84e-08;sequence=GACGAGTAACA;\n-phiX174\tfimo\tpolypeptide_motif\t3001\t3011\t 74\t+\t.\tName=1;ID=1-81-phiX174;pvalue=3.93e-08;sequence=GCGGTCAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3776\t3786\t 74\t+\t.\tName=1;ID=1-82-phiX174;pvalue=3.98e-08;sequence=TATTTCTAATG;\n-phiX174\tfimo\tpolypeptide_motif\t2026\t2036\t73.9\t+\t.\tName=1;ID=1-83-phiX174;pvalue=4.06e-08;sequence=GAAGTTTAAGA;\n-phiX174\tfimo\tpolypeptide_motif\t4237\t4247\t73.8\t+\t.\tName=1;ID=1-84-phiX174;pvalue=4.12e-08;sequence=AGTTTGTATCT;\n-phiX174\tfimo\tpolypeptide_motif\t803\t813\t73.7\t+\t.\tName=1;ID=1-85-phiX174;pvalue=4.24e-08;sequence=AGAAGAAAACG;\n-phiX174\tfimo\tpolypeptide_motif\t3770\t3780\t73.6\t+\t.\tName=1;ID=1-86-phiX174;pvalue=4.35e-08;sequence=AAAGGATATTT;\n-phiX174\tfimo\tpolypeptide_motif\t3429\t3439\t73.5\t+\t.\tName=1;ID=1-87-phiX174;pvalue=4.45e-08;sequence=GAGATGCAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t99\t109\t73.5\t+\t.\tName=1;ID=1-88-phiX174;pvalue=4.48e-08;sequence=TACGAATTAAA;\n-phiX174\tfimo\tpolypeptide_motif\t67\t77\t73.2\t+\t.\tName=1;ID=1-89-phiX174;pvalue=4.78e-08;sequence=TCTTGATAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t5332\t5342\t72.9\t+\t.\tName=1;ID=1-90-phiX174;pvalue=5.13e-08;sequence=ATCTGCTCAAA;\n-phiX174\tfimo\tpolypeptide_motif\t277\t287\t72.9\t+\t.\tName=1;ID=1-91-phiX174;pvalue=5.14e-08;sequence=TTTAGATATGA;\n-phiX174\tfimo\tpolypeptide_motif\t4338\t4348\t72.8\t+\t.\tName=1;ID=1-92-phiX174;pvalue=5.18e-08;sequence=GGGGACGAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3812\t3822\t72.8\t+\t.\tName=1;ID=1-93-phiX174;pvalue=5.28e-08;sequence=GGTTGATATTT;\n-phiX174\tfimo\tpolypeptide_motif\t1909\t1919\t72.6\t+\t.\tName=1;ID=1-94-phiX174;pvalue=5.51e-08;sequence=TAACGCTAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t3000\t3010\t72.6\t+\t.\tName=1;ID=1-95-phiX174;pvalue=5.54e-08;sequence=GGCGGTCAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t3891\t3901\t72.4\t+\t.\tName=1;ID=1-96-phiX174;pvalue=5.75e-08;sequence=ATTGGCTCTAA;\n-phiX174\tfimo\tpolypeptide_motif\t3079\t3089\t72.4\t+\t.\tName=1;ID=1-97-phiX174;pvalue=5.76e-08;sequence=CTGGTATTAAA;\n-phiX174\tfimo\tpolypeptide_motif\t37\t47\t72.4\t+\t.\tName=1;ID=1-98-phiX174;pvalue=5.79e-08;sequence=TTCGGATATTT;\n-phiX174\tfimo\tpolypeptide_motif\t380\t390\t72.2\t+\t.\tName=1;ID=1-99-phiX174;pvalue=6.01e-08;sequence=GTAAGAAATCA;\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_html_1.html
--- a/test-data/fimo_output_html_1.html Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,97 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta charset="UTF-8">
-<title>FIMO Results</title>
-<style type="text/css">
-td.left {text-align: left;}
-td.right {text-align: right; padding-right: 1cm;}
-</style>
-</head>
-<body bgcolor="#D5F0FF">
-<a name="top_buttons"></a>
-<hr>
-<table summary="buttons" align="left" cellspacing="0">
-<tr>
-<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td>
-<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td>
-<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td>
-</tr>
-</table>
-<br/>
-<br/>
-<hr/>
-<center><big><b>FIMO - Motif search tool</b></big></center>
-<hr>
-<p>
-For further information on how to interpret these results
-or to get a copy of the FIMO software please access
-<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p>
-<p>If you use FIMO in your research, please cite the following paper:<br>
-Charles E. Grant, Timothy L. Bailey, and William Stafford Noble,
-"FIMO: Scanning for occurrences of a given motif",
-<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011.
-<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p>
-<hr>
-<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center>
-<hr>
-<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;">
-<p>
-  <br />
-  Database contains 1 sequences, 5386 residues
-</p>
-<p>
-  <table>
-    <thead>
-      <tr>
-        <th style="border-bottom: 1px dashed;">MOTIF</th>
-        <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th>
-        <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" >
-         BEST POSSIBLE MATCH
-        </th>
-      </tr>
-    </thead>
-    <tbody>
-      <tr>
-        <td style="text-align:right;">1</td>
-        <td style="text-align:right;padding-left: 1em;">11</td>
-        <td style="text-align:left;padding-left: 1em;">GGGGTATAAAA</td>
-       </tr>
-    </tbody>
-  </table>
-</p>
-<p>
-Random model letter frequencies (from non-redundant database):
-<br/>
-
-A 0.073 C 0.018 D 0.052 E 0.062 F 0.040 G 0.069 H 0.022 I 0.056 K 0.058 
-L 0.092 M 0.023 N 0.046 P 0.051 Q 0.041 R 0.052 S 0.074 T 0.059 V 0.064 
-W 0.013 Y 0.033 </p>
-</div>
-<hr>
-<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center>
-<hr>
-<ul>
-<li>
-There were 1937 motif occurences with a p-value less than 0.0001.
-<b>Only the most significant 1000 matches are shown here.</b>
-
-The full set of motif occurences can be seen in the
-tab-delimited plain text output file
-<a href="fimo.txt">fimo.txt</a>, 
-the GFF file 
-<a href="fimo.gff">fimo.gff</a> 
-which may be suitable for uploading to the 
-<a href="http://genome.ucsc.edu/cgi-bin/hgTables">UCSC Genome Table Browser</a>
-(assuming the FASTA input sequences included genomic coordinates in UCSC or Galaxy format),
-or the XML file 
-<a href="fimo.xml">fimo.xml</a>.
-</li>
-<li>
-The p-value of a motif occurrence is defined as the
-probability of a random sequence of the same length as the motif
-matching that position of the sequence with as good or better a score.
-</li>
-<li>
-The score for the match of a position in a sequence to a motif
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_html_2.html
--- a/test-data/fimo_output_html_2.html Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,97 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta charset="UTF-8">
-<title>FIMO Results</title>
-<style type="text/css">
-td.left {text-align: left;}
-td.right {text-align: right; padding-right: 1cm;}
-</style>
-</head>
-<body bgcolor="#D5F0FF">
-<a name="top_buttons"></a>
-<hr>
-<table summary="buttons" align="left" cellspacing="0">
-<tr>
-<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td>
-<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td>
-<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td>
-</tr>
-</table>
-<br/>
-<br/>
-<hr/>
-<center><big><b>FIMO - Motif search tool</b></big></center>
-<hr>
-<p>
-For further information on how to interpret these results
-or to get a copy of the FIMO software please access
-<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p>
-<p>If you use FIMO in your research, please cite the following paper:<br>
-Charles E. Grant, Timothy L. Bailey, and William Stafford Noble,
-"FIMO: Scanning for occurrences of a given motif",
-<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011.
-<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p>
-<hr>
-<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center>
-<hr>
-<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;">
-<p>
-  <br />
-  Database contains 1 sequences, 5386 residues
-</p>
-<p>
-  <table>
-    <thead>
-      <tr>
-        <th style="border-bottom: 1px dashed;">MOTIF</th>
-        <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th>
-        <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" >
-         BEST POSSIBLE MATCH
-        </th>
-      </tr>
-    </thead>
-    <tbody>
-      <tr>
-        <td style="text-align:right;">1</td>
-        <td style="text-align:right;padding-left: 1em;">11</td>
-        <td style="text-align:left;padding-left: 1em;">GGGGTATAAAA</td>
-       </tr>
-    </tbody>
-  </table>
-</p>
-<p>
-Random model letter frequencies (from non-redundant database):
-<br/>
-
-A 0.073 C 0.018 D 0.052 E 0.062 F 0.040 G 0.069 H 0.022 I 0.056 K 0.058 
-L 0.092 M 0.023 N 0.046 P 0.051 Q 0.041 R 0.052 S 0.074 T 0.059 V 0.064 
-W 0.013 Y 0.033 </p>
-</div>
-<hr>
-<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center>
-<hr>
-<ul>
-<li>
-There were 1937 motif occurences with a p-value less than 0.0001.
-<b>Only the most significant 1000 matches are shown here.</b>
-
-The full set of motif occurences can be seen in the
-tab-delimited plain text output file
-<a href="fimo.txt">fimo.txt</a>, 
-the GFF file 
-<a href="fimo.gff">fimo.gff</a> 
-which may be suitable for uploading to the 
-<a href="http://genome.ucsc.edu/cgi-bin/hgTables">UCSC Genome Table Browser</a>
-(assuming the FASTA input sequences included genomic coordinates in UCSC or Galaxy format),
-or the XML file 
-<a href="fimo.xml">fimo.xml</a>.
-</li>
-<li>
-The p-value of a motif occurrence is defined as the
-probability of a random sequence of the same length as the motif
-matching that position of the sequence with as good or better a score.
-</li>
-<li>
-The score for the match of a position in a sequence to a motif
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_interval_1.txt
--- a/test-data/fimo_output_interval_1.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,100 +0,0 @@
-#chr start end pattern name score strand matched sequence p-value q-value
-phiX174 1387 1398 1 + + 1.25e-09 29.4024 6.36e-11
-phiX174 846 857 1 + + 1.25e-09 29.122 7.02e-11
-phiX174 2300 2311 1 + + 1.29e-09 27.6463 1.08e-10
-phiX174 5062 5073 1 + + 2.25e-09 25.5366 2.73e-10
-phiX174 988 999 1 + + 2.25e-09 25.3049 3.15e-10
-phiX174 4712 4723 1 + + 3.48e-09 23.622 7.74e-10
-phiX174 5047 5058 1 + + 3.48e-09 23.3293 8.51e-10
-phiX174 854 865 1 + + 3.48e-09 23.3049 8.64e-10
-phiX174 3154 3165 1 + + 3.48e-09 23.0366 9.76e-10
-phiX174 5008 5019 1 + + 3.48e-09 23.0366 9.76e-10
-phiX174 813 824 1 + + 4.14e-09 22.5854 1.28e-09
-phiX174 2831 2842 1 + + 4.23e-09 22.3415 1.42e-09
-phiX174 3829 3840 1 + + 4.68e-09 21.8293 1.7e-09
-phiX174 3559 3570 1 + + 4.82e-09 21.5976 1.89e-09
-phiX174 2881 2892 1 + + 5.46e-09 21.1951 2.29e-09
-phiX174 4452 4463 1 + + 5.75e-09 20.8902 2.58e-09
-phiX174 2492 2503 1 + + 5.79e-09 20.3415 3.06e-09
-phiX174 4103 4114 1 + + 5.79e-09 20.3171 3.08e-09
-phiX174 4954 4965 1 + + 5.79e-09 20.3171 3.08e-09
-phiX174 1884 1895 1 + + 6.45e-09 19.9268 3.61e-09
-phiX174 3375 3386 1 + + 6.48e-09 19.7683 3.81e-09
-phiX174 51 62 1 + + 6.58e-09 19.5732 4.06e-09
-phiX174 1389 1400 1 + + 6.61e-09 19.378 4.26e-09
-phiX174 2016 2027 1 + + 6.85e-09 19.0854 4.6e-09
-phiX174 999 1010 1 + + 6.97e-09 18.878 4.88e-09
-phiX174 1554 1565 1 + + 7.37e-09 18.439 5.58e-09
-phiX174 4429 4440 1 + + 7.37e-09 18.4268 5.62e-09
-phiX174 1926 1937 1 + + 7.37e-09 18.2927 5.82e-09
-phiX174 2980 2991 1 + + 7.37e-09 18.0732 6.13e-09
-phiX174 4202 4213 1 + + 7.37e-09 17.9268 6.34e-09
-phiX174 1668 1679 1 + + 7.37e-09 17.8659 6.4e-09
-phiX174 3259 3270 1 + + 7.82e-09 17.5 7.01e-09
-phiX174 3046 3057 1 + + 7.85e-09 17.2805 7.4e-09
-phiX174 4175 4186 1 + + 7.85e-09 17.1829 7.6e-09
-phiX174 4117 4128 1 + + 7.85e-09 17.1341 7.7e-09
-phiX174 5369 5380 1 + + 7.87e-09 16.9878 8.03e-09
-phiX174 1241 1252 1 + + 7.87e-09 16.5122 8.94e-09
-phiX174 2582 2593 1 + + 7.87e-09 16.5122 8.94e-09
-phiX174 697 708 1 + + 7.87e-09 16.4146 9.13e-09
-phiX174 2298 2309 1 + + 7.87e-09 16.3537 9.26e-09
-phiX174 4188 4199 1 + + 7.87e-09 16.1707 9.69e-09
-phiX174 274 285 1 + + 7.87e-09 16.0976 9.85e-09
-phiX174 1800 1811 1 + + 7.87e-09 16.0366 1e-08
-phiX174 1385 1396 1 + + 7.87e-09 15.9268 1.03e-08
-phiX174 1302 1313 1 + + 7.87e-09 15.9024 1.03e-08
-phiX174 3771 3782 1 + + 7.87e-09 15.878 1.04e-08
-phiX174 1287 1298 1 + + 7.87e-09 15.8659 1.04e-08
-phiX174 2576 2587 1 + + 7.87e-09 15.7683 1.08e-08
-phiX174 936 947 1 + + 7.87e-09 15.7561 1.08e-08
-phiX174 903 914 1 + + 7.93e-09 15.6585 1.11e-08
-phiX174 2278 2289 1 + + 7.93e-09 15.5854 1.13e-08
-phiX174 3163 3174 1 + + 7.98e-09 15.5 1.16e-08
-phiX174 23 34 1 + + 8.24e-09 15.3293 1.23e-08
-phiX174 837 848 1 + + 8.24e-09 15.2561 1.27e-08
-phiX174 852 863 1 + + 8.24e-09 15.2561 1.27e-08
-phiX174 1983 1994 1 + + 8.68e-09 15.0244 1.36e-08
-phiX174 0 11 1 + + 9.05e-09 14.8293 1.46e-08
-phiX174 4306 4317 1 + + 9.05e-09 14.7927 1.47e-08
-phiX174 4302 4313 1 + + 9.19e-09 14.6585 1.52e-08
-phiX174 5032 5043 1 + + 9.41e-09 14.561 1.58e-08
-phiX174 2578 2589 1 + + 1.01e-08 14.2927 1.73e-08
-phiX174 321 332 1 + + 1.05e-08 14.1951 1.82e-08
-phiX174 5000 5011 1 + + 1.19e-08 13.8902 2.09e-08
-phiX174 4216 4227 1 + + 1.2e-08 13.8171 2.15e-08
-phiX174 4261 4272 1 + + 1.2e-08 13.7805 2.18e-08
-phiX174 3568 3579 1 + + 1.22e-08 13.7073 2.26e-08
-phiX174 193 204 1 + + 1.22e-08 13.6829 2.29e-08
-phiX174 130 141 1 + + 1.31e-08 13.4756 2.49e-08
-phiX174 1490 1501 1 + + 1.32e-08 13.4024 2.55e-08
-phiX174 433 444 1 + + 1.36e-08 13.2805 2.67e-08
-phiX174 4564 4575 1 + + 1.36e-08 13.2439 2.73e-08
-phiX174 101 112 1 + + 1.36e-08 13.2195 2.75e-08
-phiX174 902 913 1 + + 1.38e-08 13.1463 2.82e-08
-phiX174 4747 4758 1 + + 1.45e-08 12.9756 3.01e-08
-phiX174 2621 2632 1 + + 1.5e-08 12.8659 3.16e-08
-phiX174 466 477 1 + + 1.57e-08 12.7317 3.35e-08
-phiX174 4032 4043 1 + + 1.58e-08 12.6829 3.44e-08
-phiX174 1347 1358 1 + + 1.58e-08 12.6707 3.46e-08
-phiX174 238 249 1 + + 1.64e-08 12.5732 3.62e-08
-phiX174 499 510 1 + + 1.71e-08 12.4634 3.84e-08
-phiX174 3000 3011 1 + + 1.73e-08 12.4146 3.93e-08
-phiX174 3775 3786 1 + + 1.73e-08 12.378 3.98e-08
-phiX174 2025 2036 1 + + 1.75e-08 12.3293 4.06e-08
-phiX174 4236 4247 1 + + 1.75e-08 12.3049 4.12e-08
-phiX174 802 813 1 + + 1.78e-08 12.2439 4.24e-08
-phiX174 3769 3780 1 + + 1.81e-08 12.1829 4.35e-08
-phiX174 3428 3439 1 + + 1.82e-08 12.122 4.45e-08
-phiX174 98 109 1 + + 1.82e-08 12.1098 4.48e-08
-phiX174 66 77 1 + + 1.92e-08 11.9268 4.78e-08
-phiX174 5331 5342 1 + + 2.01e-08 11.7195 5.13e-08
-phiX174 276 287 1 + + 2.01e-08 11.7073 5.14e-08
-phiX174 4337 4348 1 + + 2.01e-08 11.6951 5.18e-08
-phiX174 3811 3822 1 + + 2.03e-08 11.6585 5.28e-08
-phiX174 1908 1919 1 + + 2.08e-08 11.5488 5.51e-08
-phiX174 2999 3010 1 + + 2.08e-08 11.5366 5.54e-08
-phiX174 3890 3901 1 + + 2.11e-08 11.439 5.75e-08
-phiX174 3078 3089 1 + + 2.11e-08 11.4268 5.76e-08
-phiX174 36 47 1 + + 2.11e-08 11.4146 5.79e-08
-phiX174 379 390 1 + + 2.17e-08 11.3293 6.01e-08
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_interval_2.txt
--- a/test-data/fimo_output_interval_2.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,100 +0,0 @@
-#chr start end pattern name score strand matched sequence p-value q-value
-phiX174 1387 1398 1 + + 0 29.4024 6.36e-11
-phiX174 846 857 1 + + 0 29.122 7.02e-11
-phiX174 2300 2311 1 + + 0 27.6463 1.08e-10
-phiX174 5062 5073 1 + + 0 25.5366 2.73e-10
-phiX174 988 999 1 + + 0 25.3049 3.15e-10
-phiX174 4712 4723 1 + + 0 23.622 7.74e-10
-phiX174 5047 5058 1 + + 0 23.3293 8.51e-10
-phiX174 854 865 1 + + 0 23.3049 8.64e-10
-phiX174 3154 3165 1 + + 0 23.0366 9.76e-10
-phiX174 5008 5019 1 + + 0 23.0366 9.76e-10
-phiX174 813 824 1 + + 0 22.5854 1.28e-09
-phiX174 2831 2842 1 + + 0 22.3415 1.42e-09
-phiX174 3829 3840 1 + + 0 21.8293 1.7e-09
-phiX174 3559 3570 1 + + 0 21.5976 1.89e-09
-phiX174 2881 2892 1 + + 0 21.1951 2.29e-09
-phiX174 4452 4463 1 + + 0 20.8902 2.58e-09
-phiX174 2492 2503 1 + + 0 20.3415 3.06e-09
-phiX174 4103 4114 1 + + 0 20.3171 3.08e-09
-phiX174 4954 4965 1 + + 0 20.3171 3.08e-09
-phiX174 1884 1895 1 + + 0 19.9268 3.61e-09
-phiX174 3375 3386 1 + + 0 19.7683 3.81e-09
-phiX174 51 62 1 + + 0 19.5732 4.06e-09
-phiX174 1389 1400 1 + + 0 19.378 4.26e-09
-phiX174 2016 2027 1 + + 0 19.0854 4.6e-09
-phiX174 999 1010 1 + + 0 18.878 4.88e-09
-phiX174 1554 1565 1 + + 0 18.439 5.58e-09
-phiX174 4429 4440 1 + + 0 18.4268 5.62e-09
-phiX174 1926 1937 1 + + 0 18.2927 5.82e-09
-phiX174 2980 2991 1 + + 0 18.0732 6.13e-09
-phiX174 4202 4213 1 + + 0 17.9268 6.34e-09
-phiX174 1668 1679 1 + + 0 17.8659 6.4e-09
-phiX174 3259 3270 1 + + 0 17.5 7.01e-09
-phiX174 3046 3057 1 + + 0 17.2805 7.4e-09
-phiX174 4175 4186 1 + + 0 17.1829 7.6e-09
-phiX174 4117 4128 1 + + 0 17.1341 7.7e-09
-phiX174 5369 5380 1 + + 0 16.9878 8.03e-09
-phiX174 1241 1252 1 + + 0 16.5122 8.94e-09
-phiX174 2582 2593 1 + + 0 16.5122 8.94e-09
-phiX174 697 708 1 + + 0 16.4146 9.13e-09
-phiX174 2298 2309 1 + + 0 16.3537 9.26e-09
-phiX174 4188 4199 1 + + 0 16.1707 9.69e-09
-phiX174 274 285 1 + + 0 16.0976 9.85e-09
-phiX174 1800 1811 1 + + 0 16.0366 1e-08
-phiX174 1385 1396 1 + + 0 15.9268 1.03e-08
-phiX174 1302 1313 1 + + 0 15.9024 1.03e-08
-phiX174 3771 3782 1 + + 0 15.878 1.04e-08
-phiX174 1287 1298 1 + + 0 15.8659 1.04e-08
-phiX174 2576 2587 1 + + 0 15.7683 1.08e-08
-phiX174 936 947 1 + + 0 15.7561 1.08e-08
-phiX174 903 914 1 + + 0 15.6585 1.11e-08
-phiX174 2278 2289 1 + + 0 15.5854 1.13e-08
-phiX174 3163 3174 1 + + 0 15.5 1.16e-08
-phiX174 23 34 1 + + 0 15.3293 1.23e-08
-phiX174 837 848 1 + + 0 15.2561 1.27e-08
-phiX174 852 863 1 + + 0 15.2561 1.27e-08
-phiX174 1983 1994 1 + + 0 15.0244 1.36e-08
-phiX174 0 11 1 + + 0 14.8293 1.46e-08
-phiX174 4306 4317 1 + + 0 14.7927 1.47e-08
-phiX174 4302 4313 1 + + 0 14.6585 1.52e-08
-phiX174 5032 5043 1 + + 0 14.561 1.58e-08
-phiX174 2578 2589 1 + + 0 14.2927 1.73e-08
-phiX174 321 332 1 + + 0 14.1951 1.82e-08
-phiX174 5000 5011 1 + + 0 13.8902 2.09e-08
-phiX174 4216 4227 1 + + 0 13.8171 2.15e-08
-phiX174 4261 4272 1 + + 0 13.7805 2.18e-08
-phiX174 3568 3579 1 + + 0 13.7073 2.26e-08
-phiX174 193 204 1 + + 0 13.6829 2.29e-08
-phiX174 130 141 1 + + 0 13.4756 2.49e-08
-phiX174 1490 1501 1 + + 0 13.4024 2.55e-08
-phiX174 433 444 1 + + 0 13.2805 2.67e-08
-phiX174 4564 4575 1 + + 0 13.2439 2.73e-08
-phiX174 101 112 1 + + 0 13.2195 2.75e-08
-phiX174 902 913 1 + + 0 13.1463 2.82e-08
-phiX174 4747 4758 1 + + 0 12.9756 3.01e-08
-phiX174 2621 2632 1 + + 0 12.8659 3.16e-08
-phiX174 466 477 1 + + 0 12.7317 3.35e-08
-phiX174 4032 4043 1 + + 0 12.6829 3.44e-08
-phiX174 1347 1358 1 + + 0 12.6707 3.46e-08
-phiX174 238 249 1 + + 0 12.5732 3.62e-08
-phiX174 499 510 1 + + 0 12.4634 3.84e-08
-phiX174 3000 3011 1 + + 0 12.4146 3.93e-08
-phiX174 3775 3786 1 + + 0 12.378 3.98e-08
-phiX174 2025 2036 1 + + 0 12.3293 4.06e-08
-phiX174 4236 4247 1 + + 0 12.3049 4.12e-08
-phiX174 802 813 1 + + 0 12.2439 4.24e-08
-phiX174 3769 3780 1 + + 0 12.1829 4.35e-08
-phiX174 3428 3439 1 + + 0 12.122 4.45e-08
-phiX174 98 109 1 + + 0 12.1098 4.48e-08
-phiX174 66 77 1 + + 0 11.9268 4.78e-08
-phiX174 5331 5342 1 + + 0 11.7195 5.13e-08
-phiX174 276 287 1 + + 0 11.7073 5.14e-08
-phiX174 4337 4348 1 + + 0 11.6951 5.18e-08
-phiX174 3811 3822 1 + + 0 11.6585 5.28e-08
-phiX174 1908 1919 1 + + 0 11.5488 5.51e-08
-phiX174 2999 3010 1 + + 0 11.5366 5.54e-08
-phiX174 3890 3901 1 + + 0 11.439 5.75e-08
-phiX174 3078 3089 1 + + 0 11.4268 5.76e-08
-phiX174 36 47 1 + + 0 11.4146 5.79e-08
-phiX174 379 390 1 + + 0 11.3293 6.01e-08
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test1.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test1.gff Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,12 @@
+##gff-version 3
+chrM fimo nucleotide_motif 2299 2306 46.6 - . Name=TACTAAYM_chrM-;Alias=MEME-1;ID=TACTAAYM-MEME-1-1-chrM;pvalue=2.18e-05;qvalue= 0.142;sequence=TACTAACA;
+chrM fimo nucleotide_motif 6529 6536 46.6 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-2-chrM;pvalue=2.18e-05;qvalue= 0.142;sequence=TACTAACA;
+chrM fimo nucleotide_motif 7741 7748 46.6 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-3-chrM;pvalue=2.18e-05;qvalue= 0.142;sequence=TACTAACA;
+chrM fimo nucleotide_motif 13656 13663 46.6 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-4-chrM;pvalue=2.18e-05;qvalue= 0.142;sequence=TACTAACA;
+chrM fimo nucleotide_motif 13740 13747 46.6 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-5-chrM;pvalue=2.18e-05;qvalue= 0.142;sequence=TACTAACA;
+chrM fimo nucleotide_motif 861 868  44 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-6-chrM;pvalue=3.96e-05;qvalue= 0.185;sequence=TACTAACC;
+chrM fimo nucleotide_motif 9346 9353  44 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-7-chrM;pvalue=3.96e-05;qvalue= 0.185;sequence=TACTAACC;
+chrM fimo nucleotide_motif 3767 3774 41.8 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-8-chrM;pvalue=6.62e-05;qvalue= 0.216;sequence=TACTAATA;
+chrM fimo nucleotide_motif 5497 5504 41.8 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-9-chrM;pvalue=6.62e-05;qvalue= 0.216;sequence=TACTAATA;
+chrM fimo nucleotide_motif 10105 10112 41.8 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-10-chrM;pvalue=6.62e-05;qvalue= 0.216;sequence=TACTAATA;
+chrM fimo nucleotide_motif 10959 10966 40.6 + . Name=TACTAAYM_chrM+;Alias=MEME-1;ID=TACTAAYM-MEME-1-11-chrM;pvalue=8.79e-05;qvalue= 0.261;sequence=TACTAACT;
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test1.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test1.html Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,300 @@\n+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n+<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme">\n+<head>\n+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n+<meta charset="UTF-8">\n+<title>FIMO Results</title>\n+<style type="text/css">\n+td.left {text-align: left;}\n+td.right {text-align: right; padding-right: 1cm;}\n+</style>\n+</head>\n+<body bgcolor="#D5F0FF">\n+<a name="top_buttons"></a>\n+<hr>\n+<table summary="buttons" align="left" cellspacing="0">\n+<tr>\n+<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td>\n+<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td>\n+<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td>\n+</tr>\n+</table>\n+<br/>\n+<br/>\n+<hr/>\n+<center><big><b>FIMO - Motif search tool</b></big></center>\n+<hr>\n+<p>\n+FIMO version 4.12.0, (Release date: Tue Jun 27 16:22:50 2017 -0700)\n+</p>\n+<p>\n+For further information on how to interpret these results\n+or to get a copy of the FIMO software please access\n+<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p>\n+<p>If you use FIMO in your research, please cite the following paper:<br>\n+Charles E. Grant, Timothy L. Bailey, and William Stafford Noble,\n+"FIMO: Scanning for occurrences of a given motif",\n+<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011.\n+<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p>\n+<hr>\n+<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center>\n+<hr>\n+<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;">\n+<p>\n+  DATABASE hsa_chrM.fa\n+  <br />\n+  Database contains 1 sequences, 16569 residues\n+</p>\n+<p>\n+  MOTIFS meme_fimo_input_1.xml (DNA)\n+  <table>\n+    <thead>\n+      <tr>\n+        <th style="border-bottom: 1px dashed;">MOTIF</th>\n+        <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th>\n+        <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" >\n+         BEST POSSIBLE MATCH\n+        </th>\n+      </tr>\n+    </thead>\n+    <tbody>\n+      <tr>\n+        <td style="text-align:right;">TACTAAYM</td>\n+        <td style="text-align:right;padding-left: 1em;">8</td>\n+        <td style="text-align:left;padding-left: 1em;">TACTAACA</td>\n+       </tr>\n+    </tbody>\n+  </table>\n+</p>\n+<p>\n+Random model letter frequencies (--nrdb--):\n+<br/>\n+\n+A 0.275 C 0.225 G 0.225 T 0.275 </p>\n+</div>\n+<hr>\n+<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center>\n+<hr>\n+<ul>\n+<li>\n+There were 11 motif occurences with a p-value less than 0.0001.\n+\n+The full set of motif occurences can be seen in the\n+tab-delimited plain text output file\n+<a href="fimo.txt">fimo.txt</a>, \n+the GFF file \n+<a href="fimo.gff">fimo.gff</a> \n+which may be suitable for uploading to the \n+<a href="http://genome.ucsc.edu/cgi-bin/hgTables">UCSC Genome Table Browser</a>\n+(assuming the FASTA input sequences included genomic coordinates in UCSC or Galaxy format),\n+or the XML file \n+<a href="fimo.xml">fimo.xml</a>.\n+</li>\n+<li>\n+The p-value of a motif occurrence is defined as the\n+probability of a random sequence of the same length as the motif\n+matching that position of the sequence with as good or better a score.\n+</li>\n+<li>\n+The score for the match of a position in a sequence to a motif\n+is computed by summing the appropriate entries from each column of\n+the position-dependent scoring matrix that represents the motif.\n+</li>\n+<li>\n+The q-value of a motif occurrence is defined as the\n+false discovery rate if the occurrence is accepted as significant.\n+</li>\n+<li>The table is sorted by increasing p-value.</li>\n+</ul>\n+<table border="1">\n+<thead>\n+<tr>\n+<th>Motif ID</th>\n+<th>Alt ID</th>\n+<th>Sequence Name</th>\n+<th>Strand</th>\n+'..b'e:x-large;font-family:monospace;">TACTAACC</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">TACTAAYM</td>\n+      <td style="text-align:left;">MEME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">3767</td>\n+      <td style="text-align:left;">3774</td>\n+      <td style="text-align:left;">6.62e-05</td>\n+      <td style="text-align:left;">0.216</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">TACTAATA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">TACTAAYM</td>\n+      <td style="text-align:left;">MEME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">5497</td>\n+      <td style="text-align:left;">5504</td>\n+      <td style="text-align:left;">6.62e-05</td>\n+      <td style="text-align:left;">0.216</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">TACTAATA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">TACTAAYM</td>\n+      <td style="text-align:left;">MEME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">10105</td>\n+      <td style="text-align:left;">10112</td>\n+      <td style="text-align:left;">6.62e-05</td>\n+      <td style="text-align:left;">0.216</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">TACTAATA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">TACTAAYM</td>\n+      <td style="text-align:left;">MEME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">10959</td>\n+      <td style="text-align:left;">10966</td>\n+      <td style="text-align:left;">8.79e-05</td>\n+      <td style="text-align:left;">0.261</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">TACTAACT</td>\n+   </tr>\n+</tbody>\n+</table>\n+\n+<hr>\n+<center><big><b><a name="debugging_information">DEBUGGING INFORMATION</a></b></big></center>\n+<hr>\n+<p>\n+Command line:\n+</p>\n+<pre>\n+fimo -oc fimo_test1_out meme_fimo_input_1.xml hsa_chrM.fa\n+</pre>\n+<p>\n+Settings:\n+</p>\n+<pre>\n+<table>\n+  <tr>\n+    <td style="padding-right: 2em">output_directory = fimo_test1_out</td>\n+    <td style="padding-left: 5em; padding-right: 2em">MEME file name = meme_fimo_input_1.xml</td>\n+    <td style="padding-left: 5em; padding-right: 2em">sequence file name = hsa_chrM.fa</td>\n+  </tr>  <tr>\n+    <td style="padding-right: 2em">background file name = --nrdb--</td>\n+    <td style="padding-left: 5em; padding-right: 2em">alphabet = DNA</td>\n+    <td style="padding-left: 5em; padding-right: 2em">max stored scores = 100000</td>\n+  </tr>  <tr>\n+    <td style="padding-right: 2em">allow clobber = true</td>\n+    <td style="padding-left: 5em; padding-right: 2em">compute q-values = true</td>\n+    <td style="padding-left: 5em; padding-right: 2em">parse genomic coord. = false</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">text only = false</td>\n+    <td style="padding-left: 5em; padding-right: 2em">scan both strands = true</td>\n+    <td style="padding-left: 5em; padding-right: 2em">max strand = false</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">threshold type = p-value</td>\n+    <td style="padding-left: 5em; padding-right: 2em">output theshold = 0.0001</td>\n+    <td style="padding-left: 5em; padding-right: 2em">pseudocount = 0.1</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">alpha = 1</td>\n+    <td style="padding-left: 5em; padding-right: 2em">verbosity = 2</td>\n+    <td style="padding-left: 5em; padding-right: 2em"></td>\n+  </tr>\n+\n+</table>\n+</pre>\n+<p>\n+This information can be useful in the event you wish to report a\n+problem with the FIMO software.\n+</p>\n+<hr>\n+<span style="background-color: #DDDDFF"><a href="#top_buttons"><b>Go to top</b></a></span>\n+</body>\n+</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test1.txt Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,12 @@
+# motif_id motif_alt_id sequence_name start stop strand score p-value q-value matched_sequence
+TACTAAYM MEME-1 chrM 2299 2306 - 12.9701 2.18e-05 0.142 TACTAACA
+TACTAAYM MEME-1 chrM 6529 6536 + 12.9701 2.18e-05 0.142 TACTAACA
+TACTAAYM MEME-1 chrM 7741 7748 + 12.9701 2.18e-05 0.142 TACTAACA
+TACTAAYM MEME-1 chrM 13656 13663 + 12.9701 2.18e-05 0.142 TACTAACA
+TACTAAYM MEME-1 chrM 13740 13747 + 12.9701 2.18e-05 0.142 TACTAACA
+TACTAAYM MEME-1 chrM 861 868 + 12.2836 3.96e-05 0.185 TACTAACC
+TACTAAYM MEME-1 chrM 9346 9353 + 12.2836 3.96e-05 0.185 TACTAACC
+TACTAAYM MEME-1 chrM 3767 3774 + 11.7164 6.62e-05 0.216 TACTAATA
+TACTAAYM MEME-1 chrM 5497 5504 + 11.7164 6.62e-05 0.216 TACTAATA
+TACTAAYM MEME-1 chrM 10105 10112 + 11.7164 6.62e-05 0.216 TACTAATA
+TACTAAYM MEME-1 chrM 10959 10966 + 11.6567 8.79e-05 0.261 TACTAACT
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test1.xml Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Begin document body -->
+<fimo version="4.12.0" release="Tue Jun 27 16:22:50 2017 -0700">
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation=  xmlns:fimo="http://noble.gs.washington.edu/schema/fimo"
+>
+<command-line>fimo -oc fimo_test1_out meme_fimo_input_1.xml hsa_chrM.fa</command-line>
+<settings>
+<setting name="output directory">fimo_test1_out</setting>
+<setting name="MEME file name">meme_fimo_input_1.xml</setting>
+<setting name="sequence file name">hsa_chrM.fa</setting>
+<setting name="background file name">--nrdb--</setting>
+<setting name="allow clobber">true</setting>
+<setting name="compute q-values">true</setting>
+<setting name="parse genomic coord.">false</setting>
+<setting name="text only">false</setting>
+<setting name="scan both strands">true</setting>
+<setting name="output threshold">0.0001</setting>
+<setting name="threshold type">p-value</setting>
+<setting name="max stored scores">100000</setting>
+<setting name="pseudocount">0.1</setting>
+<setting name="verbosity">2</setting>
+</settings>
+<sequence-data num-sequences="1" num-residues="16569" />
+<alphabet name="DNA" like="dna">
+<letter id="A" symbol="A" complement="T" name="Adenine" colour="CC0000"/>
+<letter id="C" symbol="C" complement="G" name="Cytosine" colour="0000CC"/>
+<letter id="G" symbol="G" complement="C" name="Guanine" colour="FFB300"/>
+<letter id="T" symbol="T" aliases="U" complement="A" name="Thymine" colour="008000"/>
+<letter id="N" symbol="N" aliases="X." equals="ACGT" name="Any base"/>
+<letter id="V" symbol="V" equals="ACG" name="Not T"/>
+<letter id="H" symbol="H" equals="ACT" name="Not G"/>
+<letter id="D" symbol="D" equals="AGT" name="Not C"/>
+<letter id="B" symbol="B" equals="CGT" name="Not A"/>
+<letter id="M" symbol="M" equals="AC" name="Amino"/>
+<letter id="R" symbol="R" equals="AG" name="Purine"/>
+<letter id="W" symbol="W" equals="AT" name="Weak"/>
+<letter id="S" symbol="S" equals="CG" name="Strong"/>
+<letter id="Y" symbol="Y" equals="CT" name="Pyrimidine"/>
+<letter id="K" symbol="K" equals="GT" name="Keto"/>
+</alphabet>
+<motif name="TACTAAYM" alt="MEME-1" width="8" best-possible-match="TACTAACA"/>
+<background source="--nrdb--">
+<value letter="A">0.275</value>
+<value letter="C">0.225</value>
+<value letter="G">0.225</value>
+<value letter="T">0.275</value>
+</background>
+<cisml-file>cisml.xml</cisml-file>
+</fimo>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test2.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test2.gff Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,12 @@
+##gff-version 3
+chrM fimo nucleotide_motif 440 446 40.9 + . Name=ACTAAYH_chrM+;Alias=DREME-1;ID=ACTAAYH-DREME-1-1-chrM;pvalue=8.2e-05;qvalue= 0.327;sequence=ACTAACA;
+chrM fimo nucleotide_motif 2093 2099 40.9 - . Name=ACTAAYH_chrM-;Alias=DREME-1;ID=ACTAAYH-DREME-1-2-chrM;pvalue=8.2e-05;qvalue= 0.327;sequence=ACTAACA;
+chrM fimo nucleotide_motif 2299 2305 40.9 - . Name=ACTAAYH_chrM-;Alias=DREME-1;ID=ACTAAYH-DREME-1-3-chrM;pvalue=8.2e-05;qvalue= 0.327;sequence=ACTAACA;
+chrM fimo nucleotide_motif 5186 5192 40.9 + . Name=ACTAAYH_chrM+;Alias=DREME-1;ID=ACTAAYH-DREME-1-4-chrM;pvalue=8.2e-05;qvalue= 0.327;sequence=ACTAACA;
+chrM fimo nucleotide_motif 6530 6536 40.9 + . Name=ACTAAYH_chrM+;Alias=DREME-1;ID=ACTAAYH-DREME-1-5-chrM;pvalue=8.2e-05;qvalue= 0.327;sequence=ACTAACA;
+chrM fimo nucleotide_motif 7742 7748 40.9 + . Name=ACTAAYH_chrM+;Alias=DREME-1;ID=ACTAAYH-DREME-1-6-chrM;pvalue=8.2e-05;qvalue= 0.327;sequence=ACTAACA;
+chrM fimo nucleotide_motif 13657 13663 40.9 + . Name=ACTAAYH_chrM+;Alias=DREME-1;ID=ACTAAYH-DREME-1-7-chrM;pvalue=8.2e-05;qvalue= 0.327;sequence=ACTAACA;
+chrM fimo nucleotide_motif 13741 13747 40.9 + . Name=ACTAAYH_chrM+;Alias=DREME-1;ID=ACTAAYH-DREME-1-8-chrM;pvalue=8.2e-05;qvalue= 0.327;sequence=ACTAACA;
+chrM fimo nucleotide_motif 510 516 43.8 + . Name=CCAGCAY_chrM+;Alias=DREME-5;ID=CCAGCAY-DREME-5-1-chrM;pvalue=4.15e-05;qvalue= 0.668;sequence=CCAGCAC;
+chrM fimo nucleotide_motif 5137 5143 43.8 + . Name=CCAGCAY_chrM+;Alias=DREME-5;ID=CCAGCAY-DREME-5-2-chrM;pvalue=4.15e-05;qvalue= 0.668;sequence=CCAGCAC;
+chrM fimo nucleotide_motif 4241 4247 40.3 + . Name=CCAGCAY_chrM+;Alias=DREME-5;ID=CCAGCAY-DREME-5-3-chrM;pvalue=9.37e-05;qvalue= 1;sequence=CCAGCAT;
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test2.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test2.html Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,325 @@\n+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n+<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme">\n+<head>\n+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n+<meta charset="UTF-8">\n+<title>FIMO Results</title>\n+<style type="text/css">\n+td.left {text-align: left;}\n+td.right {text-align: right; padding-right: 1cm;}\n+</style>\n+</head>\n+<body bgcolor="#D5F0FF">\n+<a name="top_buttons"></a>\n+<hr>\n+<table summary="buttons" align="left" cellspacing="0">\n+<tr>\n+<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td>\n+<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td>\n+<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td>\n+</tr>\n+</table>\n+<br/>\n+<br/>\n+<hr/>\n+<center><big><b>FIMO - Motif search tool</b></big></center>\n+<hr>\n+<p>\n+FIMO version 4.12.0, (Release date: Tue Jun 27 16:22:50 2017 -0700)\n+</p>\n+<p>\n+For further information on how to interpret these results\n+or to get a copy of the FIMO software please access\n+<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p>\n+<p>If you use FIMO in your research, please cite the following paper:<br>\n+Charles E. Grant, Timothy L. Bailey, and William Stafford Noble,\n+"FIMO: Scanning for occurrences of a given motif",\n+<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011.\n+<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p>\n+<hr>\n+<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center>\n+<hr>\n+<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;">\n+<p>\n+  DATABASE hsa_chrM.fa\n+  <br />\n+  Database contains 1 sequences, 16569 residues\n+</p>\n+<p>\n+  MOTIFS dreme_fimo_input_1.xml (DNA)\n+  <table>\n+    <thead>\n+      <tr>\n+        <th style="border-bottom: 1px dashed;">MOTIF</th>\n+        <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th>\n+        <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" >\n+         BEST POSSIBLE MATCH\n+        </th>\n+      </tr>\n+    </thead>\n+    <tbody>\n+      <tr>\n+        <td style="text-align:right;">ACTAAYH</td>\n+        <td style="text-align:right;padding-left: 1em;">7</td>\n+        <td style="text-align:left;padding-left: 1em;">ACTAACA</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">YTAACA</td>\n+        <td style="text-align:right;padding-left: 1em;">6</td>\n+        <td style="text-align:left;padding-left: 1em;">TTAACA</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">TCTGT</td>\n+        <td style="text-align:right;padding-left: 1em;">5</td>\n+        <td style="text-align:left;padding-left: 1em;">TCTGT</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">SCCAGG</td>\n+        <td style="text-align:right;padding-left: 1em;">6</td>\n+        <td style="text-align:left;padding-left: 1em;">CCCAGG</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">CCAGCAY</td>\n+        <td style="text-align:right;padding-left: 1em;">7</td>\n+        <td style="text-align:left;padding-left: 1em;">CCAGCAC</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">GMATGT</td>\n+        <td style="text-align:right;padding-left: 1em;">6</td>\n+        <td style="text-align:left;padding-left: 1em;">GAATGT</td>\n+       </tr>\n+    </tbody>\n+  </table>\n+</p>\n+<p>\n+Random model letter frequencies (fimo_background_probs_hsa_chrM.txt):\n+<br/>\n+\n+A 0.278 C 0.222 G 0.222 T 0.278 </p>\n+</div>\n+<hr>\n+<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center>\n+<hr>\n+<ul>\n+<li>\n+There were 11 motif occurences with a p-value less than 0.0001.\n+\n+The full set of motif occurences can be seen in the\n+tab-delimited plain text output file\n+<a href="fimo'..b'  <tr>\n+      <td style="text-align:left;">ACTAAYH</td>\n+      <td style="text-align:left;">DREME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">7742</td>\n+      <td style="text-align:left;">7748</td>\n+      <td style="text-align:left;">8.2e-05</td>\n+      <td style="text-align:left;">0.327</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">ACTAACA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">ACTAAYH</td>\n+      <td style="text-align:left;">DREME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">13657</td>\n+      <td style="text-align:left;">13663</td>\n+      <td style="text-align:left;">8.2e-05</td>\n+      <td style="text-align:left;">0.327</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">ACTAACA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">ACTAAYH</td>\n+      <td style="text-align:left;">DREME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">13741</td>\n+      <td style="text-align:left;">13747</td>\n+      <td style="text-align:left;">8.2e-05</td>\n+      <td style="text-align:left;">0.327</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">ACTAACA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">CCAGCAY</td>\n+      <td style="text-align:left;">DREME-5</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">4241</td>\n+      <td style="text-align:left;">4247</td>\n+      <td style="text-align:left;">9.37e-05</td>\n+      <td style="text-align:left;">1</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">CCAGCAT</td>\n+   </tr>\n+</tbody>\n+</table>\n+\n+<hr>\n+<center><big><b><a name="debugging_information">DEBUGGING INFORMATION</a></b></big></center>\n+<hr>\n+<p>\n+Command line:\n+</p>\n+<pre>\n+fimo -oc fimo_test2_out --bgfile fimo_background_probs_hsa_chrM.txt dreme_fimo_input_1.xml hsa_chrM.fa\n+</pre>\n+<p>\n+Settings:\n+</p>\n+<pre>\n+<table>\n+  <tr>\n+    <td style="padding-right: 2em">output_directory = fimo_test2_out</td>\n+    <td style="padding-left: 5em; padding-right: 2em">MEME file name = dreme_fimo_input_1.xml</td>\n+    <td style="padding-left: 5em; padding-right: 2em">sequence file name = hsa_chrM.fa</td>\n+  </tr>  <tr>\n+    <td style="padding-right: 2em">background file name = fimo_background_probs_hsa_chrM.txt</td>\n+    <td style="padding-left: 5em; padding-right: 2em">alphabet = DNA</td>\n+    <td style="padding-left: 5em; padding-right: 2em">max stored scores = 100000</td>\n+  </tr>  <tr>\n+    <td style="padding-right: 2em">allow clobber = true</td>\n+    <td style="padding-left: 5em; padding-right: 2em">compute q-values = true</td>\n+    <td style="padding-left: 5em; padding-right: 2em">parse genomic coord. = false</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">text only = false</td>\n+    <td style="padding-left: 5em; padding-right: 2em">scan both strands = true</td>\n+    <td style="padding-left: 5em; padding-right: 2em">max strand = false</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">threshold type = p-value</td>\n+    <td style="padding-left: 5em; padding-right: 2em">output theshold = 0.0001</td>\n+    <td style="padding-left: 5em; padding-right: 2em">pseudocount = 0.1</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">alpha = 1</td>\n+    <td style="padding-left: 5em; padding-right: 2em">verbosity = 2</td>\n+    <td style="padding-left: 5em; padding-right: 2em"></td>\n+  </tr>\n+\n+</table>\n+</pre>\n+<p>\n+This information can be useful in the event you wish to report a\n+problem with the FIMO software.\n+</p>\n+<hr>\n+<span style="background-color: #DDDDFF"><a href="#top_buttons"><b>Go to top</b></a></span>\n+</body>\n+</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test2.txt Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,12 @@
+# motif_id motif_alt_id sequence_name start stop strand score p-value q-value matched_sequence
+CCAGCAY DREME-5 chrM 510 516 + 13.5843 4.15e-05 0.668 CCAGCAC
+CCAGCAY DREME-5 chrM 5137 5143 + 13.5843 4.15e-05 0.668 CCAGCAC
+ACTAAYH DREME-1 chrM 440 446 + 11.7385 8.2e-05 0.327 ACTAACA
+ACTAAYH DREME-1 chrM 2093 2099 - 11.7385 8.2e-05 0.327 ACTAACA
+ACTAAYH DREME-1 chrM 2299 2305 - 11.7385 8.2e-05 0.327 ACTAACA
+ACTAAYH DREME-1 chrM 5186 5192 + 11.7385 8.2e-05 0.327 ACTAACA
+ACTAAYH DREME-1 chrM 6530 6536 + 11.7385 8.2e-05 0.327 ACTAACA
+ACTAAYH DREME-1 chrM 7742 7748 + 11.7385 8.2e-05 0.327 ACTAACA
+ACTAAYH DREME-1 chrM 13657 13663 + 11.7385 8.2e-05 0.327 ACTAACA
+ACTAAYH DREME-1 chrM 13741 13747 + 11.7385 8.2e-05 0.327 ACTAACA
+CCAGCAY DREME-5 chrM 4241 4247 + 13.1461 9.37e-05 1 CCAGCAT
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test2.xml Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Begin document body -->
+<fimo version="4.12.0" release="Tue Jun 27 16:22:50 2017 -0700">
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation=  xmlns:fimo="http://noble.gs.washington.edu/schema/fimo"
+>
+<command-line>fimo -oc fimo_test2_out --bgfile fimo_background_probs_hsa_chrM.txt dreme_fimo_input_1.xml hsa_chrM.fa</command-line>
+<settings>
+<setting name="output directory">fimo_test2_out</setting>
+<setting name="MEME file name">dreme_fimo_input_1.xml</setting>
+<setting name="sequence file name">hsa_chrM.fa</setting>
+<setting name="background file name">fimo_background_probs_hsa_chrM.txt</setting>
+<setting name="allow clobber">true</setting>
+<setting name="compute q-values">true</setting>
+<setting name="parse genomic coord.">false</setting>
+<setting name="text only">false</setting>
+<setting name="scan both strands">true</setting>
+<setting name="output threshold">0.0001</setting>
+<setting name="threshold type">p-value</setting>
+<setting name="max stored scores">100000</setting>
+<setting name="pseudocount">0.1</setting>
+<setting name="verbosity">2</setting>
+</settings>
+<sequence-data num-sequences="1" num-residues="16569" />
+<alphabet name="DNA" like="dna">
+<letter id="A" symbol="A" complement="T" name="Adenine" colour="CC0000"/>
+<letter id="C" symbol="C" complement="G" name="Cytosine" colour="0000CC"/>
+<letter id="G" symbol="G" complement="C" name="Guanine" colour="FFB300"/>
+<letter id="T" symbol="T" aliases="U" complement="A" name="Thymine" colour="008000"/>
+<letter id="N" symbol="N" aliases="X." equals="ACGT" name="Any base"/>
+<letter id="V" symbol="V" equals="ACG" name="Not T"/>
+<letter id="H" symbol="H" equals="ACT" name="Not G"/>
+<letter id="D" symbol="D" equals="AGT" name="Not C"/>
+<letter id="B" symbol="B" equals="CGT" name="Not A"/>
+<letter id="M" symbol="M" equals="AC" name="Amino"/>
+<letter id="R" symbol="R" equals="AG" name="Purine"/>
+<letter id="W" symbol="W" equals="AT" name="Weak"/>
+<letter id="S" symbol="S" equals="CG" name="Strong"/>
+<letter id="Y" symbol="Y" equals="CT" name="Pyrimidine"/>
+<letter id="K" symbol="K" equals="GT" name="Keto"/>
+</alphabet>
+<motif name="ACTAAYH" alt="DREME-1" width="7" best-possible-match="ACTAACA"/>
+<motif name="YTAACA" alt="DREME-2" width="6" best-possible-match="TTAACA"/>
+<motif name="TCTGT" alt="DREME-3" width="5" best-possible-match="TCTGT"/>
+<motif name="SCCAGG" alt="DREME-4" width="6" best-possible-match="CCCAGG"/>
+<motif name="CCAGCAY" alt="DREME-5" width="7" best-possible-match="CCAGCAC"/>
+<motif name="GMATGT" alt="DREME-6" width="6" best-possible-match="GAATGT"/>
+<background source="fimo_background_probs_hsa_chrM.txt">
+<value letter="A">0.278</value>
+<value letter="C">0.222</value>
+<value letter="G">0.222</value>
+<value letter="T">0.278</value>
+</background>
+<cisml-file>cisml.xml</cisml-file>
+</fimo>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test3.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test3.html Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,5891 @@\n+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n+<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme">\n+<head>\n+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n+<meta charset="UTF-8">\n+<title>FIMO Results</title>\n+<style type="text/css">\n+td.left {text-align: left;}\n+td.right {text-align: right; padding-right: 1cm;}\n+</style>\n+</head>\n+<body bgcolor="#D5F0FF">\n+<a name="top_buttons"></a>\n+<hr>\n+<table summary="buttons" align="left" cellspacing="0">\n+<tr>\n+<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td>\n+<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td>\n+<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td>\n+</tr>\n+</table>\n+<br/>\n+<br/>\n+<hr/>\n+<center><big><b>FIMO - Motif search tool</b></big></center>\n+<hr>\n+<p>\n+FIMO version 4.12.0, (Release date: Tue Jun 27 16:22:50 2017 -0700)\n+</p>\n+<p>\n+For further information on how to interpret these results\n+or to get a copy of the FIMO software please access\n+<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p>\n+<p>If you use FIMO in your research, please cite the following paper:<br>\n+Charles E. Grant, Timothy L. Bailey, and William Stafford Noble,\n+"FIMO: Scanning for occurrences of a given motif",\n+<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011.\n+<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p>\n+<hr>\n+<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center>\n+<hr>\n+<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;">\n+<p>\n+  DATABASE hsa_chrM.fa\n+  <br />\n+  Database contains 1 sequences, 16569 residues\n+</p>\n+<p>\n+  MOTIFS dreme_fimo_input_1.xml (DNA)\n+  <table>\n+    <thead>\n+      <tr>\n+        <th style="border-bottom: 1px dashed;">MOTIF</th>\n+        <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th>\n+        <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" >\n+         BEST POSSIBLE MATCH\n+        </th>\n+      </tr>\n+    </thead>\n+    <tbody>\n+      <tr>\n+        <td style="text-align:right;">ACTAAYH</td>\n+        <td style="text-align:right;padding-left: 1em;">7</td>\n+        <td style="text-align:left;padding-left: 1em;">ACTAACA</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">YTAACA</td>\n+        <td style="text-align:right;padding-left: 1em;">6</td>\n+        <td style="text-align:left;padding-left: 1em;">TTAACA</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">TCTGT</td>\n+        <td style="text-align:right;padding-left: 1em;">5</td>\n+        <td style="text-align:left;padding-left: 1em;">TCTGT</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">SCCAGG</td>\n+        <td style="text-align:right;padding-left: 1em;">6</td>\n+        <td style="text-align:left;padding-left: 1em;">CCCAGG</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">CCAGCAY</td>\n+        <td style="text-align:right;padding-left: 1em;">7</td>\n+        <td style="text-align:left;padding-left: 1em;">CCAGCAC</td>\n+       </tr>\n+      <tr>\n+        <td style="text-align:right;">GMATGT</td>\n+        <td style="text-align:right;padding-left: 1em;">6</td>\n+        <td style="text-align:left;padding-left: 1em;">GAATGT</td>\n+       </tr>\n+    </tbody>\n+  </table>\n+</p>\n+<p>\n+Random model letter frequencies (--uniform--):\n+<br/>\n+\n+A 0.250 C 0.250 G 0.250 T 0.250 </p>\n+</div>\n+<hr>\n+<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center>\n+<hr>\n+<ul>\n+<li>\n+There were 517 motif occurences with a p-value less than 0.01.\n+\n+The full set of motif occurences can be seen in the\n+tab-delimited plain text output file\n+<a href="fimo.txt">fimo.txt</a>, \n+t'..b't;">ACTAAYH</td>\n+      <td style="text-align:left;">DREME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">-</td>\n+      <td style="text-align:left;">15892</td>\n+      <td style="text-align:left;">15898</td>\n+      <td style="text-align:left;">0.00922</td>\n+      <td style="text-align:left;">0.559</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">ACTACAA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">ACTAAYH</td>\n+      <td style="text-align:left;">DREME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">15995</td>\n+      <td style="text-align:left;">16001</td>\n+      <td style="text-align:left;">0.00922</td>\n+      <td style="text-align:left;">0.559</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">GCTAAGA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">ACTAAYH</td>\n+      <td style="text-align:left;">DREME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">16269</td>\n+      <td style="text-align:left;">16275</td>\n+      <td style="text-align:left;">0.00922</td>\n+      <td style="text-align:left;">0.559</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">ACTAGGA</td>\n+   </tr>\n+    <tr>\n+      <td style="text-align:left;">ACTAAYH</td>\n+      <td style="text-align:left;">DREME-1</td>\n+      <td style="text-align:left;">chrM</td>\n+      <td style="text-align:center;">+</td>\n+      <td style="text-align:left;">16553</td>\n+      <td style="text-align:left;">16559</td>\n+      <td style="text-align:left;">0.00922</td>\n+      <td style="text-align:left;">0.559</td>\n+      <td style="text-align:left;font-size:x-large;font-family:monospace;">AATAAGA</td>\n+   </tr>\n+</tbody>\n+</table>\n+\n+<hr>\n+<center><big><b><a name="debugging_information">DEBUGGING INFORMATION</a></b></big></center>\n+<hr>\n+<p>\n+Command line:\n+</p>\n+<pre>\n+fimo -o ./fimo_test4_out --thresh 0.01 --bgfile --uniform-- --max-stored-scores 100000 --motif ACTAAYH --motif-pseudo 0.1 dreme_fimo_input_1.xml hsa_chrM.fa\n+</pre>\n+<p>\n+Settings:\n+</p>\n+<pre>\n+<table>\n+  <tr>\n+    <td style="padding-right: 2em">output_directory = ./fimo_test4_out</td>\n+    <td style="padding-left: 5em; padding-right: 2em">MEME file name = dreme_fimo_input_1.xml</td>\n+    <td style="padding-left: 5em; padding-right: 2em">sequence file name = hsa_chrM.fa</td>\n+  </tr>  <tr>\n+    <td style="padding-right: 2em">background file name = --uniform--</td>\n+    <td style="padding-left: 5em; padding-right: 2em">alphabet = DNA</td>\n+    <td style="padding-left: 5em; padding-right: 2em">max stored scores = 100000</td>\n+  </tr>  <tr>\n+    <td style="padding-right: 2em">allow clobber = false</td>\n+    <td style="padding-left: 5em; padding-right: 2em">compute q-values = true</td>\n+    <td style="padding-left: 5em; padding-right: 2em">parse genomic coord. = false</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">text only = false</td>\n+    <td style="padding-left: 5em; padding-right: 2em">scan both strands = true</td>\n+    <td style="padding-left: 5em; padding-right: 2em">max strand = false</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">threshold type = p-value</td>\n+    <td style="padding-left: 5em; padding-right: 2em">output theshold = 0.01</td>\n+    <td style="padding-left: 5em; padding-right: 2em">pseudocount = 0.1</td>\n+  </tr>\n+  <tr>\n+    <td style="padding-right: 2em">alpha = 1</td>\n+    <td style="padding-left: 5em; padding-right: 2em">verbosity = 2</td>\n+    <td style="padding-left: 5em; padding-right: 2em"></td>\n+  </tr>\n+\n+</table>\n+</pre>\n+<p>\n+This information can be useful in the event you wish to report a\n+problem with the FIMO software.\n+</p>\n+<hr>\n+<span style="background-color: #DDDDFF"><a href="#top_buttons"><b>Go to top</b></a></span>\n+</body>\n+</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test3.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test3.txt Thu May 17 14:11:15 2018 -0400
b
b'@@ -0,0 +1,518 @@\n+# motif_id\tmotif_alt_id\tsequence_name\tstart\tstop\tstrand\tscore\tp-value\tq-value\tmatched_sequence\n+ACTAAYH\tDREME-1\tchrM\t440\t446\t+\t12.1831\t6.1e-05\t0.239\tACTAACA\n+ACTAAYH\tDREME-1\tchrM\t2093\t2099\t-\t12.1831\t6.1e-05\t0.239\tACTAACA\n+ACTAAYH\tDREME-1\tchrM\t2299\t2305\t-\t12.1831\t6.1e-05\t0.239\tACTAACA\n+ACTAAYH\tDREME-1\tchrM\t5186\t5192\t+\t12.1831\t6.1e-05\t0.239\tACTAACA\n+ACTAAYH\tDREME-1\tchrM\t6530\t6536\t+\t12.1831\t6.1e-05\t0.239\tACTAACA\n+ACTAAYH\tDREME-1\tchrM\t7742\t7748\t+\t12.1831\t6.1e-05\t0.239\tACTAACA\n+ACTAAYH\tDREME-1\tchrM\t13657\t13663\t+\t12.1831\t6.1e-05\t0.239\tACTAACA\n+ACTAAYH\tDREME-1\tchrM\t13741\t13747\t+\t12.1831\t6.1e-05\t0.239\tACTAACA\n+ACTAAYH\tDREME-1\tchrM\t3768\t3774\t+\t11.5915\t0.000122\t0.239\tACTAATA\n+ACTAAYH\tDREME-1\tchrM\t5498\t5504\t+\t11.5915\t0.000122\t0.239\tACTAATA\n+ACTAAYH\tDREME-1\tchrM\t7736\t7742\t+\t11.5915\t0.000122\t0.239\tACTAATA\n+ACTAAYH\tDREME-1\tchrM\t9872\t9878\t+\t11.5915\t0.000122\t0.239\tACTAATA\n+ACTAAYH\tDREME-1\tchrM\t10106\t10112\t+\t11.5915\t0.000122\t0.239\tACTAATA\n+ACTAAYH\tDREME-1\tchrM\t10313\t10319\t+\t11.5915\t0.000122\t0.239\tACTAATA\n+ACTAAYH\tDREME-1\tchrM\t11818\t11824\t+\t11.5915\t0.000122\t0.239\tACTAATA\n+ACTAAYH\tDREME-1\tchrM\t15903\t15909\t+\t11.5915\t0.000122\t0.239\tACTAATA\n+ACTAAYH\tDREME-1\tchrM\t7732\t7738\t+\t11.4507\t0.000183\t0.294\tACTAACT\n+ACTAAYH\tDREME-1\tchrM\t10960\t10966\t+\t11.4507\t0.000183\t0.294\tACTAACT\n+ACTAAYH\tDREME-1\tchrM\t862\t868\t+\t11.2394\t0.000244\t0.294\tACTAACC\n+ACTAAYH\tDREME-1\tchrM\t1832\t1838\t+\t11.2394\t0.000244\t0.294\tACTAACC\n+ACTAAYH\tDREME-1\tchrM\t8679\t8685\t+\t11.2394\t0.000244\t0.294\tACTAACC\n+ACTAAYH\tDREME-1\tchrM\t8770\t8776\t+\t11.2394\t0.000244\t0.294\tACTAACC\n+ACTAAYH\tDREME-1\tchrM\t9347\t9353\t+\t11.2394\t0.000244\t0.294\tACTAACC\n+ACTAAYH\tDREME-1\tchrM\t9359\t9365\t+\t11.2394\t0.000244\t0.294\tACTAACC\n+ACTAAYH\tDREME-1\tchrM\t10302\t10308\t+\t11.2394\t0.000244\t0.294\tACTAACC\n+ACTAAYH\tDREME-1\tchrM\t14765\t14771\t+\t11.2394\t0.000244\t0.294\tACTAACC\n+ACTAAYH\tDREME-1\tchrM\t4466\t4472\t+\t10.8592\t0.000305\t0.302\tACTAATT\n+ACTAAYH\tDREME-1\tchrM\t11248\t11254\t+\t10.8592\t0.000305\t0.302\tACTAATT\n+ACTAAYH\tDREME-1\tchrM\t475\t481\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t7930\t7936\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t8649\t8655\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t8670\t8676\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t10768\t10774\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t11053\t11059\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t11101\t11107\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t12720\t12726\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t13149\t13155\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t14216\t14222\t+\t10.6479\t0.000366\t0.302\tACTAATC\n+ACTAAYH\tDREME-1\tchrM\t678\t684\t-\t-1.07042\t0.000549\t0.366\tACTAAGA\n+ACTAAYH\tDREME-1\tchrM\t1435\t1441\t+\t-1.07042\t0.000549\t0.366\tACTAAGA\n+ACTAAYH\tDREME-1\tchrM\t1524\t1530\t+\t-1.07042\t0.000549\t0.366\tACTAAAA\n+ACTAAYH\tDREME-1\tchrM\t2035\t2041\t-\t-1.07042\t0.000549\t0.366\tACTAAGA\n+ACTAAYH\tDREME-1\tchrM\t8440\t8446\t+\t-1.07042\t0.000549\t0.366\tACTAAAA\n+ACTAAYH\tDREME-1\tchrM\t9995\t10001\t-\t-1.07042\t0.000549\t0.366\tACTAAAA\n+ACTAAYH\tDREME-1\tchrM\t10225\t10231\t-\t-1.07042\t0.000549\t0.366\tACTAAGA\n+ACTAAYH\tDREME-1\tchrM\t12725\t12731\t-\t-1.07042\t0.000549\t0.366\tACTAAGA\n+ACTAAYH\tDREME-1\tchrM\t14393\t14399\t+\t-1.07042\t0.000549\t0.366\tACTAAAA\n+ACTAAYH\tDREME-1\tchrM\t2294\t2300\t+\t-1.30986\t0.00061\t0.398\tACTAATG\n+ACTAAYH\tDREME-1\tchrM\t206\t212\t-\t-1.80282\t0.00165\t0.441\tATTAACA\n+ACTAAYH\tDREME-1\tchrM\t237\t243\t+\t-1.80282\t0.00165\t0.441\tAATAACA\n+ACTAAYH\tDREME-1\tchrM\t370\t376\t+\t-1.80282\t0.00165\t0.441\tCCTAACA\n+ACTAAYH\tDREME-1\tchrM\t1630\t1636\t+\t-1.80282\t0.00165\t0.441\tACTTACA\n+ACTAAYH\tDREME-1\tchrM\t1702\t1708\t+\t-1.80282\t0.00165\t0.441\tACTACCA\n+ACTAAYH\tDREME-1\tchrM\t2309\t2315\t+\t-1.80282\t0.00165\t0.441\tAGTAACA\n+ACTAAYH\tDREME-1\tchrM\t2363\t2369\t+\t-1.80282\t0.00165\t0.441\tACTGACA\n+ACTAAYH\tDREME-1\tchrM\t2370\t2376\t+\t-1.80282\t0.00165\t0.441\tATTAACA\n+ACTAAYH\tDREME-1\tchrM\t2395\t2401\t+\t-1.80282\t0.00165\t0.441\tACCAACA\n+ACTAAYH\tDREME-1\tchrM\t2761\t2767\t+\t-1.80282\t0.00165\t0.441\tCCTAACA\n+ACTAAYH\tDREME-1\tchrM\t2784\t2790\t+\t-1.80282\t0.00165\t0.441\tACTACCA\n+ACTAAYH\tDREME-1\tchrM\t31'..b'.559\tACAAAAA\n+ACTAAYH\tDREME-1\tchrM\t7581\t7587\t-\t-15.0563\t0.00922\t0.559\tATTAAGA\n+ACTAAYH\tDREME-1\tchrM\t7692\t7698\t-\t-15.0563\t0.00922\t0.559\tACTAGGA\n+ACTAAYH\tDREME-1\tchrM\t7800\t7806\t-\t-15.0563\t0.00922\t0.559\tACTAGGA\n+ACTAAYH\tDREME-1\tchrM\t8230\t8236\t+\t-15.0563\t0.00922\t0.559\tCCTAAAA\n+ACTAAYH\tDREME-1\tchrM\t8323\t8329\t-\t-15.0563\t0.00922\t0.559\tACTTAAA\n+ACTAAYH\tDREME-1\tchrM\t8335\t8341\t+\t-15.0563\t0.00922\t0.559\tATTAAGA\n+ACTAAYH\tDREME-1\tchrM\t8494\t8500\t+\t-15.0563\t0.00922\t0.559\tAATAAAA\n+ACTAAYH\tDREME-1\tchrM\t8521\t8527\t+\t-15.0563\t0.00922\t0.559\tACCAAAA\n+ACTAAYH\tDREME-1\tchrM\t8531\t8537\t+\t-15.0563\t0.00922\t0.559\tACGAAAA\n+ACTAAYH\tDREME-1\tchrM\t8757\t8763\t-\t-15.0563\t0.00922\t0.559\tAATAAAA\n+ACTAAYH\tDREME-1\tchrM\t8881\t8887\t+\t-15.0563\t0.00922\t0.559\tTCTAAGA\n+ACTAAYH\tDREME-1\tchrM\t8887\t8893\t+\t-15.0563\t0.00922\t0.559\tATTAAAA\n+ACTAAYH\tDREME-1\tchrM\t9173\t9179\t-\t-15.0563\t0.00922\t0.559\tACTAGAA\n+ACTAAYH\tDREME-1\tchrM\t9236\t9242\t+\t-15.0563\t0.00922\t0.559\tAGTAAAA\n+ACTAAYH\tDREME-1\tchrM\t9706\t9712\t-\t-15.0563\t0.00922\t0.559\tAGTAAAA\n+ACTAAYH\tDREME-1\tchrM\t9796\t9802\t-\t-15.0563\t0.00922\t0.559\tACAAAAA\n+ACTAAYH\tDREME-1\tchrM\t9987\t9993\t-\t-15.0563\t0.00922\t0.559\tAGTAAGA\n+ACTAAYH\tDREME-1\tchrM\t10075\t10081\t-\t-15.0563\t0.00922\t0.559\tATTAAAA\n+ACTAAYH\tDREME-1\tchrM\t10243\t10249\t-\t-15.0563\t0.00922\t0.559\tAATAAGA\n+ACTAAYH\tDREME-1\tchrM\t10333\t10339\t-\t-15.0563\t0.00922\t0.559\tAATAAGA\n+ACTAAYH\tDREME-1\tchrM\t10376\t10382\t+\t-15.0563\t0.00922\t0.559\tACTACAA\n+ACTAAYH\tDREME-1\tchrM\t10379\t10385\t+\t-15.0563\t0.00922\t0.559\tACAAAAA\n+ACTAAYH\tDREME-1\tchrM\t10630\t10636\t-\t-15.0563\t0.00922\t0.559\tGCTAAGA\n+ACTAAYH\tDREME-1\tchrM\t10762\t10768\t+\t-15.0563\t0.00922\t0.559\tGCTAAAA\n+ACTAAYH\tDREME-1\tchrM\t11029\t11035\t+\t-15.0563\t0.00922\t0.559\tACGAAAA\n+ACTAAYH\tDREME-1\tchrM\t11367\t11373\t-\t-15.0563\t0.00922\t0.559\tACTATAA\n+ACTAAYH\tDREME-1\tchrM\t11602\t11608\t+\t-15.0563\t0.00922\t0.559\tCCTAAAA\n+ACTAAYH\tDREME-1\tchrM\t11646\t11652\t-\t-15.0563\t0.00922\t0.559\tACTACGA\n+ACTAAYH\tDREME-1\tchrM\t11751\t11757\t+\t-15.0563\t0.00922\t0.559\tACTCAAA\n+ACTAAYH\tDREME-1\tchrM\t11757\t11763\t+\t-15.0563\t0.00922\t0.559\tACTACGA\n+ACTAAYH\tDREME-1\tchrM\t12152\t12158\t+\t-15.0563\t0.00922\t0.559\tACCAAAA\n+ACTAAYH\tDREME-1\tchrM\t12295\t12301\t-\t-15.0563\t0.00922\t0.559\tCCTAAGA\n+ACTAAYH\tDREME-1\tchrM\t12311\t12317\t-\t-15.0563\t0.00922\t0.559\tACCAAAA\n+ACTAAYH\tDREME-1\tchrM\t12327\t12333\t+\t-15.0563\t0.00922\t0.559\tAATAAAA\n+ACTAAYH\tDREME-1\tchrM\t12352\t12358\t+\t-15.0563\t0.00922\t0.559\tACTATAA\n+ACTAAYH\tDREME-1\tchrM\t12416\t12422\t+\t-15.0563\t0.00922\t0.559\tACAAAAA\n+ACTAAYH\tDREME-1\tchrM\t12509\t12515\t+\t-15.0563\t0.00922\t0.559\tACCAAGA\n+ACTAAYH\tDREME-1\tchrM\t12662\t12668\t+\t-15.0563\t0.00922\t0.559\tACTCAGA\n+ACTAAYH\tDREME-1\tchrM\t13109\t13115\t-\t-15.0563\t0.00922\t0.559\tAGTAAGA\n+ACTAAYH\tDREME-1\tchrM\t13263\t13269\t+\t-15.0563\t0.00922\t0.559\tACTAGGA\n+ACTAAYH\tDREME-1\tchrM\t13422\t13428\t+\t-15.0563\t0.00922\t0.559\tACTCAAA\n+ACTAAYH\tDREME-1\tchrM\t13667\t13673\t+\t-15.0563\t0.00922\t0.559\tACGAAAA\n+ACTAAYH\tDREME-1\tchrM\t13791\t13797\t+\t-15.0563\t0.00922\t0.559\tCCTAAAA\n+ACTAAYH\tDREME-1\tchrM\t13866\t13872\t+\t-15.0563\t0.00922\t0.559\tACTTAAA\n+ACTAAYH\tDREME-1\tchrM\t13872\t13878\t+\t-15.0563\t0.00922\t0.559\tAATAAAA\n+ACTAAYH\tDREME-1\tchrM\t13894\t13900\t-\t-15.0563\t0.00922\t0.559\tAATAAAA\n+ACTAAYH\tDREME-1\tchrM\t14007\t14013\t+\t-15.0563\t0.00922\t0.559\tACTAGAA\n+ACTAAYH\tDREME-1\tchrM\t14023\t14029\t+\t-15.0563\t0.00922\t0.559\tCCTAAAA\n+ACTAAYH\tDREME-1\tchrM\t14405\t14411\t+\t-15.0563\t0.00922\t0.559\tACCAAGA\n+ACTAAYH\tDREME-1\tchrM\t14501\t14507\t+\t-15.0563\t0.00922\t0.559\tATTAAAA\n+ACTAAYH\tDREME-1\tchrM\t14687\t14693\t+\t-15.0563\t0.00922\t0.559\tACTACAA\n+ACTAAYH\tDREME-1\tchrM\t14732\t14738\t+\t-15.0563\t0.00922\t0.559\tACTACAA\n+ACTAAYH\tDREME-1\tchrM\t14776\t14782\t+\t-15.0563\t0.00922\t0.559\tAATAAAA\n+ACTAAYH\tDREME-1\tchrM\t15072\t15078\t+\t-15.0563\t0.00922\t0.559\tACTCAGA\n+ACTAAYH\tDREME-1\tchrM\t15607\t15613\t+\t-15.0563\t0.00922\t0.559\tACTAGGA\n+ACTAAYH\tDREME-1\tchrM\t15874\t15880\t+\t-15.0563\t0.00922\t0.559\tACTCAAA\n+ACTAAYH\tDREME-1\tchrM\t15892\t15898\t-\t-15.0563\t0.00922\t0.559\tACTACAA\n+ACTAAYH\tDREME-1\tchrM\t15995\t16001\t+\t-15.0563\t0.00922\t0.559\tGCTAAGA\n+ACTAAYH\tDREME-1\tchrM\t16269\t16275\t+\t-15.0563\t0.00922\t0.559\tACTAGGA\n+ACTAAYH\tDREME-1\tchrM\t16553\t16559\t+\t-15.0563\t0.00922\t0.559\tAATAAGA\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_test3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_test3.xml Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Begin document body -->
+<fimo version="4.12.0" release="Tue Jun 27 16:22:50 2017 -0700">
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation=  xmlns:fimo="http://noble.gs.washington.edu/schema/fimo"
+>
+<command-line>fimo -o ./fimo_test4_out --thresh 0.01 --bgfile --uniform-- --max-stored-scores 100000 --motif ACTAAYH --motif-pseudo 0.1 dreme_fimo_input_1.xml hsa_chrM.fa</command-line>
+<settings>
+<setting name="output directory">./fimo_test4_out</setting>
+<setting name="MEME file name">dreme_fimo_input_1.xml</setting>
+<setting name="sequence file name">hsa_chrM.fa</setting>
+<setting name="background file name">--uniform--</setting>
+<setting name="allow clobber">false</setting>
+<setting name="compute q-values">true</setting>
+<setting name="parse genomic coord.">false</setting>
+<setting name="text only">false</setting>
+<setting name="scan both strands">true</setting>
+<setting name="output threshold">0.01</setting>
+<setting name="threshold type">p-value</setting>
+<setting name="max stored scores">100000</setting>
+<setting name="pseudocount">0.1</setting>
+<setting name="verbosity">2</setting>
+<setting name="selected motif">ACTAAYH</setting>
+</settings>
+<sequence-data num-sequences="1" num-residues="16569" />
+<alphabet name="DNA" like="dna">
+<letter id="A" symbol="A" complement="T" name="Adenine" colour="CC0000"/>
+<letter id="C" symbol="C" complement="G" name="Cytosine" colour="0000CC"/>
+<letter id="G" symbol="G" complement="C" name="Guanine" colour="FFB300"/>
+<letter id="T" symbol="T" aliases="U" complement="A" name="Thymine" colour="008000"/>
+<letter id="N" symbol="N" aliases="X." equals="ACGT" name="Any base"/>
+<letter id="V" symbol="V" equals="ACG" name="Not T"/>
+<letter id="H" symbol="H" equals="ACT" name="Not G"/>
+<letter id="D" symbol="D" equals="AGT" name="Not C"/>
+<letter id="B" symbol="B" equals="CGT" name="Not A"/>
+<letter id="M" symbol="M" equals="AC" name="Amino"/>
+<letter id="R" symbol="R" equals="AG" name="Purine"/>
+<letter id="W" symbol="W" equals="AT" name="Weak"/>
+<letter id="S" symbol="S" equals="CG" name="Strong"/>
+<letter id="Y" symbol="Y" equals="CT" name="Pyrimidine"/>
+<letter id="K" symbol="K" equals="GT" name="Keto"/>
+</alphabet>
+<motif name="ACTAAYH" alt="DREME-1" width="7" best-possible-match="ACTAACA"/>
+<motif name="YTAACA" alt="DREME-2" width="6" best-possible-match="TTAACA"/>
+<motif name="TCTGT" alt="DREME-3" width="5" best-possible-match="TCTGT"/>
+<motif name="SCCAGG" alt="DREME-4" width="6" best-possible-match="CCCAGG"/>
+<motif name="CCAGCAY" alt="DREME-5" width="7" best-possible-match="CCAGCAC"/>
+<motif name="GMATGT" alt="DREME-6" width="6" best-possible-match="GAATGT"/>
+<background source="--uniform--">
+<value letter="A">0.250</value>
+<value letter="C">0.250</value>
+<value letter="G">0.250</value>
+<value letter="T">0.250</value>
+</background>
+<cisml-file>cisml.xml</cisml-file>
+</fimo>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_txt_1.txt
--- a/test-data/fimo_output_txt_1.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,100 +0,0 @@
-#pattern name sequence name start stop strand score p-value q-value matched sequence
-1 phiX174 1388 1398 + 29.4024 6.36e-11 1.25e-09 AATATCTATAA
-1 phiX174 847 857 + 29.122 7.02e-11 1.25e-09 AATGTCTAAAG
-1 phiX174 2301 2311 + 27.6463 1.08e-10 1.29e-09 AGGTTATAACG
-1 phiX174 5063 5073 + 25.5366 2.73e-10 2.25e-09 AGGAGCTAAAG
-1 phiX174 989 999 + 25.3049 3.15e-10 2.25e-09 TGAGGATAAAT
-1 phiX174 4713 4723 + 23.622 7.74e-10 3.48e-09 GACTGCTATCA
-1 phiX174 5048 5058 + 23.3293 8.51e-10 3.48e-09 TGCTGCTAAAG
-1 phiX174 855 865 + 23.3049 8.64e-10 3.48e-09 AAGGTAAAAAA
-1 phiX174 3155 3165 + 23.0366 9.76e-10 3.48e-09 TATGGCTAAAG
-1 phiX174 5009 5019 + 23.0366 9.76e-10 3.48e-09 TGTGGCTAAAT
-1 phiX174 814 824 + 22.5854 1.28e-09 4.14e-09 TGCGTCAAAAA
-1 phiX174 2832 2842 + 22.3415 1.42e-09 4.23e-09 TTGGTCTAACT
-1 phiX174 3830 3840 + 21.8293 1.7e-09 4.68e-09 TATTGATAAAG
-1 phiX174 3560 3570 + 21.5976 1.89e-09 4.82e-09 TGCGTCTATTA
-1 phiX174 2882 2892 + 21.1951 2.29e-09 5.46e-09 AGGTTATTAAA
-1 phiX174 4453 4463 + 20.8902 2.58e-09 5.75e-09 AAGGTATTAAG
-1 phiX174 2493 2503 + 20.3415 3.06e-09 5.79e-09 GACACCTAAAG
-1 phiX174 4104 4114 + 20.3171 3.08e-09 5.79e-09 GGCTTCCATAA
-1 phiX174 4955 4965 + 20.3171 3.08e-09 5.79e-09 TGATGCTAAAG
-1 phiX174 1885 1895 + 19.9268 3.61e-09 6.45e-09 TGCGACTAAAG
-1 phiX174 3376 3386 + 19.7683 3.81e-09 6.48e-09 AGAATCAAAAA
-1 phiX174 52 62 + 19.5732 4.06e-09 6.58e-09 TGAGTCGAAAA
-1 phiX174 1390 1400 + 19.378 4.26e-09 6.61e-09 TATCTATAACA
-1 phiX174 2017 2027 + 19.0854 4.6e-09 6.85e-09 TTCGTCTAAGA
-1 phiX174 1000 1010 + 18.878 4.88e-09 6.97e-09 TATGTCTAATA
-1 phiX174 1555 1565 + 18.439 5.58e-09 7.37e-09 GACTTCTACCA
-1 phiX174 4430 4440 + 18.4268 5.62e-09 7.37e-09 TGAGTATAATT
-1 phiX174 1927 1937 + 18.2927 5.82e-09 7.37e-09 GACTTATACCG
-1 phiX174 2981 2991 + 18.0732 6.13e-09 7.37e-09 CATGTCTAAAT
-1 phiX174 4203 4213 + 17.9268 6.34e-09 7.37e-09 GACGGCCATAA
-1 phiX174 1669 1679 + 17.8659 6.4e-09 7.37e-09 TGGAGGTAAAA
-1 phiX174 3260 3270 + 17.5 7.01e-09 7.82e-09 CGCTGATAAAG
-1 phiX174 3047 3057 + 17.2805 7.4e-09 7.85e-09 TACCGATAACA
-1 phiX174 4176 4186 + 17.1829 7.6e-09 7.85e-09 GAGTTCGATAA
-1 phiX174 4118 4128 + 17.1341 7.7e-09 7.85e-09 GATGGATAACC
-1 phiX174 5370 5380 + 16.9878 8.03e-09 7.87e-09 GGCGTATCCAA
-1 phiX174 1242 1252 + 16.5122 8.94e-09 7.87e-09 AGTGGATTAAG
-1 phiX174 2583 2593 + 16.5122 8.94e-09 7.87e-09 TACATCTGTCA
-1 phiX174 698 708 + 16.4146 9.13e-09 7.87e-09 TACGGAAAACA
-1 phiX174 2299 2309 + 16.3537 9.26e-09 7.87e-09 TGAGGTTATAA
-1 phiX174 4189 4199 + 16.1707 9.69e-09 7.87e-09 GTGATATGTAT
-1 phiX174 275 285 + 16.0976 9.85e-09 7.87e-09 GGTTTAGATAT
-1 phiX174 1801 1811 + 16.0366 1e-08 7.87e-09 GACCTATAAAC
-1 phiX174 1386 1396 + 15.9268 1.03e-08 7.87e-09 TGAATATCTAT
-1 phiX174 1303 1313 + 15.9024 1.03e-08 7.87e-09 TGGTTATATTG
-1 phiX174 3772 3782 + 15.878 1.04e-08 7.87e-09 AGGATATTTCT
-1 phiX174 1288 1298 + 15.8659 1.04e-08 7.87e-09 GACTGTTAACA
-1 phiX174 2577 2587 + 15.7683 1.08e-08 7.87e-09 GATGGATACAT
-1 phiX174 937 947 + 15.7561 1.08e-08 7.87e-09 TTGGTATGTAG
-1 phiX174 904 914 + 15.6585 1.11e-08 7.93e-09 AGGTACTAAAG
-1 phiX174 2279 2289 + 15.5854 1.13e-08 7.93e-09 TCGTGATAAAA
-1 phiX174 3164 3174 + 15.5 1.16e-08 7.98e-09 AGCTGGTAAAG
-1 phiX174 24 34 + 15.3293 1.23e-08 8.24e-09 AGAAGTTAACA
-1 phiX174 838 848 + 15.2561 1.27e-08 8.24e-09 GAGTGATGTAA
-1 phiX174 853 863 + 15.2561 1.27e-08 8.24e-09 TAAAGGTAAAA
-1 phiX174 1984 1994 + 15.0244 1.36e-08 8.68e-09 AATTTCTATGA
-1 phiX174 1 11 + 14.8293 1.46e-08 9.05e-09 GAGTTTTATCG
-1 phiX174 4307 4317 + 14.7927 1.47e-08 9.05e-09 TATTAATAACA
-1 phiX174 4303 4313 + 14.6585 1.52e-08 9.19e-09 TTGATATTAAT
-1 phiX174 5033 5043 + 14.561 1.58e-08 9.41e-09 GTCAGATATGG
-1 phiX174 2579 2589 + 14.2927 1.73e-08 1.01e-08 TGGATACATCT
-1 phiX174 322 332 + 14.1951 1.82e-08 1.05e-08 GACATTTTAAA
-1 phiX174 5001 5011 + 13.8902 2.09e-08 1.19e-08 GGTTTCTATGT
-1 phiX174 4217 4227 + 13.8171 2.15e-08 1.2e-08 TGCTTCTGACG
-1 phiX174 4262 4272 + 13.7805 2.18e-08 1.2e-08 AATGGATGAAT
-1 phiX174 3569 3579 + 13.7073 2.26e-08 1.22e-08 TATGGAAAACA
-1 phiX174 194 204 + 13.6829 2.29e-08 1.22e-08 ATCAACTAACG
-1 phiX174 131 141 + 13.4756 2.49e-08 1.31e-08 AAATGAGAAAA
-1 phiX174 1491 1501 + 13.4024 2.55e-08 1.32e-08 GCCATCTCAAA
-1 phiX174 434 444 + 13.2805 2.67e-08 1.36e-08 GGCCTCTATTA
-1 phiX174 4565 4575 + 13.2439 2.73e-08 1.36e-08 TTGGTTTATCG
-1 phiX174 102 112 + 13.2195 2.75e-08 1.36e-08 GAATTAAATCG
-1 phiX174 903 913 + 13.1463 2.82e-08 1.38e-08 GAGGTACTAAA
-1 phiX174 4748 4758 + 12.9756 3.01e-08 1.45e-08 TACAGCTAATG
-1 phiX174 2622 2632 + 12.8659 3.16e-08 1.5e-08 TGCTGATATTG
-1 phiX174 467 477 + 12.7317 3.35e-08 1.57e-08 TTTGGATTTAA
-1 phiX174 4033 4043 + 12.6829 3.44e-08 1.58e-08 AGCGTATCGAG
-1 phiX174 1348 1358 + 12.6707 3.46e-08 1.58e-08 TACCAATAAAA
-1 phiX174 239 249 + 12.5732 3.62e-08 1.64e-08 AGTGGCTTAAT
-1 phiX174 500 510 + 12.4634 3.84e-08 1.71e-08 GACGAGTAACA
-1 phiX174 3001 3011 + 12.4146 3.93e-08 1.73e-08 GCGGTCAAAAA
-1 phiX174 3776 3786 + 12.378 3.98e-08 1.73e-08 TATTTCTAATG
-1 phiX174 2026 2036 + 12.3293 4.06e-08 1.75e-08 GAAGTTTAAGA
-1 phiX174 4237 4247 + 12.3049 4.12e-08 1.75e-08 AGTTTGTATCT
-1 phiX174 803 813 + 12.2439 4.24e-08 1.78e-08 AGAAGAAAACG
-1 phiX174 3770 3780 + 12.1829 4.35e-08 1.81e-08 AAAGGATATTT
-1 phiX174 3429 3439 + 12.122 4.45e-08 1.82e-08 GAGATGCAAAA
-1 phiX174 99 109 + 12.1098 4.48e-08 1.82e-08 TACGAATTAAA
-1 phiX174 67 77 + 11.9268 4.78e-08 1.92e-08 TCTTGATAAAG
-1 phiX174 5332 5342 + 11.7195 5.13e-08 2.01e-08 ATCTGCTCAAA
-1 phiX174 277 287 + 11.7073 5.14e-08 2.01e-08 TTTAGATATGA
-1 phiX174 4338 4348 + 11.6951 5.18e-08 2.01e-08 GGGGACGAAAA
-1 phiX174 3812 3822 + 11.6585 5.28e-08 2.03e-08 GGTTGATATTT
-1 phiX174 1909 1919 + 11.5488 5.51e-08 2.08e-08 TAACGCTAAAG
-1 phiX174 3000 3010 + 11.5366 5.54e-08 2.08e-08 GGCGGTCAAAA
-1 phiX174 3891 3901 + 11.439 5.75e-08 2.11e-08 ATTGGCTCTAA
-1 phiX174 3079 3089 + 11.4268 5.76e-08 2.11e-08 CTGGTATTAAA
-1 phiX174 37 47 + 11.4146 5.79e-08 2.11e-08 TTCGGATATTT
-1 phiX174 380 390 + 11.3293 6.01e-08 2.17e-08 GTAAGAAATCA
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_txt_2.txt
--- a/test-data/fimo_output_txt_2.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,100 +0,0 @@
-#pattern name sequence name start stop strand score p-value q-value matched sequence
-1 phiX174 1388 1398 + 29.4024 6.36e-11 0 AATATCTATAA
-1 phiX174 847 857 + 29.122 7.02e-11 0 AATGTCTAAAG
-1 phiX174 2301 2311 + 27.6463 1.08e-10 0 AGGTTATAACG
-1 phiX174 5063 5073 + 25.5366 2.73e-10 0 AGGAGCTAAAG
-1 phiX174 989 999 + 25.3049 3.15e-10 0 TGAGGATAAAT
-1 phiX174 4713 4723 + 23.622 7.74e-10 0 GACTGCTATCA
-1 phiX174 5048 5058 + 23.3293 8.51e-10 0 TGCTGCTAAAG
-1 phiX174 855 865 + 23.3049 8.64e-10 0 AAGGTAAAAAA
-1 phiX174 3155 3165 + 23.0366 9.76e-10 0 TATGGCTAAAG
-1 phiX174 5009 5019 + 23.0366 9.76e-10 0 TGTGGCTAAAT
-1 phiX174 814 824 + 22.5854 1.28e-09 0 TGCGTCAAAAA
-1 phiX174 2832 2842 + 22.3415 1.42e-09 0 TTGGTCTAACT
-1 phiX174 3830 3840 + 21.8293 1.7e-09 0 TATTGATAAAG
-1 phiX174 3560 3570 + 21.5976 1.89e-09 0 TGCGTCTATTA
-1 phiX174 2882 2892 + 21.1951 2.29e-09 0 AGGTTATTAAA
-1 phiX174 4453 4463 + 20.8902 2.58e-09 0 AAGGTATTAAG
-1 phiX174 2493 2503 + 20.3415 3.06e-09 0 GACACCTAAAG
-1 phiX174 4104 4114 + 20.3171 3.08e-09 0 GGCTTCCATAA
-1 phiX174 4955 4965 + 20.3171 3.08e-09 0 TGATGCTAAAG
-1 phiX174 1885 1895 + 19.9268 3.61e-09 0 TGCGACTAAAG
-1 phiX174 3376 3386 + 19.7683 3.81e-09 0 AGAATCAAAAA
-1 phiX174 52 62 + 19.5732 4.06e-09 0 TGAGTCGAAAA
-1 phiX174 1390 1400 + 19.378 4.26e-09 0 TATCTATAACA
-1 phiX174 2017 2027 + 19.0854 4.6e-09 0 TTCGTCTAAGA
-1 phiX174 1000 1010 + 18.878 4.88e-09 0 TATGTCTAATA
-1 phiX174 1555 1565 + 18.439 5.58e-09 0 GACTTCTACCA
-1 phiX174 4430 4440 + 18.4268 5.62e-09 0 TGAGTATAATT
-1 phiX174 1927 1937 + 18.2927 5.82e-09 0 GACTTATACCG
-1 phiX174 2981 2991 + 18.0732 6.13e-09 0 CATGTCTAAAT
-1 phiX174 4203 4213 + 17.9268 6.34e-09 0 GACGGCCATAA
-1 phiX174 1669 1679 + 17.8659 6.4e-09 0 TGGAGGTAAAA
-1 phiX174 3260 3270 + 17.5 7.01e-09 0 CGCTGATAAAG
-1 phiX174 3047 3057 + 17.2805 7.4e-09 0 TACCGATAACA
-1 phiX174 4176 4186 + 17.1829 7.6e-09 0 GAGTTCGATAA
-1 phiX174 4118 4128 + 17.1341 7.7e-09 0 GATGGATAACC
-1 phiX174 5370 5380 + 16.9878 8.03e-09 0 GGCGTATCCAA
-1 phiX174 1242 1252 + 16.5122 8.94e-09 0 AGTGGATTAAG
-1 phiX174 2583 2593 + 16.5122 8.94e-09 0 TACATCTGTCA
-1 phiX174 698 708 + 16.4146 9.13e-09 0 TACGGAAAACA
-1 phiX174 2299 2309 + 16.3537 9.26e-09 0 TGAGGTTATAA
-1 phiX174 4189 4199 + 16.1707 9.69e-09 0 GTGATATGTAT
-1 phiX174 275 285 + 16.0976 9.85e-09 0 GGTTTAGATAT
-1 phiX174 1801 1811 + 16.0366 1e-08 0 GACCTATAAAC
-1 phiX174 1386 1396 + 15.9268 1.03e-08 0 TGAATATCTAT
-1 phiX174 1303 1313 + 15.9024 1.03e-08 0 TGGTTATATTG
-1 phiX174 3772 3782 + 15.878 1.04e-08 0 AGGATATTTCT
-1 phiX174 1288 1298 + 15.8659 1.04e-08 0 GACTGTTAACA
-1 phiX174 2577 2587 + 15.7683 1.08e-08 0 GATGGATACAT
-1 phiX174 937 947 + 15.7561 1.08e-08 0 TTGGTATGTAG
-1 phiX174 904 914 + 15.6585 1.11e-08 0 AGGTACTAAAG
-1 phiX174 2279 2289 + 15.5854 1.13e-08 0 TCGTGATAAAA
-1 phiX174 3164 3174 + 15.5 1.16e-08 0 AGCTGGTAAAG
-1 phiX174 24 34 + 15.3293 1.23e-08 0 AGAAGTTAACA
-1 phiX174 838 848 + 15.2561 1.27e-08 0 GAGTGATGTAA
-1 phiX174 853 863 + 15.2561 1.27e-08 0 TAAAGGTAAAA
-1 phiX174 1984 1994 + 15.0244 1.36e-08 0 AATTTCTATGA
-1 phiX174 1 11 + 14.8293 1.46e-08 0 GAGTTTTATCG
-1 phiX174 4307 4317 + 14.7927 1.47e-08 0 TATTAATAACA
-1 phiX174 4303 4313 + 14.6585 1.52e-08 0 TTGATATTAAT
-1 phiX174 5033 5043 + 14.561 1.58e-08 0 GTCAGATATGG
-1 phiX174 2579 2589 + 14.2927 1.73e-08 0 TGGATACATCT
-1 phiX174 322 332 + 14.1951 1.82e-08 0 GACATTTTAAA
-1 phiX174 5001 5011 + 13.8902 2.09e-08 0 GGTTTCTATGT
-1 phiX174 4217 4227 + 13.8171 2.15e-08 0 TGCTTCTGACG
-1 phiX174 4262 4272 + 13.7805 2.18e-08 0 AATGGATGAAT
-1 phiX174 3569 3579 + 13.7073 2.26e-08 0 TATGGAAAACA
-1 phiX174 194 204 + 13.6829 2.29e-08 0 ATCAACTAACG
-1 phiX174 131 141 + 13.4756 2.49e-08 0 AAATGAGAAAA
-1 phiX174 1491 1501 + 13.4024 2.55e-08 0 GCCATCTCAAA
-1 phiX174 434 444 + 13.2805 2.67e-08 0 GGCCTCTATTA
-1 phiX174 4565 4575 + 13.2439 2.73e-08 0 TTGGTTTATCG
-1 phiX174 102 112 + 13.2195 2.75e-08 0 GAATTAAATCG
-1 phiX174 903 913 + 13.1463 2.82e-08 0 GAGGTACTAAA
-1 phiX174 4748 4758 + 12.9756 3.01e-08 0 TACAGCTAATG
-1 phiX174 2622 2632 + 12.8659 3.16e-08 0 TGCTGATATTG
-1 phiX174 467 477 + 12.7317 3.35e-08 0 TTTGGATTTAA
-1 phiX174 4033 4043 + 12.6829 3.44e-08 0 AGCGTATCGAG
-1 phiX174 1348 1358 + 12.6707 3.46e-08 0 TACCAATAAAA
-1 phiX174 239 249 + 12.5732 3.62e-08 0 AGTGGCTTAAT
-1 phiX174 500 510 + 12.4634 3.84e-08 0 GACGAGTAACA
-1 phiX174 3001 3011 + 12.4146 3.93e-08 0 GCGGTCAAAAA
-1 phiX174 3776 3786 + 12.378 3.98e-08 0 TATTTCTAATG
-1 phiX174 2026 2036 + 12.3293 4.06e-08 0 GAAGTTTAAGA
-1 phiX174 4237 4247 + 12.3049 4.12e-08 0 AGTTTGTATCT
-1 phiX174 803 813 + 12.2439 4.24e-08 0 AGAAGAAAACG
-1 phiX174 3770 3780 + 12.1829 4.35e-08 0 AAAGGATATTT
-1 phiX174 3429 3439 + 12.122 4.45e-08 0 GAGATGCAAAA
-1 phiX174 99 109 + 12.1098 4.48e-08 0 TACGAATTAAA
-1 phiX174 67 77 + 11.9268 4.78e-08 0 TCTTGATAAAG
-1 phiX174 5332 5342 + 11.7195 5.13e-08 0 ATCTGCTCAAA
-1 phiX174 277 287 + 11.7073 5.14e-08 0 TTTAGATATGA
-1 phiX174 4338 4348 + 11.6951 5.18e-08 0 GGGGACGAAAA
-1 phiX174 3812 3822 + 11.6585 5.28e-08 0 GGTTGATATTT
-1 phiX174 1909 1919 + 11.5488 5.51e-08 0 TAACGCTAAAG
-1 phiX174 3000 3010 + 11.5366 5.54e-08 0 GGCGGTCAAAA
-1 phiX174 3891 3901 + 11.439 5.75e-08 0 ATTGGCTCTAA
-1 phiX174 3079 3089 + 11.4268 5.76e-08 0 CTGGTATTAAA
-1 phiX174 37 47 + 11.4146 5.79e-08 0 TTCGGATATTT
-1 phiX174 380 390 + 11.3293 6.01e-08 0 GTAAGAAATCA
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_xml_1.xml
--- a/test-data/fimo_output_xml_1.xml Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,66 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Begin document body -->
-<settings>
-<setting name="allow clobber">false</setting>
-<setting name="compute q-values">true</setting>
-<setting name="parse genomic coord.">false</setting>
-<setting name="text only">false</setting>
-<setting name="scan both strands">false</setting>
-<setting name="output threshold">0.0001</setting>
-<setting name="threshold type">p-value</setting>
-<setting name="max stored scores">100000</setting>
-<setting name="pseudocount">0.1</setting>
-<setting name="verbosity">1</setting>
-</settings>
-<sequence-data num-sequences="1" num-residues="5386" />
-<alphabet name="Protein" like="protein">
-<letter id="A" symbol="A" name="Alanine" colour="0000CC"/>
-<letter id="C" symbol="C" name="Cysteine" colour="0000CC"/>
-<letter id="D" symbol="D" name="Aspartic acid" colour="FF00FF"/>
-<letter id="E" symbol="E" name="Glutamic acid" colour="FF00FF"/>
-<letter id="F" symbol="F" name="Phenylalanine" colour="0000CC"/>
-<letter id="G" symbol="G" name="Glycine" colour="FFB300"/>
-<letter id="H" symbol="H" name="Histidine" colour="FFCCCC"/>
-<letter id="I" symbol="I" name="Isoleucine" colour="0000CC"/>
-<letter id="K" symbol="K" name="Lysine" colour="CC0000"/>
-<letter id="L" symbol="L" name="Leucine" colour="0000CC"/>
-<letter id="M" symbol="M" name="Methionine" colour="0000CC"/>
-<letter id="N" symbol="N" name="Asparagine" colour="008000"/>
-<letter id="P" symbol="P" name="Proline" colour="FFFF00"/>
-<letter id="Q" symbol="Q" name="Glutamine" colour="008000"/>
-<letter id="R" symbol="R" name="Arginine" colour="CC0000"/>
-<letter id="S" symbol="S" name="Serine" colour="008000"/>
-<letter id="T" symbol="T" name="Threonine" colour="008000"/>
-<letter id="V" symbol="V" name="Valine" colour="0000CC"/>
-<letter id="W" symbol="W" name="Tryptophan" colour="0000CC"/>
-<letter id="Y" symbol="Y" name="Tyrosine" colour="33E6CC"/>
-<letter id="X" symbol="X" aliases="*." equals="ACDEFGHIKLMNPQRSTVWY" name="Any amino acid"/>
-<letter id="B" symbol="B" equals="DN" name="Asparagine or Aspartic acid"/>
-<letter id="Z" symbol="Z" equals="EQ" name="Glutamine or Glutamic acid"/>
-<letter id="J" symbol="J" equals="IL" name="Leucine or Isoleucine"/>
-</alphabet>
-<motif name="1" width="11" best-possible-match="GGGGTATAAAA"/>
-<background source="non-redundant database">
-<value letter="A">0.073</value>
-<value letter="C">0.018</value>
-<value letter="D">0.052</value>
-<value letter="E">0.062</value>
-<value letter="F">0.040</value>
-<value letter="G">0.069</value>
-<value letter="H">0.022</value>
-<value letter="I">0.056</value>
-<value letter="K">0.058</value>
-<value letter="L">0.092</value>
-<value letter="M">0.023</value>
-<value letter="N">0.046</value>
-<value letter="P">0.051</value>
-<value letter="Q">0.041</value>
-<value letter="R">0.052</value>
-<value letter="S">0.074</value>
-<value letter="T">0.059</value>
-<value letter="V">0.064</value>
-<value letter="W">0.013</value>
-<value letter="Y">0.033</value>
-</background>
-<cisml-file>cisml.xml</cisml-file>
-</fimo>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/fimo_output_xml_2.xml
--- a/test-data/fimo_output_xml_2.xml Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,65 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Begin document body -->
-<settings>
-<setting name="allow clobber">false</setting>
-<setting name="compute q-values">false</setting>
-<setting name="text only">false</setting>
-<setting name="scan both strands">false</setting>
-<setting name="output threshold">0.0001</setting>
-<setting name="threshold type">p-value</setting>
-<setting name="max stored scores">100000</setting>
-<setting name="pseudocount">0.1</setting>
-<setting name="verbosity">1</setting>
-</settings>
-<sequence-data num-sequences="1" num-residues="5386" />
-<alphabet name="Protein" like="protein">
-<letter id="A" symbol="A" name="Alanine" colour="0000CC"/>
-<letter id="C" symbol="C" name="Cysteine" colour="0000CC"/>
-<letter id="D" symbol="D" name="Aspartic acid" colour="FF00FF"/>
-<letter id="E" symbol="E" name="Glutamic acid" colour="FF00FF"/>
-<letter id="F" symbol="F" name="Phenylalanine" colour="0000CC"/>
-<letter id="G" symbol="G" name="Glycine" colour="FFB300"/>
-<letter id="H" symbol="H" name="Histidine" colour="FFCCCC"/>
-<letter id="I" symbol="I" name="Isoleucine" colour="0000CC"/>
-<letter id="K" symbol="K" name="Lysine" colour="CC0000"/>
-<letter id="L" symbol="L" name="Leucine" colour="0000CC"/>
-<letter id="M" symbol="M" name="Methionine" colour="0000CC"/>
-<letter id="N" symbol="N" name="Asparagine" colour="008000"/>
-<letter id="P" symbol="P" name="Proline" colour="FFFF00"/>
-<letter id="Q" symbol="Q" name="Glutamine" colour="008000"/>
-<letter id="R" symbol="R" name="Arginine" colour="CC0000"/>
-<letter id="S" symbol="S" name="Serine" colour="008000"/>
-<letter id="T" symbol="T" name="Threonine" colour="008000"/>
-<letter id="V" symbol="V" name="Valine" colour="0000CC"/>
-<letter id="W" symbol="W" name="Tryptophan" colour="0000CC"/>
-<letter id="Y" symbol="Y" name="Tyrosine" colour="33E6CC"/>
-<letter id="X" symbol="X" aliases="*." equals="ACDEFGHIKLMNPQRSTVWY" name="Any amino acid"/>
-<letter id="B" symbol="B" equals="DN" name="Asparagine or Aspartic acid"/>
-<letter id="Z" symbol="Z" equals="EQ" name="Glutamine or Glutamic acid"/>
-<letter id="J" symbol="J" equals="IL" name="Leucine or Isoleucine"/>
-</alphabet>
-<motif name="1" width="11" best-possible-match="GGGGTATAAAA"/>
-<background source="non-redundant database">
-<value letter="A">0.073</value>
-<value letter="C">0.018</value>
-<value letter="D">0.052</value>
-<value letter="E">0.062</value>
-<value letter="F">0.040</value>
-<value letter="G">0.069</value>
-<value letter="H">0.022</value>
-<value letter="I">0.056</value>
-<value letter="K">0.058</value>
-<value letter="L">0.092</value>
-<value letter="M">0.023</value>
-<value letter="N">0.046</value>
-<value letter="P">0.051</value>
-<value letter="Q">0.041</value>
-<value letter="R">0.052</value>
-<value letter="S">0.074</value>
-<value letter="T">0.059</value>
-<value letter="V">0.064</value>
-<value letter="W">0.013</value>
-<value letter="Y">0.033</value>
-</background>
-<cisml-file>cisml.xml</cisml-file>
-</fimo>
b
diff -r 793225b11202 -r b48e673af4e8 test-data/hsa_chrM.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hsa_chrM.fa Thu May 17 14:11:15 2018 -0400
b
b'@@ -0,0 +1,238 @@\n+>chrM\n+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGG\n+GTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC\n+CTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTA\n+ATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATC\n+ATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA\n+AACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC\n+TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATA\n+CAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCC\n+AAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC\n+ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA\n+GCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC\n+AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAA\n+ACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA\n+TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACT\n+CACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAACAC\n+ACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC\n+AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC\n+CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATA\n+CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC\n+AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTAT\n+GAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGA\n+AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA\n+TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA\n+GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA\n+GCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCG\n+ATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA\n+ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCC\n+AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA\n+AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGAT\n+AGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC\n+CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG\n+TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC\n+ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAG\n+TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC\n+AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA\n+AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC\n+ATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA\n+AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT\n+TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA\n+TGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT\n+AAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG\n+TCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA\n+GCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG\n+ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG\n+AGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCT\n+ACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGA\n+ACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAAT\n+TCCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCA\n+TTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCC\n+CCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCAC\n+ATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCC\n+CTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAG\n+CCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGC\n+AGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGC\n+TCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCATGACCCTTGG\n+CCATAATATGATT'..b'CAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCTAA\n+CAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCATCAGTTGATGA\n+TACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTT\n+TCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAA\n+CGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGGT\n+CTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTA\n+TAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAAC\n+TCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATC\n+AAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAAC\n+CACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTC\n+CATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC\n+ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCA\n+AAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGC\n+TACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCCC\n+ACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCC\n+TATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTA\n+CCTAAAACTCACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACC\n+AACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCA\n+TCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCT\n+AACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCA\n+ACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAA\n+TCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAA\n+CTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGA\n+CCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATAC\n+TCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCC\n+CTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCC\n+CCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCG\n+ACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACC\n+CCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGAC\n+CAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAAC\n+CCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAA\n+ACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTA\n+CTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATC\n+ATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCC\n+TATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGC\n+AACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAAC\n+TTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACA\n+GTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACT\n+CCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATC\n+ACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACAT\n+TAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCC\n+TCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTA\n+GGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAAC\n+AACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCT\n+AACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATAC\n+TTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTC\n+CTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGA\n+GAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTC\n+ATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTACA\n+TTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCA\n+ATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCA\n+ACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAG\n+TACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCC\n+TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCG\n+CTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTC\n+ATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_fimo_input_1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_fimo_input_1.xml Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,8739 @@\n+<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n+<!-- Document definition -->\n+<!DOCTYPE MEME[\n+<!ELEMENT MEME (\n+  training_set,\n+  model, \n+  motifs, \n+  scanned_sites_summary?\n+)>\n+<!ATTLIST MEME \n+  version CDATA #REQUIRED\n+  release CDATA #REQUIRED\n+>\n+<!-- Training-set elements -->\n+<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n+<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n+<!ELEMENT alphabet (letter*)>\n+<!ATTLIST alphabet name CDATA #REQUIRED>\n+<!ELEMENT ambigs (letter*)>\n+<!ELEMENT letter EMPTY>\n+<!ATTLIST letter id ID #REQUIRED>\n+<!ATTLIST letter symbol CDATA #REQUIRED>\n+<!ATTLIST letter equals CDATA #IMPLIED>\n+<!ATTLIST letter aliases CDATA #IMPLIED>\n+<!ATTLIST letter complement CDATA #IMPLIED>\n+<!ATTLIST letter name CDATA #IMPLIED>\n+<!ATTLIST letter colour CDATA #IMPLIED>\n+<!ELEMENT sequence EMPTY>\n+<!ATTLIST sequence id ID #REQUIRED\n+                   name CDATA #REQUIRED\n+                   length CDATA #REQUIRED\n+                   weight CDATA #REQUIRED\n+>\n+<!ELEMENT letter_frequencies (alphabet_array)>\n+\n+<!-- Model elements -->\n+<!ELEMENT model (\n+  command_line,\n+  host,\n+  type,\n+  nmotifs,\n+  evalue_threshold,\n+  object_function,\n+  spfun,\n+  min_width,\n+  max_width,\n+  wg,\n+  ws,\n+  endgaps,\n+  minsites,\n+  maxsites,\n+  wnsites,\n+  spmap,\n+  spfuzz,\n+  prior,\n+  beta,\n+  maxiter,\n+  distance,\n+  num_sequences,\n+  num_positions,\n+  seed,\n+  hsfrac,\n+  maxwords,\n+  maxsize,\n+  csites,\n+  strands,\n+  priors_file,\n+  reason_for_stopping,\n+  back_order,\n+  background_frequencies\n+)>\n+<!ELEMENT command_line (#PCDATA)*>\n+<!ELEMENT host (#PCDATA)*>\n+<!ELEMENT type (#PCDATA)*>\n+<!ELEMENT nmotifs (#PCDATA)*>\n+<!ELEMENT evalue_threshold (#PCDATA)*>\n+<!ELEMENT object_function (#PCDATA)*>\n+<!ELEMENT spfun (#PCDATA)*>\n+<!ELEMENT min_width (#PCDATA)*>\n+<!ELEMENT max_width (#PCDATA)*>\n+<!ELEMENT wg (#PCDATA)*>\n+<!ELEMENT ws (#PCDATA)*>\n+<!ELEMENT endgaps (#PCDATA)*>\n+<!ELEMENT minsites (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT wnsites (#PCDATA)*>\n+<!ELEMENT spmap (#PCDATA)*>\n+<!ELEMENT spfuzz (#PCDATA)*>\n+<!ELEMENT prior (#PCDATA)*>\n+<!ELEMENT beta (#PCDATA)*>\n+<!ELEMENT maxiter (#PCDATA)*>\n+<!ELEMENT distance (#PCDATA)*>\n+<!ELEMENT num_sequences (#PCDATA)*>\n+<!ELEMENT num_positions (#PCDATA)*>\n+<!ELEMENT seed (#PCDATA)*>\n+<!ELEMENT hsfrac (#PCDATA)*>\n+<!ELEMENT maxwords (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT csites (#PCDATA)*>\n+<!ELEMENT strands (#PCDATA)*>\n+<!ELEMENT priors_file (#PCDATA)*>\n+<!ELEMENT reason_for_stopping (#PCDATA)*>\n+<!ELEMENT back_order (#PCDATA)*>\n+<!ELEMENT background_frequencies (alphabet_array)>\n+<!ATTLIST background_frequencies source CDATA #REQUIRED>\n+\n+<!-- Motif elements -->\n+<!ELEMENT motifs (motif*)>\n+<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n+<!ATTLIST motif id ID #REQUIRED\n+                name CDATA #REQUIRED\n+                alt CDATA ""\n+                width CDATA #REQUIRED\n+                sites CDATA #REQUIRED\n+                llr CDATA #REQUIRED\n+                ic CDATA #REQUIRED\n+                re CDATA #REQUIRED\n+                bayes_threshold CDATA #REQUIRED\n+                e_value CDATA #REQUIRED\n+                elapsed_time CDATA #REQUIRED\n+                url CDATA ""\n+>\n+<!ELEMENT scores (alphabet_matrix)>\n+<!ELEMENT probabilities (alphabet_matrix)>\n+<!ELEMENT regular_expression (#PCDATA)*>\n+\n+<!-- Contributing site elements -->\n+<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n+<!ELEMENT contributing_sites (contributing_site*)>\n+<!ELEMENT contributing_site (left_flank, site, right_flank)>\n+<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n+                          position CDATA #REQUIRED\n+                          strand (plus|minus|none) \'none\'\n+                          pvalue CDATA #REQUIRED\n+>\n+<!-- The left_flank contains the sequence for 10 bases to the left of the mo'..b'm_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="28" pvalue="3.62e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_959" pvalue="2.58e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_960" pvalue="2.46e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_961" pvalue="1.29e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_962" pvalue="3.32e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_963" pvalue="4.04e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_964" pvalue="3.32e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_965" pvalue="3.43e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_966" pvalue="2.58e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_967" pvalue="3.06e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_968" pvalue="1.58e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_969" pvalue="9.25e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_970" pvalue="9.86e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_971" pvalue="6.77e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_972" pvalue="1.58e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_973" pvalue="4.19e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_974" pvalue="4.04e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_975" pvalue="1.36e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_976" pvalue="9.99e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_977" pvalue="3.06e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_978" pvalue="4.58e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_979" pvalue="9.72e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_980" pvalue="9.05e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_981" pvalue="2.89e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_982" pvalue="1.12e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_983" pvalue="5.56e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_984" pvalue="4.16e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_985" pvalue="9.25e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_986" pvalue="1.58e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_987" pvalue="9.26e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_988" pvalue="9.57e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_989" pvalue="4.61e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_990" pvalue="1.09e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_991" pvalue="1.01e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_992" pvalue="3.32e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_993" pvalue="6.37e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_994" pvalue="1.70e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_995" pvalue="9.25e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_996" pvalue="2.08e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_997" pvalue="4.04e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_998" pvalue="2.67e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="3" pvalue="3.62e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_999" pvalue="8.90e-01" num_sites="0"></scanned_sites>\n+</scanned_sites_summary>\n+</MEME>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_html_1.html
--- a/test-data/meme_output_html_1.html Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,95 +0,0 @@
-<!DOCTYPE HTML>
-<html>
-  <head>
-    <meta charset="UTF-8">
-    <title>MEME</title>
-    <script>
-      // @JSON_VAR data
-      var data = {
-        "program": "MEME",
-        "stop_reason": "Stopped because requested number of motifs (1) found.",
-        "cmd": [
-          "meme",
-          "-nostatus"
-        ],
-        "options": {
-          "mod": "zoops",
-          "revcomp": false,
-          "nmotifs": 1,
-          "minw": 8,
-          "maxw": 50,
-          "minsites": 2,
-          "maxsites": 30,
-          "wnsites": 0.8,
-          "spmap": "pam",
-          "spfuzz": 120,
-          "maxwords": -1,
-          "prior": "megap",
-          "b": 7500,
-          "maxiter": 50,
-          "distance": 1e-05,
-          "wg": 11,
-          "ws": 1,
-          "noendgaps": false,
-          "substring": true
-        },
-        "alphabet": {
-          "name": "Protein",
-          "like": "protein",
-          "ncore": 20,
-          "symbols": [
-            {
-              "symbol": "A",
-              "name": "Alanine",
-              "colour": "0000CC"
-            }, {
-              "symbol": "C",
-              "name": "Cysteine",
-              "colour": "0000CC"
-            }, {
-              "symbol": "D",
-              "name": "Aspartic acid",
-              "colour": "FF00FF"
-            }, {
-              "symbol": "E",
-              "name": "Glutamic acid",
-              "colour": "FF00FF"
-            }, {
-              "symbol": "F",
-              "name": "Phenylalanine",
-              "colour": "0000CC"
-            }, {
-              "symbol": "G",
-              "name": "Glycine",
-              "colour": "FFB300"
-            }, {
-              "symbol": "H",
-              "name": "Histidine",
-              "colour": "FFCCCC"
-            }, {
-              "symbol": "I",
-              "name": "Isoleucine",
-              "colour": "0000CC"
-            }, {
-              "symbol": "K",
-              "name": "Lysine",
-              "colour": "CC0000"
-            }, {
-              "symbol": "L",
-              "name": "Leucine",
-              "colour": "0000CC"
-            }, {
-              "symbol": "M",
-              "name": "Methionine",
-              "colour": "0000CC"
-            }, {
-              "symbol": "N",
-              "name": "Asparagine",
-              "colour": "008000"
-            }, {
-              "symbol": "P",
-              "name": "Proline",
-              "colour": "FFFF00"
-            }, {
-              "symbol": "Q",
-              "name": "Glutamine",
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_html_2.html
--- a/test-data/meme_output_html_2.html Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,89 +0,0 @@
-<!DOCTYPE HTML>
-<html>
-  <head>
-    <meta charset="UTF-8">
-    <title>MEME</title>
-    <script>
-      // @JSON_VAR data
-      var data = {
-        "program": "MEME",
-        "stop_reason": "Stopped because requested number of motifs (1) found.",
-        "cmd": [
-          "meme",
-        ],
-        "options": {
-          "mod": "zoops",
-          "revcomp": false,
-          "nmotifs": 1,
-          "minw": 8,
-          "maxw": 50,
-          "minsites": 2,
-          "maxsites": 30,
-          "wnsites": 0.8,
-          "spmap": "uni",
-          "spfuzz": 0.5,
-          "maxwords": -1,
-          "prior": "dirichlet",
-          "b": 0.01,
-          "maxiter": 50,
-          "distance": 0.001,
-          "wg": 11,
-          "ws": 1,
-          "noendgaps": false,
-          "substring": true
-        },
-        "alphabet": {
-          "name": "DNA",
-          "like": "dna",
-          "ncore": 4,
-          "symbols": [
-            {
-              "symbol": "A",
-              "name": "Adenine",
-              "colour": "CC0000",
-              "complement": "T"
-            }, {
-              "symbol": "C",
-              "name": "Cytosine",
-              "colour": "0000CC",
-              "complement": "G"
-            }, {
-              "symbol": "G",
-              "name": "Guanine",
-              "colour": "FFB300",
-              "complement": "C"
-            }, {
-              "symbol": "T",
-              "aliases": "U",
-              "name": "Thymine",
-              "colour": "008000",
-              "complement": "A"
-            }, {
-              "symbol": "N",
-              "aliases": "X.",
-              "name": "Any base",
-              "equals": "ACGT"
-            }, {
-              "symbol": "V",
-              "name": "Not T",
-              "equals": "ACG"
-            }, {
-              "symbol": "H",
-              "name": "Not G",
-              "equals": "ACT"
-            }, {
-              "symbol": "D",
-              "name": "Not C",
-              "equals": "AGT"
-            }, {
-              "symbol": "B",
-              "name": "Not A",
-              "equals": "CGT"
-            }, {
-              "symbol": "M",
-              "name": "Amino",
-              "equals": "AC"
-            }, {
-              "symbol": "R",
-              "name": "Purine",
-              "equals": "AG"
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_test1.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_test1.html Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,8005 @@\n+<!DOCTYPE HTML>\n+<html>\n+  <head>\n+    <meta charset="UTF-8">\n+    <title>MEME</title>\n+    <script>\n+      // @JSON_VAR data\n+      var data = {\n+        "program": "MEME",\n+        "version": "4.12.0",\n+        "release": "Tue Jun 27 16:22:50 2017 -0700",\n+        "stop_reason": "Stopped because requested number of motifs (1) found.",\n+        "cmd": [\n+          "meme", "meme_input_1.fasta", "-o", "meme_test1_out", "-nostatus",\n+          "-maxsize", "1000000"\n+        ],\n+        "options": {\n+          "mod": "zoops",\n+          "revcomp": false,\n+          "nmotifs": 1,\n+          "minw": 8,\n+          "maxw": 50,\n+          "minsites": 2,\n+          "maxsites": 30,\n+          "wnsites": 0.8,\n+          "spmap": "pam",\n+          "spfuzz": 120,\n+          "maxwords": -1,\n+          "prior": "megap",\n+          "b": 7500,\n+          "maxiter": 50,\n+          "distance": 1e-05,\n+          "wg": 11,\n+          "ws": 1,\n+          "noendgaps": false,\n+          "substring": true\n+        },\n+        "alphabet": {\n+          "name": "Protein",\n+          "like": "protein",\n+          "ncore": 20,\n+          "symbols": [\n+            {\n+              "symbol": "A",\n+              "name": "Alanine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "C",\n+              "name": "Cysteine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "D",\n+              "name": "Aspartic acid",\n+              "colour": "FF00FF"\n+            }, {\n+              "symbol": "E",\n+              "name": "Glutamic acid",\n+              "colour": "FF00FF"\n+            }, {\n+              "symbol": "F",\n+              "name": "Phenylalanine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "G",\n+              "name": "Glycine",\n+              "colour": "FFB300"\n+            }, {\n+              "symbol": "H",\n+              "name": "Histidine",\n+              "colour": "FFCCCC"\n+            }, {\n+              "symbol": "I",\n+              "name": "Isoleucine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "K",\n+              "name": "Lysine",\n+              "colour": "CC0000"\n+            }, {\n+              "symbol": "L",\n+              "name": "Leucine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "M",\n+              "name": "Methionine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "N",\n+              "name": "Asparagine",\n+              "colour": "008000"\n+            }, {\n+              "symbol": "P",\n+              "name": "Proline",\n+              "colour": "FFFF00"\n+            }, {\n+              "symbol": "Q",\n+              "name": "Glutamine",\n+              "colour": "008000"\n+            }, {\n+              "symbol": "R",\n+              "name": "Arginine",\n+              "colour": "CC0000"\n+            }, {\n+              "symbol": "S",\n+              "name": "Serine",\n+              "colour": "008000"\n+            }, {\n+              "symbol": "T",\n+              "name": "Threonine",\n+              "colour": "008000"\n+            }, {\n+              "symbol": "V",\n+              "name": "Valine",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "W",\n+              "name": "Tryptophan",\n+              "colour": "0000CC"\n+            }, {\n+              "symbol": "Y",\n+              "name": "Tyrosine",\n+              "colour": "33E6CC"\n+            }, {\n+              "symbol": "X",\n+              "aliases": "*.",\n+              "name": "Any amino acid",\n+              "equals": "ACDEFGHIKLMNPQRSTVWY"\n+            }, {\n+              "symbol": "B",\n+              "name": "Asparagine or Aspartic acid",\n+              "equals": "DN"\n+            }, {\n+              "symbol": "Z",\n+              "name": "Glutamine or Glutamic acid",\n+              "equals": "EQ"\n+            }, {\n+              "symbol": "J",\n+      '..b'          <th>EM Starting Point Map Type</th>\n+          <td id="opt_spmap">\n+            <span class="spmap_uni">Uniform</span>\n+            <span class="spmap_pam">Point Accepted Mutation</span>\n+          </td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Starting Point Fuzz</th>\n+          <td id="opt_spfuzz"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Maximum Iterations</th>\n+          <td id="opt_maxiter"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Improvement Threshold</th>\n+          <td id="opt_distance"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Trim Gap Open Cost</th>\n+          <td id="opt_wg"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Trim Gap Extend Cost</th>\n+          <td id="opt_ws"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>End Gap Treatment</th>\n+          <td id="opt_noendgaps">\n+            <span class="noendgaps_on">No cost</span>\n+            <span class="noendgaps_off">Same cost as other gaps</span>\n+          </td>\n+        </tr>\n+        <tr>\n+          <td colspan="2" style="text-align: center">\n+            <a href="javascript:toggle_class(document.getElementById(\'tbl_settings\'), \'hide_advanced\')">\n+              <span class="show_more">Show Advanced Settings</span>\n+              <span class="show_less">Hide Advanced Settings</span>\n+            </a>\n+          </td>\n+        </tr>\n+      </table>\n+      <script>\n+      {\n+        $("opt_mod").className = data.options.mod;\n+        $("opt_strand").className = (meme_alphabet.has_complement() ? (data.options.revcomp ? "both" : "given") : "none");\n+        $("opt_nmotifs").textContent = data.options.nmotifs;\n+        $("opt_evt").textContent = (typeof data.options.evt === "number" ? data.options.evt : "no limit");\n+        $("opt_minw").textContent = data.options.minw;\n+        $("opt_maxw").textContent = data.options.maxw;\n+        $("opt_minsites").textContent = data.options.minsites;\n+        $("opt_maxsites").textContent = data.options.maxsites;\n+        $("opt_wnsites").textContent = data.options.wnsites;\n+        $("opt_spmap").className = data.options.spmap;\n+        $("opt_spfuzz").textContent = data.options.spfuzz;\n+        $("opt_prior").className = data.options.prior;\n+        $("opt_b").textContent = data.options.b;\n+        $("opt_maxiter").textContent = data.options.maxiter;\n+        $("opt_distance").textContent = data.options.distance;\n+        $("opt_wg").textContent = data.options.wg;\n+        $("opt_ws").textContent = data.options.ws;\n+        $("opt_noendgaps").className = (data.options.noendgaps ? "on" : "off");\n+        $("opt_substring").className = (data.options.substring ? "on" : "off");\n+      }\n+      </script>\n+    </div>\n+    <!-- list information on this program -->\n+    <div id="info_sec" class="bar">\n+      <div class="subsection">\n+        <h5 id="version">MEME version</h5>\n+        <span id="ins_version"></span> \n+        (Release date: <span id="ins_release"></span>)<br>\n+      </div>\n+      <script>\n+        $("ins_version").innerHTML = data["version"];\n+        $("ins_release").innerHTML = data["release"];\n+      </script>\n+      <div class="subsection">\n+        <h5 id="reference">Reference</h5>\n+        <span class="citation">\n+          Timothy L. Bailey and Charles Elkan, \n+          "Fitting a mixture model by expectation maximization to discover motifs in biopolymers", \n+          <em>Proceedings of the Second International Conference on Intelligent Systems \n+          for Molecular Biology</em>, pp. 28-36, AAAI Press, Menlo Park, California, 1994. \n+        </span>\n+      </div>\n+      <div class="subsection">\n+        <h5 id="command">Command line</h5>\n+        <textarea id="cmd" rows="5" style="width:100%;" readonly="readonly">\n+        </textarea>\n+        <script>$("cmd").value = data["cmd"].join(" ");</script>\n+      </div>\n+    </div>\n+    \n+  </body>\n+</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_test1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_test1.txt Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,325 @@\n+********************************************************************************\n+MEME - Motif discovery tool\n+********************************************************************************\n+MEME version 4.12.0 (Release date: Tue Jun 27 16:22:50 2017 -0700)\n+\n+For further information on how to interpret these results or to get\n+a copy of the MEME software please access http://meme-suite.org .\n+\n+This file may be used as input to the MAST algorithm for searching\n+sequence databases for matches to groups of motifs.  MAST is available\n+for interactive use and downloading at http://meme-suite.org .\n+********************************************************************************\n+\n+\n+********************************************************************************\n+REFERENCE\n+********************************************************************************\n+If you use this program in your research, please cite:\n+\n+Timothy L. Bailey and Charles Elkan,\n+"Fitting a mixture model by expectation maximization to discover\n+motifs in biopolymers", Proceedings of the Second International\n+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,\n+AAAI Press, Menlo Park, California, 1994.\n+********************************************************************************\n+\n+\n+********************************************************************************\n+TRAINING SET\n+********************************************************************************\n+DATAFILE= meme_input_1.fasta\n+ALPHABET= ACDEFGHIKLMNPQRSTVWY\n+Sequence name            Weight Length  Sequence name            Weight Length  \n+-------------            ------ ------  -------------            ------ ------  \n+chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  \n+chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  \n+chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  \n+chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  \n+chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  \n+chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  \n+chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  \n+chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  \n+chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  \n+chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  \n+chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  \n+chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  \n+chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  \n+chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  \n+chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  \n+********************************************************************************\n+\n+********************************************************************************\n+COMMAND LINE SUMMARY\n+********************************************************************************\n+This information can also be useful in the event you wish to report a\n+problem with the MEME software.\n+\n+command: meme meme_input_1.fasta -o meme_test1_out -nostatus -maxsize 1000000 \n+\n+model:  mod=         zoops    nmotifs=         1    evt=           inf\n+object function=  E-value of product of p-values\n+width:  minw=            8    maxw=           50\n+width:  wg=             11    ws=              1    endgaps=       yes\n+nsites: minsites=        2    maxsites=       30    wnsites=       0.8\n+theta:  spmap=         pam    spfuzz=        120\n+global: substring=     yes    branching=      no    wbranch=        no\n+em:     prior=       megap    b=            7500    maxiter=        50\n+        distance=    1e-05\n+data:   n=            1500    N=              30    shuffle'..b'0.000000  0.000000  0.000000  0.000000 \n+ 0.760000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.240000  0.000000  0.000000  0.000000 \n+ 0.960000  0.040000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n+ 0.840000  0.000000  0.000000  0.000000  0.000000  0.120000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.040000  0.000000  0.000000  0.000000 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGGGTATAAAA MEME-1 regular expression\n+--------------------------------------------------------------------------------\n+[GA][GA][GC][GA]TATA[AT]AA\n+--------------------------------------------------------------------------------\n+\n+\n+\n+\n+Time  0.77 secs.\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+SUMMARY OF MOTIFS\n+********************************************************************************\n+\n+--------------------------------------------------------------------------------\n+\tCombined block diagrams: non-overlapping sites with p-value < 0.0001\n+--------------------------------------------------------------------------------\n+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM\n+-------------            ----------------  -------------\n+chr21_19617074_19617124_         1.22e-03  39_[1(3.06e-05)]\n+chr21_26934381_26934431_         2.21e-03  27_[1(5.52e-05)]_12\n+chr21_28217753_28217803_         7.29e-01  50\n+chr21_31710037_31710087_         2.37e-03  14_[1(5.94e-05)]_25\n+chr21_31744582_31744632_         1.22e-03  12_[1(3.06e-05)]_27\n+chr21_31768316_31768366_         1.53e-03  [1(3.82e-05)]_39\n+chr21_31914206_31914256_         6.70e-04  15_[1(1.68e-05)]_24\n+chr21_31933633_31933683_         1.81e-03  4_[1(4.54e-05)]_35\n+chr21_31962741_31962791_         1.61e-02  50\n+chr21_31964683_31964733_         1.36e-04  13_[1(3.41e-06)]_26\n+chr21_31973364_31973414_         1.99e-01  50\n+chr21_31992870_31992920_         3.47e-04  16_[1(8.67e-06)]_23\n+chr21_32185595_32185645_         3.47e-04  18_[1(8.67e-06)]_21\n+chr21_32202076_32202126_         2.01e-04  13_[1(5.01e-06)]_26\n+chr21_32253899_32253949_         8.11e-04  19_[1(2.03e-05)]_20\n+chr21_32410820_32410870_         3.47e-04  21_[1(8.67e-06)]_18\n+chr21_36411748_36411798_         2.71e-03  22_[1(6.78e-05)]_17\n+chr21_37838750_37838800_         8.23e-02  50\n+chr21_45705687_45705737_         1.53e-03  37_[1(3.82e-05)]_2\n+chr21_45971413_45971463_         1.36e-04  9_[1(3.41e-06)]_30\n+chr21_45978668_45978718_         6.37e-04  4_[1(1.59e-05)]_35\n+chr21_45993530_45993580_         1.60e-04  7_[1(4.00e-06)]_32\n+chr21_46020421_46020471_         4.83e-04  2_[1(1.21e-05)]_37\n+chr21_46031920_46031970_         2.43e-04  15_[1(6.06e-06)]_24\n+chr21_46046964_46047014_         4.26e-05  12_[1(1.06e-06)]_27\n+chr21_46057197_46057247_         1.36e-04  36_[1(3.41e-06)]_3\n+chr21_46086869_46086919_         4.30e-02  50\n+chr21_46102103_46102153_         4.30e-02  50\n+chr21_47517957_47518007_         6.37e-04  32_[1(1.59e-05)]_7\n+chr21_47575506_47575556_         1.61e-03  30_[1(4.02e-05)]_9\n+--------------------------------------------------------------------------------\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+Stopped because requested number of motifs (1) found.\n+********************************************************************************\n+\n+CPU: ThinkPad-T450s\n+\n+********************************************************************************\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_test1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_test1.xml Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,1292 @@\n+<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n+<!-- Document definition -->\n+<!DOCTYPE MEME[\n+<!ELEMENT MEME (\n+  training_set,\n+  model, \n+  motifs, \n+  scanned_sites_summary?\n+)>\n+<!ATTLIST MEME \n+  version CDATA #REQUIRED\n+  release CDATA #REQUIRED\n+>\n+<!-- Training-set elements -->\n+<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n+<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n+<!ELEMENT alphabet (letter*)>\n+<!ATTLIST alphabet name CDATA #REQUIRED>\n+<!ELEMENT ambigs (letter*)>\n+<!ELEMENT letter EMPTY>\n+<!ATTLIST letter id ID #REQUIRED>\n+<!ATTLIST letter symbol CDATA #REQUIRED>\n+<!ATTLIST letter equals CDATA #IMPLIED>\n+<!ATTLIST letter aliases CDATA #IMPLIED>\n+<!ATTLIST letter complement CDATA #IMPLIED>\n+<!ATTLIST letter name CDATA #IMPLIED>\n+<!ATTLIST letter colour CDATA #IMPLIED>\n+<!ELEMENT sequence EMPTY>\n+<!ATTLIST sequence id ID #REQUIRED\n+                   name CDATA #REQUIRED\n+                   length CDATA #REQUIRED\n+                   weight CDATA #REQUIRED\n+>\n+<!ELEMENT letter_frequencies (alphabet_array)>\n+\n+<!-- Model elements -->\n+<!ELEMENT model (\n+  command_line,\n+  host,\n+  type,\n+  nmotifs,\n+  evalue_threshold,\n+  object_function,\n+  spfun,\n+  min_width,\n+  max_width,\n+  wg,\n+  ws,\n+  endgaps,\n+  minsites,\n+  maxsites,\n+  wnsites,\n+  spmap,\n+  spfuzz,\n+  prior,\n+  beta,\n+  maxiter,\n+  distance,\n+  num_sequences,\n+  num_positions,\n+  seed,\n+  hsfrac,\n+  maxwords,\n+  maxsize,\n+  csites,\n+  strands,\n+  priors_file,\n+  reason_for_stopping,\n+  back_order,\n+  background_frequencies\n+)>\n+<!ELEMENT command_line (#PCDATA)*>\n+<!ELEMENT host (#PCDATA)*>\n+<!ELEMENT type (#PCDATA)*>\n+<!ELEMENT nmotifs (#PCDATA)*>\n+<!ELEMENT evalue_threshold (#PCDATA)*>\n+<!ELEMENT object_function (#PCDATA)*>\n+<!ELEMENT spfun (#PCDATA)*>\n+<!ELEMENT min_width (#PCDATA)*>\n+<!ELEMENT max_width (#PCDATA)*>\n+<!ELEMENT wg (#PCDATA)*>\n+<!ELEMENT ws (#PCDATA)*>\n+<!ELEMENT endgaps (#PCDATA)*>\n+<!ELEMENT minsites (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT wnsites (#PCDATA)*>\n+<!ELEMENT spmap (#PCDATA)*>\n+<!ELEMENT spfuzz (#PCDATA)*>\n+<!ELEMENT prior (#PCDATA)*>\n+<!ELEMENT beta (#PCDATA)*>\n+<!ELEMENT maxiter (#PCDATA)*>\n+<!ELEMENT distance (#PCDATA)*>\n+<!ELEMENT num_sequences (#PCDATA)*>\n+<!ELEMENT num_positions (#PCDATA)*>\n+<!ELEMENT seed (#PCDATA)*>\n+<!ELEMENT hsfrac (#PCDATA)*>\n+<!ELEMENT maxwords (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT csites (#PCDATA)*>\n+<!ELEMENT strands (#PCDATA)*>\n+<!ELEMENT priors_file (#PCDATA)*>\n+<!ELEMENT reason_for_stopping (#PCDATA)*>\n+<!ELEMENT back_order (#PCDATA)*>\n+<!ELEMENT background_frequencies (alphabet_array)>\n+<!ATTLIST background_frequencies source CDATA #REQUIRED>\n+\n+<!-- Motif elements -->\n+<!ELEMENT motifs (motif*)>\n+<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n+<!ATTLIST motif id ID #REQUIRED\n+                name CDATA #REQUIRED\n+                alt CDATA ""\n+                width CDATA #REQUIRED\n+                sites CDATA #REQUIRED\n+                llr CDATA #REQUIRED\n+                ic CDATA #REQUIRED\n+                re CDATA #REQUIRED\n+                bayes_threshold CDATA #REQUIRED\n+                e_value CDATA #REQUIRED\n+                elapsed_time CDATA #REQUIRED\n+                url CDATA ""\n+>\n+<!ELEMENT scores (alphabet_matrix)>\n+<!ELEMENT probabilities (alphabet_matrix)>\n+<!ELEMENT regular_expression (#PCDATA)*>\n+\n+<!-- Contributing site elements -->\n+<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n+<!ELEMENT contributing_sites (contributing_site*)>\n+<!ELEMENT contributing_site (left_flank, site, right_flank)>\n+<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n+                          position CDATA #REQUIRED\n+                          strand (plus|minus|none) \'none\'\n+                          pvalue CDATA #REQUIRED\n+>\n+<!-- The left_flank contains the sequence for 10 bases to the left of the mo'..b'="none" position="12" pvalue="3.06e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_5" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="0" pvalue="3.82e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_6" pvalue="6.70e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="1.68e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_7" pvalue="1.81e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="4.54e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_8" pvalue="1.61e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_9" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_10" pvalue="1.99e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_11" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="16" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_12" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="18" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_13" pvalue="2.01e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="5.01e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_14" pvalue="8.11e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="19" pvalue="2.03e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_15" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="21" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_16" pvalue="2.71e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="22" pvalue="6.78e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_17" pvalue="8.23e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_18" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="37" pvalue="3.82e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_19" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="9" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_20" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="1.59e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_21" pvalue="1.60e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="7" pvalue="4.00e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_22" pvalue="4.83e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="2" pvalue="1.21e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_23" pvalue="2.43e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="6.06e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_24" pvalue="4.26e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="12" pvalue="1.06e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_25" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="36" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_26" pvalue="4.30e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_27" pvalue="4.30e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_28" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="32" pvalue="1.59e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_29" pvalue="1.61e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="30" pvalue="4.02e-05"/>\n+</scanned_sites>\n+</scanned_sites_summary>\n+</MEME>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_test2.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_test2.html Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,7937 @@\n+<!DOCTYPE HTML>\n+<html>\n+  <head>\n+    <meta charset="UTF-8">\n+    <title>MEME</title>\n+    <script>\n+      // @JSON_VAR data\n+      var data = {\n+        "program": "MEME",\n+        "version": "4.12.0",\n+        "release": "Tue Jun 27 16:22:50 2017 -0700",\n+        "stop_reason": "Stopped because requested number of motifs (1) found.",\n+        "cmd": [\n+          "meme", "meme_input_1.fasta", "-o", "meme_test2_out", "-nostatus",\n+          "-maxsize", "1000000", "-sf", "Galaxy_FASTA_Input", "-dna", "-mod",\n+          "zoops", "-nmotifs", "1", "-wnsites", "0.8", "-minw", "8", "-maxw",\n+          "50", "-wg", "11", "-ws", "1", "-maxiter", "50", "-distance",\n+          "0.001", "-prior", "dirichlet", "-b", "0.01", "-plib",\n+          "prior30.plib", "-spmap", "uni", "-spfuzz", "0.5"\n+        ],\n+        "options": {\n+          "mod": "zoops",\n+          "revcomp": false,\n+          "nmotifs": 1,\n+          "minw": 8,\n+          "maxw": 50,\n+          "minsites": 2,\n+          "maxsites": 30,\n+          "wnsites": 0.8,\n+          "spmap": "uni",\n+          "spfuzz": 0.5,\n+          "maxwords": -1,\n+          "prior": "dirichlet",\n+          "b": 0.01,\n+          "maxiter": 50,\n+          "distance": 0.001,\n+          "wg": 11,\n+          "ws": 1,\n+          "noendgaps": false,\n+          "substring": true\n+        },\n+        "alphabet": {\n+          "name": "DNA",\n+          "like": "dna",\n+          "ncore": 4,\n+          "symbols": [\n+            {\n+              "symbol": "A",\n+              "name": "Adenine",\n+              "colour": "CC0000",\n+              "complement": "T"\n+            }, {\n+              "symbol": "C",\n+              "name": "Cytosine",\n+              "colour": "0000CC",\n+              "complement": "G"\n+            }, {\n+              "symbol": "G",\n+              "name": "Guanine",\n+              "colour": "FFB300",\n+              "complement": "C"\n+            }, {\n+              "symbol": "T",\n+              "aliases": "U",\n+              "name": "Thymine",\n+              "colour": "008000",\n+              "complement": "A"\n+            }, {\n+              "symbol": "N",\n+              "aliases": "X.",\n+              "name": "Any base",\n+              "equals": "ACGT"\n+            }, {\n+              "symbol": "V",\n+              "name": "Not T",\n+              "equals": "ACG"\n+            }, {\n+              "symbol": "H",\n+              "name": "Not G",\n+              "equals": "ACT"\n+            }, {\n+              "symbol": "D",\n+              "name": "Not C",\n+              "equals": "AGT"\n+            }, {\n+              "symbol": "B",\n+              "name": "Not A",\n+              "equals": "CGT"\n+            }, {\n+              "symbol": "M",\n+              "name": "Amino",\n+              "equals": "AC"\n+            }, {\n+              "symbol": "R",\n+              "name": "Purine",\n+              "equals": "AG"\n+            }, {\n+              "symbol": "W",\n+              "name": "Weak",\n+              "equals": "AT"\n+            }, {\n+              "symbol": "S",\n+              "name": "Strong",\n+              "equals": "CG"\n+            }, {\n+              "symbol": "Y",\n+              "name": "Pyrimidine",\n+              "equals": "CT"\n+            }, {\n+              "symbol": "K",\n+              "name": "Keto",\n+              "equals": "GT"\n+            }\n+          ]\n+        },\n+        "background": {\n+          "freqs": [0.294, 0.231, 0.257, 0.217]\n+        },\n+        "sequence_db": {\n+          "source": "Galaxy_FASTA_Input",\n+          "psp_source": "prior30.plib",\n+          "freqs": [0.294, 0.231, 0.257, 0.217],\n+          "sequences": [\n+            {\n+              "name": "chr21_19617074_19617124_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_26934381_26934431_+",\n+              "length": 50,\n+              "weight": 1.000000\n+            }, {\n+              "name": "chr21_28217753_282'..b'          <th>EM Starting Point Map Type</th>\n+          <td id="opt_spmap">\n+            <span class="spmap_uni">Uniform</span>\n+            <span class="spmap_pam">Point Accepted Mutation</span>\n+          </td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Starting Point Fuzz</th>\n+          <td id="opt_spfuzz"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Maximum Iterations</th>\n+          <td id="opt_maxiter"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>EM Improvement Threshold</th>\n+          <td id="opt_distance"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Trim Gap Open Cost</th>\n+          <td id="opt_wg"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>Trim Gap Extend Cost</th>\n+          <td id="opt_ws"></td>\n+        </tr>\n+        <tr class="advanced">\n+          <th>End Gap Treatment</th>\n+          <td id="opt_noendgaps">\n+            <span class="noendgaps_on">No cost</span>\n+            <span class="noendgaps_off">Same cost as other gaps</span>\n+          </td>\n+        </tr>\n+        <tr>\n+          <td colspan="2" style="text-align: center">\n+            <a href="javascript:toggle_class(document.getElementById(\'tbl_settings\'), \'hide_advanced\')">\n+              <span class="show_more">Show Advanced Settings</span>\n+              <span class="show_less">Hide Advanced Settings</span>\n+            </a>\n+          </td>\n+        </tr>\n+      </table>\n+      <script>\n+      {\n+        $("opt_mod").className = data.options.mod;\n+        $("opt_strand").className = (meme_alphabet.has_complement() ? (data.options.revcomp ? "both" : "given") : "none");\n+        $("opt_nmotifs").textContent = data.options.nmotifs;\n+        $("opt_evt").textContent = (typeof data.options.evt === "number" ? data.options.evt : "no limit");\n+        $("opt_minw").textContent = data.options.minw;\n+        $("opt_maxw").textContent = data.options.maxw;\n+        $("opt_minsites").textContent = data.options.minsites;\n+        $("opt_maxsites").textContent = data.options.maxsites;\n+        $("opt_wnsites").textContent = data.options.wnsites;\n+        $("opt_spmap").className = data.options.spmap;\n+        $("opt_spfuzz").textContent = data.options.spfuzz;\n+        $("opt_prior").className = data.options.prior;\n+        $("opt_b").textContent = data.options.b;\n+        $("opt_maxiter").textContent = data.options.maxiter;\n+        $("opt_distance").textContent = data.options.distance;\n+        $("opt_wg").textContent = data.options.wg;\n+        $("opt_ws").textContent = data.options.ws;\n+        $("opt_noendgaps").className = (data.options.noendgaps ? "on" : "off");\n+        $("opt_substring").className = (data.options.substring ? "on" : "off");\n+      }\n+      </script>\n+    </div>\n+    <!-- list information on this program -->\n+    <div id="info_sec" class="bar">\n+      <div class="subsection">\n+        <h5 id="version">MEME version</h5>\n+        <span id="ins_version"></span> \n+        (Release date: <span id="ins_release"></span>)<br>\n+      </div>\n+      <script>\n+        $("ins_version").innerHTML = data["version"];\n+        $("ins_release").innerHTML = data["release"];\n+      </script>\n+      <div class="subsection">\n+        <h5 id="reference">Reference</h5>\n+        <span class="citation">\n+          Timothy L. Bailey and Charles Elkan, \n+          "Fitting a mixture model by expectation maximization to discover motifs in biopolymers", \n+          <em>Proceedings of the Second International Conference on Intelligent Systems \n+          for Molecular Biology</em>, pp. 28-36, AAAI Press, Menlo Park, California, 1994. \n+        </span>\n+      </div>\n+      <div class="subsection">\n+        <h5 id="command">Command line</h5>\n+        <textarea id="cmd" rows="5" style="width:100%;" readonly="readonly">\n+        </textarea>\n+        <script>$("cmd").value = data["cmd"].join(" ");</script>\n+      </div>\n+    </div>\n+    \n+  </body>\n+</html>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_test2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_test2.txt Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,319 @@\n+********************************************************************************\n+MEME - Motif discovery tool\n+********************************************************************************\n+MEME version 4.12.0 (Release date: Tue Jun 27 16:22:50 2017 -0700)\n+\n+For further information on how to interpret these results or to get\n+a copy of the MEME software please access http://meme-suite.org .\n+\n+This file may be used as input to the MAST algorithm for searching\n+sequence databases for matches to groups of motifs.  MAST is available\n+for interactive use and downloading at http://meme-suite.org .\n+********************************************************************************\n+\n+\n+********************************************************************************\n+REFERENCE\n+********************************************************************************\n+If you use this program in your research, please cite:\n+\n+Timothy L. Bailey and Charles Elkan,\n+"Fitting a mixture model by expectation maximization to discover\n+motifs in biopolymers", Proceedings of the Second International\n+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,\n+AAAI Press, Menlo Park, California, 1994.\n+********************************************************************************\n+\n+\n+********************************************************************************\n+TRAINING SET\n+********************************************************************************\n+DATAFILE= Galaxy_FASTA_Input\n+ALPHABET= ACGT\n+Sequence name            Weight Length  Sequence name            Weight Length  \n+-------------            ------ ------  -------------            ------ ------  \n+chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  \n+chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  \n+chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  \n+chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  \n+chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  \n+chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  \n+chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  \n+chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  \n+chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  \n+chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  \n+chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  \n+chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  \n+chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  \n+chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  \n+chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  \n+********************************************************************************\n+\n+********************************************************************************\n+COMMAND LINE SUMMARY\n+********************************************************************************\n+This information can also be useful in the event you wish to report a\n+problem with the MEME software.\n+\n+command: meme meme_input_1.fasta -o meme_test2_out -nostatus -maxsize 1000000 -sf Galaxy_FASTA_Input -dna -mod zoops -nmotifs 1 -wnsites 0.8 -minw 8 -maxw 50 -wg 11 -ws 1 -maxiter 50 -distance 0.001 -prior dirichlet -b 0.01 -plib prior30.plib -spmap uni -spfuzz 0.5 \n+\n+model:  mod=         zoops    nmotifs=         1    evt=           inf\n+object function=  E-value of product of p-values\n+width:  minw=            8    maxw=           50\n+width:  wg=             11    ws=              1    endgaps=       yes\n+nsites: minsites=        2    maxsites=       30    wnsites=       0.8\n+theta:  spmap=         uni    spfuzz=        0.5\n+global: substring=     yes    branching=      no    wbranch'..b'y matrix\n+--------------------------------------------------------------------------------\n+letter-probability matrix: alength= 4 w= 11 nsites= 30 E= 5.1e-040 \n+ 0.266667  0.066667  0.566667  0.100000 \n+ 0.300000  0.000000  0.666667  0.033333 \n+ 0.133333  0.266667  0.466667  0.133333 \n+ 0.300000  0.033333  0.600000  0.066667 \n+ 0.000000  0.000000  0.033333  0.966667 \n+ 0.866667  0.066667  0.000000  0.066667 \n+ 0.000000  0.000000  0.000000  1.000000 \n+ 0.966667  0.033333  0.000000  0.000000 \n+ 0.700000  0.000000  0.000000  0.300000 \n+ 0.933333  0.066667  0.000000  0.000000 \n+ 0.800000  0.000000  0.166667  0.033333 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif GGSRTATAAAA MEME-1 regular expression\n+--------------------------------------------------------------------------------\n+[GA][GA][GC][GA]TATA[AT]AA\n+--------------------------------------------------------------------------------\n+\n+\n+\n+\n+Time  0.38 secs.\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+SUMMARY OF MOTIFS\n+********************************************************************************\n+\n+--------------------------------------------------------------------------------\n+\tCombined block diagrams: non-overlapping sites with p-value < 0.0001\n+--------------------------------------------------------------------------------\n+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM\n+-------------            ----------------  -------------\n+chr21_19617074_19617124_         5.63e-04  39_[+1(1.41e-05)]\n+chr21_26934381_26934431_         1.57e-03  27_[+1(3.93e-05)]_12\n+chr21_28217753_28217803_         1.00e-01  50\n+chr21_31710037_31710087_         2.49e-03  14_[+1(6.24e-05)]_25\n+chr21_31744582_31744632_         1.22e-03  12_[+1(3.04e-05)]_27\n+chr21_31768316_31768366_         1.47e-03  [+1(3.67e-05)]_39\n+chr21_31914206_31914256_         6.45e-04  15_[+1(1.61e-05)]_24\n+chr21_31933633_31933683_         2.26e-03  4_[+1(5.65e-05)]_35\n+chr21_31962741_31962791_         3.37e-02  50\n+chr21_31964683_31964733_         1.95e-04  13_[+1(4.86e-06)]_26\n+chr21_31973364_31973414_         5.73e-02  50\n+chr21_31992870_31992920_         5.52e-04  16_[+1(1.38e-05)]_23\n+chr21_32185595_32185645_         2.59e-04  18_[+1(6.48e-06)]_21\n+chr21_32202076_32202126_         1.10e-04  13_[+1(2.74e-06)]_26\n+chr21_32253899_32253949_         7.78e-04  17_[+1(1.95e-05)]_22\n+chr21_32410820_32410870_         5.52e-04  21_[+1(1.38e-05)]_18\n+chr21_36411748_36411798_         2.85e-03  22_[+1(7.15e-05)]_17\n+chr21_37838750_37838800_         1.90e-02  50\n+chr21_45705687_45705737_         8.63e-04  37_[+1(2.16e-05)]_2\n+chr21_45971413_45971463_         1.95e-04  9_[+1(4.86e-06)]_30\n+chr21_45978668_45978718_         2.59e-04  4_[+1(6.48e-06)]_35\n+chr21_45993530_45993580_         1.95e-04  7_[+1(4.86e-06)]_32\n+chr21_46020421_46020471_         7.78e-04  2_[+1(1.95e-05)]_37\n+chr21_46031920_46031970_         8.89e-05  15_[+1(2.22e-06)]_24\n+chr21_46046964_46047014_         1.80e-05  12_[+1(4.51e-07)]_27\n+chr21_46057197_46057247_         1.95e-04  36_[+1(4.86e-06)]_3\n+chr21_46086869_46086919_         5.54e-03  50\n+chr21_46102103_46102153_         5.54e-03  50\n+chr21_47517957_47518007_         2.59e-04  32_[+1(6.48e-06)]_7\n+chr21_47575506_47575556_         1.22e-03  30_[+1(3.04e-05)]_9\n+--------------------------------------------------------------------------------\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+Stopped because requested number of motifs (1) found.\n+********************************************************************************\n+\n+CPU: ThinkPad-T450s\n+\n+********************************************************************************\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_test2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_test2.xml Thu May 17 14:11:15 2018 -0400
[
b'@@ -0,0 +1,984 @@\n+<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n+<!-- Document definition -->\n+<!DOCTYPE MEME[\n+<!ELEMENT MEME (\n+  training_set,\n+  model, \n+  motifs, \n+  scanned_sites_summary?\n+)>\n+<!ATTLIST MEME \n+  version CDATA #REQUIRED\n+  release CDATA #REQUIRED\n+>\n+<!-- Training-set elements -->\n+<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n+<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n+<!ELEMENT alphabet (letter*)>\n+<!ATTLIST alphabet name CDATA #REQUIRED>\n+<!ELEMENT ambigs (letter*)>\n+<!ELEMENT letter EMPTY>\n+<!ATTLIST letter id ID #REQUIRED>\n+<!ATTLIST letter symbol CDATA #REQUIRED>\n+<!ATTLIST letter equals CDATA #IMPLIED>\n+<!ATTLIST letter aliases CDATA #IMPLIED>\n+<!ATTLIST letter complement CDATA #IMPLIED>\n+<!ATTLIST letter name CDATA #IMPLIED>\n+<!ATTLIST letter colour CDATA #IMPLIED>\n+<!ELEMENT sequence EMPTY>\n+<!ATTLIST sequence id ID #REQUIRED\n+                   name CDATA #REQUIRED\n+                   length CDATA #REQUIRED\n+                   weight CDATA #REQUIRED\n+>\n+<!ELEMENT letter_frequencies (alphabet_array)>\n+\n+<!-- Model elements -->\n+<!ELEMENT model (\n+  command_line,\n+  host,\n+  type,\n+  nmotifs,\n+  evalue_threshold,\n+  object_function,\n+  spfun,\n+  min_width,\n+  max_width,\n+  wg,\n+  ws,\n+  endgaps,\n+  minsites,\n+  maxsites,\n+  wnsites,\n+  spmap,\n+  spfuzz,\n+  prior,\n+  beta,\n+  maxiter,\n+  distance,\n+  num_sequences,\n+  num_positions,\n+  seed,\n+  hsfrac,\n+  maxwords,\n+  maxsize,\n+  csites,\n+  strands,\n+  priors_file,\n+  reason_for_stopping,\n+  back_order,\n+  background_frequencies\n+)>\n+<!ELEMENT command_line (#PCDATA)*>\n+<!ELEMENT host (#PCDATA)*>\n+<!ELEMENT type (#PCDATA)*>\n+<!ELEMENT nmotifs (#PCDATA)*>\n+<!ELEMENT evalue_threshold (#PCDATA)*>\n+<!ELEMENT object_function (#PCDATA)*>\n+<!ELEMENT spfun (#PCDATA)*>\n+<!ELEMENT min_width (#PCDATA)*>\n+<!ELEMENT max_width (#PCDATA)*>\n+<!ELEMENT wg (#PCDATA)*>\n+<!ELEMENT ws (#PCDATA)*>\n+<!ELEMENT endgaps (#PCDATA)*>\n+<!ELEMENT minsites (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT wnsites (#PCDATA)*>\n+<!ELEMENT spmap (#PCDATA)*>\n+<!ELEMENT spfuzz (#PCDATA)*>\n+<!ELEMENT prior (#PCDATA)*>\n+<!ELEMENT beta (#PCDATA)*>\n+<!ELEMENT maxiter (#PCDATA)*>\n+<!ELEMENT distance (#PCDATA)*>\n+<!ELEMENT num_sequences (#PCDATA)*>\n+<!ELEMENT num_positions (#PCDATA)*>\n+<!ELEMENT seed (#PCDATA)*>\n+<!ELEMENT hsfrac (#PCDATA)*>\n+<!ELEMENT maxwords (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT csites (#PCDATA)*>\n+<!ELEMENT strands (#PCDATA)*>\n+<!ELEMENT priors_file (#PCDATA)*>\n+<!ELEMENT reason_for_stopping (#PCDATA)*>\n+<!ELEMENT back_order (#PCDATA)*>\n+<!ELEMENT background_frequencies (alphabet_array)>\n+<!ATTLIST background_frequencies source CDATA #REQUIRED>\n+\n+<!-- Motif elements -->\n+<!ELEMENT motifs (motif*)>\n+<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n+<!ATTLIST motif id ID #REQUIRED\n+                name CDATA #REQUIRED\n+                alt CDATA ""\n+                width CDATA #REQUIRED\n+                sites CDATA #REQUIRED\n+                llr CDATA #REQUIRED\n+                ic CDATA #REQUIRED\n+                re CDATA #REQUIRED\n+                bayes_threshold CDATA #REQUIRED\n+                e_value CDATA #REQUIRED\n+                elapsed_time CDATA #REQUIRED\n+                url CDATA ""\n+>\n+<!ELEMENT scores (alphabet_matrix)>\n+<!ELEMENT probabilities (alphabet_matrix)>\n+<!ELEMENT regular_expression (#PCDATA)*>\n+\n+<!-- Contributing site elements -->\n+<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n+<!ELEMENT contributing_sites (contributing_site*)>\n+<!ELEMENT contributing_site (left_flank, site, right_flank)>\n+<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n+                          position CDATA #REQUIRED\n+                          strand (plus|minus|none) \'none\'\n+                          pvalue CDATA #REQUIRED\n+>\n+<!-- The left_flank contains the sequence for 10 bases to the left of the mot'..b'="plus" position="12" pvalue="3.04e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_5" pvalue="1.47e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="0" pvalue="3.67e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_6" pvalue="6.45e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="1.61e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_7" pvalue="2.26e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="5.65e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_8" pvalue="3.37e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_9" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_10" pvalue="5.73e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_11" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="16" pvalue="1.38e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_12" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="18" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_13" pvalue="1.10e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="2.74e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_14" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="17" pvalue="1.95e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_15" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="21" pvalue="1.38e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_16" pvalue="2.85e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="22" pvalue="7.15e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_17" pvalue="1.90e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_18" pvalue="8.63e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="37" pvalue="2.16e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_19" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="9" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_20" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_21" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="7" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_22" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="2" pvalue="1.95e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_23" pvalue="8.89e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="2.22e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_24" pvalue="1.80e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="12" pvalue="4.51e-07"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_25" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="36" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_26" pvalue="5.54e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_27" pvalue="5.54e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_28" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="32" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_29" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="30" pvalue="3.04e-05"/>\n+</scanned_sites>\n+</scanned_sites_summary>\n+</MEME>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_txt_1.txt
--- a/test-data/meme_output_txt_1.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,325 +0,0 @@\n-********************************************************************************\n-MEME - Motif discovery tool\n-********************************************************************************\n-MEME version 4.11.2 (Release date: Thu May 05 14:58:55 2016 -0700)\n-\n-For further information on how to interpret these results or to get\n-a copy of the MEME software please access http://meme-suite.org .\n-\n-This file may be used as input to the MAST algorithm for searching\n-sequence databases for matches to groups of motifs.  MAST is available\n-for interactive use and downloading at http://meme-suite.org .\n-********************************************************************************\n-\n-\n-********************************************************************************\n-REFERENCE\n-********************************************************************************\n-If you use this program in your research, please cite:\n-\n-Timothy L. Bailey and Charles Elkan,\n-"Fitting a mixture model by expectation maximization to discover\n-motifs in biopolymers", Proceedings of the Second International\n-Conference on Intelligent Systems for Molecular Biology, pp. 28-36,\n-AAAI Press, Menlo Park, California, 1994.\n-********************************************************************************\n-\n-\n-********************************************************************************\n-TRAINING SET\n-********************************************************************************\n-DATAFILE= /tmp/tmpCNK6l0/files/000/dataset_22.dat\n-ALPHABET= ACDEFGHIKLMNPQRSTVWY\n-Sequence name            Weight Length  Sequence name            Weight Length  \n--------------            ------ ------  -------------            ------ ------  \n-chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  \n-chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  \n-chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  \n-chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  \n-chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  \n-chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  \n-chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  \n-chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  \n-chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  \n-chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  \n-chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  \n-chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  \n-chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  \n-chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  \n-chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  \n-********************************************************************************\n-\n-********************************************************************************\n-COMMAND LINE SUMMARY\n-********************************************************************************\n-This information can also be useful in the event you wish to report a\n-problem with the MEME software.\n-\n-command: meme /tmp/tmpCNK6l0/files/000/dataset_22.dat -o /tmp/tmpCNK6l0/job_working_directory/000/11/dataset_23_files -nostatus -maxsize 1000000 \n-\n-model:  mod=         zoops    nmotifs=         1    evt=           inf\n-object function=  E-value of product of p-values\n-width:  minw=            8    maxw=           50\n-width:  wg=             11    ws=              1    endgaps=       yes\n-nsites: minsites=        2    maxsites=       30    wnsites=       0.8\n-theta:  spmap=         pam    spfuzz=        120\n-global: substring=     yes    branching=      no    wbranch=        no\n-em:     prior=       megap    b=            7500    maxiter=        50\n'..b'000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n- 0.760000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.240000  0.000000  0.000000  0.000000 \n- 0.960000  0.040000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n- 0.840000  0.000000  0.000000  0.000000  0.000000  0.120000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.040000  0.000000  0.000000  0.000000 \n---------------------------------------------------------------------------------\n-\n---------------------------------------------------------------------------------\n-\tMotif 1 regular expression\n---------------------------------------------------------------------------------\n-[GA][GA][GC][GA]TATA[AT]AA\n---------------------------------------------------------------------------------\n-\n-\n-\n-\n-Time  0.72 secs.\n-\n-********************************************************************************\n-\n-\n-********************************************************************************\n-SUMMARY OF MOTIFS\n-********************************************************************************\n-\n---------------------------------------------------------------------------------\n-\tCombined block diagrams: non-overlapping sites with p-value < 0.0001\n---------------------------------------------------------------------------------\n-SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM\n--------------            ----------------  -------------\n-chr21_19617074_19617124_         1.22e-03  39_[1(3.06e-05)]\n-chr21_26934381_26934431_         2.21e-03  27_[1(5.52e-05)]_12\n-chr21_28217753_28217803_         7.29e-01  50\n-chr21_31710037_31710087_         2.37e-03  14_[1(5.94e-05)]_25\n-chr21_31744582_31744632_         1.22e-03  12_[1(3.06e-05)]_27\n-chr21_31768316_31768366_         1.53e-03  [1(3.82e-05)]_39\n-chr21_31914206_31914256_         6.70e-04  15_[1(1.68e-05)]_24\n-chr21_31933633_31933683_         1.81e-03  4_[1(4.54e-05)]_35\n-chr21_31962741_31962791_         1.61e-02  50\n-chr21_31964683_31964733_         1.36e-04  13_[1(3.41e-06)]_26\n-chr21_31973364_31973414_         1.99e-01  50\n-chr21_31992870_31992920_         3.47e-04  16_[1(8.67e-06)]_23\n-chr21_32185595_32185645_         3.47e-04  18_[1(8.67e-06)]_21\n-chr21_32202076_32202126_         2.01e-04  13_[1(5.01e-06)]_26\n-chr21_32253899_32253949_         8.11e-04  19_[1(2.03e-05)]_20\n-chr21_32410820_32410870_         3.47e-04  21_[1(8.67e-06)]_18\n-chr21_36411748_36411798_         2.71e-03  22_[1(6.78e-05)]_17\n-chr21_37838750_37838800_         8.23e-02  50\n-chr21_45705687_45705737_         1.53e-03  37_[1(3.82e-05)]_2\n-chr21_45971413_45971463_         1.36e-04  9_[1(3.41e-06)]_30\n-chr21_45978668_45978718_         6.37e-04  4_[1(1.59e-05)]_35\n-chr21_45993530_45993580_         1.60e-04  7_[1(4.00e-06)]_32\n-chr21_46020421_46020471_         4.83e-04  2_[1(1.21e-05)]_37\n-chr21_46031920_46031970_         2.43e-04  15_[1(6.06e-06)]_24\n-chr21_46046964_46047014_         4.26e-05  12_[1(1.06e-06)]_27\n-chr21_46057197_46057247_         1.36e-04  36_[1(3.41e-06)]_3\n-chr21_46086869_46086919_         4.30e-02  50\n-chr21_46102103_46102153_         4.30e-02  50\n-chr21_47517957_47518007_         6.37e-04  32_[1(1.59e-05)]_7\n-chr21_47575506_47575556_         1.61e-03  30_[1(4.02e-05)]_9\n---------------------------------------------------------------------------------\n-\n-********************************************************************************\n-\n-\n-********************************************************************************\n-Stopped because requested number of motifs (1) found.\n-********************************************************************************\n-\n-CPU: bigsky\n-\n-********************************************************************************\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_txt_2.txt
--- a/test-data/meme_output_txt_2.txt Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,319 +0,0 @@\n-********************************************************************************\n-MEME - Motif discovery tool\n-********************************************************************************\n-MEME version 4.11.2 (Release date: Thu May 05 14:58:55 2016 -0700)\n-\n-For further information on how to interpret these results or to get\n-a copy of the MEME software please access http://meme-suite.org .\n-\n-This file may be used as input to the MAST algorithm for searching\n-sequence databases for matches to groups of motifs.  MAST is available\n-for interactive use and downloading at http://meme-suite.org .\n-********************************************************************************\n-\n-\n-********************************************************************************\n-REFERENCE\n-********************************************************************************\n-If you use this program in your research, please cite:\n-\n-Timothy L. Bailey and Charles Elkan,\n-"Fitting a mixture model by expectation maximization to discover\n-motifs in biopolymers", Proceedings of the Second International\n-Conference on Intelligent Systems for Molecular Biology, pp. 28-36,\n-AAAI Press, Menlo Park, California, 1994.\n-********************************************************************************\n-\n-\n-********************************************************************************\n-TRAINING SET\n-********************************************************************************\n-DATAFILE= Galaxy_FASTA_Input\n-ALPHABET= ACGT\n-Sequence name            Weight Length  Sequence name            Weight Length  \n--------------            ------ ------  -------------            ------ ------  \n-chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  \n-chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  \n-chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  \n-chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  \n-chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  \n-chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  \n-chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  \n-chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  \n-chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  \n-chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  \n-chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  \n-chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  \n-chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  \n-chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  \n-chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  \n-********************************************************************************\n-\n-********************************************************************************\n-COMMAND LINE SUMMARY\n-********************************************************************************\n-This information can also be useful in the event you wish to report a\n-problem with the MEME software.\n-\n-command: meme /tmp/tmpCNK6l0/files/000/dataset_26.dat -o /tmp/tmpCNK6l0/job_working_directory/000/14/dataset_28_files -nostatus -maxsize 1000000 -sf Galaxy_FASTA_Input -dna -mod zoops -nmotifs 1 -wnsites 0.8 -evt inf -minw 8 -maxw 50 -wg 11 -ws 1 -maxiter 50 -distance 0.001 -prior dirichlet -b 0.01 -plib /tmp/tmpCNK6l0/files/000/dataset_27.dat -spmap uni -spfuzz 0.5 \n-\n-model:  mod=         zoops    nmotifs=         1    evt=           inf\n-object function=  E-value of product of p-values\n-width:  minw=            8    maxw=           50\n-width:  wg=             11    ws=              1    endgaps=       yes\n-nsites: minsites=        2    maxsites=       30    wnsites=       0.8\n-theta:'..b'ition-specific probability matrix\n---------------------------------------------------------------------------------\n-letter-probability matrix: alength= 4 w= 11 nsites= 30 E= 5.1e-040 \n- 0.266667  0.066667  0.566667  0.100000 \n- 0.300000  0.000000  0.666667  0.033333 \n- 0.133333  0.266667  0.466667  0.133333 \n- 0.300000  0.033333  0.600000  0.066667 \n- 0.000000  0.000000  0.033333  0.966667 \n- 0.866667  0.066667  0.000000  0.066667 \n- 0.000000  0.000000  0.000000  1.000000 \n- 0.966667  0.033333  0.000000  0.000000 \n- 0.700000  0.000000  0.000000  0.300000 \n- 0.933333  0.066667  0.000000  0.000000 \n- 0.800000  0.000000  0.166667  0.033333 \n---------------------------------------------------------------------------------\n-\n---------------------------------------------------------------------------------\n-\tMotif 1 regular expression\n---------------------------------------------------------------------------------\n-[GA][GA][GC][GA]TATA[AT]AA\n---------------------------------------------------------------------------------\n-\n-\n-\n-\n-Time  0.32 secs.\n-\n-********************************************************************************\n-\n-\n-********************************************************************************\n-SUMMARY OF MOTIFS\n-********************************************************************************\n-\n---------------------------------------------------------------------------------\n-\tCombined block diagrams: non-overlapping sites with p-value < 0.0001\n---------------------------------------------------------------------------------\n-SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM\n--------------            ----------------  -------------\n-chr21_19617074_19617124_         5.63e-04  39_[+1(1.41e-05)]\n-chr21_26934381_26934431_         1.57e-03  27_[+1(3.93e-05)]_12\n-chr21_28217753_28217803_         1.00e-01  50\n-chr21_31710037_31710087_         2.49e-03  14_[+1(6.24e-05)]_25\n-chr21_31744582_31744632_         1.22e-03  12_[+1(3.04e-05)]_27\n-chr21_31768316_31768366_         1.47e-03  [+1(3.67e-05)]_39\n-chr21_31914206_31914256_         6.45e-04  15_[+1(1.61e-05)]_24\n-chr21_31933633_31933683_         2.26e-03  4_[+1(5.65e-05)]_35\n-chr21_31962741_31962791_         3.37e-02  50\n-chr21_31964683_31964733_         1.95e-04  13_[+1(4.86e-06)]_26\n-chr21_31973364_31973414_         5.73e-02  50\n-chr21_31992870_31992920_         5.52e-04  16_[+1(1.38e-05)]_23\n-chr21_32185595_32185645_         2.59e-04  18_[+1(6.48e-06)]_21\n-chr21_32202076_32202126_         1.10e-04  13_[+1(2.74e-06)]_26\n-chr21_32253899_32253949_         7.78e-04  17_[+1(1.95e-05)]_22\n-chr21_32410820_32410870_         5.52e-04  21_[+1(1.38e-05)]_18\n-chr21_36411748_36411798_         2.85e-03  22_[+1(7.15e-05)]_17\n-chr21_37838750_37838800_         1.90e-02  50\n-chr21_45705687_45705737_         8.63e-04  37_[+1(2.16e-05)]_2\n-chr21_45971413_45971463_         1.95e-04  9_[+1(4.86e-06)]_30\n-chr21_45978668_45978718_         2.59e-04  4_[+1(6.48e-06)]_35\n-chr21_45993530_45993580_         1.95e-04  7_[+1(4.86e-06)]_32\n-chr21_46020421_46020471_         7.78e-04  2_[+1(1.95e-05)]_37\n-chr21_46031920_46031970_         8.89e-05  15_[+1(2.22e-06)]_24\n-chr21_46046964_46047014_         1.80e-05  12_[+1(4.51e-07)]_27\n-chr21_46057197_46057247_         1.95e-04  36_[+1(4.86e-06)]_3\n-chr21_46086869_46086919_         5.54e-03  50\n-chr21_46102103_46102153_         5.54e-03  50\n-chr21_47517957_47518007_         2.59e-04  32_[+1(6.48e-06)]_7\n-chr21_47575506_47575556_         1.22e-03  30_[+1(3.04e-05)]_9\n---------------------------------------------------------------------------------\n-\n-********************************************************************************\n-\n-\n-********************************************************************************\n-Stopped because requested number of motifs (1) found.\n-********************************************************************************\n-\n-CPU: bigsky\n-\n-********************************************************************************\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_xml_1.xml
--- a/test-data/meme_output_xml_1.xml Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1285 +0,0 @@\n-<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n-<!-- Document definition -->\n-<!DOCTYPE MEME[\n-<!ELEMENT MEME (\n-  training_set,\n-  model, \n-  motifs, \n-  scanned_sites_summary?\n-)>\n-<!ATTLIST MEME \n-  version CDATA #REQUIRED\n-  release CDATA #REQUIRED\n->\n-<!-- Training-set elements -->\n-<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n-<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n-<!ELEMENT alphabet (letter*)>\n-<!ATTLIST alphabet name CDATA #REQUIRED>\n-<!ELEMENT ambigs (letter*)>\n-<!ELEMENT letter EMPTY>\n-<!ATTLIST letter id ID #REQUIRED>\n-<!ATTLIST letter symbol CDATA #REQUIRED>\n-<!ATTLIST letter equals CDATA #IMPLIED>\n-<!ATTLIST letter aliases CDATA #IMPLIED>\n-<!ATTLIST letter complement CDATA #IMPLIED>\n-<!ATTLIST letter name CDATA #IMPLIED>\n-<!ATTLIST letter colour CDATA #IMPLIED>\n-<!ELEMENT sequence EMPTY>\n-<!ATTLIST sequence id ID #REQUIRED\n-                   name CDATA #REQUIRED\n-                   length CDATA #REQUIRED\n-                   weight CDATA #REQUIRED\n->\n-<!ELEMENT letter_frequencies (alphabet_array)>\n-\n-<!-- Model elements -->\n-<!ELEMENT model (\n-  command_line,\n-  host,\n-  type,\n-  nmotifs,\n-  evalue_threshold,\n-  object_function,\n-  min_width,\n-  max_width,\n-  minic,\n-  wg,\n-  ws,\n-  endgaps,\n-  minsites,\n-  maxsites,\n-  wnsites,\n-  prob,\n-  spmap,\n-  spfuzz,\n-  prior,\n-  beta,\n-  maxiter,\n-  distance,\n-  num_sequences,\n-  num_positions,\n-  seed,\n-  seqfrac,\n-  strands,\n-  priors_file,\n-  reason_for_stopping,\n-  background_frequencies\n-)>\n-<!ELEMENT command_line (#PCDATA)*>\n-<!ELEMENT host (#PCDATA)*>\n-<!ELEMENT type (#PCDATA)*>\n-<!ELEMENT nmotifs (#PCDATA)*>\n-<!ELEMENT evalue_threshold (#PCDATA)*>\n-<!ELEMENT object_function (#PCDATA)*>\n-<!ELEMENT min_width (#PCDATA)*>\n-<!ELEMENT max_width (#PCDATA)*>\n-<!ELEMENT minic (#PCDATA)*>\n-<!ELEMENT wg (#PCDATA)*>\n-<!ELEMENT ws (#PCDATA)*>\n-<!ELEMENT endgaps (#PCDATA)*>\n-<!ELEMENT minsites (#PCDATA)*>\n-<!ELEMENT maxsites (#PCDATA)*>\n-<!ELEMENT wnsites (#PCDATA)*>\n-<!ELEMENT prob (#PCDATA)*>\n-<!ELEMENT spmap (#PCDATA)*>\n-<!ELEMENT spfuzz (#PCDATA)*>\n-<!ELEMENT prior (#PCDATA)*>\n-<!ELEMENT beta (#PCDATA)*>\n-<!ELEMENT maxiter (#PCDATA)*>\n-<!ELEMENT distance (#PCDATA)*>\n-<!ELEMENT num_sequences (#PCDATA)*>\n-<!ELEMENT num_positions (#PCDATA)*>\n-<!ELEMENT seed (#PCDATA)*>\n-<!ELEMENT seqfrac (#PCDATA)*>\n-<!ELEMENT strands (#PCDATA)*>\n-<!ELEMENT priors_file (#PCDATA)*>\n-<!ELEMENT reason_for_stopping (#PCDATA)*>\n-<!ELEMENT background_frequencies (alphabet_array)>\n-<!ATTLIST background_frequencies source CDATA #REQUIRED>\n-\n-<!-- Motif elements -->\n-<!ELEMENT motifs (motif*)>\n-<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n-<!ATTLIST motif id ID #REQUIRED\n-                name CDATA #REQUIRED\n-                width CDATA #REQUIRED\n-                sites CDATA #REQUIRED\n-                llr CDATA #REQUIRED\n-                ic CDATA #REQUIRED\n-                re CDATA #REQUIRED\n-                bayes_threshold CDATA #REQUIRED\n-                e_value CDATA #REQUIRED\n-                elapsed_time CDATA #REQUIRED\n-                url CDATA ""\n->\n-<!ELEMENT scores (alphabet_matrix)>\n-<!ELEMENT probabilities (alphabet_matrix)>\n-<!ELEMENT regular_expression (#PCDATA)*>\n-\n-<!-- Contributing site elements -->\n-<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n-<!ELEMENT contributing_sites (contributing_site*)>\n-<!ELEMENT contributing_site (left_flank, site, right_flank)>\n-<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n-                          position CDATA #REQUIRED\n-                          strand (plus|minus|none) \'none\'\n-                          pvalue CDATA #REQUIRED\n->\n-<!-- The left_flank contains the sequence for 10 bases to the left of the motif start -->\n-<!ELEMENT left_flank (#PCDATA)>\n-<!-- The site contains the sequence for the motif instance -->\n-<!ELEMENT site (letter_ref*)>\n-<!-- The right_flank contai'..b'="none" position="12" pvalue="3.06e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_5" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="0" pvalue="3.82e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_6" pvalue="6.70e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="1.68e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_7" pvalue="1.81e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="4.54e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_8" pvalue="1.61e-02" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_9" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="3.41e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_10" pvalue="1.99e-01" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_11" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="16" pvalue="8.67e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_12" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="18" pvalue="8.67e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_13" pvalue="2.01e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="5.01e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_14" pvalue="8.11e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="19" pvalue="2.03e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_15" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="21" pvalue="8.67e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_16" pvalue="2.71e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="22" pvalue="6.78e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_17" pvalue="8.23e-02" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_18" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="37" pvalue="3.82e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_19" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="9" pvalue="3.41e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_20" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="1.59e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_21" pvalue="1.60e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="7" pvalue="4.00e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_22" pvalue="4.83e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="2" pvalue="1.21e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_23" pvalue="2.43e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="6.06e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_24" pvalue="4.26e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="12" pvalue="1.06e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_25" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="36" pvalue="3.41e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_26" pvalue="4.30e-02" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_27" pvalue="4.30e-02" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_28" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="32" pvalue="1.59e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_29" pvalue="1.61e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="30" pvalue="4.02e-05"/>\n-</scanned_sites>\n-</scanned_sites_summary>\n-</MEME>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_output_xml_2.xml
--- a/test-data/meme_output_xml_2.xml Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,977 +0,0 @@\n-<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n-<!-- Document definition -->\n-<!DOCTYPE MEME[\n-<!ELEMENT MEME (\n-  training_set,\n-  model, \n-  motifs, \n-  scanned_sites_summary?\n-)>\n-<!ATTLIST MEME \n-  version CDATA #REQUIRED\n-  release CDATA #REQUIRED\n->\n-<!-- Training-set elements -->\n-<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n-<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n-<!ELEMENT alphabet (letter*)>\n-<!ATTLIST alphabet name CDATA #REQUIRED>\n-<!ELEMENT ambigs (letter*)>\n-<!ELEMENT letter EMPTY>\n-<!ATTLIST letter id ID #REQUIRED>\n-<!ATTLIST letter symbol CDATA #REQUIRED>\n-<!ATTLIST letter equals CDATA #IMPLIED>\n-<!ATTLIST letter aliases CDATA #IMPLIED>\n-<!ATTLIST letter complement CDATA #IMPLIED>\n-<!ATTLIST letter name CDATA #IMPLIED>\n-<!ATTLIST letter colour CDATA #IMPLIED>\n-<!ELEMENT sequence EMPTY>\n-<!ATTLIST sequence id ID #REQUIRED\n-                   name CDATA #REQUIRED\n-                   length CDATA #REQUIRED\n-                   weight CDATA #REQUIRED\n->\n-<!ELEMENT letter_frequencies (alphabet_array)>\n-\n-<!-- Model elements -->\n-<!ELEMENT model (\n-  command_line,\n-  host,\n-  type,\n-  nmotifs,\n-  evalue_threshold,\n-  object_function,\n-  min_width,\n-  max_width,\n-  minic,\n-  wg,\n-  ws,\n-  endgaps,\n-  minsites,\n-  maxsites,\n-  wnsites,\n-  prob,\n-  spmap,\n-  spfuzz,\n-  prior,\n-  beta,\n-  maxiter,\n-  distance,\n-  num_sequences,\n-  num_positions,\n-  seed,\n-  seqfrac,\n-  strands,\n-  priors_file,\n-  reason_for_stopping,\n-  background_frequencies\n-)>\n-<!ELEMENT command_line (#PCDATA)*>\n-<!ELEMENT host (#PCDATA)*>\n-<!ELEMENT type (#PCDATA)*>\n-<!ELEMENT nmotifs (#PCDATA)*>\n-<!ELEMENT evalue_threshold (#PCDATA)*>\n-<!ELEMENT object_function (#PCDATA)*>\n-<!ELEMENT min_width (#PCDATA)*>\n-<!ELEMENT max_width (#PCDATA)*>\n-<!ELEMENT minic (#PCDATA)*>\n-<!ELEMENT wg (#PCDATA)*>\n-<!ELEMENT ws (#PCDATA)*>\n-<!ELEMENT endgaps (#PCDATA)*>\n-<!ELEMENT minsites (#PCDATA)*>\n-<!ELEMENT maxsites (#PCDATA)*>\n-<!ELEMENT wnsites (#PCDATA)*>\n-<!ELEMENT prob (#PCDATA)*>\n-<!ELEMENT spmap (#PCDATA)*>\n-<!ELEMENT spfuzz (#PCDATA)*>\n-<!ELEMENT prior (#PCDATA)*>\n-<!ELEMENT beta (#PCDATA)*>\n-<!ELEMENT maxiter (#PCDATA)*>\n-<!ELEMENT distance (#PCDATA)*>\n-<!ELEMENT num_sequences (#PCDATA)*>\n-<!ELEMENT num_positions (#PCDATA)*>\n-<!ELEMENT seed (#PCDATA)*>\n-<!ELEMENT seqfrac (#PCDATA)*>\n-<!ELEMENT strands (#PCDATA)*>\n-<!ELEMENT priors_file (#PCDATA)*>\n-<!ELEMENT reason_for_stopping (#PCDATA)*>\n-<!ELEMENT background_frequencies (alphabet_array)>\n-<!ATTLIST background_frequencies source CDATA #REQUIRED>\n-\n-<!-- Motif elements -->\n-<!ELEMENT motifs (motif*)>\n-<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n-<!ATTLIST motif id ID #REQUIRED\n-                name CDATA #REQUIRED\n-                width CDATA #REQUIRED\n-                sites CDATA #REQUIRED\n-                llr CDATA #REQUIRED\n-                ic CDATA #REQUIRED\n-                re CDATA #REQUIRED\n-                bayes_threshold CDATA #REQUIRED\n-                e_value CDATA #REQUIRED\n-                elapsed_time CDATA #REQUIRED\n-                url CDATA ""\n->\n-<!ELEMENT scores (alphabet_matrix)>\n-<!ELEMENT probabilities (alphabet_matrix)>\n-<!ELEMENT regular_expression (#PCDATA)*>\n-\n-<!-- Contributing site elements -->\n-<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n-<!ELEMENT contributing_sites (contributing_site*)>\n-<!ELEMENT contributing_site (left_flank, site, right_flank)>\n-<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n-                          position CDATA #REQUIRED\n-                          strand (plus|minus|none) \'none\'\n-                          pvalue CDATA #REQUIRED\n->\n-<!-- The left_flank contains the sequence for 10 bases to the left of the motif start -->\n-<!ELEMENT left_flank (#PCDATA)>\n-<!-- The site contains the sequence for the motif instance -->\n-<!ELEMENT site (letter_ref*)>\n-<!-- The right_flank contain'..b'="plus" position="12" pvalue="3.04e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_5" pvalue="1.47e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="0" pvalue="3.67e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_6" pvalue="6.45e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="1.61e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_7" pvalue="2.26e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="5.65e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_8" pvalue="3.37e-02" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_9" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="4.86e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_10" pvalue="5.73e-02" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_11" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="16" pvalue="1.38e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_12" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="18" pvalue="6.48e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_13" pvalue="1.10e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="2.74e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_14" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="17" pvalue="1.95e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_15" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="21" pvalue="1.38e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_16" pvalue="2.85e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="22" pvalue="7.15e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_17" pvalue="1.90e-02" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_18" pvalue="8.63e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="37" pvalue="2.16e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_19" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="9" pvalue="4.86e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_20" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="6.48e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_21" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="7" pvalue="4.86e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_22" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="2" pvalue="1.95e-05"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_23" pvalue="8.89e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="2.22e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_24" pvalue="1.80e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="12" pvalue="4.51e-07"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_25" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="36" pvalue="4.86e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_26" pvalue="5.54e-03" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_27" pvalue="5.54e-03" num_sites="0"></scanned_sites>\n-<scanned_sites sequence_id="sequence_28" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="32" pvalue="6.48e-06"/>\n-</scanned_sites>\n-<scanned_sites sequence_id="sequence_29" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="30" pvalue="3.04e-05"/>\n-</scanned_sites>\n-</scanned_sites_summary>\n-</MEME>\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_psp_gen_reports_output.tabular
--- a/test-data/meme_psp_gen_reports_output.tabular Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,19 +0,0 @@
-540 bases or amino acids
-0.5 0.5 4 4
-0.5 0.5 6 6
-0.5 0.5 7 7
-0.5 0.5 8 8
-0.5 0.5 9 9
-0.5 0.5 10 10
-0.5 0.5 11 11
-0.5 0.5 12 12
-0.5 0.5 13 13
-0.5 0.5 14 14
-0.5 0.5 15 15
-0.5 0.5 16 16
-0.5 0.5 17 17
-0.5 0.5 18 18
-0.5 0.5 19 19
-0.5 0.5 20 20
-
-score 0.9 occurred 483 times
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_psp_output_test1.memepsp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_psp_output_test1.memepsp Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,6 @@
+>BBP_PIEBR 20 scaledmin = 0.1 scaledmax = 0.9
+0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+>ICYA_MANSE 20 scaledmin = 0.1 scaledmax = 0.9
+0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+>LACB_BOVIN 20 scaledmin = 0.1 scaledmax = 0.9
+0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_psp_output_test1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_psp_output_test1.tabular Thu May 17 14:11:15 2018 -0400
b
@@ -0,0 +1,21 @@
+meme_psp_input_pos.fa: 540 bases or amino acids
+meme_psp_input_neg.fa: 540 bases or amino acids
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 4 4
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 5 5
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 6 6
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 7 7
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 8 8
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 9 9
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 10 10
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 11 11
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 12 12
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 13 13
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 14 14
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 15 15
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 16 16
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 17 17
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 18 18
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 19 19
+meme_psp_input_pos.fa meme_psp_input_neg.fa 0.5 0.5 20 20
+
+score 0.9 occurred 483 times
b
diff -r 793225b11202 -r b48e673af4e8 test-data/meme_psp_protein_input.fasta
--- a/test-data/meme_psp_protein_input.fasta Wed Apr 25 12:13:08 2018 -0400
+++ b/test-data/meme_psp_protein_input.fasta Thu May 17 14:11:15 2018 -0400
b
@@ -3,13 +3,11 @@
 DGKKASVYNSFVSNGVKEYMEGDLEIAPDAKYTKQGKYVMTFKFGQRVVN
 LVPWVLATDYKNYAINYNCDYHPDKKAHSIHAWILSKSKVLEGNTKEVVD
 NVLKTFSHLIDASKFISNDFSEAACQYSTTYSLTGPDRH
-
 >LACB_BOVIN 
 MKCLLLALALTCGAQALIVTQTMKGLDIQKVAGTWYSLAMAASDISLLDA
 QSAPLRVYVEELKPTPEGDLEILLQKWENGECAQKKIIAEKTKIPAVFKI
 DALNENKVLVLDTDYKKYLLFCMENSAEPEQSLACQCLVRTPEVDDEALE
 KFDKALKALPMHIRLSFNPTQLEEQCHI
-
 >BBP_PIEBR 
 NVYHDGACPEVKPVDNFDWSNYHGKWWEVAKYPNSVEKYGKCGWAEYTPE
 GKSVKVSNYHVIHGKEYFIEGTAYPVGDSKIGKIYHKLTYGGVTKENVFN
b
diff -r 793225b11202 -r b48e673af4e8 test-data/motif1.gff
--- a/test-data/motif1.gff Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,100 +0,0 @@\n-##gff-version 3\n-phiX174\tfimo\tpolypeptide_motif\t1\t11\t78.3\t+\t.\tName=1;ID=1-57-phiX174;pvalue=1.46e-08;sequence=GAGTTTTATCG;\n-phiX174\tfimo\tpolypeptide_motif\t3\t13\t57.5\t+\t.\tName=1;ID=1-471-phiX174;pvalue=1.79e-06;sequence=GTTTTATCGCT;\n-phiX174\tfimo\tpolypeptide_motif\t7\t17\t 45\t+\t.\tName=1;ID=1-1378-phiX174;pvalue=3.18e-05;sequence=TATCGCTTCCA;\n-phiX174\tfimo\tpolypeptide_motif\t10\t20\t53.9\t+\t.\tName=1;ID=1-605-phiX174;pvalue=4.1e-06;sequence=CGCTTCCATGA;\n-phiX174\tfimo\tpolypeptide_motif\t17\t27\t40.2\t+\t.\tName=1;ID=1-1887-phiX174;pvalue=9.55e-05;sequence=ATGACGCAGAA;\n-phiX174\tfimo\tpolypeptide_motif\t18\t28\t45.3\t+\t.\tName=1;ID=1-1349-phiX174;pvalue=2.98e-05;sequence=TGACGCAGAAG;\n-phiX174\tfimo\tpolypeptide_motif\t19\t29\t55.8\t+\t.\tName=1;ID=1-527-phiX174;pvalue=2.6e-06;sequence=GACGCAGAAGT;\n-phiX174\tfimo\tpolypeptide_motif\t21\t31\t41.5\t+\t.\tName=1;ID=1-1705-phiX174;pvalue=7.07e-05;sequence=CGCAGAAGTTA;\n-phiX174\tfimo\tpolypeptide_motif\t22\t32\t44.6\t+\t.\tName=1;ID=1-1404-phiX174;pvalue=3.44e-05;sequence=GCAGAAGTTAA;\n-phiX174\tfimo\tpolypeptide_motif\t24\t34\t79.1\t+\t.\tName=1;ID=1-53-phiX174;pvalue=1.23e-08;sequence=AGAAGTTAACA;\n-phiX174\tfimo\tpolypeptide_motif\t25\t35\t45.3\t+\t.\tName=1;ID=1-1347-phiX174;pvalue=2.97e-05;sequence=GAAGTTAACAC;\n-phiX174\tfimo\tpolypeptide_motif\t26\t36\t59.2\t+\t.\tName=1;ID=1-417-phiX174;pvalue=1.19e-06;sequence=AAGTTAACACT;\n-phiX174\tfimo\tpolypeptide_motif\t30\t40\t44.7\t+\t.\tName=1;ID=1-1399-phiX174;pvalue=3.4e-05;sequence=TAACACTTTCG;\n-phiX174\tfimo\tpolypeptide_motif\t37\t47\t72.4\t+\t.\tName=1;ID=1-98-phiX174;pvalue=5.79e-08;sequence=TTCGGATATTT;\n-phiX174\tfimo\tpolypeptide_motif\t39\t49\t65.3\t+\t.\tName=1;ID=1-213-phiX174;pvalue=2.92e-07;sequence=CGGATATTTCT;\n-phiX174\tfimo\tpolypeptide_motif\t41\t51\t55.3\t+\t.\tName=1;ID=1-548-phiX174;pvalue=2.97e-06;sequence=GATATTTCTGA;\n-phiX174\tfimo\tpolypeptide_motif\t43\t53\t58.4\t+\t.\tName=1;ID=1-442-phiX174;pvalue=1.43e-06;sequence=TATTTCTGATG;\n-phiX174\tfimo\tpolypeptide_motif\t46\t56\t53.7\t+\t.\tName=1;ID=1-617-phiX174;pvalue=4.23e-06;sequence=TTCTGATGAGT;\n-phiX174\tfimo\tpolypeptide_motif\t50\t60\t45.4\t+\t.\tName=1;ID=1-1333-phiX174;pvalue=2.86e-05;sequence=GATGAGTCGAA;\n-phiX174\tfimo\tpolypeptide_motif\t51\t61\t48.4\t+\t.\tName=1;ID=1-1094-phiX174;pvalue=1.44e-05;sequence=ATGAGTCGAAA;\n-phiX174\tfimo\tpolypeptide_motif\t52\t62\t83.9\t+\t.\tName=1;ID=1-22-phiX174;pvalue=4.06e-09;sequence=TGAGTCGAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t53\t63\t53.9\t+\t.\tName=1;ID=1-601-phiX174;pvalue=4.03e-06;sequence=GAGTCGAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t54\t64\t62.9\t+\t.\tName=1;ID=1-297-phiX174;pvalue=5.16e-07;sequence=AGTCGAAAAAT;\n-phiX174\tfimo\tpolypeptide_motif\t55\t65\t52.8\t+\t.\tName=1;ID=1-675-phiX174;pvalue=5.26e-06;sequence=GTCGAAAAATT;\n-phiX174\tfimo\tpolypeptide_motif\t56\t66\t41.4\t+\t.\tName=1;ID=1-1713-phiX174;pvalue=7.2e-05;sequence=TCGAAAAATTA;\n-phiX174\tfimo\tpolypeptide_motif\t58\t68\t43.4\t+\t.\tName=1;ID=1-1500-phiX174;pvalue=4.56e-05;sequence=GAAAAATTATC;\n-phiX174\tfimo\tpolypeptide_motif\t59\t69\t59.6\t+\t.\tName=1;ID=1-409-phiX174;pvalue=1.1e-06;sequence=AAAAATTATCT;\n-phiX174\tfimo\tpolypeptide_motif\t61\t71\t61.8\t+\t.\tName=1;ID=1-329-phiX174;pvalue=6.52e-07;sequence=AAATTATCTTG;\n-phiX174\tfimo\tpolypeptide_motif\t63\t73\t59.2\t+\t.\tName=1;ID=1-419-phiX174;pvalue=1.2e-06;sequence=ATTATCTTGAT;\n-phiX174\tfimo\tpolypeptide_motif\t65\t75\t53.3\t+\t.\tName=1;ID=1-643-phiX174;pvalue=4.66e-06;sequence=TATCTTGATAA;\n-phiX174\tfimo\tpolypeptide_motif\t66\t76\t51.8\t+\t.\tName=1;ID=1-737-phiX174;pvalue=6.54e-06;sequence=ATCTTGATAAA;\n-phiX174\tfimo\tpolypeptide_motif\t67\t77\t73.2\t+\t.\tName=1;ID=1-89-phiX174;pvalue=4.78e-08;sequence=TCTTGATAAAG;\n-phiX174\tfimo\tpolypeptide_motif\t69\t79\t63.8\t+\t.\tName=1;ID=1-268-phiX174;pvalue=4.15e-07;sequence=TTGATAAAGCA;\n-phiX174\tfimo\tpolypeptide_motif\t71\t81\t40.2\t+\t.\tName=1;ID=1-1882-phiX174;pvalue=9.49e-05;sequence=GATAAAGCAGG;\n-phiX174\tfimo\tpolypeptide_motif\t73\t83\t45.4\t+\t.\tName=1;ID=1-1334-phiX174;pvalue=2.87e-05;sequence=TAAAGCAGGAA;\n-phiX174\tfimo\tpolypeptide_motif\t74\t84\t50.9\t+\t.\tName=1;ID=1-832-phiX174;pvalue=8.05e-06;sequence=AAAGCAGGAAT;\n-phiX174\tfimo\tpolypeptide_motif\t76\t86\t52.2'..b';\n-phiX174\tfimo\tpolypeptide_motif\t129\t139\t43.2\t+\t.\tName=1;ID=1-1522-phiX174;pvalue=4.78e-05;sequence=GAAAATGAGAA;\n-phiX174\tfimo\tpolypeptide_motif\t130\t140\t54.1\t+\t.\tName=1;ID=1-595-phiX174;pvalue=3.92e-06;sequence=AAAATGAGAAA;\n-phiX174\tfimo\tpolypeptide_motif\t131\t141\t 76\t+\t.\tName=1;ID=1-68-phiX174;pvalue=2.49e-08;sequence=AAATGAGAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t132\t142\t51.2\t+\t.\tName=1;ID=1-800-phiX174;pvalue=7.57e-06;sequence=AATGAGAAAAT;\n-phiX174\tfimo\tpolypeptide_motif\t133\t143\t56.2\t+\t.\tName=1;ID=1-513-phiX174;pvalue=2.41e-06;sequence=ATGAGAAAATT;\n-phiX174\tfimo\tpolypeptide_motif\t134\t144\t41.1\t+\t.\tName=1;ID=1-1761-phiX174;pvalue=7.83e-05;sequence=TGAGAAAATTC;\n-phiX174\tfimo\tpolypeptide_motif\t135\t145\t50.3\t+\t.\tName=1;ID=1-910-phiX174;pvalue=9.39e-06;sequence=GAGAAAATTCG;\n-phiX174\tfimo\tpolypeptide_motif\t136\t146\t43.3\t+\t.\tName=1;ID=1-1517-phiX174;pvalue=4.66e-05;sequence=AGAAAATTCGA;\n-phiX174\tfimo\tpolypeptide_motif\t139\t149\t54.2\t+\t.\tName=1;ID=1-588-phiX174;pvalue=3.75e-06;sequence=AAATTCGACCT;\n-phiX174\tfimo\tpolypeptide_motif\t141\t151\t42.2\t+\t.\tName=1;ID=1-1625-phiX174;pvalue=6.01e-05;sequence=ATTCGACCTAT;\n-phiX174\tfimo\tpolypeptide_motif\t143\t153\t 50\t+\t.\tName=1;ID=1-938-phiX174;pvalue=9.94e-06;sequence=TCGACCTATCC;\n-phiX174\tfimo\tpolypeptide_motif\t145\t155\t44.6\t+\t.\tName=1;ID=1-1403-phiX174;pvalue=3.42e-05;sequence=GACCTATCCTT;\n-phiX174\tfimo\tpolypeptide_motif\t155\t165\t51.3\t+\t.\tName=1;ID=1-787-phiX174;pvalue=7.35e-06;sequence=TGCGCAGCTCG;\n-phiX174\tfimo\tpolypeptide_motif\t157\t167\t51.1\t+\t.\tName=1;ID=1-807-phiX174;pvalue=7.68e-06;sequence=CGCAGCTCGAG;\n-phiX174\tfimo\tpolypeptide_motif\t159\t169\t44.5\t+\t.\tName=1;ID=1-1420-phiX174;pvalue=3.56e-05;sequence=CAGCTCGAGAA;\n-phiX174\tfimo\tpolypeptide_motif\t160\t170\t 40\t+\t.\tName=1;ID=1-1921-phiX174;pvalue=9.89e-05;sequence=AGCTCGAGAAG;\n-phiX174\tfimo\tpolypeptide_motif\t166\t176\t60.9\t+\t.\tName=1;ID=1-365-phiX174;pvalue=8.02e-07;sequence=AGAAGCTCTTA;\n-phiX174\tfimo\tpolypeptide_motif\t168\t178\t62.3\t+\t.\tName=1;ID=1-311-phiX174;pvalue=5.87e-07;sequence=AAGCTCTTACT;\n-phiX174\tfimo\tpolypeptide_motif\t181\t191\t49.9\t+\t.\tName=1;ID=1-946-phiX174;pvalue=1.01e-05;sequence=GCGACCTTTCG;\n-phiX174\tfimo\tpolypeptide_motif\t187\t197\t52.5\t+\t.\tName=1;ID=1-694-phiX174;pvalue=5.64e-06;sequence=TTTCGCCATCA;\n-phiX174\tfimo\tpolypeptide_motif\t191\t201\t46.6\t+\t.\tName=1;ID=1-1232-phiX174;pvalue=2.2e-05;sequence=GCCATCAACTA;\n-phiX174\tfimo\tpolypeptide_motif\t194\t204\t76.4\t+\t.\tName=1;ID=1-67-phiX174;pvalue=2.29e-08;sequence=ATCAACTAACG;\n-phiX174\tfimo\tpolypeptide_motif\t201\t211\t40.1\t+\t.\tName=1;ID=1-1908-phiX174;pvalue=9.77e-05;sequence=AACGATTCTGT;\n-phiX174\tfimo\tpolypeptide_motif\t203\t213\t 63\t+\t.\tName=1;ID=1-291-phiX174;pvalue=5e-07;sequence=CGATTCTGTCA;\n-phiX174\tfimo\tpolypeptide_motif\t205\t215\t53.8\t+\t.\tName=1;ID=1-610-phiX174;pvalue=4.16e-06;sequence=ATTCTGTCAAA;\n-phiX174\tfimo\tpolypeptide_motif\t206\t216\t59.1\t+\t.\tName=1;ID=1-421-phiX174;pvalue=1.23e-06;sequence=TTCTGTCAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t207\t217\t 68\t+\t.\tName=1;ID=1-153-phiX174;pvalue=1.58e-07;sequence=TCTGTCAAAAA;\n-phiX174\tfimo\tpolypeptide_motif\t209\t219\t49.6\t+\t.\tName=1;ID=1-988-phiX174;pvalue=1.09e-05;sequence=TGTCAAAAACT;\n-phiX174\tfimo\tpolypeptide_motif\t210\t220\t40.8\t+\t.\tName=1;ID=1-1810-phiX174;pvalue=8.33e-05;sequence=GTCAAAAACTG;\n-phiX174\tfimo\tpolypeptide_motif\t213\t223\t59.7\t+\t.\tName=1;ID=1-404-phiX174;pvalue=1.06e-06;sequence=AAAAACTGACG;\n-phiX174\tfimo\tpolypeptide_motif\t223\t233\t 42\t+\t.\tName=1;ID=1-1654-phiX174;pvalue=6.36e-05;sequence=GCGTTGGATGA;\n-phiX174\tfimo\tpolypeptide_motif\t225\t235\t61.4\t+\t.\tName=1;ID=1-349-phiX174;pvalue=7.16e-07;sequence=GTTGGATGAGG;\n-phiX174\tfimo\tpolypeptide_motif\t227\t237\t40.3\t+\t.\tName=1;ID=1-1874-phiX174;pvalue=9.32e-05;sequence=TGGATGAGGAG;\n-phiX174\tfimo\tpolypeptide_motif\t228\t238\t49.9\t+\t.\tName=1;ID=1-947-phiX174;pvalue=1.01e-05;sequence=GGATGAGGAGA;\n-phiX174\tfimo\tpolypeptide_motif\t229\t239\t 45\t+\t.\tName=1;ID=1-1370-phiX174;pvalue=3.16e-05;sequence=GATGAGGAGAA;\n-phiX174\tfimo\tpolypeptide_motif\t230\t240\t44.8\t+\t.\tName=1;ID=1-1395-phiX174;pvalue=3.33e-05;sequence=ATGAGGAGAAG;\n'
b
diff -r 793225b11202 -r b48e673af4e8 test-data/output.memepsp
--- a/test-data/output.memepsp Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
->BBP_PIEBR 20 scaledmin = 0.1 scaledmax = 0.9
-0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
->ICYA_MANSE 20 scaledmin = 0.1 scaledmax = 0.9
-0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
->LACB_BOVIN 20 scaledmin = 0.1 scaledmax = 0.9
-0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
b
diff -r 793225b11202 -r b48e673af4e8 test-data/phiX.fasta
--- a/test-data/phiX.fasta Wed Apr 25 12:13:08 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,79 +0,0 @@
->phiX174
-GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
-GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
-ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
-TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
-GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
-TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
-TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
-CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
-TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
-TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
-GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
-CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
-TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
-AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
-CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
-TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
-TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
-CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
-GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
-GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
-ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
-TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
-TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
-ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
-CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
-GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
-CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
-TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
-TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
-TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
-AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
-TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
-ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
-GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
-TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
-TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
-TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
-TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
-CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
-AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
-CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
-TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
-CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
-AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
-GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
-GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
-TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
-CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
-TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
-GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
-CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
-TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
-AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
-TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
-CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
-TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
-TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
-CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
-TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
-ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
-TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
-ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
-GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
-CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
-GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
-GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
-ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
-CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
-CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
-GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
-CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
-CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
-TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
-TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
-TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
-AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
-TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
-