Repository 'meme_psp_gen'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/meme_psp_gen

Changeset 0:a0fa4efeeee3 (2017-08-23)
Next changeset 1:793225b11202 (2018-04-25)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme commit 3f116ddc83447056068573320c148a9bfca9aa2e
added:
all_fasta.loc.sample
fimo_wrapper.py
macros.xml
meme_psp_gen.xml
test-data/fimo_output_almost-gff_1.txt
test-data/fimo_output_almost-gff_2.txt
test-data/fimo_output_html_1.html
test-data/fimo_output_html_2.html
test-data/fimo_output_interval_1.txt
test-data/fimo_output_interval_2.txt
test-data/fimo_output_txt_1.txt
test-data/fimo_output_txt_2.txt
test-data/fimo_output_xml_1.xml
test-data/fimo_output_xml_2.xml
test-data/meme_input_1.fasta
test-data/meme_output_html_1.html
test-data/meme_output_html_2.html
test-data/meme_output_txt_1.txt
test-data/meme_output_txt_2.txt
test-data/meme_output_xml_1.xml
test-data/meme_output_xml_2.xml
test-data/meme_psp_gen_reports_output.tabular
test-data/meme_psp_protein_input.fasta
test-data/motif1.gff
test-data/output.memepsp
test-data/phiX.fasta
test-data/prior30.plib
tool_data_table_conf.xml.sample
b
diff -r 000000000000 -r a0fa4efeeee3 all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r a0fa4efeeee3 fimo_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fimo_wrapper.py Wed Aug 23 20:57:34 2017 -0400
[
b'@@ -0,0 +1,194 @@\n+#!/usr/bin/env python\n+import argparse\n+import os\n+import shutil\n+import string\n+import subprocess\n+import sys\n+import tempfile\n+\n+BUFFSIZE = 1048576\n+# Translation table for reverse Complement, with ambiguity codes.\n+DNA_COMPLEMENT = string.maketrans("ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb")\n+\n+\n+def get_stderr(tmp_stderr):\n+    tmp_stderr.seek(0)\n+    stderr = \'\'\n+    try:\n+        while True:\n+            stderr += tmp_stderr.read(BUFFSIZE)\n+            if not stderr or len(stderr) % BUFFSIZE != 0:\n+                break\n+    except OverflowError:\n+        pass\n+    return stderr\n+\n+\n+def reverse(sequence):\n+    # Reverse sequence string.\n+    return sequence[::-1]\n+\n+\n+def dna_complement(sequence):\n+    # Complement DNA sequence string.\n+    return sequence.translate(DNA_COMPLEMENT)\n+\n+\n+def dna_reverse_complement(sequence):\n+    # Returns the reverse complement of the sequence.\n+    sequence = reverse(sequence)\n+    return dna_complement(sequence)\n+\n+\n+def stop_err(msg):\n+    sys.stderr.write(msg)\n+    sys.exit(1)\n+\n+\n+parser = argparse.ArgumentParser()\n+parser.add_argument(\'--input_motifs\', dest=\'input_motifs\', help=\'MEME output formatted files for input to fimo\')\n+parser.add_argument(\'--input_fasta\', dest=\'input_fasta\', help=\'Fassta sequence file\')\n+parser.add_argument(\'--options_type\', dest=\'options_type\', help=\'Basic or Advance options\')\n+parser.add_argument(\'--input_psp\', dest=\'input_psp\', default=None, help=\'File containing position specific priors\')\n+parser.add_argument(\'--input_prior_dist\', dest=\'input_prior_dist\', default=None, help=\'File containing binned distribution of priors\')\n+parser.add_argument(\'--alpha\', dest=\'alpha\', type=float, default=1.0, help=\'The alpha parameter for calculating position specific priors\')\n+parser.add_argument(\'--bgfile\', dest=\'bgfile\', default=None, help=\'Background file type, used only if not "default"\')\n+parser.add_argument(\'--max_strand\', action=\'store_true\', help=\'If matches on both strands at a given position satisfy the output threshold, only report the match for the strand with the higher score\')\n+parser.add_argument(\'--max_stored_scores\', dest=\'max_stored_scores\', type=int, help=\'Maximum score count to store\')\n+parser.add_argument(\'--motif\', dest=\'motifs\', action=\'append\', default=[], help=\'Specify motif by id\')\n+parser.add_argument(\'--output_separate_motifs\', dest=\'output_separate_motifs\', default=\'no\', help=\'Output one dataset per motif\')\n+parser.add_argument(\'--motif_pseudo\', dest=\'motif_pseudo\', type=float, default=0.1, help=\'Pseudocount to add to counts in motif matrix\')\n+parser.add_argument(\'--no_qvalue\', action=\'store_true\', help=\'Do not compute a q-value for each p-value\')\n+parser.add_argument(\'--norc\', action=\'store_true\', help=\'Do not score the reverse complement DNA strand\')\n+parser.add_argument(\'--output_path\', dest=\'output_path\', help=\'Output files directory\')\n+parser.add_argument(\'--parse_genomic_coord\', dest=\'parse_genomic_coord\', default=\'no\', help=\'Check each sequence header for UCSC style genomic coordinates\')\n+parser.add_argument(\'--remove_duplicate_coords\', dest=\'remove_duplicate_coords\', default=\'no\', help=\'Remove duplicate entries in unique GFF coordinates\')\n+parser.add_argument(\'--qv_thresh\', action=\'store_true\', help=\'Use q-values for the output threshold\')\n+parser.add_argument(\'--thresh\', dest=\'thresh\', type=float, help=\'p-value threshold\')\n+parser.add_argument(\'--gff_output\', dest=\'gff_output\', help=\'Gff output file\')\n+parser.add_argument(\'--html_output\', dest=\'html_output\', help=\'HTML output file\')\n+parser.add_argument(\'--interval_output\', dest=\'interval_output\', help=\'Interval output file\')\n+parser.add_argument(\'--txt_output\', dest=\'txt_output\', help=\'Text output file\')\n+parser.add_argument(\'--xml_output\', dest=\'xml_output\', help=\'XML output file\')\n+args = parser.parse_args()\n+\n+fimo_cmd_list = [\'fimo\']\n+if args.options_type == \'advanced\':\n+    fimo_cmd_list.append(\'--alpha %4f\' % args.alpha)\n+    if args.bgfile is'..b'.append(\'--thresh %4f\' % args.thresh)\n+    if args.input_psp is not None:\n+        fimo_cmd_list.append(\'--psp "%s"\' % args.input_psp)\n+    if args.input_prior_dist is not None:\n+        fimo_cmd_list.append(\'--prior-dist "%s"\' % args.input_prior_dist)\n+fimo_cmd_list.append(\'--o "%s"\' % (args.output_path))\n+fimo_cmd_list.append(\'--verbosity 1\')\n+fimo_cmd_list.append(args.input_motifs)\n+fimo_cmd_list.append(args.input_fasta)\n+\n+fimo_cmd = \' \'.join(fimo_cmd_list)\n+\n+try:\n+    tmp_stderr = tempfile.NamedTemporaryFile()\n+    proc = subprocess.Popen(args=fimo_cmd, shell=True, stderr=tmp_stderr)\n+    returncode = proc.wait()\n+    if returncode != 0:\n+        stderr = get_stderr(tmp_stderr)\n+        stop_err(stderr)\n+except Exception as e:\n+    stop_err(\'Error running FIMO:\\n%s\' % e)\n+\n+shutil.move(os.path.join(args.output_path, \'fimo.txt\'), args.txt_output)\n+\n+gff_file = os.path.join(args.output_path, \'fimo.gff\')\n+if args.remove_duplicate_coords == \'yes\':\n+    tmp_stderr = tempfile.NamedTemporaryFile()\n+    # Identify and eliminating identical motif occurrences.  These\n+    # are identical if the combination of chrom, start, end and\n+    # motif id are identical.\n+    cmd = \'sort -k1,1 -k4,4n -k5,5n -k9.1,9.6 -u -o %s %s\' % (gff_file, gff_file)\n+    proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True)\n+    returncode = proc.wait()\n+    if returncode != 0:\n+        stderr = get_stderr(tmp_stderr)\n+        stop_err(stderr)\n+    # Sort GFF output by a combination of chrom, score, start.\n+    cmd = \'sort -k1,1 -k4,4n -k6,6n -o %s %s\' % (gff_file, gff_file)\n+    proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True)\n+    returncode = proc.wait()\n+    if returncode != 0:\n+        stderr = get_stderr(tmp_stderr)\n+        stop_err(stderr)\n+if args.output_separate_motifs == \'yes\':\n+    # Create the collection output directory.\n+    collection_path = (os.path.join(os.getcwd(), \'output\'))\n+    # Keep track of motif occurrences.\n+    header_line = None\n+    motif_ids = []\n+    file_handles = []\n+    for line in open(gff_file, \'r\'):\n+        if line.startswith(\'#\'):\n+            if header_line is None:\n+                header_line = line\n+            continue\n+        items = line.split(\'\\t\')\n+        attribute = items[8]\n+        attributes = attribute.split(\';\')\n+        name = attributes[0]\n+        motif_id = name.split(\'=\')[1]\n+        file_name = os.path.join(collection_path, \'MOTIF%s.gff\' % motif_id)\n+        if motif_id in motif_ids:\n+            i = motif_ids.index(motif_id)\n+            fh = file_handles[i]\n+            fh.write(line)\n+        else:\n+            fh = open(file_name, \'wb\')\n+            if header_line is not None:\n+                fh.write(header_line)\n+            fh.write(line)\n+            motif_ids.append(motif_id)\n+            file_handles.append(fh)\n+    for file_handle in file_handles:\n+        file_handle.close()\n+else:\n+    shutil.move(gff_file, args.gff_output)\n+shutil.move(os.path.join(args.output_path, \'fimo.xml\'), args.xml_output)\n+shutil.move(os.path.join(args.output_path, \'fimo.html\'), args.html_output)\n+\n+out_file = open(args.interval_output, \'wb\')\n+out_file.write("#%s\\n" % "\\t".join(("chr", "start", "end", "pattern name", "score", "strand", "matched sequence", "p-value", "q-value")))\n+for line in open(args.txt_output):\n+    if line.startswith(\'#\'):\n+        continue\n+    fields = line.rstrip("\\n\\r").split("\\t")\n+    start, end = int(fields[2]), int(fields[3])\n+    sequence = fields[7]\n+    if start > end:\n+        # Flip start and end and set strand.\n+        start, end = end, start\n+        strand = "-"\n+        # We want sequences relative to strand; FIMO always provides + stranded sequence.\n+        sequence = dna_reverse_complement(sequence)\n+    else:\n+        strand = "+"\n+    # Make 0-based start position.\n+    start -= 1\n+    out_file.write("%s\\n" % "\\t".join([fields[1], str(start), str(end), fields[0], fields[4], strand, sequence, fields[5], fields[6]]))\n+out_file.close()\n'
b
diff -r 000000000000 -r a0fa4efeeee3 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,11 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">4.11.2</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.3.23">graphicsmagick</requirement>
+            <requirement type="package" version="4.11.2">meme</requirement>
+        </requirements>
+    </xml>
+</macros>
+
b
diff -r 000000000000 -r a0fa4efeeee3 meme_psp_gen.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/meme_psp_gen.xml Wed Aug 23 20:57:34 2017 -0400
[
b'@@ -0,0 +1,140 @@\n+<tool id="meme_psp_gen" name="MEME psp-gen" version="@WRAPPER_VERSION@.0">\n+    <description>- perform discriminative motif discovery</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n+    <command detect_errors="exit_code"><![CDATA[\n+psp-gen\n+-pos \'$primary_sequence\'\n+-neg \'$control_sequence\'\n+-minw $adv.minw\n+-maxw $adv.maxw\n+$adv.alphabet\n+#if str($adv.triples_cond.triples) == \'yes\':\n+    -triples\n+    $adv.triples_cond.fixedstart\n+#end if\n+$adv.equiv\n+$adv.revcomp\n+$adv.scalemin\n+$adv.scalemax\n+$adv.maxrange\n+$adv.raw\n+#if str($adv.report_scores_cond.report_scores) == \'yes\':\n+    -reportscores\n+    $adv.report_scores_cond.verbose\n+    2> \'$output_tabular\'\n+#end if\n+> \'$output_psp\'\n+    ]]></command>\n+    <inputs>\n+        <param format="fasta" name="primary_sequence" type="data" label="Primary sequence file"/>\n+        <param format="fasta" name="control_sequence" type="data" label="Control sequence file"/>\n+        <section name="adv" title="Additional Options">\n+            <param argument="-minw" type="integer" value="4" min="0" label="Minimum width to use for position specific priors"/>\n+            <param argument="-maxw" type="integer" value="20" min="0" label="Maximum width to use for position specific priors"/>\n+            <param name="alphabet" type="select" label="Alphabet">\n+                <option value="-dna" selected="true">DNA</option>\n+                <option value="-protein">protein</option>\n+                <option value="-rna">RNA</option>\n+            </param>\n+            <conditional name="triples_cond">\n+                <param name="triples" type="select" label="Use spaced triples instead of whole-word matches?" help="Recommended for protein">\n+                    <option value="no" selected="true">No</option>\n+                    <option value="yes">yes</option>\n+                </param>\n+                <when value="no"/>\n+                <when value="yes">\n+                    <param argument="-fixedstart" type="boolean" truevalue="-fixedstart" falsevalue="" checked="False" label="Allow triples to start anywhere within a site?" help="Select \'No\' to only consider triples starting at the start of the site"/>\n+                </when>\n+            </conditional>\n+            <param argument="-equiv" type="boolean" truevalue="-equiv" falsevalue="" checked="False" label="Match as equal sequences of letters that appear together?"/>\n+            <param argument="-revcomp" type="boolean" truevalue="-revcomp" falsevalue="" checked="False" label="Consider both strands when calculating position specific priors for alphabets?"/>\n+            <param argument="-scalemin" type="boolean" truevalue="-scalemin" falsevalue="" checked="False" label="Set the lowest score value after scaling?"/>\n+            <param argument="-scalemax" type="boolean" truevalue="-scalemax" falsevalue="" checked="False" label="Set the highest score value after scaling?"/>\n+            <param argument="-maxrange" type="boolean" truevalue="-maxrange" falsevalue="" checked="False" label="Choose the width with the biggest difference between minimum and maximum scores before scaling?" help="Select \'No\' to choose the width with the biggest maximum score (before scaling)"/>\n+            <param argument="-raw" type="boolean" truevalue="-raw" falsevalue="" checked="False" label="Output scores instead of priors?"/>\n+            <conditional name="report_scores_cond">\n+                <param name="report_scores" type="select" label="Output primary and control file names, scores and widths?">\n+                    <option value="no" selected="true">No</option>\n+                    <option value="yes">yes</option>\n+                </param>\n+                <when value="no"/>\n+                <when value="yes">\n+                    <param argument="-verbose" type="boolean" truevalue="-verbose" falsevalue="" checked="False" label="Report frequency of each score?"/>\n+ '..b'="output_tabular" file="meme_psp_gen_reports_output.tabular" compare="contains"/>\n+        </test>\n+    </tests>\n+    <help>\n+\n+.. class:: warningmark\n+\n+**WARNING: This tool is only available for non-commercial use. Use for educational, research and non-profit purposes is permitted.\n+Before using, be sure to review, agree, and comply with the license.**\n+\n+psp-gen is used to allow MEME to perform discriminative motif discovery\xe2\x80\x94to find motifs overrepresented in one set of sequences compared to in another set.\n+It takes two files as input\xe2\x80\x94the sequence file to be input to MEME, (the "primary" file) and a "control" sequence file of sequences believed not to contain\n+the same motifs as in the "primary" file. psp-gen creates a file for use by MEME that encapsulates information about probable discriminative motifs. psp-gen\n+records its chosen motif width in the file, and MEME is able to adjust the data when it tries different motif widths.\n+\n+.. class:: infomark\n+\n+For detailed information on psp-gen, click here_, or view the license_.\n+\n+.. _here: http://meme-suite.org/doc/psp-gen.html?man_type=web\n+.. _license: http://meme-suite.org/doc/copyright.html?man_type=web\n+\n+-----\n+\n+**Required options**\n+\n+* **Primary sequence file** - a file containing FASTA formatted sequences which are to be used as the primary set in PSP calculation.\n+* **Control sequence file** - a file containing FASTA formatted sequences which are to be used as the control set in PSP calculation.\n+\n+**Additional options**\n+\n+* **Minimum width to use for position specific priors** - the minimum width to use with selecting the "best" width for PSPs.\n+* **Maximum width to use for position specific priors** - the maximum width to use with selecting the "best" width for PSPs.\n+* **Alphabet** - The alphabet to be used, one of DNA, protein or RNA.\n+* **Use spaced triples instead of whole-word matches** - use spaced triples instead of whole-word matches (recommended when using the protein alphabet).\n+\n+   * **Allow triples to start anywhere within a site** - when using the -triples option, select \'Yes\' to only consider triples starting at the start of the site or \'No\' to allow triples to start anywhere in a width \'w\' site.\n+\n+* **Match as equal sequences of letters that appear together** - select \'Yes\' to match as equal any sequence of letter that appears together. Separate letter groups using "-" (e.g. -equiv "IVL-HKR") means treat all occurrences of I, V or L as the same, and all occurrences of H, K or R as the same.\n+* **Consider both strands when calculating position specific priors for alphabets** - select \'Yes\' to consider both strands when calculating PSPs for complementable alphabets or \'No to consider only the given strand.\n+* **Set the lowest score value after scaling** - select \'Yes\' to set the lowest score to 0.1 unless the the following "highest score" option is selected, in which case the lowest score is highest score - 1.\n+* **Set the highest score value after scaling** - select \'Yes\' to set the highest score to 0.9 unless the previous "lowest score" option is selected, in which case the highest score is lowest score + 1.\n+* **Choose the width with the biggest difference between minimum and maximum scores before scaling** - select \'Yes\' to choose the width with the biggest difference between minimum and maximum scores before scaling, or \'No\' to choose the width with the biggest maximum score before scaling.\n+* **Output scores instead of priors** - select \'Yes\' to output scores instead of position specific priors.\n+* **Output primary and control file names, scores and widths** - select \'Yes\' to produce an additional tabular output consisting of control file names, lowest and highest scores and lowest and highest widths.\n+\n+   * **Report frequency of each score** - select \'Yes\' to include the frequency of each score in the output.\n+\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1186/1471-2105-11-179</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_almost-gff_1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_almost-gff_1.txt Wed Aug 23 20:57:34 2017 -0400
b
b'@@ -0,0 +1,100 @@\n+##gff-version 3\n+phiX174\tfimo\tpolypeptide_motif\t1388\t1398\t102\t+\t.\tName=1;ID=1-1-phiX174;pvalue=6.36e-11;qvalue= 1.25e-09;sequence=AATATCTATAA;\n+phiX174\tfimo\tpolypeptide_motif\t847\t857\t102\t+\t.\tName=1;ID=1-2-phiX174;pvalue=7.02e-11;qvalue= 1.25e-09;sequence=AATGTCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t2301\t2311\t99.6\t+\t.\tName=1;ID=1-3-phiX174;pvalue=1.08e-10;qvalue= 1.29e-09;sequence=AGGTTATAACG;\n+phiX174\tfimo\tpolypeptide_motif\t5063\t5073\t95.6\t+\t.\tName=1;ID=1-4-phiX174;pvalue=2.73e-10;qvalue= 2.25e-09;sequence=AGGAGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t989\t999\t 95\t+\t.\tName=1;ID=1-5-phiX174;pvalue=3.15e-10;qvalue= 2.25e-09;sequence=TGAGGATAAAT;\n+phiX174\tfimo\tpolypeptide_motif\t4713\t4723\t91.1\t+\t.\tName=1;ID=1-6-phiX174;pvalue=7.74e-10;qvalue= 3.48e-09;sequence=GACTGCTATCA;\n+phiX174\tfimo\tpolypeptide_motif\t5048\t5058\t90.7\t+\t.\tName=1;ID=1-7-phiX174;pvalue=8.51e-10;qvalue= 3.48e-09;sequence=TGCTGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t855\t865\t90.6\t+\t.\tName=1;ID=1-8-phiX174;pvalue=8.64e-10;qvalue= 3.48e-09;sequence=AAGGTAAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3155\t3165\t90.1\t+\t.\tName=1;ID=1-9-phiX174;pvalue=9.76e-10;qvalue= 3.48e-09;sequence=TATGGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t5009\t5019\t90.1\t+\t.\tName=1;ID=1-10-phiX174;pvalue=9.76e-10;qvalue= 3.48e-09;sequence=TGTGGCTAAAT;\n+phiX174\tfimo\tpolypeptide_motif\t814\t824\t88.9\t+\t.\tName=1;ID=1-11-phiX174;pvalue=1.28e-09;qvalue= 4.14e-09;sequence=TGCGTCAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t2832\t2842\t88.5\t+\t.\tName=1;ID=1-12-phiX174;pvalue=1.42e-09;qvalue= 4.23e-09;sequence=TTGGTCTAACT;\n+phiX174\tfimo\tpolypeptide_motif\t3830\t3840\t87.7\t+\t.\tName=1;ID=1-13-phiX174;pvalue=1.7e-09;qvalue= 4.68e-09;sequence=TATTGATAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t3560\t3570\t87.2\t+\t.\tName=1;ID=1-14-phiX174;pvalue=1.89e-09;qvalue= 4.82e-09;sequence=TGCGTCTATTA;\n+phiX174\tfimo\tpolypeptide_motif\t2882\t2892\t86.4\t+\t.\tName=1;ID=1-15-phiX174;pvalue=2.29e-09;qvalue= 5.46e-09;sequence=AGGTTATTAAA;\n+phiX174\tfimo\tpolypeptide_motif\t4453\t4463\t85.9\t+\t.\tName=1;ID=1-16-phiX174;pvalue=2.58e-09;qvalue= 5.75e-09;sequence=AAGGTATTAAG;\n+phiX174\tfimo\tpolypeptide_motif\t2493\t2503\t85.1\t+\t.\tName=1;ID=1-17-phiX174;pvalue=3.06e-09;qvalue= 5.79e-09;sequence=GACACCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t4104\t4114\t85.1\t+\t.\tName=1;ID=1-18-phiX174;pvalue=3.08e-09;qvalue= 5.79e-09;sequence=GGCTTCCATAA;\n+phiX174\tfimo\tpolypeptide_motif\t4955\t4965\t85.1\t+\t.\tName=1;ID=1-19-phiX174;pvalue=3.08e-09;qvalue= 5.79e-09;sequence=TGATGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t1885\t1895\t84.4\t+\t.\tName=1;ID=1-20-phiX174;pvalue=3.61e-09;qvalue= 6.45e-09;sequence=TGCGACTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t3376\t3386\t84.2\t+\t.\tName=1;ID=1-21-phiX174;pvalue=3.81e-09;qvalue= 6.48e-09;sequence=AGAATCAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t52\t62\t83.9\t+\t.\tName=1;ID=1-22-phiX174;pvalue=4.06e-09;qvalue= 6.58e-09;sequence=TGAGTCGAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t1390\t1400\t83.7\t+\t.\tName=1;ID=1-23-phiX174;pvalue=4.26e-09;qvalue= 6.61e-09;sequence=TATCTATAACA;\n+phiX174\tfimo\tpolypeptide_motif\t2017\t2027\t83.4\t+\t.\tName=1;ID=1-24-phiX174;pvalue=4.6e-09;qvalue= 6.85e-09;sequence=TTCGTCTAAGA;\n+phiX174\tfimo\tpolypeptide_motif\t1000\t1010\t83.1\t+\t.\tName=1;ID=1-25-phiX174;pvalue=4.88e-09;qvalue= 6.97e-09;sequence=TATGTCTAATA;\n+phiX174\tfimo\tpolypeptide_motif\t1555\t1565\t82.5\t+\t.\tName=1;ID=1-26-phiX174;pvalue=5.58e-09;qvalue= 7.37e-09;sequence=GACTTCTACCA;\n+phiX174\tfimo\tpolypeptide_motif\t4430\t4440\t82.5\t+\t.\tName=1;ID=1-27-phiX174;pvalue=5.62e-09;qvalue= 7.37e-09;sequence=TGAGTATAATT;\n+phiX174\tfimo\tpolypeptide_motif\t1927\t1937\t82.3\t+\t.\tName=1;ID=1-28-phiX174;pvalue=5.82e-09;qvalue= 7.37e-09;sequence=GACTTATACCG;\n+phiX174\tfimo\tpolypeptide_motif\t2981\t2991\t82.1\t+\t.\tName=1;ID=1-29-phiX174;pvalue=6.13e-09;qvalue= 7.37e-09;sequence=CATGTCTAAAT;\n+phiX174\tfimo\tpolypeptide_motif\t4203\t4213\t 82\t+\t.\tName=1;ID=1-30-phiX174;pvalue=6.34e-09;qvalue= 7.37e-09;sequence=GACGGCCATAA;\n+phiX174\tfimo\tpolypeptide_motif\t1669\t1679\t81.9\t+\t.\tName=1;ID=1-31-phiX174;pvalue=6.4e-09;qvalue= 7.37e-09;sequence=TGGAGG'..b'= 1.31e-08;sequence=AAATGAGAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t1491\t1501\t75.9\t+\t.\tName=1;ID=1-69-phiX174;pvalue=2.55e-08;qvalue= 1.32e-08;sequence=GCCATCTCAAA;\n+phiX174\tfimo\tpolypeptide_motif\t434\t444\t75.7\t+\t.\tName=1;ID=1-70-phiX174;pvalue=2.67e-08;qvalue= 1.36e-08;sequence=GGCCTCTATTA;\n+phiX174\tfimo\tpolypeptide_motif\t4565\t4575\t75.6\t+\t.\tName=1;ID=1-71-phiX174;pvalue=2.73e-08;qvalue= 1.36e-08;sequence=TTGGTTTATCG;\n+phiX174\tfimo\tpolypeptide_motif\t102\t112\t75.6\t+\t.\tName=1;ID=1-72-phiX174;pvalue=2.75e-08;qvalue= 1.36e-08;sequence=GAATTAAATCG;\n+phiX174\tfimo\tpolypeptide_motif\t903\t913\t75.5\t+\t.\tName=1;ID=1-73-phiX174;pvalue=2.82e-08;qvalue= 1.38e-08;sequence=GAGGTACTAAA;\n+phiX174\tfimo\tpolypeptide_motif\t4748\t4758\t75.2\t+\t.\tName=1;ID=1-74-phiX174;pvalue=3.01e-08;qvalue= 1.45e-08;sequence=TACAGCTAATG;\n+phiX174\tfimo\tpolypeptide_motif\t2622\t2632\t 75\t+\t.\tName=1;ID=1-75-phiX174;pvalue=3.16e-08;qvalue= 1.5e-08;sequence=TGCTGATATTG;\n+phiX174\tfimo\tpolypeptide_motif\t467\t477\t74.7\t+\t.\tName=1;ID=1-76-phiX174;pvalue=3.35e-08;qvalue= 1.57e-08;sequence=TTTGGATTTAA;\n+phiX174\tfimo\tpolypeptide_motif\t4033\t4043\t74.6\t+\t.\tName=1;ID=1-77-phiX174;pvalue=3.44e-08;qvalue= 1.58e-08;sequence=AGCGTATCGAG;\n+phiX174\tfimo\tpolypeptide_motif\t1348\t1358\t74.6\t+\t.\tName=1;ID=1-78-phiX174;pvalue=3.46e-08;qvalue= 1.58e-08;sequence=TACCAATAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t239\t249\t74.4\t+\t.\tName=1;ID=1-79-phiX174;pvalue=3.62e-08;qvalue= 1.64e-08;sequence=AGTGGCTTAAT;\n+phiX174\tfimo\tpolypeptide_motif\t500\t510\t74.1\t+\t.\tName=1;ID=1-80-phiX174;pvalue=3.84e-08;qvalue= 1.71e-08;sequence=GACGAGTAACA;\n+phiX174\tfimo\tpolypeptide_motif\t3001\t3011\t 74\t+\t.\tName=1;ID=1-81-phiX174;pvalue=3.93e-08;qvalue= 1.73e-08;sequence=GCGGTCAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3776\t3786\t 74\t+\t.\tName=1;ID=1-82-phiX174;pvalue=3.98e-08;qvalue= 1.73e-08;sequence=TATTTCTAATG;\n+phiX174\tfimo\tpolypeptide_motif\t2026\t2036\t73.9\t+\t.\tName=1;ID=1-83-phiX174;pvalue=4.06e-08;qvalue= 1.75e-08;sequence=GAAGTTTAAGA;\n+phiX174\tfimo\tpolypeptide_motif\t4237\t4247\t73.8\t+\t.\tName=1;ID=1-84-phiX174;pvalue=4.12e-08;qvalue= 1.75e-08;sequence=AGTTTGTATCT;\n+phiX174\tfimo\tpolypeptide_motif\t803\t813\t73.7\t+\t.\tName=1;ID=1-85-phiX174;pvalue=4.24e-08;qvalue= 1.78e-08;sequence=AGAAGAAAACG;\n+phiX174\tfimo\tpolypeptide_motif\t3770\t3780\t73.6\t+\t.\tName=1;ID=1-86-phiX174;pvalue=4.35e-08;qvalue= 1.81e-08;sequence=AAAGGATATTT;\n+phiX174\tfimo\tpolypeptide_motif\t3429\t3439\t73.5\t+\t.\tName=1;ID=1-87-phiX174;pvalue=4.45e-08;qvalue= 1.82e-08;sequence=GAGATGCAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t99\t109\t73.5\t+\t.\tName=1;ID=1-88-phiX174;pvalue=4.48e-08;qvalue= 1.82e-08;sequence=TACGAATTAAA;\n+phiX174\tfimo\tpolypeptide_motif\t67\t77\t73.2\t+\t.\tName=1;ID=1-89-phiX174;pvalue=4.78e-08;qvalue= 1.92e-08;sequence=TCTTGATAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t5332\t5342\t72.9\t+\t.\tName=1;ID=1-90-phiX174;pvalue=5.13e-08;qvalue= 2.01e-08;sequence=ATCTGCTCAAA;\n+phiX174\tfimo\tpolypeptide_motif\t277\t287\t72.9\t+\t.\tName=1;ID=1-91-phiX174;pvalue=5.14e-08;qvalue= 2.01e-08;sequence=TTTAGATATGA;\n+phiX174\tfimo\tpolypeptide_motif\t4338\t4348\t72.8\t+\t.\tName=1;ID=1-92-phiX174;pvalue=5.18e-08;qvalue= 2.01e-08;sequence=GGGGACGAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3812\t3822\t72.8\t+\t.\tName=1;ID=1-93-phiX174;pvalue=5.28e-08;qvalue= 2.03e-08;sequence=GGTTGATATTT;\n+phiX174\tfimo\tpolypeptide_motif\t1909\t1919\t72.6\t+\t.\tName=1;ID=1-94-phiX174;pvalue=5.51e-08;qvalue= 2.08e-08;sequence=TAACGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t3000\t3010\t72.6\t+\t.\tName=1;ID=1-95-phiX174;pvalue=5.54e-08;qvalue= 2.08e-08;sequence=GGCGGTCAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3891\t3901\t72.4\t+\t.\tName=1;ID=1-96-phiX174;pvalue=5.75e-08;qvalue= 2.11e-08;sequence=ATTGGCTCTAA;\n+phiX174\tfimo\tpolypeptide_motif\t3079\t3089\t72.4\t+\t.\tName=1;ID=1-97-phiX174;pvalue=5.76e-08;qvalue= 2.11e-08;sequence=CTGGTATTAAA;\n+phiX174\tfimo\tpolypeptide_motif\t37\t47\t72.4\t+\t.\tName=1;ID=1-98-phiX174;pvalue=5.79e-08;qvalue= 2.11e-08;sequence=TTCGGATATTT;\n+phiX174\tfimo\tpolypeptide_motif\t380\t390\t72.2\t+\t.\tName=1;ID=1-99-phiX174;pvalue=6.01e-08;qvalue= 2.17e-08;sequence=GTAAGAAATCA;\n'
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_almost-gff_2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_almost-gff_2.txt Wed Aug 23 20:57:34 2017 -0400
b
b'@@ -0,0 +1,100 @@\n+##gff-version 3\n+phiX174\tfimo\tpolypeptide_motif\t1388\t1398\t102\t+\t.\tName=1;ID=1-1-phiX174;pvalue=6.36e-11;sequence=AATATCTATAA;\n+phiX174\tfimo\tpolypeptide_motif\t847\t857\t102\t+\t.\tName=1;ID=1-2-phiX174;pvalue=7.02e-11;sequence=AATGTCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t2301\t2311\t99.6\t+\t.\tName=1;ID=1-3-phiX174;pvalue=1.08e-10;sequence=AGGTTATAACG;\n+phiX174\tfimo\tpolypeptide_motif\t5063\t5073\t95.6\t+\t.\tName=1;ID=1-4-phiX174;pvalue=2.73e-10;sequence=AGGAGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t989\t999\t 95\t+\t.\tName=1;ID=1-5-phiX174;pvalue=3.15e-10;sequence=TGAGGATAAAT;\n+phiX174\tfimo\tpolypeptide_motif\t4713\t4723\t91.1\t+\t.\tName=1;ID=1-6-phiX174;pvalue=7.74e-10;sequence=GACTGCTATCA;\n+phiX174\tfimo\tpolypeptide_motif\t5048\t5058\t90.7\t+\t.\tName=1;ID=1-7-phiX174;pvalue=8.51e-10;sequence=TGCTGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t855\t865\t90.6\t+\t.\tName=1;ID=1-8-phiX174;pvalue=8.64e-10;sequence=AAGGTAAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3155\t3165\t90.1\t+\t.\tName=1;ID=1-9-phiX174;pvalue=9.76e-10;sequence=TATGGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t5009\t5019\t90.1\t+\t.\tName=1;ID=1-10-phiX174;pvalue=9.76e-10;sequence=TGTGGCTAAAT;\n+phiX174\tfimo\tpolypeptide_motif\t814\t824\t88.9\t+\t.\tName=1;ID=1-11-phiX174;pvalue=1.28e-09;sequence=TGCGTCAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t2832\t2842\t88.5\t+\t.\tName=1;ID=1-12-phiX174;pvalue=1.42e-09;sequence=TTGGTCTAACT;\n+phiX174\tfimo\tpolypeptide_motif\t3830\t3840\t87.7\t+\t.\tName=1;ID=1-13-phiX174;pvalue=1.7e-09;sequence=TATTGATAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t3560\t3570\t87.2\t+\t.\tName=1;ID=1-14-phiX174;pvalue=1.89e-09;sequence=TGCGTCTATTA;\n+phiX174\tfimo\tpolypeptide_motif\t2882\t2892\t86.4\t+\t.\tName=1;ID=1-15-phiX174;pvalue=2.29e-09;sequence=AGGTTATTAAA;\n+phiX174\tfimo\tpolypeptide_motif\t4453\t4463\t85.9\t+\t.\tName=1;ID=1-16-phiX174;pvalue=2.58e-09;sequence=AAGGTATTAAG;\n+phiX174\tfimo\tpolypeptide_motif\t2493\t2503\t85.1\t+\t.\tName=1;ID=1-17-phiX174;pvalue=3.06e-09;sequence=GACACCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t4104\t4114\t85.1\t+\t.\tName=1;ID=1-18-phiX174;pvalue=3.08e-09;sequence=GGCTTCCATAA;\n+phiX174\tfimo\tpolypeptide_motif\t4955\t4965\t85.1\t+\t.\tName=1;ID=1-19-phiX174;pvalue=3.08e-09;sequence=TGATGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t1885\t1895\t84.4\t+\t.\tName=1;ID=1-20-phiX174;pvalue=3.61e-09;sequence=TGCGACTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t3376\t3386\t84.2\t+\t.\tName=1;ID=1-21-phiX174;pvalue=3.81e-09;sequence=AGAATCAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t52\t62\t83.9\t+\t.\tName=1;ID=1-22-phiX174;pvalue=4.06e-09;sequence=TGAGTCGAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t1390\t1400\t83.7\t+\t.\tName=1;ID=1-23-phiX174;pvalue=4.26e-09;sequence=TATCTATAACA;\n+phiX174\tfimo\tpolypeptide_motif\t2017\t2027\t83.4\t+\t.\tName=1;ID=1-24-phiX174;pvalue=4.6e-09;sequence=TTCGTCTAAGA;\n+phiX174\tfimo\tpolypeptide_motif\t1000\t1010\t83.1\t+\t.\tName=1;ID=1-25-phiX174;pvalue=4.88e-09;sequence=TATGTCTAATA;\n+phiX174\tfimo\tpolypeptide_motif\t1555\t1565\t82.5\t+\t.\tName=1;ID=1-26-phiX174;pvalue=5.58e-09;sequence=GACTTCTACCA;\n+phiX174\tfimo\tpolypeptide_motif\t4430\t4440\t82.5\t+\t.\tName=1;ID=1-27-phiX174;pvalue=5.62e-09;sequence=TGAGTATAATT;\n+phiX174\tfimo\tpolypeptide_motif\t1927\t1937\t82.3\t+\t.\tName=1;ID=1-28-phiX174;pvalue=5.82e-09;sequence=GACTTATACCG;\n+phiX174\tfimo\tpolypeptide_motif\t2981\t2991\t82.1\t+\t.\tName=1;ID=1-29-phiX174;pvalue=6.13e-09;sequence=CATGTCTAAAT;\n+phiX174\tfimo\tpolypeptide_motif\t4203\t4213\t 82\t+\t.\tName=1;ID=1-30-phiX174;pvalue=6.34e-09;sequence=GACGGCCATAA;\n+phiX174\tfimo\tpolypeptide_motif\t1669\t1679\t81.9\t+\t.\tName=1;ID=1-31-phiX174;pvalue=6.4e-09;sequence=TGGAGGTAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3260\t3270\t81.5\t+\t.\tName=1;ID=1-32-phiX174;pvalue=7.01e-09;sequence=CGCTGATAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t3047\t3057\t81.3\t+\t.\tName=1;ID=1-33-phiX174;pvalue=7.4e-09;sequence=TACCGATAACA;\n+phiX174\tfimo\tpolypeptide_motif\t4176\t4186\t81.2\t+\t.\tName=1;ID=1-34-phiX174;pvalue=7.6e-09;sequence=GAGTTCGATAA;\n+phiX174\tfimo\tpolypeptide_motif\t4118\t4128\t81.1\t+\t.\tName=1;ID=1-35-phiX174;pvalue=7.7e-09;sequence=GATGGATAACC;\n+phiX174\tfimo\tpolypeptide_motif\t5370\t5380\t80.9\t+\t.\tName=1;ID=1-36-phiX174;p'..b'GT;\n+phiX174\tfimo\tpolypeptide_motif\t4217\t4227\t76.7\t+\t.\tName=1;ID=1-64-phiX174;pvalue=2.15e-08;sequence=TGCTTCTGACG;\n+phiX174\tfimo\tpolypeptide_motif\t4262\t4272\t76.6\t+\t.\tName=1;ID=1-65-phiX174;pvalue=2.18e-08;sequence=AATGGATGAAT;\n+phiX174\tfimo\tpolypeptide_motif\t3569\t3579\t76.5\t+\t.\tName=1;ID=1-66-phiX174;pvalue=2.26e-08;sequence=TATGGAAAACA;\n+phiX174\tfimo\tpolypeptide_motif\t194\t204\t76.4\t+\t.\tName=1;ID=1-67-phiX174;pvalue=2.29e-08;sequence=ATCAACTAACG;\n+phiX174\tfimo\tpolypeptide_motif\t131\t141\t 76\t+\t.\tName=1;ID=1-68-phiX174;pvalue=2.49e-08;sequence=AAATGAGAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t1491\t1501\t75.9\t+\t.\tName=1;ID=1-69-phiX174;pvalue=2.55e-08;sequence=GCCATCTCAAA;\n+phiX174\tfimo\tpolypeptide_motif\t434\t444\t75.7\t+\t.\tName=1;ID=1-70-phiX174;pvalue=2.67e-08;sequence=GGCCTCTATTA;\n+phiX174\tfimo\tpolypeptide_motif\t4565\t4575\t75.6\t+\t.\tName=1;ID=1-71-phiX174;pvalue=2.73e-08;sequence=TTGGTTTATCG;\n+phiX174\tfimo\tpolypeptide_motif\t102\t112\t75.6\t+\t.\tName=1;ID=1-72-phiX174;pvalue=2.75e-08;sequence=GAATTAAATCG;\n+phiX174\tfimo\tpolypeptide_motif\t903\t913\t75.5\t+\t.\tName=1;ID=1-73-phiX174;pvalue=2.82e-08;sequence=GAGGTACTAAA;\n+phiX174\tfimo\tpolypeptide_motif\t4748\t4758\t75.2\t+\t.\tName=1;ID=1-74-phiX174;pvalue=3.01e-08;sequence=TACAGCTAATG;\n+phiX174\tfimo\tpolypeptide_motif\t2622\t2632\t 75\t+\t.\tName=1;ID=1-75-phiX174;pvalue=3.16e-08;sequence=TGCTGATATTG;\n+phiX174\tfimo\tpolypeptide_motif\t467\t477\t74.7\t+\t.\tName=1;ID=1-76-phiX174;pvalue=3.35e-08;sequence=TTTGGATTTAA;\n+phiX174\tfimo\tpolypeptide_motif\t4033\t4043\t74.6\t+\t.\tName=1;ID=1-77-phiX174;pvalue=3.44e-08;sequence=AGCGTATCGAG;\n+phiX174\tfimo\tpolypeptide_motif\t1348\t1358\t74.6\t+\t.\tName=1;ID=1-78-phiX174;pvalue=3.46e-08;sequence=TACCAATAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t239\t249\t74.4\t+\t.\tName=1;ID=1-79-phiX174;pvalue=3.62e-08;sequence=AGTGGCTTAAT;\n+phiX174\tfimo\tpolypeptide_motif\t500\t510\t74.1\t+\t.\tName=1;ID=1-80-phiX174;pvalue=3.84e-08;sequence=GACGAGTAACA;\n+phiX174\tfimo\tpolypeptide_motif\t3001\t3011\t 74\t+\t.\tName=1;ID=1-81-phiX174;pvalue=3.93e-08;sequence=GCGGTCAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3776\t3786\t 74\t+\t.\tName=1;ID=1-82-phiX174;pvalue=3.98e-08;sequence=TATTTCTAATG;\n+phiX174\tfimo\tpolypeptide_motif\t2026\t2036\t73.9\t+\t.\tName=1;ID=1-83-phiX174;pvalue=4.06e-08;sequence=GAAGTTTAAGA;\n+phiX174\tfimo\tpolypeptide_motif\t4237\t4247\t73.8\t+\t.\tName=1;ID=1-84-phiX174;pvalue=4.12e-08;sequence=AGTTTGTATCT;\n+phiX174\tfimo\tpolypeptide_motif\t803\t813\t73.7\t+\t.\tName=1;ID=1-85-phiX174;pvalue=4.24e-08;sequence=AGAAGAAAACG;\n+phiX174\tfimo\tpolypeptide_motif\t3770\t3780\t73.6\t+\t.\tName=1;ID=1-86-phiX174;pvalue=4.35e-08;sequence=AAAGGATATTT;\n+phiX174\tfimo\tpolypeptide_motif\t3429\t3439\t73.5\t+\t.\tName=1;ID=1-87-phiX174;pvalue=4.45e-08;sequence=GAGATGCAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t99\t109\t73.5\t+\t.\tName=1;ID=1-88-phiX174;pvalue=4.48e-08;sequence=TACGAATTAAA;\n+phiX174\tfimo\tpolypeptide_motif\t67\t77\t73.2\t+\t.\tName=1;ID=1-89-phiX174;pvalue=4.78e-08;sequence=TCTTGATAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t5332\t5342\t72.9\t+\t.\tName=1;ID=1-90-phiX174;pvalue=5.13e-08;sequence=ATCTGCTCAAA;\n+phiX174\tfimo\tpolypeptide_motif\t277\t287\t72.9\t+\t.\tName=1;ID=1-91-phiX174;pvalue=5.14e-08;sequence=TTTAGATATGA;\n+phiX174\tfimo\tpolypeptide_motif\t4338\t4348\t72.8\t+\t.\tName=1;ID=1-92-phiX174;pvalue=5.18e-08;sequence=GGGGACGAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3812\t3822\t72.8\t+\t.\tName=1;ID=1-93-phiX174;pvalue=5.28e-08;sequence=GGTTGATATTT;\n+phiX174\tfimo\tpolypeptide_motif\t1909\t1919\t72.6\t+\t.\tName=1;ID=1-94-phiX174;pvalue=5.51e-08;sequence=TAACGCTAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t3000\t3010\t72.6\t+\t.\tName=1;ID=1-95-phiX174;pvalue=5.54e-08;sequence=GGCGGTCAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t3891\t3901\t72.4\t+\t.\tName=1;ID=1-96-phiX174;pvalue=5.75e-08;sequence=ATTGGCTCTAA;\n+phiX174\tfimo\tpolypeptide_motif\t3079\t3089\t72.4\t+\t.\tName=1;ID=1-97-phiX174;pvalue=5.76e-08;sequence=CTGGTATTAAA;\n+phiX174\tfimo\tpolypeptide_motif\t37\t47\t72.4\t+\t.\tName=1;ID=1-98-phiX174;pvalue=5.79e-08;sequence=TTCGGATATTT;\n+phiX174\tfimo\tpolypeptide_motif\t380\t390\t72.2\t+\t.\tName=1;ID=1-99-phiX174;pvalue=6.01e-08;sequence=GTAAGAAATCA;\n'
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_html_1.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_html_1.html Wed Aug 23 20:57:34 2017 -0400
[
@@ -0,0 +1,97 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta charset="UTF-8">
+<title>FIMO Results</title>
+<style type="text/css">
+td.left {text-align: left;}
+td.right {text-align: right; padding-right: 1cm;}
+</style>
+</head>
+<body bgcolor="#D5F0FF">
+<a name="top_buttons"></a>
+<hr>
+<table summary="buttons" align="left" cellspacing="0">
+<tr>
+<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td>
+<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td>
+<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td>
+</tr>
+</table>
+<br/>
+<br/>
+<hr/>
+<center><big><b>FIMO - Motif search tool</b></big></center>
+<hr>
+<p>
+For further information on how to interpret these results
+or to get a copy of the FIMO software please access
+<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p>
+<p>If you use FIMO in your research, please cite the following paper:<br>
+Charles E. Grant, Timothy L. Bailey, and William Stafford Noble,
+"FIMO: Scanning for occurrences of a given motif",
+<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011.
+<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p>
+<hr>
+<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center>
+<hr>
+<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;">
+<p>
+  <br />
+  Database contains 1 sequences, 5386 residues
+</p>
+<p>
+  <table>
+    <thead>
+      <tr>
+        <th style="border-bottom: 1px dashed;">MOTIF</th>
+        <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th>
+        <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" >
+         BEST POSSIBLE MATCH
+        </th>
+      </tr>
+    </thead>
+    <tbody>
+      <tr>
+        <td style="text-align:right;">1</td>
+        <td style="text-align:right;padding-left: 1em;">11</td>
+        <td style="text-align:left;padding-left: 1em;">GGGGTATAAAA</td>
+       </tr>
+    </tbody>
+  </table>
+</p>
+<p>
+Random model letter frequencies (from non-redundant database):
+<br/>
+
+A 0.073 C 0.018 D 0.052 E 0.062 F 0.040 G 0.069 H 0.022 I 0.056 K 0.058 
+L 0.092 M 0.023 N 0.046 P 0.051 Q 0.041 R 0.052 S 0.074 T 0.059 V 0.064 
+W 0.013 Y 0.033 </p>
+</div>
+<hr>
+<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center>
+<hr>
+<ul>
+<li>
+There were 1937 motif occurences with a p-value less than 0.0001.
+<b>Only the most significant 1000 matches are shown here.</b>
+
+The full set of motif occurences can be seen in the
+tab-delimited plain text output file
+<a href="fimo.txt">fimo.txt</a>, 
+the GFF file 
+<a href="fimo.gff">fimo.gff</a> 
+which may be suitable for uploading to the 
+<a href="http://genome.ucsc.edu/cgi-bin/hgTables">UCSC Genome Table Browser</a>
+(assuming the FASTA input sequences included genomic coordinates in UCSC or Galaxy format),
+or the XML file 
+<a href="fimo.xml">fimo.xml</a>.
+</li>
+<li>
+The p-value of a motif occurrence is defined as the
+probability of a random sequence of the same length as the motif
+matching that position of the sequence with as good or better a score.
+</li>
+<li>
+The score for the match of a position in a sequence to a motif
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_html_2.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_html_2.html Wed Aug 23 20:57:34 2017 -0400
[
@@ -0,0 +1,97 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta charset="UTF-8">
+<title>FIMO Results</title>
+<style type="text/css">
+td.left {text-align: left;}
+td.right {text-align: right; padding-right: 1cm;}
+</style>
+</head>
+<body bgcolor="#D5F0FF">
+<a name="top_buttons"></a>
+<hr>
+<table summary="buttons" align="left" cellspacing="0">
+<tr>
+<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td>
+<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td>
+<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td>
+</tr>
+</table>
+<br/>
+<br/>
+<hr/>
+<center><big><b>FIMO - Motif search tool</b></big></center>
+<hr>
+<p>
+For further information on how to interpret these results
+or to get a copy of the FIMO software please access
+<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p>
+<p>If you use FIMO in your research, please cite the following paper:<br>
+Charles E. Grant, Timothy L. Bailey, and William Stafford Noble,
+"FIMO: Scanning for occurrences of a given motif",
+<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011.
+<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p>
+<hr>
+<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center>
+<hr>
+<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;">
+<p>
+  <br />
+  Database contains 1 sequences, 5386 residues
+</p>
+<p>
+  <table>
+    <thead>
+      <tr>
+        <th style="border-bottom: 1px dashed;">MOTIF</th>
+        <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th>
+        <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" >
+         BEST POSSIBLE MATCH
+        </th>
+      </tr>
+    </thead>
+    <tbody>
+      <tr>
+        <td style="text-align:right;">1</td>
+        <td style="text-align:right;padding-left: 1em;">11</td>
+        <td style="text-align:left;padding-left: 1em;">GGGGTATAAAA</td>
+       </tr>
+    </tbody>
+  </table>
+</p>
+<p>
+Random model letter frequencies (from non-redundant database):
+<br/>
+
+A 0.073 C 0.018 D 0.052 E 0.062 F 0.040 G 0.069 H 0.022 I 0.056 K 0.058 
+L 0.092 M 0.023 N 0.046 P 0.051 Q 0.041 R 0.052 S 0.074 T 0.059 V 0.064 
+W 0.013 Y 0.033 </p>
+</div>
+<hr>
+<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center>
+<hr>
+<ul>
+<li>
+There were 1937 motif occurences with a p-value less than 0.0001.
+<b>Only the most significant 1000 matches are shown here.</b>
+
+The full set of motif occurences can be seen in the
+tab-delimited plain text output file
+<a href="fimo.txt">fimo.txt</a>, 
+the GFF file 
+<a href="fimo.gff">fimo.gff</a> 
+which may be suitable for uploading to the 
+<a href="http://genome.ucsc.edu/cgi-bin/hgTables">UCSC Genome Table Browser</a>
+(assuming the FASTA input sequences included genomic coordinates in UCSC or Galaxy format),
+or the XML file 
+<a href="fimo.xml">fimo.xml</a>.
+</li>
+<li>
+The p-value of a motif occurrence is defined as the
+probability of a random sequence of the same length as the motif
+matching that position of the sequence with as good or better a score.
+</li>
+<li>
+The score for the match of a position in a sequence to a motif
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_interval_1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_interval_1.txt Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,100 @@
+#chr start end pattern name score strand matched sequence p-value q-value
+phiX174 1387 1398 1 + + 1.25e-09 29.4024 6.36e-11
+phiX174 846 857 1 + + 1.25e-09 29.122 7.02e-11
+phiX174 2300 2311 1 + + 1.29e-09 27.6463 1.08e-10
+phiX174 5062 5073 1 + + 2.25e-09 25.5366 2.73e-10
+phiX174 988 999 1 + + 2.25e-09 25.3049 3.15e-10
+phiX174 4712 4723 1 + + 3.48e-09 23.622 7.74e-10
+phiX174 5047 5058 1 + + 3.48e-09 23.3293 8.51e-10
+phiX174 854 865 1 + + 3.48e-09 23.3049 8.64e-10
+phiX174 3154 3165 1 + + 3.48e-09 23.0366 9.76e-10
+phiX174 5008 5019 1 + + 3.48e-09 23.0366 9.76e-10
+phiX174 813 824 1 + + 4.14e-09 22.5854 1.28e-09
+phiX174 2831 2842 1 + + 4.23e-09 22.3415 1.42e-09
+phiX174 3829 3840 1 + + 4.68e-09 21.8293 1.7e-09
+phiX174 3559 3570 1 + + 4.82e-09 21.5976 1.89e-09
+phiX174 2881 2892 1 + + 5.46e-09 21.1951 2.29e-09
+phiX174 4452 4463 1 + + 5.75e-09 20.8902 2.58e-09
+phiX174 2492 2503 1 + + 5.79e-09 20.3415 3.06e-09
+phiX174 4103 4114 1 + + 5.79e-09 20.3171 3.08e-09
+phiX174 4954 4965 1 + + 5.79e-09 20.3171 3.08e-09
+phiX174 1884 1895 1 + + 6.45e-09 19.9268 3.61e-09
+phiX174 3375 3386 1 + + 6.48e-09 19.7683 3.81e-09
+phiX174 51 62 1 + + 6.58e-09 19.5732 4.06e-09
+phiX174 1389 1400 1 + + 6.61e-09 19.378 4.26e-09
+phiX174 2016 2027 1 + + 6.85e-09 19.0854 4.6e-09
+phiX174 999 1010 1 + + 6.97e-09 18.878 4.88e-09
+phiX174 1554 1565 1 + + 7.37e-09 18.439 5.58e-09
+phiX174 4429 4440 1 + + 7.37e-09 18.4268 5.62e-09
+phiX174 1926 1937 1 + + 7.37e-09 18.2927 5.82e-09
+phiX174 2980 2991 1 + + 7.37e-09 18.0732 6.13e-09
+phiX174 4202 4213 1 + + 7.37e-09 17.9268 6.34e-09
+phiX174 1668 1679 1 + + 7.37e-09 17.8659 6.4e-09
+phiX174 3259 3270 1 + + 7.82e-09 17.5 7.01e-09
+phiX174 3046 3057 1 + + 7.85e-09 17.2805 7.4e-09
+phiX174 4175 4186 1 + + 7.85e-09 17.1829 7.6e-09
+phiX174 4117 4128 1 + + 7.85e-09 17.1341 7.7e-09
+phiX174 5369 5380 1 + + 7.87e-09 16.9878 8.03e-09
+phiX174 1241 1252 1 + + 7.87e-09 16.5122 8.94e-09
+phiX174 2582 2593 1 + + 7.87e-09 16.5122 8.94e-09
+phiX174 697 708 1 + + 7.87e-09 16.4146 9.13e-09
+phiX174 2298 2309 1 + + 7.87e-09 16.3537 9.26e-09
+phiX174 4188 4199 1 + + 7.87e-09 16.1707 9.69e-09
+phiX174 274 285 1 + + 7.87e-09 16.0976 9.85e-09
+phiX174 1800 1811 1 + + 7.87e-09 16.0366 1e-08
+phiX174 1385 1396 1 + + 7.87e-09 15.9268 1.03e-08
+phiX174 1302 1313 1 + + 7.87e-09 15.9024 1.03e-08
+phiX174 3771 3782 1 + + 7.87e-09 15.878 1.04e-08
+phiX174 1287 1298 1 + + 7.87e-09 15.8659 1.04e-08
+phiX174 2576 2587 1 + + 7.87e-09 15.7683 1.08e-08
+phiX174 936 947 1 + + 7.87e-09 15.7561 1.08e-08
+phiX174 903 914 1 + + 7.93e-09 15.6585 1.11e-08
+phiX174 2278 2289 1 + + 7.93e-09 15.5854 1.13e-08
+phiX174 3163 3174 1 + + 7.98e-09 15.5 1.16e-08
+phiX174 23 34 1 + + 8.24e-09 15.3293 1.23e-08
+phiX174 837 848 1 + + 8.24e-09 15.2561 1.27e-08
+phiX174 852 863 1 + + 8.24e-09 15.2561 1.27e-08
+phiX174 1983 1994 1 + + 8.68e-09 15.0244 1.36e-08
+phiX174 0 11 1 + + 9.05e-09 14.8293 1.46e-08
+phiX174 4306 4317 1 + + 9.05e-09 14.7927 1.47e-08
+phiX174 4302 4313 1 + + 9.19e-09 14.6585 1.52e-08
+phiX174 5032 5043 1 + + 9.41e-09 14.561 1.58e-08
+phiX174 2578 2589 1 + + 1.01e-08 14.2927 1.73e-08
+phiX174 321 332 1 + + 1.05e-08 14.1951 1.82e-08
+phiX174 5000 5011 1 + + 1.19e-08 13.8902 2.09e-08
+phiX174 4216 4227 1 + + 1.2e-08 13.8171 2.15e-08
+phiX174 4261 4272 1 + + 1.2e-08 13.7805 2.18e-08
+phiX174 3568 3579 1 + + 1.22e-08 13.7073 2.26e-08
+phiX174 193 204 1 + + 1.22e-08 13.6829 2.29e-08
+phiX174 130 141 1 + + 1.31e-08 13.4756 2.49e-08
+phiX174 1490 1501 1 + + 1.32e-08 13.4024 2.55e-08
+phiX174 433 444 1 + + 1.36e-08 13.2805 2.67e-08
+phiX174 4564 4575 1 + + 1.36e-08 13.2439 2.73e-08
+phiX174 101 112 1 + + 1.36e-08 13.2195 2.75e-08
+phiX174 902 913 1 + + 1.38e-08 13.1463 2.82e-08
+phiX174 4747 4758 1 + + 1.45e-08 12.9756 3.01e-08
+phiX174 2621 2632 1 + + 1.5e-08 12.8659 3.16e-08
+phiX174 466 477 1 + + 1.57e-08 12.7317 3.35e-08
+phiX174 4032 4043 1 + + 1.58e-08 12.6829 3.44e-08
+phiX174 1347 1358 1 + + 1.58e-08 12.6707 3.46e-08
+phiX174 238 249 1 + + 1.64e-08 12.5732 3.62e-08
+phiX174 499 510 1 + + 1.71e-08 12.4634 3.84e-08
+phiX174 3000 3011 1 + + 1.73e-08 12.4146 3.93e-08
+phiX174 3775 3786 1 + + 1.73e-08 12.378 3.98e-08
+phiX174 2025 2036 1 + + 1.75e-08 12.3293 4.06e-08
+phiX174 4236 4247 1 + + 1.75e-08 12.3049 4.12e-08
+phiX174 802 813 1 + + 1.78e-08 12.2439 4.24e-08
+phiX174 3769 3780 1 + + 1.81e-08 12.1829 4.35e-08
+phiX174 3428 3439 1 + + 1.82e-08 12.122 4.45e-08
+phiX174 98 109 1 + + 1.82e-08 12.1098 4.48e-08
+phiX174 66 77 1 + + 1.92e-08 11.9268 4.78e-08
+phiX174 5331 5342 1 + + 2.01e-08 11.7195 5.13e-08
+phiX174 276 287 1 + + 2.01e-08 11.7073 5.14e-08
+phiX174 4337 4348 1 + + 2.01e-08 11.6951 5.18e-08
+phiX174 3811 3822 1 + + 2.03e-08 11.6585 5.28e-08
+phiX174 1908 1919 1 + + 2.08e-08 11.5488 5.51e-08
+phiX174 2999 3010 1 + + 2.08e-08 11.5366 5.54e-08
+phiX174 3890 3901 1 + + 2.11e-08 11.439 5.75e-08
+phiX174 3078 3089 1 + + 2.11e-08 11.4268 5.76e-08
+phiX174 36 47 1 + + 2.11e-08 11.4146 5.79e-08
+phiX174 379 390 1 + + 2.17e-08 11.3293 6.01e-08
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_interval_2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_interval_2.txt Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,100 @@
+#chr start end pattern name score strand matched sequence p-value q-value
+phiX174 1387 1398 1 + + 0 29.4024 6.36e-11
+phiX174 846 857 1 + + 0 29.122 7.02e-11
+phiX174 2300 2311 1 + + 0 27.6463 1.08e-10
+phiX174 5062 5073 1 + + 0 25.5366 2.73e-10
+phiX174 988 999 1 + + 0 25.3049 3.15e-10
+phiX174 4712 4723 1 + + 0 23.622 7.74e-10
+phiX174 5047 5058 1 + + 0 23.3293 8.51e-10
+phiX174 854 865 1 + + 0 23.3049 8.64e-10
+phiX174 3154 3165 1 + + 0 23.0366 9.76e-10
+phiX174 5008 5019 1 + + 0 23.0366 9.76e-10
+phiX174 813 824 1 + + 0 22.5854 1.28e-09
+phiX174 2831 2842 1 + + 0 22.3415 1.42e-09
+phiX174 3829 3840 1 + + 0 21.8293 1.7e-09
+phiX174 3559 3570 1 + + 0 21.5976 1.89e-09
+phiX174 2881 2892 1 + + 0 21.1951 2.29e-09
+phiX174 4452 4463 1 + + 0 20.8902 2.58e-09
+phiX174 2492 2503 1 + + 0 20.3415 3.06e-09
+phiX174 4103 4114 1 + + 0 20.3171 3.08e-09
+phiX174 4954 4965 1 + + 0 20.3171 3.08e-09
+phiX174 1884 1895 1 + + 0 19.9268 3.61e-09
+phiX174 3375 3386 1 + + 0 19.7683 3.81e-09
+phiX174 51 62 1 + + 0 19.5732 4.06e-09
+phiX174 1389 1400 1 + + 0 19.378 4.26e-09
+phiX174 2016 2027 1 + + 0 19.0854 4.6e-09
+phiX174 999 1010 1 + + 0 18.878 4.88e-09
+phiX174 1554 1565 1 + + 0 18.439 5.58e-09
+phiX174 4429 4440 1 + + 0 18.4268 5.62e-09
+phiX174 1926 1937 1 + + 0 18.2927 5.82e-09
+phiX174 2980 2991 1 + + 0 18.0732 6.13e-09
+phiX174 4202 4213 1 + + 0 17.9268 6.34e-09
+phiX174 1668 1679 1 + + 0 17.8659 6.4e-09
+phiX174 3259 3270 1 + + 0 17.5 7.01e-09
+phiX174 3046 3057 1 + + 0 17.2805 7.4e-09
+phiX174 4175 4186 1 + + 0 17.1829 7.6e-09
+phiX174 4117 4128 1 + + 0 17.1341 7.7e-09
+phiX174 5369 5380 1 + + 0 16.9878 8.03e-09
+phiX174 1241 1252 1 + + 0 16.5122 8.94e-09
+phiX174 2582 2593 1 + + 0 16.5122 8.94e-09
+phiX174 697 708 1 + + 0 16.4146 9.13e-09
+phiX174 2298 2309 1 + + 0 16.3537 9.26e-09
+phiX174 4188 4199 1 + + 0 16.1707 9.69e-09
+phiX174 274 285 1 + + 0 16.0976 9.85e-09
+phiX174 1800 1811 1 + + 0 16.0366 1e-08
+phiX174 1385 1396 1 + + 0 15.9268 1.03e-08
+phiX174 1302 1313 1 + + 0 15.9024 1.03e-08
+phiX174 3771 3782 1 + + 0 15.878 1.04e-08
+phiX174 1287 1298 1 + + 0 15.8659 1.04e-08
+phiX174 2576 2587 1 + + 0 15.7683 1.08e-08
+phiX174 936 947 1 + + 0 15.7561 1.08e-08
+phiX174 903 914 1 + + 0 15.6585 1.11e-08
+phiX174 2278 2289 1 + + 0 15.5854 1.13e-08
+phiX174 3163 3174 1 + + 0 15.5 1.16e-08
+phiX174 23 34 1 + + 0 15.3293 1.23e-08
+phiX174 837 848 1 + + 0 15.2561 1.27e-08
+phiX174 852 863 1 + + 0 15.2561 1.27e-08
+phiX174 1983 1994 1 + + 0 15.0244 1.36e-08
+phiX174 0 11 1 + + 0 14.8293 1.46e-08
+phiX174 4306 4317 1 + + 0 14.7927 1.47e-08
+phiX174 4302 4313 1 + + 0 14.6585 1.52e-08
+phiX174 5032 5043 1 + + 0 14.561 1.58e-08
+phiX174 2578 2589 1 + + 0 14.2927 1.73e-08
+phiX174 321 332 1 + + 0 14.1951 1.82e-08
+phiX174 5000 5011 1 + + 0 13.8902 2.09e-08
+phiX174 4216 4227 1 + + 0 13.8171 2.15e-08
+phiX174 4261 4272 1 + + 0 13.7805 2.18e-08
+phiX174 3568 3579 1 + + 0 13.7073 2.26e-08
+phiX174 193 204 1 + + 0 13.6829 2.29e-08
+phiX174 130 141 1 + + 0 13.4756 2.49e-08
+phiX174 1490 1501 1 + + 0 13.4024 2.55e-08
+phiX174 433 444 1 + + 0 13.2805 2.67e-08
+phiX174 4564 4575 1 + + 0 13.2439 2.73e-08
+phiX174 101 112 1 + + 0 13.2195 2.75e-08
+phiX174 902 913 1 + + 0 13.1463 2.82e-08
+phiX174 4747 4758 1 + + 0 12.9756 3.01e-08
+phiX174 2621 2632 1 + + 0 12.8659 3.16e-08
+phiX174 466 477 1 + + 0 12.7317 3.35e-08
+phiX174 4032 4043 1 + + 0 12.6829 3.44e-08
+phiX174 1347 1358 1 + + 0 12.6707 3.46e-08
+phiX174 238 249 1 + + 0 12.5732 3.62e-08
+phiX174 499 510 1 + + 0 12.4634 3.84e-08
+phiX174 3000 3011 1 + + 0 12.4146 3.93e-08
+phiX174 3775 3786 1 + + 0 12.378 3.98e-08
+phiX174 2025 2036 1 + + 0 12.3293 4.06e-08
+phiX174 4236 4247 1 + + 0 12.3049 4.12e-08
+phiX174 802 813 1 + + 0 12.2439 4.24e-08
+phiX174 3769 3780 1 + + 0 12.1829 4.35e-08
+phiX174 3428 3439 1 + + 0 12.122 4.45e-08
+phiX174 98 109 1 + + 0 12.1098 4.48e-08
+phiX174 66 77 1 + + 0 11.9268 4.78e-08
+phiX174 5331 5342 1 + + 0 11.7195 5.13e-08
+phiX174 276 287 1 + + 0 11.7073 5.14e-08
+phiX174 4337 4348 1 + + 0 11.6951 5.18e-08
+phiX174 3811 3822 1 + + 0 11.6585 5.28e-08
+phiX174 1908 1919 1 + + 0 11.5488 5.51e-08
+phiX174 2999 3010 1 + + 0 11.5366 5.54e-08
+phiX174 3890 3901 1 + + 0 11.439 5.75e-08
+phiX174 3078 3089 1 + + 0 11.4268 5.76e-08
+phiX174 36 47 1 + + 0 11.4146 5.79e-08
+phiX174 379 390 1 + + 0 11.3293 6.01e-08
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_txt_1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_txt_1.txt Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,100 @@
+#pattern name sequence name start stop strand score p-value q-value matched sequence
+1 phiX174 1388 1398 + 29.4024 6.36e-11 1.25e-09 AATATCTATAA
+1 phiX174 847 857 + 29.122 7.02e-11 1.25e-09 AATGTCTAAAG
+1 phiX174 2301 2311 + 27.6463 1.08e-10 1.29e-09 AGGTTATAACG
+1 phiX174 5063 5073 + 25.5366 2.73e-10 2.25e-09 AGGAGCTAAAG
+1 phiX174 989 999 + 25.3049 3.15e-10 2.25e-09 TGAGGATAAAT
+1 phiX174 4713 4723 + 23.622 7.74e-10 3.48e-09 GACTGCTATCA
+1 phiX174 5048 5058 + 23.3293 8.51e-10 3.48e-09 TGCTGCTAAAG
+1 phiX174 855 865 + 23.3049 8.64e-10 3.48e-09 AAGGTAAAAAA
+1 phiX174 3155 3165 + 23.0366 9.76e-10 3.48e-09 TATGGCTAAAG
+1 phiX174 5009 5019 + 23.0366 9.76e-10 3.48e-09 TGTGGCTAAAT
+1 phiX174 814 824 + 22.5854 1.28e-09 4.14e-09 TGCGTCAAAAA
+1 phiX174 2832 2842 + 22.3415 1.42e-09 4.23e-09 TTGGTCTAACT
+1 phiX174 3830 3840 + 21.8293 1.7e-09 4.68e-09 TATTGATAAAG
+1 phiX174 3560 3570 + 21.5976 1.89e-09 4.82e-09 TGCGTCTATTA
+1 phiX174 2882 2892 + 21.1951 2.29e-09 5.46e-09 AGGTTATTAAA
+1 phiX174 4453 4463 + 20.8902 2.58e-09 5.75e-09 AAGGTATTAAG
+1 phiX174 2493 2503 + 20.3415 3.06e-09 5.79e-09 GACACCTAAAG
+1 phiX174 4104 4114 + 20.3171 3.08e-09 5.79e-09 GGCTTCCATAA
+1 phiX174 4955 4965 + 20.3171 3.08e-09 5.79e-09 TGATGCTAAAG
+1 phiX174 1885 1895 + 19.9268 3.61e-09 6.45e-09 TGCGACTAAAG
+1 phiX174 3376 3386 + 19.7683 3.81e-09 6.48e-09 AGAATCAAAAA
+1 phiX174 52 62 + 19.5732 4.06e-09 6.58e-09 TGAGTCGAAAA
+1 phiX174 1390 1400 + 19.378 4.26e-09 6.61e-09 TATCTATAACA
+1 phiX174 2017 2027 + 19.0854 4.6e-09 6.85e-09 TTCGTCTAAGA
+1 phiX174 1000 1010 + 18.878 4.88e-09 6.97e-09 TATGTCTAATA
+1 phiX174 1555 1565 + 18.439 5.58e-09 7.37e-09 GACTTCTACCA
+1 phiX174 4430 4440 + 18.4268 5.62e-09 7.37e-09 TGAGTATAATT
+1 phiX174 1927 1937 + 18.2927 5.82e-09 7.37e-09 GACTTATACCG
+1 phiX174 2981 2991 + 18.0732 6.13e-09 7.37e-09 CATGTCTAAAT
+1 phiX174 4203 4213 + 17.9268 6.34e-09 7.37e-09 GACGGCCATAA
+1 phiX174 1669 1679 + 17.8659 6.4e-09 7.37e-09 TGGAGGTAAAA
+1 phiX174 3260 3270 + 17.5 7.01e-09 7.82e-09 CGCTGATAAAG
+1 phiX174 3047 3057 + 17.2805 7.4e-09 7.85e-09 TACCGATAACA
+1 phiX174 4176 4186 + 17.1829 7.6e-09 7.85e-09 GAGTTCGATAA
+1 phiX174 4118 4128 + 17.1341 7.7e-09 7.85e-09 GATGGATAACC
+1 phiX174 5370 5380 + 16.9878 8.03e-09 7.87e-09 GGCGTATCCAA
+1 phiX174 1242 1252 + 16.5122 8.94e-09 7.87e-09 AGTGGATTAAG
+1 phiX174 2583 2593 + 16.5122 8.94e-09 7.87e-09 TACATCTGTCA
+1 phiX174 698 708 + 16.4146 9.13e-09 7.87e-09 TACGGAAAACA
+1 phiX174 2299 2309 + 16.3537 9.26e-09 7.87e-09 TGAGGTTATAA
+1 phiX174 4189 4199 + 16.1707 9.69e-09 7.87e-09 GTGATATGTAT
+1 phiX174 275 285 + 16.0976 9.85e-09 7.87e-09 GGTTTAGATAT
+1 phiX174 1801 1811 + 16.0366 1e-08 7.87e-09 GACCTATAAAC
+1 phiX174 1386 1396 + 15.9268 1.03e-08 7.87e-09 TGAATATCTAT
+1 phiX174 1303 1313 + 15.9024 1.03e-08 7.87e-09 TGGTTATATTG
+1 phiX174 3772 3782 + 15.878 1.04e-08 7.87e-09 AGGATATTTCT
+1 phiX174 1288 1298 + 15.8659 1.04e-08 7.87e-09 GACTGTTAACA
+1 phiX174 2577 2587 + 15.7683 1.08e-08 7.87e-09 GATGGATACAT
+1 phiX174 937 947 + 15.7561 1.08e-08 7.87e-09 TTGGTATGTAG
+1 phiX174 904 914 + 15.6585 1.11e-08 7.93e-09 AGGTACTAAAG
+1 phiX174 2279 2289 + 15.5854 1.13e-08 7.93e-09 TCGTGATAAAA
+1 phiX174 3164 3174 + 15.5 1.16e-08 7.98e-09 AGCTGGTAAAG
+1 phiX174 24 34 + 15.3293 1.23e-08 8.24e-09 AGAAGTTAACA
+1 phiX174 838 848 + 15.2561 1.27e-08 8.24e-09 GAGTGATGTAA
+1 phiX174 853 863 + 15.2561 1.27e-08 8.24e-09 TAAAGGTAAAA
+1 phiX174 1984 1994 + 15.0244 1.36e-08 8.68e-09 AATTTCTATGA
+1 phiX174 1 11 + 14.8293 1.46e-08 9.05e-09 GAGTTTTATCG
+1 phiX174 4307 4317 + 14.7927 1.47e-08 9.05e-09 TATTAATAACA
+1 phiX174 4303 4313 + 14.6585 1.52e-08 9.19e-09 TTGATATTAAT
+1 phiX174 5033 5043 + 14.561 1.58e-08 9.41e-09 GTCAGATATGG
+1 phiX174 2579 2589 + 14.2927 1.73e-08 1.01e-08 TGGATACATCT
+1 phiX174 322 332 + 14.1951 1.82e-08 1.05e-08 GACATTTTAAA
+1 phiX174 5001 5011 + 13.8902 2.09e-08 1.19e-08 GGTTTCTATGT
+1 phiX174 4217 4227 + 13.8171 2.15e-08 1.2e-08 TGCTTCTGACG
+1 phiX174 4262 4272 + 13.7805 2.18e-08 1.2e-08 AATGGATGAAT
+1 phiX174 3569 3579 + 13.7073 2.26e-08 1.22e-08 TATGGAAAACA
+1 phiX174 194 204 + 13.6829 2.29e-08 1.22e-08 ATCAACTAACG
+1 phiX174 131 141 + 13.4756 2.49e-08 1.31e-08 AAATGAGAAAA
+1 phiX174 1491 1501 + 13.4024 2.55e-08 1.32e-08 GCCATCTCAAA
+1 phiX174 434 444 + 13.2805 2.67e-08 1.36e-08 GGCCTCTATTA
+1 phiX174 4565 4575 + 13.2439 2.73e-08 1.36e-08 TTGGTTTATCG
+1 phiX174 102 112 + 13.2195 2.75e-08 1.36e-08 GAATTAAATCG
+1 phiX174 903 913 + 13.1463 2.82e-08 1.38e-08 GAGGTACTAAA
+1 phiX174 4748 4758 + 12.9756 3.01e-08 1.45e-08 TACAGCTAATG
+1 phiX174 2622 2632 + 12.8659 3.16e-08 1.5e-08 TGCTGATATTG
+1 phiX174 467 477 + 12.7317 3.35e-08 1.57e-08 TTTGGATTTAA
+1 phiX174 4033 4043 + 12.6829 3.44e-08 1.58e-08 AGCGTATCGAG
+1 phiX174 1348 1358 + 12.6707 3.46e-08 1.58e-08 TACCAATAAAA
+1 phiX174 239 249 + 12.5732 3.62e-08 1.64e-08 AGTGGCTTAAT
+1 phiX174 500 510 + 12.4634 3.84e-08 1.71e-08 GACGAGTAACA
+1 phiX174 3001 3011 + 12.4146 3.93e-08 1.73e-08 GCGGTCAAAAA
+1 phiX174 3776 3786 + 12.378 3.98e-08 1.73e-08 TATTTCTAATG
+1 phiX174 2026 2036 + 12.3293 4.06e-08 1.75e-08 GAAGTTTAAGA
+1 phiX174 4237 4247 + 12.3049 4.12e-08 1.75e-08 AGTTTGTATCT
+1 phiX174 803 813 + 12.2439 4.24e-08 1.78e-08 AGAAGAAAACG
+1 phiX174 3770 3780 + 12.1829 4.35e-08 1.81e-08 AAAGGATATTT
+1 phiX174 3429 3439 + 12.122 4.45e-08 1.82e-08 GAGATGCAAAA
+1 phiX174 99 109 + 12.1098 4.48e-08 1.82e-08 TACGAATTAAA
+1 phiX174 67 77 + 11.9268 4.78e-08 1.92e-08 TCTTGATAAAG
+1 phiX174 5332 5342 + 11.7195 5.13e-08 2.01e-08 ATCTGCTCAAA
+1 phiX174 277 287 + 11.7073 5.14e-08 2.01e-08 TTTAGATATGA
+1 phiX174 4338 4348 + 11.6951 5.18e-08 2.01e-08 GGGGACGAAAA
+1 phiX174 3812 3822 + 11.6585 5.28e-08 2.03e-08 GGTTGATATTT
+1 phiX174 1909 1919 + 11.5488 5.51e-08 2.08e-08 TAACGCTAAAG
+1 phiX174 3000 3010 + 11.5366 5.54e-08 2.08e-08 GGCGGTCAAAA
+1 phiX174 3891 3901 + 11.439 5.75e-08 2.11e-08 ATTGGCTCTAA
+1 phiX174 3079 3089 + 11.4268 5.76e-08 2.11e-08 CTGGTATTAAA
+1 phiX174 37 47 + 11.4146 5.79e-08 2.11e-08 TTCGGATATTT
+1 phiX174 380 390 + 11.3293 6.01e-08 2.17e-08 GTAAGAAATCA
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_txt_2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_txt_2.txt Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,100 @@
+#pattern name sequence name start stop strand score p-value q-value matched sequence
+1 phiX174 1388 1398 + 29.4024 6.36e-11 0 AATATCTATAA
+1 phiX174 847 857 + 29.122 7.02e-11 0 AATGTCTAAAG
+1 phiX174 2301 2311 + 27.6463 1.08e-10 0 AGGTTATAACG
+1 phiX174 5063 5073 + 25.5366 2.73e-10 0 AGGAGCTAAAG
+1 phiX174 989 999 + 25.3049 3.15e-10 0 TGAGGATAAAT
+1 phiX174 4713 4723 + 23.622 7.74e-10 0 GACTGCTATCA
+1 phiX174 5048 5058 + 23.3293 8.51e-10 0 TGCTGCTAAAG
+1 phiX174 855 865 + 23.3049 8.64e-10 0 AAGGTAAAAAA
+1 phiX174 3155 3165 + 23.0366 9.76e-10 0 TATGGCTAAAG
+1 phiX174 5009 5019 + 23.0366 9.76e-10 0 TGTGGCTAAAT
+1 phiX174 814 824 + 22.5854 1.28e-09 0 TGCGTCAAAAA
+1 phiX174 2832 2842 + 22.3415 1.42e-09 0 TTGGTCTAACT
+1 phiX174 3830 3840 + 21.8293 1.7e-09 0 TATTGATAAAG
+1 phiX174 3560 3570 + 21.5976 1.89e-09 0 TGCGTCTATTA
+1 phiX174 2882 2892 + 21.1951 2.29e-09 0 AGGTTATTAAA
+1 phiX174 4453 4463 + 20.8902 2.58e-09 0 AAGGTATTAAG
+1 phiX174 2493 2503 + 20.3415 3.06e-09 0 GACACCTAAAG
+1 phiX174 4104 4114 + 20.3171 3.08e-09 0 GGCTTCCATAA
+1 phiX174 4955 4965 + 20.3171 3.08e-09 0 TGATGCTAAAG
+1 phiX174 1885 1895 + 19.9268 3.61e-09 0 TGCGACTAAAG
+1 phiX174 3376 3386 + 19.7683 3.81e-09 0 AGAATCAAAAA
+1 phiX174 52 62 + 19.5732 4.06e-09 0 TGAGTCGAAAA
+1 phiX174 1390 1400 + 19.378 4.26e-09 0 TATCTATAACA
+1 phiX174 2017 2027 + 19.0854 4.6e-09 0 TTCGTCTAAGA
+1 phiX174 1000 1010 + 18.878 4.88e-09 0 TATGTCTAATA
+1 phiX174 1555 1565 + 18.439 5.58e-09 0 GACTTCTACCA
+1 phiX174 4430 4440 + 18.4268 5.62e-09 0 TGAGTATAATT
+1 phiX174 1927 1937 + 18.2927 5.82e-09 0 GACTTATACCG
+1 phiX174 2981 2991 + 18.0732 6.13e-09 0 CATGTCTAAAT
+1 phiX174 4203 4213 + 17.9268 6.34e-09 0 GACGGCCATAA
+1 phiX174 1669 1679 + 17.8659 6.4e-09 0 TGGAGGTAAAA
+1 phiX174 3260 3270 + 17.5 7.01e-09 0 CGCTGATAAAG
+1 phiX174 3047 3057 + 17.2805 7.4e-09 0 TACCGATAACA
+1 phiX174 4176 4186 + 17.1829 7.6e-09 0 GAGTTCGATAA
+1 phiX174 4118 4128 + 17.1341 7.7e-09 0 GATGGATAACC
+1 phiX174 5370 5380 + 16.9878 8.03e-09 0 GGCGTATCCAA
+1 phiX174 1242 1252 + 16.5122 8.94e-09 0 AGTGGATTAAG
+1 phiX174 2583 2593 + 16.5122 8.94e-09 0 TACATCTGTCA
+1 phiX174 698 708 + 16.4146 9.13e-09 0 TACGGAAAACA
+1 phiX174 2299 2309 + 16.3537 9.26e-09 0 TGAGGTTATAA
+1 phiX174 4189 4199 + 16.1707 9.69e-09 0 GTGATATGTAT
+1 phiX174 275 285 + 16.0976 9.85e-09 0 GGTTTAGATAT
+1 phiX174 1801 1811 + 16.0366 1e-08 0 GACCTATAAAC
+1 phiX174 1386 1396 + 15.9268 1.03e-08 0 TGAATATCTAT
+1 phiX174 1303 1313 + 15.9024 1.03e-08 0 TGGTTATATTG
+1 phiX174 3772 3782 + 15.878 1.04e-08 0 AGGATATTTCT
+1 phiX174 1288 1298 + 15.8659 1.04e-08 0 GACTGTTAACA
+1 phiX174 2577 2587 + 15.7683 1.08e-08 0 GATGGATACAT
+1 phiX174 937 947 + 15.7561 1.08e-08 0 TTGGTATGTAG
+1 phiX174 904 914 + 15.6585 1.11e-08 0 AGGTACTAAAG
+1 phiX174 2279 2289 + 15.5854 1.13e-08 0 TCGTGATAAAA
+1 phiX174 3164 3174 + 15.5 1.16e-08 0 AGCTGGTAAAG
+1 phiX174 24 34 + 15.3293 1.23e-08 0 AGAAGTTAACA
+1 phiX174 838 848 + 15.2561 1.27e-08 0 GAGTGATGTAA
+1 phiX174 853 863 + 15.2561 1.27e-08 0 TAAAGGTAAAA
+1 phiX174 1984 1994 + 15.0244 1.36e-08 0 AATTTCTATGA
+1 phiX174 1 11 + 14.8293 1.46e-08 0 GAGTTTTATCG
+1 phiX174 4307 4317 + 14.7927 1.47e-08 0 TATTAATAACA
+1 phiX174 4303 4313 + 14.6585 1.52e-08 0 TTGATATTAAT
+1 phiX174 5033 5043 + 14.561 1.58e-08 0 GTCAGATATGG
+1 phiX174 2579 2589 + 14.2927 1.73e-08 0 TGGATACATCT
+1 phiX174 322 332 + 14.1951 1.82e-08 0 GACATTTTAAA
+1 phiX174 5001 5011 + 13.8902 2.09e-08 0 GGTTTCTATGT
+1 phiX174 4217 4227 + 13.8171 2.15e-08 0 TGCTTCTGACG
+1 phiX174 4262 4272 + 13.7805 2.18e-08 0 AATGGATGAAT
+1 phiX174 3569 3579 + 13.7073 2.26e-08 0 TATGGAAAACA
+1 phiX174 194 204 + 13.6829 2.29e-08 0 ATCAACTAACG
+1 phiX174 131 141 + 13.4756 2.49e-08 0 AAATGAGAAAA
+1 phiX174 1491 1501 + 13.4024 2.55e-08 0 GCCATCTCAAA
+1 phiX174 434 444 + 13.2805 2.67e-08 0 GGCCTCTATTA
+1 phiX174 4565 4575 + 13.2439 2.73e-08 0 TTGGTTTATCG
+1 phiX174 102 112 + 13.2195 2.75e-08 0 GAATTAAATCG
+1 phiX174 903 913 + 13.1463 2.82e-08 0 GAGGTACTAAA
+1 phiX174 4748 4758 + 12.9756 3.01e-08 0 TACAGCTAATG
+1 phiX174 2622 2632 + 12.8659 3.16e-08 0 TGCTGATATTG
+1 phiX174 467 477 + 12.7317 3.35e-08 0 TTTGGATTTAA
+1 phiX174 4033 4043 + 12.6829 3.44e-08 0 AGCGTATCGAG
+1 phiX174 1348 1358 + 12.6707 3.46e-08 0 TACCAATAAAA
+1 phiX174 239 249 + 12.5732 3.62e-08 0 AGTGGCTTAAT
+1 phiX174 500 510 + 12.4634 3.84e-08 0 GACGAGTAACA
+1 phiX174 3001 3011 + 12.4146 3.93e-08 0 GCGGTCAAAAA
+1 phiX174 3776 3786 + 12.378 3.98e-08 0 TATTTCTAATG
+1 phiX174 2026 2036 + 12.3293 4.06e-08 0 GAAGTTTAAGA
+1 phiX174 4237 4247 + 12.3049 4.12e-08 0 AGTTTGTATCT
+1 phiX174 803 813 + 12.2439 4.24e-08 0 AGAAGAAAACG
+1 phiX174 3770 3780 + 12.1829 4.35e-08 0 AAAGGATATTT
+1 phiX174 3429 3439 + 12.122 4.45e-08 0 GAGATGCAAAA
+1 phiX174 99 109 + 12.1098 4.48e-08 0 TACGAATTAAA
+1 phiX174 67 77 + 11.9268 4.78e-08 0 TCTTGATAAAG
+1 phiX174 5332 5342 + 11.7195 5.13e-08 0 ATCTGCTCAAA
+1 phiX174 277 287 + 11.7073 5.14e-08 0 TTTAGATATGA
+1 phiX174 4338 4348 + 11.6951 5.18e-08 0 GGGGACGAAAA
+1 phiX174 3812 3822 + 11.6585 5.28e-08 0 GGTTGATATTT
+1 phiX174 1909 1919 + 11.5488 5.51e-08 0 TAACGCTAAAG
+1 phiX174 3000 3010 + 11.5366 5.54e-08 0 GGCGGTCAAAA
+1 phiX174 3891 3901 + 11.439 5.75e-08 0 ATTGGCTCTAA
+1 phiX174 3079 3089 + 11.4268 5.76e-08 0 CTGGTATTAAA
+1 phiX174 37 47 + 11.4146 5.79e-08 0 TTCGGATATTT
+1 phiX174 380 390 + 11.3293 6.01e-08 0 GTAAGAAATCA
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_xml_1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_xml_1.xml Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Begin document body -->
+<settings>
+<setting name="allow clobber">false</setting>
+<setting name="compute q-values">true</setting>
+<setting name="parse genomic coord.">false</setting>
+<setting name="text only">false</setting>
+<setting name="scan both strands">false</setting>
+<setting name="output threshold">0.0001</setting>
+<setting name="threshold type">p-value</setting>
+<setting name="max stored scores">100000</setting>
+<setting name="pseudocount">0.1</setting>
+<setting name="verbosity">1</setting>
+</settings>
+<sequence-data num-sequences="1" num-residues="5386" />
+<alphabet name="Protein" like="protein">
+<letter id="A" symbol="A" name="Alanine" colour="0000CC"/>
+<letter id="C" symbol="C" name="Cysteine" colour="0000CC"/>
+<letter id="D" symbol="D" name="Aspartic acid" colour="FF00FF"/>
+<letter id="E" symbol="E" name="Glutamic acid" colour="FF00FF"/>
+<letter id="F" symbol="F" name="Phenylalanine" colour="0000CC"/>
+<letter id="G" symbol="G" name="Glycine" colour="FFB300"/>
+<letter id="H" symbol="H" name="Histidine" colour="FFCCCC"/>
+<letter id="I" symbol="I" name="Isoleucine" colour="0000CC"/>
+<letter id="K" symbol="K" name="Lysine" colour="CC0000"/>
+<letter id="L" symbol="L" name="Leucine" colour="0000CC"/>
+<letter id="M" symbol="M" name="Methionine" colour="0000CC"/>
+<letter id="N" symbol="N" name="Asparagine" colour="008000"/>
+<letter id="P" symbol="P" name="Proline" colour="FFFF00"/>
+<letter id="Q" symbol="Q" name="Glutamine" colour="008000"/>
+<letter id="R" symbol="R" name="Arginine" colour="CC0000"/>
+<letter id="S" symbol="S" name="Serine" colour="008000"/>
+<letter id="T" symbol="T" name="Threonine" colour="008000"/>
+<letter id="V" symbol="V" name="Valine" colour="0000CC"/>
+<letter id="W" symbol="W" name="Tryptophan" colour="0000CC"/>
+<letter id="Y" symbol="Y" name="Tyrosine" colour="33E6CC"/>
+<letter id="X" symbol="X" aliases="*." equals="ACDEFGHIKLMNPQRSTVWY" name="Any amino acid"/>
+<letter id="B" symbol="B" equals="DN" name="Asparagine or Aspartic acid"/>
+<letter id="Z" symbol="Z" equals="EQ" name="Glutamine or Glutamic acid"/>
+<letter id="J" symbol="J" equals="IL" name="Leucine or Isoleucine"/>
+</alphabet>
+<motif name="1" width="11" best-possible-match="GGGGTATAAAA"/>
+<background source="non-redundant database">
+<value letter="A">0.073</value>
+<value letter="C">0.018</value>
+<value letter="D">0.052</value>
+<value letter="E">0.062</value>
+<value letter="F">0.040</value>
+<value letter="G">0.069</value>
+<value letter="H">0.022</value>
+<value letter="I">0.056</value>
+<value letter="K">0.058</value>
+<value letter="L">0.092</value>
+<value letter="M">0.023</value>
+<value letter="N">0.046</value>
+<value letter="P">0.051</value>
+<value letter="Q">0.041</value>
+<value letter="R">0.052</value>
+<value letter="S">0.074</value>
+<value letter="T">0.059</value>
+<value letter="V">0.064</value>
+<value letter="W">0.013</value>
+<value letter="Y">0.033</value>
+</background>
+<cisml-file>cisml.xml</cisml-file>
+</fimo>
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/fimo_output_xml_2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fimo_output_xml_2.xml Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Begin document body -->
+<settings>
+<setting name="allow clobber">false</setting>
+<setting name="compute q-values">false</setting>
+<setting name="text only">false</setting>
+<setting name="scan both strands">false</setting>
+<setting name="output threshold">0.0001</setting>
+<setting name="threshold type">p-value</setting>
+<setting name="max stored scores">100000</setting>
+<setting name="pseudocount">0.1</setting>
+<setting name="verbosity">1</setting>
+</settings>
+<sequence-data num-sequences="1" num-residues="5386" />
+<alphabet name="Protein" like="protein">
+<letter id="A" symbol="A" name="Alanine" colour="0000CC"/>
+<letter id="C" symbol="C" name="Cysteine" colour="0000CC"/>
+<letter id="D" symbol="D" name="Aspartic acid" colour="FF00FF"/>
+<letter id="E" symbol="E" name="Glutamic acid" colour="FF00FF"/>
+<letter id="F" symbol="F" name="Phenylalanine" colour="0000CC"/>
+<letter id="G" symbol="G" name="Glycine" colour="FFB300"/>
+<letter id="H" symbol="H" name="Histidine" colour="FFCCCC"/>
+<letter id="I" symbol="I" name="Isoleucine" colour="0000CC"/>
+<letter id="K" symbol="K" name="Lysine" colour="CC0000"/>
+<letter id="L" symbol="L" name="Leucine" colour="0000CC"/>
+<letter id="M" symbol="M" name="Methionine" colour="0000CC"/>
+<letter id="N" symbol="N" name="Asparagine" colour="008000"/>
+<letter id="P" symbol="P" name="Proline" colour="FFFF00"/>
+<letter id="Q" symbol="Q" name="Glutamine" colour="008000"/>
+<letter id="R" symbol="R" name="Arginine" colour="CC0000"/>
+<letter id="S" symbol="S" name="Serine" colour="008000"/>
+<letter id="T" symbol="T" name="Threonine" colour="008000"/>
+<letter id="V" symbol="V" name="Valine" colour="0000CC"/>
+<letter id="W" symbol="W" name="Tryptophan" colour="0000CC"/>
+<letter id="Y" symbol="Y" name="Tyrosine" colour="33E6CC"/>
+<letter id="X" symbol="X" aliases="*." equals="ACDEFGHIKLMNPQRSTVWY" name="Any amino acid"/>
+<letter id="B" symbol="B" equals="DN" name="Asparagine or Aspartic acid"/>
+<letter id="Z" symbol="Z" equals="EQ" name="Glutamine or Glutamic acid"/>
+<letter id="J" symbol="J" equals="IL" name="Leucine or Isoleucine"/>
+</alphabet>
+<motif name="1" width="11" best-possible-match="GGGGTATAAAA"/>
+<background source="non-redundant database">
+<value letter="A">0.073</value>
+<value letter="C">0.018</value>
+<value letter="D">0.052</value>
+<value letter="E">0.062</value>
+<value letter="F">0.040</value>
+<value letter="G">0.069</value>
+<value letter="H">0.022</value>
+<value letter="I">0.056</value>
+<value letter="K">0.058</value>
+<value letter="L">0.092</value>
+<value letter="M">0.023</value>
+<value letter="N">0.046</value>
+<value letter="P">0.051</value>
+<value letter="Q">0.041</value>
+<value letter="R">0.052</value>
+<value letter="S">0.074</value>
+<value letter="T">0.059</value>
+<value letter="V">0.064</value>
+<value letter="W">0.013</value>
+<value letter="Y">0.033</value>
+</background>
+<cisml-file>cisml.xml</cisml-file>
+</fimo>
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_input_1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_input_1.fasta Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,66 @@
+>chr21_19617074_19617124_+
+AAAAATTATTACTAGGGAGGGGGCCGGAACCTCGGGACGTGGGTATATAA
+>chr21_26934381_26934431_+
+GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTATAAAAGGGTCGCACGTT
+>chr21_28217753_28217803_-
+CAAAGGGGAGGAGTGGGGTGGGGGTGGGGGTTTCACTGGTCCACTATAAA
+>chr21_31710037_31710087_-
+AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTT
+>chr21_31744582_31744632_-
+CCCAGGTCTAAGAGCATATATAACTTGGAGTCCAGACTATGACATTCAAA
+>chr21_31768316_31768366_+
+AACGTATATAAATGGTCCTGTCCAGATGTGGCATGCAAACTCAGAATCTT
+>chr21_31914206_31914256_-
+TGACACCCACTACTTAGAGTATAAAATCATTCTGAGAAGTTAGAGACACC
+>chr21_31933633_31933683_-
+TCAGAGTATATATAAATGTTCCTGTCCAGTCACAGTCACCAAACTGACCT
+>chr21_31962741_31962791_-
+ACATATAACTCAGGTTGGATAAAATAATTTGTACAAATCAGGAGAGTCAA
+>chr21_31964683_31964733_+
+TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAAGTGTCCATAC
+>chr21_31973364_31973414_+
+aaacttaaaactctataaacttaaaactCTAGAATCTGATCCTGCTATAC
+>chr21_31992870_31992920_+
+CTCATACACTATTGAAGATGTATAAAATTTCATTTGCAGATGGTGACATT
+>chr21_32185595_32185645_-
+TCACCACCCACCAGAGCTGGGATATATAAAGAAGGTTCTGAGACTAGGAA
+>chr21_32202076_32202126_-
+TGCCCACCAGCTTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT
+>chr21_32253899_32253949_-
+AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC
+>chr21_32410820_32410870_-
+TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG
+>chr21_36411748_36411798_-
+ATAGTTCTGTATAGTTTCAGTTGGCATCtaaaaattatataactttattt
+>chr21_37838750_37838800_-
+gatggttttataaggggcctcaccctcggctcagccctcattcttctcct
+>chr21_45705687_45705737_+
+CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGCGGGGGTATAACAGC
+>chr21_45971413_45971463_-
+CAGGCCCTGGGCATATAAAAGCCCCAGCAGCCAACAGGctcacacacaca
+>chr21_45978668_45978718_-
+CAGAGGGGTATAAAGGTTCCGACCACTCAGAGGCCTGGCACGAtcactca
+>chr21_45993530_45993580_+
+CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCTCACTCACTCGC
+>chr21_46020421_46020471_+
+GAGACATATAAAAGCCAACATCCCTGAGCACCTAACACACGGactcactc
+>chr21_46031920_46031970_+
+GGAAAATACCCAGGGAGGGTATAAAACCTCAGCAGCCAGGGCACACAAAC
+>chr21_46046964_46047014_+
+ACAAGGCCAGGAGGGGTATAAAAGCCTGAGAGCCCCAAGAACctcacaca
+>chr21_46057197_46057247_+
+ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCATATAAAAGCC
+>chr21_46086869_46086919_-
+GACAGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC
+>chr21_46102103_46102153_-
+AGGTGTGTGCTTCTGTGCTGTGGGGATGCCTGGGTCCAGGTATAAAGGCT
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_output_html_1.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_html_1.html Wed Aug 23 20:57:34 2017 -0400
[
@@ -0,0 +1,95 @@
+<!DOCTYPE HTML>
+<html>
+  <head>
+    <meta charset="UTF-8">
+    <title>MEME</title>
+    <script>
+      // @JSON_VAR data
+      var data = {
+        "program": "MEME",
+        "stop_reason": "Stopped because requested number of motifs (1) found.",
+        "cmd": [
+          "meme",
+          "-nostatus"
+        ],
+        "options": {
+          "mod": "zoops",
+          "revcomp": false,
+          "nmotifs": 1,
+          "minw": 8,
+          "maxw": 50,
+          "minsites": 2,
+          "maxsites": 30,
+          "wnsites": 0.8,
+          "spmap": "pam",
+          "spfuzz": 120,
+          "maxwords": -1,
+          "prior": "megap",
+          "b": 7500,
+          "maxiter": 50,
+          "distance": 1e-05,
+          "wg": 11,
+          "ws": 1,
+          "noendgaps": false,
+          "substring": true
+        },
+        "alphabet": {
+          "name": "Protein",
+          "like": "protein",
+          "ncore": 20,
+          "symbols": [
+            {
+              "symbol": "A",
+              "name": "Alanine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "C",
+              "name": "Cysteine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "D",
+              "name": "Aspartic acid",
+              "colour": "FF00FF"
+            }, {
+              "symbol": "E",
+              "name": "Glutamic acid",
+              "colour": "FF00FF"
+            }, {
+              "symbol": "F",
+              "name": "Phenylalanine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "G",
+              "name": "Glycine",
+              "colour": "FFB300"
+            }, {
+              "symbol": "H",
+              "name": "Histidine",
+              "colour": "FFCCCC"
+            }, {
+              "symbol": "I",
+              "name": "Isoleucine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "K",
+              "name": "Lysine",
+              "colour": "CC0000"
+            }, {
+              "symbol": "L",
+              "name": "Leucine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "M",
+              "name": "Methionine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "N",
+              "name": "Asparagine",
+              "colour": "008000"
+            }, {
+              "symbol": "P",
+              "name": "Proline",
+              "colour": "FFFF00"
+            }, {
+              "symbol": "Q",
+              "name": "Glutamine",
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_output_html_2.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_html_2.html Wed Aug 23 20:57:34 2017 -0400
[
@@ -0,0 +1,89 @@
+<!DOCTYPE HTML>
+<html>
+  <head>
+    <meta charset="UTF-8">
+    <title>MEME</title>
+    <script>
+      // @JSON_VAR data
+      var data = {
+        "program": "MEME",
+        "stop_reason": "Stopped because requested number of motifs (1) found.",
+        "cmd": [
+          "meme",
+        ],
+        "options": {
+          "mod": "zoops",
+          "revcomp": false,
+          "nmotifs": 1,
+          "minw": 8,
+          "maxw": 50,
+          "minsites": 2,
+          "maxsites": 30,
+          "wnsites": 0.8,
+          "spmap": "uni",
+          "spfuzz": 0.5,
+          "maxwords": -1,
+          "prior": "dirichlet",
+          "b": 0.01,
+          "maxiter": 50,
+          "distance": 0.001,
+          "wg": 11,
+          "ws": 1,
+          "noendgaps": false,
+          "substring": true
+        },
+        "alphabet": {
+          "name": "DNA",
+          "like": "dna",
+          "ncore": 4,
+          "symbols": [
+            {
+              "symbol": "A",
+              "name": "Adenine",
+              "colour": "CC0000",
+              "complement": "T"
+            }, {
+              "symbol": "C",
+              "name": "Cytosine",
+              "colour": "0000CC",
+              "complement": "G"
+            }, {
+              "symbol": "G",
+              "name": "Guanine",
+              "colour": "FFB300",
+              "complement": "C"
+            }, {
+              "symbol": "T",
+              "aliases": "U",
+              "name": "Thymine",
+              "colour": "008000",
+              "complement": "A"
+            }, {
+              "symbol": "N",
+              "aliases": "X.",
+              "name": "Any base",
+              "equals": "ACGT"
+            }, {
+              "symbol": "V",
+              "name": "Not T",
+              "equals": "ACG"
+            }, {
+              "symbol": "H",
+              "name": "Not G",
+              "equals": "ACT"
+            }, {
+              "symbol": "D",
+              "name": "Not C",
+              "equals": "AGT"
+            }, {
+              "symbol": "B",
+              "name": "Not A",
+              "equals": "CGT"
+            }, {
+              "symbol": "M",
+              "name": "Amino",
+              "equals": "AC"
+            }, {
+              "symbol": "R",
+              "name": "Purine",
+              "equals": "AG"
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_output_txt_1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_txt_1.txt Wed Aug 23 20:57:34 2017 -0400
[
b'@@ -0,0 +1,325 @@\n+********************************************************************************\n+MEME - Motif discovery tool\n+********************************************************************************\n+MEME version 4.11.2 (Release date: Thu May 05 14:58:55 2016 -0700)\n+\n+For further information on how to interpret these results or to get\n+a copy of the MEME software please access http://meme-suite.org .\n+\n+This file may be used as input to the MAST algorithm for searching\n+sequence databases for matches to groups of motifs.  MAST is available\n+for interactive use and downloading at http://meme-suite.org .\n+********************************************************************************\n+\n+\n+********************************************************************************\n+REFERENCE\n+********************************************************************************\n+If you use this program in your research, please cite:\n+\n+Timothy L. Bailey and Charles Elkan,\n+"Fitting a mixture model by expectation maximization to discover\n+motifs in biopolymers", Proceedings of the Second International\n+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,\n+AAAI Press, Menlo Park, California, 1994.\n+********************************************************************************\n+\n+\n+********************************************************************************\n+TRAINING SET\n+********************************************************************************\n+DATAFILE= /tmp/tmpCNK6l0/files/000/dataset_22.dat\n+ALPHABET= ACDEFGHIKLMNPQRSTVWY\n+Sequence name            Weight Length  Sequence name            Weight Length  \n+-------------            ------ ------  -------------            ------ ------  \n+chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  \n+chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  \n+chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  \n+chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  \n+chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  \n+chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  \n+chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  \n+chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  \n+chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  \n+chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  \n+chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  \n+chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  \n+chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  \n+chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  \n+chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  \n+********************************************************************************\n+\n+********************************************************************************\n+COMMAND LINE SUMMARY\n+********************************************************************************\n+This information can also be useful in the event you wish to report a\n+problem with the MEME software.\n+\n+command: meme /tmp/tmpCNK6l0/files/000/dataset_22.dat -o /tmp/tmpCNK6l0/job_working_directory/000/11/dataset_23_files -nostatus -maxsize 1000000 \n+\n+model:  mod=         zoops    nmotifs=         1    evt=           inf\n+object function=  E-value of product of p-values\n+width:  minw=            8    maxw=           50\n+width:  wg=             11    ws=              1    endgaps=       yes\n+nsites: minsites=        2    maxsites=       30    wnsites=       0.8\n+theta:  spmap=         pam    spfuzz=        120\n+global: substring=     yes    branching=      no    wbranch=        no\n+em:     prior=       megap    b=            7500    maxiter=        50\n'..b'000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n+ 0.760000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.240000  0.000000  0.000000  0.000000 \n+ 0.960000  0.040000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n+ 0.840000  0.000000  0.000000  0.000000  0.000000  0.120000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.040000  0.000000  0.000000  0.000000 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif 1 regular expression\n+--------------------------------------------------------------------------------\n+[GA][GA][GC][GA]TATA[AT]AA\n+--------------------------------------------------------------------------------\n+\n+\n+\n+\n+Time  0.72 secs.\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+SUMMARY OF MOTIFS\n+********************************************************************************\n+\n+--------------------------------------------------------------------------------\n+\tCombined block diagrams: non-overlapping sites with p-value < 0.0001\n+--------------------------------------------------------------------------------\n+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM\n+-------------            ----------------  -------------\n+chr21_19617074_19617124_         1.22e-03  39_[1(3.06e-05)]\n+chr21_26934381_26934431_         2.21e-03  27_[1(5.52e-05)]_12\n+chr21_28217753_28217803_         7.29e-01  50\n+chr21_31710037_31710087_         2.37e-03  14_[1(5.94e-05)]_25\n+chr21_31744582_31744632_         1.22e-03  12_[1(3.06e-05)]_27\n+chr21_31768316_31768366_         1.53e-03  [1(3.82e-05)]_39\n+chr21_31914206_31914256_         6.70e-04  15_[1(1.68e-05)]_24\n+chr21_31933633_31933683_         1.81e-03  4_[1(4.54e-05)]_35\n+chr21_31962741_31962791_         1.61e-02  50\n+chr21_31964683_31964733_         1.36e-04  13_[1(3.41e-06)]_26\n+chr21_31973364_31973414_         1.99e-01  50\n+chr21_31992870_31992920_         3.47e-04  16_[1(8.67e-06)]_23\n+chr21_32185595_32185645_         3.47e-04  18_[1(8.67e-06)]_21\n+chr21_32202076_32202126_         2.01e-04  13_[1(5.01e-06)]_26\n+chr21_32253899_32253949_         8.11e-04  19_[1(2.03e-05)]_20\n+chr21_32410820_32410870_         3.47e-04  21_[1(8.67e-06)]_18\n+chr21_36411748_36411798_         2.71e-03  22_[1(6.78e-05)]_17\n+chr21_37838750_37838800_         8.23e-02  50\n+chr21_45705687_45705737_         1.53e-03  37_[1(3.82e-05)]_2\n+chr21_45971413_45971463_         1.36e-04  9_[1(3.41e-06)]_30\n+chr21_45978668_45978718_         6.37e-04  4_[1(1.59e-05)]_35\n+chr21_45993530_45993580_         1.60e-04  7_[1(4.00e-06)]_32\n+chr21_46020421_46020471_         4.83e-04  2_[1(1.21e-05)]_37\n+chr21_46031920_46031970_         2.43e-04  15_[1(6.06e-06)]_24\n+chr21_46046964_46047014_         4.26e-05  12_[1(1.06e-06)]_27\n+chr21_46057197_46057247_         1.36e-04  36_[1(3.41e-06)]_3\n+chr21_46086869_46086919_         4.30e-02  50\n+chr21_46102103_46102153_         4.30e-02  50\n+chr21_47517957_47518007_         6.37e-04  32_[1(1.59e-05)]_7\n+chr21_47575506_47575556_         1.61e-03  30_[1(4.02e-05)]_9\n+--------------------------------------------------------------------------------\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+Stopped because requested number of motifs (1) found.\n+********************************************************************************\n+\n+CPU: bigsky\n+\n+********************************************************************************\n'
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_output_txt_2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_txt_2.txt Wed Aug 23 20:57:34 2017 -0400
[
b'@@ -0,0 +1,319 @@\n+********************************************************************************\n+MEME - Motif discovery tool\n+********************************************************************************\n+MEME version 4.11.2 (Release date: Thu May 05 14:58:55 2016 -0700)\n+\n+For further information on how to interpret these results or to get\n+a copy of the MEME software please access http://meme-suite.org .\n+\n+This file may be used as input to the MAST algorithm for searching\n+sequence databases for matches to groups of motifs.  MAST is available\n+for interactive use and downloading at http://meme-suite.org .\n+********************************************************************************\n+\n+\n+********************************************************************************\n+REFERENCE\n+********************************************************************************\n+If you use this program in your research, please cite:\n+\n+Timothy L. Bailey and Charles Elkan,\n+"Fitting a mixture model by expectation maximization to discover\n+motifs in biopolymers", Proceedings of the Second International\n+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,\n+AAAI Press, Menlo Park, California, 1994.\n+********************************************************************************\n+\n+\n+********************************************************************************\n+TRAINING SET\n+********************************************************************************\n+DATAFILE= Galaxy_FASTA_Input\n+ALPHABET= ACGT\n+Sequence name            Weight Length  Sequence name            Weight Length  \n+-------------            ------ ------  -------------            ------ ------  \n+chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  \n+chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  \n+chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  \n+chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  \n+chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  \n+chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  \n+chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  \n+chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  \n+chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  \n+chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  \n+chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  \n+chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  \n+chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  \n+chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  \n+chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  \n+********************************************************************************\n+\n+********************************************************************************\n+COMMAND LINE SUMMARY\n+********************************************************************************\n+This information can also be useful in the event you wish to report a\n+problem with the MEME software.\n+\n+command: meme /tmp/tmpCNK6l0/files/000/dataset_26.dat -o /tmp/tmpCNK6l0/job_working_directory/000/14/dataset_28_files -nostatus -maxsize 1000000 -sf Galaxy_FASTA_Input -dna -mod zoops -nmotifs 1 -wnsites 0.8 -evt inf -minw 8 -maxw 50 -wg 11 -ws 1 -maxiter 50 -distance 0.001 -prior dirichlet -b 0.01 -plib /tmp/tmpCNK6l0/files/000/dataset_27.dat -spmap uni -spfuzz 0.5 \n+\n+model:  mod=         zoops    nmotifs=         1    evt=           inf\n+object function=  E-value of product of p-values\n+width:  minw=            8    maxw=           50\n+width:  wg=             11    ws=              1    endgaps=       yes\n+nsites: minsites=        2    maxsites=       30    wnsites=       0.8\n+theta:'..b'ition-specific probability matrix\n+--------------------------------------------------------------------------------\n+letter-probability matrix: alength= 4 w= 11 nsites= 30 E= 5.1e-040 \n+ 0.266667  0.066667  0.566667  0.100000 \n+ 0.300000  0.000000  0.666667  0.033333 \n+ 0.133333  0.266667  0.466667  0.133333 \n+ 0.300000  0.033333  0.600000  0.066667 \n+ 0.000000  0.000000  0.033333  0.966667 \n+ 0.866667  0.066667  0.000000  0.066667 \n+ 0.000000  0.000000  0.000000  1.000000 \n+ 0.966667  0.033333  0.000000  0.000000 \n+ 0.700000  0.000000  0.000000  0.300000 \n+ 0.933333  0.066667  0.000000  0.000000 \n+ 0.800000  0.000000  0.166667  0.033333 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif 1 regular expression\n+--------------------------------------------------------------------------------\n+[GA][GA][GC][GA]TATA[AT]AA\n+--------------------------------------------------------------------------------\n+\n+\n+\n+\n+Time  0.32 secs.\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+SUMMARY OF MOTIFS\n+********************************************************************************\n+\n+--------------------------------------------------------------------------------\n+\tCombined block diagrams: non-overlapping sites with p-value < 0.0001\n+--------------------------------------------------------------------------------\n+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM\n+-------------            ----------------  -------------\n+chr21_19617074_19617124_         5.63e-04  39_[+1(1.41e-05)]\n+chr21_26934381_26934431_         1.57e-03  27_[+1(3.93e-05)]_12\n+chr21_28217753_28217803_         1.00e-01  50\n+chr21_31710037_31710087_         2.49e-03  14_[+1(6.24e-05)]_25\n+chr21_31744582_31744632_         1.22e-03  12_[+1(3.04e-05)]_27\n+chr21_31768316_31768366_         1.47e-03  [+1(3.67e-05)]_39\n+chr21_31914206_31914256_         6.45e-04  15_[+1(1.61e-05)]_24\n+chr21_31933633_31933683_         2.26e-03  4_[+1(5.65e-05)]_35\n+chr21_31962741_31962791_         3.37e-02  50\n+chr21_31964683_31964733_         1.95e-04  13_[+1(4.86e-06)]_26\n+chr21_31973364_31973414_         5.73e-02  50\n+chr21_31992870_31992920_         5.52e-04  16_[+1(1.38e-05)]_23\n+chr21_32185595_32185645_         2.59e-04  18_[+1(6.48e-06)]_21\n+chr21_32202076_32202126_         1.10e-04  13_[+1(2.74e-06)]_26\n+chr21_32253899_32253949_         7.78e-04  17_[+1(1.95e-05)]_22\n+chr21_32410820_32410870_         5.52e-04  21_[+1(1.38e-05)]_18\n+chr21_36411748_36411798_         2.85e-03  22_[+1(7.15e-05)]_17\n+chr21_37838750_37838800_         1.90e-02  50\n+chr21_45705687_45705737_         8.63e-04  37_[+1(2.16e-05)]_2\n+chr21_45971413_45971463_         1.95e-04  9_[+1(4.86e-06)]_30\n+chr21_45978668_45978718_         2.59e-04  4_[+1(6.48e-06)]_35\n+chr21_45993530_45993580_         1.95e-04  7_[+1(4.86e-06)]_32\n+chr21_46020421_46020471_         7.78e-04  2_[+1(1.95e-05)]_37\n+chr21_46031920_46031970_         8.89e-05  15_[+1(2.22e-06)]_24\n+chr21_46046964_46047014_         1.80e-05  12_[+1(4.51e-07)]_27\n+chr21_46057197_46057247_         1.95e-04  36_[+1(4.86e-06)]_3\n+chr21_46086869_46086919_         5.54e-03  50\n+chr21_46102103_46102153_         5.54e-03  50\n+chr21_47517957_47518007_         2.59e-04  32_[+1(6.48e-06)]_7\n+chr21_47575506_47575556_         1.22e-03  30_[+1(3.04e-05)]_9\n+--------------------------------------------------------------------------------\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+Stopped because requested number of motifs (1) found.\n+********************************************************************************\n+\n+CPU: bigsky\n+\n+********************************************************************************\n'
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_output_xml_1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_xml_1.xml Wed Aug 23 20:57:34 2017 -0400
[
b'@@ -0,0 +1,1285 @@\n+<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n+<!-- Document definition -->\n+<!DOCTYPE MEME[\n+<!ELEMENT MEME (\n+  training_set,\n+  model, \n+  motifs, \n+  scanned_sites_summary?\n+)>\n+<!ATTLIST MEME \n+  version CDATA #REQUIRED\n+  release CDATA #REQUIRED\n+>\n+<!-- Training-set elements -->\n+<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n+<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n+<!ELEMENT alphabet (letter*)>\n+<!ATTLIST alphabet name CDATA #REQUIRED>\n+<!ELEMENT ambigs (letter*)>\n+<!ELEMENT letter EMPTY>\n+<!ATTLIST letter id ID #REQUIRED>\n+<!ATTLIST letter symbol CDATA #REQUIRED>\n+<!ATTLIST letter equals CDATA #IMPLIED>\n+<!ATTLIST letter aliases CDATA #IMPLIED>\n+<!ATTLIST letter complement CDATA #IMPLIED>\n+<!ATTLIST letter name CDATA #IMPLIED>\n+<!ATTLIST letter colour CDATA #IMPLIED>\n+<!ELEMENT sequence EMPTY>\n+<!ATTLIST sequence id ID #REQUIRED\n+                   name CDATA #REQUIRED\n+                   length CDATA #REQUIRED\n+                   weight CDATA #REQUIRED\n+>\n+<!ELEMENT letter_frequencies (alphabet_array)>\n+\n+<!-- Model elements -->\n+<!ELEMENT model (\n+  command_line,\n+  host,\n+  type,\n+  nmotifs,\n+  evalue_threshold,\n+  object_function,\n+  min_width,\n+  max_width,\n+  minic,\n+  wg,\n+  ws,\n+  endgaps,\n+  minsites,\n+  maxsites,\n+  wnsites,\n+  prob,\n+  spmap,\n+  spfuzz,\n+  prior,\n+  beta,\n+  maxiter,\n+  distance,\n+  num_sequences,\n+  num_positions,\n+  seed,\n+  seqfrac,\n+  strands,\n+  priors_file,\n+  reason_for_stopping,\n+  background_frequencies\n+)>\n+<!ELEMENT command_line (#PCDATA)*>\n+<!ELEMENT host (#PCDATA)*>\n+<!ELEMENT type (#PCDATA)*>\n+<!ELEMENT nmotifs (#PCDATA)*>\n+<!ELEMENT evalue_threshold (#PCDATA)*>\n+<!ELEMENT object_function (#PCDATA)*>\n+<!ELEMENT min_width (#PCDATA)*>\n+<!ELEMENT max_width (#PCDATA)*>\n+<!ELEMENT minic (#PCDATA)*>\n+<!ELEMENT wg (#PCDATA)*>\n+<!ELEMENT ws (#PCDATA)*>\n+<!ELEMENT endgaps (#PCDATA)*>\n+<!ELEMENT minsites (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT wnsites (#PCDATA)*>\n+<!ELEMENT prob (#PCDATA)*>\n+<!ELEMENT spmap (#PCDATA)*>\n+<!ELEMENT spfuzz (#PCDATA)*>\n+<!ELEMENT prior (#PCDATA)*>\n+<!ELEMENT beta (#PCDATA)*>\n+<!ELEMENT maxiter (#PCDATA)*>\n+<!ELEMENT distance (#PCDATA)*>\n+<!ELEMENT num_sequences (#PCDATA)*>\n+<!ELEMENT num_positions (#PCDATA)*>\n+<!ELEMENT seed (#PCDATA)*>\n+<!ELEMENT seqfrac (#PCDATA)*>\n+<!ELEMENT strands (#PCDATA)*>\n+<!ELEMENT priors_file (#PCDATA)*>\n+<!ELEMENT reason_for_stopping (#PCDATA)*>\n+<!ELEMENT background_frequencies (alphabet_array)>\n+<!ATTLIST background_frequencies source CDATA #REQUIRED>\n+\n+<!-- Motif elements -->\n+<!ELEMENT motifs (motif*)>\n+<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n+<!ATTLIST motif id ID #REQUIRED\n+                name CDATA #REQUIRED\n+                width CDATA #REQUIRED\n+                sites CDATA #REQUIRED\n+                llr CDATA #REQUIRED\n+                ic CDATA #REQUIRED\n+                re CDATA #REQUIRED\n+                bayes_threshold CDATA #REQUIRED\n+                e_value CDATA #REQUIRED\n+                elapsed_time CDATA #REQUIRED\n+                url CDATA ""\n+>\n+<!ELEMENT scores (alphabet_matrix)>\n+<!ELEMENT probabilities (alphabet_matrix)>\n+<!ELEMENT regular_expression (#PCDATA)*>\n+\n+<!-- Contributing site elements -->\n+<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n+<!ELEMENT contributing_sites (contributing_site*)>\n+<!ELEMENT contributing_site (left_flank, site, right_flank)>\n+<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n+                          position CDATA #REQUIRED\n+                          strand (plus|minus|none) \'none\'\n+                          pvalue CDATA #REQUIRED\n+>\n+<!-- The left_flank contains the sequence for 10 bases to the left of the motif start -->\n+<!ELEMENT left_flank (#PCDATA)>\n+<!-- The site contains the sequence for the motif instance -->\n+<!ELEMENT site (letter_ref*)>\n+<!-- The right_flank contai'..b'="none" position="12" pvalue="3.06e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_5" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="0" pvalue="3.82e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_6" pvalue="6.70e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="1.68e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_7" pvalue="1.81e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="4.54e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_8" pvalue="1.61e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_9" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_10" pvalue="1.99e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_11" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="16" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_12" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="18" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_13" pvalue="2.01e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="5.01e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_14" pvalue="8.11e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="19" pvalue="2.03e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_15" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="21" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_16" pvalue="2.71e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="22" pvalue="6.78e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_17" pvalue="8.23e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_18" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="37" pvalue="3.82e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_19" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="9" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_20" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="1.59e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_21" pvalue="1.60e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="7" pvalue="4.00e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_22" pvalue="4.83e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="2" pvalue="1.21e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_23" pvalue="2.43e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="6.06e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_24" pvalue="4.26e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="12" pvalue="1.06e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_25" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="36" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_26" pvalue="4.30e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_27" pvalue="4.30e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_28" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="32" pvalue="1.59e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_29" pvalue="1.61e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="30" pvalue="4.02e-05"/>\n+</scanned_sites>\n+</scanned_sites_summary>\n+</MEME>\n'
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_output_xml_2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_xml_2.xml Wed Aug 23 20:57:34 2017 -0400
[
b'@@ -0,0 +1,977 @@\n+<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n+<!-- Document definition -->\n+<!DOCTYPE MEME[\n+<!ELEMENT MEME (\n+  training_set,\n+  model, \n+  motifs, \n+  scanned_sites_summary?\n+)>\n+<!ATTLIST MEME \n+  version CDATA #REQUIRED\n+  release CDATA #REQUIRED\n+>\n+<!-- Training-set elements -->\n+<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n+<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n+<!ELEMENT alphabet (letter*)>\n+<!ATTLIST alphabet name CDATA #REQUIRED>\n+<!ELEMENT ambigs (letter*)>\n+<!ELEMENT letter EMPTY>\n+<!ATTLIST letter id ID #REQUIRED>\n+<!ATTLIST letter symbol CDATA #REQUIRED>\n+<!ATTLIST letter equals CDATA #IMPLIED>\n+<!ATTLIST letter aliases CDATA #IMPLIED>\n+<!ATTLIST letter complement CDATA #IMPLIED>\n+<!ATTLIST letter name CDATA #IMPLIED>\n+<!ATTLIST letter colour CDATA #IMPLIED>\n+<!ELEMENT sequence EMPTY>\n+<!ATTLIST sequence id ID #REQUIRED\n+                   name CDATA #REQUIRED\n+                   length CDATA #REQUIRED\n+                   weight CDATA #REQUIRED\n+>\n+<!ELEMENT letter_frequencies (alphabet_array)>\n+\n+<!-- Model elements -->\n+<!ELEMENT model (\n+  command_line,\n+  host,\n+  type,\n+  nmotifs,\n+  evalue_threshold,\n+  object_function,\n+  min_width,\n+  max_width,\n+  minic,\n+  wg,\n+  ws,\n+  endgaps,\n+  minsites,\n+  maxsites,\n+  wnsites,\n+  prob,\n+  spmap,\n+  spfuzz,\n+  prior,\n+  beta,\n+  maxiter,\n+  distance,\n+  num_sequences,\n+  num_positions,\n+  seed,\n+  seqfrac,\n+  strands,\n+  priors_file,\n+  reason_for_stopping,\n+  background_frequencies\n+)>\n+<!ELEMENT command_line (#PCDATA)*>\n+<!ELEMENT host (#PCDATA)*>\n+<!ELEMENT type (#PCDATA)*>\n+<!ELEMENT nmotifs (#PCDATA)*>\n+<!ELEMENT evalue_threshold (#PCDATA)*>\n+<!ELEMENT object_function (#PCDATA)*>\n+<!ELEMENT min_width (#PCDATA)*>\n+<!ELEMENT max_width (#PCDATA)*>\n+<!ELEMENT minic (#PCDATA)*>\n+<!ELEMENT wg (#PCDATA)*>\n+<!ELEMENT ws (#PCDATA)*>\n+<!ELEMENT endgaps (#PCDATA)*>\n+<!ELEMENT minsites (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT wnsites (#PCDATA)*>\n+<!ELEMENT prob (#PCDATA)*>\n+<!ELEMENT spmap (#PCDATA)*>\n+<!ELEMENT spfuzz (#PCDATA)*>\n+<!ELEMENT prior (#PCDATA)*>\n+<!ELEMENT beta (#PCDATA)*>\n+<!ELEMENT maxiter (#PCDATA)*>\n+<!ELEMENT distance (#PCDATA)*>\n+<!ELEMENT num_sequences (#PCDATA)*>\n+<!ELEMENT num_positions (#PCDATA)*>\n+<!ELEMENT seed (#PCDATA)*>\n+<!ELEMENT seqfrac (#PCDATA)*>\n+<!ELEMENT strands (#PCDATA)*>\n+<!ELEMENT priors_file (#PCDATA)*>\n+<!ELEMENT reason_for_stopping (#PCDATA)*>\n+<!ELEMENT background_frequencies (alphabet_array)>\n+<!ATTLIST background_frequencies source CDATA #REQUIRED>\n+\n+<!-- Motif elements -->\n+<!ELEMENT motifs (motif*)>\n+<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n+<!ATTLIST motif id ID #REQUIRED\n+                name CDATA #REQUIRED\n+                width CDATA #REQUIRED\n+                sites CDATA #REQUIRED\n+                llr CDATA #REQUIRED\n+                ic CDATA #REQUIRED\n+                re CDATA #REQUIRED\n+                bayes_threshold CDATA #REQUIRED\n+                e_value CDATA #REQUIRED\n+                elapsed_time CDATA #REQUIRED\n+                url CDATA ""\n+>\n+<!ELEMENT scores (alphabet_matrix)>\n+<!ELEMENT probabilities (alphabet_matrix)>\n+<!ELEMENT regular_expression (#PCDATA)*>\n+\n+<!-- Contributing site elements -->\n+<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n+<!ELEMENT contributing_sites (contributing_site*)>\n+<!ELEMENT contributing_site (left_flank, site, right_flank)>\n+<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n+                          position CDATA #REQUIRED\n+                          strand (plus|minus|none) \'none\'\n+                          pvalue CDATA #REQUIRED\n+>\n+<!-- The left_flank contains the sequence for 10 bases to the left of the motif start -->\n+<!ELEMENT left_flank (#PCDATA)>\n+<!-- The site contains the sequence for the motif instance -->\n+<!ELEMENT site (letter_ref*)>\n+<!-- The right_flank contain'..b'="plus" position="12" pvalue="3.04e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_5" pvalue="1.47e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="0" pvalue="3.67e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_6" pvalue="6.45e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="1.61e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_7" pvalue="2.26e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="5.65e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_8" pvalue="3.37e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_9" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_10" pvalue="5.73e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_11" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="16" pvalue="1.38e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_12" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="18" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_13" pvalue="1.10e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="2.74e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_14" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="17" pvalue="1.95e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_15" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="21" pvalue="1.38e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_16" pvalue="2.85e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="22" pvalue="7.15e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_17" pvalue="1.90e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_18" pvalue="8.63e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="37" pvalue="2.16e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_19" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="9" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_20" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_21" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="7" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_22" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="2" pvalue="1.95e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_23" pvalue="8.89e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="2.22e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_24" pvalue="1.80e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="12" pvalue="4.51e-07"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_25" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="36" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_26" pvalue="5.54e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_27" pvalue="5.54e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_28" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="32" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_29" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="30" pvalue="3.04e-05"/>\n+</scanned_sites>\n+</scanned_sites_summary>\n+</MEME>\n'
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_psp_gen_reports_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_psp_gen_reports_output.tabular Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,19 @@
+540 bases or amino acids
+0.5 0.5 4 4
+0.5 0.5 6 6
+0.5 0.5 7 7
+0.5 0.5 8 8
+0.5 0.5 9 9
+0.5 0.5 10 10
+0.5 0.5 11 11
+0.5 0.5 12 12
+0.5 0.5 13 13
+0.5 0.5 14 14
+0.5 0.5 15 15
+0.5 0.5 16 16
+0.5 0.5 17 17
+0.5 0.5 18 18
+0.5 0.5 19 19
+0.5 0.5 20 20
+
+score 0.9 occurred 483 times
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/meme_psp_protein_input.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_psp_protein_input.fasta Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,17 @@
+>ICYA_MANSE 
+GDIFYPGYCPDVKPVNDFDLSAFAGAWHEIAKLPLENENQGKCTIAEYKY
+DGKKASVYNSFVSNGVKEYMEGDLEIAPDAKYTKQGKYVMTFKFGQRVVN
+LVPWVLATDYKNYAINYNCDYHPDKKAHSIHAWILSKSKVLEGNTKEVVD
+NVLKTFSHLIDASKFISNDFSEAACQYSTTYSLTGPDRH
+
+>LACB_BOVIN 
+MKCLLLALALTCGAQALIVTQTMKGLDIQKVAGTWYSLAMAASDISLLDA
+QSAPLRVYVEELKPTPEGDLEILLQKWENGECAQKKIIAEKTKIPAVFKI
+DALNENKVLVLDTDYKKYLLFCMENSAEPEQSLACQCLVRTPEVDDEALE
+KFDKALKALPMHIRLSFNPTQLEEQCHI
+
+>BBP_PIEBR 
+NVYHDGACPEVKPVDNFDWSNYHGKWWEVAKYPNSVEKYGKCGWAEYTPE
+GKSVKVSNYHVIHGKEYFIEGTAYPVGDSKIGKIYHKLTYGGVTKENVFN
+VLSTDNKNYIIGYYCKYDEDKKGHQDFVWVLSRSKVLTGEAKTAVENYLI
+GSPVVDSQKLVYSDFSEAACKVN
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/motif1.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif1.gff Wed Aug 23 20:57:34 2017 -0400
b
b'@@ -0,0 +1,100 @@\n+##gff-version 3\n+phiX174\tfimo\tpolypeptide_motif\t1\t11\t78.3\t+\t.\tName=1;ID=1-57-phiX174;pvalue=1.46e-08;sequence=GAGTTTTATCG;\n+phiX174\tfimo\tpolypeptide_motif\t3\t13\t57.5\t+\t.\tName=1;ID=1-471-phiX174;pvalue=1.79e-06;sequence=GTTTTATCGCT;\n+phiX174\tfimo\tpolypeptide_motif\t7\t17\t 45\t+\t.\tName=1;ID=1-1378-phiX174;pvalue=3.18e-05;sequence=TATCGCTTCCA;\n+phiX174\tfimo\tpolypeptide_motif\t10\t20\t53.9\t+\t.\tName=1;ID=1-605-phiX174;pvalue=4.1e-06;sequence=CGCTTCCATGA;\n+phiX174\tfimo\tpolypeptide_motif\t17\t27\t40.2\t+\t.\tName=1;ID=1-1887-phiX174;pvalue=9.55e-05;sequence=ATGACGCAGAA;\n+phiX174\tfimo\tpolypeptide_motif\t18\t28\t45.3\t+\t.\tName=1;ID=1-1349-phiX174;pvalue=2.98e-05;sequence=TGACGCAGAAG;\n+phiX174\tfimo\tpolypeptide_motif\t19\t29\t55.8\t+\t.\tName=1;ID=1-527-phiX174;pvalue=2.6e-06;sequence=GACGCAGAAGT;\n+phiX174\tfimo\tpolypeptide_motif\t21\t31\t41.5\t+\t.\tName=1;ID=1-1705-phiX174;pvalue=7.07e-05;sequence=CGCAGAAGTTA;\n+phiX174\tfimo\tpolypeptide_motif\t22\t32\t44.6\t+\t.\tName=1;ID=1-1404-phiX174;pvalue=3.44e-05;sequence=GCAGAAGTTAA;\n+phiX174\tfimo\tpolypeptide_motif\t24\t34\t79.1\t+\t.\tName=1;ID=1-53-phiX174;pvalue=1.23e-08;sequence=AGAAGTTAACA;\n+phiX174\tfimo\tpolypeptide_motif\t25\t35\t45.3\t+\t.\tName=1;ID=1-1347-phiX174;pvalue=2.97e-05;sequence=GAAGTTAACAC;\n+phiX174\tfimo\tpolypeptide_motif\t26\t36\t59.2\t+\t.\tName=1;ID=1-417-phiX174;pvalue=1.19e-06;sequence=AAGTTAACACT;\n+phiX174\tfimo\tpolypeptide_motif\t30\t40\t44.7\t+\t.\tName=1;ID=1-1399-phiX174;pvalue=3.4e-05;sequence=TAACACTTTCG;\n+phiX174\tfimo\tpolypeptide_motif\t37\t47\t72.4\t+\t.\tName=1;ID=1-98-phiX174;pvalue=5.79e-08;sequence=TTCGGATATTT;\n+phiX174\tfimo\tpolypeptide_motif\t39\t49\t65.3\t+\t.\tName=1;ID=1-213-phiX174;pvalue=2.92e-07;sequence=CGGATATTTCT;\n+phiX174\tfimo\tpolypeptide_motif\t41\t51\t55.3\t+\t.\tName=1;ID=1-548-phiX174;pvalue=2.97e-06;sequence=GATATTTCTGA;\n+phiX174\tfimo\tpolypeptide_motif\t43\t53\t58.4\t+\t.\tName=1;ID=1-442-phiX174;pvalue=1.43e-06;sequence=TATTTCTGATG;\n+phiX174\tfimo\tpolypeptide_motif\t46\t56\t53.7\t+\t.\tName=1;ID=1-617-phiX174;pvalue=4.23e-06;sequence=TTCTGATGAGT;\n+phiX174\tfimo\tpolypeptide_motif\t50\t60\t45.4\t+\t.\tName=1;ID=1-1333-phiX174;pvalue=2.86e-05;sequence=GATGAGTCGAA;\n+phiX174\tfimo\tpolypeptide_motif\t51\t61\t48.4\t+\t.\tName=1;ID=1-1094-phiX174;pvalue=1.44e-05;sequence=ATGAGTCGAAA;\n+phiX174\tfimo\tpolypeptide_motif\t52\t62\t83.9\t+\t.\tName=1;ID=1-22-phiX174;pvalue=4.06e-09;sequence=TGAGTCGAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t53\t63\t53.9\t+\t.\tName=1;ID=1-601-phiX174;pvalue=4.03e-06;sequence=GAGTCGAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t54\t64\t62.9\t+\t.\tName=1;ID=1-297-phiX174;pvalue=5.16e-07;sequence=AGTCGAAAAAT;\n+phiX174\tfimo\tpolypeptide_motif\t55\t65\t52.8\t+\t.\tName=1;ID=1-675-phiX174;pvalue=5.26e-06;sequence=GTCGAAAAATT;\n+phiX174\tfimo\tpolypeptide_motif\t56\t66\t41.4\t+\t.\tName=1;ID=1-1713-phiX174;pvalue=7.2e-05;sequence=TCGAAAAATTA;\n+phiX174\tfimo\tpolypeptide_motif\t58\t68\t43.4\t+\t.\tName=1;ID=1-1500-phiX174;pvalue=4.56e-05;sequence=GAAAAATTATC;\n+phiX174\tfimo\tpolypeptide_motif\t59\t69\t59.6\t+\t.\tName=1;ID=1-409-phiX174;pvalue=1.1e-06;sequence=AAAAATTATCT;\n+phiX174\tfimo\tpolypeptide_motif\t61\t71\t61.8\t+\t.\tName=1;ID=1-329-phiX174;pvalue=6.52e-07;sequence=AAATTATCTTG;\n+phiX174\tfimo\tpolypeptide_motif\t63\t73\t59.2\t+\t.\tName=1;ID=1-419-phiX174;pvalue=1.2e-06;sequence=ATTATCTTGAT;\n+phiX174\tfimo\tpolypeptide_motif\t65\t75\t53.3\t+\t.\tName=1;ID=1-643-phiX174;pvalue=4.66e-06;sequence=TATCTTGATAA;\n+phiX174\tfimo\tpolypeptide_motif\t66\t76\t51.8\t+\t.\tName=1;ID=1-737-phiX174;pvalue=6.54e-06;sequence=ATCTTGATAAA;\n+phiX174\tfimo\tpolypeptide_motif\t67\t77\t73.2\t+\t.\tName=1;ID=1-89-phiX174;pvalue=4.78e-08;sequence=TCTTGATAAAG;\n+phiX174\tfimo\tpolypeptide_motif\t69\t79\t63.8\t+\t.\tName=1;ID=1-268-phiX174;pvalue=4.15e-07;sequence=TTGATAAAGCA;\n+phiX174\tfimo\tpolypeptide_motif\t71\t81\t40.2\t+\t.\tName=1;ID=1-1882-phiX174;pvalue=9.49e-05;sequence=GATAAAGCAGG;\n+phiX174\tfimo\tpolypeptide_motif\t73\t83\t45.4\t+\t.\tName=1;ID=1-1334-phiX174;pvalue=2.87e-05;sequence=TAAAGCAGGAA;\n+phiX174\tfimo\tpolypeptide_motif\t74\t84\t50.9\t+\t.\tName=1;ID=1-832-phiX174;pvalue=8.05e-06;sequence=AAAGCAGGAAT;\n+phiX174\tfimo\tpolypeptide_motif\t76\t86\t52.2'..b';\n+phiX174\tfimo\tpolypeptide_motif\t129\t139\t43.2\t+\t.\tName=1;ID=1-1522-phiX174;pvalue=4.78e-05;sequence=GAAAATGAGAA;\n+phiX174\tfimo\tpolypeptide_motif\t130\t140\t54.1\t+\t.\tName=1;ID=1-595-phiX174;pvalue=3.92e-06;sequence=AAAATGAGAAA;\n+phiX174\tfimo\tpolypeptide_motif\t131\t141\t 76\t+\t.\tName=1;ID=1-68-phiX174;pvalue=2.49e-08;sequence=AAATGAGAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t132\t142\t51.2\t+\t.\tName=1;ID=1-800-phiX174;pvalue=7.57e-06;sequence=AATGAGAAAAT;\n+phiX174\tfimo\tpolypeptide_motif\t133\t143\t56.2\t+\t.\tName=1;ID=1-513-phiX174;pvalue=2.41e-06;sequence=ATGAGAAAATT;\n+phiX174\tfimo\tpolypeptide_motif\t134\t144\t41.1\t+\t.\tName=1;ID=1-1761-phiX174;pvalue=7.83e-05;sequence=TGAGAAAATTC;\n+phiX174\tfimo\tpolypeptide_motif\t135\t145\t50.3\t+\t.\tName=1;ID=1-910-phiX174;pvalue=9.39e-06;sequence=GAGAAAATTCG;\n+phiX174\tfimo\tpolypeptide_motif\t136\t146\t43.3\t+\t.\tName=1;ID=1-1517-phiX174;pvalue=4.66e-05;sequence=AGAAAATTCGA;\n+phiX174\tfimo\tpolypeptide_motif\t139\t149\t54.2\t+\t.\tName=1;ID=1-588-phiX174;pvalue=3.75e-06;sequence=AAATTCGACCT;\n+phiX174\tfimo\tpolypeptide_motif\t141\t151\t42.2\t+\t.\tName=1;ID=1-1625-phiX174;pvalue=6.01e-05;sequence=ATTCGACCTAT;\n+phiX174\tfimo\tpolypeptide_motif\t143\t153\t 50\t+\t.\tName=1;ID=1-938-phiX174;pvalue=9.94e-06;sequence=TCGACCTATCC;\n+phiX174\tfimo\tpolypeptide_motif\t145\t155\t44.6\t+\t.\tName=1;ID=1-1403-phiX174;pvalue=3.42e-05;sequence=GACCTATCCTT;\n+phiX174\tfimo\tpolypeptide_motif\t155\t165\t51.3\t+\t.\tName=1;ID=1-787-phiX174;pvalue=7.35e-06;sequence=TGCGCAGCTCG;\n+phiX174\tfimo\tpolypeptide_motif\t157\t167\t51.1\t+\t.\tName=1;ID=1-807-phiX174;pvalue=7.68e-06;sequence=CGCAGCTCGAG;\n+phiX174\tfimo\tpolypeptide_motif\t159\t169\t44.5\t+\t.\tName=1;ID=1-1420-phiX174;pvalue=3.56e-05;sequence=CAGCTCGAGAA;\n+phiX174\tfimo\tpolypeptide_motif\t160\t170\t 40\t+\t.\tName=1;ID=1-1921-phiX174;pvalue=9.89e-05;sequence=AGCTCGAGAAG;\n+phiX174\tfimo\tpolypeptide_motif\t166\t176\t60.9\t+\t.\tName=1;ID=1-365-phiX174;pvalue=8.02e-07;sequence=AGAAGCTCTTA;\n+phiX174\tfimo\tpolypeptide_motif\t168\t178\t62.3\t+\t.\tName=1;ID=1-311-phiX174;pvalue=5.87e-07;sequence=AAGCTCTTACT;\n+phiX174\tfimo\tpolypeptide_motif\t181\t191\t49.9\t+\t.\tName=1;ID=1-946-phiX174;pvalue=1.01e-05;sequence=GCGACCTTTCG;\n+phiX174\tfimo\tpolypeptide_motif\t187\t197\t52.5\t+\t.\tName=1;ID=1-694-phiX174;pvalue=5.64e-06;sequence=TTTCGCCATCA;\n+phiX174\tfimo\tpolypeptide_motif\t191\t201\t46.6\t+\t.\tName=1;ID=1-1232-phiX174;pvalue=2.2e-05;sequence=GCCATCAACTA;\n+phiX174\tfimo\tpolypeptide_motif\t194\t204\t76.4\t+\t.\tName=1;ID=1-67-phiX174;pvalue=2.29e-08;sequence=ATCAACTAACG;\n+phiX174\tfimo\tpolypeptide_motif\t201\t211\t40.1\t+\t.\tName=1;ID=1-1908-phiX174;pvalue=9.77e-05;sequence=AACGATTCTGT;\n+phiX174\tfimo\tpolypeptide_motif\t203\t213\t 63\t+\t.\tName=1;ID=1-291-phiX174;pvalue=5e-07;sequence=CGATTCTGTCA;\n+phiX174\tfimo\tpolypeptide_motif\t205\t215\t53.8\t+\t.\tName=1;ID=1-610-phiX174;pvalue=4.16e-06;sequence=ATTCTGTCAAA;\n+phiX174\tfimo\tpolypeptide_motif\t206\t216\t59.1\t+\t.\tName=1;ID=1-421-phiX174;pvalue=1.23e-06;sequence=TTCTGTCAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t207\t217\t 68\t+\t.\tName=1;ID=1-153-phiX174;pvalue=1.58e-07;sequence=TCTGTCAAAAA;\n+phiX174\tfimo\tpolypeptide_motif\t209\t219\t49.6\t+\t.\tName=1;ID=1-988-phiX174;pvalue=1.09e-05;sequence=TGTCAAAAACT;\n+phiX174\tfimo\tpolypeptide_motif\t210\t220\t40.8\t+\t.\tName=1;ID=1-1810-phiX174;pvalue=8.33e-05;sequence=GTCAAAAACTG;\n+phiX174\tfimo\tpolypeptide_motif\t213\t223\t59.7\t+\t.\tName=1;ID=1-404-phiX174;pvalue=1.06e-06;sequence=AAAAACTGACG;\n+phiX174\tfimo\tpolypeptide_motif\t223\t233\t 42\t+\t.\tName=1;ID=1-1654-phiX174;pvalue=6.36e-05;sequence=GCGTTGGATGA;\n+phiX174\tfimo\tpolypeptide_motif\t225\t235\t61.4\t+\t.\tName=1;ID=1-349-phiX174;pvalue=7.16e-07;sequence=GTTGGATGAGG;\n+phiX174\tfimo\tpolypeptide_motif\t227\t237\t40.3\t+\t.\tName=1;ID=1-1874-phiX174;pvalue=9.32e-05;sequence=TGGATGAGGAG;\n+phiX174\tfimo\tpolypeptide_motif\t228\t238\t49.9\t+\t.\tName=1;ID=1-947-phiX174;pvalue=1.01e-05;sequence=GGATGAGGAGA;\n+phiX174\tfimo\tpolypeptide_motif\t229\t239\t 45\t+\t.\tName=1;ID=1-1370-phiX174;pvalue=3.16e-05;sequence=GATGAGGAGAA;\n+phiX174\tfimo\tpolypeptide_motif\t230\t240\t44.8\t+\t.\tName=1;ID=1-1395-phiX174;pvalue=3.33e-05;sequence=ATGAGGAGAAG;\n'
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/output.memepsp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.memepsp Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,6 @@
+>BBP_PIEBR 20 scaledmin = 0.1 scaledmax = 0.9
+0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+>ICYA_MANSE 20 scaledmin = 0.1 scaledmax = 0.9
+0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+>LACB_BOVIN 20 scaledmin = 0.1 scaledmax = 0.9
+0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/phiX.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX.fasta Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,79 @@
+>phiX174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
+CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
+TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
+AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
+CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
+TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
+TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
+CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
+GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
+GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
+ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
+TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
+TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
+ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
+CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
+TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
+TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
+TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
+AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
+ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
+GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
+TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
+TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
+TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
+CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
+AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
+CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
+TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
+CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
+AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
+GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
+GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
+CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
+TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
+GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
+CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
+TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
+CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
+TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
+TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
+CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
+ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
+TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
+ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
+GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
+GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
+GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
+ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
+CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
+CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
+GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
+CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
+CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
+TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
+TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
+TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
+AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
+TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
+
b
diff -r 000000000000 -r a0fa4efeeee3 test-data/prior30.plib
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/prior30.plib Wed Aug 23 20:57:34 2017 -0400
b
b'@@ -0,0 +1,275 @@\n+Alphabet= ACDEFGHIKLMNPQRSTVWY\n+NumDistr= 30\n+Number= 0\n+Mixture= 0.055795\n+B= 5.623820\n+Alpha= 0.0855491 0.0221831 0.0111063 0.0209959 0.0505726 0.025437 0.0155389 0.132951 0.0247865 0.150287 0.0577239 0.0209317 0.0166629 0.0220905 0.0244295 0.0497608 0.070277 0.157532 0.0102219 0.0309633 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=    HMM9.4 reestimated in henikoff29.2\n+\n+Number= 1\n+Mixture= 0.198333\n+B= 0.097240\n+Alpha= 0.0562629 0.0329597 0.0692513 0.0385232 0.0400041 0.143573 0.0428939 0.0226244 0.0442102 0.0665467 0.0117853 0.0447655 0.0833299 0.0395825 0.0611271 0.0588852 0.0513472 0.0317153 0.0237865 0.0368161 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       24\n+Comment=      Outside\n+\n+Number= 2\n+Mixture= 0.043566\n+B= 1.648336\n+Alpha= 0.0144564 0.00845337 0.00785519 0.00864933 0.255959 0.0110815 0.0509526 0.0234533 0.0120443 0.0561967 0.015111 0.0190974 0.00857653 0.0167812 0.0164918 0.0197108 0.0151013 0.0252782 0.050139 0.364613 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       26\n+Comment=       Inside\n+\n+Number= 3\n+Mixture= 0.060170\n+B= 2.595432\n+Alpha= 0.0452144 0.00587917 0.169731 0.0751478 0.00749471 0.0845832 0.0369819 0.00610072 0.0548186 0.011029 0.00382749 0.212785 0.0206532 0.0416705 0.0280716 0.117267 0.0533742 0.00943157 0.00216149 0.0137784 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       19\n+Comment=      Outside Alpha\n+\n+Number= 4\n+Mixture= 0.065466\n+B= 3.112271\n+Alpha= 0.0361167 0.0049157 0.0134924 0.0461325 0.00557631 0.0209043 0.0302551 0.016425 0.307554 0.0338255 0.0139435 0.0360733 0.0127659 0.0873761 0.222668 0.0369042 0.0354442 0.0228891 0.00434827 0.0123906 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       21\n+Comment=       Outside Beta\n+\n+Number= 5\n+Mixture= 0.067614\n+B= 2.053644\n+Alpha= 0.0194362 0.00765176 0.00188738 0.00372898 0.0849894 0.00421787 0.00400459 0.152735 0.00407958 0.4568 0.106051 0.00304386 0.00545956 0.00900935 0.00605071 0.00519029 0.016255 0.0861045 0.00787965 0.0154248 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       22\n+Comment=      Inside alpha\n+\n+Number= 6\n+Mixture= 0.080724\n+B= 2.138987\n+Alpha= 0.0423172 0.0153891 0.00409306 0.00565735 0.0197117 0.00590607 0.00139926 0.307863 0.00544884 0.115721 0.0285808 0.00522771 0.00474851 0.00328193 0.00351054 0.00892385 0.0348922 0.380003 0.00117673 0.00614917 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       23\n+Comment=       Inside beta\n+\n+Number= 7\n+Mixture= 0.051030\n+B= 3.878926\n+Alpha= 0.0548123 0.000759746 0.144127 0.46019 0.00249502 0.0192754 0.0106535 0.00938765 0.0562429 0.0163148 0.00717389 0.0245612 0.0177482 0.0744802 0.0199233 0.0323535 0.0257651 0.018574 0.00087086 0.00429088 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       23\n+Comment=      Alpha helix\n+\n+Number= 8\n+Mixture= 0.103529\n+B= 1.486325\n+Alpha= 0.315754 0.0384546 0.0116388 0.0133665 0.0111126 0.107921 0.00752325 0.0154885 0.0111281 0.0231087 0.011626 0.0228375 0.0304785 0.0166632 0.0156345 0.186379 0.0954421 0.0546691 0.00351538 0.00725682 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       23\n+Comment=      Beta strand\n+\n+Number= 9\n+Mixture= 0.062940\n+B= 8.221215\n+Alpha= 0.0869919 0.00672577 0.0600995 0.10763 0.0153489 0.0378086 0.0325335 0.023388 0.113765 0.041623 0.0196906 0.0625344 0.0262599 0.0788667 0.0707399 0.0886634 0.0666777 0.0361472 0.00484308 0.0196629 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       23\n+Comment=      Other\n+\n+Number= 10\n+Mixture= 0.012518\n+B= 38.955631\n+Alpha= 0.732922 0.0145131 0.00623235 0.00951423 0.00717778 0.0289521 0.00351664 0.0125081 0.00886593 0.0183651 0.00832812 0.00670968 0.00364556 0.00622169 0.00812899 0.0582399 0.0205067 0.0394327 0.00207485 0.00414489 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=      A\n+\n+Number= 11\n+Mixture= 0.004953\n+B= 381.562195\n+Alpha= 0.00563239 0.959814 0.00144129 0.00213042 0.00158645 0.00168393 0.000989765 0.00325263 0.00148501 0.00343924 0.00168673 0.00159054 0.00121534 0.00129942 0.00195209 0.00296106 0.0039912 0.00266944 0.000327808 0.000851203 \n+FullUpdate= 1\n+QUpdate= 1\n+Str'..b'nt=     I \n+\n+Number= 18\n+Mixture= 0.009400\n+B= 150.415985\n+Alpha= 0.00688657 0.00169711 0.00222738 0.00346887 0.00115861 0.00302866 0.00209171 0.00400905 0.903944 0.0037747 0.00186061 0.00449531 0.00249618 0.00324487 0.041775 0.00392196 0.00461714 0.00296607 0.000893256 0.00144282 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=      K \n+\n+Number= 19\n+Mixture= 0.017057\n+B= 31.896633\n+Alpha= 0.0114646 0.00367926 0.00296188 0.00596126 0.0190009 0.00382486 0.00338381 0.0401936 0.00650072 0.790038 0.031659 0.00392791 0.0050046 0.00753591 0.00771818 0.00748621 0.0101555 0.0312597 0.00242405 0.00581952 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     L \n+\n+Number= 20\n+Mixture= 0.002761\n+B= 201.346268\n+Alpha= 0.00353933 0.00165628 0.0014931 0.00161065 0.00279831 0.00194259 0.00101868 0.00969101 0.00211316 0.0217036 0.928022 0.00162899 0.0015681 0.0015629 0.00138977 0.00294601 0.00311476 0.00723178 0.00156295 0.00340569 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     M \n+\n+Number= 21\n+Mixture= 0.005734\n+B= 108.343185\n+Alpha= 0.0067512 0.00239062 0.0140378 0.0043452 0.00365788 0.00689345 0.0148828 0.00715373 0.00789036 0.00614036 0.00289697 0.858995 0.00399721 0.00770961 0.00570515 0.0238176 0.011602 0.00591549 0.00167893 0.00353897 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     N \n+\n+Number= 22\n+Mixture= 0.022818\n+B= 15.153304\n+Alpha= 0.0417987 0.00360232 0.0113792 0.0152366 0.00564775 0.0123795 0.00606957 0.0091353 0.0165122 0.0167265 0.00490487 0.00915437 0.755604 0.0131375 0.012587 0.0283392 0.0189623 0.0140029 0.0012848 0.00353553 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     P \n+\n+Number= 23\n+Mixture= 0.005931\n+B= 79.417511\n+Alpha= 0.0142993 0.00266984 0.0053289 0.0321605 0.0028715 0.00426743 0.0257509 0.00565307 0.0106106 0.0161186 0.00955753 0.0104696 0.00638107 0.807311 0.0149106 0.0111968 0.00889459 0.00681482 0.00206658 0.00266624 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     Q \n+\n+Number= 24\n+Mixture= 0.011491\n+B= 93.103897\n+Alpha= 0.00756896 0.00314197 0.00296652 0.00327634 0.00194604 0.00467894 0.00721049 0.00406061 0.0277257 0.00663852 0.00217868 0.00577047 0.00473306 0.00953551 0.889701 0.00650859 0.00506022 0.00294281 0.00205549 0.00230062 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=      R \n+\n+Number= 25\n+Mixture= 0.008219\n+B= 47.504795\n+Alpha= 0.0284818 0.00697155 0.00749796 0.00604665 0.00515171 0.00954817 0.00380684 0.00637929 0.0104463 0.00908885 0.00471437 0.0194592 0.00711823 0.00611827 0.00979722 0.707416 0.139256 0.00656298 0.0015377 0.00460086 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=    0\n+Comment=     S \n+\n+Number= 26\n+Mixture= 0.019050\n+B= 14.027470\n+Alpha= 0.0247201 0.00718027 0.00845584 0.0076239 0.00600101 0.0073401 0.00492149 0.0173757 0.0129878 0.0125773 0.0100452 0.0230424 0.00659406 0.0110314 0.0112037 0.107763 0.690341 0.0249364 0.00193884 0.00392074 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     T \n+\n+Number= 27\n+Mixture= 0.007047\n+B= 76.958153\n+Alpha= 0.0447488 0.00734525 0.00576457 0.00805666 0.00714188 0.00593389 0.0041663 0.0688592 0.00714299 0.0255115 0.00800708 0.00501678 0.00632646 0.00492002 0.00812967 0.0100074 0.0240134 0.745035 0.00126243 0.00261056 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     V \n+\n+Number= 28\n+Mixture= 0.003957\n+B= 150.973328\n+Alpha= 0.00517343 0.00213336 0.00350645 0.00390297 0.018439 0.0041919 0.0023655 0.00404231 0.00420998 0.0171406 0.00379068 0.00363696 0.00245861 0.00387467 0.00502035 0.00465674 0.00417283 0.00620977 0.888513 0.012561 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     W \n+\n+Number= 29\n+Mixture= 0.004904\n+B= 30.653225\n+Alpha= 0.0342049 0.00809912 0.0126852 0.0174701 0.156033 0.0118268 0.0431342 0.0204751 0.0164439 0.0363664 0.0129811 0.0131986 0.0103037 0.0116235 0.0159032 0.0287792 0.0176143 0.024986 0.0131845 0.494687 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     Y \n+\n+/* $Header$ */\n+/* $Header$ */\n+/* $Header$ */\n'
b
diff -r 000000000000 -r a0fa4efeeee3 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Aug 23 20:57:34 2017 -0400
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>