Mercurial > repos > iuc > meme_psp_gen
changeset 0:a0fa4efeeee3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme commit 3f116ddc83447056068573320c148a9bfca9aa2e
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all_fasta.loc.sample Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fimo_wrapper.py Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,194 @@ +#!/usr/bin/env python +import argparse +import os +import shutil +import string +import subprocess +import sys +import tempfile + +BUFFSIZE = 1048576 +# Translation table for reverse Complement, with ambiguity codes. +DNA_COMPLEMENT = string.maketrans("ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb") + + +def get_stderr(tmp_stderr): + tmp_stderr.seek(0) + stderr = '' + try: + while True: + stderr += tmp_stderr.read(BUFFSIZE) + if not stderr or len(stderr) % BUFFSIZE != 0: + break + except OverflowError: + pass + return stderr + + +def reverse(sequence): + # Reverse sequence string. + return sequence[::-1] + + +def dna_complement(sequence): + # Complement DNA sequence string. + return sequence.translate(DNA_COMPLEMENT) + + +def dna_reverse_complement(sequence): + # Returns the reverse complement of the sequence. + sequence = reverse(sequence) + return dna_complement(sequence) + + +def stop_err(msg): + sys.stderr.write(msg) + sys.exit(1) + + +parser = argparse.ArgumentParser() +parser.add_argument('--input_motifs', dest='input_motifs', help='MEME output formatted files for input to fimo') +parser.add_argument('--input_fasta', dest='input_fasta', help='Fassta sequence file') +parser.add_argument('--options_type', dest='options_type', help='Basic or Advance options') +parser.add_argument('--input_psp', dest='input_psp', default=None, help='File containing position specific priors') +parser.add_argument('--input_prior_dist', dest='input_prior_dist', default=None, help='File containing binned distribution of priors') +parser.add_argument('--alpha', dest='alpha', type=float, default=1.0, help='The alpha parameter for calculating position specific priors') +parser.add_argument('--bgfile', dest='bgfile', default=None, help='Background file type, used only if not "default"') +parser.add_argument('--max_strand', action='store_true', help='If matches on both strands at a given position satisfy the output threshold, only report the match for the strand with the higher score') +parser.add_argument('--max_stored_scores', dest='max_stored_scores', type=int, help='Maximum score count to store') +parser.add_argument('--motif', dest='motifs', action='append', default=[], help='Specify motif by id') +parser.add_argument('--output_separate_motifs', dest='output_separate_motifs', default='no', help='Output one dataset per motif') +parser.add_argument('--motif_pseudo', dest='motif_pseudo', type=float, default=0.1, help='Pseudocount to add to counts in motif matrix') +parser.add_argument('--no_qvalue', action='store_true', help='Do not compute a q-value for each p-value') +parser.add_argument('--norc', action='store_true', help='Do not score the reverse complement DNA strand') +parser.add_argument('--output_path', dest='output_path', help='Output files directory') +parser.add_argument('--parse_genomic_coord', dest='parse_genomic_coord', default='no', help='Check each sequence header for UCSC style genomic coordinates') +parser.add_argument('--remove_duplicate_coords', dest='remove_duplicate_coords', default='no', help='Remove duplicate entries in unique GFF coordinates') +parser.add_argument('--qv_thresh', action='store_true', help='Use q-values for the output threshold') +parser.add_argument('--thresh', dest='thresh', type=float, help='p-value threshold') +parser.add_argument('--gff_output', dest='gff_output', help='Gff output file') +parser.add_argument('--html_output', dest='html_output', help='HTML output file') +parser.add_argument('--interval_output', dest='interval_output', help='Interval output file') +parser.add_argument('--txt_output', dest='txt_output', help='Text output file') +parser.add_argument('--xml_output', dest='xml_output', help='XML output file') +args = parser.parse_args() + +fimo_cmd_list = ['fimo'] +if args.options_type == 'advanced': + fimo_cmd_list.append('--alpha %4f' % args.alpha) + if args.bgfile is not None: + fimo_cmd_list.append('--bgfile "%s"' % args.bgfile) + if args.max_strand: + fimo_cmd_list.append('--max-strand') + fimo_cmd_list.append('--max-stored-scores %d' % args.max_stored_scores) + if len(args.motifs) > 0: + for motif in args.motifs: + fimo_cmd_list.append('--motif "%s"' % motif) + fimo_cmd_list.append('--motif-pseudo %4f' % args.motif_pseudo) + if args.no_qvalue: + fimo_cmd_list.append('--no-qvalue') + if args.norc: + fimo_cmd_list.append('--norc') + if args.parse_genomic_coord == 'yes': + fimo_cmd_list.append('--parse-genomic-coord') + if args.qv_thresh: + fimo_cmd_list.append('--qv-thresh') + fimo_cmd_list.append('--thresh %4f' % args.thresh) + if args.input_psp is not None: + fimo_cmd_list.append('--psp "%s"' % args.input_psp) + if args.input_prior_dist is not None: + fimo_cmd_list.append('--prior-dist "%s"' % args.input_prior_dist) +fimo_cmd_list.append('--o "%s"' % (args.output_path)) +fimo_cmd_list.append('--verbosity 1') +fimo_cmd_list.append(args.input_motifs) +fimo_cmd_list.append(args.input_fasta) + +fimo_cmd = ' '.join(fimo_cmd_list) + +try: + tmp_stderr = tempfile.NamedTemporaryFile() + proc = subprocess.Popen(args=fimo_cmd, shell=True, stderr=tmp_stderr) + returncode = proc.wait() + if returncode != 0: + stderr = get_stderr(tmp_stderr) + stop_err(stderr) +except Exception as e: + stop_err('Error running FIMO:\n%s' % e) + +shutil.move(os.path.join(args.output_path, 'fimo.txt'), args.txt_output) + +gff_file = os.path.join(args.output_path, 'fimo.gff') +if args.remove_duplicate_coords == 'yes': + tmp_stderr = tempfile.NamedTemporaryFile() + # Identify and eliminating identical motif occurrences. These + # are identical if the combination of chrom, start, end and + # motif id are identical. + cmd = 'sort -k1,1 -k4,4n -k5,5n -k9.1,9.6 -u -o %s %s' % (gff_file, gff_file) + proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True) + returncode = proc.wait() + if returncode != 0: + stderr = get_stderr(tmp_stderr) + stop_err(stderr) + # Sort GFF output by a combination of chrom, score, start. + cmd = 'sort -k1,1 -k4,4n -k6,6n -o %s %s' % (gff_file, gff_file) + proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, shell=True) + returncode = proc.wait() + if returncode != 0: + stderr = get_stderr(tmp_stderr) + stop_err(stderr) +if args.output_separate_motifs == 'yes': + # Create the collection output directory. + collection_path = (os.path.join(os.getcwd(), 'output')) + # Keep track of motif occurrences. + header_line = None + motif_ids = [] + file_handles = [] + for line in open(gff_file, 'r'): + if line.startswith('#'): + if header_line is None: + header_line = line + continue + items = line.split('\t') + attribute = items[8] + attributes = attribute.split(';') + name = attributes[0] + motif_id = name.split('=')[1] + file_name = os.path.join(collection_path, 'MOTIF%s.gff' % motif_id) + if motif_id in motif_ids: + i = motif_ids.index(motif_id) + fh = file_handles[i] + fh.write(line) + else: + fh = open(file_name, 'wb') + if header_line is not None: + fh.write(header_line) + fh.write(line) + motif_ids.append(motif_id) + file_handles.append(fh) + for file_handle in file_handles: + file_handle.close() +else: + shutil.move(gff_file, args.gff_output) +shutil.move(os.path.join(args.output_path, 'fimo.xml'), args.xml_output) +shutil.move(os.path.join(args.output_path, 'fimo.html'), args.html_output) + +out_file = open(args.interval_output, 'wb') +out_file.write("#%s\n" % "\t".join(("chr", "start", "end", "pattern name", "score", "strand", "matched sequence", "p-value", "q-value"))) +for line in open(args.txt_output): + if line.startswith('#'): + continue + fields = line.rstrip("\n\r").split("\t") + start, end = int(fields[2]), int(fields[3]) + sequence = fields[7] + if start > end: + # Flip start and end and set strand. + start, end = end, start + strand = "-" + # We want sequences relative to strand; FIMO always provides + stranded sequence. + sequence = dna_reverse_complement(sequence) + else: + strand = "+" + # Make 0-based start position. + start -= 1 + out_file.write("%s\n" % "\t".join([fields[1], str(start), str(end), fields[0], fields[4], strand, sequence, fields[5], fields[6]])) +out_file.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,11 @@ +<?xml version='1.0' encoding='UTF-8'?> +<macros> + <token name="@WRAPPER_VERSION@">4.11.2</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.3.23">graphicsmagick</requirement> + <requirement type="package" version="4.11.2">meme</requirement> + </requirements> + </xml> +</macros> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/meme_psp_gen.xml Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,140 @@ +<tool id="meme_psp_gen" name="MEME psp-gen" version="@WRAPPER_VERSION@.0"> + <description>- perform discriminative motif discovery</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <command detect_errors="exit_code"><![CDATA[ +psp-gen +-pos '$primary_sequence' +-neg '$control_sequence' +-minw $adv.minw +-maxw $adv.maxw +$adv.alphabet +#if str($adv.triples_cond.triples) == 'yes': + -triples + $adv.triples_cond.fixedstart +#end if +$adv.equiv +$adv.revcomp +$adv.scalemin +$adv.scalemax +$adv.maxrange +$adv.raw +#if str($adv.report_scores_cond.report_scores) == 'yes': + -reportscores + $adv.report_scores_cond.verbose + 2> '$output_tabular' +#end if +> '$output_psp' + ]]></command> + <inputs> + <param format="fasta" name="primary_sequence" type="data" label="Primary sequence file"/> + <param format="fasta" name="control_sequence" type="data" label="Control sequence file"/> + <section name="adv" title="Additional Options"> + <param argument="-minw" type="integer" value="4" min="0" label="Minimum width to use for position specific priors"/> + <param argument="-maxw" type="integer" value="20" min="0" label="Maximum width to use for position specific priors"/> + <param name="alphabet" type="select" label="Alphabet"> + <option value="-dna" selected="true">DNA</option> + <option value="-protein">protein</option> + <option value="-rna">RNA</option> + </param> + <conditional name="triples_cond"> + <param name="triples" type="select" label="Use spaced triples instead of whole-word matches?" help="Recommended for protein"> + <option value="no" selected="true">No</option> + <option value="yes">yes</option> + </param> + <when value="no"/> + <when value="yes"> + <param argument="-fixedstart" type="boolean" truevalue="-fixedstart" falsevalue="" checked="False" label="Allow triples to start anywhere within a site?" help="Select 'No' to only consider triples starting at the start of the site"/> + </when> + </conditional> + <param argument="-equiv" type="boolean" truevalue="-equiv" falsevalue="" checked="False" label="Match as equal sequences of letters that appear together?"/> + <param argument="-revcomp" type="boolean" truevalue="-revcomp" falsevalue="" checked="False" label="Consider both strands when calculating position specific priors for alphabets?"/> + <param argument="-scalemin" type="boolean" truevalue="-scalemin" falsevalue="" checked="False" label="Set the lowest score value after scaling?"/> + <param argument="-scalemax" type="boolean" truevalue="-scalemax" falsevalue="" checked="False" label="Set the highest score value after scaling?"/> + <param argument="-maxrange" type="boolean" truevalue="-maxrange" falsevalue="" checked="False" label="Choose the width with the biggest difference between minimum and maximum scores before scaling?" help="Select 'No' to choose the width with the biggest maximum score (before scaling)"/> + <param argument="-raw" type="boolean" truevalue="-raw" falsevalue="" checked="False" label="Output scores instead of priors?"/> + <conditional name="report_scores_cond"> + <param name="report_scores" type="select" label="Output primary and control file names, scores and widths?"> + <option value="no" selected="true">No</option> + <option value="yes">yes</option> + </param> + <when value="no"/> + <when value="yes"> + <param argument="-verbose" type="boolean" truevalue="-verbose" falsevalue="" checked="False" label="Report frequency of each score?"/> + </when> + </conditional> + </section> + <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False"> + <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator> + </param> + </inputs> + <outputs> + <data format="memepsp" name="output_psp"/> + <data format="tabular" name="output_tabular" label="${tool.name} (report) on ${on_string}"> + <filter>adv['report_scores_cond']['report_scores'] == 'yes'</filter> + </data> + </outputs> + <tests> + <test> + <param name="primary_sequence" value="meme_psp_protein_input.fasta" ftype="fasta"/> + <param name="control_sequence" value="meme_psp_protein_input.fasta" ftype="fasta"/> + <param name="alphabet" value="protein"/> + <param name="report_scores" value="yes"/> + <param name="verbose" value="-verbose"/> + <param name="non_commercial_use" value="NON_COMMERCIAL_USE"/> + <output name="output_psp" file="output.memepsp"/> + <output name="output_tabular" file="meme_psp_gen_reports_output.tabular" compare="contains"/> + </test> + </tests> + <help> + +.. class:: warningmark + +**WARNING: This tool is only available for non-commercial use. Use for educational, research and non-profit purposes is permitted. +Before using, be sure to review, agree, and comply with the license.** + +psp-gen is used to allow MEME to perform discriminative motif discovery—to find motifs overrepresented in one set of sequences compared to in another set. +It takes two files as input—the sequence file to be input to MEME, (the "primary" file) and a "control" sequence file of sequences believed not to contain +the same motifs as in the "primary" file. psp-gen creates a file for use by MEME that encapsulates information about probable discriminative motifs. psp-gen +records its chosen motif width in the file, and MEME is able to adjust the data when it tries different motif widths. + +.. class:: infomark + +For detailed information on psp-gen, click here_, or view the license_. + +.. _here: http://meme-suite.org/doc/psp-gen.html?man_type=web +.. _license: http://meme-suite.org/doc/copyright.html?man_type=web + +----- + +**Required options** + +* **Primary sequence file** - a file containing FASTA formatted sequences which are to be used as the primary set in PSP calculation. +* **Control sequence file** - a file containing FASTA formatted sequences which are to be used as the control set in PSP calculation. + +**Additional options** + +* **Minimum width to use for position specific priors** - the minimum width to use with selecting the "best" width for PSPs. +* **Maximum width to use for position specific priors** - the maximum width to use with selecting the "best" width for PSPs. +* **Alphabet** - The alphabet to be used, one of DNA, protein or RNA. +* **Use spaced triples instead of whole-word matches** - use spaced triples instead of whole-word matches (recommended when using the protein alphabet). + + * **Allow triples to start anywhere within a site** - when using the -triples option, select 'Yes' to only consider triples starting at the start of the site or 'No' to allow triples to start anywhere in a width 'w' site. + +* **Match as equal sequences of letters that appear together** - select 'Yes' to match as equal any sequence of letter that appears together. Separate letter groups using "-" (e.g. -equiv "IVL-HKR") means treat all occurrences of I, V or L as the same, and all occurrences of H, K or R as the same. +* **Consider both strands when calculating position specific priors for alphabets** - select 'Yes' to consider both strands when calculating PSPs for complementable alphabets or 'No to consider only the given strand. +* **Set the lowest score value after scaling** - select 'Yes' to set the lowest score to 0.1 unless the the following "highest score" option is selected, in which case the lowest score is highest score - 1. +* **Set the highest score value after scaling** - select 'Yes' to set the highest score to 0.9 unless the previous "lowest score" option is selected, in which case the highest score is lowest score + 1. +* **Choose the width with the biggest difference between minimum and maximum scores before scaling** - select 'Yes' to choose the width with the biggest difference between minimum and maximum scores before scaling, or 'No' to choose the width with the biggest maximum score before scaling. +* **Output scores instead of priors** - select 'Yes' to output scores instead of position specific priors. +* **Output primary and control file names, scores and widths** - select 'Yes' to produce an additional tabular output consisting of control file names, lowest and highest scores and lowest and highest widths. + + * **Report frequency of each score** - select 'Yes' to include the frequency of each score in the output. + + </help> + <citations> + <citation type="doi">10.1186/1471-2105-11-179</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_almost-gff_1.txt Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,100 @@ +##gff-version 3 +phiX174 fimo polypeptide_motif 1388 1398 102 + . Name=1;ID=1-1-phiX174;pvalue=6.36e-11;qvalue= 1.25e-09;sequence=AATATCTATAA; +phiX174 fimo polypeptide_motif 847 857 102 + . Name=1;ID=1-2-phiX174;pvalue=7.02e-11;qvalue= 1.25e-09;sequence=AATGTCTAAAG; +phiX174 fimo polypeptide_motif 2301 2311 99.6 + . Name=1;ID=1-3-phiX174;pvalue=1.08e-10;qvalue= 1.29e-09;sequence=AGGTTATAACG; +phiX174 fimo polypeptide_motif 5063 5073 95.6 + . Name=1;ID=1-4-phiX174;pvalue=2.73e-10;qvalue= 2.25e-09;sequence=AGGAGCTAAAG; +phiX174 fimo polypeptide_motif 989 999 95 + . Name=1;ID=1-5-phiX174;pvalue=3.15e-10;qvalue= 2.25e-09;sequence=TGAGGATAAAT; +phiX174 fimo polypeptide_motif 4713 4723 91.1 + . Name=1;ID=1-6-phiX174;pvalue=7.74e-10;qvalue= 3.48e-09;sequence=GACTGCTATCA; +phiX174 fimo polypeptide_motif 5048 5058 90.7 + . Name=1;ID=1-7-phiX174;pvalue=8.51e-10;qvalue= 3.48e-09;sequence=TGCTGCTAAAG; +phiX174 fimo polypeptide_motif 855 865 90.6 + . Name=1;ID=1-8-phiX174;pvalue=8.64e-10;qvalue= 3.48e-09;sequence=AAGGTAAAAAA; +phiX174 fimo polypeptide_motif 3155 3165 90.1 + . Name=1;ID=1-9-phiX174;pvalue=9.76e-10;qvalue= 3.48e-09;sequence=TATGGCTAAAG; +phiX174 fimo polypeptide_motif 5009 5019 90.1 + . Name=1;ID=1-10-phiX174;pvalue=9.76e-10;qvalue= 3.48e-09;sequence=TGTGGCTAAAT; +phiX174 fimo polypeptide_motif 814 824 88.9 + . Name=1;ID=1-11-phiX174;pvalue=1.28e-09;qvalue= 4.14e-09;sequence=TGCGTCAAAAA; +phiX174 fimo polypeptide_motif 2832 2842 88.5 + . Name=1;ID=1-12-phiX174;pvalue=1.42e-09;qvalue= 4.23e-09;sequence=TTGGTCTAACT; +phiX174 fimo polypeptide_motif 3830 3840 87.7 + . Name=1;ID=1-13-phiX174;pvalue=1.7e-09;qvalue= 4.68e-09;sequence=TATTGATAAAG; +phiX174 fimo polypeptide_motif 3560 3570 87.2 + . Name=1;ID=1-14-phiX174;pvalue=1.89e-09;qvalue= 4.82e-09;sequence=TGCGTCTATTA; +phiX174 fimo polypeptide_motif 2882 2892 86.4 + . Name=1;ID=1-15-phiX174;pvalue=2.29e-09;qvalue= 5.46e-09;sequence=AGGTTATTAAA; +phiX174 fimo polypeptide_motif 4453 4463 85.9 + . Name=1;ID=1-16-phiX174;pvalue=2.58e-09;qvalue= 5.75e-09;sequence=AAGGTATTAAG; +phiX174 fimo polypeptide_motif 2493 2503 85.1 + . Name=1;ID=1-17-phiX174;pvalue=3.06e-09;qvalue= 5.79e-09;sequence=GACACCTAAAG; +phiX174 fimo polypeptide_motif 4104 4114 85.1 + . Name=1;ID=1-18-phiX174;pvalue=3.08e-09;qvalue= 5.79e-09;sequence=GGCTTCCATAA; +phiX174 fimo polypeptide_motif 4955 4965 85.1 + . Name=1;ID=1-19-phiX174;pvalue=3.08e-09;qvalue= 5.79e-09;sequence=TGATGCTAAAG; +phiX174 fimo polypeptide_motif 1885 1895 84.4 + . Name=1;ID=1-20-phiX174;pvalue=3.61e-09;qvalue= 6.45e-09;sequence=TGCGACTAAAG; +phiX174 fimo polypeptide_motif 3376 3386 84.2 + . Name=1;ID=1-21-phiX174;pvalue=3.81e-09;qvalue= 6.48e-09;sequence=AGAATCAAAAA; +phiX174 fimo polypeptide_motif 52 62 83.9 + . Name=1;ID=1-22-phiX174;pvalue=4.06e-09;qvalue= 6.58e-09;sequence=TGAGTCGAAAA; +phiX174 fimo polypeptide_motif 1390 1400 83.7 + . Name=1;ID=1-23-phiX174;pvalue=4.26e-09;qvalue= 6.61e-09;sequence=TATCTATAACA; +phiX174 fimo polypeptide_motif 2017 2027 83.4 + . Name=1;ID=1-24-phiX174;pvalue=4.6e-09;qvalue= 6.85e-09;sequence=TTCGTCTAAGA; +phiX174 fimo polypeptide_motif 1000 1010 83.1 + . Name=1;ID=1-25-phiX174;pvalue=4.88e-09;qvalue= 6.97e-09;sequence=TATGTCTAATA; +phiX174 fimo polypeptide_motif 1555 1565 82.5 + . Name=1;ID=1-26-phiX174;pvalue=5.58e-09;qvalue= 7.37e-09;sequence=GACTTCTACCA; +phiX174 fimo polypeptide_motif 4430 4440 82.5 + . Name=1;ID=1-27-phiX174;pvalue=5.62e-09;qvalue= 7.37e-09;sequence=TGAGTATAATT; +phiX174 fimo polypeptide_motif 1927 1937 82.3 + . Name=1;ID=1-28-phiX174;pvalue=5.82e-09;qvalue= 7.37e-09;sequence=GACTTATACCG; +phiX174 fimo polypeptide_motif 2981 2991 82.1 + . Name=1;ID=1-29-phiX174;pvalue=6.13e-09;qvalue= 7.37e-09;sequence=CATGTCTAAAT; +phiX174 fimo polypeptide_motif 4203 4213 82 + . Name=1;ID=1-30-phiX174;pvalue=6.34e-09;qvalue= 7.37e-09;sequence=GACGGCCATAA; +phiX174 fimo polypeptide_motif 1669 1679 81.9 + . Name=1;ID=1-31-phiX174;pvalue=6.4e-09;qvalue= 7.37e-09;sequence=TGGAGGTAAAA; +phiX174 fimo polypeptide_motif 3260 3270 81.5 + . Name=1;ID=1-32-phiX174;pvalue=7.01e-09;qvalue= 7.82e-09;sequence=CGCTGATAAAG; +phiX174 fimo polypeptide_motif 3047 3057 81.3 + . Name=1;ID=1-33-phiX174;pvalue=7.4e-09;qvalue= 7.85e-09;sequence=TACCGATAACA; +phiX174 fimo polypeptide_motif 4176 4186 81.2 + . Name=1;ID=1-34-phiX174;pvalue=7.6e-09;qvalue= 7.85e-09;sequence=GAGTTCGATAA; +phiX174 fimo polypeptide_motif 4118 4128 81.1 + . Name=1;ID=1-35-phiX174;pvalue=7.7e-09;qvalue= 7.85e-09;sequence=GATGGATAACC; +phiX174 fimo polypeptide_motif 5370 5380 80.9 + . Name=1;ID=1-36-phiX174;pvalue=8.03e-09;qvalue= 7.87e-09;sequence=GGCGTATCCAA; +phiX174 fimo polypeptide_motif 1242 1252 80.5 + . Name=1;ID=1-37-phiX174;pvalue=8.94e-09;qvalue= 7.87e-09;sequence=AGTGGATTAAG; +phiX174 fimo polypeptide_motif 2583 2593 80.5 + . Name=1;ID=1-38-phiX174;pvalue=8.94e-09;qvalue= 7.87e-09;sequence=TACATCTGTCA; +phiX174 fimo polypeptide_motif 698 708 80.4 + . Name=1;ID=1-39-phiX174;pvalue=9.13e-09;qvalue= 7.87e-09;sequence=TACGGAAAACA; +phiX174 fimo polypeptide_motif 2299 2309 80.3 + . Name=1;ID=1-40-phiX174;pvalue=9.26e-09;qvalue= 7.87e-09;sequence=TGAGGTTATAA; +phiX174 fimo polypeptide_motif 4189 4199 80.1 + . Name=1;ID=1-41-phiX174;pvalue=9.69e-09;qvalue= 7.87e-09;sequence=GTGATATGTAT; +phiX174 fimo polypeptide_motif 275 285 80.1 + . Name=1;ID=1-42-phiX174;pvalue=9.85e-09;qvalue= 7.87e-09;sequence=GGTTTAGATAT; +phiX174 fimo polypeptide_motif 1801 1811 80 + . Name=1;ID=1-43-phiX174;pvalue=1e-08;qvalue= 7.87e-09;sequence=GACCTATAAAC; +phiX174 fimo polypeptide_motif 1386 1396 79.9 + . Name=1;ID=1-44-phiX174;pvalue=1.03e-08;qvalue= 7.87e-09;sequence=TGAATATCTAT; +phiX174 fimo polypeptide_motif 1303 1313 79.8 + . Name=1;ID=1-45-phiX174;pvalue=1.03e-08;qvalue= 7.87e-09;sequence=TGGTTATATTG; +phiX174 fimo polypeptide_motif 3772 3782 79.8 + . Name=1;ID=1-46-phiX174;pvalue=1.04e-08;qvalue= 7.87e-09;sequence=AGGATATTTCT; +phiX174 fimo polypeptide_motif 1288 1298 79.8 + . Name=1;ID=1-47-phiX174;pvalue=1.04e-08;qvalue= 7.87e-09;sequence=GACTGTTAACA; +phiX174 fimo polypeptide_motif 2577 2587 79.7 + . Name=1;ID=1-48-phiX174;pvalue=1.08e-08;qvalue= 7.87e-09;sequence=GATGGATACAT; +phiX174 fimo polypeptide_motif 937 947 79.6 + . Name=1;ID=1-49-phiX174;pvalue=1.08e-08;qvalue= 7.87e-09;sequence=TTGGTATGTAG; +phiX174 fimo polypeptide_motif 904 914 79.5 + . Name=1;ID=1-50-phiX174;pvalue=1.11e-08;qvalue= 7.93e-09;sequence=AGGTACTAAAG; +phiX174 fimo polypeptide_motif 2279 2289 79.4 + . Name=1;ID=1-51-phiX174;pvalue=1.13e-08;qvalue= 7.93e-09;sequence=TCGTGATAAAA; +phiX174 fimo polypeptide_motif 3164 3174 79.3 + . Name=1;ID=1-52-phiX174;pvalue=1.16e-08;qvalue= 7.98e-09;sequence=AGCTGGTAAAG; +phiX174 fimo polypeptide_motif 24 34 79.1 + . Name=1;ID=1-53-phiX174;pvalue=1.23e-08;qvalue= 8.24e-09;sequence=AGAAGTTAACA; +phiX174 fimo polypeptide_motif 838 848 78.9 + . Name=1;ID=1-54-phiX174;pvalue=1.27e-08;qvalue= 8.24e-09;sequence=GAGTGATGTAA; +phiX174 fimo polypeptide_motif 853 863 78.9 + . Name=1;ID=1-55-phiX174;pvalue=1.27e-08;qvalue= 8.24e-09;sequence=TAAAGGTAAAA; +phiX174 fimo polypeptide_motif 1984 1994 78.6 + . Name=1;ID=1-56-phiX174;pvalue=1.36e-08;qvalue= 8.68e-09;sequence=AATTTCTATGA; +phiX174 fimo polypeptide_motif 1 11 78.3 + . Name=1;ID=1-57-phiX174;pvalue=1.46e-08;qvalue= 9.05e-09;sequence=GAGTTTTATCG; +phiX174 fimo polypeptide_motif 4307 4317 78.3 + . Name=1;ID=1-58-phiX174;pvalue=1.47e-08;qvalue= 9.05e-09;sequence=TATTAATAACA; +phiX174 fimo polypeptide_motif 4303 4313 78.2 + . Name=1;ID=1-59-phiX174;pvalue=1.52e-08;qvalue= 9.19e-09;sequence=TTGATATTAAT; +phiX174 fimo polypeptide_motif 5033 5043 78 + . Name=1;ID=1-60-phiX174;pvalue=1.58e-08;qvalue= 9.41e-09;sequence=GTCAGATATGG; +phiX174 fimo polypeptide_motif 2579 2589 77.6 + . Name=1;ID=1-61-phiX174;pvalue=1.73e-08;qvalue= 1.01e-08;sequence=TGGATACATCT; +phiX174 fimo polypeptide_motif 322 332 77.4 + . Name=1;ID=1-62-phiX174;pvalue=1.82e-08;qvalue= 1.05e-08;sequence=GACATTTTAAA; +phiX174 fimo polypeptide_motif 5001 5011 76.8 + . Name=1;ID=1-63-phiX174;pvalue=2.09e-08;qvalue= 1.19e-08;sequence=GGTTTCTATGT; +phiX174 fimo polypeptide_motif 4217 4227 76.7 + . Name=1;ID=1-64-phiX174;pvalue=2.15e-08;qvalue= 1.2e-08;sequence=TGCTTCTGACG; +phiX174 fimo polypeptide_motif 4262 4272 76.6 + . Name=1;ID=1-65-phiX174;pvalue=2.18e-08;qvalue= 1.2e-08;sequence=AATGGATGAAT; +phiX174 fimo polypeptide_motif 3569 3579 76.5 + . Name=1;ID=1-66-phiX174;pvalue=2.26e-08;qvalue= 1.22e-08;sequence=TATGGAAAACA; +phiX174 fimo polypeptide_motif 194 204 76.4 + . Name=1;ID=1-67-phiX174;pvalue=2.29e-08;qvalue= 1.22e-08;sequence=ATCAACTAACG; +phiX174 fimo polypeptide_motif 131 141 76 + . Name=1;ID=1-68-phiX174;pvalue=2.49e-08;qvalue= 1.31e-08;sequence=AAATGAGAAAA; +phiX174 fimo polypeptide_motif 1491 1501 75.9 + . Name=1;ID=1-69-phiX174;pvalue=2.55e-08;qvalue= 1.32e-08;sequence=GCCATCTCAAA; +phiX174 fimo polypeptide_motif 434 444 75.7 + . Name=1;ID=1-70-phiX174;pvalue=2.67e-08;qvalue= 1.36e-08;sequence=GGCCTCTATTA; +phiX174 fimo polypeptide_motif 4565 4575 75.6 + . Name=1;ID=1-71-phiX174;pvalue=2.73e-08;qvalue= 1.36e-08;sequence=TTGGTTTATCG; +phiX174 fimo polypeptide_motif 102 112 75.6 + . Name=1;ID=1-72-phiX174;pvalue=2.75e-08;qvalue= 1.36e-08;sequence=GAATTAAATCG; +phiX174 fimo polypeptide_motif 903 913 75.5 + . Name=1;ID=1-73-phiX174;pvalue=2.82e-08;qvalue= 1.38e-08;sequence=GAGGTACTAAA; +phiX174 fimo polypeptide_motif 4748 4758 75.2 + . Name=1;ID=1-74-phiX174;pvalue=3.01e-08;qvalue= 1.45e-08;sequence=TACAGCTAATG; +phiX174 fimo polypeptide_motif 2622 2632 75 + . Name=1;ID=1-75-phiX174;pvalue=3.16e-08;qvalue= 1.5e-08;sequence=TGCTGATATTG; +phiX174 fimo polypeptide_motif 467 477 74.7 + . Name=1;ID=1-76-phiX174;pvalue=3.35e-08;qvalue= 1.57e-08;sequence=TTTGGATTTAA; +phiX174 fimo polypeptide_motif 4033 4043 74.6 + . Name=1;ID=1-77-phiX174;pvalue=3.44e-08;qvalue= 1.58e-08;sequence=AGCGTATCGAG; +phiX174 fimo polypeptide_motif 1348 1358 74.6 + . Name=1;ID=1-78-phiX174;pvalue=3.46e-08;qvalue= 1.58e-08;sequence=TACCAATAAAA; +phiX174 fimo polypeptide_motif 239 249 74.4 + . Name=1;ID=1-79-phiX174;pvalue=3.62e-08;qvalue= 1.64e-08;sequence=AGTGGCTTAAT; +phiX174 fimo polypeptide_motif 500 510 74.1 + . Name=1;ID=1-80-phiX174;pvalue=3.84e-08;qvalue= 1.71e-08;sequence=GACGAGTAACA; +phiX174 fimo polypeptide_motif 3001 3011 74 + . Name=1;ID=1-81-phiX174;pvalue=3.93e-08;qvalue= 1.73e-08;sequence=GCGGTCAAAAA; +phiX174 fimo polypeptide_motif 3776 3786 74 + . Name=1;ID=1-82-phiX174;pvalue=3.98e-08;qvalue= 1.73e-08;sequence=TATTTCTAATG; +phiX174 fimo polypeptide_motif 2026 2036 73.9 + . Name=1;ID=1-83-phiX174;pvalue=4.06e-08;qvalue= 1.75e-08;sequence=GAAGTTTAAGA; +phiX174 fimo polypeptide_motif 4237 4247 73.8 + . Name=1;ID=1-84-phiX174;pvalue=4.12e-08;qvalue= 1.75e-08;sequence=AGTTTGTATCT; +phiX174 fimo polypeptide_motif 803 813 73.7 + . Name=1;ID=1-85-phiX174;pvalue=4.24e-08;qvalue= 1.78e-08;sequence=AGAAGAAAACG; +phiX174 fimo polypeptide_motif 3770 3780 73.6 + . Name=1;ID=1-86-phiX174;pvalue=4.35e-08;qvalue= 1.81e-08;sequence=AAAGGATATTT; +phiX174 fimo polypeptide_motif 3429 3439 73.5 + . Name=1;ID=1-87-phiX174;pvalue=4.45e-08;qvalue= 1.82e-08;sequence=GAGATGCAAAA; +phiX174 fimo polypeptide_motif 99 109 73.5 + . Name=1;ID=1-88-phiX174;pvalue=4.48e-08;qvalue= 1.82e-08;sequence=TACGAATTAAA; +phiX174 fimo polypeptide_motif 67 77 73.2 + . Name=1;ID=1-89-phiX174;pvalue=4.78e-08;qvalue= 1.92e-08;sequence=TCTTGATAAAG; +phiX174 fimo polypeptide_motif 5332 5342 72.9 + . Name=1;ID=1-90-phiX174;pvalue=5.13e-08;qvalue= 2.01e-08;sequence=ATCTGCTCAAA; +phiX174 fimo polypeptide_motif 277 287 72.9 + . Name=1;ID=1-91-phiX174;pvalue=5.14e-08;qvalue= 2.01e-08;sequence=TTTAGATATGA; +phiX174 fimo polypeptide_motif 4338 4348 72.8 + . Name=1;ID=1-92-phiX174;pvalue=5.18e-08;qvalue= 2.01e-08;sequence=GGGGACGAAAA; +phiX174 fimo polypeptide_motif 3812 3822 72.8 + . Name=1;ID=1-93-phiX174;pvalue=5.28e-08;qvalue= 2.03e-08;sequence=GGTTGATATTT; +phiX174 fimo polypeptide_motif 1909 1919 72.6 + . Name=1;ID=1-94-phiX174;pvalue=5.51e-08;qvalue= 2.08e-08;sequence=TAACGCTAAAG; +phiX174 fimo polypeptide_motif 3000 3010 72.6 + . Name=1;ID=1-95-phiX174;pvalue=5.54e-08;qvalue= 2.08e-08;sequence=GGCGGTCAAAA; +phiX174 fimo polypeptide_motif 3891 3901 72.4 + . Name=1;ID=1-96-phiX174;pvalue=5.75e-08;qvalue= 2.11e-08;sequence=ATTGGCTCTAA; +phiX174 fimo polypeptide_motif 3079 3089 72.4 + . Name=1;ID=1-97-phiX174;pvalue=5.76e-08;qvalue= 2.11e-08;sequence=CTGGTATTAAA; +phiX174 fimo polypeptide_motif 37 47 72.4 + . Name=1;ID=1-98-phiX174;pvalue=5.79e-08;qvalue= 2.11e-08;sequence=TTCGGATATTT; +phiX174 fimo polypeptide_motif 380 390 72.2 + . Name=1;ID=1-99-phiX174;pvalue=6.01e-08;qvalue= 2.17e-08;sequence=GTAAGAAATCA;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_almost-gff_2.txt Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,100 @@ +##gff-version 3 +phiX174 fimo polypeptide_motif 1388 1398 102 + . Name=1;ID=1-1-phiX174;pvalue=6.36e-11;sequence=AATATCTATAA; +phiX174 fimo polypeptide_motif 847 857 102 + . Name=1;ID=1-2-phiX174;pvalue=7.02e-11;sequence=AATGTCTAAAG; +phiX174 fimo polypeptide_motif 2301 2311 99.6 + . Name=1;ID=1-3-phiX174;pvalue=1.08e-10;sequence=AGGTTATAACG; +phiX174 fimo polypeptide_motif 5063 5073 95.6 + . Name=1;ID=1-4-phiX174;pvalue=2.73e-10;sequence=AGGAGCTAAAG; +phiX174 fimo polypeptide_motif 989 999 95 + . Name=1;ID=1-5-phiX174;pvalue=3.15e-10;sequence=TGAGGATAAAT; +phiX174 fimo polypeptide_motif 4713 4723 91.1 + . Name=1;ID=1-6-phiX174;pvalue=7.74e-10;sequence=GACTGCTATCA; +phiX174 fimo polypeptide_motif 5048 5058 90.7 + . Name=1;ID=1-7-phiX174;pvalue=8.51e-10;sequence=TGCTGCTAAAG; +phiX174 fimo polypeptide_motif 855 865 90.6 + . Name=1;ID=1-8-phiX174;pvalue=8.64e-10;sequence=AAGGTAAAAAA; +phiX174 fimo polypeptide_motif 3155 3165 90.1 + . Name=1;ID=1-9-phiX174;pvalue=9.76e-10;sequence=TATGGCTAAAG; +phiX174 fimo polypeptide_motif 5009 5019 90.1 + . Name=1;ID=1-10-phiX174;pvalue=9.76e-10;sequence=TGTGGCTAAAT; +phiX174 fimo polypeptide_motif 814 824 88.9 + . Name=1;ID=1-11-phiX174;pvalue=1.28e-09;sequence=TGCGTCAAAAA; +phiX174 fimo polypeptide_motif 2832 2842 88.5 + . Name=1;ID=1-12-phiX174;pvalue=1.42e-09;sequence=TTGGTCTAACT; +phiX174 fimo polypeptide_motif 3830 3840 87.7 + . Name=1;ID=1-13-phiX174;pvalue=1.7e-09;sequence=TATTGATAAAG; +phiX174 fimo polypeptide_motif 3560 3570 87.2 + . Name=1;ID=1-14-phiX174;pvalue=1.89e-09;sequence=TGCGTCTATTA; +phiX174 fimo polypeptide_motif 2882 2892 86.4 + . Name=1;ID=1-15-phiX174;pvalue=2.29e-09;sequence=AGGTTATTAAA; +phiX174 fimo polypeptide_motif 4453 4463 85.9 + . Name=1;ID=1-16-phiX174;pvalue=2.58e-09;sequence=AAGGTATTAAG; +phiX174 fimo polypeptide_motif 2493 2503 85.1 + . Name=1;ID=1-17-phiX174;pvalue=3.06e-09;sequence=GACACCTAAAG; +phiX174 fimo polypeptide_motif 4104 4114 85.1 + . Name=1;ID=1-18-phiX174;pvalue=3.08e-09;sequence=GGCTTCCATAA; +phiX174 fimo polypeptide_motif 4955 4965 85.1 + . Name=1;ID=1-19-phiX174;pvalue=3.08e-09;sequence=TGATGCTAAAG; +phiX174 fimo polypeptide_motif 1885 1895 84.4 + . Name=1;ID=1-20-phiX174;pvalue=3.61e-09;sequence=TGCGACTAAAG; +phiX174 fimo polypeptide_motif 3376 3386 84.2 + . Name=1;ID=1-21-phiX174;pvalue=3.81e-09;sequence=AGAATCAAAAA; +phiX174 fimo polypeptide_motif 52 62 83.9 + . Name=1;ID=1-22-phiX174;pvalue=4.06e-09;sequence=TGAGTCGAAAA; +phiX174 fimo polypeptide_motif 1390 1400 83.7 + . Name=1;ID=1-23-phiX174;pvalue=4.26e-09;sequence=TATCTATAACA; +phiX174 fimo polypeptide_motif 2017 2027 83.4 + . Name=1;ID=1-24-phiX174;pvalue=4.6e-09;sequence=TTCGTCTAAGA; +phiX174 fimo polypeptide_motif 1000 1010 83.1 + . Name=1;ID=1-25-phiX174;pvalue=4.88e-09;sequence=TATGTCTAATA; +phiX174 fimo polypeptide_motif 1555 1565 82.5 + . Name=1;ID=1-26-phiX174;pvalue=5.58e-09;sequence=GACTTCTACCA; +phiX174 fimo polypeptide_motif 4430 4440 82.5 + . Name=1;ID=1-27-phiX174;pvalue=5.62e-09;sequence=TGAGTATAATT; +phiX174 fimo polypeptide_motif 1927 1937 82.3 + . Name=1;ID=1-28-phiX174;pvalue=5.82e-09;sequence=GACTTATACCG; +phiX174 fimo polypeptide_motif 2981 2991 82.1 + . Name=1;ID=1-29-phiX174;pvalue=6.13e-09;sequence=CATGTCTAAAT; +phiX174 fimo polypeptide_motif 4203 4213 82 + . Name=1;ID=1-30-phiX174;pvalue=6.34e-09;sequence=GACGGCCATAA; +phiX174 fimo polypeptide_motif 1669 1679 81.9 + . Name=1;ID=1-31-phiX174;pvalue=6.4e-09;sequence=TGGAGGTAAAA; +phiX174 fimo polypeptide_motif 3260 3270 81.5 + . Name=1;ID=1-32-phiX174;pvalue=7.01e-09;sequence=CGCTGATAAAG; +phiX174 fimo polypeptide_motif 3047 3057 81.3 + . Name=1;ID=1-33-phiX174;pvalue=7.4e-09;sequence=TACCGATAACA; +phiX174 fimo polypeptide_motif 4176 4186 81.2 + . Name=1;ID=1-34-phiX174;pvalue=7.6e-09;sequence=GAGTTCGATAA; +phiX174 fimo polypeptide_motif 4118 4128 81.1 + . Name=1;ID=1-35-phiX174;pvalue=7.7e-09;sequence=GATGGATAACC; +phiX174 fimo polypeptide_motif 5370 5380 80.9 + . Name=1;ID=1-36-phiX174;pvalue=8.03e-09;sequence=GGCGTATCCAA; +phiX174 fimo polypeptide_motif 1242 1252 80.5 + . Name=1;ID=1-37-phiX174;pvalue=8.94e-09;sequence=AGTGGATTAAG; +phiX174 fimo polypeptide_motif 2583 2593 80.5 + . Name=1;ID=1-38-phiX174;pvalue=8.94e-09;sequence=TACATCTGTCA; +phiX174 fimo polypeptide_motif 698 708 80.4 + . Name=1;ID=1-39-phiX174;pvalue=9.13e-09;sequence=TACGGAAAACA; +phiX174 fimo polypeptide_motif 2299 2309 80.3 + . Name=1;ID=1-40-phiX174;pvalue=9.26e-09;sequence=TGAGGTTATAA; +phiX174 fimo polypeptide_motif 4189 4199 80.1 + . Name=1;ID=1-41-phiX174;pvalue=9.69e-09;sequence=GTGATATGTAT; +phiX174 fimo polypeptide_motif 275 285 80.1 + . Name=1;ID=1-42-phiX174;pvalue=9.85e-09;sequence=GGTTTAGATAT; +phiX174 fimo polypeptide_motif 1801 1811 80 + . Name=1;ID=1-43-phiX174;pvalue=1e-08;sequence=GACCTATAAAC; +phiX174 fimo polypeptide_motif 1386 1396 79.9 + . Name=1;ID=1-44-phiX174;pvalue=1.03e-08;sequence=TGAATATCTAT; +phiX174 fimo polypeptide_motif 1303 1313 79.8 + . Name=1;ID=1-45-phiX174;pvalue=1.03e-08;sequence=TGGTTATATTG; +phiX174 fimo polypeptide_motif 3772 3782 79.8 + . Name=1;ID=1-46-phiX174;pvalue=1.04e-08;sequence=AGGATATTTCT; +phiX174 fimo polypeptide_motif 1288 1298 79.8 + . Name=1;ID=1-47-phiX174;pvalue=1.04e-08;sequence=GACTGTTAACA; +phiX174 fimo polypeptide_motif 2577 2587 79.7 + . Name=1;ID=1-48-phiX174;pvalue=1.08e-08;sequence=GATGGATACAT; +phiX174 fimo polypeptide_motif 937 947 79.6 + . Name=1;ID=1-49-phiX174;pvalue=1.08e-08;sequence=TTGGTATGTAG; +phiX174 fimo polypeptide_motif 904 914 79.5 + . Name=1;ID=1-50-phiX174;pvalue=1.11e-08;sequence=AGGTACTAAAG; +phiX174 fimo polypeptide_motif 2279 2289 79.4 + . Name=1;ID=1-51-phiX174;pvalue=1.13e-08;sequence=TCGTGATAAAA; +phiX174 fimo polypeptide_motif 3164 3174 79.3 + . Name=1;ID=1-52-phiX174;pvalue=1.16e-08;sequence=AGCTGGTAAAG; +phiX174 fimo polypeptide_motif 24 34 79.1 + . Name=1;ID=1-53-phiX174;pvalue=1.23e-08;sequence=AGAAGTTAACA; +phiX174 fimo polypeptide_motif 838 848 78.9 + . Name=1;ID=1-54-phiX174;pvalue=1.27e-08;sequence=GAGTGATGTAA; +phiX174 fimo polypeptide_motif 853 863 78.9 + . Name=1;ID=1-55-phiX174;pvalue=1.27e-08;sequence=TAAAGGTAAAA; +phiX174 fimo polypeptide_motif 1984 1994 78.6 + . Name=1;ID=1-56-phiX174;pvalue=1.36e-08;sequence=AATTTCTATGA; +phiX174 fimo polypeptide_motif 1 11 78.3 + . Name=1;ID=1-57-phiX174;pvalue=1.46e-08;sequence=GAGTTTTATCG; +phiX174 fimo polypeptide_motif 4307 4317 78.3 + . Name=1;ID=1-58-phiX174;pvalue=1.47e-08;sequence=TATTAATAACA; +phiX174 fimo polypeptide_motif 4303 4313 78.2 + . Name=1;ID=1-59-phiX174;pvalue=1.52e-08;sequence=TTGATATTAAT; +phiX174 fimo polypeptide_motif 5033 5043 78 + . Name=1;ID=1-60-phiX174;pvalue=1.58e-08;sequence=GTCAGATATGG; +phiX174 fimo polypeptide_motif 2579 2589 77.6 + . Name=1;ID=1-61-phiX174;pvalue=1.73e-08;sequence=TGGATACATCT; +phiX174 fimo polypeptide_motif 322 332 77.4 + . Name=1;ID=1-62-phiX174;pvalue=1.82e-08;sequence=GACATTTTAAA; +phiX174 fimo polypeptide_motif 5001 5011 76.8 + . Name=1;ID=1-63-phiX174;pvalue=2.09e-08;sequence=GGTTTCTATGT; +phiX174 fimo polypeptide_motif 4217 4227 76.7 + . Name=1;ID=1-64-phiX174;pvalue=2.15e-08;sequence=TGCTTCTGACG; +phiX174 fimo polypeptide_motif 4262 4272 76.6 + . Name=1;ID=1-65-phiX174;pvalue=2.18e-08;sequence=AATGGATGAAT; +phiX174 fimo polypeptide_motif 3569 3579 76.5 + . Name=1;ID=1-66-phiX174;pvalue=2.26e-08;sequence=TATGGAAAACA; +phiX174 fimo polypeptide_motif 194 204 76.4 + . Name=1;ID=1-67-phiX174;pvalue=2.29e-08;sequence=ATCAACTAACG; +phiX174 fimo polypeptide_motif 131 141 76 + . Name=1;ID=1-68-phiX174;pvalue=2.49e-08;sequence=AAATGAGAAAA; +phiX174 fimo polypeptide_motif 1491 1501 75.9 + . Name=1;ID=1-69-phiX174;pvalue=2.55e-08;sequence=GCCATCTCAAA; +phiX174 fimo polypeptide_motif 434 444 75.7 + . Name=1;ID=1-70-phiX174;pvalue=2.67e-08;sequence=GGCCTCTATTA; +phiX174 fimo polypeptide_motif 4565 4575 75.6 + . Name=1;ID=1-71-phiX174;pvalue=2.73e-08;sequence=TTGGTTTATCG; +phiX174 fimo polypeptide_motif 102 112 75.6 + . Name=1;ID=1-72-phiX174;pvalue=2.75e-08;sequence=GAATTAAATCG; +phiX174 fimo polypeptide_motif 903 913 75.5 + . Name=1;ID=1-73-phiX174;pvalue=2.82e-08;sequence=GAGGTACTAAA; +phiX174 fimo polypeptide_motif 4748 4758 75.2 + . Name=1;ID=1-74-phiX174;pvalue=3.01e-08;sequence=TACAGCTAATG; +phiX174 fimo polypeptide_motif 2622 2632 75 + . Name=1;ID=1-75-phiX174;pvalue=3.16e-08;sequence=TGCTGATATTG; +phiX174 fimo polypeptide_motif 467 477 74.7 + . Name=1;ID=1-76-phiX174;pvalue=3.35e-08;sequence=TTTGGATTTAA; +phiX174 fimo polypeptide_motif 4033 4043 74.6 + . Name=1;ID=1-77-phiX174;pvalue=3.44e-08;sequence=AGCGTATCGAG; +phiX174 fimo polypeptide_motif 1348 1358 74.6 + . Name=1;ID=1-78-phiX174;pvalue=3.46e-08;sequence=TACCAATAAAA; +phiX174 fimo polypeptide_motif 239 249 74.4 + . Name=1;ID=1-79-phiX174;pvalue=3.62e-08;sequence=AGTGGCTTAAT; +phiX174 fimo polypeptide_motif 500 510 74.1 + . Name=1;ID=1-80-phiX174;pvalue=3.84e-08;sequence=GACGAGTAACA; +phiX174 fimo polypeptide_motif 3001 3011 74 + . Name=1;ID=1-81-phiX174;pvalue=3.93e-08;sequence=GCGGTCAAAAA; +phiX174 fimo polypeptide_motif 3776 3786 74 + . Name=1;ID=1-82-phiX174;pvalue=3.98e-08;sequence=TATTTCTAATG; +phiX174 fimo polypeptide_motif 2026 2036 73.9 + . Name=1;ID=1-83-phiX174;pvalue=4.06e-08;sequence=GAAGTTTAAGA; +phiX174 fimo polypeptide_motif 4237 4247 73.8 + . Name=1;ID=1-84-phiX174;pvalue=4.12e-08;sequence=AGTTTGTATCT; +phiX174 fimo polypeptide_motif 803 813 73.7 + . Name=1;ID=1-85-phiX174;pvalue=4.24e-08;sequence=AGAAGAAAACG; +phiX174 fimo polypeptide_motif 3770 3780 73.6 + . Name=1;ID=1-86-phiX174;pvalue=4.35e-08;sequence=AAAGGATATTT; +phiX174 fimo polypeptide_motif 3429 3439 73.5 + . Name=1;ID=1-87-phiX174;pvalue=4.45e-08;sequence=GAGATGCAAAA; +phiX174 fimo polypeptide_motif 99 109 73.5 + . Name=1;ID=1-88-phiX174;pvalue=4.48e-08;sequence=TACGAATTAAA; +phiX174 fimo polypeptide_motif 67 77 73.2 + . Name=1;ID=1-89-phiX174;pvalue=4.78e-08;sequence=TCTTGATAAAG; +phiX174 fimo polypeptide_motif 5332 5342 72.9 + . Name=1;ID=1-90-phiX174;pvalue=5.13e-08;sequence=ATCTGCTCAAA; +phiX174 fimo polypeptide_motif 277 287 72.9 + . Name=1;ID=1-91-phiX174;pvalue=5.14e-08;sequence=TTTAGATATGA; +phiX174 fimo polypeptide_motif 4338 4348 72.8 + . Name=1;ID=1-92-phiX174;pvalue=5.18e-08;sequence=GGGGACGAAAA; +phiX174 fimo polypeptide_motif 3812 3822 72.8 + . Name=1;ID=1-93-phiX174;pvalue=5.28e-08;sequence=GGTTGATATTT; +phiX174 fimo polypeptide_motif 1909 1919 72.6 + . Name=1;ID=1-94-phiX174;pvalue=5.51e-08;sequence=TAACGCTAAAG; +phiX174 fimo polypeptide_motif 3000 3010 72.6 + . Name=1;ID=1-95-phiX174;pvalue=5.54e-08;sequence=GGCGGTCAAAA; +phiX174 fimo polypeptide_motif 3891 3901 72.4 + . Name=1;ID=1-96-phiX174;pvalue=5.75e-08;sequence=ATTGGCTCTAA; +phiX174 fimo polypeptide_motif 3079 3089 72.4 + . Name=1;ID=1-97-phiX174;pvalue=5.76e-08;sequence=CTGGTATTAAA; +phiX174 fimo polypeptide_motif 37 47 72.4 + . Name=1;ID=1-98-phiX174;pvalue=5.79e-08;sequence=TTCGGATATTT; +phiX174 fimo polypeptide_motif 380 390 72.2 + . Name=1;ID=1-99-phiX174;pvalue=6.01e-08;sequence=GTAAGAAATCA;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_html_1.html Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,97 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> +<meta charset="UTF-8"> +<title>FIMO Results</title> +<style type="text/css"> +td.left {text-align: left;} +td.right {text-align: right; padding-right: 1cm;} +</style> +</head> +<body bgcolor="#D5F0FF"> +<a name="top_buttons"></a> +<hr> +<table summary="buttons" align="left" cellspacing="0"> +<tr> +<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td> +<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td> +<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td> +</tr> +</table> +<br/> +<br/> +<hr/> +<center><big><b>FIMO - Motif search tool</b></big></center> +<hr> +<p> +For further information on how to interpret these results +or to get a copy of the FIMO software please access +<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p> +<p>If you use FIMO in your research, please cite the following paper:<br> +Charles E. Grant, Timothy L. Bailey, and William Stafford Noble, +"FIMO: Scanning for occurrences of a given motif", +<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011. +<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p> +<hr> +<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center> +<hr> +<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;"> +<p> + <br /> + Database contains 1 sequences, 5386 residues +</p> +<p> + <table> + <thead> + <tr> + <th style="border-bottom: 1px dashed;">MOTIF</th> + <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th> + <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" > + BEST POSSIBLE MATCH + </th> + </tr> + </thead> + <tbody> + <tr> + <td style="text-align:right;">1</td> + <td style="text-align:right;padding-left: 1em;">11</td> + <td style="text-align:left;padding-left: 1em;">GGGGTATAAAA</td> + </tr> + </tbody> + </table> +</p> +<p> +Random model letter frequencies (from non-redundant database): +<br/> + +A 0.073 C 0.018 D 0.052 E 0.062 F 0.040 G 0.069 H 0.022 I 0.056 K 0.058 +L 0.092 M 0.023 N 0.046 P 0.051 Q 0.041 R 0.052 S 0.074 T 0.059 V 0.064 +W 0.013 Y 0.033 </p> +</div> +<hr> +<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center> +<hr> +<ul> +<li> +There were 1937 motif occurences with a p-value less than 0.0001. +<b>Only the most significant 1000 matches are shown here.</b> + +The full set of motif occurences can be seen in the +tab-delimited plain text output file +<a href="fimo.txt">fimo.txt</a>, +the GFF file +<a href="fimo.gff">fimo.gff</a> +which may be suitable for uploading to the +<a href="http://genome.ucsc.edu/cgi-bin/hgTables">UCSC Genome Table Browser</a> +(assuming the FASTA input sequences included genomic coordinates in UCSC or Galaxy format), +or the XML file +<a href="fimo.xml">fimo.xml</a>. +</li> +<li> +The p-value of a motif occurrence is defined as the +probability of a random sequence of the same length as the motif +matching that position of the sequence with as good or better a score. +</li> +<li> +The score for the match of a position in a sequence to a motif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_html_2.html Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,97 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html xmlns:cis="http://zlab.bu.edu/schema/cisml" xmlns:fimo="http://noble.gs.washington.edu/schema/cisml" xmlns:mem="http://noble.gs.washington.edu/meme"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> +<meta charset="UTF-8"> +<title>FIMO Results</title> +<style type="text/css"> +td.left {text-align: left;} +td.right {text-align: right; padding-right: 1cm;} +</style> +</head> +<body bgcolor="#D5F0FF"> +<a name="top_buttons"></a> +<hr> +<table summary="buttons" align="left" cellspacing="0"> +<tr> +<td bgcolor="#00FFFF"><a href="#database_and_motifs"><b>Database and Motifs</b></a></td> +<td bgcolor="#DDFFDD"><a href="#sec_i"><b>High-scoring Motif Occurences</b></a></td> +<td bgcolor="#DDDDFF"><a href="#debugging_information"><b>Debugging Information</b></a></td> +</tr> +</table> +<br/> +<br/> +<hr/> +<center><big><b>FIMO - Motif search tool</b></big></center> +<hr> +<p> +For further information on how to interpret these results +or to get a copy of the FIMO software please access +<a href="http://meme.nbcr.net">http://meme.nbcr.net</a></p> +<p>If you use FIMO in your research, please cite the following paper:<br> +Charles E. Grant, Timothy L. Bailey, and William Stafford Noble, +"FIMO: Scanning for occurrences of a given motif", +<i>Bioinformatics</i>, <b>27</b>(7):1017-1018, 2011. +<a href="http://bioinformatics.oxfordjournals.org/content/27/7/1017">[full text]</a></p> +<hr> +<center><big><b><a name="database_and_motifs">DATABASE AND MOTIFS</a></b></big></center> +<hr> +<div style="padding-left: 0.75in; line-height: 1em; font-family: monospace;"> +<p> + <br /> + Database contains 1 sequences, 5386 residues +</p> +<p> + <table> + <thead> + <tr> + <th style="border-bottom: 1px dashed;">MOTIF</th> + <th style="border-bottom: 1px dashed; padding-left: 1em;">WIDTH</th> + <th style="border-bottom: 1px dashed; padding-left: 1em;text-align:left;" > + BEST POSSIBLE MATCH + </th> + </tr> + </thead> + <tbody> + <tr> + <td style="text-align:right;">1</td> + <td style="text-align:right;padding-left: 1em;">11</td> + <td style="text-align:left;padding-left: 1em;">GGGGTATAAAA</td> + </tr> + </tbody> + </table> +</p> +<p> +Random model letter frequencies (from non-redundant database): +<br/> + +A 0.073 C 0.018 D 0.052 E 0.062 F 0.040 G 0.069 H 0.022 I 0.056 K 0.058 +L 0.092 M 0.023 N 0.046 P 0.051 Q 0.041 R 0.052 S 0.074 T 0.059 V 0.064 +W 0.013 Y 0.033 </p> +</div> +<hr> +<center><big><b><a name="sec_i">SECTION I: HIGH-SCORING MOTIF OCCURENCES</a></b></big></center> +<hr> +<ul> +<li> +There were 1937 motif occurences with a p-value less than 0.0001. +<b>Only the most significant 1000 matches are shown here.</b> + +The full set of motif occurences can be seen in the +tab-delimited plain text output file +<a href="fimo.txt">fimo.txt</a>, +the GFF file +<a href="fimo.gff">fimo.gff</a> +which may be suitable for uploading to the +<a href="http://genome.ucsc.edu/cgi-bin/hgTables">UCSC Genome Table Browser</a> +(assuming the FASTA input sequences included genomic coordinates in UCSC or Galaxy format), +or the XML file +<a href="fimo.xml">fimo.xml</a>. +</li> +<li> +The p-value of a motif occurrence is defined as the +probability of a random sequence of the same length as the motif +matching that position of the sequence with as good or better a score. +</li> +<li> +The score for the match of a position in a sequence to a motif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_interval_1.txt Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,100 @@ +#chr start end pattern name score strand matched sequence p-value q-value +phiX174 1387 1398 1 + + 1.25e-09 29.4024 6.36e-11 +phiX174 846 857 1 + + 1.25e-09 29.122 7.02e-11 +phiX174 2300 2311 1 + + 1.29e-09 27.6463 1.08e-10 +phiX174 5062 5073 1 + + 2.25e-09 25.5366 2.73e-10 +phiX174 988 999 1 + + 2.25e-09 25.3049 3.15e-10 +phiX174 4712 4723 1 + + 3.48e-09 23.622 7.74e-10 +phiX174 5047 5058 1 + + 3.48e-09 23.3293 8.51e-10 +phiX174 854 865 1 + + 3.48e-09 23.3049 8.64e-10 +phiX174 3154 3165 1 + + 3.48e-09 23.0366 9.76e-10 +phiX174 5008 5019 1 + + 3.48e-09 23.0366 9.76e-10 +phiX174 813 824 1 + + 4.14e-09 22.5854 1.28e-09 +phiX174 2831 2842 1 + + 4.23e-09 22.3415 1.42e-09 +phiX174 3829 3840 1 + + 4.68e-09 21.8293 1.7e-09 +phiX174 3559 3570 1 + + 4.82e-09 21.5976 1.89e-09 +phiX174 2881 2892 1 + + 5.46e-09 21.1951 2.29e-09 +phiX174 4452 4463 1 + + 5.75e-09 20.8902 2.58e-09 +phiX174 2492 2503 1 + + 5.79e-09 20.3415 3.06e-09 +phiX174 4103 4114 1 + + 5.79e-09 20.3171 3.08e-09 +phiX174 4954 4965 1 + + 5.79e-09 20.3171 3.08e-09 +phiX174 1884 1895 1 + + 6.45e-09 19.9268 3.61e-09 +phiX174 3375 3386 1 + + 6.48e-09 19.7683 3.81e-09 +phiX174 51 62 1 + + 6.58e-09 19.5732 4.06e-09 +phiX174 1389 1400 1 + + 6.61e-09 19.378 4.26e-09 +phiX174 2016 2027 1 + + 6.85e-09 19.0854 4.6e-09 +phiX174 999 1010 1 + + 6.97e-09 18.878 4.88e-09 +phiX174 1554 1565 1 + + 7.37e-09 18.439 5.58e-09 +phiX174 4429 4440 1 + + 7.37e-09 18.4268 5.62e-09 +phiX174 1926 1937 1 + + 7.37e-09 18.2927 5.82e-09 +phiX174 2980 2991 1 + + 7.37e-09 18.0732 6.13e-09 +phiX174 4202 4213 1 + + 7.37e-09 17.9268 6.34e-09 +phiX174 1668 1679 1 + + 7.37e-09 17.8659 6.4e-09 +phiX174 3259 3270 1 + + 7.82e-09 17.5 7.01e-09 +phiX174 3046 3057 1 + + 7.85e-09 17.2805 7.4e-09 +phiX174 4175 4186 1 + + 7.85e-09 17.1829 7.6e-09 +phiX174 4117 4128 1 + + 7.85e-09 17.1341 7.7e-09 +phiX174 5369 5380 1 + + 7.87e-09 16.9878 8.03e-09 +phiX174 1241 1252 1 + + 7.87e-09 16.5122 8.94e-09 +phiX174 2582 2593 1 + + 7.87e-09 16.5122 8.94e-09 +phiX174 697 708 1 + + 7.87e-09 16.4146 9.13e-09 +phiX174 2298 2309 1 + + 7.87e-09 16.3537 9.26e-09 +phiX174 4188 4199 1 + + 7.87e-09 16.1707 9.69e-09 +phiX174 274 285 1 + + 7.87e-09 16.0976 9.85e-09 +phiX174 1800 1811 1 + + 7.87e-09 16.0366 1e-08 +phiX174 1385 1396 1 + + 7.87e-09 15.9268 1.03e-08 +phiX174 1302 1313 1 + + 7.87e-09 15.9024 1.03e-08 +phiX174 3771 3782 1 + + 7.87e-09 15.878 1.04e-08 +phiX174 1287 1298 1 + + 7.87e-09 15.8659 1.04e-08 +phiX174 2576 2587 1 + + 7.87e-09 15.7683 1.08e-08 +phiX174 936 947 1 + + 7.87e-09 15.7561 1.08e-08 +phiX174 903 914 1 + + 7.93e-09 15.6585 1.11e-08 +phiX174 2278 2289 1 + + 7.93e-09 15.5854 1.13e-08 +phiX174 3163 3174 1 + + 7.98e-09 15.5 1.16e-08 +phiX174 23 34 1 + + 8.24e-09 15.3293 1.23e-08 +phiX174 837 848 1 + + 8.24e-09 15.2561 1.27e-08 +phiX174 852 863 1 + + 8.24e-09 15.2561 1.27e-08 +phiX174 1983 1994 1 + + 8.68e-09 15.0244 1.36e-08 +phiX174 0 11 1 + + 9.05e-09 14.8293 1.46e-08 +phiX174 4306 4317 1 + + 9.05e-09 14.7927 1.47e-08 +phiX174 4302 4313 1 + + 9.19e-09 14.6585 1.52e-08 +phiX174 5032 5043 1 + + 9.41e-09 14.561 1.58e-08 +phiX174 2578 2589 1 + + 1.01e-08 14.2927 1.73e-08 +phiX174 321 332 1 + + 1.05e-08 14.1951 1.82e-08 +phiX174 5000 5011 1 + + 1.19e-08 13.8902 2.09e-08 +phiX174 4216 4227 1 + + 1.2e-08 13.8171 2.15e-08 +phiX174 4261 4272 1 + + 1.2e-08 13.7805 2.18e-08 +phiX174 3568 3579 1 + + 1.22e-08 13.7073 2.26e-08 +phiX174 193 204 1 + + 1.22e-08 13.6829 2.29e-08 +phiX174 130 141 1 + + 1.31e-08 13.4756 2.49e-08 +phiX174 1490 1501 1 + + 1.32e-08 13.4024 2.55e-08 +phiX174 433 444 1 + + 1.36e-08 13.2805 2.67e-08 +phiX174 4564 4575 1 + + 1.36e-08 13.2439 2.73e-08 +phiX174 101 112 1 + + 1.36e-08 13.2195 2.75e-08 +phiX174 902 913 1 + + 1.38e-08 13.1463 2.82e-08 +phiX174 4747 4758 1 + + 1.45e-08 12.9756 3.01e-08 +phiX174 2621 2632 1 + + 1.5e-08 12.8659 3.16e-08 +phiX174 466 477 1 + + 1.57e-08 12.7317 3.35e-08 +phiX174 4032 4043 1 + + 1.58e-08 12.6829 3.44e-08 +phiX174 1347 1358 1 + + 1.58e-08 12.6707 3.46e-08 +phiX174 238 249 1 + + 1.64e-08 12.5732 3.62e-08 +phiX174 499 510 1 + + 1.71e-08 12.4634 3.84e-08 +phiX174 3000 3011 1 + + 1.73e-08 12.4146 3.93e-08 +phiX174 3775 3786 1 + + 1.73e-08 12.378 3.98e-08 +phiX174 2025 2036 1 + + 1.75e-08 12.3293 4.06e-08 +phiX174 4236 4247 1 + + 1.75e-08 12.3049 4.12e-08 +phiX174 802 813 1 + + 1.78e-08 12.2439 4.24e-08 +phiX174 3769 3780 1 + + 1.81e-08 12.1829 4.35e-08 +phiX174 3428 3439 1 + + 1.82e-08 12.122 4.45e-08 +phiX174 98 109 1 + + 1.82e-08 12.1098 4.48e-08 +phiX174 66 77 1 + + 1.92e-08 11.9268 4.78e-08 +phiX174 5331 5342 1 + + 2.01e-08 11.7195 5.13e-08 +phiX174 276 287 1 + + 2.01e-08 11.7073 5.14e-08 +phiX174 4337 4348 1 + + 2.01e-08 11.6951 5.18e-08 +phiX174 3811 3822 1 + + 2.03e-08 11.6585 5.28e-08 +phiX174 1908 1919 1 + + 2.08e-08 11.5488 5.51e-08 +phiX174 2999 3010 1 + + 2.08e-08 11.5366 5.54e-08 +phiX174 3890 3901 1 + + 2.11e-08 11.439 5.75e-08 +phiX174 3078 3089 1 + + 2.11e-08 11.4268 5.76e-08 +phiX174 36 47 1 + + 2.11e-08 11.4146 5.79e-08 +phiX174 379 390 1 + + 2.17e-08 11.3293 6.01e-08
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_interval_2.txt Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,100 @@ +#chr start end pattern name score strand matched sequence p-value q-value +phiX174 1387 1398 1 + + 0 29.4024 6.36e-11 +phiX174 846 857 1 + + 0 29.122 7.02e-11 +phiX174 2300 2311 1 + + 0 27.6463 1.08e-10 +phiX174 5062 5073 1 + + 0 25.5366 2.73e-10 +phiX174 988 999 1 + + 0 25.3049 3.15e-10 +phiX174 4712 4723 1 + + 0 23.622 7.74e-10 +phiX174 5047 5058 1 + + 0 23.3293 8.51e-10 +phiX174 854 865 1 + + 0 23.3049 8.64e-10 +phiX174 3154 3165 1 + + 0 23.0366 9.76e-10 +phiX174 5008 5019 1 + + 0 23.0366 9.76e-10 +phiX174 813 824 1 + + 0 22.5854 1.28e-09 +phiX174 2831 2842 1 + + 0 22.3415 1.42e-09 +phiX174 3829 3840 1 + + 0 21.8293 1.7e-09 +phiX174 3559 3570 1 + + 0 21.5976 1.89e-09 +phiX174 2881 2892 1 + + 0 21.1951 2.29e-09 +phiX174 4452 4463 1 + + 0 20.8902 2.58e-09 +phiX174 2492 2503 1 + + 0 20.3415 3.06e-09 +phiX174 4103 4114 1 + + 0 20.3171 3.08e-09 +phiX174 4954 4965 1 + + 0 20.3171 3.08e-09 +phiX174 1884 1895 1 + + 0 19.9268 3.61e-09 +phiX174 3375 3386 1 + + 0 19.7683 3.81e-09 +phiX174 51 62 1 + + 0 19.5732 4.06e-09 +phiX174 1389 1400 1 + + 0 19.378 4.26e-09 +phiX174 2016 2027 1 + + 0 19.0854 4.6e-09 +phiX174 999 1010 1 + + 0 18.878 4.88e-09 +phiX174 1554 1565 1 + + 0 18.439 5.58e-09 +phiX174 4429 4440 1 + + 0 18.4268 5.62e-09 +phiX174 1926 1937 1 + + 0 18.2927 5.82e-09 +phiX174 2980 2991 1 + + 0 18.0732 6.13e-09 +phiX174 4202 4213 1 + + 0 17.9268 6.34e-09 +phiX174 1668 1679 1 + + 0 17.8659 6.4e-09 +phiX174 3259 3270 1 + + 0 17.5 7.01e-09 +phiX174 3046 3057 1 + + 0 17.2805 7.4e-09 +phiX174 4175 4186 1 + + 0 17.1829 7.6e-09 +phiX174 4117 4128 1 + + 0 17.1341 7.7e-09 +phiX174 5369 5380 1 + + 0 16.9878 8.03e-09 +phiX174 1241 1252 1 + + 0 16.5122 8.94e-09 +phiX174 2582 2593 1 + + 0 16.5122 8.94e-09 +phiX174 697 708 1 + + 0 16.4146 9.13e-09 +phiX174 2298 2309 1 + + 0 16.3537 9.26e-09 +phiX174 4188 4199 1 + + 0 16.1707 9.69e-09 +phiX174 274 285 1 + + 0 16.0976 9.85e-09 +phiX174 1800 1811 1 + + 0 16.0366 1e-08 +phiX174 1385 1396 1 + + 0 15.9268 1.03e-08 +phiX174 1302 1313 1 + + 0 15.9024 1.03e-08 +phiX174 3771 3782 1 + + 0 15.878 1.04e-08 +phiX174 1287 1298 1 + + 0 15.8659 1.04e-08 +phiX174 2576 2587 1 + + 0 15.7683 1.08e-08 +phiX174 936 947 1 + + 0 15.7561 1.08e-08 +phiX174 903 914 1 + + 0 15.6585 1.11e-08 +phiX174 2278 2289 1 + + 0 15.5854 1.13e-08 +phiX174 3163 3174 1 + + 0 15.5 1.16e-08 +phiX174 23 34 1 + + 0 15.3293 1.23e-08 +phiX174 837 848 1 + + 0 15.2561 1.27e-08 +phiX174 852 863 1 + + 0 15.2561 1.27e-08 +phiX174 1983 1994 1 + + 0 15.0244 1.36e-08 +phiX174 0 11 1 + + 0 14.8293 1.46e-08 +phiX174 4306 4317 1 + + 0 14.7927 1.47e-08 +phiX174 4302 4313 1 + + 0 14.6585 1.52e-08 +phiX174 5032 5043 1 + + 0 14.561 1.58e-08 +phiX174 2578 2589 1 + + 0 14.2927 1.73e-08 +phiX174 321 332 1 + + 0 14.1951 1.82e-08 +phiX174 5000 5011 1 + + 0 13.8902 2.09e-08 +phiX174 4216 4227 1 + + 0 13.8171 2.15e-08 +phiX174 4261 4272 1 + + 0 13.7805 2.18e-08 +phiX174 3568 3579 1 + + 0 13.7073 2.26e-08 +phiX174 193 204 1 + + 0 13.6829 2.29e-08 +phiX174 130 141 1 + + 0 13.4756 2.49e-08 +phiX174 1490 1501 1 + + 0 13.4024 2.55e-08 +phiX174 433 444 1 + + 0 13.2805 2.67e-08 +phiX174 4564 4575 1 + + 0 13.2439 2.73e-08 +phiX174 101 112 1 + + 0 13.2195 2.75e-08 +phiX174 902 913 1 + + 0 13.1463 2.82e-08 +phiX174 4747 4758 1 + + 0 12.9756 3.01e-08 +phiX174 2621 2632 1 + + 0 12.8659 3.16e-08 +phiX174 466 477 1 + + 0 12.7317 3.35e-08 +phiX174 4032 4043 1 + + 0 12.6829 3.44e-08 +phiX174 1347 1358 1 + + 0 12.6707 3.46e-08 +phiX174 238 249 1 + + 0 12.5732 3.62e-08 +phiX174 499 510 1 + + 0 12.4634 3.84e-08 +phiX174 3000 3011 1 + + 0 12.4146 3.93e-08 +phiX174 3775 3786 1 + + 0 12.378 3.98e-08 +phiX174 2025 2036 1 + + 0 12.3293 4.06e-08 +phiX174 4236 4247 1 + + 0 12.3049 4.12e-08 +phiX174 802 813 1 + + 0 12.2439 4.24e-08 +phiX174 3769 3780 1 + + 0 12.1829 4.35e-08 +phiX174 3428 3439 1 + + 0 12.122 4.45e-08 +phiX174 98 109 1 + + 0 12.1098 4.48e-08 +phiX174 66 77 1 + + 0 11.9268 4.78e-08 +phiX174 5331 5342 1 + + 0 11.7195 5.13e-08 +phiX174 276 287 1 + + 0 11.7073 5.14e-08 +phiX174 4337 4348 1 + + 0 11.6951 5.18e-08 +phiX174 3811 3822 1 + + 0 11.6585 5.28e-08 +phiX174 1908 1919 1 + + 0 11.5488 5.51e-08 +phiX174 2999 3010 1 + + 0 11.5366 5.54e-08 +phiX174 3890 3901 1 + + 0 11.439 5.75e-08 +phiX174 3078 3089 1 + + 0 11.4268 5.76e-08 +phiX174 36 47 1 + + 0 11.4146 5.79e-08 +phiX174 379 390 1 + + 0 11.3293 6.01e-08
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_txt_1.txt Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,100 @@ +#pattern name sequence name start stop strand score p-value q-value matched sequence +1 phiX174 1388 1398 + 29.4024 6.36e-11 1.25e-09 AATATCTATAA +1 phiX174 847 857 + 29.122 7.02e-11 1.25e-09 AATGTCTAAAG +1 phiX174 2301 2311 + 27.6463 1.08e-10 1.29e-09 AGGTTATAACG +1 phiX174 5063 5073 + 25.5366 2.73e-10 2.25e-09 AGGAGCTAAAG +1 phiX174 989 999 + 25.3049 3.15e-10 2.25e-09 TGAGGATAAAT +1 phiX174 4713 4723 + 23.622 7.74e-10 3.48e-09 GACTGCTATCA +1 phiX174 5048 5058 + 23.3293 8.51e-10 3.48e-09 TGCTGCTAAAG +1 phiX174 855 865 + 23.3049 8.64e-10 3.48e-09 AAGGTAAAAAA +1 phiX174 3155 3165 + 23.0366 9.76e-10 3.48e-09 TATGGCTAAAG +1 phiX174 5009 5019 + 23.0366 9.76e-10 3.48e-09 TGTGGCTAAAT +1 phiX174 814 824 + 22.5854 1.28e-09 4.14e-09 TGCGTCAAAAA +1 phiX174 2832 2842 + 22.3415 1.42e-09 4.23e-09 TTGGTCTAACT +1 phiX174 3830 3840 + 21.8293 1.7e-09 4.68e-09 TATTGATAAAG +1 phiX174 3560 3570 + 21.5976 1.89e-09 4.82e-09 TGCGTCTATTA +1 phiX174 2882 2892 + 21.1951 2.29e-09 5.46e-09 AGGTTATTAAA +1 phiX174 4453 4463 + 20.8902 2.58e-09 5.75e-09 AAGGTATTAAG +1 phiX174 2493 2503 + 20.3415 3.06e-09 5.79e-09 GACACCTAAAG +1 phiX174 4104 4114 + 20.3171 3.08e-09 5.79e-09 GGCTTCCATAA +1 phiX174 4955 4965 + 20.3171 3.08e-09 5.79e-09 TGATGCTAAAG +1 phiX174 1885 1895 + 19.9268 3.61e-09 6.45e-09 TGCGACTAAAG +1 phiX174 3376 3386 + 19.7683 3.81e-09 6.48e-09 AGAATCAAAAA +1 phiX174 52 62 + 19.5732 4.06e-09 6.58e-09 TGAGTCGAAAA +1 phiX174 1390 1400 + 19.378 4.26e-09 6.61e-09 TATCTATAACA +1 phiX174 2017 2027 + 19.0854 4.6e-09 6.85e-09 TTCGTCTAAGA +1 phiX174 1000 1010 + 18.878 4.88e-09 6.97e-09 TATGTCTAATA +1 phiX174 1555 1565 + 18.439 5.58e-09 7.37e-09 GACTTCTACCA +1 phiX174 4430 4440 + 18.4268 5.62e-09 7.37e-09 TGAGTATAATT +1 phiX174 1927 1937 + 18.2927 5.82e-09 7.37e-09 GACTTATACCG +1 phiX174 2981 2991 + 18.0732 6.13e-09 7.37e-09 CATGTCTAAAT +1 phiX174 4203 4213 + 17.9268 6.34e-09 7.37e-09 GACGGCCATAA +1 phiX174 1669 1679 + 17.8659 6.4e-09 7.37e-09 TGGAGGTAAAA +1 phiX174 3260 3270 + 17.5 7.01e-09 7.82e-09 CGCTGATAAAG +1 phiX174 3047 3057 + 17.2805 7.4e-09 7.85e-09 TACCGATAACA +1 phiX174 4176 4186 + 17.1829 7.6e-09 7.85e-09 GAGTTCGATAA +1 phiX174 4118 4128 + 17.1341 7.7e-09 7.85e-09 GATGGATAACC +1 phiX174 5370 5380 + 16.9878 8.03e-09 7.87e-09 GGCGTATCCAA +1 phiX174 1242 1252 + 16.5122 8.94e-09 7.87e-09 AGTGGATTAAG +1 phiX174 2583 2593 + 16.5122 8.94e-09 7.87e-09 TACATCTGTCA +1 phiX174 698 708 + 16.4146 9.13e-09 7.87e-09 TACGGAAAACA +1 phiX174 2299 2309 + 16.3537 9.26e-09 7.87e-09 TGAGGTTATAA +1 phiX174 4189 4199 + 16.1707 9.69e-09 7.87e-09 GTGATATGTAT +1 phiX174 275 285 + 16.0976 9.85e-09 7.87e-09 GGTTTAGATAT +1 phiX174 1801 1811 + 16.0366 1e-08 7.87e-09 GACCTATAAAC +1 phiX174 1386 1396 + 15.9268 1.03e-08 7.87e-09 TGAATATCTAT +1 phiX174 1303 1313 + 15.9024 1.03e-08 7.87e-09 TGGTTATATTG +1 phiX174 3772 3782 + 15.878 1.04e-08 7.87e-09 AGGATATTTCT +1 phiX174 1288 1298 + 15.8659 1.04e-08 7.87e-09 GACTGTTAACA +1 phiX174 2577 2587 + 15.7683 1.08e-08 7.87e-09 GATGGATACAT +1 phiX174 937 947 + 15.7561 1.08e-08 7.87e-09 TTGGTATGTAG +1 phiX174 904 914 + 15.6585 1.11e-08 7.93e-09 AGGTACTAAAG +1 phiX174 2279 2289 + 15.5854 1.13e-08 7.93e-09 TCGTGATAAAA +1 phiX174 3164 3174 + 15.5 1.16e-08 7.98e-09 AGCTGGTAAAG +1 phiX174 24 34 + 15.3293 1.23e-08 8.24e-09 AGAAGTTAACA +1 phiX174 838 848 + 15.2561 1.27e-08 8.24e-09 GAGTGATGTAA +1 phiX174 853 863 + 15.2561 1.27e-08 8.24e-09 TAAAGGTAAAA +1 phiX174 1984 1994 + 15.0244 1.36e-08 8.68e-09 AATTTCTATGA +1 phiX174 1 11 + 14.8293 1.46e-08 9.05e-09 GAGTTTTATCG +1 phiX174 4307 4317 + 14.7927 1.47e-08 9.05e-09 TATTAATAACA +1 phiX174 4303 4313 + 14.6585 1.52e-08 9.19e-09 TTGATATTAAT +1 phiX174 5033 5043 + 14.561 1.58e-08 9.41e-09 GTCAGATATGG +1 phiX174 2579 2589 + 14.2927 1.73e-08 1.01e-08 TGGATACATCT +1 phiX174 322 332 + 14.1951 1.82e-08 1.05e-08 GACATTTTAAA +1 phiX174 5001 5011 + 13.8902 2.09e-08 1.19e-08 GGTTTCTATGT +1 phiX174 4217 4227 + 13.8171 2.15e-08 1.2e-08 TGCTTCTGACG +1 phiX174 4262 4272 + 13.7805 2.18e-08 1.2e-08 AATGGATGAAT +1 phiX174 3569 3579 + 13.7073 2.26e-08 1.22e-08 TATGGAAAACA +1 phiX174 194 204 + 13.6829 2.29e-08 1.22e-08 ATCAACTAACG +1 phiX174 131 141 + 13.4756 2.49e-08 1.31e-08 AAATGAGAAAA +1 phiX174 1491 1501 + 13.4024 2.55e-08 1.32e-08 GCCATCTCAAA +1 phiX174 434 444 + 13.2805 2.67e-08 1.36e-08 GGCCTCTATTA +1 phiX174 4565 4575 + 13.2439 2.73e-08 1.36e-08 TTGGTTTATCG +1 phiX174 102 112 + 13.2195 2.75e-08 1.36e-08 GAATTAAATCG +1 phiX174 903 913 + 13.1463 2.82e-08 1.38e-08 GAGGTACTAAA +1 phiX174 4748 4758 + 12.9756 3.01e-08 1.45e-08 TACAGCTAATG +1 phiX174 2622 2632 + 12.8659 3.16e-08 1.5e-08 TGCTGATATTG +1 phiX174 467 477 + 12.7317 3.35e-08 1.57e-08 TTTGGATTTAA +1 phiX174 4033 4043 + 12.6829 3.44e-08 1.58e-08 AGCGTATCGAG +1 phiX174 1348 1358 + 12.6707 3.46e-08 1.58e-08 TACCAATAAAA +1 phiX174 239 249 + 12.5732 3.62e-08 1.64e-08 AGTGGCTTAAT +1 phiX174 500 510 + 12.4634 3.84e-08 1.71e-08 GACGAGTAACA +1 phiX174 3001 3011 + 12.4146 3.93e-08 1.73e-08 GCGGTCAAAAA +1 phiX174 3776 3786 + 12.378 3.98e-08 1.73e-08 TATTTCTAATG +1 phiX174 2026 2036 + 12.3293 4.06e-08 1.75e-08 GAAGTTTAAGA +1 phiX174 4237 4247 + 12.3049 4.12e-08 1.75e-08 AGTTTGTATCT +1 phiX174 803 813 + 12.2439 4.24e-08 1.78e-08 AGAAGAAAACG +1 phiX174 3770 3780 + 12.1829 4.35e-08 1.81e-08 AAAGGATATTT +1 phiX174 3429 3439 + 12.122 4.45e-08 1.82e-08 GAGATGCAAAA +1 phiX174 99 109 + 12.1098 4.48e-08 1.82e-08 TACGAATTAAA +1 phiX174 67 77 + 11.9268 4.78e-08 1.92e-08 TCTTGATAAAG +1 phiX174 5332 5342 + 11.7195 5.13e-08 2.01e-08 ATCTGCTCAAA +1 phiX174 277 287 + 11.7073 5.14e-08 2.01e-08 TTTAGATATGA +1 phiX174 4338 4348 + 11.6951 5.18e-08 2.01e-08 GGGGACGAAAA +1 phiX174 3812 3822 + 11.6585 5.28e-08 2.03e-08 GGTTGATATTT +1 phiX174 1909 1919 + 11.5488 5.51e-08 2.08e-08 TAACGCTAAAG +1 phiX174 3000 3010 + 11.5366 5.54e-08 2.08e-08 GGCGGTCAAAA +1 phiX174 3891 3901 + 11.439 5.75e-08 2.11e-08 ATTGGCTCTAA +1 phiX174 3079 3089 + 11.4268 5.76e-08 2.11e-08 CTGGTATTAAA +1 phiX174 37 47 + 11.4146 5.79e-08 2.11e-08 TTCGGATATTT +1 phiX174 380 390 + 11.3293 6.01e-08 2.17e-08 GTAAGAAATCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_txt_2.txt Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,100 @@ +#pattern name sequence name start stop strand score p-value q-value matched sequence +1 phiX174 1388 1398 + 29.4024 6.36e-11 0 AATATCTATAA +1 phiX174 847 857 + 29.122 7.02e-11 0 AATGTCTAAAG +1 phiX174 2301 2311 + 27.6463 1.08e-10 0 AGGTTATAACG +1 phiX174 5063 5073 + 25.5366 2.73e-10 0 AGGAGCTAAAG +1 phiX174 989 999 + 25.3049 3.15e-10 0 TGAGGATAAAT +1 phiX174 4713 4723 + 23.622 7.74e-10 0 GACTGCTATCA +1 phiX174 5048 5058 + 23.3293 8.51e-10 0 TGCTGCTAAAG +1 phiX174 855 865 + 23.3049 8.64e-10 0 AAGGTAAAAAA +1 phiX174 3155 3165 + 23.0366 9.76e-10 0 TATGGCTAAAG +1 phiX174 5009 5019 + 23.0366 9.76e-10 0 TGTGGCTAAAT +1 phiX174 814 824 + 22.5854 1.28e-09 0 TGCGTCAAAAA +1 phiX174 2832 2842 + 22.3415 1.42e-09 0 TTGGTCTAACT +1 phiX174 3830 3840 + 21.8293 1.7e-09 0 TATTGATAAAG +1 phiX174 3560 3570 + 21.5976 1.89e-09 0 TGCGTCTATTA +1 phiX174 2882 2892 + 21.1951 2.29e-09 0 AGGTTATTAAA +1 phiX174 4453 4463 + 20.8902 2.58e-09 0 AAGGTATTAAG +1 phiX174 2493 2503 + 20.3415 3.06e-09 0 GACACCTAAAG +1 phiX174 4104 4114 + 20.3171 3.08e-09 0 GGCTTCCATAA +1 phiX174 4955 4965 + 20.3171 3.08e-09 0 TGATGCTAAAG +1 phiX174 1885 1895 + 19.9268 3.61e-09 0 TGCGACTAAAG +1 phiX174 3376 3386 + 19.7683 3.81e-09 0 AGAATCAAAAA +1 phiX174 52 62 + 19.5732 4.06e-09 0 TGAGTCGAAAA +1 phiX174 1390 1400 + 19.378 4.26e-09 0 TATCTATAACA +1 phiX174 2017 2027 + 19.0854 4.6e-09 0 TTCGTCTAAGA +1 phiX174 1000 1010 + 18.878 4.88e-09 0 TATGTCTAATA +1 phiX174 1555 1565 + 18.439 5.58e-09 0 GACTTCTACCA +1 phiX174 4430 4440 + 18.4268 5.62e-09 0 TGAGTATAATT +1 phiX174 1927 1937 + 18.2927 5.82e-09 0 GACTTATACCG +1 phiX174 2981 2991 + 18.0732 6.13e-09 0 CATGTCTAAAT +1 phiX174 4203 4213 + 17.9268 6.34e-09 0 GACGGCCATAA +1 phiX174 1669 1679 + 17.8659 6.4e-09 0 TGGAGGTAAAA +1 phiX174 3260 3270 + 17.5 7.01e-09 0 CGCTGATAAAG +1 phiX174 3047 3057 + 17.2805 7.4e-09 0 TACCGATAACA +1 phiX174 4176 4186 + 17.1829 7.6e-09 0 GAGTTCGATAA +1 phiX174 4118 4128 + 17.1341 7.7e-09 0 GATGGATAACC +1 phiX174 5370 5380 + 16.9878 8.03e-09 0 GGCGTATCCAA +1 phiX174 1242 1252 + 16.5122 8.94e-09 0 AGTGGATTAAG +1 phiX174 2583 2593 + 16.5122 8.94e-09 0 TACATCTGTCA +1 phiX174 698 708 + 16.4146 9.13e-09 0 TACGGAAAACA +1 phiX174 2299 2309 + 16.3537 9.26e-09 0 TGAGGTTATAA +1 phiX174 4189 4199 + 16.1707 9.69e-09 0 GTGATATGTAT +1 phiX174 275 285 + 16.0976 9.85e-09 0 GGTTTAGATAT +1 phiX174 1801 1811 + 16.0366 1e-08 0 GACCTATAAAC +1 phiX174 1386 1396 + 15.9268 1.03e-08 0 TGAATATCTAT +1 phiX174 1303 1313 + 15.9024 1.03e-08 0 TGGTTATATTG +1 phiX174 3772 3782 + 15.878 1.04e-08 0 AGGATATTTCT +1 phiX174 1288 1298 + 15.8659 1.04e-08 0 GACTGTTAACA +1 phiX174 2577 2587 + 15.7683 1.08e-08 0 GATGGATACAT +1 phiX174 937 947 + 15.7561 1.08e-08 0 TTGGTATGTAG +1 phiX174 904 914 + 15.6585 1.11e-08 0 AGGTACTAAAG +1 phiX174 2279 2289 + 15.5854 1.13e-08 0 TCGTGATAAAA +1 phiX174 3164 3174 + 15.5 1.16e-08 0 AGCTGGTAAAG +1 phiX174 24 34 + 15.3293 1.23e-08 0 AGAAGTTAACA +1 phiX174 838 848 + 15.2561 1.27e-08 0 GAGTGATGTAA +1 phiX174 853 863 + 15.2561 1.27e-08 0 TAAAGGTAAAA +1 phiX174 1984 1994 + 15.0244 1.36e-08 0 AATTTCTATGA +1 phiX174 1 11 + 14.8293 1.46e-08 0 GAGTTTTATCG +1 phiX174 4307 4317 + 14.7927 1.47e-08 0 TATTAATAACA +1 phiX174 4303 4313 + 14.6585 1.52e-08 0 TTGATATTAAT +1 phiX174 5033 5043 + 14.561 1.58e-08 0 GTCAGATATGG +1 phiX174 2579 2589 + 14.2927 1.73e-08 0 TGGATACATCT +1 phiX174 322 332 + 14.1951 1.82e-08 0 GACATTTTAAA +1 phiX174 5001 5011 + 13.8902 2.09e-08 0 GGTTTCTATGT +1 phiX174 4217 4227 + 13.8171 2.15e-08 0 TGCTTCTGACG +1 phiX174 4262 4272 + 13.7805 2.18e-08 0 AATGGATGAAT +1 phiX174 3569 3579 + 13.7073 2.26e-08 0 TATGGAAAACA +1 phiX174 194 204 + 13.6829 2.29e-08 0 ATCAACTAACG +1 phiX174 131 141 + 13.4756 2.49e-08 0 AAATGAGAAAA +1 phiX174 1491 1501 + 13.4024 2.55e-08 0 GCCATCTCAAA +1 phiX174 434 444 + 13.2805 2.67e-08 0 GGCCTCTATTA +1 phiX174 4565 4575 + 13.2439 2.73e-08 0 TTGGTTTATCG +1 phiX174 102 112 + 13.2195 2.75e-08 0 GAATTAAATCG +1 phiX174 903 913 + 13.1463 2.82e-08 0 GAGGTACTAAA +1 phiX174 4748 4758 + 12.9756 3.01e-08 0 TACAGCTAATG +1 phiX174 2622 2632 + 12.8659 3.16e-08 0 TGCTGATATTG +1 phiX174 467 477 + 12.7317 3.35e-08 0 TTTGGATTTAA +1 phiX174 4033 4043 + 12.6829 3.44e-08 0 AGCGTATCGAG +1 phiX174 1348 1358 + 12.6707 3.46e-08 0 TACCAATAAAA +1 phiX174 239 249 + 12.5732 3.62e-08 0 AGTGGCTTAAT +1 phiX174 500 510 + 12.4634 3.84e-08 0 GACGAGTAACA +1 phiX174 3001 3011 + 12.4146 3.93e-08 0 GCGGTCAAAAA +1 phiX174 3776 3786 + 12.378 3.98e-08 0 TATTTCTAATG +1 phiX174 2026 2036 + 12.3293 4.06e-08 0 GAAGTTTAAGA +1 phiX174 4237 4247 + 12.3049 4.12e-08 0 AGTTTGTATCT +1 phiX174 803 813 + 12.2439 4.24e-08 0 AGAAGAAAACG +1 phiX174 3770 3780 + 12.1829 4.35e-08 0 AAAGGATATTT +1 phiX174 3429 3439 + 12.122 4.45e-08 0 GAGATGCAAAA +1 phiX174 99 109 + 12.1098 4.48e-08 0 TACGAATTAAA +1 phiX174 67 77 + 11.9268 4.78e-08 0 TCTTGATAAAG +1 phiX174 5332 5342 + 11.7195 5.13e-08 0 ATCTGCTCAAA +1 phiX174 277 287 + 11.7073 5.14e-08 0 TTTAGATATGA +1 phiX174 4338 4348 + 11.6951 5.18e-08 0 GGGGACGAAAA +1 phiX174 3812 3822 + 11.6585 5.28e-08 0 GGTTGATATTT +1 phiX174 1909 1919 + 11.5488 5.51e-08 0 TAACGCTAAAG +1 phiX174 3000 3010 + 11.5366 5.54e-08 0 GGCGGTCAAAA +1 phiX174 3891 3901 + 11.439 5.75e-08 0 ATTGGCTCTAA +1 phiX174 3079 3089 + 11.4268 5.76e-08 0 CTGGTATTAAA +1 phiX174 37 47 + 11.4146 5.79e-08 0 TTCGGATATTT +1 phiX174 380 390 + 11.3293 6.01e-08 0 GTAAGAAATCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_xml_1.xml Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,66 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Begin document body --> +<settings> +<setting name="allow clobber">false</setting> +<setting name="compute q-values">true</setting> +<setting name="parse genomic coord.">false</setting> +<setting name="text only">false</setting> +<setting name="scan both strands">false</setting> +<setting name="output threshold">0.0001</setting> +<setting name="threshold type">p-value</setting> +<setting name="max stored scores">100000</setting> +<setting name="pseudocount">0.1</setting> +<setting name="verbosity">1</setting> +</settings> +<sequence-data num-sequences="1" num-residues="5386" /> +<alphabet name="Protein" like="protein"> +<letter id="A" symbol="A" name="Alanine" colour="0000CC"/> +<letter id="C" symbol="C" name="Cysteine" colour="0000CC"/> +<letter id="D" symbol="D" name="Aspartic acid" colour="FF00FF"/> +<letter id="E" symbol="E" name="Glutamic acid" colour="FF00FF"/> +<letter id="F" symbol="F" name="Phenylalanine" colour="0000CC"/> +<letter id="G" symbol="G" name="Glycine" colour="FFB300"/> +<letter id="H" symbol="H" name="Histidine" colour="FFCCCC"/> +<letter id="I" symbol="I" name="Isoleucine" colour="0000CC"/> +<letter id="K" symbol="K" name="Lysine" colour="CC0000"/> +<letter id="L" symbol="L" name="Leucine" colour="0000CC"/> +<letter id="M" symbol="M" name="Methionine" colour="0000CC"/> +<letter id="N" symbol="N" name="Asparagine" colour="008000"/> +<letter id="P" symbol="P" name="Proline" colour="FFFF00"/> +<letter id="Q" symbol="Q" name="Glutamine" colour="008000"/> +<letter id="R" symbol="R" name="Arginine" colour="CC0000"/> +<letter id="S" symbol="S" name="Serine" colour="008000"/> +<letter id="T" symbol="T" name="Threonine" colour="008000"/> +<letter id="V" symbol="V" name="Valine" colour="0000CC"/> +<letter id="W" symbol="W" name="Tryptophan" colour="0000CC"/> +<letter id="Y" symbol="Y" name="Tyrosine" colour="33E6CC"/> +<letter id="X" symbol="X" aliases="*." equals="ACDEFGHIKLMNPQRSTVWY" name="Any amino acid"/> +<letter id="B" symbol="B" equals="DN" name="Asparagine or Aspartic acid"/> +<letter id="Z" symbol="Z" equals="EQ" name="Glutamine or Glutamic acid"/> +<letter id="J" symbol="J" equals="IL" name="Leucine or Isoleucine"/> +</alphabet> +<motif name="1" width="11" best-possible-match="GGGGTATAAAA"/> +<background source="non-redundant database"> +<value letter="A">0.073</value> +<value letter="C">0.018</value> +<value letter="D">0.052</value> +<value letter="E">0.062</value> +<value letter="F">0.040</value> +<value letter="G">0.069</value> +<value letter="H">0.022</value> +<value letter="I">0.056</value> +<value letter="K">0.058</value> +<value letter="L">0.092</value> +<value letter="M">0.023</value> +<value letter="N">0.046</value> +<value letter="P">0.051</value> +<value letter="Q">0.041</value> +<value letter="R">0.052</value> +<value letter="S">0.074</value> +<value letter="T">0.059</value> +<value letter="V">0.064</value> +<value letter="W">0.013</value> +<value letter="Y">0.033</value> +</background> +<cisml-file>cisml.xml</cisml-file> +</fimo>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fimo_output_xml_2.xml Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,65 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Begin document body --> +<settings> +<setting name="allow clobber">false</setting> +<setting name="compute q-values">false</setting> +<setting name="text only">false</setting> +<setting name="scan both strands">false</setting> +<setting name="output threshold">0.0001</setting> +<setting name="threshold type">p-value</setting> +<setting name="max stored scores">100000</setting> +<setting name="pseudocount">0.1</setting> +<setting name="verbosity">1</setting> +</settings> +<sequence-data num-sequences="1" num-residues="5386" /> +<alphabet name="Protein" like="protein"> +<letter id="A" symbol="A" name="Alanine" colour="0000CC"/> +<letter id="C" symbol="C" name="Cysteine" colour="0000CC"/> +<letter id="D" symbol="D" name="Aspartic acid" colour="FF00FF"/> +<letter id="E" symbol="E" name="Glutamic acid" colour="FF00FF"/> +<letter id="F" symbol="F" name="Phenylalanine" colour="0000CC"/> +<letter id="G" symbol="G" name="Glycine" colour="FFB300"/> +<letter id="H" symbol="H" name="Histidine" colour="FFCCCC"/> +<letter id="I" symbol="I" name="Isoleucine" colour="0000CC"/> +<letter id="K" symbol="K" name="Lysine" colour="CC0000"/> +<letter id="L" symbol="L" name="Leucine" colour="0000CC"/> +<letter id="M" symbol="M" name="Methionine" colour="0000CC"/> +<letter id="N" symbol="N" name="Asparagine" colour="008000"/> +<letter id="P" symbol="P" name="Proline" colour="FFFF00"/> +<letter id="Q" symbol="Q" name="Glutamine" colour="008000"/> +<letter id="R" symbol="R" name="Arginine" colour="CC0000"/> +<letter id="S" symbol="S" name="Serine" colour="008000"/> +<letter id="T" symbol="T" name="Threonine" colour="008000"/> +<letter id="V" symbol="V" name="Valine" colour="0000CC"/> +<letter id="W" symbol="W" name="Tryptophan" colour="0000CC"/> +<letter id="Y" symbol="Y" name="Tyrosine" colour="33E6CC"/> +<letter id="X" symbol="X" aliases="*." equals="ACDEFGHIKLMNPQRSTVWY" name="Any amino acid"/> +<letter id="B" symbol="B" equals="DN" name="Asparagine or Aspartic acid"/> +<letter id="Z" symbol="Z" equals="EQ" name="Glutamine or Glutamic acid"/> +<letter id="J" symbol="J" equals="IL" name="Leucine or Isoleucine"/> +</alphabet> +<motif name="1" width="11" best-possible-match="GGGGTATAAAA"/> +<background source="non-redundant database"> +<value letter="A">0.073</value> +<value letter="C">0.018</value> +<value letter="D">0.052</value> +<value letter="E">0.062</value> +<value letter="F">0.040</value> +<value letter="G">0.069</value> +<value letter="H">0.022</value> +<value letter="I">0.056</value> +<value letter="K">0.058</value> +<value letter="L">0.092</value> +<value letter="M">0.023</value> +<value letter="N">0.046</value> +<value letter="P">0.051</value> +<value letter="Q">0.041</value> +<value letter="R">0.052</value> +<value letter="S">0.074</value> +<value letter="T">0.059</value> +<value letter="V">0.064</value> +<value letter="W">0.013</value> +<value letter="Y">0.033</value> +</background> +<cisml-file>cisml.xml</cisml-file> +</fimo>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_input_1.fasta Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,66 @@ +>chr21_19617074_19617124_+ +AAAAATTATTACTAGGGAGGGGGCCGGAACCTCGGGACGTGGGTATATAA +>chr21_26934381_26934431_+ +GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTATAAAAGGGTCGCACGTT +>chr21_28217753_28217803_- +CAAAGGGGAGGAGTGGGGTGGGGGTGGGGGTTTCACTGGTCCACTATAAA +>chr21_31710037_31710087_- +AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTT +>chr21_31744582_31744632_- +CCCAGGTCTAAGAGCATATATAACTTGGAGTCCAGACTATGACATTCAAA +>chr21_31768316_31768366_+ +AACGTATATAAATGGTCCTGTCCAGATGTGGCATGCAAACTCAGAATCTT +>chr21_31914206_31914256_- +TGACACCCACTACTTAGAGTATAAAATCATTCTGAGAAGTTAGAGACACC +>chr21_31933633_31933683_- +TCAGAGTATATATAAATGTTCCTGTCCAGTCACAGTCACCAAACTGACCT +>chr21_31962741_31962791_- +ACATATAACTCAGGTTGGATAAAATAATTTGTACAAATCAGGAGAGTCAA +>chr21_31964683_31964733_+ +TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAAGTGTCCATAC +>chr21_31973364_31973414_+ +aaacttaaaactctataaacttaaaactCTAGAATCTGATCCTGCTATAC +>chr21_31992870_31992920_+ +CTCATACACTATTGAAGATGTATAAAATTTCATTTGCAGATGGTGACATT +>chr21_32185595_32185645_- +TCACCACCCACCAGAGCTGGGATATATAAAGAAGGTTCTGAGACTAGGAA +>chr21_32202076_32202126_- +TGCCCACCAGCTTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT +>chr21_32253899_32253949_- +AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC +>chr21_32410820_32410870_- +TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG +>chr21_36411748_36411798_- +ATAGTTCTGTATAGTTTCAGTTGGCATCtaaaaattatataactttattt +>chr21_37838750_37838800_- +gatggttttataaggggcctcaccctcggctcagccctcattcttctcct +>chr21_45705687_45705737_+ +CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGCGGGGGTATAACAGC +>chr21_45971413_45971463_- +CAGGCCCTGGGCATATAAAAGCCCCAGCAGCCAACAGGctcacacacaca +>chr21_45978668_45978718_- +CAGAGGGGTATAAAGGTTCCGACCACTCAGAGGCCTGGCACGAtcactca +>chr21_45993530_45993580_+ +CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCTCACTCACTCGC +>chr21_46020421_46020471_+ +GAGACATATAAAAGCCAACATCCCTGAGCACCTAACACACGGactcactc +>chr21_46031920_46031970_+ +GGAAAATACCCAGGGAGGGTATAAAACCTCAGCAGCCAGGGCACACAAAC +>chr21_46046964_46047014_+ +ACAAGGCCAGGAGGGGTATAAAAGCCTGAGAGCCCCAAGAACctcacaca +>chr21_46057197_46057247_+ +ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCATATAAAAGCC +>chr21_46086869_46086919_- +GACAGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC +>chr21_46102103_46102153_- +AGGTGTGTGCTTCTGTGCTGTGGGGATGCCTGGGTCCAGGTATAAAGGCT +>chr21_47517957_47518007_+ +CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG +>chr21_47517957_47518007_+ +CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG +>chr21_47517957_47518007_+ +CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG +>chr21_47575506_47575556_- +TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG +>chr21_47575506_47575556_- +TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_output_html_1.html Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,95 @@ +<!DOCTYPE HTML> +<html> + <head> + <meta charset="UTF-8"> + <title>MEME</title> + <script> + // @JSON_VAR data + var data = { + "program": "MEME", + "stop_reason": "Stopped because requested number of motifs (1) found.", + "cmd": [ + "meme", + "-nostatus" + ], + "options": { + "mod": "zoops", + "revcomp": false, + "nmotifs": 1, + "minw": 8, + "maxw": 50, + "minsites": 2, + "maxsites": 30, + "wnsites": 0.8, + "spmap": "pam", + "spfuzz": 120, + "maxwords": -1, + "prior": "megap", + "b": 7500, + "maxiter": 50, + "distance": 1e-05, + "wg": 11, + "ws": 1, + "noendgaps": false, + "substring": true + }, + "alphabet": { + "name": "Protein", + "like": "protein", + "ncore": 20, + "symbols": [ + { + "symbol": "A", + "name": "Alanine", + "colour": "0000CC" + }, { + "symbol": "C", + "name": "Cysteine", + "colour": "0000CC" + }, { + "symbol": "D", + "name": "Aspartic acid", + "colour": "FF00FF" + }, { + "symbol": "E", + "name": "Glutamic acid", + "colour": "FF00FF" + }, { + "symbol": "F", + "name": "Phenylalanine", + "colour": "0000CC" + }, { + "symbol": "G", + "name": "Glycine", + "colour": "FFB300" + }, { + "symbol": "H", + "name": "Histidine", + "colour": "FFCCCC" + }, { + "symbol": "I", + "name": "Isoleucine", + "colour": "0000CC" + }, { + "symbol": "K", + "name": "Lysine", + "colour": "CC0000" + }, { + "symbol": "L", + "name": "Leucine", + "colour": "0000CC" + }, { + "symbol": "M", + "name": "Methionine", + "colour": "0000CC" + }, { + "symbol": "N", + "name": "Asparagine", + "colour": "008000" + }, { + "symbol": "P", + "name": "Proline", + "colour": "FFFF00" + }, { + "symbol": "Q", + "name": "Glutamine",
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_output_html_2.html Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,89 @@ +<!DOCTYPE HTML> +<html> + <head> + <meta charset="UTF-8"> + <title>MEME</title> + <script> + // @JSON_VAR data + var data = { + "program": "MEME", + "stop_reason": "Stopped because requested number of motifs (1) found.", + "cmd": [ + "meme", + ], + "options": { + "mod": "zoops", + "revcomp": false, + "nmotifs": 1, + "minw": 8, + "maxw": 50, + "minsites": 2, + "maxsites": 30, + "wnsites": 0.8, + "spmap": "uni", + "spfuzz": 0.5, + "maxwords": -1, + "prior": "dirichlet", + "b": 0.01, + "maxiter": 50, + "distance": 0.001, + "wg": 11, + "ws": 1, + "noendgaps": false, + "substring": true + }, + "alphabet": { + "name": "DNA", + "like": "dna", + "ncore": 4, + "symbols": [ + { + "symbol": "A", + "name": "Adenine", + "colour": "CC0000", + "complement": "T" + }, { + "symbol": "C", + "name": "Cytosine", + "colour": "0000CC", + "complement": "G" + }, { + "symbol": "G", + "name": "Guanine", + "colour": "FFB300", + "complement": "C" + }, { + "symbol": "T", + "aliases": "U", + "name": "Thymine", + "colour": "008000", + "complement": "A" + }, { + "symbol": "N", + "aliases": "X.", + "name": "Any base", + "equals": "ACGT" + }, { + "symbol": "V", + "name": "Not T", + "equals": "ACG" + }, { + "symbol": "H", + "name": "Not G", + "equals": "ACT" + }, { + "symbol": "D", + "name": "Not C", + "equals": "AGT" + }, { + "symbol": "B", + "name": "Not A", + "equals": "CGT" + }, { + "symbol": "M", + "name": "Amino", + "equals": "AC" + }, { + "symbol": "R", + "name": "Purine", + "equals": "AG"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_output_txt_1.txt Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,325 @@ +******************************************************************************** +MEME - Motif discovery tool +******************************************************************************** +MEME version 4.11.2 (Release date: Thu May 05 14:58:55 2016 -0700) + +For further information on how to interpret these results or to get +a copy of the MEME software please access http://meme-suite.org . + +This file may be used as input to the MAST algorithm for searching +sequence databases for matches to groups of motifs. MAST is available +for interactive use and downloading at http://meme-suite.org . +******************************************************************************** + + +******************************************************************************** +REFERENCE +******************************************************************************** +If you use this program in your research, please cite: + +Timothy L. Bailey and Charles Elkan, +"Fitting a mixture model by expectation maximization to discover +motifs in biopolymers", Proceedings of the Second International +Conference on Intelligent Systems for Molecular Biology, pp. 28-36, +AAAI Press, Menlo Park, California, 1994. +******************************************************************************** + + +******************************************************************************** +TRAINING SET +******************************************************************************** +DATAFILE= /tmp/tmpCNK6l0/files/000/dataset_22.dat +ALPHABET= ACDEFGHIKLMNPQRSTVWY +Sequence name Weight Length Sequence name Weight Length +------------- ------ ------ ------------- ------ ------ +chr21_19617074_19617124_ 1.0000 50 chr21_26934381_26934431_ 1.0000 50 +chr21_28217753_28217803_ 1.0000 50 chr21_31710037_31710087_ 1.0000 50 +chr21_31744582_31744632_ 1.0000 50 chr21_31768316_31768366_ 1.0000 50 +chr21_31914206_31914256_ 1.0000 50 chr21_31933633_31933683_ 1.0000 50 +chr21_31962741_31962791_ 1.0000 50 chr21_31964683_31964733_ 1.0000 50 +chr21_31973364_31973414_ 1.0000 50 chr21_31992870_31992920_ 1.0000 50 +chr21_32185595_32185645_ 1.0000 50 chr21_32202076_32202126_ 1.0000 50 +chr21_32253899_32253949_ 1.0000 50 chr21_32410820_32410870_ 1.0000 50 +chr21_36411748_36411798_ 1.0000 50 chr21_37838750_37838800_ 1.0000 50 +chr21_45705687_45705737_ 1.0000 50 chr21_45971413_45971463_ 1.0000 50 +chr21_45978668_45978718_ 1.0000 50 chr21_45993530_45993580_ 1.0000 50 +chr21_46020421_46020471_ 1.0000 50 chr21_46031920_46031970_ 1.0000 50 +chr21_46046964_46047014_ 1.0000 50 chr21_46057197_46057247_ 1.0000 50 +chr21_46086869_46086919_ 1.0000 50 chr21_46102103_46102153_ 1.0000 50 +chr21_47517957_47518007_ 1.0000 50 chr21_47575506_47575556_ 1.0000 50 +******************************************************************************** + +******************************************************************************** +COMMAND LINE SUMMARY +******************************************************************************** +This information can also be useful in the event you wish to report a +problem with the MEME software. + +command: meme /tmp/tmpCNK6l0/files/000/dataset_22.dat -o /tmp/tmpCNK6l0/job_working_directory/000/11/dataset_23_files -nostatus -maxsize 1000000 + +model: mod= zoops nmotifs= 1 evt= inf +object function= E-value of product of p-values +width: minw= 8 maxw= 50 +width: wg= 11 ws= 1 endgaps= yes +nsites: minsites= 2 maxsites= 30 wnsites= 0.8 +theta: spmap= pam spfuzz= 120 +global: substring= yes branching= no wbranch= no +em: prior= megap b= 7500 maxiter= 50 + distance= 1e-05 +data: n= 1500 N= 30 shuffle= -1 + +sample: seed= 0 ctfrac= -1 maxwords= -1 +Dirichlet mixture priors file: prior30.plib +Letter frequencies in dataset: +A 0.294 C 0.231 D 0.000 E 0.000 F 0.000 G 0.257 H 0.000 I 0.000 K 0.000 +L 0.000 M 0.000 N 0.000 P 0.000 Q 0.000 R 0.000 S 0.000 T 0.217 V 0.000 +W 0.000 Y 0.000 +Background letter frequencies (from dataset with add-one prior applied): +A 0.291 C 0.229 D 0.001 E 0.001 F 0.001 G 0.255 H 0.001 I 0.001 K 0.001 +L 0.001 M 0.001 N 0.001 P 0.001 Q 0.001 R 0.001 S 0.001 T 0.215 V 0.001 +W 0.001 Y 0.001 +******************************************************************************** + + +******************************************************************************** +MOTIF 1 MEME width = 11 sites = 25 llr = 239 E-value = 2.4e-011 +******************************************************************************** +-------------------------------------------------------------------------------- + Motif 1 Description +-------------------------------------------------------------------------------- +Simplified A 2323:a:a8a8 +pos.-specific C ::3:::::::: +probability D ::::::::::: +matrix E ::::::::::: + F ::::::::::: + G 7746::::::1 + H ::::::::::: + I ::::::::::: + K ::::::::::: + L ::::::::::: + M ::::::::::: + N ::::::::::: + P ::::::::::: + Q ::::::::::: + R ::::::::::: + S ::::::::::: + T 1:2:a:a:2:: + V ::::::::::: + W ::::::::::: + Y ::::::::::: + + bits 10.6 + 9.5 + 8.5 + 7.4 +Relative 6.3 +Entropy 5.3 +(13.8 bits) 4.2 + 3.2 + 2.1 * ** + 1.1 ** ******** + 0.0 ----------- + +Multilevel GGGGTATAAAA +consensus AACA T +sequence + + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 sites sorted by position p-value +-------------------------------------------------------------------------------- +Sequence name Start P-value Site +------------- ----- --------- ----------- +chr21_46046964_46047014_ 13 1.06e-06 AAGGCCAGGA GGGGTATAAAA GCCTGAGAGC +chr21_46057197_46057247_ 37 3.41e-06 ACAGGCCCTG GGCATATAAAA GCC +chr21_45971413_45971463_ 10 3.41e-06 CAGGCCCTG GGCATATAAAA GCCCCAGCAG +chr21_31964683_31964733_ 14 3.41e-06 GATTCACTGA GGCATATAAAA GGCCCTCTGC +chr21_45993530_45993580_ 8 4.00e-06 CCAAGGA GGAGTATAAAA GCCCCACAAA +chr21_32202076_32202126_ 14 5.01e-06 CCACCAGCTT GAGGTATAAAA AGCCCTGTAC +chr21_46031920_46031970_ 16 6.06e-06 ATACCCAGGG AGGGTATAAAA CCTCAGCAGC +chr21_32410820_32410870_ 22 8.67e-06 AATCACTGAG GATGTATAAAA GTCCCAGGGA +chr21_32185595_32185645_ 19 8.67e-06 CACCAGAGCT GGGATATATAA AGAAGGTTCT +chr21_31992870_31992920_ 17 8.67e-06 CACTATTGAA GATGTATAAAA TTTCATTTGC +chr21_46020421_46020471_ 3 1.21e-05 GA GACATATAAAA GCCAACATCC +chr21_47517957_47518007_ 33 1.59e-05 CCGGCGGGGC GGGGTATAAAG GGGGCGG +chr21_45978668_45978718_ 5 1.59e-05 CAGA GGGGTATAAAG GTTCCGACCA +chr21_31914206_31914256_ 16 1.68e-05 CCCACTACTT AGAGTATAAAA TCATTCTGAG +chr21_32253899_32253949_ 20 2.03e-05 CACCAGCAAG GATATATAAAA GCTCAGGAGT +chr21_31744582_31744632_ 13 3.06e-05 CAGGTCTAAG AGCATATATAA CTTGGAGTCC +chr21_19617074_19617124_ 40 3.06e-05 CCTCGGGACG TGGGTATATAA +chr21_45705687_45705737_ 38 3.82e-05 CGTGGTCGCG GGGGTATAACA GC +chr21_31768316_31768366_ 1 3.82e-05 . AACGTATATAA ATGGTCCTGT +chr21_47575506_47575556_ 31 4.02e-05 GCTGCCGGTG AGCGTATAAAG GCCCTGGCG +chr21_26934381_26934431_ 28 5.52e-05 AGTCACAAGT GAGTTATAAAA GGGTCGCACG +chr21_31710037_31710087_ 15 5.94e-05 CCCAGGTTTC TGAGTATATAA TCGCCGCACC +chr21_36411748_36411798_ 23 6.78e-05 AGTTTCAGTT GGCATCtaaaa attatataac +chr21_31933633_31933683_ 3 2.08e-04 TC AGAGTATATAT AAATGTTCCT +chr21_31962741_31962791_ 14 4.05e-04 TATAACTCAG GTTGGATAAAA TAATTTGTAC +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 block diagrams +-------------------------------------------------------------------------------- +SEQUENCE NAME POSITION P-VALUE MOTIF DIAGRAM +------------- ---------------- ------------- +chr21_46046964_46047014_ 1.1e-06 12_[1]_27 +chr21_46057197_46057247_ 3.4e-06 36_[1]_3 +chr21_45971413_45971463_ 3.4e-06 9_[1]_30 +chr21_31964683_31964733_ 3.4e-06 13_[1]_26 +chr21_45993530_45993580_ 4e-06 7_[1]_32 +chr21_32202076_32202126_ 5e-06 13_[1]_26 +chr21_46031920_46031970_ 6.1e-06 15_[1]_24 +chr21_32410820_32410870_ 8.7e-06 21_[1]_18 +chr21_32185595_32185645_ 8.7e-06 18_[1]_21 +chr21_31992870_31992920_ 8.7e-06 16_[1]_23 +chr21_46020421_46020471_ 1.2e-05 2_[1]_37 +chr21_47517957_47518007_ 1.6e-05 32_[1]_7 +chr21_45978668_45978718_ 1.6e-05 4_[1]_35 +chr21_31914206_31914256_ 1.7e-05 15_[1]_24 +chr21_32253899_32253949_ 2e-05 19_[1]_20 +chr21_31744582_31744632_ 3.1e-05 12_[1]_27 +chr21_19617074_19617124_ 3.1e-05 39_[1] +chr21_45705687_45705737_ 3.8e-05 37_[1]_2 +chr21_31768316_31768366_ 3.8e-05 [1]_39 +chr21_47575506_47575556_ 4e-05 30_[1]_9 +chr21_26934381_26934431_ 5.5e-05 27_[1]_12 +chr21_31710037_31710087_ 5.9e-05 14_[1]_25 +chr21_36411748_36411798_ 6.8e-05 22_[1]_17 +chr21_31933633_31933683_ 0.00021 2_[1]_37 +chr21_31962741_31962791_ 0.0004 13_[1]_26 +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 in BLOCKS format +-------------------------------------------------------------------------------- +BL MOTIF 1 width=11 seqs=25 +chr21_46046964_46047014_ ( 13) GGGGTATAAAA 1 +chr21_46057197_46057247_ ( 37) GGCATATAAAA 1 +chr21_45971413_45971463_ ( 10) GGCATATAAAA 1 +chr21_31964683_31964733_ ( 14) GGCATATAAAA 1 +chr21_45993530_45993580_ ( 8) GGAGTATAAAA 1 +chr21_32202076_32202126_ ( 14) GAGGTATAAAA 1 +chr21_46031920_46031970_ ( 16) AGGGTATAAAA 1 +chr21_32410820_32410870_ ( 22) GATGTATAAAA 1 +chr21_32185595_32185645_ ( 19) GGGATATATAA 1 +chr21_31992870_31992920_ ( 17) GATGTATAAAA 1 +chr21_46020421_46020471_ ( 3) GACATATAAAA 1 +chr21_47517957_47518007_ ( 33) GGGGTATAAAG 1 +chr21_45978668_45978718_ ( 5) GGGGTATAAAG 1 +chr21_31914206_31914256_ ( 16) AGAGTATAAAA 1 +chr21_32253899_32253949_ ( 20) GATATATAAAA 1 +chr21_31744582_31744632_ ( 13) AGCATATATAA 1 +chr21_19617074_19617124_ ( 40) TGGGTATATAA 1 +chr21_45705687_45705737_ ( 38) GGGGTATAACA 1 +chr21_31768316_31768366_ ( 1) AACGTATATAA 1 +chr21_47575506_47575556_ ( 31) AGCGTATAAAG 1 +chr21_26934381_26934431_ ( 28) GAGTTATAAAA 1 +chr21_31710037_31710087_ ( 15) TGAGTATATAA 1 +chr21_36411748_36411798_ ( 23) GGCATCTAAAA 1 +chr21_31933633_31933683_ ( 3) AGAGTATATAT 1 +chr21_31962741_31962791_ ( 14) GTTGGATAAAA 1 +// + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 position-specific scoring matrix +-------------------------------------------------------------------------------- +log-odds matrix: alength= 20 w= 11 n= 1200 bayes= 5.33554 E= 2.4e-011 + -32 -680 91 77 7 138 -20 55 64 107 11 150 142 72 87 396 -148 221 -140 -36 + -11 -680 89 76 7 137 -21 55 63 107 10 149 141 71 87 396 -239 220 -140 -36 + -79 41 4 21 -7 44 -62 42 -5 99 0 99 138 52 42 399 -46 223 -173 -68 + 11 -677 48 47 -2 127 -43 46 27 101 3 124 138 60 62 397 -235 220 -160 -55 + -596 -820 12 -21 -53 -267 -74 37 16 44 -37 98 31 9 19 319 212 127 -193 -95 + 165 -261 70 110 77 -521 -4 147 95 201 90 121 124 91 107 425 -527 314 -95 8 + -838 -990 -89 -149 -151 -841 -161 -117 -113 -66 -209 -68 -69 -129 -91 111 221 -55 -255 -173 + 176 -858 -79 -103 -115 -717 -148 -95 -108 -17 -162 -61 -12 -95 -69 193 -737 52 -240 -153 + 134 -686 0 16 -12 -553 -68 44 -8 96 -9 88 124 41 36 384 11 216 -177 -71 + 165 -261 70 110 77 -521 -4 147 95 201 90 121 124 91 107 425 -527 314 -95 8 + 147 -614 89 129 93 -121 12 160 113 217 108 144 144 111 125 447 -241 332 -81 22 +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 position-specific probability matrix +-------------------------------------------------------------------------------- +letter-probability matrix: alength= 20 w= 11 nsites= 25 E= 2.4e-011 + 0.240000 0.000000 0.000000 0.000000 0.000000 0.680000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.080000 0.000000 0.000000 0.000000 + 0.280000 0.000000 0.000000 0.000000 0.000000 0.680000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.040000 0.000000 0.000000 0.000000 + 0.160000 0.320000 0.000000 0.000000 0.000000 0.360000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.160000 0.000000 0.000000 0.000000 + 0.320000 0.000000 0.000000 0.000000 0.000000 0.640000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.040000 0.000000 0.000000 0.000000 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.040000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.960000 0.000000 0.000000 0.000000 + 0.960000 0.040000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 + 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 + 0.760000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.240000 0.000000 0.000000 0.000000 + 0.960000 0.040000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 + 0.840000 0.000000 0.000000 0.000000 0.000000 0.120000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.040000 0.000000 0.000000 0.000000 +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 regular expression +-------------------------------------------------------------------------------- +[GA][GA][GC][GA]TATA[AT]AA +-------------------------------------------------------------------------------- + + + + +Time 0.72 secs. + +******************************************************************************** + + +******************************************************************************** +SUMMARY OF MOTIFS +******************************************************************************** + +-------------------------------------------------------------------------------- + Combined block diagrams: non-overlapping sites with p-value < 0.0001 +-------------------------------------------------------------------------------- +SEQUENCE NAME COMBINED P-VALUE MOTIF DIAGRAM +------------- ---------------- ------------- +chr21_19617074_19617124_ 1.22e-03 39_[1(3.06e-05)] +chr21_26934381_26934431_ 2.21e-03 27_[1(5.52e-05)]_12 +chr21_28217753_28217803_ 7.29e-01 50 +chr21_31710037_31710087_ 2.37e-03 14_[1(5.94e-05)]_25 +chr21_31744582_31744632_ 1.22e-03 12_[1(3.06e-05)]_27 +chr21_31768316_31768366_ 1.53e-03 [1(3.82e-05)]_39 +chr21_31914206_31914256_ 6.70e-04 15_[1(1.68e-05)]_24 +chr21_31933633_31933683_ 1.81e-03 4_[1(4.54e-05)]_35 +chr21_31962741_31962791_ 1.61e-02 50 +chr21_31964683_31964733_ 1.36e-04 13_[1(3.41e-06)]_26 +chr21_31973364_31973414_ 1.99e-01 50 +chr21_31992870_31992920_ 3.47e-04 16_[1(8.67e-06)]_23 +chr21_32185595_32185645_ 3.47e-04 18_[1(8.67e-06)]_21 +chr21_32202076_32202126_ 2.01e-04 13_[1(5.01e-06)]_26 +chr21_32253899_32253949_ 8.11e-04 19_[1(2.03e-05)]_20 +chr21_32410820_32410870_ 3.47e-04 21_[1(8.67e-06)]_18 +chr21_36411748_36411798_ 2.71e-03 22_[1(6.78e-05)]_17 +chr21_37838750_37838800_ 8.23e-02 50 +chr21_45705687_45705737_ 1.53e-03 37_[1(3.82e-05)]_2 +chr21_45971413_45971463_ 1.36e-04 9_[1(3.41e-06)]_30 +chr21_45978668_45978718_ 6.37e-04 4_[1(1.59e-05)]_35 +chr21_45993530_45993580_ 1.60e-04 7_[1(4.00e-06)]_32 +chr21_46020421_46020471_ 4.83e-04 2_[1(1.21e-05)]_37 +chr21_46031920_46031970_ 2.43e-04 15_[1(6.06e-06)]_24 +chr21_46046964_46047014_ 4.26e-05 12_[1(1.06e-06)]_27 +chr21_46057197_46057247_ 1.36e-04 36_[1(3.41e-06)]_3 +chr21_46086869_46086919_ 4.30e-02 50 +chr21_46102103_46102153_ 4.30e-02 50 +chr21_47517957_47518007_ 6.37e-04 32_[1(1.59e-05)]_7 +chr21_47575506_47575556_ 1.61e-03 30_[1(4.02e-05)]_9 +-------------------------------------------------------------------------------- + +******************************************************************************** + + +******************************************************************************** +Stopped because requested number of motifs (1) found. +******************************************************************************** + +CPU: bigsky + +********************************************************************************
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_output_txt_2.txt Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,319 @@ +******************************************************************************** +MEME - Motif discovery tool +******************************************************************************** +MEME version 4.11.2 (Release date: Thu May 05 14:58:55 2016 -0700) + +For further information on how to interpret these results or to get +a copy of the MEME software please access http://meme-suite.org . + +This file may be used as input to the MAST algorithm for searching +sequence databases for matches to groups of motifs. MAST is available +for interactive use and downloading at http://meme-suite.org . +******************************************************************************** + + +******************************************************************************** +REFERENCE +******************************************************************************** +If you use this program in your research, please cite: + +Timothy L. Bailey and Charles Elkan, +"Fitting a mixture model by expectation maximization to discover +motifs in biopolymers", Proceedings of the Second International +Conference on Intelligent Systems for Molecular Biology, pp. 28-36, +AAAI Press, Menlo Park, California, 1994. +******************************************************************************** + + +******************************************************************************** +TRAINING SET +******************************************************************************** +DATAFILE= Galaxy_FASTA_Input +ALPHABET= ACGT +Sequence name Weight Length Sequence name Weight Length +------------- ------ ------ ------------- ------ ------ +chr21_19617074_19617124_ 1.0000 50 chr21_26934381_26934431_ 1.0000 50 +chr21_28217753_28217803_ 1.0000 50 chr21_31710037_31710087_ 1.0000 50 +chr21_31744582_31744632_ 1.0000 50 chr21_31768316_31768366_ 1.0000 50 +chr21_31914206_31914256_ 1.0000 50 chr21_31933633_31933683_ 1.0000 50 +chr21_31962741_31962791_ 1.0000 50 chr21_31964683_31964733_ 1.0000 50 +chr21_31973364_31973414_ 1.0000 50 chr21_31992870_31992920_ 1.0000 50 +chr21_32185595_32185645_ 1.0000 50 chr21_32202076_32202126_ 1.0000 50 +chr21_32253899_32253949_ 1.0000 50 chr21_32410820_32410870_ 1.0000 50 +chr21_36411748_36411798_ 1.0000 50 chr21_37838750_37838800_ 1.0000 50 +chr21_45705687_45705737_ 1.0000 50 chr21_45971413_45971463_ 1.0000 50 +chr21_45978668_45978718_ 1.0000 50 chr21_45993530_45993580_ 1.0000 50 +chr21_46020421_46020471_ 1.0000 50 chr21_46031920_46031970_ 1.0000 50 +chr21_46046964_46047014_ 1.0000 50 chr21_46057197_46057247_ 1.0000 50 +chr21_46086869_46086919_ 1.0000 50 chr21_46102103_46102153_ 1.0000 50 +chr21_47517957_47518007_ 1.0000 50 chr21_47575506_47575556_ 1.0000 50 +******************************************************************************** + +******************************************************************************** +COMMAND LINE SUMMARY +******************************************************************************** +This information can also be useful in the event you wish to report a +problem with the MEME software. + +command: meme /tmp/tmpCNK6l0/files/000/dataset_26.dat -o /tmp/tmpCNK6l0/job_working_directory/000/14/dataset_28_files -nostatus -maxsize 1000000 -sf Galaxy_FASTA_Input -dna -mod zoops -nmotifs 1 -wnsites 0.8 -evt inf -minw 8 -maxw 50 -wg 11 -ws 1 -maxiter 50 -distance 0.001 -prior dirichlet -b 0.01 -plib /tmp/tmpCNK6l0/files/000/dataset_27.dat -spmap uni -spfuzz 0.5 + +model: mod= zoops nmotifs= 1 evt= inf +object function= E-value of product of p-values +width: minw= 8 maxw= 50 +width: wg= 11 ws= 1 endgaps= yes +nsites: minsites= 2 maxsites= 30 wnsites= 0.8 +theta: spmap= uni spfuzz= 0.5 +global: substring= yes branching= no wbranch= no +em: prior= dirichlet b= 0.01 maxiter= 50 + distance= 0.001 +data: n= 1500 N= 30 shuffle= -1 +strands: + +sample: seed= 0 ctfrac= -1 maxwords= -1 +Dirichlet mixture priors file: dataset_27.dat +Letter frequencies in dataset: +A 0.294 C 0.231 G 0.257 T 0.217 +Background letter frequencies (from dataset with add-one prior applied): +A 0.294 C 0.231 G 0.257 T 0.217 +******************************************************************************** + + +******************************************************************************** +MOTIF 1 MEME width = 11 sites = 30 llr = 254 E-value = 5.1e-040 +******************************************************************************** +-------------------------------------------------------------------------------- + Motif 1 Description +-------------------------------------------------------------------------------- +Simplified A 3313:9:a798 +pos.-specific C 1:3::1:::1: +probability G 6756::::::2 +matrix T 1:11a1a:3:: + + bits 2.2 * + 2.0 * * + 1.8 * * + 1.5 * ** * +Relative 1.3 * ** * +Entropy 1.1 ****** +(12.2 bits) 0.9 * ******* + 0.7 * ******* + 0.4 ** ******** + 0.2 *********** + 0.0 ----------- + +Multilevel GGGGTATAAAA +consensus AACA T +sequence + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 sites sorted by position p-value +-------------------------------------------------------------------------------- +Sequence name Start P-value Site +------------- ----- --------- ----------- +chr21_46046964_46047014_ 13 4.51e-07 AAGGCCAGGA GGGGTATAAAA GCCTGAGAGC +chr21_46031920_46031970_ 16 2.22e-06 ATACCCAGGG AGGGTATAAAA CCTCAGCAGC +chr21_32202076_32202126_ 14 2.74e-06 CCACCAGCTT GAGGTATAAAA AGCCCTGTAC +chr21_46057197_46057247_ 37 4.86e-06 ACAGGCCCTG GGCATATAAAA GCC +chr21_45993530_45993580_ 8 4.86e-06 CCAAGGA GGAGTATAAAA GCCCCACAAA +chr21_45971413_45971463_ 10 4.86e-06 CAGGCCCTG GGCATATAAAA GCCCCAGCAG +chr21_31964683_31964733_ 14 4.86e-06 GATTCACTGA GGCATATAAAA GGCCCTCTGC +chr21_47517957_47518007_ 33 6.48e-06 CCGGCGGGGC GGGGTATAAAG GGGGCGG +chr21_45978668_45978718_ 5 6.48e-06 CAGA GGGGTATAAAG GTTCCGACCA +chr21_32185595_32185645_ 19 6.48e-06 CACCAGAGCT GGGATATATAA AGAAGGTTCT +chr21_32410820_32410870_ 22 1.38e-05 AATCACTGAG GATGTATAAAA GTCCCAGGGA +chr21_31992870_31992920_ 17 1.38e-05 CACTATTGAA GATGTATAAAA TTTCATTTGC +chr21_19617074_19617124_ 40 1.41e-05 CCTCGGGACG TGGGTATATAA +chr21_31914206_31914256_ 16 1.61e-05 CCCACTACTT AGAGTATAAAA TCATTCTGAG +chr21_46020421_46020471_ 3 1.95e-05 GA GACATATAAAA GCCAACATCC +chr21_32253899_32253949_ 18 1.95e-05 CCCACCAGCA AGGATATATAA AAGCTCAGGA +chr21_45705687_45705737_ 38 2.16e-05 CGTGGTCGCG GGGGTATAACA GC +chr21_47575506_47575556_ 31 3.04e-05 GCTGCCGGTG AGCGTATAAAG GCCCTGGCG +chr21_31744582_31744632_ 13 3.04e-05 CAGGTCTAAG AGCATATATAA CTTGGAGTCC +chr21_31768316_31768366_ 1 3.67e-05 . AACGTATATAA ATGGTCCTGT +chr21_26934381_26934431_ 28 3.93e-05 AGTCACAAGT GAGTTATAAAA GGGTCGCACG +chr21_31933633_31933683_ 5 5.65e-05 TCAG AGTATATATAA ATGTTCCTGT +chr21_31710037_31710087_ 15 6.24e-05 CCCAGGTTTC TGAGTATATAA TCGCCGCACC +chr21_36411748_36411798_ 23 7.15e-05 AGTTTCAGTT GGCATCtaaaa attatataac +chr21_46102103_46102153_ 37 1.39e-04 TGCCTGGGTC CAGGTATAAAG GCT +chr21_46086869_46086919_ 38 1.39e-04 TGCCTGGGCC CAGGTATAAAG GC +chr21_37838750_37838800_ 3 4.81e-04 ga tggttttataa ggggcctcac +chr21_31962741_31962791_ 14 8.57e-04 TATAACTCAG GTTGGATAAAA TAATTTGTAC +chr21_31973364_31973414_ 8 1.47e-03 aaactta aaactctataa acttaaaact +chr21_28217753_28217803_ 27 2.64e-03 GGTGGGGGTG GGGGTTTCACT GGTCCACTAT +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 block diagrams +-------------------------------------------------------------------------------- +SEQUENCE NAME POSITION P-VALUE MOTIF DIAGRAM +------------- ---------------- ------------- +chr21_46046964_46047014_ 4.5e-07 12_[+1]_27 +chr21_46031920_46031970_ 2.2e-06 15_[+1]_24 +chr21_32202076_32202126_ 2.7e-06 13_[+1]_26 +chr21_46057197_46057247_ 4.9e-06 36_[+1]_3 +chr21_45993530_45993580_ 4.9e-06 7_[+1]_32 +chr21_45971413_45971463_ 4.9e-06 9_[+1]_30 +chr21_31964683_31964733_ 4.9e-06 13_[+1]_26 +chr21_47517957_47518007_ 6.5e-06 32_[+1]_7 +chr21_45978668_45978718_ 6.5e-06 4_[+1]_35 +chr21_32185595_32185645_ 6.5e-06 18_[+1]_21 +chr21_32410820_32410870_ 1.4e-05 21_[+1]_18 +chr21_31992870_31992920_ 1.4e-05 16_[+1]_23 +chr21_19617074_19617124_ 1.4e-05 39_[+1] +chr21_31914206_31914256_ 1.6e-05 15_[+1]_24 +chr21_46020421_46020471_ 1.9e-05 2_[+1]_37 +chr21_32253899_32253949_ 1.9e-05 17_[+1]_22 +chr21_45705687_45705737_ 2.2e-05 37_[+1]_2 +chr21_47575506_47575556_ 3e-05 30_[+1]_9 +chr21_31744582_31744632_ 3e-05 12_[+1]_27 +chr21_31768316_31768366_ 3.7e-05 [+1]_39 +chr21_26934381_26934431_ 3.9e-05 27_[+1]_12 +chr21_31933633_31933683_ 5.6e-05 4_[+1]_35 +chr21_31710037_31710087_ 6.2e-05 14_[+1]_25 +chr21_36411748_36411798_ 7.1e-05 22_[+1]_17 +chr21_46102103_46102153_ 0.00014 36_[+1]_3 +chr21_46086869_46086919_ 0.00014 37_[+1]_2 +chr21_37838750_37838800_ 0.00048 2_[+1]_37 +chr21_31962741_31962791_ 0.00086 13_[+1]_26 +chr21_31973364_31973414_ 0.0015 7_[+1]_32 +chr21_28217753_28217803_ 0.0026 26_[+1]_13 +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 in BLOCKS format +-------------------------------------------------------------------------------- +BL MOTIF 1 width=11 seqs=30 +chr21_46046964_46047014_ ( 13) GGGGTATAAAA 1 +chr21_46031920_46031970_ ( 16) AGGGTATAAAA 1 +chr21_32202076_32202126_ ( 14) GAGGTATAAAA 1 +chr21_46057197_46057247_ ( 37) GGCATATAAAA 1 +chr21_45993530_45993580_ ( 8) GGAGTATAAAA 1 +chr21_45971413_45971463_ ( 10) GGCATATAAAA 1 +chr21_31964683_31964733_ ( 14) GGCATATAAAA 1 +chr21_47517957_47518007_ ( 33) GGGGTATAAAG 1 +chr21_45978668_45978718_ ( 5) GGGGTATAAAG 1 +chr21_32185595_32185645_ ( 19) GGGATATATAA 1 +chr21_32410820_32410870_ ( 22) GATGTATAAAA 1 +chr21_31992870_31992920_ ( 17) GATGTATAAAA 1 +chr21_19617074_19617124_ ( 40) TGGGTATATAA 1 +chr21_31914206_31914256_ ( 16) AGAGTATAAAA 1 +chr21_46020421_46020471_ ( 3) GACATATAAAA 1 +chr21_32253899_32253949_ ( 18) AGGATATATAA 1 +chr21_45705687_45705737_ ( 38) GGGGTATAACA 1 +chr21_47575506_47575556_ ( 31) AGCGTATAAAG 1 +chr21_31744582_31744632_ ( 13) AGCATATATAA 1 +chr21_31768316_31768366_ ( 1) AACGTATATAA 1 +chr21_26934381_26934431_ ( 28) GAGTTATAAAA 1 +chr21_31933633_31933683_ ( 5) AGTATATATAA 1 +chr21_31710037_31710087_ ( 15) TGAGTATATAA 1 +chr21_36411748_36411798_ ( 23) GGCATCTAAAA 1 +chr21_46102103_46102153_ ( 37) CAGGTATAAAG 1 +chr21_46086869_46086919_ ( 38) CAGGTATAAAG 1 +chr21_37838750_37838800_ ( 3) TGGTTTTATAA 1 +chr21_31962741_31962791_ ( 14) GTTGGATAAAA 1 +chr21_31973364_31973414_ ( 8) AAACTCTATAA 1 +chr21_28217753_28217803_ ( 27) GGGGTTTCACT 1 +// + +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 position-specific scoring matrix +-------------------------------------------------------------------------------- +log-odds matrix: alength= 4 w= 11 n= 1200 bayes= 5.2854 E= 5.1e-040 + -14 -179 114 -112 + 3 -1155 137 -270 + -114 20 86 -71 + 3 -279 122 -170 + -1155 -1155 -295 215 + 156 -179 -1155 -170 + -1155 -1155 -1155 220 + 172 -279 -1155 -1155 + 125 -1155 -1155 46 + 167 -179 -1155 -1155 + 144 -1155 -63 -270 +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 position-specific probability matrix +-------------------------------------------------------------------------------- +letter-probability matrix: alength= 4 w= 11 nsites= 30 E= 5.1e-040 + 0.266667 0.066667 0.566667 0.100000 + 0.300000 0.000000 0.666667 0.033333 + 0.133333 0.266667 0.466667 0.133333 + 0.300000 0.033333 0.600000 0.066667 + 0.000000 0.000000 0.033333 0.966667 + 0.866667 0.066667 0.000000 0.066667 + 0.000000 0.000000 0.000000 1.000000 + 0.966667 0.033333 0.000000 0.000000 + 0.700000 0.000000 0.000000 0.300000 + 0.933333 0.066667 0.000000 0.000000 + 0.800000 0.000000 0.166667 0.033333 +-------------------------------------------------------------------------------- + +-------------------------------------------------------------------------------- + Motif 1 regular expression +-------------------------------------------------------------------------------- +[GA][GA][GC][GA]TATA[AT]AA +-------------------------------------------------------------------------------- + + + + +Time 0.32 secs. + +******************************************************************************** + + +******************************************************************************** +SUMMARY OF MOTIFS +******************************************************************************** + +-------------------------------------------------------------------------------- + Combined block diagrams: non-overlapping sites with p-value < 0.0001 +-------------------------------------------------------------------------------- +SEQUENCE NAME COMBINED P-VALUE MOTIF DIAGRAM +------------- ---------------- ------------- +chr21_19617074_19617124_ 5.63e-04 39_[+1(1.41e-05)] +chr21_26934381_26934431_ 1.57e-03 27_[+1(3.93e-05)]_12 +chr21_28217753_28217803_ 1.00e-01 50 +chr21_31710037_31710087_ 2.49e-03 14_[+1(6.24e-05)]_25 +chr21_31744582_31744632_ 1.22e-03 12_[+1(3.04e-05)]_27 +chr21_31768316_31768366_ 1.47e-03 [+1(3.67e-05)]_39 +chr21_31914206_31914256_ 6.45e-04 15_[+1(1.61e-05)]_24 +chr21_31933633_31933683_ 2.26e-03 4_[+1(5.65e-05)]_35 +chr21_31962741_31962791_ 3.37e-02 50 +chr21_31964683_31964733_ 1.95e-04 13_[+1(4.86e-06)]_26 +chr21_31973364_31973414_ 5.73e-02 50 +chr21_31992870_31992920_ 5.52e-04 16_[+1(1.38e-05)]_23 +chr21_32185595_32185645_ 2.59e-04 18_[+1(6.48e-06)]_21 +chr21_32202076_32202126_ 1.10e-04 13_[+1(2.74e-06)]_26 +chr21_32253899_32253949_ 7.78e-04 17_[+1(1.95e-05)]_22 +chr21_32410820_32410870_ 5.52e-04 21_[+1(1.38e-05)]_18 +chr21_36411748_36411798_ 2.85e-03 22_[+1(7.15e-05)]_17 +chr21_37838750_37838800_ 1.90e-02 50 +chr21_45705687_45705737_ 8.63e-04 37_[+1(2.16e-05)]_2 +chr21_45971413_45971463_ 1.95e-04 9_[+1(4.86e-06)]_30 +chr21_45978668_45978718_ 2.59e-04 4_[+1(6.48e-06)]_35 +chr21_45993530_45993580_ 1.95e-04 7_[+1(4.86e-06)]_32 +chr21_46020421_46020471_ 7.78e-04 2_[+1(1.95e-05)]_37 +chr21_46031920_46031970_ 8.89e-05 15_[+1(2.22e-06)]_24 +chr21_46046964_46047014_ 1.80e-05 12_[+1(4.51e-07)]_27 +chr21_46057197_46057247_ 1.95e-04 36_[+1(4.86e-06)]_3 +chr21_46086869_46086919_ 5.54e-03 50 +chr21_46102103_46102153_ 5.54e-03 50 +chr21_47517957_47518007_ 2.59e-04 32_[+1(6.48e-06)]_7 +chr21_47575506_47575556_ 1.22e-03 30_[+1(3.04e-05)]_9 +-------------------------------------------------------------------------------- + +******************************************************************************** + + +******************************************************************************** +Stopped because requested number of motifs (1) found. +******************************************************************************** + +CPU: bigsky + +********************************************************************************
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_output_xml_1.xml Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,1285 @@ +<?xml version='1.0' encoding='UTF-8' standalone='yes'?> +<!-- Document definition --> +<!DOCTYPE MEME[ +<!ELEMENT MEME ( + training_set, + model, + motifs, + scanned_sites_summary? +)> +<!ATTLIST MEME + version CDATA #REQUIRED + release CDATA #REQUIRED +> +<!-- Training-set elements --> +<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)> +<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED> +<!ELEMENT alphabet (letter*)> +<!ATTLIST alphabet name CDATA #REQUIRED> +<!ELEMENT ambigs (letter*)> +<!ELEMENT letter EMPTY> +<!ATTLIST letter id ID #REQUIRED> +<!ATTLIST letter symbol CDATA #REQUIRED> +<!ATTLIST letter equals CDATA #IMPLIED> +<!ATTLIST letter aliases CDATA #IMPLIED> +<!ATTLIST letter complement CDATA #IMPLIED> +<!ATTLIST letter name CDATA #IMPLIED> +<!ATTLIST letter colour CDATA #IMPLIED> +<!ELEMENT sequence EMPTY> +<!ATTLIST sequence id ID #REQUIRED + name CDATA #REQUIRED + length CDATA #REQUIRED + weight CDATA #REQUIRED +> +<!ELEMENT letter_frequencies (alphabet_array)> + +<!-- Model elements --> +<!ELEMENT model ( + command_line, + host, + type, + nmotifs, + evalue_threshold, + object_function, + min_width, + max_width, + minic, + wg, + ws, + endgaps, + minsites, + maxsites, + wnsites, + prob, + spmap, + spfuzz, + prior, + beta, + maxiter, + distance, + num_sequences, + num_positions, + seed, + seqfrac, + strands, + priors_file, + reason_for_stopping, + background_frequencies +)> +<!ELEMENT command_line (#PCDATA)*> +<!ELEMENT host (#PCDATA)*> +<!ELEMENT type (#PCDATA)*> +<!ELEMENT nmotifs (#PCDATA)*> +<!ELEMENT evalue_threshold (#PCDATA)*> +<!ELEMENT object_function (#PCDATA)*> +<!ELEMENT min_width (#PCDATA)*> +<!ELEMENT max_width (#PCDATA)*> +<!ELEMENT minic (#PCDATA)*> +<!ELEMENT wg (#PCDATA)*> +<!ELEMENT ws (#PCDATA)*> +<!ELEMENT endgaps (#PCDATA)*> +<!ELEMENT minsites (#PCDATA)*> +<!ELEMENT maxsites (#PCDATA)*> +<!ELEMENT wnsites (#PCDATA)*> +<!ELEMENT prob (#PCDATA)*> +<!ELEMENT spmap (#PCDATA)*> +<!ELEMENT spfuzz (#PCDATA)*> +<!ELEMENT prior (#PCDATA)*> +<!ELEMENT beta (#PCDATA)*> +<!ELEMENT maxiter (#PCDATA)*> +<!ELEMENT distance (#PCDATA)*> +<!ELEMENT num_sequences (#PCDATA)*> +<!ELEMENT num_positions (#PCDATA)*> +<!ELEMENT seed (#PCDATA)*> +<!ELEMENT seqfrac (#PCDATA)*> +<!ELEMENT strands (#PCDATA)*> +<!ELEMENT priors_file (#PCDATA)*> +<!ELEMENT reason_for_stopping (#PCDATA)*> +<!ELEMENT background_frequencies (alphabet_array)> +<!ATTLIST background_frequencies source CDATA #REQUIRED> + +<!-- Motif elements --> +<!ELEMENT motifs (motif*)> +<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)> +<!ATTLIST motif id ID #REQUIRED + name CDATA #REQUIRED + width CDATA #REQUIRED + sites CDATA #REQUIRED + llr CDATA #REQUIRED + ic CDATA #REQUIRED + re CDATA #REQUIRED + bayes_threshold CDATA #REQUIRED + e_value CDATA #REQUIRED + elapsed_time CDATA #REQUIRED + url CDATA "" +> +<!ELEMENT scores (alphabet_matrix)> +<!ELEMENT probabilities (alphabet_matrix)> +<!ELEMENT regular_expression (#PCDATA)*> + +<!-- Contributing site elements --> +<!-- Contributing sites are motif occurences found during the motif discovery phase --> +<!ELEMENT contributing_sites (contributing_site*)> +<!ELEMENT contributing_site (left_flank, site, right_flank)> +<!ATTLIST contributing_site sequence_id IDREF #REQUIRED + position CDATA #REQUIRED + strand (plus|minus|none) 'none' + pvalue CDATA #REQUIRED +> +<!-- The left_flank contains the sequence for 10 bases to the left of the motif start --> +<!ELEMENT left_flank (#PCDATA)> +<!-- The site contains the sequence for the motif instance --> +<!ELEMENT site (letter_ref*)> +<!-- The right_flank contains the sequence for 10 bases to the right of the motif end --> +<!ELEMENT right_flank (#PCDATA)> + +<!-- Scanned site elements --> +<!-- Scanned sites are motif occurences found during the sequence scan phase --> +<!ELEMENT scanned_sites_summary (scanned_sites*)> +<!ATTLIST scanned_sites_summary p_thresh CDATA #REQUIRED> +<!ELEMENT scanned_sites (scanned_site*)> +<!ATTLIST scanned_sites sequence_id IDREF #REQUIRED + pvalue CDATA #REQUIRED + num_sites CDATA #REQUIRED> +<!ELEMENT scanned_site EMPTY> +<!ATTLIST scanned_site motif_id IDREF #REQUIRED + strand (plus|minus|none) 'none' + position CDATA #REQUIRED + pvalue CDATA #REQUIRED> + +<!-- Utility elements --> +<!-- A reference to a letter in the alphabet --> +<!ELEMENT letter_ref EMPTY> +<!ATTLIST letter_ref letter_id IDREF #REQUIRED> +<!-- A alphabet-array contains one floating point value for each letter in an alphabet --> +<!ELEMENT alphabet_array (value*)> +<!ELEMENT value (#PCDATA)> +<!ATTLIST value letter_id IDREF #REQUIRED> + +<!-- A alphabet_matrix contains one alphabet_array for each position in a motif --> +<!ELEMENT alphabet_matrix (alphabet_array*)> + +]> +<!-- Begin document body --> +<MEME version="4.11.2" release="Thu May 05 14:58:55 2016 -0700"> +<training_set datafile="/Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2490.dat" length="30"> +<alphabet name="Protein" like="protein"> +<letter id="A" symbol="A" name="Alanine" colour="0000CC"/> +<letter id="C" symbol="C" name="Cysteine" colour="0000CC"/> +<letter id="D" symbol="D" name="Aspartic acid" colour="FF00FF"/> +<letter id="E" symbol="E" name="Glutamic acid" colour="FF00FF"/> +<letter id="F" symbol="F" name="Phenylalanine" colour="0000CC"/> +<letter id="G" symbol="G" name="Glycine" colour="FFB300"/> +<letter id="H" symbol="H" name="Histidine" colour="FFCCCC"/> +<letter id="I" symbol="I" name="Isoleucine" colour="0000CC"/> +<letter id="K" symbol="K" name="Lysine" colour="CC0000"/> +<letter id="L" symbol="L" name="Leucine" colour="0000CC"/> +<letter id="M" symbol="M" name="Methionine" colour="0000CC"/> +<letter id="N" symbol="N" name="Asparagine" colour="008000"/> +<letter id="P" symbol="P" name="Proline" colour="FFFF00"/> +<letter id="Q" symbol="Q" name="Glutamine" colour="008000"/> +<letter id="R" symbol="R" name="Arginine" colour="CC0000"/> +<letter id="S" symbol="S" name="Serine" colour="008000"/> +<letter id="T" symbol="T" name="Threonine" colour="008000"/> +<letter id="V" symbol="V" name="Valine" colour="0000CC"/> +<letter id="W" symbol="W" name="Tryptophan" colour="0000CC"/> +<letter id="Y" symbol="Y" name="Tyrosine" colour="33E6CC"/> +<letter id="X" symbol="X" aliases="*." equals="ACDEFGHIKLMNPQRSTVWY" name="Any amino acid"/> +<letter id="B" symbol="B" equals="DN" name="Asparagine or Aspartic acid"/> +<letter id="Z" symbol="Z" equals="EQ" name="Glutamine or Glutamic acid"/> +<letter id="J" symbol="J" equals="IL" name="Leucine or Isoleucine"/> +</alphabet> +<sequence id="sequence_0" name="chr21_19617074_19617124_+" length="50" weight="1.000000" /> +<sequence id="sequence_1" name="chr21_26934381_26934431_+" length="50" weight="1.000000" /> +<sequence id="sequence_2" name="chr21_28217753_28217803_-" length="50" weight="1.000000" /> +<sequence id="sequence_3" name="chr21_31710037_31710087_-" length="50" weight="1.000000" /> +<sequence id="sequence_4" name="chr21_31744582_31744632_-" length="50" weight="1.000000" /> +<sequence id="sequence_5" name="chr21_31768316_31768366_+" length="50" weight="1.000000" /> +<sequence id="sequence_6" name="chr21_31914206_31914256_-" length="50" weight="1.000000" /> +<sequence id="sequence_7" name="chr21_31933633_31933683_-" length="50" weight="1.000000" /> +<sequence id="sequence_8" name="chr21_31962741_31962791_-" length="50" weight="1.000000" /> +<sequence id="sequence_9" name="chr21_31964683_31964733_+" length="50" weight="1.000000" /> +<sequence id="sequence_10" name="chr21_31973364_31973414_+" length="50" weight="1.000000" /> +<sequence id="sequence_11" name="chr21_31992870_31992920_+" length="50" weight="1.000000" /> +<sequence id="sequence_12" name="chr21_32185595_32185645_-" length="50" weight="1.000000" /> +<sequence id="sequence_13" name="chr21_32202076_32202126_-" length="50" weight="1.000000" /> +<sequence id="sequence_14" name="chr21_32253899_32253949_-" length="50" weight="1.000000" /> +<sequence id="sequence_15" name="chr21_32410820_32410870_-" length="50" weight="1.000000" /> +<sequence id="sequence_16" name="chr21_36411748_36411798_-" length="50" weight="1.000000" /> +<sequence id="sequence_17" name="chr21_37838750_37838800_-" length="50" weight="1.000000" /> +<sequence id="sequence_18" name="chr21_45705687_45705737_+" length="50" weight="1.000000" /> +<sequence id="sequence_19" name="chr21_45971413_45971463_-" length="50" weight="1.000000" /> +<sequence id="sequence_20" name="chr21_45978668_45978718_-" length="50" weight="1.000000" /> +<sequence id="sequence_21" name="chr21_45993530_45993580_+" length="50" weight="1.000000" /> +<sequence id="sequence_22" name="chr21_46020421_46020471_+" length="50" weight="1.000000" /> +<sequence id="sequence_23" name="chr21_46031920_46031970_+" length="50" weight="1.000000" /> +<sequence id="sequence_24" name="chr21_46046964_46047014_+" length="50" weight="1.000000" /> +<sequence id="sequence_25" name="chr21_46057197_46057247_+" length="50" weight="1.000000" /> +<sequence id="sequence_26" name="chr21_46086869_46086919_-" length="50" weight="1.000000" /> +<sequence id="sequence_27" name="chr21_46102103_46102153_-" length="50" weight="1.000000" /> +<sequence id="sequence_28" name="chr21_47517957_47518007_+" length="50" weight="1.000000" /> +<sequence id="sequence_29" name="chr21_47575506_47575556_-" length="50" weight="1.000000" /> +<letter_frequencies> +<alphabet_array> +<value letter_id="A">0.294</value> +<value letter_id="C">0.231</value> +<value letter_id="D">0.000</value> +<value letter_id="E">0.000</value> +<value letter_id="F">0.000</value> +<value letter_id="G">0.257</value> +<value letter_id="H">0.000</value> +<value letter_id="I">0.000</value> +<value letter_id="K">0.000</value> +<value letter_id="L">0.000</value> +<value letter_id="M">0.000</value> +<value letter_id="N">0.000</value> +<value letter_id="P">0.000</value> +<value letter_id="Q">0.000</value> +<value letter_id="R">0.000</value> +<value letter_id="S">0.000</value> +<value letter_id="T">0.217</value> +<value letter_id="V">0.000</value> +<value letter_id="W">0.000</value> +<value letter_id="Y">0.000</value> +</alphabet_array> +</letter_frequencies> +</training_set> +<model> +<command_line>meme /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2490.dat -o /Users/gvk/work/git_workspace/galaxy/database/job_working_directory/001/1912/dataset_2530_files -nostatus </command_line> +<host>MacBook-Pro-2.local</host> +<type>zoops</type> +<nmotifs>1</nmotifs> +<evalue_threshold>inf</evalue_threshold> +<object_function>E-value of product of p-values</object_function> +<use_llr>0</use_llr> +<min_width>8</min_width> +<max_width>50</max_width> +<wg>11</wg> +<ws>1</ws> +<endgaps>yes</endgaps> +<substring>yes</substring> +<minsites>2</minsites> +<maxsites>30</maxsites> +<wnsites>0.8</wnsites> +<spmap>pam</spmap> +<spfuzz>120</spfuzz> +<prior>megap</prior> +<beta>7500</beta> +<maxiter>50</maxiter> +<distance>1e-05</distance> +<num_sequences>30</num_sequences> +<num_positions>1500</num_positions> +<seed>0</seed> +<ctfrac>-1</ctfrac> +<maxwords>-1</maxwords> +<strands>none</strands> +<priors_file>prior30.plib</priors_file> +<reason_for_stopping>Stopped because requested number of motifs (1) found.</reason_for_stopping> +<background_frequencies source="dataset with add-one prior applied"> +<alphabet_array> +<value letter_id="A">0.291</value> +<value letter_id="C">0.229</value> +<value letter_id="D">0.001</value> +<value letter_id="E">0.001</value> +<value letter_id="F">0.001</value> +<value letter_id="G">0.255</value> +<value letter_id="H">0.001</value> +<value letter_id="I">0.001</value> +<value letter_id="K">0.001</value> +<value letter_id="L">0.001</value> +<value letter_id="M">0.001</value> +<value letter_id="N">0.001</value> +<value letter_id="P">0.001</value> +<value letter_id="Q">0.001</value> +<value letter_id="R">0.001</value> +<value letter_id="S">0.001</value> +<value letter_id="T">0.215</value> +<value letter_id="V">0.001</value> +<value letter_id="W">0.001</value> +<value letter_id="Y">0.001</value> +</alphabet_array> +</background_frequencies> +</model> +<motifs> +<motif id="motif_1" name="1" width="11" sites="25" ic="40.0" re="13.8" llr="239" e_value="2.4e-011" bayes_threshold="5.33554" elapsed_time="0.533107"> +<scores> +<alphabet_matrix> +<alphabet_array> +<value letter_id="A">-32</value> +<value letter_id="C">-680</value> +<value letter_id="D">91</value> +<value letter_id="E">77</value> +<value letter_id="F">7</value> +<value letter_id="G">138</value> +<value letter_id="H">-20</value> +<value letter_id="I">55</value> +<value letter_id="K">64</value> +<value letter_id="L">107</value> +<value letter_id="M">11</value> +<value letter_id="N">150</value> +<value letter_id="P">142</value> +<value letter_id="Q">72</value> +<value letter_id="R">87</value> +<value letter_id="S">396</value> +<value letter_id="T">-148</value> +<value letter_id="V">221</value> +<value letter_id="W">-140</value> +<value letter_id="Y">-36</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">-11</value> +<value letter_id="C">-680</value> +<value letter_id="D">89</value> +<value letter_id="E">76</value> +<value letter_id="F">7</value> +<value letter_id="G">137</value> +<value letter_id="H">-21</value> +<value letter_id="I">55</value> +<value letter_id="K">63</value> +<value letter_id="L">107</value> +<value letter_id="M">10</value> +<value letter_id="N">149</value> +<value letter_id="P">141</value> +<value letter_id="Q">71</value> +<value letter_id="R">87</value> +<value letter_id="S">396</value> +<value letter_id="T">-239</value> +<value letter_id="V">220</value> +<value letter_id="W">-140</value> +<value letter_id="Y">-36</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">-79</value> +<value letter_id="C">41</value> +<value letter_id="D">4</value> +<value letter_id="E">21</value> +<value letter_id="F">-7</value> +<value letter_id="G">44</value> +<value letter_id="H">-62</value> +<value letter_id="I">42</value> +<value letter_id="K">-5</value> +<value letter_id="L">99</value> +<value letter_id="M">0</value> +<value letter_id="N">99</value> +<value letter_id="P">138</value> +<value letter_id="Q">52</value> +<value letter_id="R">42</value> +<value letter_id="S">399</value> +<value letter_id="T">-46</value> +<value letter_id="V">223</value> +<value letter_id="W">-173</value> +<value letter_id="Y">-68</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">11</value> +<value letter_id="C">-677</value> +<value letter_id="D">48</value> +<value letter_id="E">47</value> +<value letter_id="F">-2</value> +<value letter_id="G">127</value> +<value letter_id="H">-43</value> +<value letter_id="I">46</value> +<value letter_id="K">27</value> +<value letter_id="L">101</value> +<value letter_id="M">3</value> +<value letter_id="N">124</value> +<value letter_id="P">138</value> +<value letter_id="Q">60</value> +<value letter_id="R">62</value> +<value letter_id="S">397</value> +<value letter_id="T">-235</value> +<value letter_id="V">220</value> +<value letter_id="W">-160</value> +<value letter_id="Y">-55</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">-596</value> +<value letter_id="C">-820</value> +<value letter_id="D">12</value> +<value letter_id="E">-21</value> +<value letter_id="F">-53</value> +<value letter_id="G">-267</value> +<value letter_id="H">-74</value> +<value letter_id="I">37</value> +<value letter_id="K">16</value> +<value letter_id="L">44</value> +<value letter_id="M">-37</value> +<value letter_id="N">98</value> +<value letter_id="P">31</value> +<value letter_id="Q">9</value> +<value letter_id="R">19</value> +<value letter_id="S">319</value> +<value letter_id="T">212</value> +<value letter_id="V">127</value> +<value letter_id="W">-193</value> +<value letter_id="Y">-95</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">165</value> +<value letter_id="C">-261</value> +<value letter_id="D">70</value> +<value letter_id="E">110</value> +<value letter_id="F">77</value> +<value letter_id="G">-521</value> +<value letter_id="H">-4</value> +<value letter_id="I">147</value> +<value letter_id="K">95</value> +<value letter_id="L">201</value> +<value letter_id="M">90</value> +<value letter_id="N">121</value> +<value letter_id="P">124</value> +<value letter_id="Q">91</value> +<value letter_id="R">107</value> +<value letter_id="S">425</value> +<value letter_id="T">-527</value> +<value letter_id="V">314</value> +<value letter_id="W">-95</value> +<value letter_id="Y">8</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">-838</value> +<value letter_id="C">-990</value> +<value letter_id="D">-89</value> +<value letter_id="E">-149</value> +<value letter_id="F">-151</value> +<value letter_id="G">-841</value> +<value letter_id="H">-161</value> +<value letter_id="I">-117</value> +<value letter_id="K">-113</value> +<value letter_id="L">-66</value> +<value letter_id="M">-209</value> +<value letter_id="N">-68</value> +<value letter_id="P">-69</value> +<value letter_id="Q">-129</value> +<value letter_id="R">-91</value> +<value letter_id="S">111</value> +<value letter_id="T">221</value> +<value letter_id="V">-55</value> +<value letter_id="W">-255</value> +<value letter_id="Y">-173</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">176</value> +<value letter_id="C">-858</value> +<value letter_id="D">-79</value> +<value letter_id="E">-103</value> +<value letter_id="F">-115</value> +<value letter_id="G">-717</value> +<value letter_id="H">-148</value> +<value letter_id="I">-95</value> +<value letter_id="K">-108</value> +<value letter_id="L">-17</value> +<value letter_id="M">-162</value> +<value letter_id="N">-61</value> +<value letter_id="P">-12</value> +<value letter_id="Q">-95</value> +<value letter_id="R">-69</value> +<value letter_id="S">193</value> +<value letter_id="T">-737</value> +<value letter_id="V">52</value> +<value letter_id="W">-240</value> +<value letter_id="Y">-153</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">134</value> +<value letter_id="C">-686</value> +<value letter_id="D">0</value> +<value letter_id="E">16</value> +<value letter_id="F">-12</value> +<value letter_id="G">-553</value> +<value letter_id="H">-68</value> +<value letter_id="I">44</value> +<value letter_id="K">-8</value> +<value letter_id="L">96</value> +<value letter_id="M">-9</value> +<value letter_id="N">88</value> +<value letter_id="P">124</value> +<value letter_id="Q">41</value> +<value letter_id="R">36</value> +<value letter_id="S">384</value> +<value letter_id="T">11</value> +<value letter_id="V">216</value> +<value letter_id="W">-177</value> +<value letter_id="Y">-71</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">165</value> +<value letter_id="C">-261</value> +<value letter_id="D">70</value> +<value letter_id="E">110</value> +<value letter_id="F">77</value> +<value letter_id="G">-521</value> +<value letter_id="H">-4</value> +<value letter_id="I">147</value> +<value letter_id="K">95</value> +<value letter_id="L">201</value> +<value letter_id="M">90</value> +<value letter_id="N">121</value> +<value letter_id="P">124</value> +<value letter_id="Q">91</value> +<value letter_id="R">107</value> +<value letter_id="S">425</value> +<value letter_id="T">-527</value> +<value letter_id="V">314</value> +<value letter_id="W">-95</value> +<value letter_id="Y">8</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">147</value> +<value letter_id="C">-614</value> +<value letter_id="D">89</value> +<value letter_id="E">129</value> +<value letter_id="F">93</value> +<value letter_id="G">-121</value> +<value letter_id="H">12</value> +<value letter_id="I">160</value> +<value letter_id="K">113</value> +<value letter_id="L">217</value> +<value letter_id="M">108</value> +<value letter_id="N">144</value> +<value letter_id="P">144</value> +<value letter_id="Q">111</value> +<value letter_id="R">125</value> +<value letter_id="S">447</value> +<value letter_id="T">-241</value> +<value letter_id="V">332</value> +<value letter_id="W">-81</value> +<value letter_id="Y">22</value> +</alphabet_array> +</alphabet_matrix> +</scores> +<probabilities> +<alphabet_matrix> +<alphabet_array> +<value letter_id="A">0.240000</value> +<value letter_id="C">0.000000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.680000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.080000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.280000</value> +<value letter_id="C">0.000000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.680000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.040000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.160000</value> +<value letter_id="C">0.320000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.360000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.160000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.320000</value> +<value letter_id="C">0.000000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.640000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.040000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.000000</value> +<value letter_id="C">0.000000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.040000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.960000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.960000</value> +<value letter_id="C">0.040000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.000000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.000000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.000000</value> +<value letter_id="C">0.000000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.000000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">1.000000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">1.000000</value> +<value letter_id="C">0.000000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.000000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.000000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.760000</value> +<value letter_id="C">0.000000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.000000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.240000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.960000</value> +<value letter_id="C">0.040000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.000000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.000000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.840000</value> +<value letter_id="C">0.000000</value> +<value letter_id="D">0.000000</value> +<value letter_id="E">0.000000</value> +<value letter_id="F">0.000000</value> +<value letter_id="G">0.120000</value> +<value letter_id="H">0.000000</value> +<value letter_id="I">0.000000</value> +<value letter_id="K">0.000000</value> +<value letter_id="L">0.000000</value> +<value letter_id="M">0.000000</value> +<value letter_id="N">0.000000</value> +<value letter_id="P">0.000000</value> +<value letter_id="Q">0.000000</value> +<value letter_id="R">0.000000</value> +<value letter_id="S">0.000000</value> +<value letter_id="T">0.040000</value> +<value letter_id="V">0.000000</value> +<value letter_id="W">0.000000</value> +<value letter_id="Y">0.000000</value> +</alphabet_array> +</alphabet_matrix> +</probabilities> +<regular_expression> +[GA][GA][GC][GA]TATA[AT]AA +</regular_expression> +<contributing_sites> +<contributing_site sequence_id="sequence_24" position="12" strand="none" pvalue="1.06e-06" > +<left_flank>AAGGCCAGGA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCCTGAGAGC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_25" position="36" strand="none" pvalue="3.41e-06" > +<left_flank>ACAGGCCCTG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_19" position="9" strand="none" pvalue="3.41e-06" > +<left_flank>CAGGCCCTG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCCCCAGCAG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_9" position="13" strand="none" pvalue="3.41e-06" > +<left_flank>GATTCACTGA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GGCCCTCTGC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_21" position="7" strand="none" pvalue="4.00e-06" > +<left_flank>CCAAGGA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCCCCACAAA</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_13" position="13" strand="none" pvalue="5.01e-06" > +<left_flank>CCACCAGCTT</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>AGCCCTGTAC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_23" position="15" strand="none" pvalue="6.06e-06" > +<left_flank>ATACCCAGGG</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>CCTCAGCAGC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_15" position="21" strand="none" pvalue="8.67e-06" > +<left_flank>AATCACTGAG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GTCCCAGGGA</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_12" position="18" strand="none" pvalue="8.67e-06" > +<left_flank>CACCAGAGCT</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>AGAAGGTTCT</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_11" position="16" strand="none" pvalue="8.67e-06" > +<left_flank>CACTATTGAA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>TTTCATTTGC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_22" position="2" strand="none" pvalue="1.21e-05" > +<left_flank>GA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCCAACATCC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_28" position="32" strand="none" pvalue="1.59e-05" > +<left_flank>CCGGCGGGGC</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +</site> +<right_flank>GGGGCGG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_20" position="4" strand="none" pvalue="1.59e-05" > +<left_flank>CAGA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +</site> +<right_flank>GTTCCGACCA</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_6" position="15" strand="none" pvalue="1.68e-05" > +<left_flank>CCCACTACTT</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>TCATTCTGAG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_14" position="19" strand="none" pvalue="2.03e-05" > +<left_flank>CACCAGCAAG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCTCAGGAGT</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_4" position="12" strand="none" pvalue="3.06e-05" > +<left_flank>CAGGTCTAAG</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>CTTGGAGTCC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_0" position="39" strand="none" pvalue="3.06e-05" > +<left_flank>CCTCGGGACG</left_flank> +<site> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank></right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_18" position="37" strand="none" pvalue="3.82e-05" > +<left_flank>CGTGGTCGCG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_5" position="0" strand="none" pvalue="3.82e-05" > +<left_flank></left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>ATGGTCCTGT</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_29" position="30" strand="none" pvalue="4.02e-05" > +<left_flank>GCTGCCGGTG</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +</site> +<right_flank>GCCCTGGCG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_1" position="27" strand="none" pvalue="5.52e-05" > +<left_flank>AGTCACAAGT</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GGGTCGCACG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_3" position="14" strand="none" pvalue="5.94e-05" > +<left_flank>CCCAGGTTTC</left_flank> +<site> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>TCGCCGCACC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_16" position="22" strand="none" pvalue="6.78e-05" > +<left_flank>AGTTTCAGTT</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>attatataac</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_7" position="2" strand="none" pvalue="2.08e-04" > +<left_flank>TC</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +</site> +<right_flank>AAATGTTCCT</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_8" position="13" strand="none" pvalue="4.05e-04" > +<left_flank>TATAACTCAG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>TAATTTGTAC</right_flank> +</contributing_site> +</contributing_sites> +</motif> +</motifs> +<scanned_sites_summary p_thresh="0.0001"> +<scanned_sites sequence_id="sequence_0" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="39" pvalue="3.06e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_1" pvalue="2.21e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="27" pvalue="5.52e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_2" pvalue="7.29e-01" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_3" pvalue="2.37e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="14" pvalue="5.94e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_4" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="12" pvalue="3.06e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_5" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="0" pvalue="3.82e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_6" pvalue="6.70e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="1.68e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_7" pvalue="1.81e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="4.54e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_8" pvalue="1.61e-02" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_9" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="3.41e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_10" pvalue="1.99e-01" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_11" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="16" pvalue="8.67e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_12" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="18" pvalue="8.67e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_13" pvalue="2.01e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="5.01e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_14" pvalue="8.11e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="19" pvalue="2.03e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_15" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="21" pvalue="8.67e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_16" pvalue="2.71e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="22" pvalue="6.78e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_17" pvalue="8.23e-02" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_18" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="37" pvalue="3.82e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_19" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="9" pvalue="3.41e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_20" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="1.59e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_21" pvalue="1.60e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="7" pvalue="4.00e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_22" pvalue="4.83e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="2" pvalue="1.21e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_23" pvalue="2.43e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="6.06e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_24" pvalue="4.26e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="12" pvalue="1.06e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_25" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="36" pvalue="3.41e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_26" pvalue="4.30e-02" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_27" pvalue="4.30e-02" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_28" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="32" pvalue="1.59e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_29" pvalue="1.61e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="30" pvalue="4.02e-05"/> +</scanned_sites> +</scanned_sites_summary> +</MEME>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_output_xml_2.xml Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,977 @@ +<?xml version='1.0' encoding='UTF-8' standalone='yes'?> +<!-- Document definition --> +<!DOCTYPE MEME[ +<!ELEMENT MEME ( + training_set, + model, + motifs, + scanned_sites_summary? +)> +<!ATTLIST MEME + version CDATA #REQUIRED + release CDATA #REQUIRED +> +<!-- Training-set elements --> +<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)> +<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED> +<!ELEMENT alphabet (letter*)> +<!ATTLIST alphabet name CDATA #REQUIRED> +<!ELEMENT ambigs (letter*)> +<!ELEMENT letter EMPTY> +<!ATTLIST letter id ID #REQUIRED> +<!ATTLIST letter symbol CDATA #REQUIRED> +<!ATTLIST letter equals CDATA #IMPLIED> +<!ATTLIST letter aliases CDATA #IMPLIED> +<!ATTLIST letter complement CDATA #IMPLIED> +<!ATTLIST letter name CDATA #IMPLIED> +<!ATTLIST letter colour CDATA #IMPLIED> +<!ELEMENT sequence EMPTY> +<!ATTLIST sequence id ID #REQUIRED + name CDATA #REQUIRED + length CDATA #REQUIRED + weight CDATA #REQUIRED +> +<!ELEMENT letter_frequencies (alphabet_array)> + +<!-- Model elements --> +<!ELEMENT model ( + command_line, + host, + type, + nmotifs, + evalue_threshold, + object_function, + min_width, + max_width, + minic, + wg, + ws, + endgaps, + minsites, + maxsites, + wnsites, + prob, + spmap, + spfuzz, + prior, + beta, + maxiter, + distance, + num_sequences, + num_positions, + seed, + seqfrac, + strands, + priors_file, + reason_for_stopping, + background_frequencies +)> +<!ELEMENT command_line (#PCDATA)*> +<!ELEMENT host (#PCDATA)*> +<!ELEMENT type (#PCDATA)*> +<!ELEMENT nmotifs (#PCDATA)*> +<!ELEMENT evalue_threshold (#PCDATA)*> +<!ELEMENT object_function (#PCDATA)*> +<!ELEMENT min_width (#PCDATA)*> +<!ELEMENT max_width (#PCDATA)*> +<!ELEMENT minic (#PCDATA)*> +<!ELEMENT wg (#PCDATA)*> +<!ELEMENT ws (#PCDATA)*> +<!ELEMENT endgaps (#PCDATA)*> +<!ELEMENT minsites (#PCDATA)*> +<!ELEMENT maxsites (#PCDATA)*> +<!ELEMENT wnsites (#PCDATA)*> +<!ELEMENT prob (#PCDATA)*> +<!ELEMENT spmap (#PCDATA)*> +<!ELEMENT spfuzz (#PCDATA)*> +<!ELEMENT prior (#PCDATA)*> +<!ELEMENT beta (#PCDATA)*> +<!ELEMENT maxiter (#PCDATA)*> +<!ELEMENT distance (#PCDATA)*> +<!ELEMENT num_sequences (#PCDATA)*> +<!ELEMENT num_positions (#PCDATA)*> +<!ELEMENT seed (#PCDATA)*> +<!ELEMENT seqfrac (#PCDATA)*> +<!ELEMENT strands (#PCDATA)*> +<!ELEMENT priors_file (#PCDATA)*> +<!ELEMENT reason_for_stopping (#PCDATA)*> +<!ELEMENT background_frequencies (alphabet_array)> +<!ATTLIST background_frequencies source CDATA #REQUIRED> + +<!-- Motif elements --> +<!ELEMENT motifs (motif*)> +<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)> +<!ATTLIST motif id ID #REQUIRED + name CDATA #REQUIRED + width CDATA #REQUIRED + sites CDATA #REQUIRED + llr CDATA #REQUIRED + ic CDATA #REQUIRED + re CDATA #REQUIRED + bayes_threshold CDATA #REQUIRED + e_value CDATA #REQUIRED + elapsed_time CDATA #REQUIRED + url CDATA "" +> +<!ELEMENT scores (alphabet_matrix)> +<!ELEMENT probabilities (alphabet_matrix)> +<!ELEMENT regular_expression (#PCDATA)*> + +<!-- Contributing site elements --> +<!-- Contributing sites are motif occurences found during the motif discovery phase --> +<!ELEMENT contributing_sites (contributing_site*)> +<!ELEMENT contributing_site (left_flank, site, right_flank)> +<!ATTLIST contributing_site sequence_id IDREF #REQUIRED + position CDATA #REQUIRED + strand (plus|minus|none) 'none' + pvalue CDATA #REQUIRED +> +<!-- The left_flank contains the sequence for 10 bases to the left of the motif start --> +<!ELEMENT left_flank (#PCDATA)> +<!-- The site contains the sequence for the motif instance --> +<!ELEMENT site (letter_ref*)> +<!-- The right_flank contains the sequence for 10 bases to the right of the motif end --> +<!ELEMENT right_flank (#PCDATA)> + +<!-- Scanned site elements --> +<!-- Scanned sites are motif occurences found during the sequence scan phase --> +<!ELEMENT scanned_sites_summary (scanned_sites*)> +<!ATTLIST scanned_sites_summary p_thresh CDATA #REQUIRED> +<!ELEMENT scanned_sites (scanned_site*)> +<!ATTLIST scanned_sites sequence_id IDREF #REQUIRED + pvalue CDATA #REQUIRED + num_sites CDATA #REQUIRED> +<!ELEMENT scanned_site EMPTY> +<!ATTLIST scanned_site motif_id IDREF #REQUIRED + strand (plus|minus|none) 'none' + position CDATA #REQUIRED + pvalue CDATA #REQUIRED> + +<!-- Utility elements --> +<!-- A reference to a letter in the alphabet --> +<!ELEMENT letter_ref EMPTY> +<!ATTLIST letter_ref letter_id IDREF #REQUIRED> +<!-- A alphabet-array contains one floating point value for each letter in an alphabet --> +<!ELEMENT alphabet_array (value*)> +<!ELEMENT value (#PCDATA)> +<!ATTLIST value letter_id IDREF #REQUIRED> + +<!-- A alphabet_matrix contains one alphabet_array for each position in a motif --> +<!ELEMENT alphabet_matrix (alphabet_array*)> + +]> +<!-- Begin document body --> +<MEME version="4.11.2" release="Thu May 05 14:58:55 2016 -0700"> +<training_set datafile="Galaxy_FASTA_Input" length="30"> +<alphabet name="DNA" like="dna"> +<letter id="A" symbol="A" complement="T" name="Adenine" colour="CC0000"/> +<letter id="C" symbol="C" complement="G" name="Cytosine" colour="0000CC"/> +<letter id="G" symbol="G" complement="C" name="Guanine" colour="FFB300"/> +<letter id="T" symbol="T" aliases="U" complement="A" name="Thymine" colour="008000"/> +<letter id="N" symbol="N" aliases="X." equals="ACGT" name="Any base"/> +<letter id="V" symbol="V" equals="ACG" name="Not T"/> +<letter id="H" symbol="H" equals="ACT" name="Not G"/> +<letter id="D" symbol="D" equals="AGT" name="Not C"/> +<letter id="B" symbol="B" equals="CGT" name="Not A"/> +<letter id="M" symbol="M" equals="AC" name="Amino"/> +<letter id="R" symbol="R" equals="AG" name="Purine"/> +<letter id="W" symbol="W" equals="AT" name="Weak"/> +<letter id="S" symbol="S" equals="CG" name="Strong"/> +<letter id="Y" symbol="Y" equals="CT" name="Pyrimidine"/> +<letter id="K" symbol="K" equals="GT" name="Keto"/> +</alphabet> +<sequence id="sequence_0" name="chr21_19617074_19617124_+" length="50" weight="1.000000" /> +<sequence id="sequence_1" name="chr21_26934381_26934431_+" length="50" weight="1.000000" /> +<sequence id="sequence_2" name="chr21_28217753_28217803_-" length="50" weight="1.000000" /> +<sequence id="sequence_3" name="chr21_31710037_31710087_-" length="50" weight="1.000000" /> +<sequence id="sequence_4" name="chr21_31744582_31744632_-" length="50" weight="1.000000" /> +<sequence id="sequence_5" name="chr21_31768316_31768366_+" length="50" weight="1.000000" /> +<sequence id="sequence_6" name="chr21_31914206_31914256_-" length="50" weight="1.000000" /> +<sequence id="sequence_7" name="chr21_31933633_31933683_-" length="50" weight="1.000000" /> +<sequence id="sequence_8" name="chr21_31962741_31962791_-" length="50" weight="1.000000" /> +<sequence id="sequence_9" name="chr21_31964683_31964733_+" length="50" weight="1.000000" /> +<sequence id="sequence_10" name="chr21_31973364_31973414_+" length="50" weight="1.000000" /> +<sequence id="sequence_11" name="chr21_31992870_31992920_+" length="50" weight="1.000000" /> +<sequence id="sequence_12" name="chr21_32185595_32185645_-" length="50" weight="1.000000" /> +<sequence id="sequence_13" name="chr21_32202076_32202126_-" length="50" weight="1.000000" /> +<sequence id="sequence_14" name="chr21_32253899_32253949_-" length="50" weight="1.000000" /> +<sequence id="sequence_15" name="chr21_32410820_32410870_-" length="50" weight="1.000000" /> +<sequence id="sequence_16" name="chr21_36411748_36411798_-" length="50" weight="1.000000" /> +<sequence id="sequence_17" name="chr21_37838750_37838800_-" length="50" weight="1.000000" /> +<sequence id="sequence_18" name="chr21_45705687_45705737_+" length="50" weight="1.000000" /> +<sequence id="sequence_19" name="chr21_45971413_45971463_-" length="50" weight="1.000000" /> +<sequence id="sequence_20" name="chr21_45978668_45978718_-" length="50" weight="1.000000" /> +<sequence id="sequence_21" name="chr21_45993530_45993580_+" length="50" weight="1.000000" /> +<sequence id="sequence_22" name="chr21_46020421_46020471_+" length="50" weight="1.000000" /> +<sequence id="sequence_23" name="chr21_46031920_46031970_+" length="50" weight="1.000000" /> +<sequence id="sequence_24" name="chr21_46046964_46047014_+" length="50" weight="1.000000" /> +<sequence id="sequence_25" name="chr21_46057197_46057247_+" length="50" weight="1.000000" /> +<sequence id="sequence_26" name="chr21_46086869_46086919_-" length="50" weight="1.000000" /> +<sequence id="sequence_27" name="chr21_46102103_46102153_-" length="50" weight="1.000000" /> +<sequence id="sequence_28" name="chr21_47517957_47518007_+" length="50" weight="1.000000" /> +<sequence id="sequence_29" name="chr21_47575506_47575556_-" length="50" weight="1.000000" /> +<letter_frequencies> +<alphabet_array> +<value letter_id="A">0.294</value> +<value letter_id="C">0.231</value> +<value letter_id="G">0.257</value> +<value letter_id="T">0.217</value> +</alphabet_array> +</letter_frequencies> +</training_set> +<model> +<command_line>meme /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2490.dat -o /Users/gvk/work/git_workspace/galaxy/database/job_working_directory/001/1929/dataset_2578_files -nostatus -sf Galaxy_FASTA_Input -dna -mod zoops -nmotifs 1 -wnsites 0.8 -minw 8 -maxw 50 -wg 11 -ws 1 -maxiter 50 -distance 0.001 -prior dirichlet -b 0.01 -plib /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2577.dat -spmap uni -spfuzz 0.5 </command_line> +<host>MacBook-Pro-2.local</host> +<type>zoops</type> +<nmotifs>1</nmotifs> +<evalue_threshold>inf</evalue_threshold> +<object_function>E-value of product of p-values</object_function> +<use_llr>0</use_llr> +<min_width>8</min_width> +<max_width>50</max_width> +<wg>11</wg> +<ws>1</ws> +<endgaps>yes</endgaps> +<substring>yes</substring> +<minsites>2</minsites> +<maxsites>30</maxsites> +<wnsites>0.8</wnsites> +<spmap>uni</spmap> +<spfuzz>0.5</spfuzz> +<prior>dirichlet</prior> +<beta>0.01</beta> +<maxiter>50</maxiter> +<distance>0.001</distance> +<num_sequences>30</num_sequences> +<num_positions>1500</num_positions> +<seed>0</seed> +<ctfrac>-1</ctfrac> +<maxwords>-1</maxwords> +<strands>forward</strands> +<priors_file>dataset_2577.dat</priors_file> +<reason_for_stopping>Stopped because requested number of motifs (1) found.</reason_for_stopping> +<background_frequencies source="dataset with add-one prior applied"> +<alphabet_array> +<value letter_id="A">0.294</value> +<value letter_id="C">0.231</value> +<value letter_id="G">0.257</value> +<value letter_id="T">0.217</value> +</alphabet_array> +</background_frequencies> +</model> +<motifs> +<motif id="motif_1" name="1" width="11" sites="30" ic="13.0" re="12.2" llr="254" e_value="5.1e-040" bayes_threshold="5.2854" elapsed_time="0.168106"> +<scores> +<alphabet_matrix> +<alphabet_array> +<value letter_id="A">-14</value> +<value letter_id="C">-179</value> +<value letter_id="G">114</value> +<value letter_id="T">-112</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">3</value> +<value letter_id="C">-1155</value> +<value letter_id="G">137</value> +<value letter_id="T">-270</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">-114</value> +<value letter_id="C">20</value> +<value letter_id="G">86</value> +<value letter_id="T">-71</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">3</value> +<value letter_id="C">-279</value> +<value letter_id="G">122</value> +<value letter_id="T">-170</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">-1155</value> +<value letter_id="C">-1155</value> +<value letter_id="G">-295</value> +<value letter_id="T">215</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">156</value> +<value letter_id="C">-179</value> +<value letter_id="G">-1155</value> +<value letter_id="T">-170</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">-1155</value> +<value letter_id="C">-1155</value> +<value letter_id="G">-1155</value> +<value letter_id="T">220</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">172</value> +<value letter_id="C">-279</value> +<value letter_id="G">-1155</value> +<value letter_id="T">-1155</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">125</value> +<value letter_id="C">-1155</value> +<value letter_id="G">-1155</value> +<value letter_id="T">46</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">167</value> +<value letter_id="C">-179</value> +<value letter_id="G">-1155</value> +<value letter_id="T">-1155</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">144</value> +<value letter_id="C">-1155</value> +<value letter_id="G">-63</value> +<value letter_id="T">-270</value> +</alphabet_array> +</alphabet_matrix> +</scores> +<probabilities> +<alphabet_matrix> +<alphabet_array> +<value letter_id="A">0.266667</value> +<value letter_id="C">0.066667</value> +<value letter_id="G">0.566667</value> +<value letter_id="T">0.100000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.300000</value> +<value letter_id="C">0.000000</value> +<value letter_id="G">0.666667</value> +<value letter_id="T">0.033333</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.133333</value> +<value letter_id="C">0.266667</value> +<value letter_id="G">0.466667</value> +<value letter_id="T">0.133333</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.300000</value> +<value letter_id="C">0.033333</value> +<value letter_id="G">0.600000</value> +<value letter_id="T">0.066667</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.000000</value> +<value letter_id="C">0.000000</value> +<value letter_id="G">0.033333</value> +<value letter_id="T">0.966667</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.866667</value> +<value letter_id="C">0.066667</value> +<value letter_id="G">0.000000</value> +<value letter_id="T">0.066667</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.000000</value> +<value letter_id="C">0.000000</value> +<value letter_id="G">0.000000</value> +<value letter_id="T">1.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.966667</value> +<value letter_id="C">0.033333</value> +<value letter_id="G">0.000000</value> +<value letter_id="T">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.700000</value> +<value letter_id="C">0.000000</value> +<value letter_id="G">0.000000</value> +<value letter_id="T">0.300000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.933333</value> +<value letter_id="C">0.066667</value> +<value letter_id="G">0.000000</value> +<value letter_id="T">0.000000</value> +</alphabet_array> +<alphabet_array> +<value letter_id="A">0.800000</value> +<value letter_id="C">0.000000</value> +<value letter_id="G">0.166667</value> +<value letter_id="T">0.033333</value> +</alphabet_array> +</alphabet_matrix> +</probabilities> +<regular_expression> +[GA][GA][GC][GA]TATA[AT]AA +</regular_expression> +<contributing_sites> +<contributing_site sequence_id="sequence_24" position="12" strand="plus" pvalue="4.51e-07" > +<left_flank>AAGGCCAGGA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCCTGAGAGC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_23" position="15" strand="plus" pvalue="2.22e-06" > +<left_flank>ATACCCAGGG</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>CCTCAGCAGC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_13" position="13" strand="plus" pvalue="2.74e-06" > +<left_flank>CCACCAGCTT</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>AGCCCTGTAC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_25" position="36" strand="plus" pvalue="4.86e-06" > +<left_flank>ACAGGCCCTG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_21" position="7" strand="plus" pvalue="4.86e-06" > +<left_flank>CCAAGGA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCCCCACAAA</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_19" position="9" strand="plus" pvalue="4.86e-06" > +<left_flank>CAGGCCCTG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCCCCAGCAG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_9" position="13" strand="plus" pvalue="4.86e-06" > +<left_flank>GATTCACTGA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GGCCCTCTGC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_28" position="32" strand="plus" pvalue="6.48e-06" > +<left_flank>CCGGCGGGGC</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +</site> +<right_flank>GGGGCGG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_20" position="4" strand="plus" pvalue="6.48e-06" > +<left_flank>CAGA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +</site> +<right_flank>GTTCCGACCA</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_12" position="18" strand="plus" pvalue="6.48e-06" > +<left_flank>CACCAGAGCT</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>AGAAGGTTCT</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_15" position="21" strand="plus" pvalue="1.38e-05" > +<left_flank>AATCACTGAG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GTCCCAGGGA</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_11" position="16" strand="plus" pvalue="1.38e-05" > +<left_flank>CACTATTGAA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>TTTCATTTGC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_0" position="39" strand="plus" pvalue="1.41e-05" > +<left_flank>CCTCGGGACG</left_flank> +<site> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank></right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_6" position="15" strand="plus" pvalue="1.61e-05" > +<left_flank>CCCACTACTT</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>TCATTCTGAG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_22" position="2" strand="plus" pvalue="1.95e-05" > +<left_flank>GA</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GCCAACATCC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_14" position="17" strand="plus" pvalue="1.95e-05" > +<left_flank>CCCACCAGCA</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>AAGCTCAGGA</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_18" position="37" strand="plus" pvalue="2.16e-05" > +<left_flank>CGTGGTCGCG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_29" position="30" strand="plus" pvalue="3.04e-05" > +<left_flank>GCTGCCGGTG</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +</site> +<right_flank>GCCCTGGCG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_4" position="12" strand="plus" pvalue="3.04e-05" > +<left_flank>CAGGTCTAAG</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>CTTGGAGTCC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_5" position="0" strand="plus" pvalue="3.67e-05" > +<left_flank></left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>ATGGTCCTGT</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_1" position="27" strand="plus" pvalue="3.93e-05" > +<left_flank>AGTCACAAGT</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>GGGTCGCACG</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_7" position="4" strand="plus" pvalue="5.65e-05" > +<left_flank>TCAG</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>ATGTTCCTGT</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_3" position="14" strand="plus" pvalue="6.24e-05" > +<left_flank>CCCAGGTTTC</left_flank> +<site> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>TCGCCGCACC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_16" position="22" strand="plus" pvalue="7.15e-05" > +<left_flank>AGTTTCAGTT</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>attatataac</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_27" position="36" strand="plus" pvalue="1.39e-04" > +<left_flank>TGCCTGGGTC</left_flank> +<site> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +</site> +<right_flank>GCT</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_26" position="37" strand="plus" pvalue="1.39e-04" > +<left_flank>TGCCTGGGCC</left_flank> +<site> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="G"/> +</site> +<right_flank>GC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_17" position="2" strand="plus" pvalue="4.81e-04" > +<left_flank>ga</left_flank> +<site> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>ggggcctcac</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_8" position="13" strand="plus" pvalue="8.57e-04" > +<left_flank>TATAACTCAG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>TAATTTGTAC</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_10" position="7" strand="plus" pvalue="1.47e-03" > +<left_flank>aaactta</left_flank> +<site> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="A"/> +</site> +<right_flank>acttaaaact</right_flank> +</contributing_site> +<contributing_site sequence_id="sequence_2" position="26" strand="plus" pvalue="2.64e-03" > +<left_flank>GGTGGGGGTG</left_flank> +<site> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="G"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="T"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="A"/> +<letter_ref letter_id="C"/> +<letter_ref letter_id="T"/> +</site> +<right_flank>GGTCCACTAT</right_flank> +</contributing_site> +</contributing_sites> +</motif> +</motifs> +<scanned_sites_summary p_thresh="0.0001"> +<scanned_sites sequence_id="sequence_0" pvalue="5.63e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="39" pvalue="1.41e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_1" pvalue="1.57e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="27" pvalue="3.93e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_2" pvalue="1.00e-01" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_3" pvalue="2.49e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="14" pvalue="6.24e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_4" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="12" pvalue="3.04e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_5" pvalue="1.47e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="0" pvalue="3.67e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_6" pvalue="6.45e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="1.61e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_7" pvalue="2.26e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="5.65e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_8" pvalue="3.37e-02" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_9" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="4.86e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_10" pvalue="5.73e-02" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_11" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="16" pvalue="1.38e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_12" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="18" pvalue="6.48e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_13" pvalue="1.10e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="2.74e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_14" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="17" pvalue="1.95e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_15" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="21" pvalue="1.38e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_16" pvalue="2.85e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="22" pvalue="7.15e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_17" pvalue="1.90e-02" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_18" pvalue="8.63e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="37" pvalue="2.16e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_19" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="9" pvalue="4.86e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_20" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="6.48e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_21" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="7" pvalue="4.86e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_22" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="2" pvalue="1.95e-05"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_23" pvalue="8.89e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="2.22e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_24" pvalue="1.80e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="12" pvalue="4.51e-07"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_25" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="36" pvalue="4.86e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_26" pvalue="5.54e-03" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_27" pvalue="5.54e-03" num_sites="0"></scanned_sites> +<scanned_sites sequence_id="sequence_28" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="32" pvalue="6.48e-06"/> +</scanned_sites> +<scanned_sites sequence_id="sequence_29" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="30" pvalue="3.04e-05"/> +</scanned_sites> +</scanned_sites_summary> +</MEME>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_psp_gen_reports_output.tabular Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,19 @@ +540 bases or amino acids +0.5 0.5 4 4 +0.5 0.5 6 6 +0.5 0.5 7 7 +0.5 0.5 8 8 +0.5 0.5 9 9 +0.5 0.5 10 10 +0.5 0.5 11 11 +0.5 0.5 12 12 +0.5 0.5 13 13 +0.5 0.5 14 14 +0.5 0.5 15 15 +0.5 0.5 16 16 +0.5 0.5 17 17 +0.5 0.5 18 18 +0.5 0.5 19 19 +0.5 0.5 20 20 + +score 0.9 occurred 483 times
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme_psp_protein_input.fasta Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,17 @@ +>ICYA_MANSE +GDIFYPGYCPDVKPVNDFDLSAFAGAWHEIAKLPLENENQGKCTIAEYKY +DGKKASVYNSFVSNGVKEYMEGDLEIAPDAKYTKQGKYVMTFKFGQRVVN +LVPWVLATDYKNYAINYNCDYHPDKKAHSIHAWILSKSKVLEGNTKEVVD +NVLKTFSHLIDASKFISNDFSEAACQYSTTYSLTGPDRH + +>LACB_BOVIN +MKCLLLALALTCGAQALIVTQTMKGLDIQKVAGTWYSLAMAASDISLLDA +QSAPLRVYVEELKPTPEGDLEILLQKWENGECAQKKIIAEKTKIPAVFKI +DALNENKVLVLDTDYKKYLLFCMENSAEPEQSLACQCLVRTPEVDDEALE +KFDKALKALPMHIRLSFNPTQLEEQCHI + +>BBP_PIEBR +NVYHDGACPEVKPVDNFDWSNYHGKWWEVAKYPNSVEKYGKCGWAEYTPE +GKSVKVSNYHVIHGKEYFIEGTAYPVGDSKIGKIYHKLTYGGVTKENVFN +VLSTDNKNYIIGYYCKYDEDKKGHQDFVWVLSRSKVLTGEAKTAVENYLI +GSPVVDSQKLVYSDFSEAACKVN
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/motif1.gff Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,100 @@ +##gff-version 3 +phiX174 fimo polypeptide_motif 1 11 78.3 + . Name=1;ID=1-57-phiX174;pvalue=1.46e-08;sequence=GAGTTTTATCG; +phiX174 fimo polypeptide_motif 3 13 57.5 + . Name=1;ID=1-471-phiX174;pvalue=1.79e-06;sequence=GTTTTATCGCT; +phiX174 fimo polypeptide_motif 7 17 45 + . Name=1;ID=1-1378-phiX174;pvalue=3.18e-05;sequence=TATCGCTTCCA; +phiX174 fimo polypeptide_motif 10 20 53.9 + . Name=1;ID=1-605-phiX174;pvalue=4.1e-06;sequence=CGCTTCCATGA; +phiX174 fimo polypeptide_motif 17 27 40.2 + . Name=1;ID=1-1887-phiX174;pvalue=9.55e-05;sequence=ATGACGCAGAA; +phiX174 fimo polypeptide_motif 18 28 45.3 + . Name=1;ID=1-1349-phiX174;pvalue=2.98e-05;sequence=TGACGCAGAAG; +phiX174 fimo polypeptide_motif 19 29 55.8 + . Name=1;ID=1-527-phiX174;pvalue=2.6e-06;sequence=GACGCAGAAGT; +phiX174 fimo polypeptide_motif 21 31 41.5 + . Name=1;ID=1-1705-phiX174;pvalue=7.07e-05;sequence=CGCAGAAGTTA; +phiX174 fimo polypeptide_motif 22 32 44.6 + . Name=1;ID=1-1404-phiX174;pvalue=3.44e-05;sequence=GCAGAAGTTAA; +phiX174 fimo polypeptide_motif 24 34 79.1 + . Name=1;ID=1-53-phiX174;pvalue=1.23e-08;sequence=AGAAGTTAACA; +phiX174 fimo polypeptide_motif 25 35 45.3 + . Name=1;ID=1-1347-phiX174;pvalue=2.97e-05;sequence=GAAGTTAACAC; +phiX174 fimo polypeptide_motif 26 36 59.2 + . Name=1;ID=1-417-phiX174;pvalue=1.19e-06;sequence=AAGTTAACACT; +phiX174 fimo polypeptide_motif 30 40 44.7 + . Name=1;ID=1-1399-phiX174;pvalue=3.4e-05;sequence=TAACACTTTCG; +phiX174 fimo polypeptide_motif 37 47 72.4 + . Name=1;ID=1-98-phiX174;pvalue=5.79e-08;sequence=TTCGGATATTT; +phiX174 fimo polypeptide_motif 39 49 65.3 + . Name=1;ID=1-213-phiX174;pvalue=2.92e-07;sequence=CGGATATTTCT; +phiX174 fimo polypeptide_motif 41 51 55.3 + . Name=1;ID=1-548-phiX174;pvalue=2.97e-06;sequence=GATATTTCTGA; +phiX174 fimo polypeptide_motif 43 53 58.4 + . Name=1;ID=1-442-phiX174;pvalue=1.43e-06;sequence=TATTTCTGATG; +phiX174 fimo polypeptide_motif 46 56 53.7 + . Name=1;ID=1-617-phiX174;pvalue=4.23e-06;sequence=TTCTGATGAGT; +phiX174 fimo polypeptide_motif 50 60 45.4 + . Name=1;ID=1-1333-phiX174;pvalue=2.86e-05;sequence=GATGAGTCGAA; +phiX174 fimo polypeptide_motif 51 61 48.4 + . Name=1;ID=1-1094-phiX174;pvalue=1.44e-05;sequence=ATGAGTCGAAA; +phiX174 fimo polypeptide_motif 52 62 83.9 + . Name=1;ID=1-22-phiX174;pvalue=4.06e-09;sequence=TGAGTCGAAAA; +phiX174 fimo polypeptide_motif 53 63 53.9 + . Name=1;ID=1-601-phiX174;pvalue=4.03e-06;sequence=GAGTCGAAAAA; +phiX174 fimo polypeptide_motif 54 64 62.9 + . Name=1;ID=1-297-phiX174;pvalue=5.16e-07;sequence=AGTCGAAAAAT; +phiX174 fimo polypeptide_motif 55 65 52.8 + . Name=1;ID=1-675-phiX174;pvalue=5.26e-06;sequence=GTCGAAAAATT; +phiX174 fimo polypeptide_motif 56 66 41.4 + . Name=1;ID=1-1713-phiX174;pvalue=7.2e-05;sequence=TCGAAAAATTA; +phiX174 fimo polypeptide_motif 58 68 43.4 + . Name=1;ID=1-1500-phiX174;pvalue=4.56e-05;sequence=GAAAAATTATC; +phiX174 fimo polypeptide_motif 59 69 59.6 + . Name=1;ID=1-409-phiX174;pvalue=1.1e-06;sequence=AAAAATTATCT; +phiX174 fimo polypeptide_motif 61 71 61.8 + . Name=1;ID=1-329-phiX174;pvalue=6.52e-07;sequence=AAATTATCTTG; +phiX174 fimo polypeptide_motif 63 73 59.2 + . Name=1;ID=1-419-phiX174;pvalue=1.2e-06;sequence=ATTATCTTGAT; +phiX174 fimo polypeptide_motif 65 75 53.3 + . Name=1;ID=1-643-phiX174;pvalue=4.66e-06;sequence=TATCTTGATAA; +phiX174 fimo polypeptide_motif 66 76 51.8 + . Name=1;ID=1-737-phiX174;pvalue=6.54e-06;sequence=ATCTTGATAAA; +phiX174 fimo polypeptide_motif 67 77 73.2 + . Name=1;ID=1-89-phiX174;pvalue=4.78e-08;sequence=TCTTGATAAAG; +phiX174 fimo polypeptide_motif 69 79 63.8 + . Name=1;ID=1-268-phiX174;pvalue=4.15e-07;sequence=TTGATAAAGCA; +phiX174 fimo polypeptide_motif 71 81 40.2 + . Name=1;ID=1-1882-phiX174;pvalue=9.49e-05;sequence=GATAAAGCAGG; +phiX174 fimo polypeptide_motif 73 83 45.4 + . Name=1;ID=1-1334-phiX174;pvalue=2.87e-05;sequence=TAAAGCAGGAA; +phiX174 fimo polypeptide_motif 74 84 50.9 + . Name=1;ID=1-832-phiX174;pvalue=8.05e-06;sequence=AAAGCAGGAAT; +phiX174 fimo polypeptide_motif 76 86 52.2 + . Name=1;ID=1-710-phiX174;pvalue=5.96e-06;sequence=AGCAGGAATTA; +phiX174 fimo polypeptide_motif 78 88 51.8 + . Name=1;ID=1-741-phiX174;pvalue=6.65e-06;sequence=CAGGAATTACT; +phiX174 fimo polypeptide_motif 79 89 45 + . Name=1;ID=1-1369-phiX174;pvalue=3.16e-05;sequence=AGGAATTACTA; +phiX174 fimo polypeptide_motif 80 90 43.3 + . Name=1;ID=1-1511-phiX174;pvalue=4.63e-05;sequence=GGAATTACTAC; +phiX174 fimo polypeptide_motif 81 91 59.8 + . Name=1;ID=1-402-phiX174;pvalue=1.05e-06;sequence=GAATTACTACT; +phiX174 fimo polypeptide_motif 82 92 46.9 + . Name=1;ID=1-1205-phiX174;pvalue=2.03e-05;sequence=AATTACTACTG; +phiX174 fimo polypeptide_motif 88 98 41.2 + . Name=1;ID=1-1744-phiX174;pvalue=7.51e-05;sequence=TACTGCTTGTT; +phiX174 fimo polypeptide_motif 91 101 53.6 + . Name=1;ID=1-621-phiX174;pvalue=4.33e-06;sequence=TGCTTGTTTAC; +phiX174 fimo polypeptide_motif 92 102 44.8 + . Name=1;ID=1-1388-phiX174;pvalue=3.31e-05;sequence=GCTTGTTTACG; +phiX174 fimo polypeptide_motif 93 103 43.2 + . Name=1;ID=1-1525-phiX174;pvalue=4.82e-05;sequence=CTTGTTTACGA; +phiX174 fimo polypeptide_motif 95 105 61.9 + . Name=1;ID=1-327-phiX174;pvalue=6.5e-07;sequence=TGTTTACGAAT; +phiX174 fimo polypeptide_motif 96 106 42.9 + . Name=1;ID=1-1553-phiX174;pvalue=5.08e-05;sequence=GTTTACGAATT; +phiX174 fimo polypeptide_motif 98 108 45.4 + . Name=1;ID=1-1340-phiX174;pvalue=2.9e-05;sequence=TTACGAATTAA; +phiX174 fimo polypeptide_motif 99 109 73.5 + . Name=1;ID=1-88-phiX174;pvalue=4.48e-08;sequence=TACGAATTAAA; +phiX174 fimo polypeptide_motif 100 110 53 + . Name=1;ID=1-661-phiX174;pvalue=4.99e-06;sequence=ACGAATTAAAT; +phiX174 fimo polypeptide_motif 102 112 75.6 + . Name=1;ID=1-72-phiX174;pvalue=2.75e-08;sequence=GAATTAAATCG; +phiX174 fimo polypeptide_motif 104 114 52.2 + . Name=1;ID=1-713-phiX174;pvalue=6.05e-06;sequence=ATTAAATCGAA; +phiX174 fimo polypeptide_motif 106 116 59.6 + . Name=1;ID=1-407-phiX174;pvalue=1.09e-06;sequence=TAAATCGAAGT; +phiX174 fimo polypeptide_motif 112 122 41.6 + . Name=1;ID=1-1683-phiX174;pvalue=6.85e-05;sequence=GAAGTGGACTG; +phiX174 fimo polypeptide_motif 114 124 45.3 + . Name=1;ID=1-1342-phiX174;pvalue=2.92e-05;sequence=AGTGGACTGCT; +phiX174 fimo polypeptide_motif 118 128 56.2 + . Name=1;ID=1-512-phiX174;pvalue=2.38e-06;sequence=GACTGCTGGCG; +phiX174 fimo polypeptide_motif 122 132 51.4 + . Name=1;ID=1-774-phiX174;pvalue=7.15e-06;sequence=GCTGGCGGAAA; +phiX174 fimo polypeptide_motif 123 133 43.1 + . Name=1;ID=1-1533-phiX174;pvalue=4.93e-05;sequence=CTGGCGGAAAA; +phiX174 fimo polypeptide_motif 124 134 48.6 + . Name=1;ID=1-1085-phiX174;pvalue=1.38e-05;sequence=TGGCGGAAAAT; +phiX174 fimo polypeptide_motif 125 135 68.3 + . Name=1;ID=1-146-phiX174;pvalue=1.49e-07;sequence=GGCGGAAAATG; +phiX174 fimo polypeptide_motif 126 136 46.4 + . Name=1;ID=1-1243-phiX174;pvalue=2.27e-05;sequence=GCGGAAAATGA; +phiX174 fimo polypeptide_motif 128 138 58.3 + . Name=1;ID=1-446-phiX174;pvalue=1.48e-06;sequence=GGAAAATGAGA; +phiX174 fimo polypeptide_motif 129 139 43.2 + . Name=1;ID=1-1522-phiX174;pvalue=4.78e-05;sequence=GAAAATGAGAA; +phiX174 fimo polypeptide_motif 130 140 54.1 + . Name=1;ID=1-595-phiX174;pvalue=3.92e-06;sequence=AAAATGAGAAA; +phiX174 fimo polypeptide_motif 131 141 76 + . Name=1;ID=1-68-phiX174;pvalue=2.49e-08;sequence=AAATGAGAAAA; +phiX174 fimo polypeptide_motif 132 142 51.2 + . Name=1;ID=1-800-phiX174;pvalue=7.57e-06;sequence=AATGAGAAAAT; +phiX174 fimo polypeptide_motif 133 143 56.2 + . Name=1;ID=1-513-phiX174;pvalue=2.41e-06;sequence=ATGAGAAAATT; +phiX174 fimo polypeptide_motif 134 144 41.1 + . Name=1;ID=1-1761-phiX174;pvalue=7.83e-05;sequence=TGAGAAAATTC; +phiX174 fimo polypeptide_motif 135 145 50.3 + . Name=1;ID=1-910-phiX174;pvalue=9.39e-06;sequence=GAGAAAATTCG; +phiX174 fimo polypeptide_motif 136 146 43.3 + . Name=1;ID=1-1517-phiX174;pvalue=4.66e-05;sequence=AGAAAATTCGA; +phiX174 fimo polypeptide_motif 139 149 54.2 + . Name=1;ID=1-588-phiX174;pvalue=3.75e-06;sequence=AAATTCGACCT; +phiX174 fimo polypeptide_motif 141 151 42.2 + . Name=1;ID=1-1625-phiX174;pvalue=6.01e-05;sequence=ATTCGACCTAT; +phiX174 fimo polypeptide_motif 143 153 50 + . Name=1;ID=1-938-phiX174;pvalue=9.94e-06;sequence=TCGACCTATCC; +phiX174 fimo polypeptide_motif 145 155 44.6 + . Name=1;ID=1-1403-phiX174;pvalue=3.42e-05;sequence=GACCTATCCTT; +phiX174 fimo polypeptide_motif 155 165 51.3 + . Name=1;ID=1-787-phiX174;pvalue=7.35e-06;sequence=TGCGCAGCTCG; +phiX174 fimo polypeptide_motif 157 167 51.1 + . Name=1;ID=1-807-phiX174;pvalue=7.68e-06;sequence=CGCAGCTCGAG; +phiX174 fimo polypeptide_motif 159 169 44.5 + . Name=1;ID=1-1420-phiX174;pvalue=3.56e-05;sequence=CAGCTCGAGAA; +phiX174 fimo polypeptide_motif 160 170 40 + . Name=1;ID=1-1921-phiX174;pvalue=9.89e-05;sequence=AGCTCGAGAAG; +phiX174 fimo polypeptide_motif 166 176 60.9 + . Name=1;ID=1-365-phiX174;pvalue=8.02e-07;sequence=AGAAGCTCTTA; +phiX174 fimo polypeptide_motif 168 178 62.3 + . Name=1;ID=1-311-phiX174;pvalue=5.87e-07;sequence=AAGCTCTTACT; +phiX174 fimo polypeptide_motif 181 191 49.9 + . Name=1;ID=1-946-phiX174;pvalue=1.01e-05;sequence=GCGACCTTTCG; +phiX174 fimo polypeptide_motif 187 197 52.5 + . Name=1;ID=1-694-phiX174;pvalue=5.64e-06;sequence=TTTCGCCATCA; +phiX174 fimo polypeptide_motif 191 201 46.6 + . Name=1;ID=1-1232-phiX174;pvalue=2.2e-05;sequence=GCCATCAACTA; +phiX174 fimo polypeptide_motif 194 204 76.4 + . Name=1;ID=1-67-phiX174;pvalue=2.29e-08;sequence=ATCAACTAACG; +phiX174 fimo polypeptide_motif 201 211 40.1 + . Name=1;ID=1-1908-phiX174;pvalue=9.77e-05;sequence=AACGATTCTGT; +phiX174 fimo polypeptide_motif 203 213 63 + . Name=1;ID=1-291-phiX174;pvalue=5e-07;sequence=CGATTCTGTCA; +phiX174 fimo polypeptide_motif 205 215 53.8 + . Name=1;ID=1-610-phiX174;pvalue=4.16e-06;sequence=ATTCTGTCAAA; +phiX174 fimo polypeptide_motif 206 216 59.1 + . Name=1;ID=1-421-phiX174;pvalue=1.23e-06;sequence=TTCTGTCAAAA; +phiX174 fimo polypeptide_motif 207 217 68 + . Name=1;ID=1-153-phiX174;pvalue=1.58e-07;sequence=TCTGTCAAAAA; +phiX174 fimo polypeptide_motif 209 219 49.6 + . Name=1;ID=1-988-phiX174;pvalue=1.09e-05;sequence=TGTCAAAAACT; +phiX174 fimo polypeptide_motif 210 220 40.8 + . Name=1;ID=1-1810-phiX174;pvalue=8.33e-05;sequence=GTCAAAAACTG; +phiX174 fimo polypeptide_motif 213 223 59.7 + . Name=1;ID=1-404-phiX174;pvalue=1.06e-06;sequence=AAAAACTGACG; +phiX174 fimo polypeptide_motif 223 233 42 + . Name=1;ID=1-1654-phiX174;pvalue=6.36e-05;sequence=GCGTTGGATGA; +phiX174 fimo polypeptide_motif 225 235 61.4 + . Name=1;ID=1-349-phiX174;pvalue=7.16e-07;sequence=GTTGGATGAGG; +phiX174 fimo polypeptide_motif 227 237 40.3 + . Name=1;ID=1-1874-phiX174;pvalue=9.32e-05;sequence=TGGATGAGGAG; +phiX174 fimo polypeptide_motif 228 238 49.9 + . Name=1;ID=1-947-phiX174;pvalue=1.01e-05;sequence=GGATGAGGAGA; +phiX174 fimo polypeptide_motif 229 239 45 + . Name=1;ID=1-1370-phiX174;pvalue=3.16e-05;sequence=GATGAGGAGAA; +phiX174 fimo polypeptide_motif 230 240 44.8 + . Name=1;ID=1-1395-phiX174;pvalue=3.33e-05;sequence=ATGAGGAGAAG;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.memepsp Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,6 @@ +>BBP_PIEBR 20 scaledmin = 0.1 scaledmax = 0.9 +0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0.006488825 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +>ICYA_MANSE 20 scaledmin = 0.1 scaledmax = 0.9 +0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0.005878511 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +>LACB_BOVIN 20 scaledmin = 0.1 scaledmax = 0.9 +0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0.006284916 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX.fasta Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,79 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT +GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA +AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/prior30.plib Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,275 @@ +Alphabet= ACDEFGHIKLMNPQRSTVWY +NumDistr= 30 +Number= 0 +Mixture= 0.055795 +B= 5.623820 +Alpha= 0.0855491 0.0221831 0.0111063 0.0209959 0.0505726 0.025437 0.0155389 0.132951 0.0247865 0.150287 0.0577239 0.0209317 0.0166629 0.0220905 0.0244295 0.0497608 0.070277 0.157532 0.0102219 0.0309633 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= HMM9.4 reestimated in henikoff29.2 + +Number= 1 +Mixture= 0.198333 +B= 0.097240 +Alpha= 0.0562629 0.0329597 0.0692513 0.0385232 0.0400041 0.143573 0.0428939 0.0226244 0.0442102 0.0665467 0.0117853 0.0447655 0.0833299 0.0395825 0.0611271 0.0588852 0.0513472 0.0317153 0.0237865 0.0368161 +FullUpdate= 1 +QUpdate= 1 +StructID= 24 +Comment= Outside + +Number= 2 +Mixture= 0.043566 +B= 1.648336 +Alpha= 0.0144564 0.00845337 0.00785519 0.00864933 0.255959 0.0110815 0.0509526 0.0234533 0.0120443 0.0561967 0.015111 0.0190974 0.00857653 0.0167812 0.0164918 0.0197108 0.0151013 0.0252782 0.050139 0.364613 +FullUpdate= 1 +QUpdate= 1 +StructID= 26 +Comment= Inside + +Number= 3 +Mixture= 0.060170 +B= 2.595432 +Alpha= 0.0452144 0.00587917 0.169731 0.0751478 0.00749471 0.0845832 0.0369819 0.00610072 0.0548186 0.011029 0.00382749 0.212785 0.0206532 0.0416705 0.0280716 0.117267 0.0533742 0.00943157 0.00216149 0.0137784 +FullUpdate= 1 +QUpdate= 1 +StructID= 19 +Comment= Outside Alpha + +Number= 4 +Mixture= 0.065466 +B= 3.112271 +Alpha= 0.0361167 0.0049157 0.0134924 0.0461325 0.00557631 0.0209043 0.0302551 0.016425 0.307554 0.0338255 0.0139435 0.0360733 0.0127659 0.0873761 0.222668 0.0369042 0.0354442 0.0228891 0.00434827 0.0123906 +FullUpdate= 1 +QUpdate= 1 +StructID= 21 +Comment= Outside Beta + +Number= 5 +Mixture= 0.067614 +B= 2.053644 +Alpha= 0.0194362 0.00765176 0.00188738 0.00372898 0.0849894 0.00421787 0.00400459 0.152735 0.00407958 0.4568 0.106051 0.00304386 0.00545956 0.00900935 0.00605071 0.00519029 0.016255 0.0861045 0.00787965 0.0154248 +FullUpdate= 1 +QUpdate= 1 +StructID= 22 +Comment= Inside alpha + +Number= 6 +Mixture= 0.080724 +B= 2.138987 +Alpha= 0.0423172 0.0153891 0.00409306 0.00565735 0.0197117 0.00590607 0.00139926 0.307863 0.00544884 0.115721 0.0285808 0.00522771 0.00474851 0.00328193 0.00351054 0.00892385 0.0348922 0.380003 0.00117673 0.00614917 +FullUpdate= 1 +QUpdate= 1 +StructID= 23 +Comment= Inside beta + +Number= 7 +Mixture= 0.051030 +B= 3.878926 +Alpha= 0.0548123 0.000759746 0.144127 0.46019 0.00249502 0.0192754 0.0106535 0.00938765 0.0562429 0.0163148 0.00717389 0.0245612 0.0177482 0.0744802 0.0199233 0.0323535 0.0257651 0.018574 0.00087086 0.00429088 +FullUpdate= 1 +QUpdate= 1 +StructID= 23 +Comment= Alpha helix + +Number= 8 +Mixture= 0.103529 +B= 1.486325 +Alpha= 0.315754 0.0384546 0.0116388 0.0133665 0.0111126 0.107921 0.00752325 0.0154885 0.0111281 0.0231087 0.011626 0.0228375 0.0304785 0.0166632 0.0156345 0.186379 0.0954421 0.0546691 0.00351538 0.00725682 +FullUpdate= 1 +QUpdate= 1 +StructID= 23 +Comment= Beta strand + +Number= 9 +Mixture= 0.062940 +B= 8.221215 +Alpha= 0.0869919 0.00672577 0.0600995 0.10763 0.0153489 0.0378086 0.0325335 0.023388 0.113765 0.041623 0.0196906 0.0625344 0.0262599 0.0788667 0.0707399 0.0886634 0.0666777 0.0361472 0.00484308 0.0196629 +FullUpdate= 1 +QUpdate= 1 +StructID= 23 +Comment= Other + +Number= 10 +Mixture= 0.012518 +B= 38.955631 +Alpha= 0.732922 0.0145131 0.00623235 0.00951423 0.00717778 0.0289521 0.00351664 0.0125081 0.00886593 0.0183651 0.00832812 0.00670968 0.00364556 0.00622169 0.00812899 0.0582399 0.0205067 0.0394327 0.00207485 0.00414489 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= A + +Number= 11 +Mixture= 0.004953 +B= 381.562195 +Alpha= 0.00563239 0.959814 0.00144129 0.00213042 0.00158645 0.00168393 0.000989765 0.00325263 0.00148501 0.00343924 0.00168673 0.00159054 0.00121534 0.00129942 0.00195209 0.00296106 0.0039912 0.00266944 0.000327808 0.000851203 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= C + +Number= 12 +Mixture= 0.013849 +B= 90.727570 +Alpha= 0.00897115 0.00169015 0.859473 0.0360829 0.00269485 0.00606504 0.00469816 0.00400134 0.0047981 0.00514968 0.00208395 0.029754 0.00241276 0.0045506 0.00433816 0.0088208 0.00511143 0.00527448 0.00104469 0.00298475 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= D + +Number= 13 +Mixture= 0.008388 +B= 404.591034 +Alpha= 0.00241514 0.000413991 0.0122981 0.96369 0.000665578 0.00187461 0.00106904 0.00149214 0.00121548 0.00129791 0.000554145 0.00253496 0.000624495 0.00316839 0.00115045 0.00171781 0.001468 0.0014564 0.000278652 0.000614791 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= E + +Number= 14 +Mixture= 0.008064 +B= 83.323669 +Alpha= 0.00839779 0.00428348 0.00298116 0.00358128 0.850936 0.00329382 0.00196832 0.0130534 0.00320345 0.0351883 0.00729724 0.00287304 0.00358482 0.00218728 0.00264753 0.00833798 0.00418729 0.0120684 0.00448366 0.025446 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= F + +Number= 15 +Mixture= 0.032205 +B= 32.644871 +Alpha= 0.0234448 0.00236512 0.0112957 0.00811395 0.00248143 0.868718 0.00345598 0.00342985 0.00859682 0.0040966 0.00239339 0.012342 0.00423123 0.00440054 0.00795347 0.0165095 0.0065024 0.00550512 0.00140604 0.00275817 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= G + +Number= 16 +Mixture= 0.005033 +B= 35.520824 +Alpha= 0.0100058 0.00386024 0.0131498 0.0108984 0.0122851 0.00738691 0.722249 0.00521193 0.00686054 0.0150103 0.00673014 0.0367074 0.00625526 0.0429912 0.0234127 0.0187246 0.0128445 0.00837399 0.00390723 0.0331349 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= H + +Number= 17 +Mixture= 0.007454 +B= 101.265472 +Alpha= 0.0106938 0.00267663 0.00404166 0.00466637 0.00838963 0.00372808 0.00182575 0.681615 0.0059102 0.0770333 0.0184335 0.004676 0.0027124 0.00372663 0.00418165 0.00773357 0.0109237 0.140679 0.00140417 0.00494911 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= I + +Number= 18 +Mixture= 0.009400 +B= 150.415985 +Alpha= 0.00688657 0.00169711 0.00222738 0.00346887 0.00115861 0.00302866 0.00209171 0.00400905 0.903944 0.0037747 0.00186061 0.00449531 0.00249618 0.00324487 0.041775 0.00392196 0.00461714 0.00296607 0.000893256 0.00144282 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= K + +Number= 19 +Mixture= 0.017057 +B= 31.896633 +Alpha= 0.0114646 0.00367926 0.00296188 0.00596126 0.0190009 0.00382486 0.00338381 0.0401936 0.00650072 0.790038 0.031659 0.00392791 0.0050046 0.00753591 0.00771818 0.00748621 0.0101555 0.0312597 0.00242405 0.00581952 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= L + +Number= 20 +Mixture= 0.002761 +B= 201.346268 +Alpha= 0.00353933 0.00165628 0.0014931 0.00161065 0.00279831 0.00194259 0.00101868 0.00969101 0.00211316 0.0217036 0.928022 0.00162899 0.0015681 0.0015629 0.00138977 0.00294601 0.00311476 0.00723178 0.00156295 0.00340569 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= M + +Number= 21 +Mixture= 0.005734 +B= 108.343185 +Alpha= 0.0067512 0.00239062 0.0140378 0.0043452 0.00365788 0.00689345 0.0148828 0.00715373 0.00789036 0.00614036 0.00289697 0.858995 0.00399721 0.00770961 0.00570515 0.0238176 0.011602 0.00591549 0.00167893 0.00353897 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= N + +Number= 22 +Mixture= 0.022818 +B= 15.153304 +Alpha= 0.0417987 0.00360232 0.0113792 0.0152366 0.00564775 0.0123795 0.00606957 0.0091353 0.0165122 0.0167265 0.00490487 0.00915437 0.755604 0.0131375 0.012587 0.0283392 0.0189623 0.0140029 0.0012848 0.00353553 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= P + +Number= 23 +Mixture= 0.005931 +B= 79.417511 +Alpha= 0.0142993 0.00266984 0.0053289 0.0321605 0.0028715 0.00426743 0.0257509 0.00565307 0.0106106 0.0161186 0.00955753 0.0104696 0.00638107 0.807311 0.0149106 0.0111968 0.00889459 0.00681482 0.00206658 0.00266624 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= Q + +Number= 24 +Mixture= 0.011491 +B= 93.103897 +Alpha= 0.00756896 0.00314197 0.00296652 0.00327634 0.00194604 0.00467894 0.00721049 0.00406061 0.0277257 0.00663852 0.00217868 0.00577047 0.00473306 0.00953551 0.889701 0.00650859 0.00506022 0.00294281 0.00205549 0.00230062 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= R + +Number= 25 +Mixture= 0.008219 +B= 47.504795 +Alpha= 0.0284818 0.00697155 0.00749796 0.00604665 0.00515171 0.00954817 0.00380684 0.00637929 0.0104463 0.00908885 0.00471437 0.0194592 0.00711823 0.00611827 0.00979722 0.707416 0.139256 0.00656298 0.0015377 0.00460086 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= S + +Number= 26 +Mixture= 0.019050 +B= 14.027470 +Alpha= 0.0247201 0.00718027 0.00845584 0.0076239 0.00600101 0.0073401 0.00492149 0.0173757 0.0129878 0.0125773 0.0100452 0.0230424 0.00659406 0.0110314 0.0112037 0.107763 0.690341 0.0249364 0.00193884 0.00392074 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= T + +Number= 27 +Mixture= 0.007047 +B= 76.958153 +Alpha= 0.0447488 0.00734525 0.00576457 0.00805666 0.00714188 0.00593389 0.0041663 0.0688592 0.00714299 0.0255115 0.00800708 0.00501678 0.00632646 0.00492002 0.00812967 0.0100074 0.0240134 0.745035 0.00126243 0.00261056 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= V + +Number= 28 +Mixture= 0.003957 +B= 150.973328 +Alpha= 0.00517343 0.00213336 0.00350645 0.00390297 0.018439 0.0041919 0.0023655 0.00404231 0.00420998 0.0171406 0.00379068 0.00363696 0.00245861 0.00387467 0.00502035 0.00465674 0.00417283 0.00620977 0.888513 0.012561 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= W + +Number= 29 +Mixture= 0.004904 +B= 30.653225 +Alpha= 0.0342049 0.00809912 0.0126852 0.0174701 0.156033 0.0118268 0.0431342 0.0204751 0.0164439 0.0363664 0.0129811 0.0131986 0.0103037 0.0116235 0.0159032 0.0287792 0.0176143 0.024986 0.0131845 0.494687 +FullUpdate= 1 +QUpdate= 1 +StructID= 0 +Comment= Y + +/* $Header$ */ +/* $Header$ */ +/* $Header$ */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Aug 23 20:57:34 2017 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables>