# HG changeset patch # User iuc # Date 1466183812 14400 # Node ID 62d1fae3b7d30a44e31c0e7847089aea5a2eaf31 # Parent 34c794383f815687265fec3afe2f39ab2969e7f8 Uploaded diff -r 34c794383f81 -r 62d1fae3b7d3 fimo_wrapper.py --- a/fimo_wrapper.py Fri Mar 18 08:23:54 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ -#!/usr/bin/env python -import argparse -import os -import shutil -import string -import subprocess -import sys -import tempfile - -BUFFSIZE = 1048576 -# Translation table for reverse Complement, with ambiguity codes. -DNA_COMPLEMENT = string.maketrans("ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb") - - -def reverse(sequence): - # Reverse sequence string. - return sequence[::-1] - - -def dna_complement(sequence): - # Complement DNA sequence string. - return sequence.translate(DNA_COMPLEMENT) - - -def dna_reverse_complement(sequence): - # Returns the reverse complement of the sequence. - sequence = reverse(sequence) - return dna_complement(sequence) - - -def stop_err(msg): - sys.stderr.write(msg) - sys.exit(1) - -parser = argparse.ArgumentParser() -parser.add_argument('--input_motifs', dest='input_motifs', help='MEME output formatted files for input to fimo') -parser.add_argument('--input_fasta', dest='input_fasta', help='Fassta sequence file') -parser.add_argument('--options_type', dest='options_type', help='Basic or Advance options') -parser.add_argument('--input_psp', dest='input_psp', default=None, help='File containing position specific priors') -parser.add_argument('--input_prior_dist', dest='input_prior_dist', default=None, help='File containing binned distribution of priors') -parser.add_argument('--alpha', dest='alpha', type=float, default=1.0, help='The alpha parameter for calculating position specific priors') -parser.add_argument('--bgfile', dest='bgfile', default=None, help='Background file type, used only if not "default"') -parser.add_argument('--max_strand', action='store_true', help='If matches on both strands at a given position satisfy the output threshold, only report the match for the strand with the higher score') -parser.add_argument('--max_stored_scores', dest='max_stored_scores', type=int, help='Maximum score count to store') -parser.add_argument('--motif', dest='motifs', action='append', default=[], help='Specify motif by id') -parser.add_argument('--motif_pseudo', dest='motif_pseudo', type=float, default=0.1, help='Pseudocount to add to counts in motif matrix') -parser.add_argument('--no_qvalue', action='store_true', help='Do not compute a q-value for each p-value') -parser.add_argument('--norc', action='store_true', help='Do not score the reverse complement DNA strand') -parser.add_argument('--output_path', dest='output_path', help='Output files directory') -parser.add_argument('--parse_genomic_coord', action='store_true', help='Check each sequence header for UCSC style genomic coordinates') -parser.add_argument('--qv_thresh', action='store_true', help='Use q-values for the output threshold') -parser.add_argument('--thresh', dest='thresh', type=float, help='p-value threshold') -parser.add_argument('--gff_output', dest='gff_output', help='Gff output file') -parser.add_argument('--html_output', dest='html_output', help='HTML output file') -parser.add_argument('--interval_output', dest='interval_output', help='Interval output file') -parser.add_argument('--txt_output', dest='txt_output', help='Text output file') -parser.add_argument('--xml_output', dest='xml_output', help='XML output file') -args = parser.parse_args() - -fimo_cmd_list = ['fimo'] -if args.options_type == 'advanced': - fimo_cmd_list.append('--alpha %4f' % args.alpha) - if args.bgfile is not None: - fimo_cmd_list.append('--bgfile "%s"' % args.bgfile) - if args.max_strand: - fimo_cmd_list.append('--max-strand') - fimo_cmd_list.append('--max-stored-scores %d' % args.max_stored_scores) - if len(args.motifs) > 0: - for motif in args.motifs: - fimo_cmd_list.append('--motif "%s"' % motif) - fimo_cmd_list.append('--motif-pseudo %4f' % args.motif_pseudo) - if args.no_qvalue: - fimo_cmd_list.append('--no-qvalue') - if args.norc: - fimo_cmd_list.append('--norc') - if args.parse_genomic_coord: - fimo_cmd_list.append('--parse-genomic-coord') - if args.qv_thresh: - fimo_cmd_list.append('--qv-thresh') - fimo_cmd_list.append('--thresh %4f' % args.thresh) - if args.input_psp is not None: - fimo_cmd_list.append('--psp "%s"' % args.input_psp) - if args.input_prior_dist is not None: - fimo_cmd_list.append('--prior-dist "%s"' % args.input_prior_dist) -fimo_cmd_list.append('--o "%s"' % (args.output_path)) -fimo_cmd_list.append('--verbosity 1') -fimo_cmd_list.append(args.input_motifs) -fimo_cmd_list.append(args.input_fasta) - -fimo_cmd = ' '.join(fimo_cmd_list) - -try: - tmp_stderr = tempfile.NamedTemporaryFile() - proc = subprocess.Popen(args=fimo_cmd, shell=True, stderr=tmp_stderr) - returncode = proc.wait() - tmp_stderr.seek(0) - stderr = '' - try: - while True: - stderr += tmp_stderr.read(BUFFSIZE) - if not stderr or len(stderr) % BUFFSIZE != 0: - break - except OverflowError: - pass - if returncode != 0: - stop_err(stderr) -except Exception, e: - stop_err('Error running FIMO:\n%s' % str(e)) - -shutil.move(os.path.join(args.output_path, 'fimo.txt'), args.txt_output) -shutil.move(os.path.join(args.output_path, 'fimo.gff'), args.gff_output) -shutil.move(os.path.join(args.output_path, 'fimo.xml'), args.xml_output) -shutil.move(os.path.join(args.output_path, 'fimo.html'), args.html_output) - -out_file = open(args.interval_output, 'wb') -out_file.write("#%s\n" % "\t".join(("chr", "start", "end", "pattern name", "score", "strand", "matched sequence", "p-value", "q-value"))) -for line in open(args.txt_output): - if line.startswith('#'): - continue - fields = line.rstrip("\n\r").split("\t") - start, end = int(fields[2]), int(fields[3]) - sequence = fields[7] - if start > end: - # Flip start and end and set strand. - start, end = end, start - strand = "-" - # We want sequences relative to strand; FIMO always provides + stranded sequence. - sequence = dna_reverse_complement(sequence) - else: - strand = "+" - # Make 0-based start position. - start -= 1 - out_file.write("%s\n" % "\t".join([fields[1], str(start), str(end), fields[0], fields[4], strand, sequence, fields[5], fields[6]])) -out_file.close() diff -r 34c794383f81 -r 62d1fae3b7d3 test-data/fimo_output_almost-gff_1.txt --- a/test-data/fimo_output_almost-gff_1.txt Fri Mar 18 08:23:54 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -##gff-version 3 -phiX174 fimo polypeptide_motif 1388 1398 102 + . Name=1;ID=1-1-phiX174;pvalue=6.36e-11;qvalue= 1.25e-09;sequence=AATATCTATAA; -phiX174 fimo polypeptide_motif 847 857 102 + . Name=1;ID=1-2-phiX174;pvalue=7.02e-11;qvalue= 1.25e-09;sequence=AATGTCTAAAG; -phiX174 fimo polypeptide_motif 2301 2311 99.6 + . Name=1;ID=1-3-phiX174;pvalue=1.08e-10;qvalue= 1.29e-09;sequence=AGGTTATAACG; -phiX174 fimo polypeptide_motif 5063 5073 95.6 + . Name=1;ID=1-4-phiX174;pvalue=2.73e-10;qvalue= 2.25e-09;sequence=AGGAGCTAAAG; -phiX174 fimo polypeptide_motif 989 999 95 + . Name=1;ID=1-5-phiX174;pvalue=3.15e-10;qvalue= 2.25e-09;sequence=TGAGGATAAAT; -phiX174 fimo polypeptide_motif 4713 4723 91.1 + . Name=1;ID=1-6-phiX174;pvalue=7.74e-10;qvalue= 3.48e-09;sequence=GACTGCTATCA; -phiX174 fimo polypeptide_motif 5048 5058 90.7 + . Name=1;ID=1-7-phiX174;pvalue=8.51e-10;qvalue= 3.48e-09;sequence=TGCTGCTAAAG; -phiX174 fimo polypeptide_motif 855 865 90.6 + . Name=1;ID=1-8-phiX174;pvalue=8.64e-10;qvalue= 3.48e-09;sequence=AAGGTAAAAAA; -phiX174 fimo polypeptide_motif 3155 3165 90.1 + . Name=1;ID=1-9-phiX174;pvalue=9.76e-10;qvalue= 3.48e-09;sequence=TATGGCTAAAG; -phiX174 fimo polypeptide_motif 5009 5019 90.1 + . Name=1;ID=1-10-phiX174;pvalue=9.76e-10;qvalue= 3.48e-09;sequence=TGTGGCTAAAT; -phiX174 fimo polypeptide_motif 814 824 88.9 + . Name=1;ID=1-11-phiX174;pvalue=1.28e-09;qvalue= 4.14e-09;sequence=TGCGTCAAAAA; -phiX174 fimo polypeptide_motif 2832 2842 88.5 + . Name=1;ID=1-12-phiX174;pvalue=1.42e-09;qvalue= 4.23e-09;sequence=TTGGTCTAACT; -phiX174 fimo polypeptide_motif 3830 3840 87.7 + . Name=1;ID=1-13-phiX174;pvalue=1.7e-09;qvalue= 4.68e-09;sequence=TATTGATAAAG; -phiX174 fimo polypeptide_motif 3560 3570 87.2 + . Name=1;ID=1-14-phiX174;pvalue=1.89e-09;qvalue= 4.82e-09;sequence=TGCGTCTATTA; -phiX174 fimo polypeptide_motif 2882 2892 86.4 + . Name=1;ID=1-15-phiX174;pvalue=2.29e-09;qvalue= 5.46e-09;sequence=AGGTTATTAAA; -phiX174 fimo polypeptide_motif 4453 4463 85.9 + . Name=1;ID=1-16-phiX174;pvalue=2.58e-09;qvalue= 5.75e-09;sequence=AAGGTATTAAG; -phiX174 fimo polypeptide_motif 2493 2503 85.1 + . Name=1;ID=1-17-phiX174;pvalue=3.06e-09;qvalue= 5.79e-09;sequence=GACACCTAAAG; -phiX174 fimo polypeptide_motif 4104 4114 85.1 + . Name=1;ID=1-18-phiX174;pvalue=3.08e-09;qvalue= 5.79e-09;sequence=GGCTTCCATAA; -phiX174 fimo polypeptide_motif 4955 4965 85.1 + . Name=1;ID=1-19-phiX174;pvalue=3.08e-09;qvalue= 5.79e-09;sequence=TGATGCTAAAG; -phiX174 fimo polypeptide_motif 1885 1895 84.4 + . Name=1;ID=1-20-phiX174;pvalue=3.61e-09;qvalue= 6.45e-09;sequence=TGCGACTAAAG; -phiX174 fimo polypeptide_motif 3376 3386 84.2 + . Name=1;ID=1-21-phiX174;pvalue=3.81e-09;qvalue= 6.48e-09;sequence=AGAATCAAAAA; -phiX174 fimo polypeptide_motif 52 62 83.9 + . Name=1;ID=1-22-phiX174;pvalue=4.06e-09;qvalue= 6.58e-09;sequence=TGAGTCGAAAA; -phiX174 fimo polypeptide_motif 1390 1400 83.7 + . Name=1;ID=1-23-phiX174;pvalue=4.26e-09;qvalue= 6.61e-09;sequence=TATCTATAACA; -phiX174 fimo polypeptide_motif 2017 2027 83.4 + . Name=1;ID=1-24-phiX174;pvalue=4.6e-09;qvalue= 6.85e-09;sequence=TTCGTCTAAGA; -phiX174 fimo polypeptide_motif 1000 1010 83.1 + . Name=1;ID=1-25-phiX174;pvalue=4.88e-09;qvalue= 6.97e-09;sequence=TATGTCTAATA; -phiX174 fimo polypeptide_motif 1555 1565 82.5 + . Name=1;ID=1-26-phiX174;pvalue=5.58e-09;qvalue= 7.37e-09;sequence=GACTTCTACCA; -phiX174 fimo polypeptide_motif 4430 4440 82.5 + . Name=1;ID=1-27-phiX174;pvalue=5.62e-09;qvalue= 7.37e-09;sequence=TGAGTATAATT; -phiX174 fimo polypeptide_motif 1927 1937 82.3 + . Name=1;ID=1-28-phiX174;pvalue=5.82e-09;qvalue= 7.37e-09;sequence=GACTTATACCG; -phiX174 fimo polypeptide_motif 2981 2991 82.1 + . Name=1;ID=1-29-phiX174;pvalue=6.13e-09;qvalue= 7.37e-09;sequence=CATGTCTAAAT; -phiX174 fimo polypeptide_motif 4203 4213 82 + . Name=1;ID=1-30-phiX174;pvalue=6.34e-09;qvalue= 7.37e-09;sequence=GACGGCCATAA; -phiX174 fimo polypeptide_motif 1669 1679 81.9 + . Name=1;ID=1-31-phiX174;pvalue=6.4e-09;qvalue= 7.37e-09;sequence=TGGAGGTAAAA; -phiX174 fimo polypeptide_motif 3260 3270 81.5 + . Name=1;ID=1-32-phiX174;pvalue=7.01e-09;qvalue= 7.82e-09;sequence=CGCTGATAAAG; -phiX174 fimo polypeptide_motif 3047 3057 81.3 + . Name=1;ID=1-33-phiX174;pvalue=7.4e-09;qvalue= 7.85e-09;sequence=TACCGATAACA; -phiX174 fimo polypeptide_motif 4176 4186 81.2 + . Name=1;ID=1-34-phiX174;pvalue=7.6e-09;qvalue= 7.85e-09;sequence=GAGTTCGATAA; -phiX174 fimo polypeptide_motif 4118 4128 81.1 + . Name=1;ID=1-35-phiX174;pvalue=7.7e-09;qvalue= 7.85e-09;sequence=GATGGATAACC; -phiX174 fimo polypeptide_motif 5370 5380 80.9 + . Name=1;ID=1-36-phiX174;pvalue=8.03e-09;qvalue= 7.87e-09;sequence=GGCGTATCCAA; -phiX174 fimo polypeptide_motif 1242 1252 80.5 + . Name=1;ID=1-37-phiX174;pvalue=8.94e-09;qvalue= 7.87e-09;sequence=AGTGGATTAAG; -phiX174 fimo polypeptide_motif 2583 2593 80.5 + . Name=1;ID=1-38-phiX174;pvalue=8.94e-09;qvalue= 7.87e-09;sequence=TACATCTGTCA; -phiX174 fimo polypeptide_motif 698 708 80.4 + . Name=1;ID=1-39-phiX174;pvalue=9.13e-09;qvalue= 7.87e-09;sequence=TACGGAAAACA; -phiX174 fimo polypeptide_motif 2299 2309 80.3 + . Name=1;ID=1-40-phiX174;pvalue=9.26e-09;qvalue= 7.87e-09;sequence=TGAGGTTATAA; -phiX174 fimo polypeptide_motif 4189 4199 80.1 + . Name=1;ID=1-41-phiX174;pvalue=9.69e-09;qvalue= 7.87e-09;sequence=GTGATATGTAT; -phiX174 fimo polypeptide_motif 275 285 80.1 + . Name=1;ID=1-42-phiX174;pvalue=9.85e-09;qvalue= 7.87e-09;sequence=GGTTTAGATAT; -phiX174 fimo polypeptide_motif 1801 1811 80 + . Name=1;ID=1-43-phiX174;pvalue=1e-08;qvalue= 7.87e-09;sequence=GACCTATAAAC; -phiX174 fimo polypeptide_motif 1386 1396 79.9 + . Name=1;ID=1-44-phiX174;pvalue=1.03e-08;qvalue= 7.87e-09;sequence=TGAATATCTAT; -phiX174 fimo polypeptide_motif 1303 1313 79.8 + . Name=1;ID=1-45-phiX174;pvalue=1.03e-08;qvalue= 7.87e-09;sequence=TGGTTATATTG; -phiX174 fimo polypeptide_motif 3772 3782 79.8 + . Name=1;ID=1-46-phiX174;pvalue=1.04e-08;qvalue= 7.87e-09;sequence=AGGATATTTCT; -phiX174 fimo polypeptide_motif 1288 1298 79.8 + . Name=1;ID=1-47-phiX174;pvalue=1.04e-08;qvalue= 7.87e-09;sequence=GACTGTTAACA; -phiX174 fimo polypeptide_motif 2577 2587 79.7 + . Name=1;ID=1-48-phiX174;pvalue=1.08e-08;qvalue= 7.87e-09;sequence=GATGGATACAT; -phiX174 fimo polypeptide_motif 937 947 79.6 + . Name=1;ID=1-49-phiX174;pvalue=1.08e-08;qvalue= 7.87e-09;sequence=TTGGTATGTAG; -phiX174 fimo polypeptide_motif 904 914 79.5 + . Name=1;ID=1-50-phiX174;pvalue=1.11e-08;qvalue= 7.93e-09;sequence=AGGTACTAAAG; -phiX174 fimo polypeptide_motif 2279 2289 79.4 + . Name=1;ID=1-51-phiX174;pvalue=1.13e-08;qvalue= 7.93e-09;sequence=TCGTGATAAAA; -phiX174 fimo polypeptide_motif 3164 3174 79.3 + . Name=1;ID=1-52-phiX174;pvalue=1.16e-08;qvalue= 7.98e-09;sequence=AGCTGGTAAAG; -phiX174 fimo polypeptide_motif 24 34 79.1 + . Name=1;ID=1-53-phiX174;pvalue=1.23e-08;qvalue= 8.24e-09;sequence=AGAAGTTAACA; -phiX174 fimo polypeptide_motif 838 848 78.9 + . Name=1;ID=1-54-phiX174;pvalue=1.27e-08;qvalue= 8.24e-09;sequence=GAGTGATGTAA; -phiX174 fimo polypeptide_motif 853 863 78.9 + . Name=1;ID=1-55-phiX174;pvalue=1.27e-08;qvalue= 8.24e-09;sequence=TAAAGGTAAAA; -phiX174 fimo polypeptide_motif 1984 1994 78.6 + . Name=1;ID=1-56-phiX174;pvalue=1.36e-08;qvalue= 8.68e-09;sequence=AATTTCTATGA; -phiX174 fimo polypeptide_motif 1 11 78.3 + . Name=1;ID=1-57-phiX174;pvalue=1.46e-08;qvalue= 9.05e-09;sequence=GAGTTTTATCG; -phiX174 fimo polypeptide_motif 4307 4317 78.3 + . Name=1;ID=1-58-phiX174;pvalue=1.47e-08;qvalue= 9.05e-09;sequence=TATTAATAACA; -phiX174 fimo polypeptide_motif 4303 4313 78.2 + . Name=1;ID=1-59-phiX174;pvalue=1.52e-08;qvalue= 9.19e-09;sequence=TTGATATTAAT; -phiX174 fimo polypeptide_motif 5033 5043 78 + . Name=1;ID=1-60-phiX174;pvalue=1.58e-08;qvalue= 9.41e-09;sequence=GTCAGATATGG; -phiX174 fimo polypeptide_motif 2579 2589 77.6 + . Name=1;ID=1-61-phiX174;pvalue=1.73e-08;qvalue= 1.01e-08;sequence=TGGATACATCT; -phiX174 fimo polypeptide_motif 322 332 77.4 + . Name=1;ID=1-62-phiX174;pvalue=1.82e-08;qvalue= 1.05e-08;sequence=GACATTTTAAA; -phiX174 fimo polypeptide_motif 5001 5011 76.8 + . Name=1;ID=1-63-phiX174;pvalue=2.09e-08;qvalue= 1.19e-08;sequence=GGTTTCTATGT; -phiX174 fimo polypeptide_motif 4217 4227 76.7 + . Name=1;ID=1-64-phiX174;pvalue=2.15e-08;qvalue= 1.2e-08;sequence=TGCTTCTGACG; -phiX174 fimo polypeptide_motif 4262 4272 76.6 + . Name=1;ID=1-65-phiX174;pvalue=2.18e-08;qvalue= 1.2e-08;sequence=AATGGATGAAT; -phiX174 fimo polypeptide_motif 3569 3579 76.5 + . Name=1;ID=1-66-phiX174;pvalue=2.26e-08;qvalue= 1.22e-08;sequence=TATGGAAAACA; -phiX174 fimo polypeptide_motif 194 204 76.4 + . Name=1;ID=1-67-phiX174;pvalue=2.29e-08;qvalue= 1.22e-08;sequence=ATCAACTAACG; -phiX174 fimo polypeptide_motif 131 141 76 + . Name=1;ID=1-68-phiX174;pvalue=2.49e-08;qvalue= 1.31e-08;sequence=AAATGAGAAAA; -phiX174 fimo polypeptide_motif 1491 1501 75.9 + . Name=1;ID=1-69-phiX174;pvalue=2.55e-08;qvalue= 1.32e-08;sequence=GCCATCTCAAA; -phiX174 fimo polypeptide_motif 434 444 75.7 + . Name=1;ID=1-70-phiX174;pvalue=2.67e-08;qvalue= 1.36e-08;sequence=GGCCTCTATTA; -phiX174 fimo polypeptide_motif 4565 4575 75.6 + . Name=1;ID=1-71-phiX174;pvalue=2.73e-08;qvalue= 1.36e-08;sequence=TTGGTTTATCG; -phiX174 fimo polypeptide_motif 102 112 75.6 + . Name=1;ID=1-72-phiX174;pvalue=2.75e-08;qvalue= 1.36e-08;sequence=GAATTAAATCG; -phiX174 fimo polypeptide_motif 903 913 75.5 + . Name=1;ID=1-73-phiX174;pvalue=2.82e-08;qvalue= 1.38e-08;sequence=GAGGTACTAAA; -phiX174 fimo polypeptide_motif 4748 4758 75.2 + . Name=1;ID=1-74-phiX174;pvalue=3.01e-08;qvalue= 1.45e-08;sequence=TACAGCTAATG; -phiX174 fimo polypeptide_motif 2622 2632 75 + . Name=1;ID=1-75-phiX174;pvalue=3.16e-08;qvalue= 1.5e-08;sequence=TGCTGATATTG; -phiX174 fimo polypeptide_motif 467 477 74.7 + . Name=1;ID=1-76-phiX174;pvalue=3.35e-08;qvalue= 1.57e-08;sequence=TTTGGATTTAA; -phiX174 fimo polypeptide_motif 4033 4043 74.6 + . Name=1;ID=1-77-phiX174;pvalue=3.44e-08;qvalue= 1.58e-08;sequence=AGCGTATCGAG; -phiX174 fimo polypeptide_motif 1348 1358 74.6 + . Name=1;ID=1-78-phiX174;pvalue=3.46e-08;qvalue= 1.58e-08;sequence=TACCAATAAAA; -phiX174 fimo polypeptide_motif 239 249 74.4 + . Name=1;ID=1-79-phiX174;pvalue=3.62e-08;qvalue= 1.64e-08;sequence=AGTGGCTTAAT; -phiX174 fimo polypeptide_motif 500 510 74.1 + . Name=1;ID=1-80-phiX174;pvalue=3.84e-08;qvalue= 1.71e-08;sequence=GACGAGTAACA; -phiX174 fimo polypeptide_motif 3001 3011 74 + . Name=1;ID=1-81-phiX174;pvalue=3.93e-08;qvalue= 1.73e-08;sequence=GCGGTCAAAAA; -phiX174 fimo polypeptide_motif 3776 3786 74 + . Name=1;ID=1-82-phiX174;pvalue=3.98e-08;qvalue= 1.73e-08;sequence=TATTTCTAATG; -phiX174 fimo polypeptide_motif 2026 2036 73.9 + . Name=1;ID=1-83-phiX174;pvalue=4.06e-08;qvalue= 1.75e-08;sequence=GAAGTTTAAGA; -phiX174 fimo polypeptide_motif 4237 4247 73.8 + . Name=1;ID=1-84-phiX174;pvalue=4.12e-08;qvalue= 1.75e-08;sequence=AGTTTGTATCT; -phiX174 fimo polypeptide_motif 803 813 73.7 + . Name=1;ID=1-85-phiX174;pvalue=4.24e-08;qvalue= 1.78e-08;sequence=AGAAGAAAACG; -phiX174 fimo polypeptide_motif 3770 3780 73.6 + . Name=1;ID=1-86-phiX174;pvalue=4.35e-08;qvalue= 1.81e-08;sequence=AAAGGATATTT; -phiX174 fimo polypeptide_motif 3429 3439 73.5 + . Name=1;ID=1-87-phiX174;pvalue=4.45e-08;qvalue= 1.82e-08;sequence=GAGATGCAAAA; -phiX174 fimo polypeptide_motif 99 109 73.5 + . Name=1;ID=1-88-phiX174;pvalue=4.48e-08;qvalue= 1.82e-08;sequence=TACGAATTAAA; -phiX174 fimo polypeptide_motif 67 77 73.2 + . Name=1;ID=1-89-phiX174;pvalue=4.78e-08;qvalue= 1.92e-08;sequence=TCTTGATAAAG; -phiX174 fimo polypeptide_motif 5332 5342 72.9 + . Name=1;ID=1-90-phiX174;pvalue=5.13e-08;qvalue= 2.01e-08;sequence=ATCTGCTCAAA; -phiX174 fimo polypeptide_motif 277 287 72.9 + . Name=1;ID=1-91-phiX174;pvalue=5.14e-08;qvalue= 2.01e-08;sequence=TTTAGATATGA; -phiX174 fimo polypeptide_motif 4338 4348 72.8 + . Name=1;ID=1-92-phiX174;pvalue=5.18e-08;qvalue= 2.01e-08;sequence=GGGGACGAAAA; -phiX174 fimo polypeptide_motif 3812 3822 72.8 + . Name=1;ID=1-93-phiX174;pvalue=5.28e-08;qvalue= 2.03e-08;sequence=GGTTGATATTT; -phiX174 fimo polypeptide_motif 1909 1919 72.6 + . Name=1;ID=1-94-phiX174;pvalue=5.51e-08;qvalue= 2.08e-08;sequence=TAACGCTAAAG; -phiX174 fimo polypeptide_motif 3000 3010 72.6 + . Name=1;ID=1-95-phiX174;pvalue=5.54e-08;qvalue= 2.08e-08;sequence=GGCGGTCAAAA; -phiX174 fimo polypeptide_motif 3891 3901 72.4 + . Name=1;ID=1-96-phiX174;pvalue=5.75e-08;qvalue= 2.11e-08;sequence=ATTGGCTCTAA; -phiX174 fimo polypeptide_motif 3079 3089 72.4 + . Name=1;ID=1-97-phiX174;pvalue=5.76e-08;qvalue= 2.11e-08;sequence=CTGGTATTAAA; -phiX174 fimo polypeptide_motif 37 47 72.4 + . Name=1;ID=1-98-phiX174;pvalue=5.79e-08;qvalue= 2.11e-08;sequence=TTCGGATATTT; -phiX174 fimo polypeptide_motif 380 390 72.2 + . Name=1;ID=1-99-phiX174;pvalue=6.01e-08;qvalue= 2.17e-08;sequence=GTAAGAAATCA; diff -r 34c794383f81 -r 62d1fae3b7d3 test-data/fimo_output_almost-gff_2.txt --- a/test-data/fimo_output_almost-gff_2.txt Fri Mar 18 08:23:54 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -##gff-version 3 -phiX174 fimo polypeptide_motif 1388 1398 102 + . Name=1;ID=1-1-phiX174;pvalue=6.36e-11;sequence=AATATCTATAA; -phiX174 fimo polypeptide_motif 847 857 102 + . Name=1;ID=1-2-phiX174;pvalue=7.02e-11;sequence=AATGTCTAAAG; -phiX174 fimo polypeptide_motif 2301 2311 99.6 + . Name=1;ID=1-3-phiX174;pvalue=1.08e-10;sequence=AGGTTATAACG; -phiX174 fimo polypeptide_motif 5063 5073 95.6 + . Name=1;ID=1-4-phiX174;pvalue=2.73e-10;sequence=AGGAGCTAAAG; -phiX174 fimo polypeptide_motif 989 999 95 + . Name=1;ID=1-5-phiX174;pvalue=3.15e-10;sequence=TGAGGATAAAT; -phiX174 fimo polypeptide_motif 4713 4723 91.1 + . Name=1;ID=1-6-phiX174;pvalue=7.74e-10;sequence=GACTGCTATCA; -phiX174 fimo polypeptide_motif 5048 5058 90.7 + . Name=1;ID=1-7-phiX174;pvalue=8.51e-10;sequence=TGCTGCTAAAG; -phiX174 fimo polypeptide_motif 855 865 90.6 + . Name=1;ID=1-8-phiX174;pvalue=8.64e-10;sequence=AAGGTAAAAAA; -phiX174 fimo polypeptide_motif 3155 3165 90.1 + . Name=1;ID=1-9-phiX174;pvalue=9.76e-10;sequence=TATGGCTAAAG; -phiX174 fimo polypeptide_motif 5009 5019 90.1 + . Name=1;ID=1-10-phiX174;pvalue=9.76e-10;sequence=TGTGGCTAAAT; -phiX174 fimo polypeptide_motif 814 824 88.9 + . Name=1;ID=1-11-phiX174;pvalue=1.28e-09;sequence=TGCGTCAAAAA; -phiX174 fimo polypeptide_motif 2832 2842 88.5 + . Name=1;ID=1-12-phiX174;pvalue=1.42e-09;sequence=TTGGTCTAACT; -phiX174 fimo polypeptide_motif 3830 3840 87.7 + . Name=1;ID=1-13-phiX174;pvalue=1.7e-09;sequence=TATTGATAAAG; -phiX174 fimo polypeptide_motif 3560 3570 87.2 + . Name=1;ID=1-14-phiX174;pvalue=1.89e-09;sequence=TGCGTCTATTA; -phiX174 fimo polypeptide_motif 2882 2892 86.4 + . Name=1;ID=1-15-phiX174;pvalue=2.29e-09;sequence=AGGTTATTAAA; -phiX174 fimo polypeptide_motif 4453 4463 85.9 + . Name=1;ID=1-16-phiX174;pvalue=2.58e-09;sequence=AAGGTATTAAG; -phiX174 fimo polypeptide_motif 2493 2503 85.1 + . Name=1;ID=1-17-phiX174;pvalue=3.06e-09;sequence=GACACCTAAAG; -phiX174 fimo polypeptide_motif 4104 4114 85.1 + . Name=1;ID=1-18-phiX174;pvalue=3.08e-09;sequence=GGCTTCCATAA; -phiX174 fimo polypeptide_motif 4955 4965 85.1 + . Name=1;ID=1-19-phiX174;pvalue=3.08e-09;sequence=TGATGCTAAAG; -phiX174 fimo polypeptide_motif 1885 1895 84.4 + . Name=1;ID=1-20-phiX174;pvalue=3.61e-09;sequence=TGCGACTAAAG; -phiX174 fimo polypeptide_motif 3376 3386 84.2 + . Name=1;ID=1-21-phiX174;pvalue=3.81e-09;sequence=AGAATCAAAAA; -phiX174 fimo polypeptide_motif 52 62 83.9 + . Name=1;ID=1-22-phiX174;pvalue=4.06e-09;sequence=TGAGTCGAAAA; -phiX174 fimo polypeptide_motif 1390 1400 83.7 + . Name=1;ID=1-23-phiX174;pvalue=4.26e-09;sequence=TATCTATAACA; -phiX174 fimo polypeptide_motif 2017 2027 83.4 + . Name=1;ID=1-24-phiX174;pvalue=4.6e-09;sequence=TTCGTCTAAGA; -phiX174 fimo polypeptide_motif 1000 1010 83.1 + . Name=1;ID=1-25-phiX174;pvalue=4.88e-09;sequence=TATGTCTAATA; -phiX174 fimo polypeptide_motif 1555 1565 82.5 + . Name=1;ID=1-26-phiX174;pvalue=5.58e-09;sequence=GACTTCTACCA; -phiX174 fimo polypeptide_motif 4430 4440 82.5 + . Name=1;ID=1-27-phiX174;pvalue=5.62e-09;sequence=TGAGTATAATT; -phiX174 fimo polypeptide_motif 1927 1937 82.3 + . Name=1;ID=1-28-phiX174;pvalue=5.82e-09;sequence=GACTTATACCG; -phiX174 fimo polypeptide_motif 2981 2991 82.1 + . Name=1;ID=1-29-phiX174;pvalue=6.13e-09;sequence=CATGTCTAAAT; -phiX174 fimo polypeptide_motif 4203 4213 82 + . Name=1;ID=1-30-phiX174;pvalue=6.34e-09;sequence=GACGGCCATAA; -phiX174 fimo polypeptide_motif 1669 1679 81.9 + . Name=1;ID=1-31-phiX174;pvalue=6.4e-09;sequence=TGGAGGTAAAA; -phiX174 fimo polypeptide_motif 3260 3270 81.5 + . Name=1;ID=1-32-phiX174;pvalue=7.01e-09;sequence=CGCTGATAAAG; -phiX174 fimo polypeptide_motif 3047 3057 81.3 + . Name=1;ID=1-33-phiX174;pvalue=7.4e-09;sequence=TACCGATAACA; -phiX174 fimo polypeptide_motif 4176 4186 81.2 + . Name=1;ID=1-34-phiX174;pvalue=7.6e-09;sequence=GAGTTCGATAA; -phiX174 fimo polypeptide_motif 4118 4128 81.1 + . Name=1;ID=1-35-phiX174;pvalue=7.7e-09;sequence=GATGGATAACC; -phiX174 fimo polypeptide_motif 5370 5380 80.9 + . Name=1;ID=1-36-phiX174;pvalue=8.03e-09;sequence=GGCGTATCCAA; -phiX174 fimo polypeptide_motif 1242 1252 80.5 + . Name=1;ID=1-37-phiX174;pvalue=8.94e-09;sequence=AGTGGATTAAG; -phiX174 fimo polypeptide_motif 2583 2593 80.5 + . Name=1;ID=1-38-phiX174;pvalue=8.94e-09;sequence=TACATCTGTCA; -phiX174 fimo polypeptide_motif 698 708 80.4 + . Name=1;ID=1-39-phiX174;pvalue=9.13e-09;sequence=TACGGAAAACA; -phiX174 fimo polypeptide_motif 2299 2309 80.3 + . Name=1;ID=1-40-phiX174;pvalue=9.26e-09;sequence=TGAGGTTATAA; -phiX174 fimo polypeptide_motif 4189 4199 80.1 + . Name=1;ID=1-41-phiX174;pvalue=9.69e-09;sequence=GTGATATGTAT; -phiX174 fimo polypeptide_motif 275 285 80.1 + . Name=1;ID=1-42-phiX174;pvalue=9.85e-09;sequence=GGTTTAGATAT; -phiX174 fimo polypeptide_motif 1801 1811 80 + . Name=1;ID=1-43-phiX174;pvalue=1e-08;sequence=GACCTATAAAC; -phiX174 fimo polypeptide_motif 1386 1396 79.9 + . Name=1;ID=1-44-phiX174;pvalue=1.03e-08;sequence=TGAATATCTAT; -phiX174 fimo polypeptide_motif 1303 1313 79.8 + . Name=1;ID=1-45-phiX174;pvalue=1.03e-08;sequence=TGGTTATATTG; -phiX174 fimo polypeptide_motif 3772 3782 79.8 + . Name=1;ID=1-46-phiX174;pvalue=1.04e-08;sequence=AGGATATTTCT; -phiX174 fimo polypeptide_motif 1288 1298 79.8 + . Name=1;ID=1-47-phiX174;pvalue=1.04e-08;sequence=GACTGTTAACA; -phiX174 fimo polypeptide_motif 2577 2587 79.7 + . Name=1;ID=1-48-phiX174;pvalue=1.08e-08;sequence=GATGGATACAT; -phiX174 fimo polypeptide_motif 937 947 79.6 + . Name=1;ID=1-49-phiX174;pvalue=1.08e-08;sequence=TTGGTATGTAG; -phiX174 fimo polypeptide_motif 904 914 79.5 + . Name=1;ID=1-50-phiX174;pvalue=1.11e-08;sequence=AGGTACTAAAG; -phiX174 fimo polypeptide_motif 2279 2289 79.4 + . Name=1;ID=1-51-phiX174;pvalue=1.13e-08;sequence=TCGTGATAAAA; -phiX174 fimo polypeptide_motif 3164 3174 79.3 + . Name=1;ID=1-52-phiX174;pvalue=1.16e-08;sequence=AGCTGGTAAAG; -phiX174 fimo polypeptide_motif 24 34 79.1 + . Name=1;ID=1-53-phiX174;pvalue=1.23e-08;sequence=AGAAGTTAACA; -phiX174 fimo polypeptide_motif 838 848 78.9 + . Name=1;ID=1-54-phiX174;pvalue=1.27e-08;sequence=GAGTGATGTAA; -phiX174 fimo polypeptide_motif 853 863 78.9 + . Name=1;ID=1-55-phiX174;pvalue=1.27e-08;sequence=TAAAGGTAAAA; -phiX174 fimo polypeptide_motif 1984 1994 78.6 + . Name=1;ID=1-56-phiX174;pvalue=1.36e-08;sequence=AATTTCTATGA; -phiX174 fimo polypeptide_motif 1 11 78.3 + . Name=1;ID=1-57-phiX174;pvalue=1.46e-08;sequence=GAGTTTTATCG; -phiX174 fimo polypeptide_motif 4307 4317 78.3 + . Name=1;ID=1-58-phiX174;pvalue=1.47e-08;sequence=TATTAATAACA; -phiX174 fimo polypeptide_motif 4303 4313 78.2 + . Name=1;ID=1-59-phiX174;pvalue=1.52e-08;sequence=TTGATATTAAT; -phiX174 fimo polypeptide_motif 5033 5043 78 + . Name=1;ID=1-60-phiX174;pvalue=1.58e-08;sequence=GTCAGATATGG; -phiX174 fimo polypeptide_motif 2579 2589 77.6 + . Name=1;ID=1-61-phiX174;pvalue=1.73e-08;sequence=TGGATACATCT; -phiX174 fimo polypeptide_motif 322 332 77.4 + . Name=1;ID=1-62-phiX174;pvalue=1.82e-08;sequence=GACATTTTAAA; -phiX174 fimo polypeptide_motif 5001 5011 76.8 + . Name=1;ID=1-63-phiX174;pvalue=2.09e-08;sequence=GGTTTCTATGT; -phiX174 fimo polypeptide_motif 4217 4227 76.7 + . Name=1;ID=1-64-phiX174;pvalue=2.15e-08;sequence=TGCTTCTGACG; -phiX174 fimo polypeptide_motif 4262 4272 76.6 + . Name=1;ID=1-65-phiX174;pvalue=2.18e-08;sequence=AATGGATGAAT; -phiX174 fimo polypeptide_motif 3569 3579 76.5 + . Name=1;ID=1-66-phiX174;pvalue=2.26e-08;sequence=TATGGAAAACA; -phiX174 fimo polypeptide_motif 194 204 76.4 + . Name=1;ID=1-67-phiX174;pvalue=2.29e-08;sequence=ATCAACTAACG; -phiX174 fimo polypeptide_motif 131 141 76 + . Name=1;ID=1-68-phiX174;pvalue=2.49e-08;sequence=AAATGAGAAAA; -phiX174 fimo polypeptide_motif 1491 1501 75.9 + . Name=1;ID=1-69-phiX174;pvalue=2.55e-08;sequence=GCCATCTCAAA; -phiX174 fimo polypeptide_motif 434 444 75.7 + . Name=1;ID=1-70-phiX174;pvalue=2.67e-08;sequence=GGCCTCTATTA; -phiX174 fimo polypeptide_motif 4565 4575 75.6 + . Name=1;ID=1-71-phiX174;pvalue=2.73e-08;sequence=TTGGTTTATCG; -phiX174 fimo polypeptide_motif 102 112 75.6 + . Name=1;ID=1-72-phiX174;pvalue=2.75e-08;sequence=GAATTAAATCG; -phiX174 fimo polypeptide_motif 903 913 75.5 + . Name=1;ID=1-73-phiX174;pvalue=2.82e-08;sequence=GAGGTACTAAA; -phiX174 fimo polypeptide_motif 4748 4758 75.2 + . Name=1;ID=1-74-phiX174;pvalue=3.01e-08;sequence=TACAGCTAATG; -phiX174 fimo polypeptide_motif 2622 2632 75 + . Name=1;ID=1-75-phiX174;pvalue=3.16e-08;sequence=TGCTGATATTG; -phiX174 fimo polypeptide_motif 467 477 74.7 + . Name=1;ID=1-76-phiX174;pvalue=3.35e-08;sequence=TTTGGATTTAA; -phiX174 fimo polypeptide_motif 4033 4043 74.6 + . Name=1;ID=1-77-phiX174;pvalue=3.44e-08;sequence=AGCGTATCGAG; -phiX174 fimo polypeptide_motif 1348 1358 74.6 + . Name=1;ID=1-78-phiX174;pvalue=3.46e-08;sequence=TACCAATAAAA; -phiX174 fimo polypeptide_motif 239 249 74.4 + . Name=1;ID=1-79-phiX174;pvalue=3.62e-08;sequence=AGTGGCTTAAT; -phiX174 fimo polypeptide_motif 500 510 74.1 + . Name=1;ID=1-80-phiX174;pvalue=3.84e-08;sequence=GACGAGTAACA; -phiX174 fimo polypeptide_motif 3001 3011 74 + . Name=1;ID=1-81-phiX174;pvalue=3.93e-08;sequence=GCGGTCAAAAA; -phiX174 fimo polypeptide_motif 3776 3786 74 + . Name=1;ID=1-82-phiX174;pvalue=3.98e-08;sequence=TATTTCTAATG; -phiX174 fimo polypeptide_motif 2026 2036 73.9 + . Name=1;ID=1-83-phiX174;pvalue=4.06e-08;sequence=GAAGTTTAAGA; -phiX174 fimo polypeptide_motif 4237 4247 73.8 + . Name=1;ID=1-84-phiX174;pvalue=4.12e-08;sequence=AGTTTGTATCT; -phiX174 fimo polypeptide_motif 803 813 73.7 + . Name=1;ID=1-85-phiX174;pvalue=4.24e-08;sequence=AGAAGAAAACG; -phiX174 fimo polypeptide_motif 3770 3780 73.6 + . Name=1;ID=1-86-phiX174;pvalue=4.35e-08;sequence=AAAGGATATTT; -phiX174 fimo polypeptide_motif 3429 3439 73.5 + . Name=1;ID=1-87-phiX174;pvalue=4.45e-08;sequence=GAGATGCAAAA; -phiX174 fimo polypeptide_motif 99 109 73.5 + . Name=1;ID=1-88-phiX174;pvalue=4.48e-08;sequence=TACGAATTAAA; -phiX174 fimo polypeptide_motif 67 77 73.2 + . Name=1;ID=1-89-phiX174;pvalue=4.78e-08;sequence=TCTTGATAAAG; -phiX174 fimo polypeptide_motif 5332 5342 72.9 + . Name=1;ID=1-90-phiX174;pvalue=5.13e-08;sequence=ATCTGCTCAAA; -phiX174 fimo polypeptide_motif 277 287 72.9 + . Name=1;ID=1-91-phiX174;pvalue=5.14e-08;sequence=TTTAGATATGA; -phiX174 fimo polypeptide_motif 4338 4348 72.8 + . Name=1;ID=1-92-phiX174;pvalue=5.18e-08;sequence=GGGGACGAAAA; -phiX174 fimo polypeptide_motif 3812 3822 72.8 + . Name=1;ID=1-93-phiX174;pvalue=5.28e-08;sequence=GGTTGATATTT; -phiX174 fimo polypeptide_motif 1909 1919 72.6 + . Name=1;ID=1-94-phiX174;pvalue=5.51e-08;sequence=TAACGCTAAAG; -phiX174 fimo polypeptide_motif 3000 3010 72.6 + . Name=1;ID=1-95-phiX174;pvalue=5.54e-08;sequence=GGCGGTCAAAA; -phiX174 fimo polypeptide_motif 3891 3901 72.4 + . Name=1;ID=1-96-phiX174;pvalue=5.75e-08;sequence=ATTGGCTCTAA; -phiX174 fimo polypeptide_motif 3079 3089 72.4 + . Name=1;ID=1-97-phiX174;pvalue=5.76e-08;sequence=CTGGTATTAAA; -phiX174 fimo polypeptide_motif 37 47 72.4 + . Name=1;ID=1-98-phiX174;pvalue=5.79e-08;sequence=TTCGGATATTT; -phiX174 fimo polypeptide_motif 380 390 72.2 + . Name=1;ID=1-99-phiX174;pvalue=6.01e-08;sequence=GTAAGAAATCA; diff -r 34c794383f81 -r 62d1fae3b7d3 test-data/fimo_output_html_1.html --- a/test-data/fimo_output_html_1.html Fri Mar 18 08:23:54 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ - - - - - -FIMO Results - - - - -
- - - - - - -
Database and MotifsHigh-scoring Motif OccurencesDebugging Information
-
-
-
-
FIMO - Motif search tool
-
-

-FIMO version 4.11.0, (Release date: Thu Nov 26 17:48:49 2015 +1000) -

-

-For further information on how to interpret these results -or to get a copy of the FIMO software please access -http://meme.nbcr.net

-

If you use FIMO in your research, please cite the following paper:
-Charles E. Grant, Timothy L. Bailey, and William Stafford Noble, -"FIMO: Scanning for occurrences of a given motif", -Bioinformatics, 27(7):1017-1018, 2011. -[full text]

-
-
DATABASE AND MOTIFS
-
-
-

- DATABASE /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2541.dat -
- Database contains 1 sequences, 5386 residues -

-

- MOTIFS /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2540.dat (Protein) - - - - - - - - - - - - - - - -
MOTIFWIDTH - BEST POSSIBLE MATCH -
111GGGGTATAAAA
-

-

-Random model letter frequencies (from non-redundant database): -
- -A 0.073 C 0.018 D 0.052 E 0.062 F 0.040 G 0.069 H 0.022 I 0.056 K 0.058 -L 0.092 M 0.023 N 0.046 P 0.051 Q 0.041 R 0.052 S 0.074 T 0.059 V 0.064 -W 0.013 Y 0.033

-
-
-
SECTION I: HIGH-SCORING MOTIF OCCURENCES
-
-