comparison fimo_wrapper.py @ 0:fd522a964017 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme commit e96df94dba60050fa28aaf55b5bb095717a5f260
author iuc
date Tue, 22 Dec 2015 17:01:51 -0500
parents
children eca84de658b0
comparison
equal deleted inserted replaced
-1:000000000000 0:fd522a964017
1 #!/usr/bin/env python
2 import argparse
3 import os
4 import shutil
5 import string
6 import subprocess
7 import sys
8 import tempfile
9
10 BUFFSIZE = 1048576
11 # Translation table for reverse Complement, with ambiguity codes.
12 DNA_COMPLEMENT = string.maketrans("ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb")
13
14
15 def reverse(sequence):
16 # Reverse sequence string.
17 return sequence[::-1]
18
19
20 def dna_complement(sequence):
21 # Complement DNA sequence string.
22 return sequence.translate(DNA_COMPLEMENT)
23
24
25 def dna_reverse_complement(sequence):
26 # Returns the reverse complement of the sequence.
27 sequence = reverse(sequence)
28 return dna_complement(sequence)
29
30
31 def stop_err(msg):
32 sys.stderr.write(msg)
33 sys.exit(1)
34
35 parser = argparse.ArgumentParser()
36 parser.add_argument('--input_motifs', dest='input_motifs', help='MEME output formatted files for input to fimo')
37 parser.add_argument('--input_fasta', dest='input_fasta', help='Fassta sequence file')
38 parser.add_argument('--options_type', dest='options_type', help='Basic or Advance options')
39 parser.add_argument('--input_psp', dest='input_psp', default=None, help='File containing position specific priors')
40 parser.add_argument('--input_prior_dist', dest='input_prior_dist', default=None, help='File containing binned distribution of priors')
41 parser.add_argument('--alpha', dest='alpha', type=float, default=1.0, help='The alpha parameter for calculating position specific priors')
42 parser.add_argument('--bgfile', dest='bgfile', default=None, help='Background file type, used only if not "default"')
43 parser.add_argument('--max_strand', action='store_true', help='If matches on both strands at a given position satisfy the output threshold, only report the match for the strand with the higher score')
44 parser.add_argument('--max_stored_scores', dest='max_stored_scores', type=int, help='Maximum score count to store')
45 parser.add_argument('--motif', dest='motifs', action='append', default=[], help='Specify motif by id')
46 parser.add_argument('--motif_pseudo', dest='motif_pseudo', type=float, default=0.1, help='Pseudocount to add to counts in motif matrix')
47 parser.add_argument('--no_qvalue', action='store_true', help='Do not compute a q-value for each p-value')
48 parser.add_argument('--norc', action='store_true', help='Do not score the reverse complement DNA strand')
49 parser.add_argument('--output_path', dest='output_path', help='Output files directory')
50 parser.add_argument('--parse_genomic_coord', action='store_true', help='Check each sequence header for UCSC style genomic coordinates')
51 parser.add_argument('--qv_thresh', action='store_true', help='Use q-values for the output threshold')
52 parser.add_argument('--thresh', dest='thresh', type=float, help='p-value threshold')
53 parser.add_argument('--gff_output', dest='gff_output', help='Gff output file')
54 parser.add_argument('--html_output', dest='html_output', help='HTML output file')
55 parser.add_argument('--interval_output', dest='interval_output', help='Interval output file')
56 parser.add_argument('--txt_output', dest='txt_output', help='Text output file')
57 parser.add_argument('--xml_output', dest='xml_output', help='XML output file')
58 args = parser.parse_args()
59
60 fimo_cmd_list = ['fimo']
61 if args.options_type == 'advanced':
62 fimo_cmd_list.append('--alpha %4f' % args.alpha)
63 if args.bgfile is not None:
64 fimo_cmd_list.append('--bgfile "%s"' % args.bgfile)
65 if args.max_strand:
66 fimo_cmd_list.append('--max-strand')
67 fimo_cmd_list.append('--max-stored-scores %d' % args.max_stored_scores)
68 if len(args.motifs) > 0:
69 for motif in args.motifs:
70 fimo_cmd_list.append('--motif "%s"' % motif)
71 fimo_cmd_list.append('--motif-pseudo %4f' % args.motif_pseudo)
72 if args.no_qvalue:
73 fimo_cmd_list.append('--no-qvalue')
74 if args.norc:
75 fimo_cmd_list.append('--norc')
76 if args.parse_genomic_coord:
77 fimo_cmd_list.append('--parse-genomic-coord')
78 if args.qv_thresh:
79 fimo_cmd_list.append('--qv-thresh')
80 fimo_cmd_list.append('--thresh %4f' % args.thresh)
81 if args.input_psp is not None:
82 fimo_cmd_list.append('--psp "%s"' % args.input_psp)
83 if args.input_prior_dist is not None:
84 fimo_cmd_list.append('--prior-dist "%s"' % args.input_prior_dist)
85 fimo_cmd_list.append('--o "%s"' % (args.output_path))
86 fimo_cmd_list.append('--verbosity 1')
87 fimo_cmd_list.append(args.input_motifs)
88 fimo_cmd_list.append(args.input_fasta)
89
90 fimo_cmd = ' '.join(fimo_cmd_list)
91
92 try:
93 tmp_stderr = tempfile.NamedTemporaryFile()
94 proc = subprocess.Popen(args=fimo_cmd, shell=True, stderr=tmp_stderr)
95 returncode = proc.wait()
96 tmp_stderr.seek(0)
97 stderr = ''
98 try:
99 while True:
100 stderr += tmp_stderr.read(BUFFSIZE)
101 if not stderr or len(stderr) % BUFFSIZE != 0:
102 break
103 except OverflowError:
104 pass
105 if returncode != 0:
106 stop_err(stderr)
107 except Exception, e:
108 stop_err('Error running FIMO:\n%s' % str(e))
109
110 shutil.move(os.path.join(args.output_path, 'fimo.txt'), args.txt_output)
111 shutil.move(os.path.join(args.output_path, 'fimo.gff'), args.gff_output)
112 shutil.move(os.path.join(args.output_path, 'fimo.xml'), args.xml_output)
113 shutil.move(os.path.join(args.output_path, 'fimo.html'), args.html_output)
114
115 out_file = open(args.interval_output, 'wb')
116 out_file.write("#%s\n" % "\t".join(("chr", "start", "end", "pattern name", "score", "strand", "matched sequence", "p-value", "q-value")))
117 for line in open(args.txt_output):
118 if line.startswith('#'):
119 continue
120 fields = line.rstrip("\n\r").split("\t")
121 start, end = int(fields[2]), int(fields[3])
122 sequence = fields[7]
123 if start > end:
124 # Flip start and end and set strand.
125 start, end = end, start
126 strand = "-"
127 # We want sequences relative to strand; FIMO always provides + stranded sequence.
128 sequence = dna_reverse_complement(sequence)
129 else:
130 strand = "+"
131 # Make 0-based start position.
132 start -= 1
133 out_file.write("%s\n" % "\t".join([fields[1], str(start), str(end), fields[0], fields[4], strand, sequence, fields[5], fields[6]]))
134 out_file.close()