Mercurial > repos > miller-lab > genome_diversity
comparison extract_flanking_dna.py @ 17:a3af29edcce2
Uploaded Miller Lab Devshed version a51c894f5bed
author | miller-lab |
---|---|
date | Fri, 28 Sep 2012 11:57:18 -0400 |
parents | 2c498d40ecde |
children |
comparison
equal
deleted
inserted
replaced
16:be0e2223c531 | 17:a3af29edcce2 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os | |
4 import sys | |
5 from optparse import OptionParser | |
6 import genome_diversity as gd | |
7 | |
def main_function( parse_arguments=None ):
    """Build a decorator that turns a function into a script entry point.

    parse_arguments -- optional callable that takes the raw argument list
        and returns an ( options, arguments ) pair.  When omitted, options
        is None and the argument list is handed through unchanged.

    The decorated function is invoked as to_decorate( options, arguments ),
    with sys.argv used when no argument list is supplied.  After the
    arguments have been parsed the wrapper always finishes by calling
    sys.exit(): with the decorated function's return value on success, or
    with 1 when it raised.  Exceptions derived from Exception are reported
    on stderr as an 'ERROR: ...' line followed by the traceback.
    """
    # Imported locally: the handler below needs it, and the imports visible
    # at the top of this file do not include it.  Without it the
    # print_exc() call raised NameError, which the sys.exit() in the
    # finally clause then silently replaced.
    import traceback

    if parse_arguments is None:
        parse_arguments = lambda arguments: ( None, arguments )

    def main_decorator( to_decorate ):
        def decorated_main( arguments=None ):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments( arguments )
            # Assume failure until the wrapped function returns normally.
            rc = 1
            try:
                rc = to_decorate( options, arguments )
            except Exception as err:
                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
                traceback.print_exc()
            finally:
                # Runs on every path, so the process always exits with rc.
                sys.exit( rc )
        return decorated_main
    return main_decorator
26 | |
def parse_arguments( arguments ):
    """Parse the command line for the flanking-DNA extraction script.

    arguments -- full argument vector; the first element (the program
        name) is skipped before parsing.

    Returns the ( options, leftover_arguments ) pair produced by
    OptionParser.parse_args().
    """
    # ( option name, optparse type, help text ); the option name doubles as
    # the destination attribute on the returned options object.
    option_table = (
        ( 'input', 'string', 'file of selected SNPs' ),
        ( 'output', 'string', 'output file' ),
        ( 'snps_loc', 'string', 'snps .loc file' ),
        ( 'scaffold_col', 'int', 'scaffold column in the input file' ),
        ( 'pos_col', 'int', 'position column in the input file' ),
        ( 'output_format', 'string', 'output format, fasta or primer3' ),
        ( 'species', 'string', 'species' ),
    )

    option_parser = OptionParser()
    for opt_name, opt_type, opt_help in option_table:
        option_parser.add_option( '--' + opt_name,
                                  type=opt_type, dest=opt_name,
                                  help=opt_help )

    without_program_name = arguments[1:]
    return option_parser.parse_args( without_program_name )
51 | |
52 | |
@main_function( parse_arguments )
def main( options, arguments ):
    """Write the flanking DNA of every selected SNP to the output file.

    options   -- parsed command-line options.  input, output, snps_loc,
                 scaffold_col, pos_col, output_format and species must all
                 be set.
    arguments -- leftover positional arguments; not used.

    Raises RuntimeError naming the first required option that is missing.
    Returns None.
    """
    # Checked in this order so the first missing option is the one reported.
    # NOTE(review): this is a truthiness test, so a column value of 0 is
    # reported as missing -- confirm that columns are never 0 for gd.SnpFile.
    required_options = ( 'input', 'output', 'snps_loc', 'scaffold_col',
                         'pos_col', 'output_format', 'species' )
    for option_name in required_options:
        if not getattr( options, option_name ):
            raise RuntimeError( 'missing --%s option' % option_name )

    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )

    out_fh = gd._openfile( options.output, 'w' )
    try:
        snpcalls_file = gd.get_filename_from_loc( options.species, options.snps_loc )
        # The index file shares the data file's path, with the extension
        # replaced by ".cdb".
        file_root = os.path.splitext( snpcalls_file )[0]
        snpcalls_index_file = file_root + ".cdb"
        snpcalls = gd.SnpcallsFile( data_file=snpcalls_file, index_file=snpcalls_index_file )

        while snps.next():
            seq, pos = snps.get_seq_pos()
            flanking_dna = snpcalls.get_flanking_dna( sequence=seq, position=pos, format=options.output_format )
            # Nothing is written when the lookup gives a falsy result.
            if flanking_dna:
                out_fh.write( flanking_dna )
    finally:
        # Close the output handle even if a lookup or write fails part-way.
        out_fh.close()


if __name__ == "__main__":
    main()
89 |