Mercurial > repos > devteam > data_manager_bwa_mem_index_builder
comparison data_manager/bwa_mem_index_builder.py @ 5:46066df8813d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bwa_mem_index_builder commit 8652f36a3a3838dca989426961561e81432acf4f
author | iuc |
---|---|
date | Tue, 04 Apr 2017 18:07:04 -0400 |
parents | 36447bb36384 |
children | 2a386b2383a1 |
comparison legend: equal, deleted, inserted, replaced
4:414a92a973fa | 5:46066df8813d |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 #Dan Blankenberg | 2 # Dan Blankenberg |
3 from __future__ import print_function | |
3 | 4 |
5 import optparse | |
6 import os | |
7 import subprocess | |
4 import sys | 8 import sys |
5 import os | 9 from json import dumps, loads |
6 import tempfile | |
7 import optparse | |
8 import subprocess | |
9 | 10 |
10 from json import loads, dumps | 11 CHUNK_SIZE = 2**20 |
12 TWO_GB = 2**30 * 2 | |
13 DEFAULT_DATA_TABLE_NAME = "bwa_mem_indexes" | |
11 | 14 |
12 | 15 |
13 CHUNK_SIZE = 2**20 | |
14 TWO_GB = 2**30*2 | |
15 | |
16 DEFAULT_DATA_TABLE_NAME = "bwa_mem_indexes" | |
17 | |
18 def get_id_name( params, dbkey, fasta_description=None): | 16 def get_id_name( params, dbkey, fasta_description=None): |
19 #TODO: ensure sequence_id is unique and does not already appear in location file | 17 # TODO: ensure sequence_id is unique and does not already appear in location file |
20 sequence_id = params['param_dict']['sequence_id'] | 18 sequence_id = params['param_dict']['sequence_id'] |
21 if not sequence_id: | 19 if not sequence_id: |
22 sequence_id = dbkey | 20 sequence_id = dbkey |
23 | 21 |
24 sequence_name = params['param_dict']['sequence_name'] | 22 sequence_name = params['param_dict']['sequence_name'] |
25 if not sequence_name: | 23 if not sequence_name: |
26 sequence_name = fasta_description | 24 sequence_name = fasta_description |
27 if not sequence_name: | 25 if not sequence_name: |
28 sequence_name = dbkey | 26 sequence_name = dbkey |
29 return sequence_id, sequence_name | 27 return sequence_id, sequence_name |
30 | 28 |
29 | |
31 def build_bwa_index( data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME ): | 30 def build_bwa_index( data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME ): |
32 #TODO: allow multiple FASTA input files | 31 # TODO: allow multiple FASTA input files |
33 #tmp_dir = tempfile.mkdtemp( prefix='tmp-data-manager-bwa-index-builder-' ) | |
34 fasta_base_name = os.path.split( fasta_filename )[-1] | 32 fasta_base_name = os.path.split( fasta_filename )[-1] |
35 sym_linked_fasta_filename = os.path.join( target_directory, fasta_base_name ) | 33 sym_linked_fasta_filename = os.path.join( target_directory, fasta_base_name ) |
36 os.symlink( fasta_filename, sym_linked_fasta_filename ) | 34 os.symlink( fasta_filename, sym_linked_fasta_filename ) |
37 if params['param_dict']['index_algorithm'] == 'automatic': | 35 if params['param_dict']['index_algorithm'] == 'automatic': |
38 if os.stat( fasta_filename ).st_size < TWO_GB: #use 2 GB as cut off for memory vs. max of 2gb database size; this is somewhat arbitrary | 36 if os.stat( fasta_filename ).st_size < TWO_GB: # use 2 GB as cut off for memory vs. max of 2gb database size; this is somewhat arbitrary |
39 index_algorithm = 'is' | 37 index_algorithm = 'is' |
40 else: | 38 else: |
41 index_algorithm = 'bwtsw' | 39 index_algorithm = 'bwtsw' |
42 else: | 40 else: |
43 index_algorithm = params['param_dict']['index_algorithm'] | 41 index_algorithm = params['param_dict']['index_algorithm'] |
44 | 42 |
45 args = [ 'bwa', 'index', '-a', index_algorithm ] | 43 args = [ 'bwa', 'index', '-a', index_algorithm ] |
46 args.append( sym_linked_fasta_filename ) | 44 args.append( sym_linked_fasta_filename ) |
47 proc = subprocess.Popen( args=args, shell=False, cwd=target_directory ) | 45 proc = subprocess.Popen( args=args, shell=False, cwd=target_directory ) |
48 return_code = proc.wait() | 46 return_code = proc.wait() |
49 if return_code: | 47 if return_code: |
50 print >> sys.stderr, "Error building index." | 48 print("Error building index.", file=sys.stderr) |
51 sys.exit( return_code ) | 49 sys.exit( return_code ) |
52 data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name ) | 50 data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name ) |
53 _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) | 51 _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) |
52 | |
54 | 53 |
55 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): | 54 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): |
56 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 55 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) |
57 data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) | 56 data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) |
58 data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) | 57 data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) |
59 return data_manager_dict | 58 return data_manager_dict |
60 | 59 |
60 | |
61 def main(): | 61 def main(): |
62 #Parse Command Line | |
63 parser = optparse.OptionParser() | 62 parser = optparse.OptionParser() |
64 parser.add_option( '-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename' ) | 63 parser.add_option( '-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename' ) |
65 parser.add_option( '-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey' ) | 64 parser.add_option( '-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey' ) |
66 parser.add_option( '-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description' ) | 65 parser.add_option( '-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description' ) |
67 parser.add_option( '-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name' ) | 66 parser.add_option( '-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name' ) |
68 (options, args) = parser.parse_args() | 67 (options, args) = parser.parse_args() |
69 | 68 |
70 filename = args[0] | 69 filename = args[0] |
71 | 70 |
72 params = loads( open( filename ).read() ) | 71 params = loads( open( filename ).read() ) |
73 target_directory = params[ 'output_data' ][0]['extra_files_path'] | 72 target_directory = params[ 'output_data' ][0]['extra_files_path'] |
74 os.mkdir( target_directory ) | 73 os.mkdir( target_directory ) |
75 data_manager_dict = {} | 74 data_manager_dict = {} |
76 | 75 |
77 dbkey = options.fasta_dbkey | 76 dbkey = options.fasta_dbkey |
78 | 77 |
79 if dbkey in [ None, '', '?' ]: | 78 if dbkey in [ None, '', '?' ]: |
80 raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) ) | 79 raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) ) |
81 | 80 |
82 sequence_id, sequence_name = get_id_name( params, dbkey=dbkey, fasta_description=options.fasta_description ) | 81 sequence_id, sequence_name = get_id_name( params, dbkey=dbkey, fasta_description=options.fasta_description ) |
83 | 82 |
84 #build the index | 83 # build the index |
85 build_bwa_index( data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME ) | 84 build_bwa_index( data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME ) |
86 | 85 |
87 #save info to json file | 86 # save info to json file |
88 open( filename, 'wb' ).write( dumps( data_manager_dict ) ) | 87 open( filename, 'wb' ).write( dumps( data_manager_dict ) ) |
89 | 88 |
90 if __name__ == "__main__": main() | 89 |
90 if __name__ == "__main__": | |
91 main() |