Mercurial > repos > ieguinoa > data_manager_fetch_tx2gene
comparison data_manager/data_manager_fetch_tx2gene.py @ 5:c380b7da5b65 draft default tip
Uploaded
author | ieguinoa |
---|---|
date | Mon, 07 Jun 2021 16:33:53 +0000 |
parents | d71f65b854de |
children |
comparison
legend: equal, deleted, inserted, replaced
4:bacd91d8b05a | 5:c380b7da5b65 |
---|---|
25 from json import loads, dumps | 25 from json import loads, dumps |
26 | 26 |
27 | 27 |
28 CHUNK_SIZE = 2**20 # 1mb | 28 CHUNK_SIZE = 2**20 # 1mb |
29 | 29 |
30 DATA_TABLE_NAME = 'tx2gene' | 30 DATA_TABLE_NAME = 'tx2gene_table' |
31 | 31 |
32 def cleanup_before_exit( tmp_dir ): | 32 def cleanup_before_exit( tmp_dir ): |
33 if tmp_dir and os.path.exists( tmp_dir ): | 33 if tmp_dir and os.path.exists( tmp_dir ): |
34 shutil.rmtree( tmp_dir ) | 34 shutil.rmtree( tmp_dir ) |
35 | 35 |
43 # dbkey = params['param_dict']['dbkey_source']['dbkey'] | 43 # dbkey = params['param_dict']['dbkey_source']['dbkey'] |
44 #TODO: ensure sequence_id is unique and does not already appear in location file | 44 #TODO: ensure sequence_id is unique and does not already appear in location file |
45 sequence_id = params['param_dict']['sequence_id'] | 45 sequence_id = params['param_dict']['sequence_id'] |
46 if not sequence_id: | 46 if not sequence_id: |
47 sequence_id = dbkey #uuid.uuid4() generate and use an uuid instead? | 47 sequence_id = dbkey #uuid.uuid4() generate and use an uuid instead? |
48 | 48 |
49 # if params['param_dict']['dbkey_source']['dbkey_source_selector'] == 'new': | 49 # if params['param_dict']['dbkey_source']['dbkey_source_selector'] == 'new': |
50 # dbkey_name = params['param_dict']['dbkey_source']['dbkey_name'] | 50 # dbkey_name = params['param_dict']['dbkey_source']['dbkey_name'] |
51 # if not dbkey_name: | 51 # if not dbkey_name: |
52 # dbkey_name = dbkey | 52 # dbkey_name = dbkey |
53 # else: | 53 # else: |
54 # dbkey_name = None | 54 # dbkey_name = None |
55 dbkey = params['param_dict']['dbkey'] | 55 dbkey = params['param_dict']['dbkey'] |
56 dbkey_name = dbkey_description | 56 dbkey_name = dbkey_description |
57 sequence_name = params['param_dict']['sequence_name'] | 57 sequence_name = params['param_dict']['sequence_name'] |
58 if not sequence_name: | 58 if not sequence_name: |
59 sequence_name = dbkey_description | 59 sequence_name = dbkey_description |
60 if not sequence_name: | 60 if not sequence_name: |
288 parser = optparse.OptionParser() | 288 parser = optparse.OptionParser() |
289 parser.add_option( '-d', '--dbkey_description', dest='dbkey_description', action='store', type="string", default=None, help='dbkey_description' ) | 289 parser.add_option( '-d', '--dbkey_description', dest='dbkey_description', action='store', type="string", default=None, help='dbkey_description' ) |
290 parser.add_option( '-b', '--base_dir', dest='base_dir', action='store', type='string', default=None, help='base_dir') | 290 parser.add_option( '-b', '--base_dir', dest='base_dir', action='store', type='string', default=None, help='base_dir') |
291 parser.add_option( '-t', '--type', dest='file_type', action='store', type='string', default=None, help='file_type') | 291 parser.add_option( '-t', '--type', dest='file_type', action='store', type='string', default=None, help='file_type') |
292 (options, args) = parser.parse_args() | 292 (options, args) = parser.parse_args() |
293 | |
294 filename = args[0] | 293 filename = args[0] |
295 #global DATA_TABLE_NAME | 294 #global DATA_TABLE_NAME |
296 rscript_gff_to_tx2gene=os.path.join( options.base_dir, 'get_tx2gene_table.R') | 295 rscript_gff_to_tx2gene=os.path.join( options.base_dir, 'get_tx2gene_table.R') |
297 | 296 |
298 #input_type='gff_gtf' | 297 #input_type='gff_gtf' |
299 #if options.file_type != 'gff_gtf': | 298 #if options.file_type != 'gff_gtf': |
300 # file_type='tx2gene' | 299 # file_type='tx2gene' |
301 | 300 |
302 params = loads( open( filename ).read() ) | 301 params = loads( open( filename ).read() ) |
303 target_directory = params[ 'output_data' ][0]['extra_files_path'] | 302 target_directory = params[ 'output_data' ][0]['extra_files_path'] |
304 os.mkdir( target_directory ) | 303 os.mkdir( target_directory ) |
305 data_manager_dict = {} | 304 data_manager_dict = {} |
306 | 305 |
307 dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name( params, dbkey_description=options.dbkey_description ) | 306 dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name( params, dbkey_description=options.dbkey_description ) |
308 | 307 |
309 if dbkey in [ None, '', '?' ]: | 308 if dbkey in [ None, '', '?' ]: |
310 raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) ) | 309 raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) ) |
311 | 310 |
312 # Create a tmp_dir, in case a zip file needs to be uncompressed | 311 # Create a tmp_dir, in case a zip file needs to be uncompressed |
313 tmp_dir = tempfile.mkdtemp() | 312 tmp_dir = tempfile.mkdtemp() |
316 REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( rscript_gff_to_tx2gene, data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir) | 315 REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( rscript_gff_to_tx2gene, data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir) |
317 finally: | 316 finally: |
318 cleanup_before_exit(tmp_dir) | 317 cleanup_before_exit(tmp_dir) |
319 #save info to json file | 318 #save info to json file |
320 open( filename, 'wb' ).write( dumps( data_manager_dict ).encode() ) | 319 open( filename, 'wb' ).write( dumps( data_manager_dict ).encode() ) |
321 | 320 |
322 if __name__ == "__main__": | 321 if __name__ == "__main__": |
323 main() | 322 main() |