comparison data_manager/data_manager_snpEff_download.py @ 9:08d7998c3afb draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpeff commit 036feef4f46b5aedabc5080c8fa4bc00d9c30d95"
author iuc
date Wed, 12 Feb 2020 18:33:53 -0500
parents d107d20dc6bd
children c6fbc5421697
comparison
equal deleted inserted replaced
8:d107d20dc6bd 9:08d7998c3afb
17 databases_path = 'databases.out' 17 databases_path = 'databases.out'
18 databases_output = open(databases_path, 'w') 18 databases_output = open(databases_path, 'w')
19 args = ['snpEff', 'databases'] 19 args = ['snpEff', 'databases']
20 return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno()) 20 return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
21 if return_code: 21 if return_code:
22 sys.exit( return_code ) 22 sys.exit(return_code)
23 databases_output.close() 23 databases_output.close()
24 try: 24 try:
25 fh = open(databases_path, 'r') 25 fh = open(databases_path, 'r')
26 for i, line in enumerate(fh): 26 for i, line in enumerate(fh):
27 fields = line.split('\t') 27 fields = line.split('\t')
32 if genome_version.startswith("Genome") or genome_version.startswith("-"): 32 if genome_version.startswith("Genome") or genome_version.startswith("-"):
33 continue 33 continue
34 description = fields[1].strip() 34 description = fields[1].strip()
35 snpDBs[genome_version] = description 35 snpDBs[genome_version] = description
36 except Exception as e: 36 except Exception as e:
37 stop_err( 'Error parsing %s %s\n' % (databases_path, str( e )) ) 37 stop_err('Error parsing %s %s\n' % (databases_path, str(e)))
38 else: 38 else:
39 fh.close() 39 fh.close()
40 return snpDBs 40 return snpDBs
41 41
42 42
57 stderr_path = 'snpeff.err' 57 stderr_path = 'snpeff.err'
58 stderr_fh = open(stderr_path, 'w') 58 stderr_fh = open(stderr_path, 'w')
59 args = ['snpEff', '-h'] 59 args = ['snpEff', '-h']
60 return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno()) 60 return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())
61 if return_code != 255: 61 if return_code != 255:
62 sys.exit( return_code ) 62 sys.exit(return_code)
63 stderr_fh.close() 63 stderr_fh.close()
64 fh = open(stderr_path, 'r') 64 fh = open(stderr_path, 'r')
65 for line in fh: 65 for line in fh:
66 m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line) 66 m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
67 if m: 67 if m:
88 # Note: Since version 2.1 you can use tilde ('~') as first character to refer to your home directory 88 # Note: Since version 2.1 you can use tilde ('~') as first character to refer to your home directory
89 data_dir = target_directory 89 data_dir = target_directory
90 args = ['snpEff', 'download', '-dataDir', data_dir, '-v', genome_version] 90 args = ['snpEff', 'download', '-dataDir', data_dir, '-v', genome_version]
91 return_code = subprocess.call(args=args, shell=False) 91 return_code = subprocess.call(args=args, shell=False)
92 if return_code: 92 if return_code:
93 sys.exit( return_code ) 93 sys.exit(return_code)
94 # search data_dir/genome_version for files 94 # search data_dir/genome_version for files
95 regulation_pattern = 'regulation_(.+).bin' 95 regulation_pattern = 'regulation_(.+).bin'
96 genome_path = os.path.join(data_dir, genome_version) 96 genome_path = os.path.join(data_dir, genome_version)
97 snpeff_version = getSnpeffVersion() 97 snpeff_version = getSnpeffVersion()
98 key = snpeff_version + '_' + genome_version 98 key = snpeff_version + '_' + genome_version
101 for fname in files: 101 for fname in files:
102 if fname.startswith('snpEffectPredictor'): 102 if fname.startswith('snpEffectPredictor'):
103 # if snpEffectPredictor.bin download succeeded 103 # if snpEffectPredictor.bin download succeeded
104 name = genome_version + (' : ' + organism if organism else '') 104 name = genome_version + (' : ' + organism if organism else '')
105 data_table_entry = dict(key=key, version=snpeff_version, value=genome_version, name=name, path=data_dir) 105 data_table_entry = dict(key=key, version=snpeff_version, value=genome_version, name=name, path=data_dir)
106 _add_data_table_entry( data_manager_dict, 'snpeffv_genomedb', data_table_entry ) 106 _add_data_table_entry(data_manager_dict, 'snpeffv_genomedb', data_table_entry)
107 else: 107 else:
108 m = re.match(regulation_pattern, fname) 108 m = re.match(regulation_pattern, fname)
109 if m: 109 if m:
110 name = m.groups()[0] 110 name = m.groups()[0]
111 data_table_entry = dict(key=key, version=snpeff_version, genome=genome_version, value=name, name=name) 111 data_table_entry = dict(key=key, version=snpeff_version, genome=genome_version, value=name, name=name)
112 _add_data_table_entry( data_manager_dict, 'snpeffv_regulationdb', data_table_entry ) 112 _add_data_table_entry(data_manager_dict, 'snpeffv_regulationdb', data_table_entry)
113 return data_manager_dict 113 return data_manager_dict
114 114
115 115
116 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): 116 def _add_data_table_entry(data_manager_dict, data_table, data_table_entry):
117 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) 117 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
118 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) 118 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
119 data_manager_dict['data_tables'][data_table].append( data_table_entry ) 119 data_manager_dict['data_tables'][data_table].append(data_table_entry)
120 return data_manager_dict 120 return data_manager_dict
121 121
122 122
def main():
    """Download the requested SnpEff databases and write the data manager JSON.

    Command line:
        ``-g/--genome_version``  comma-separated genome versions (required)
        ``-o/--organism``        organism name(s), passed to getOrganismNames
        positional argument      path of the JSON parameter file

    The parameter file is read to find
    ``params['output_data'][0]['extra_files_path']``, which is created and
    used as the download target directory. After all downloads, the same
    file is overwritten with the collected data table entries as JSON.

    Exits through ``parser.error`` (status 2) when the parameter file path
    or the genome version option is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-g', '--genome_version', dest='genome_version', action='store', type="string", default=None, help='genome_version')
    parser.add_option('-o', '--organism', dest='organism', action='store', type="string", default=None, help='organism name')
    (options, args) = parser.parse_args()

    # Fail with a usage message instead of an IndexError / AttributeError
    # traceback when required inputs are absent.
    if not args:
        parser.error('missing path to the data manager JSON parameter file')
    if not options.genome_version:
        parser.error('missing required option --genome_version')

    filename = args[0]

    # Context managers make sure the handles are closed (and the output is
    # flushed) even if an exception is raised.
    with open(filename) as params_fh:
        params = json.loads(params_fh.read())
    target_directory = params['output_data'][0]['extra_files_path']
    os.mkdir(target_directory)
    data_manager_dict = {}

    # Create SnpEff Reference Data.
    # zip() pairs each genome version with its organism name and stops at
    # the shorter of the two comma-separated lists.
    genome_versions = options.genome_version.split(',')
    organisms = getOrganismNames(options.genome_version, options.organism).split(',')
    for genome_version, organism in zip(genome_versions, organisms):
        download_database(data_manager_dict, target_directory, genome_version, organism)

    # save info to json file
    with open(filename, 'w') as out_fh:
        out_fh.write(json.dumps(data_manager_dict, sort_keys=True))
142 142
143 143
# Run the data manager only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()