Mercurial > repos > iuc > data_manager_snpeff
comparison data_manager/data_manager_snpEff_download.py @ 9:08d7998c3afb draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpeff commit 036feef4f46b5aedabc5080c8fa4bc00d9c30d95"
field | value |
---|---|
author | iuc |
date | Wed, 12 Feb 2020 18:33:53 -0500 |
parents | d107d20dc6bd |
children | c6fbc5421697 |
comparison
legend (row states in the table below): equal, deleted, inserted, replaced
8:d107d20dc6bd | 9:08d7998c3afb |
---|---|
17 databases_path = 'databases.out' | 17 databases_path = 'databases.out' |
18 databases_output = open(databases_path, 'w') | 18 databases_output = open(databases_path, 'w') |
19 args = ['snpEff', 'databases'] | 19 args = ['snpEff', 'databases'] |
20 return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno()) | 20 return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno()) |
21 if return_code: | 21 if return_code: |
22 sys.exit( return_code ) | 22 sys.exit(return_code) |
23 databases_output.close() | 23 databases_output.close() |
24 try: | 24 try: |
25 fh = open(databases_path, 'r') | 25 fh = open(databases_path, 'r') |
26 for i, line in enumerate(fh): | 26 for i, line in enumerate(fh): |
27 fields = line.split('\t') | 27 fields = line.split('\t') |
32 if genome_version.startswith("Genome") or genome_version.startswith("-"): | 32 if genome_version.startswith("Genome") or genome_version.startswith("-"): |
33 continue | 33 continue |
34 description = fields[1].strip() | 34 description = fields[1].strip() |
35 snpDBs[genome_version] = description | 35 snpDBs[genome_version] = description |
36 except Exception as e: | 36 except Exception as e: |
37 stop_err( 'Error parsing %s %s\n' % (databases_path, str( e )) ) | 37 stop_err('Error parsing %s %s\n' % (databases_path, str(e))) |
38 else: | 38 else: |
39 fh.close() | 39 fh.close() |
40 return snpDBs | 40 return snpDBs |
41 | 41 |
42 | 42 |
57 stderr_path = 'snpeff.err' | 57 stderr_path = 'snpeff.err' |
58 stderr_fh = open(stderr_path, 'w') | 58 stderr_fh = open(stderr_path, 'w') |
59 args = ['snpEff', '-h'] | 59 args = ['snpEff', '-h'] |
60 return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno()) | 60 return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno()) |
61 if return_code != 255: | 61 if return_code != 255: |
62 sys.exit( return_code ) | 62 sys.exit(return_code) |
63 stderr_fh.close() | 63 stderr_fh.close() |
64 fh = open(stderr_path, 'r') | 64 fh = open(stderr_path, 'r') |
65 for line in fh: | 65 for line in fh: |
66 m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line) | 66 m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line) |
67 if m: | 67 if m: |
88 # Note: Since version 2.1 you can use tilde ('~') as first character to refer to your home directory | 88 # Note: Since version 2.1 you can use tilde ('~') as first character to refer to your home directory |
89 data_dir = target_directory | 89 data_dir = target_directory |
90 args = ['snpEff', 'download', '-dataDir', data_dir, '-v', genome_version] | 90 args = ['snpEff', 'download', '-dataDir', data_dir, '-v', genome_version] |
91 return_code = subprocess.call(args=args, shell=False) | 91 return_code = subprocess.call(args=args, shell=False) |
92 if return_code: | 92 if return_code: |
93 sys.exit( return_code ) | 93 sys.exit(return_code) |
94 # search data_dir/genome_version for files | 94 # search data_dir/genome_version for files |
95 regulation_pattern = 'regulation_(.+).bin' | 95 regulation_pattern = 'regulation_(.+).bin' |
96 genome_path = os.path.join(data_dir, genome_version) | 96 genome_path = os.path.join(data_dir, genome_version) |
97 snpeff_version = getSnpeffVersion() | 97 snpeff_version = getSnpeffVersion() |
98 key = snpeff_version + '_' + genome_version | 98 key = snpeff_version + '_' + genome_version |
101 for fname in files: | 101 for fname in files: |
102 if fname.startswith('snpEffectPredictor'): | 102 if fname.startswith('snpEffectPredictor'): |
103 # if snpEffectPredictor.bin download succeeded | 103 # if snpEffectPredictor.bin download succeeded |
104 name = genome_version + (' : ' + organism if organism else '') | 104 name = genome_version + (' : ' + organism if organism else '') |
105 data_table_entry = dict(key=key, version=snpeff_version, value=genome_version, name=name, path=data_dir) | 105 data_table_entry = dict(key=key, version=snpeff_version, value=genome_version, name=name, path=data_dir) |
106 _add_data_table_entry( data_manager_dict, 'snpeffv_genomedb', data_table_entry ) | 106 _add_data_table_entry(data_manager_dict, 'snpeffv_genomedb', data_table_entry) |
107 else: | 107 else: |
108 m = re.match(regulation_pattern, fname) | 108 m = re.match(regulation_pattern, fname) |
109 if m: | 109 if m: |
110 name = m.groups()[0] | 110 name = m.groups()[0] |
111 data_table_entry = dict(key=key, version=snpeff_version, genome=genome_version, value=name, name=name) | 111 data_table_entry = dict(key=key, version=snpeff_version, genome=genome_version, value=name, name=name) |
112 _add_data_table_entry( data_manager_dict, 'snpeffv_regulationdb', data_table_entry ) | 112 _add_data_table_entry(data_manager_dict, 'snpeffv_regulationdb', data_table_entry) |
113 return data_manager_dict | 113 return data_manager_dict |
114 | 114 |
115 | 115 |
116 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): | 116 def _add_data_table_entry(data_manager_dict, data_table, data_table_entry): |
117 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 117 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) |
118 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) | 118 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) |
119 data_manager_dict['data_tables'][data_table].append( data_table_entry ) | 119 data_manager_dict['data_tables'][data_table].append(data_table_entry) |
120 return data_manager_dict | 120 return data_manager_dict |
121 | 121 |
122 | 122 |
123 def main(): | 123 def main(): |
124 parser = optparse.OptionParser() | 124 parser = optparse.OptionParser() |
125 parser.add_option( '-g', '--genome_version', dest='genome_version', action='store', type="string", default=None, help='genome_version' ) | 125 parser.add_option('-g', '--genome_version', dest='genome_version', action='store', type="string", default=None, help='genome_version') |
126 parser.add_option( '-o', '--organism', dest='organism', action='store', type="string", default=None, help='organism name' ) | 126 parser.add_option('-o', '--organism', dest='organism', action='store', type="string", default=None, help='organism name') |
127 (options, args) = parser.parse_args() | 127 (options, args) = parser.parse_args() |
128 | 128 |
129 filename = args[0] | 129 filename = args[0] |
130 | 130 |
131 params = json.loads( open( filename ).read() ) | 131 params = json.loads(open(filename).read()) |
132 target_directory = params[ 'output_data' ][0]['extra_files_path'] | 132 target_directory = params['output_data'][0]['extra_files_path'] |
133 os.mkdir( target_directory ) | 133 os.mkdir(target_directory) |
134 data_manager_dict = {} | 134 data_manager_dict = {} |
135 | 135 |
136 # Create SnpEff Reference Data | 136 # Create SnpEff Reference Data |
137 for genome_version, organism in zip(options.genome_version.split(','), getOrganismNames(options.genome_version, options.organism).split(',')): | 137 for genome_version, organism in zip(options.genome_version.split(','), getOrganismNames(options.genome_version, options.organism).split(',')): |
138 download_database( data_manager_dict, target_directory, genome_version, organism ) | 138 download_database(data_manager_dict, target_directory, genome_version, organism) |
139 | 139 |
140 # save info to json file | 140 # save info to json file |
141 open( filename, 'wb' ).write( json.dumps( data_manager_dict ) ) | 141 open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) |
142 | 142 |
143 | 143 |
144 if __name__ == "__main__": | 144 if __name__ == "__main__": |
145 main() | 145 main() |