comparison data_manager/data_manager_snpEff_download.py @ 10:c6fbc5421697 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpeff commit 02d2967f77e3fa5a18aea63dc84aa9ab418dc165"
author:    iuc
date:      Sun, 22 Nov 2020 12:53:42 +0000
parents:   08d7998c3afb
children:  def511e8e005
--- data_manager/data_manager_snpEff_download.py (9:08d7998c3afb)
+++ data_manager/data_manager_snpEff_download.py (10:c6fbc5421697)
@@ -5,40 +5,32 @@
 import re
 import subprocess
 import sys
 
 
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit(1)
-
-
 def fetch_databases(genome_list=None):
     snpDBs = dict()
     databases_path = 'databases.out'
-    databases_output = open(databases_path, 'w')
     args = ['snpEff', 'databases']
-    return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
+    with open(databases_path, 'w') as databases_output:
+        return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
     if return_code:
         sys.exit(return_code)
-    databases_output.close()
     try:
-        fh = open(databases_path, 'r')
-        for i, line in enumerate(fh):
-            fields = line.split('\t')
-            if len(fields) >= 2:
-                genome_version = fields[0].strip()
-                if genome_list and genome_version not in genome_list:
-                    continue
-                if genome_version.startswith("Genome") or genome_version.startswith("-"):
-                    continue
-                description = fields[1].strip()
-                snpDBs[genome_version] = description
+        with open(databases_path, 'r') as fh:
+            for line in fh:
+                fields = line.split('\t')
+                if len(fields) >= 2:
+                    genome_version = fields[0].strip()
+                    if genome_list and genome_version not in genome_list:
+                        continue
+                    if genome_version.startswith("Genome") or genome_version.startswith("-"):
+                        continue
+                    description = fields[1].strip()
+                    snpDBs[genome_version] = description
     except Exception as e:
-        stop_err('Error parsing %s %s\n' % (databases_path, str(e)))
-    else:
-        fh.close()
+        sys.exit('Error parsing %s %s\n' % (databases_path, str(e)))
     return snpDBs
 
 
 def getOrganismNames(genomes, organisms):
     genome_list = genomes.split(',')
@@ -53,23 +45,21 @@
 
 
 def getSnpeffVersion():
     snpeff_version = 'SnpEff ?.?'
     stderr_path = 'snpeff.err'
-    stderr_fh = open(stderr_path, 'w')
     args = ['snpEff', '-h']
-    return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())
+    with open(stderr_path, 'w') as stderr_fh:
+        return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())
     if return_code != 255:
         sys.exit(return_code)
-    stderr_fh.close()
-    fh = open(stderr_path, 'r')
-    for line in fh:
-        m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
-        if m:
-            snpeff_version = m.groups()[0] + m.groups()[1]
-            break
-    fh.close()
+    with open(stderr_path) as fh:
+        for line in fh:
+            m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
+            if m:
+                snpeff_version = m.groups()[0] + m.groups()[1]
+                break
     return snpeff_version
 
 
 # Download human database 'hg19'
 # java -jar snpEff.jar download -v hg19
@@ -95,11 +85,11 @@
     regulation_pattern = 'regulation_(.+).bin'
     genome_path = os.path.join(data_dir, genome_version)
     snpeff_version = getSnpeffVersion()
     key = snpeff_version + '_' + genome_version
    if os.path.isdir(genome_path):
-        for root, dirs, files in os.walk(genome_path):
+        for _, _, files in os.walk(genome_path):
             for fname in files:
                 if fname.startswith('snpEffectPredictor'):
                     # if snpEffectPredictor.bin download succeeded
                     name = genome_version + (' : ' + organism if organism else '')
                     data_table_entry = dict(key=key, version=snpeff_version, value=genome_version, name=name, path=data_dir)
@@ -126,20 +116,22 @@
     parser.add_option('-o', '--organism', dest='organism', action='store', type="string", default=None, help='organism name')
     (options, args) = parser.parse_args()
 
     filename = args[0]
 
-    params = json.loads(open(filename).read())
+    with open(filename) as fh:
+        params = json.load(fh)
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)
     data_manager_dict = {}
 
     # Create SnpEff Reference Data
     for genome_version, organism in zip(options.genome_version.split(','), getOrganismNames(options.genome_version, options.organism).split(',')):
         download_database(data_manager_dict, target_directory, genome_version, organism)
 
     # save info to json file
-    open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True))
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":
     main()
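
For anyone who wants to exercise the refactored script outside Galaxy, the comparison above shows what it expects: a JSON parameter file as the first positional argument, whose output_data[0].extra_files_path points at a directory that does not exist yet (the script creates it with os.mkdir), plus the genome and organism options parsed in main(). The sketch below is a hypothetical local invocation under those assumptions; the '--genome_version' flag name is inferred (only the '--organism' option definition is visible in this diff), and snpEff must be available on PATH.

# Hypothetical local run of the data manager script (illustration only, not part of the diff).
import json
import os
import subprocess
import tempfile

# The target directory must not exist yet: the script calls os.mkdir() on it.
target = os.path.join(tempfile.mkdtemp(), 'snpeff_data')
params = {'output_data': [{'extra_files_path': target}]}

with open('params.json', 'w') as fh:
    json.dump(params, fh)

# '--genome_version' is an assumed flag name; only '--organism' appears in the hunk above.
subprocess.check_call([
    'python', 'data_manager/data_manager_snpEff_download.py',
    '--genome_version', 'hg19',
    '--organism', 'Homo_sapiens',
    'params.json',
])

# The script rewrites params.json in place with the generated data table entries.
with open('params.json') as fh:
    print(json.load(fh))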