comparison data_manager/data_manager_snpEff_download.py @ 10:c6fbc5421697 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_snpeff commit 02d2967f77e3fa5a18aea63dc84aa9ab418dc165"
author iuc
date Sun, 22 Nov 2020 12:53:42 +0000
parents 08d7998c3afb
children def511e8e005
--- data_manager/data_manager_snpEff_download.py (9:08d7998c3afb)
+++ data_manager/data_manager_snpEff_download.py (10:c6fbc5421697)
@@ -5,40 +5,32 @@
 import re
 import subprocess
 import sys
 
 
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit(1)
-
-
 def fetch_databases(genome_list=None):
     snpDBs = dict()
     databases_path = 'databases.out'
-    databases_output = open(databases_path, 'w')
     args = ['snpEff', 'databases']
-    return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
+    with open(databases_path, 'w') as databases_output:
+        return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno())
     if return_code:
         sys.exit(return_code)
-    databases_output.close()
     try:
-        fh = open(databases_path, 'r')
-        for i, line in enumerate(fh):
-            fields = line.split('\t')
-            if len(fields) >= 2:
-                genome_version = fields[0].strip()
-                if genome_list and genome_version not in genome_list:
-                    continue
-                if genome_version.startswith("Genome") or genome_version.startswith("-"):
-                    continue
-                description = fields[1].strip()
-                snpDBs[genome_version] = description
+        with open(databases_path, 'r') as fh:
+            for line in fh:
+                fields = line.split('\t')
+                if len(fields) >= 2:
+                    genome_version = fields[0].strip()
+                    if genome_list and genome_version not in genome_list:
+                        continue
+                    if genome_version.startswith("Genome") or genome_version.startswith("-"):
+                        continue
+                    description = fields[1].strip()
+                    snpDBs[genome_version] = description
     except Exception as e:
-        stop_err('Error parsing %s %s\n' % (databases_path, str(e)))
-    else:
-        fh.close()
+        sys.exit('Error parsing %s %s\n' % (databases_path, str(e)))
     return snpDBs
 
 
 def getOrganismNames(genomes, organisms):
     genome_list = genomes.split(',')
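Note: the hunk above replaces manual open()/close() pairs with context managers, so the databases.out scratch file is flushed and closed before it is read back even if the subprocess call fails. A minimal, self-contained sketch of the same pattern, with a hypothetical POSIX printf command standing in for snpEff databases (which may not be on PATH):

    import subprocess

    scratch_path = 'databases.out'  # hypothetical scratch file name

    # Write the command's stdout through a context manager; the file is
    # guaranteed to be flushed and closed when the block exits.
    with open(scratch_path, 'w') as out_fh:
        return_code = subprocess.call(
            ['printf', 'hg19\tHomo sapiens\nGRCh38.99\tHomo sapiens\n'],
            shell=False, stdout=out_fh.fileno())
    if return_code:
        raise SystemExit(return_code)

    # Read it back the same way and keep only well-formed, tab-separated rows.
    databases = {}
    with open(scratch_path) as fh:
        for line in fh:
            fields = line.split('\t')
            if len(fields) >= 2:
                databases[fields[0].strip()] = fields[1].strip()

    print(databases)  # {'hg19': 'Homo sapiens', 'GRCh38.99': 'Homo sapiens'}

subprocess.call also accepts the file object itself for stdout; the fileno() call simply mirrors the script being changed.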
@@ -53,23 +45,21 @@
 
 
 def getSnpeffVersion():
     snpeff_version = 'SnpEff ?.?'
     stderr_path = 'snpeff.err'
-    stderr_fh = open(stderr_path, 'w')
     args = ['snpEff', '-h']
-    return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())
+    with open(stderr_path, 'w') as stderr_fh:
+        return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())
     if return_code != 255:
         sys.exit(return_code)
-    stderr_fh.close()
-    fh = open(stderr_path, 'r')
-    for line in fh:
-        m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
-        if m:
-            snpeff_version = m.groups()[0] + m.groups()[1]
-            break
-    fh.close()
+    with open(stderr_path) as fh:
+        for line in fh:
+            m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
+            if m:
+                snpeff_version = m.groups()[0] + m.groups()[1]
+                break
     return snpeff_version
 
 
 # Download human database 'hg19'
 # java -jar snpEff.jar download -v hg19
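The getSnpeffVersion() rewrite in the hunk above keeps the same regex but scopes both file handles with with-blocks. A small sketch of just the parsing step; the banner line is a hypothetical example of what snpEff -h writes to stderr:

    import re

    # Hypothetical banner line captured from snpEff -h on stderr.
    banner = 'SnpEff version SnpEff 4.3t (build 2017-11-24 10:18), by Pablo Cingolani'

    snpeff_version = 'SnpEff ?.?'
    m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', banner)
    if m:
        # Concatenates the tool name and the major.minor version, e.g. 'SnpEff4.3'.
        snpeff_version = m.groups()[0] + m.groups()[1]

    print(snpeff_version)  # SnpEff4.3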
@@ -95,11 +85,11 @@
     regulation_pattern = 'regulation_(.+).bin'
     genome_path = os.path.join(data_dir, genome_version)
     snpeff_version = getSnpeffVersion()
     key = snpeff_version + '_' + genome_version
     if os.path.isdir(genome_path):
-        for root, dirs, files in os.walk(genome_path):
+        for _, _, files in os.walk(genome_path):
             for fname in files:
                 if fname.startswith('snpEffectPredictor'):
                     # if snpEffectPredictor.bin download succeeded
                     name = genome_version + (' : ' + organism if organism else '')
                     data_table_entry = dict(key=key, version=snpeff_version, value=genome_version, name=name, path=data_dir)
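The only change in this hunk is the underscore placeholders for the unused root and dirs values returned by os.walk. A short sketch of the same check against a hypothetical genome directory layout:

    import os
    import tempfile

    # Hypothetical layout: <data_dir>/<genome_version>/snpEffectPredictor.bin
    data_dir = tempfile.mkdtemp()
    genome_version = 'hg19'
    genome_path = os.path.join(data_dir, genome_version)
    os.makedirs(genome_path)
    open(os.path.join(genome_path, 'snpEffectPredictor.bin'), 'w').close()

    # Only the file names matter, so the unused root/dirs values are ignored.
    downloaded = any(
        fname.startswith('snpEffectPredictor')
        for _, _, files in os.walk(genome_path)
        for fname in files
    )
    print(downloaded)  # True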
@@ -126,20 +116,22 @@
     parser.add_option('-o', '--organism', dest='organism', action='store', type="string", default=None, help='organism name')
     (options, args) = parser.parse_args()
 
     filename = args[0]
 
-    params = json.loads(open(filename).read())
+    with open(filename) as fh:
+        params = json.load(fh)
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)
     data_manager_dict = {}
 
     # Create SnpEff Reference Data
     for genome_version, organism in zip(options.genome_version.split(','), getOrganismNames(options.genome_version, options.organism).split(',')):
         download_database(data_manager_dict, target_directory, genome_version, organism)
 
     # save info to json file
-    open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True))
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":
     main()
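For reference, a sketch of the JSON round-trip the last two hunks converge on: read the Galaxy-supplied parameter file through a context manager, then write the data manager dictionary back over it. The params content below is a trimmed, hypothetical stand-in; the real file is generated by Galaxy:

    import json
    import os
    import tempfile

    # Hypothetical stand-in for the parameter file Galaxy hands to the data
    # manager; only the one field main() reads is included here.
    fd, params_path = tempfile.mkstemp(suffix='.json')
    os.close(fd)
    with open(params_path, 'w') as fh:
        json.dump({'output_data': [{'extra_files_path': '/tmp/snpeff_data'}]}, fh)

    # Read the params the way the new main() does.
    with open(params_path) as fh:
        params = json.load(fh)
    target_directory = params['output_data'][0]['extra_files_path']

    # download_database() would populate this dict with data table entries.
    data_manager_dict = {}

    # Write the result back over the same file, sorted for reproducible output.
    with open(params_path, 'w') as fh:
        json.dump(data_manager_dict, fh, sort_keys=True)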