Mercurial > repos > card > rgi
annotate data_managers/data_manager_rgi_build_db/data_manager/import_data.py @ 0:715bc9aeef69 draft
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
author | card |
---|---|
date | Wed, 27 Feb 2019 09:08:21 -0500 |
parents | |
children |
rev | line source |
---|---|
0
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
1 import argparse |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
2 import datetime |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
3 import json |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
4 import os |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
5 import shutil |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
6 import sys |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
7 import tarfile |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
8 import urllib.request, urllib.error, urllib.parse |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
9 import zipfile |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
10 import logging |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
11 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
# Directory that holds the RGI database files. It is resolved against the
# current working directory once, when this module is imported.
path = os.path.join(os.getcwd(), 'rgi-database')
# Directory that card.json is read from; currently an alias of `path`.
data_path = path

# Module logger. With the threshold at WARNING, the logger.info() calls in
# this module are filtered out by this logger.
level = logging.WARNING
logger = logging.getLogger(__name__)
logger.setLevel(level)
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
18 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
def _extract_card_json(archive_path, workdir):
    """Copy every regular ``card.json`` member of an archive into *workdir*.

    The member's directory inside the archive is ignored; the file always
    lands at ``<workdir>/card.json``. Only the member's bytes are copied, so
    nothing can be written outside *workdir* and no archive metadata
    (mode, owner, mtime) is applied.

    Returns:
        True if *archive_path* is a tar or zip archive (whether or not it
        contained a ``card.json``), False if it is neither.
    """
    wanted = 'card.json'
    target = os.path.join(workdir, wanted)

    if tarfile.is_tarfile(archive_path):
        with tarfile.open(archive_path, 'r:*') as archive:
            for member in archive.getmembers():
                # Skip directories, links and other non-file entries.
                if member.isreg() and os.path.basename(member.name) == wanted:
                    print('[import_data] extracting file: {}'.format(wanted))
                    with archive.extractfile(member) as src, open(target, 'wb') as dst:
                        shutil.copyfileobj(src, dst)
        return True

    if zipfile.is_zipfile(archive_path):
        with zipfile.ZipFile(archive_path, 'r') as archive:
            for info in archive.infolist():
                # Directory entries end with '/', so their basename is ''.
                if os.path.basename(info.filename) == wanted:
                    print('[import_data] extracting file: {}'.format(wanted))
                    with archive.open(info) as src, open(target, 'wb') as dst:
                        shutil.copyfileobj(src, dst)
        return True

    return False


def url_download(url, workdir):
    """Download *url* into *workdir* and unpack ``card.json`` from it.

    The payload is saved as ``<workdir>/download.dat``. If it is a tar or zip
    archive, every regular member named ``card.json`` is written to
    ``<workdir>/card.json`` and ``download.dat`` is deleted afterwards. If the
    payload is not an archive it is left in place untouched.

    Args:
        url: Location to fetch; anything ``urllib.request.urlopen`` accepts.
        workdir: Destination directory; created if it does not exist.

    Returns:
        None. A failed download is reported on stderr and is not raised.
    """
    file_path = os.path.join(workdir, 'download.dat')
    os.makedirs(workdir, exist_ok=True)

    try:
        request = urllib.request.Request(url)
        with urllib.request.urlopen(request) as src, open(file_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)
    except Exception as e:
        # Best-effort boundary: report the failure and stop here instead of
        # inspecting a missing or truncated download.
        print(str(e), file=sys.stderr)
        if os.path.exists(file_path):
            os.remove(file_path)
        return

    if not _extract_card_json(file_path, workdir):
        return
    os.remove(file_path)
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
56 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
def checkKeyExisted(key, my_dict):
    """Return True if ``my_dict[key]`` exists and is not None.

    Args:
        key: Key to look up.
        my_dict: Mapping to look in.

    Returns:
        False when the key is missing, when its value is None, or when the
        lookup itself is impossible (``my_dict`` is None or not
        subscriptable, or ``key`` is unhashable); True otherwise.
    """
    try:
        return my_dict[key] is not None
    except (KeyError, TypeError):
        return False
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
63 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
def data_version():
    """Return the value stored under ``_version`` in card.json.

    card.json is read from the module-level ``data_path`` directory.

    Returns:
        The ``_version`` value, or an empty string when the key is absent.

    Raises:
        OSError: If card.json cannot be opened.
        ValueError: If card.json is not valid JSON.
    """
    # JSON is read as UTF-8 rather than the locale's default encoding.
    with open(os.path.join(data_path, 'card.json'), encoding='utf-8') as json_file:
        json_data = json.load(json_file)
    return json_data.get('_version', '')
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
73 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
def makeBlastDB():
    """Build a BLAST protein database from ``proteindb.fsa``.

    Runs ``makeblastdb`` on ``<path>/proteindb.fsa`` and writes the database
    as ``<path>/protein.db``, where ``path`` is the module-level database
    directory. Does nothing when the FASTA file is absent. The tool's own
    output is discarded.

    Returns:
        None. A missing executable or a non-zero exit status is reported on
        stdout and is not raised.
    """
    # Local import: this function is the only user of subprocess here.
    import subprocess

    fasta = os.path.join(path, 'proteindb.fsa')
    if not os.path.isfile(fasta):
        return

    print('[import_data] create blast DB.')
    # Argument list instead of a shell string, so spaces or shell
    # metacharacters in `path` cannot split or alter the command.
    command = [
        'makeblastdb',
        '-in', fasta,
        '-dbtype', 'prot',
        '-out', os.path.join(path, 'protein.db'),
    ]
    try:
        status = subprocess.call(
            command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError as e:
        print('[import_data] could not run makeblastdb: {}'.format(e))
        return
    if status != 0:
        print('[import_data] makeblastdb exited with status {}'.format(status))
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
78 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
def makeDiamondDB():
    """Build a DIAMOND database from ``proteindb.fsa``.

    Runs ``diamond makedb --quiet`` on ``<path>/proteindb.fsa`` with
    ``<path>/protein.db`` as the database name, where ``path`` is the
    module-level database directory. Does nothing when the FASTA file is
    absent.

    Returns:
        None. A missing executable or a non-zero exit status is reported on
        stdout and is not raised.
    """
    # Local import: this function is the only user of subprocess here.
    import subprocess

    fasta = os.path.join(path, 'proteindb.fsa')
    if not os.path.isfile(fasta):
        return

    print('[import_data] create diamond DB.')
    # Argument list instead of a shell string, so spaces or shell
    # metacharacters in `path` cannot split or alter the command.
    command = [
        'diamond', 'makedb', '--quiet',
        '--in', fasta,
        '--db', os.path.join(path, 'protein.db'),
    ]
    try:
        status = subprocess.call(command)
    except OSError as e:
        print('[import_data] could not run diamond: {}'.format(e))
        return
    if status != 0:
        print('[import_data] diamond makedb exited with status {}'.format(status))
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
83 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
# CARD model types exported to the protein FASTA, keyed by model_type_id:
# (label of the bit-score field in the header, whether a SNP list is added).
# The homolog header spells the field "pass_bitscore" while the other two use
# "pass_bit_score"; both spellings are kept exactly as they were, since
# consumers of the FASTA may match on them.
_FASTA_MODEL_TYPES = {
    '40292': ('pass_bitscore', False),   # protein homolog model
    '40293': ('pass_bit_score', True),   # protein variant model
    '41091': ('pass_bit_score', True),   # protein overexpression model
}


def _warn_model_omitted(what, model):
    """Log that *model* is skipped because *what* is missing from it."""
    logger.warning('No %s for model (%s, %s). RGI will omit this model and keep running.',
                   what, model.get('model_id'), model.get('model_name'))
    logger.info('Please let the CARD Admins know! Email: card@mcmaster.ca')


def _model_fasta_lines(model_key, model, type_id):
    """Build the FASTA lines (header, sequence, ...) for one CARD model.

    Returns:
        A list of newline-terminated lines, or None if the model lacks a
        bit score, a required SNP list, or complete sequence data. A warning
        is logged in the None case.
    """
    score_label, with_snp = _FASTA_MODEL_TYPES[type_id]

    try:
        pass_bit_score = model['model_param']['blastp_bit_score']['param_value']
    except (KeyError, TypeError):
        _warn_model_omitted('bitscore', model)
        return None

    snp_field = ''
    if with_snp:
        try:
            snps = model['model_param']['snp']['param_value']
            snp_field = ' | SNP: %s' % ','.join(snps.values())
        except (KeyError, TypeError, AttributeError):
            # Skip the model here so that no SNP list from another model can
            # end up in this model's header.
            _warn_model_omitted('snp', model)
            return None

    lines = []
    try:
        aro_name = model['ARO_name']
        sequences = model['model_sequences']['sequence']
        for seq_id in sequences:
            # Resolve the sequence before emitting its header, so a header
            # is never produced without the sequence line that follows it.
            protein = sequences[seq_id]['protein_sequence']['sequence']
            lines.append('>%s_%s | model_type_id: %s | %s: %s%s | %s\n'
                         % (model_key, seq_id, type_id, score_label,
                            pass_bit_score, snp_field, aro_name))
            lines.append('%s\n' % protein)
    except (KeyError, TypeError):
        _warn_model_omitted('model sequences', model)
        return None
    return lines


def write_fasta_from_json():
    """Create ``<path>/proteindb.fsa`` from ``<data_path>/card.json``.

    Does nothing if the FASTA file already exists. Otherwise every entry of
    card.json whose key is all digits and whose ``model_type_id`` is 40292
    (protein homolog), 40293 (protein variant) or 41091 (protein
    overexpression) contributes one header/sequence pair per model sequence.
    A model with missing bit score, SNP list (variant and overexpression
    models) or sequence data is skipped as a whole and a warning is logged.

    Raises:
        SystemExit: With status 1 if card.json cannot be read or parsed; the
            cause is logged first.
    """
    fasta_path = os.path.join(path, 'proteindb.fsa')
    if os.path.isfile(fasta_path):
        return

    try:
        with open(os.path.join(data_path, 'card.json'), 'r', encoding='utf-8') as jfile:
            cards = json.load(jfile)
    except (OSError, ValueError) as e:
        logger.error(e)
        # Non-zero status: the builtin exit() would have reported success.
        sys.exit(1)

    with open(fasta_path, 'w', encoding='utf-8') as fout:
        for model_key, model in cards.items():
            # Model entries have numeric keys; other keys hold metadata.
            if not model_key.isdigit():
                continue
            type_id = model['model_type_id']
            if type_id not in _FASTA_MODEL_TYPES:
                continue
            lines = _model_fasta_lines(model_key, model, type_id)
            if lines:
                fout.writelines(lines)
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
169 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
def _main(args):
    """Download the CARD data and build the RGI reference databases.

    Makes sure the module-level ``path`` directory exists, picks the download
    URL (``args.url``, or the CARD "latest data" URL when none was supplied),
    downloads into ``<current working directory>/rgi-database`` and then calls
    the build helpers defined elsewhere in this module, in order:
    ``write_fasta_from_json``, ``makeBlastDB``, ``makeDiamondDB``.

    Args:
        args: Parsed command-line namespace. Only ``args.url`` is read.

    Returns:
        The value returned by ``data_version()`` after the build steps ran.
    """
    # `path` is a module-level name defined outside this function.
    if not os.path.exists(path):
        print('[import_data] mkdir: {}'.format(path))
        os.makedirs(path)
    print('[import_data] path: {}'.format(path))
    print(args)

    # Use an identity test for the None singleton (PEP 8); `== None` can be
    # hijacked by a custom __eq__ and is flagged by every linter.
    if args.url is None:
        url = 'https://card.mcmaster.ca/latest/data'
    else:
        url = args.url
    print('[import_data] url: {}'.format(url))

    workdir = os.path.join(os.getcwd(), 'rgi-database')
    print('[import_data] working directory: {}'.format(workdir))

    # Fetch the data first; the build helpers below take no arguments and
    # presumably read what was downloaded from module-level locations.
    url_download(url, workdir)
    write_fasta_from_json()
    makeBlastDB()
    makeDiamondDB()

    version = data_version()
    print('[import_data] data version: {}'.format(version))
    return version
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
191 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
def run():
    """Command-line entry point: parse the options and hand them to ``_main``.

    The only option is ``--url`` (stored as ``url``); when it is omitted the
    attribute is ``None``. The value returned by ``_main`` is not used.
    """
    cli = argparse.ArgumentParser(description='Create data manager json.')
    cli.add_argument('--url', dest='url', action='store', help='Url for CARD data')
    _main(cli.parse_args())
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
197 |
715bc9aeef69
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff
changeset
|
# Only start the import when executed as a script, not when imported.
if __name__ == '__main__':
    run()