proteore / proteore_data_manager
comparison of data_manager/resource_building.py @ 6:8f33a6e6e36c (draft)
"planemo upload commit 4747fc3ca8e24e0f6c0cfcde0992780c6d4ef4ff-dirty"

author | proteore |
date | Wed, 10 Jun 2020 03:13:18 -0400 |
parents | b05fa99ddda2 |
children | b8565596bb25 |
5:b05fa99ddda2 | 6:8f33a6e6e36c |
---|---|
130 return False | 130 return False |
131 | 131 |
132 ####################################################################################################### | 132 ####################################################################################################### |
133 # 3. ID mapping file | 133 # 3. ID mapping file |
134 ####################################################################################################### | 134 ####################################################################################################### |
135 import ftplib, gzip | 135 import ftplib, gzip |
| 136 from io import StringIO |
136 csv.field_size_limit(sys.maxsize) # to handle big files | 137 csv.field_size_limit(sys.maxsize) # to handle big files |
137 | 138 |
138 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) : | 139 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) : |
139 | 140 |
140 human = species == "Human" | 141 human = species == "Human" |
232 #print ("tab ok") | 233 #print ("tab ok") |
233 | 234 |
234 #add missing nextprot ID for human or replace old ones | 235 #add missing nextprot ID for human or replace old ones |
235 if human : | 236 if human : |
236 #build next_dict | 237 #build next_dict |
237 nextprot_path = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory) | 238 nextprot_path = download_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory) |
238 with open(nextprot_path,'r') as nextprot_ids : | 239 with open(nextprot_path,'r') as nextprot_ids : |
239 nextprot_ids = nextprot_ids.read().splitlines() | 240 nextprot_ids = nextprot_ids.read().splitlines() |
240 if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1])) | 241 if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1])) |
241 shutil.move(nextprot_path,archive) | 242 shutil.move(nextprot_path,archive) |
242 next_dict = {} | 243 next_dict = {} |
277 ftp.cwd(ftp_dir) | 278 ftp.cwd(ftp_dir) |
278 ftp.retrbinary("RETR " + file, open(path, 'wb').write) | 279 ftp.retrbinary("RETR " + file, open(path, 'wb').write) |
279 ftp.quit() | 280 ftp.quit() |
280 return (path) | 281 return (path) |
281 | 282 |
| 283 def download_from_nextprot_ftp(file,target_directory) : |
| 284 ftp_dir = "pub/current_release/ac_lists/" |
| 285 path = os.path.join(target_directory, file) |
| 286 ftp = ftplib.FTP("ftp.nextprot.org") |
| 287 ftp.login("anonymous", "anonymous") |
| 288 ftp.cwd(ftp_dir) |
| 289 ftp.retrbinary("RETR " + file, open(path, 'wb').write) |
| 290 ftp.quit() |
| 291 return (path) |
| 292 |
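The right-hand column above introduces a new `download_from_nextprot_ftp` helper that fetches a file from the neXtProt FTP server and returns its local path. A minimal sketch of the same idea, using context managers so the FTP session and the output file are closed even when the transfer fails (the host, directory, and function name come from the diff; the context-manager form is an assumption, not the committed code):

```python
import ftplib
import os

def download_from_nextprot_ftp(file, target_directory):
    # Sketch: download `file` from the neXtProt AC-list FTP directory.
    ftp_dir = "pub/current_release/ac_lists/"
    path = os.path.join(target_directory, file)
    # ftplib.FTP supports the context-manager protocol (Python 3.3+),
    # so the connection is closed even if retrbinary raises.
    with ftplib.FTP("ftp.nextprot.org") as ftp:
        ftp.login("anonymous", "anonymous")
        ftp.cwd(ftp_dir)
        with open(path, 'wb') as out_handle:
            ftp.retrbinary("RETR " + file, out_handle.write)
    return path
```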
282 def id_list_from_nextprot_ftp(file,target_directory) : | 293 def id_list_from_nextprot_ftp(file,target_directory) : |
283 ftp_dir = "pub/current_release/ac_lists/" | 294 ftp_dir = "pub/current_release/ac_lists/" |
284 path = os.path.join(target_directory, file) | 295 path = os.path.join(target_directory, file) |
285 ftp = ftplib.FTP("ftp.nextprot.org") | 296 ftp = ftplib.FTP("ftp.nextprot.org") |
286 ftp.login("anonymous", "anonymous") | 297 ftp.login("anonymous", "anonymous") |
287 ftp.cwd(ftp_dir) | 298 ftp.cwd(ftp_dir) |
288 ftp.retrbinary("RETR " + file, open(path, 'wb').write) | 299 ftp.retrbinary("RETR " + file, open(path, 'wb').write) |
289 ftp.quit() | 300 ftp.quit() |
290 | 301 with open(path,'r') as nextprot_ids : |
291 return (path) | 302 nextprot_ids = nextprot_ids.read().splitlines() |
| 303 return (nextprot_ids) |
292 | 304 |
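On the old side, `id_list_from_nextprot_ftp` returned the path of the downloaded file; on the new side it also reads the file and returns the accession list itself. Since the commit adds `download_from_nextprot_ftp` as well, the reading step could be layered on top of that helper instead of repeating the FTP calls; a hypothetical refactor along those lines (not what the commit does, which duplicates the FTP logic):

```python
def id_list_from_nextprot_ftp(file, target_directory):
    # Sketch: delegate the transfer to download_from_nextprot_ftp (above)
    # and return the neXtProt accession IDs, one per line of the file.
    path = download_from_nextprot_ftp(file, target_directory)
    with open(path, 'r') as handle:
        return handle.read().splitlines()
```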
293 #return '' if there's no value in a dictionary, avoid error | 305 #return '' if there's no value in a dictionary, avoid error |
294 def access_dictionary (dico,key1,key2) : | 306 def access_dictionary (dico,key1,key2) : |
295 if key1 in dico : | 307 if key1 in dico : |
296 if key2 in dico[key1] : | 308 if key2 in dico[key1] : |
547 | 559 |
548 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] | 560 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] |
549 writer.writerows(nextprot_file) | 561 writer.writerows(nextprot_file) |
550 | 562 |
551 for id in ids : | 563 for id in ids : |
552 #print (id) | |
553 query="https://api.nextprot.org/entry/"+id+".json" | 564 query="https://api.nextprot.org/entry/"+id+".json" |
554 resp = requests.get(url=query) | 565 try: |
| 566 resp = requests.get(url=query) |
| 567 except : |
| 568 print ("waiting 1 hour before trying again") |
| 569 time.sleep(3600) |
| 570 resp = requests.get(url=query) |
555 data = resp.json() | 571 data = resp.json() |
556 | 572 |
557 #get info from json dictionary | 573 #get info from json dictionary |
558 mass_mol = data["entry"]["isoforms"][0]["massAsString"] | 574 mass_mol = data["entry"]["isoforms"][0]["massAsString"] |
559 seq_length = data['entry']["isoforms"][0]["sequenceLength"] | 575 seq_length = data['entry']["isoforms"][0]["sequenceLength"] |
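The last hunk wraps the neXtProt API request in a bare `try/except` that sleeps for an hour and retries once. A sketch of a more defensive variant that catches only `requests` errors and retries a bounded number of times (the function name `fetch_nextprot_entry`, the `retries` and `wait_seconds` parameters, and the use of `raise_for_status` are assumptions for illustration; the committed code retries a single time and parses `resp.json()` inline):

```python
import time
import requests

def fetch_nextprot_entry(nextprot_id, retries=2, wait_seconds=3600):
    # Sketch: bounded retry around the neXtProt entry endpoint used above.
    # Only network-level requests exceptions are caught; the final failure
    # is re-raised instead of being silently swallowed by a bare except.
    query = "https://api.nextprot.org/entry/" + nextprot_id + ".json"
    for attempt in range(retries):
        try:
            resp = requests.get(url=query)
            resp.raise_for_status()
            return resp.json()
        except requests.exceptions.RequestException:
            if attempt == retries - 1:
                raise
            print("waiting " + str(wait_seconds) + " seconds before trying again")
            time.sleep(wait_seconds)
```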