Mercurial > repos > proteore > proteore_data_manager
diff data_manager/resource_building.py @ 6:8f33a6e6e36c draft
"planemo upload commit 4747fc3ca8e24e0f6c0cfcde0992780c6d4ef4ff-dirty"
author | proteore |
---|---|
date | Wed, 10 Jun 2020 03:13:18 -0400 |
parents | b05fa99ddda2 |
children | b8565596bb25 |
line wrap: on
line diff
```diff
--- a/data_manager/resource_building.py Thu Feb 06 04:02:50 2020 -0500
+++ b/data_manager/resource_building.py Wed Jun 10 03:13:18 2020 -0400
@@ -132,7 +132,8 @@
 #######################################################################################################
 # 3. ID mapping file
 #######################################################################################################
-import ftplib, gzip
+import ftplib, gzip
+from io import StringIO
 csv.field_size_limit(sys.maxsize) # to handle big files
 
 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) :
@@ -234,7 +235,7 @@
     #add missing nextprot ID for human or replace old ones
     if human :
         #build next_dict
-        nextprot_path = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
+        nextprot_path = download_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
         with open(nextprot_path,'r') as nextprot_ids :
             nextprot_ids = nextprot_ids.read().splitlines()
         if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1]))
@@ -279,6 +280,16 @@
     ftp.quit()
     return (path)
 
+def download_from_nextprot_ftp(file,target_directory) :
+    ftp_dir = "pub/current_release/ac_lists/"
+    path = os.path.join(target_directory, file)
+    ftp = ftplib.FTP("ftp.nextprot.org")
+    ftp.login("anonymous", "anonymous")
+    ftp.cwd(ftp_dir)
+    ftp.retrbinary("RETR " + file, open(path, 'wb').write)
+    ftp.quit()
+    return (path)
+
 def id_list_from_nextprot_ftp(file,target_directory) :
     ftp_dir = "pub/current_release/ac_lists/"
     path = os.path.join(target_directory, file)
@@ -287,8 +298,9 @@
     ftp.cwd(ftp_dir)
     ftp.retrbinary("RETR " + file, open(path, 'wb').write)
     ftp.quit()
-
-    return (path)
+    with open(path,'r') as nextprot_ids :
+        nextprot_ids = nextprot_ids.read().splitlines()
+    return (nextprot_ids)
 
 #return '' if there's no value in a dictionary, avoid error
 def access_dictionary (dico,key1,key2) :
@@ -549,9 +561,13 @@
         writer.writerows(nextprot_file)
 
         for id in ids :
-            #print (id)
             query="https://api.nextprot.org/entry/"+id+".json"
-            resp = requests.get(url=query)
+            try:
+                resp = requests.get(url=query)
+            except :
+                print ("wainting 1 hour before trying again")
+                time.sleep(3600)
+                resp = requests.get(url=query)
             data = resp.json()
 
             #get info from json dictionary
```