diff data_manager/resource_building.py @ 6:8f33a6e6e36c draft

"planemo upload commit 4747fc3ca8e24e0f6c0cfcde0992780c6d4ef4ff-dirty"
author proteore
date Wed, 10 Jun 2020 03:13:18 -0400
parents b05fa99ddda2
children b8565596bb25
line wrap: on
line diff
--- a/data_manager/resource_building.py	Thu Feb 06 04:02:50 2020 -0500
+++ b/data_manager/resource_building.py	Wed Jun 10 03:13:18 2020 -0400
@@ -132,7 +132,8 @@
 #######################################################################################################
 # 3. ID mapping file
 #######################################################################################################
-import ftplib, gzip
+import ftplib,  gzip
+from io import StringIO
 csv.field_size_limit(sys.maxsize) # to handle big files
 
 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) :
@@ -234,7 +235,7 @@
     #add missing nextprot ID for human or replace old ones
     if human : 
         #build next_dict
-        nextprot_path = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
+        nextprot_path = download_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
         with open(nextprot_path,'r') as nextprot_ids :
             nextprot_ids = nextprot_ids.read().splitlines()
         if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1]))
@@ -279,6 +280,16 @@
     ftp.quit()
     return (path)
 
+def download_from_nextprot_ftp(file,target_directory) :
+    """Download `file` from pub/current_release/ac_lists/ on ftp.nextprot.org into target_directory; return the local path."""
+    path = os.path.join(target_directory, file)
+    # 'with' closes the FTP session and flushes/closes the output file even when a step raises
+    with ftplib.FTP("ftp.nextprot.org") as ftp :
+        ftp.login("anonymous", "anonymous")
+        ftp.cwd("pub/current_release/ac_lists/")
+        with open(path, 'wb') as out : ftp.retrbinary("RETR " + file, out.write)
+    return (path)
+
 def id_list_from_nextprot_ftp(file,target_directory) :
     ftp_dir = "pub/current_release/ac_lists/"
     path = os.path.join(target_directory, file)
@@ -287,8 +298,9 @@
     ftp.cwd(ftp_dir)
     ftp.retrbinary("RETR " + file, open(path, 'wb').write)
     ftp.quit()
-
-    return (path)
+    with open(path,'r') as nextprot_ids :
+        nextprot_ids = nextprot_ids.read().splitlines()
+    return (nextprot_ids)
 
 #return '' if there's no value in a dictionary, avoid error
 def access_dictionary (dico,key1,key2) :
@@ -549,9 +561,13 @@
     writer.writerows(nextprot_file)
     
     for id in ids :
-        #print (id)
         query="https://api.nextprot.org/entry/"+id+".json"
-        resp = requests.get(url=query)
+        try:
+            resp = requests.get(url=query)
+        except :
+            print ("wainting 1 hour before trying again")
+            time.sleep(3600)
+            resp = requests.get(url=query)
         data = resp.json()
 
         #get info from json dictionary