comparison data_manager/resource_building.py @ 6:8f33a6e6e36c draft

"planemo upload commit 4747fc3ca8e24e0f6c0cfcde0992780c6d4ef4ff-dirty"
author proteore
date Wed, 10 Jun 2020 03:13:18 -0400
parents b05fa99ddda2
children b8565596bb25
comparison of 5:b05fa99ddda2 with 6:8f33a6e6e36c
@@ -130,11 +130,12 @@
     return False

 #######################################################################################################
 # 3. ID mapping file
 #######################################################################################################
 import ftplib, gzip
+from io import StringIO
 csv.field_size_limit(sys.maxsize) # to handle big files

 def id_mapping_sources (data_manager_dict, species, target_directory, tool_data_path) :

     human = species == "Human"
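Note on the csv.field_size_limit(sys.maxsize) line kept in this hunk: on builds where sys.maxsize does not fit into a C long (notably 64-bit Windows), that call raises OverflowError. A minimal, hedged sketch of a more defensive way to raise the limit; the helper name raise_csv_field_limit is illustrative and not part of this changeset.

import csv, sys

def raise_csv_field_limit():
    # Grow the CSV field size limit as far as this platform accepts,
    # halving on OverflowError (csv.field_size_limit rejects values
    # larger than a C long on some builds).
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            return limit
        except OverflowError:
            limit = int(limit / 2)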
@@ -232,11 +233,11 @@
     #print ("tab ok")

     #add missing nextprot ID for human or replace old ones
     if human :
         #build next_dict
-        nextprot_path = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
+        nextprot_path = download_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
         with open(nextprot_path,'r') as nextprot_ids :
             nextprot_ids = nextprot_ids.read().splitlines()
         if os.path.exists(os.path.join(archive,nextprot_path.split("/")[-1])) : os.remove(os.path.join(archive,nextprot_path.split("/")[-1]))
         shutil.move(nextprot_path,archive)
         next_dict = {}
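The hunk above switches the human branch to the new download_from_nextprot_ftp helper, then moves the downloaded nextprot_ac_list_all.txt into the archive directory after deleting any copy left by a previous run. A short sketch of that archive step in isolation, assuming archive and the downloaded path are the same variables as in id_mapping_sources; the helper name is hypothetical.

import os, shutil

def archive_downloaded_file(downloaded_path, archive_dir):
    # Hypothetical helper mirroring the step above: remove a stale copy
    # before shutil.move, because moving onto an existing file raises
    # an error on some platforms (e.g. Windows) instead of overwriting.
    destination = os.path.join(archive_dir, os.path.basename(downloaded_path))
    if os.path.exists(destination):
        os.remove(destination)
    shutil.move(downloaded_path, archive_dir)
    return destination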
@@ -277,20 +278,31 @@
     ftp.cwd(ftp_dir)
     ftp.retrbinary("RETR " + file, open(path, 'wb').write)
     ftp.quit()
     return (path)

+def download_from_nextprot_ftp(file,target_directory) :
+    ftp_dir = "pub/current_release/ac_lists/"
+    path = os.path.join(target_directory, file)
+    ftp = ftplib.FTP("ftp.nextprot.org")
+    ftp.login("anonymous", "anonymous")
+    ftp.cwd(ftp_dir)
+    ftp.retrbinary("RETR " + file, open(path, 'wb').write)
+    ftp.quit()
+    return (path)
+
 def id_list_from_nextprot_ftp(file,target_directory) :
     ftp_dir = "pub/current_release/ac_lists/"
     path = os.path.join(target_directory, file)
     ftp = ftplib.FTP("ftp.nextprot.org")
     ftp.login("anonymous", "anonymous")
     ftp.cwd(ftp_dir)
     ftp.retrbinary("RETR " + file, open(path, 'wb').write)
     ftp.quit()
-
-    return (path)
+    with open(path,'r') as nextprot_ids :
+        nextprot_ids = nextprot_ids.read().splitlines()
+    return (nextprot_ids)

 #return '' if there's no value in a dictionary, avoid error
 def access_dictionary (dico,key1,key2) :
     if key1 in dico :
         if key2 in dico[key1] :
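Both download_from_nextprot_ftp and the reworked id_list_from_nextprot_ftp leave the FTP session and the local file handle open if retrbinary raises. A sketch of the same download using context managers, assuming Python 3 (ftplib.FTP as a context manager) and that ftp.nextprot.org still serves pub/current_release/ac_lists/ anonymously; the function name is illustrative only.

import ftplib, os

def fetch_nextprot_ac_list(file, target_directory):
    # Sketch: same retrieval as the two helpers above, but the FTP
    # connection and the output file are closed even if the transfer fails.
    path = os.path.join(target_directory, file)
    with ftplib.FTP("ftp.nextprot.org") as ftp:
        ftp.login("anonymous", "anonymous")
        ftp.cwd("pub/current_release/ac_lists/")
        with open(path, 'wb') as handle:
            ftp.retrbinary("RETR " + file, handle.write)
    with open(path, 'r') as handle:
        return handle.read().splitlines()

Collapsing the two helpers into one function like this would be a further refactor; the changeset keeps them separate so download_from_nextprot_ftp returns the downloaded file path while id_list_from_nextprot_ftp returns the ID list.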
@@ -547,13 +559,17 @@

     nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]]
     writer.writerows(nextprot_file)

     for id in ids :
-        #print (id)
         query="https://api.nextprot.org/entry/"+id+".json"
-        resp = requests.get(url=query)
+        try:
+            resp = requests.get(url=query)
+        except :
+            print ("waiting 1 hour before trying again")
+            time.sleep(3600)
+            resp = requests.get(url=query)
         data = resp.json()

         #get info from json dictionary
         mass_mol = data["entry"]["isoforms"][0]["massAsString"]
         seq_length = data['entry']["isoforms"][0]["sequenceLength"]
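The retry added around requests.get catches every exception (including KeyboardInterrupt) and retries only once. A hedged sketch of a narrower retry loop for the same neXtProt API call; the function name, attempt count and timeout are illustrative choices, not part of this commit.

import time
import requests

def get_nextprot_entry(id, retries=3, wait_seconds=3600):
    # Retry only on requests-level failures (connection errors, timeouts,
    # HTTP error status) and give up after a few attempts.
    query = "https://api.nextprot.org/entry/" + id + ".json"
    for attempt in range(retries):
        try:
            resp = requests.get(url=query, timeout=60)
            resp.raise_for_status()
            return resp.json()
        except requests.exceptions.RequestException:
            if attempt == retries - 1:
                raise
            print("waiting " + str(wait_seconds) + " seconds before trying " + query + " again")
            time.sleep(wait_seconds)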