proteore_data_manager: data_manager/resource_building.py

comparison data_manager/resource_building.py @ 7:b8565596bb25 draft default tip

"planemo upload commit 7afd4b3ee25f024257ccbac6e51076d25b2a04e7"

author	proteore
date	Thu, 20 Aug 2020 03:33:35 -0400
parents	8f33a6e6e36c
children

comparison

Legend: equal | deleted | inserted | replaced

- : 8f33a6e6e36c (parent revision)
+ : b8565596bb25 (this revision)
 output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv"
 path = os.path.join(target_directory,output_file)
 name = "neXtProt release "+time.strftime("%d-%m-%Y")
 release_id = "nextprot_ref_"+time.strftime("%d-%m-%Y")
-output = open(path, 'w')
+output = open('test.csv', 'w')
 writer = csv.writer(output,delimiter="\t")
-nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]]
+nextprot_file=[["NextprotID","ProteinName","SeqLength","MW","IsoPoint","TMDomains","SubcellLocations","Diseases","Function","PostTranslationalModifications","ProteinFamily","Pathway","ProteinExistence","Chr"]]
 writer.writerows(nextprot_file)
 for id in ids :
 query="https://api.nextprot.org/entry/"+id+".json"
 try:
 resp = requests.get(url=query)
 except :
-print ("wainting 1 hour before trying again")
+print ("waiting 15 minutes before trying again")
-time.sleep(3600)
+time.sleep(900)
 resp = requests.get(url=query)
 data = resp.json()
 #get info from json dictionary
 mass_mol = data["entry"]["isoforms"][0]["massAsString"]
 seq_length = data['entry']["isoforms"][0]["sequenceLength"]
 iso_elec_point = data['entry']["isoforms"][0]["isoelectricPointAsString"]
 chr_loc = data['entry']["chromosomalLocations"][0]["chromosome"]
 protein_existence = "PE"+str(data['entry']["overview"]['proteinExistence']['level'])
+protein_name = data['entry']["overview"]['proteinNames'][0]['name']
+#get families description
+if 'families' in data['entry']["overview"] and len(data['entry']["overview"]['families']) > 0:
+families = data['entry']["overview"]['families']
+families = [entry['description'] for entry in families]
+protein_family = ";".join(families)
+else:
+protein_family = 'NA'
+#get Protein function
+if 'function-info' in data['entry']['annotationsByCategory'].keys():
+function_info = data['entry']['annotationsByCategory']['function-info']
+function_info = [entry['description'] for entry in function_info if entry['qualityQualifier'] == 'GOLD']
+function = ';'.join(function_info)
+else :
+function = 'NA'
+#Get ptm-info
+post_trans_mod = 'NA'
+if 'ptm-info' in data['entry']['annotationsByCategory'].keys():
+ptm_info = data['entry']['annotationsByCategory']['ptm-info']
+infos = [entry['description'] for entry in ptm_info if entry['qualityQualifier'] == 'GOLD']
+post_trans_mod = ";".join(infos)
+#Get pathway(s)
+if 'pathway' in data['entry']['annotationsByCategory'].keys():
+pathways = data['entry']['annotationsByCategory']['pathway']
+pathways = [entry['description'] for entry in pathways if entry['qualityQualifier'] == 'GOLD']
+pathway = ";".join(pathways)
+else :
+pathway = 'NA'
 #put all subcell loc in a set
 if "subcellular-location" in data['entry']["annotationsByCategory"].keys() :
 subcell_locs = data['entry']["annotationsByCategory"]["subcellular-location"]
 all_subcell_locs = set()
 for tm in tm_domains :
 all_tm_domains.add(tm['cvTermName'])
 nb_domains+=1
 #print "nb domains ++"
 #print (nb_domains)
 nextprot_file[:] = []
-nextprot_file.append([id,mass_mol,str(seq_length),iso_elec_point,chr_loc,all_subcell_locs,all_diseases,str(nb_domains),protein_existence])
+nextprot_file.append([id,protein_name,str(seq_length),mass_mol,iso_elec_point,str(nb_domains),all_subcell_locs,all_diseases,function,post_trans_mod,protein_family,pathway,protein_existence,chr_loc])
 writer.writerows(nextprot_file)
 id = str(10000000000 - int(time.strftime("%Y%m%d")))
 data_table_entry = dict(id=id, release=release_id, name = name, value = path)
 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref")
 #######################################################################################################

Mercurial > repos > proteore > proteore_data_manager

comparison data_manager/resource_building.py @ 7:b8565596bb25 draft default tip