Mercurial > repos > proteore > proteore_data_manager
comparison data_manager/resource_building.py @ 7:b8565596bb25 draft default tip
"planemo upload commit 7afd4b3ee25f024257ccbac6e51076d25b2a04e7"
| author | proteore |
|---|---|
| date | Thu, 20 Aug 2020 03:33:35 -0400 |
| parents | 8f33a6e6e36c |
| children | |
comparison
equal
deleted
inserted
replaced
| 6:8f33a6e6e36c | 7:b8565596bb25 |
|---|---|
| 552 output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv" | 552 output_file = 'nextprot_ref_'+ time.strftime("%d-%m-%Y") + ".tsv" |
| 553 path = os.path.join(target_directory,output_file) | 553 path = os.path.join(target_directory,output_file) |
| 554 name = "neXtProt release "+time.strftime("%d-%m-%Y") | 554 name = "neXtProt release "+time.strftime("%d-%m-%Y") |
| 555 release_id = "nextprot_ref_"+time.strftime("%d-%m-%Y") | 555 release_id = "nextprot_ref_"+time.strftime("%d-%m-%Y") |
| 556 | 556 |
| 557 output = open(path, 'w') | 557 output = open('test.csv', 'w') |
| 558 writer = csv.writer(output,delimiter="\t") | 558 writer = csv.writer(output,delimiter="\t") |
| 559 | 559 |
| 560 nextprot_file=[["NextprotID","MW","SeqLength","IsoPoint","Chr","SubcellLocations","Diseases","TMDomains","ProteinExistence"]] | 560 nextprot_file=[["NextprotID","ProteinName","SeqLength","MW","IsoPoint","TMDomains","SubcellLocations","Diseases","Function","PostTranslationalModifications","ProteinFamily","Pathway","ProteinExistence","Chr"]] |
| 561 writer.writerows(nextprot_file) | 561 writer.writerows(nextprot_file) |
| 562 | 562 |
| 563 for id in ids : | 563 for id in ids : |
| 564 query="https://api.nextprot.org/entry/"+id+".json" | 564 query="https://api.nextprot.org/entry/"+id+".json" |
| 565 try: | 565 try: |
| 566 resp = requests.get(url=query) | 566 resp = requests.get(url=query) |
| 567 except : | 567 except : |
| 568 print ("wainting 1 hour before trying again") | 568 print ("waiting 15 minutes before trying again") |
| 569 time.sleep(3600) | 569 time.sleep(900) |
| 570 resp = requests.get(url=query) | 570 resp = requests.get(url=query) |
| 571 data = resp.json() | 571 data = resp.json() |
| 572 | 572 |
| 573 #get info from json dictionary | 573 #get info from json dictionary |
| 574 mass_mol = data["entry"]["isoforms"][0]["massAsString"] | 574 mass_mol = data["entry"]["isoforms"][0]["massAsString"] |
| 575 seq_length = data['entry']["isoforms"][0]["sequenceLength"] | 575 seq_length = data['entry']["isoforms"][0]["sequenceLength"] |
| 576 iso_elec_point = data['entry']["isoforms"][0]["isoelectricPointAsString"] | 576 iso_elec_point = data['entry']["isoforms"][0]["isoelectricPointAsString"] |
| 577 chr_loc = data['entry']["chromosomalLocations"][0]["chromosome"] | 577 chr_loc = data['entry']["chromosomalLocations"][0]["chromosome"] |
| 578 protein_existence = "PE"+str(data['entry']["overview"]['proteinExistence']['level']) | 578 protein_existence = "PE"+str(data['entry']["overview"]['proteinExistence']['level']) |
| | 579 protein_name = data['entry']["overview"]['proteinNames'][0]['name'] |
| | 580 |
| | 581 #get families description |
| | 582 if 'families' in data['entry']["overview"] and len(data['entry']["overview"]['families']) > 0: |
| | 583 families = data['entry']["overview"]['families'] |
| | 584 families = [entry['description'] for entry in families] |
| | 585 protein_family = ";".join(families) |
| | 586 else: |
| | 587 protein_family = 'NA' |
| | 588 |
| | 589 #get Protein function |
| | 590 if 'function-info' in data['entry']['annotationsByCategory'].keys(): |
| | 591 function_info = data['entry']['annotationsByCategory']['function-info'] |
| | 592 function_info = [entry['description'] for entry in function_info if entry['qualityQualifier'] == 'GOLD'] |
| | 593 function = ';'.join(function_info) |
| | 594 else : |
| | 595 function = 'NA' |
| | 596 |
| | 597 #Get ptm-info |
| | 598 post_trans_mod = 'NA' |
| | 599 if 'ptm-info' in data['entry']['annotationsByCategory'].keys(): |
| | 600 ptm_info = data['entry']['annotationsByCategory']['ptm-info'] |
| | 601 infos = [entry['description'] for entry in ptm_info if entry['qualityQualifier'] == 'GOLD'] |
| | 602 post_trans_mod = ";".join(infos) |
| | 603 |
| | 604 #Get pathway(s) |
| | 605 if 'pathway' in data['entry']['annotationsByCategory'].keys(): |
| | 606 pathways = data['entry']['annotationsByCategory']['pathway'] |
| | 607 pathways = [entry['description'] for entry in pathways if entry['qualityQualifier'] == 'GOLD'] |
| | 608 pathway = ";".join(pathways) |
| | 609 else : |
| | 610 pathway = 'NA' |
| 579 | 611 |
| 580 #put all subcell loc in a set | 612 #put all subcell loc in a set |
| 581 if "subcellular-location" in data['entry']["annotationsByCategory"].keys() : | 613 if "subcellular-location" in data['entry']["annotationsByCategory"].keys() : |
| 582 subcell_locs = data['entry']["annotationsByCategory"]["subcellular-location"] | 614 subcell_locs = data['entry']["annotationsByCategory"]["subcellular-location"] |
| 583 all_subcell_locs = set() | 615 all_subcell_locs = set() |
| 608 for tm in tm_domains : | 640 for tm in tm_domains : |
| 609 all_tm_domains.add(tm['cvTermName']) | 641 all_tm_domains.add(tm['cvTermName']) |
| 610 nb_domains+=1 | 642 nb_domains+=1 |
| 611 #print "nb domains ++" | 643 #print "nb domains ++" |
| 612 #print (nb_domains) | 644 #print (nb_domains) |
| | 645 |
| 613 nextprot_file[:] = [] | 646 nextprot_file[:] = [] |
| 614 nextprot_file.append([id,mass_mol,str(seq_length),iso_elec_point,chr_loc,all_subcell_locs,all_diseases,str(nb_domains),protein_existence]) | 647 nextprot_file.append([id,protein_name,str(seq_length),mass_mol,iso_elec_point,str(nb_domains),all_subcell_locs,all_diseases,function,post_trans_mod,protein_family,pathway,protein_existence,chr_loc]) |
| 615 writer.writerows(nextprot_file) | 648 writer.writerows(nextprot_file) |
| 616 | 649 |
| 617 id = str(10000000000 - int(time.strftime("%Y%m%d"))) | 650 id = str(10000000000 - int(time.strftime("%Y%m%d"))) |
| 618 | 651 |
| 619 data_table_entry = dict(id=id, release=release_id, name = name, value = path) | 652 data_table_entry = dict(id=id, release=release_id, name = name, value = path) |
| 620 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref") | 653 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_nextprot_ref") |
| 621 | 654 |
| 622 ####################################################################################################### | 655 ####################################################################################################### |
