Mercurial > repos > iuc > virannot_rps2tsv
diff rps2tsv.py @ 2:fd7104249a3c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
author | iuc |
---|---|
date | Wed, 21 Aug 2024 13:13:28 +0000 |
parents | bbaa89f070f4 |
children | d1fd5579469d |
line wrap: on
line diff
--- a/rps2tsv.py Sat May 18 18:14:29 2024 +0000
+++ b/rps2tsv.py Wed Aug 21 13:13:28 2024 +0000
@@ -56,7 +56,7 @@
  hsp["accession"] = aln.accession
  hsp["pfam_id"] = hsp["description"].split(",")[0].replace("pfam", "PF")
  log.info("Requeting Interpro for " + hsp["pfam_id"])
- url = "https://www.ebi.ac.uk/interpro/api/entry/pfam/" + hsp["pfam_id"] + "/taxonomy/uniprot/"
+ url = "https://www.ebi.ac.uk/interpro/api/taxonomy/uniprot/entry/pfam/" + hsp["pfam_id"]
  req = request.Request(url)
  try:
      response = request.urlopen(req)
@@ -69,13 +69,20 @@
  decoded_response = encoded_response.decode()
  payload = json.loads(decoded_response)
  kingdoms = []
- for item in payload["taxonomy_subset"]:
-     lineage_string = item["lineage"]
-     lineage = [int(i) for i in lineage_string]
-     translation = ncbi.get_taxid_translator(lineage)
-     names = list(translation.values())
-     taxonomy = names[1:] # remove 'root' at the begining
-     kingdoms.append(taxonomy[0])
+ for item in payload["results"][:6]:
+     if item["metadata"]["parent"] is not None:
+         lineage_parent = item["metadata"]["parent"]
+         translation = ncbi.get_taxid_translator([int(lineage_parent)])
+         names = list(translation.values())
+         if len(names) > 0:
+             if names[0] == "root":
+                 taxonomy = names[1:] # remove 'root' at the begining
+             else:
+                 taxonomy = names
+         else:
+             taxonomy = names
+         if len(taxonomy) != 0:
+             kingdoms.append(taxonomy[0])
  frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms} # {'Pseudomonadota': 9, 'cellular organisms': 4}
  sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True))
  concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items())