diff rps2tsv.py @ 2:fd7104249a3c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
author iuc
date Wed, 21 Aug 2024 13:13:28 +0000
parents bbaa89f070f4
children d1fd5579469d
line wrap: on
line diff
--- a/rps2tsv.py	Sat May 18 18:14:29 2024 +0000
+++ b/rps2tsv.py	Wed Aug 21 13:13:28 2024 +0000
@@ -56,7 +56,7 @@
             hsp["accession"] = aln.accession
             hsp["pfam_id"] = hsp["description"].split(",")[0].replace("pfam", "PF")
             log.info("Requeting Interpro for " + hsp["pfam_id"])
-            url = "https://www.ebi.ac.uk/interpro/api/entry/pfam/" + hsp["pfam_id"] + "/taxonomy/uniprot/"
+            url = "https://www.ebi.ac.uk/interpro/api/taxonomy/uniprot/entry/pfam/" + hsp["pfam_id"]
             req = request.Request(url)
             try:
                 response = request.urlopen(req)
@@ -69,13 +69,20 @@
                 decoded_response = encoded_response.decode()
                 payload = json.loads(decoded_response)
                 kingdoms = []
-                for item in payload["taxonomy_subset"]:
-                    lineage_string = item["lineage"]
-                    lineage = [int(i) for i in lineage_string]
-                    translation = ncbi.get_taxid_translator(lineage)
-                    names = list(translation.values())
-                    taxonomy = names[1:]  # remove 'root' at the begining
-                    kingdoms.append(taxonomy[0])
+                for item in payload["results"][:6]:
+                    if item["metadata"]["parent"] is not None:
+                        lineage_parent = item["metadata"]["parent"]
+                        translation = ncbi.get_taxid_translator([int(lineage_parent)])
+                        names = list(translation.values())
+                        if len(names) > 0:
+                            if names[0] == "root":
+                                taxonomy = names[1:]  # remove 'root' at the beginning
+                            else:
+                                taxonomy = names
+                        else:
+                            taxonomy = names
+                        if len(taxonomy) != 0:
+                            kingdoms.append(taxonomy[0])
                 frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms}  # {'Pseudomonadota': 9, 'cellular organisms': 4}
                 sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True))
                 concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items())