changeset 10:95fb5712344f draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/uniprot_rest_interface commit 1c020106d4d7f957c9f1ec0d9885bbb2d56e70e7
author bgruening
date Tue, 06 Aug 2024 14:49:45 +0000
parents 468c71dac78a
children
files test-data/test1_map.tab test-data/test2_retrieve.gff uniprot.py uniprot.xml
diffstat 4 files changed, 38 insertions(+), 132 deletions(-) [+]
line wrap: on
line diff
--- a/test-data/test1_map.tab	Wed May 22 21:18:15 2024 +0000
+++ b/test-data/test1_map.tab	Tue Aug 06 14:49:45 2024 +0000
@@ -2,8 +2,6 @@
 A0A077Z587	TTRE_0000309301
 A0A077ZFY8	TTRE_0000758701
 A0A077ZHN8	TTRE_0000819801
-M5B8V9	CMN_01519
-M5BAG7	cydC
 O14639	ABLIM1
 Q0P8A9	fdhC
 Q13685	AAMP
--- a/test-data/test2_retrieve.gff	Wed May 22 21:18:15 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-##gff-version 3
-##sequence-region M5BAG7 1 563
-M5BAG7	UniProtKB	Transmembrane	21	43	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5BAG7	UniProtKB	Transmembrane	49	71	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5BAG7	UniProtKB	Transmembrane	132	153	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5BAG7	UniProtKB	Transmembrane	159	181	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5BAG7	UniProtKB	Transmembrane	236	259	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5BAG7	UniProtKB	Transmembrane	274	296	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5BAG7	UniProtKB	Domain	20	301	.	.	.	Note=ABC transmembrane type-1;Ontology_term=ECO:0000259;evidence=ECO:0000259|PROSITE:PS50929	
-M5BAG7	UniProtKB	Domain	345	559	.	.	.	Note=ABC transporter;Ontology_term=ECO:0000259;evidence=ECO:0000259|PROSITE:PS50893	
-M5BAG7	UniProtKB	Nucleotide binding	379	386	.	.	.	Note=ATP;Ontology_term=ECO:0000256;evidence=ECO:0000256|PROSITE-ProRule:PRU00434	
-M5BAG7	UniProtKB	Region	317	337	.	.	.	Note=Disordered;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:MobiDB-lite	
-##sequence-region A0A077ZHN8 1 634
-A0A077ZHN8	UniProtKB	Transmembrane	14	36	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077ZHN8	UniProtKB	Transmembrane	56	80	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077ZHN8	UniProtKB	Transmembrane	113	132	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077ZHN8	UniProtKB	Transmembrane	290	310	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077ZHN8	UniProtKB	Domain	312	364	.	.	.	Note=HAMP;Ontology_term=ECO:0000259;evidence=ECO:0000259|PROSITE:PS50885	
-A0A077ZHN8	UniProtKB	Domain	369	598	.	.	.	Note=Methyl-accepting transducer;Ontology_term=ECO:0000259;evidence=ECO:0000259|PROSITE:PS50111	
-A0A077ZHN8	UniProtKB	Coiled coil	170	204	.	.	.	Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Coils	
-A0A077ZHN8	UniProtKB	Coiled coil	569	607	.	.	.	Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Coils	
-##sequence-region M5B8V9 1 582
-M5B8V9	UniProtKB	Transmembrane	20	43	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5B8V9	UniProtKB	Transmembrane	55	77	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5B8V9	UniProtKB	Transmembrane	134	154	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5B8V9	UniProtKB	Transmembrane	161	180	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5B8V9	UniProtKB	Transmembrane	236	260	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-M5B8V9	UniProtKB	Domain	20	302	.	.	.	Note=ABC transmembrane type-1;Ontology_term=ECO:0000259;evidence=ECO:0000259|PROSITE:PS50929	
-M5B8V9	UniProtKB	Domain	340	570	.	.	.	Note=ABC transporter;Ontology_term=ECO:0000259;evidence=ECO:0000259|PROSITE:PS50893	
-M5B8V9	UniProtKB	Nucleotide binding	372	379	.	.	.	Note=ATP;Ontology_term=ECO:0000256;evidence=ECO:0000256|PROSITE-ProRule:PRU00434	
-##sequence-region S0DS17 1 369
-S0DS17	UniProtKB	Chain	1	369	.	.	.	ID=PRO_0000437163;Note=Cytochrome P450 monooxygenase apf8	
-S0DS17	UniProtKB	Metal binding	303	303	.	.	.	Note=Iron (heme axial ligand);Ontology_term=ECO:0000250;evidence=ECO:0000250|UniProtKB:P04798	
-##sequence-region A0A077Z587 1 772
-A0A077Z587	UniProtKB	Transmembrane	593	617	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077Z587	UniProtKB	Transmembrane	637	656	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077Z587	UniProtKB	Transmembrane	668	692	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077Z587	UniProtKB	Transmembrane	704	727	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077Z587	UniProtKB	Transmembrane	733	755	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-A0A077Z587	UniProtKB	Domain	20	94	.	.	.	Note=PDZ;Ontology_term=ECO:0000259;evidence=ECO:0000259|PROSITE:PS50106	
-A0A077Z587	UniProtKB	Domain	552	761	.	.	.	Note=Cytochrome b561;Ontology_term=ECO:0000259;evidence=ECO:0000259|PROSITE:PS50939	
-##sequence-region Q0P8A9 1 310
-Q0P8A9	UniProtKB	Transmembrane	55	78	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-Q0P8A9	UniProtKB	Transmembrane	99	124	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-Q0P8A9	UniProtKB	Transmembrane	136	156	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-Q0P8A9	UniProtKB	Transmembrane	195	216	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-Q0P8A9	UniProtKB	Transmembrane	244	264	.	.	.	Note=Helical;Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Phobius	
-Q0P8A9	UniProtKB	Domain	93	274	.	.	.	Note=Ni_hydr_CYTB;Ontology_term=ECO:0000259;evidence=ECO:0000259|Pfam:PF01292	
-##sequence-region O14639 1 778
-O14639	UniProtKB	Chain	1	778	.	.	.	ID=PRO_0000075697;Note=Actin-binding LIM protein 1	
-O14639	UniProtKB	Domain	97	156	.	.	.	Note=LIM zinc-binding 1;Ontology_term=ECO:0000255;evidence=ECO:0000255|PROSITE-ProRule:PRU00125	
-O14639	UniProtKB	Domain	156	216	.	.	.	Note=LIM zinc-binding 2;Ontology_term=ECO:0000255;evidence=ECO:0000255|PROSITE-ProRule:PRU00125	
-O14639	UniProtKB	Domain	224	283	.	.	.	Note=LIM zinc-binding 3;Ontology_term=ECO:0000255;evidence=ECO:0000255|PROSITE-ProRule:PRU00125	
-O14639	UniProtKB	Domain	283	343	.	.	.	Note=LIM zinc-binding 4;Ontology_term=ECO:0000255;evidence=ECO:0000255|PROSITE-ProRule:PRU00125	
-O14639	UniProtKB	Domain	710	778	.	.	.	Note=HP;Ontology_term=ECO:0000255;evidence=ECO:0000255|PROSITE-ProRule:PRU00595	
-O14639	UniProtKB	Coiled coil	590	614	.	.	.	Ontology_term=ECO:0000255;evidence=ECO:0000255	
-O14639	UniProtKB	Modified residue	216	216	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000250;evidence=ECO:0000250|UniProtKB:Q8K4G5	
-O14639	UniProtKB	Modified residue	367	367	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:19690332;Dbxref=PMID:19690332	
-O14639	UniProtKB	Modified residue	373	373	.	.	.	Note=Phosphotyrosine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:19690332;Dbxref=PMID:19690332	
-O14639	UniProtKB	Modified residue	396	396	.	.	.	Note=Phosphotyrosine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:15592455;Dbxref=PMID:15592455	
-O14639	UniProtKB	Modified residue	422	422	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:23186163;Dbxref=PMID:23186163	
-O14639	UniProtKB	Modified residue	426	426	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244,ECO:0000244;evidence=ECO:0000244|PubMed:19690332,ECO:0000244|PubMed:24275569;Dbxref=PMID:19690332,PMID:24275569	
-O14639	UniProtKB	Modified residue	431	431	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244,ECO:0000244,ECO:0000244;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:19690332,ECO:0000244|PubMed:23186163;Dbxref=PMID:18669648,PMID:19690332,PMID:23186163	
-O14639	UniProtKB	Modified residue	433	433	.	.	.	Note=Phosphothreonine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:19690332;Dbxref=PMID:19690332	
-O14639	UniProtKB	Modified residue	435	435	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244,ECO:0000244,ECO:0000244,ECO:0000244,ECO:0000244,ECO:0000244;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:19690332,ECO:0000244|PubMed:20068231,ECO:0000244|PubMed:21406692,ECO:0000244|PubMed:23186163,ECO:0000244|PubMed:24275569;Dbxref=PMID:18669648,PMID:19690332,PMID:20068231,PMID:21406692,PMID:23186163,PMID:24275569	
-O14639	UniProtKB	Modified residue	439	439	.	.	.	Note=Phosphotyrosine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:15144186;Dbxref=PMID:15144186	
-O14639	UniProtKB	Modified residue	452	452	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:23186163;Dbxref=PMID:23186163	
-O14639	UniProtKB	Modified residue	455	455	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244,ECO:0000244,ECO:0000244;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:23186163,ECO:0000244|PubMed:24275569;Dbxref=PMID:18669648,PMID:23186163,PMID:24275569	
-O14639	UniProtKB	Modified residue	458	458	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244,ECO:0000244;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:23186163;Dbxref=PMID:18669648,PMID:23186163	
-O14639	UniProtKB	Modified residue	498	498	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000250;evidence=ECO:0000250|UniProtKB:Q8K4G5	
-O14639	UniProtKB	Modified residue	587	587	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244,ECO:0000244;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:23186163;Dbxref=PMID:18669648,PMID:23186163	
-O14639	UniProtKB	Modified residue	640	640	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244,ECO:0000244;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:20068231;Dbxref=PMID:18669648,PMID:20068231	
-O14639	UniProtKB	Modified residue	655	655	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244,ECO:0000244;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:23186163;Dbxref=PMID:18669648,PMID:23186163	
-O14639	UniProtKB	Modified residue	677	677	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000250;evidence=ECO:0000250|UniProtKB:Q8K4G5	
-O14639	UniProtKB	Modified residue	706	706	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:24275569;Dbxref=PMID:24275569	
-O14639	UniProtKB	Cross-link	620	620	.	.	.	Note=Glycyl lysine isopeptide (Lys-Gly) (interchain with G-Cter in SUMO2);Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:28112733;Dbxref=PMID:28112733	
-O14639	UniProtKB	Alternative sequence	1	316	.	.	.	ID=VSP_012099;Note=In isoform 3%2C isoform 4 and isoform 5. Missing;Ontology_term=ECO:0000303,ECO:0000303,ECO:0000303;evidence=ECO:0000303|PubMed:14702039,ECO:0000303|PubMed:15489334,ECO:0000303|PubMed:17974005;Dbxref=PMID:14702039,PMID:15489334,PMID:17974005	
-O14639	UniProtKB	Alternative sequence	1	81	.	.	.	ID=VSP_012100;Note=In isoform 2 and isoform 6. MPAFLGLKCLGKLCSSEKSKVTSSERTSARGSNRKRLIVEDRRVSGTSFTAHRRATITHLLYLCPKDYCPRGRVCNSVDPF->MLMTLEMTELTDPHHTMGDYK;Ontology_term=ECO:0000303,ECO:0000303;evidence=ECO:0000303|PubMed:14702039,ECO:0000303|PubMed:7584044;Dbxref=PMID:14702039,PMID:7584044	
-O14639	UniProtKB	Alternative sequence	347	347	.	.	.	ID=VSP_041185;Note=In isoform 5 and isoform 6. R->RLPNIRRSSSDFFYSKSLIRRTGRSPSLQ;Ontology_term=ECO:0000303;evidence=ECO:0000303|PubMed:14702039;Dbxref=PMID:14702039	
-O14639	UniProtKB	Alternative sequence	348	373	.	.	.	ID=VSP_012101;Note=In isoform 4. Missing;Ontology_term=ECO:0000303;evidence=ECO:0000303|PubMed:15489334;Dbxref=PMID:15489334	
-O14639	UniProtKB	Alternative sequence	480	514	.	.	.	ID=VSP_012102;Note=In isoform 3%2C isoform 4 and isoform 5. Missing;Ontology_term=ECO:0000303,ECO:0000303,ECO:0000303;evidence=ECO:0000303|PubMed:14702039,ECO:0000303|PubMed:15489334,ECO:0000303|PubMed:17974005;Dbxref=PMID:14702039,PMID:15489334,PMID:17974005	
-O14639	UniProtKB	Alternative sequence	531	531	.	.	.	ID=VSP_057209;Note=In isoform 6. H->HDA;Ontology_term=ECO:0000303;evidence=ECO:0000303|PubMed:14702039;Dbxref=PMID:14702039	
-O14639	UniProtKB	Natural variant	434	434	.	.	.	ID=VAR_050141;Note=P->T;Dbxref=dbSNP:rs11593544	
-O14639	UniProtKB	Natural variant	637	637	.	.	.	ID=VAR_050142;Note=R->G;Dbxref=dbSNP:rs7091419	
-O14639	UniProtKB	Sequence conflict	499	499	.	.	.	Note=R->L;Ontology_term=ECO:0000305;evidence=ECO:0000305	
-O14639	UniProtKB	Sequence conflict	532	532	.	.	.	Note=A->R;Ontology_term=ECO:0000305;evidence=ECO:0000305	
-O14639	UniProtKB	Sequence conflict	563	563	.	.	.	Note=K->E;Ontology_term=ECO:0000305;evidence=ECO:0000305	
-O14639	UniProtKB	Sequence conflict	578	578	.	.	.	Note=V->I;Ontology_term=ECO:0000305;evidence=ECO:0000305	
-##sequence-region A0A077ZFY8 1 973
-A0A077ZFY8	UniProtKB	Domain	1	89	.	.	.	Note=Mur_ligase;Ontology_term=ECO:0000259;evidence=ECO:0000259|Pfam:PF01225	
-A0A077ZFY8	UniProtKB	Domain	96	279	.	.	.	Note=Mur_ligase_M;Ontology_term=ECO:0000259;evidence=ECO:0000259|Pfam:PF08245	
-A0A077ZFY8	UniProtKB	Domain	300	349	.	.	.	Note=Mur_ligase_C;Ontology_term=ECO:0000259;evidence=ECO:0000259|Pfam:PF02875	
-A0A077ZFY8	UniProtKB	Coiled coil	867	887	.	.	.	Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Coils	
-A0A077ZFY8	UniProtKB	Coiled coil	951	971	.	.	.	Ontology_term=ECO:0000256;evidence=ECO:0000256|SAM:Coils	
-##sequence-region Q13685 1 434
-Q13685	UniProtKB	Chain	1	434	.	.	.	ID=PRO_0000050832;Note=Angio-associated migratory cell protein	
-Q13685	UniProtKB	Repeat	89	129	.	.	.	Note=WD 1	
-Q13685	UniProtKB	Repeat	132	171	.	.	.	Note=WD 2	
-Q13685	UniProtKB	Repeat	173	212	.	.	.	Note=WD 3	
-Q13685	UniProtKB	Repeat	214	254	.	.	.	Note=WD 4	
-Q13685	UniProtKB	Repeat	258	299	.	.	.	Note=WD 5	
-Q13685	UniProtKB	Repeat	315	354	.	.	.	Note=WD 6	
-Q13685	UniProtKB	Repeat	356	395	.	.	.	Note=WD 7	
-Q13685	UniProtKB	Repeat	398	433	.	.	.	Note=WD 8	
-Q13685	UniProtKB	Compositional bias	53	59	.	.	.	Note=Poly-Glu	
-Q13685	UniProtKB	Modified residue	20	20	.	.	.	Note=Phosphoserine;Ontology_term=ECO:0000244;evidence=ECO:0000244|PubMed:24275569;Dbxref=PMID:24275569	
-Q13685	UniProtKB	Natural variant	250	250	.	.	.	ID=VAR_037061;Note=I->V;Dbxref=dbSNP:rs2305835	
--- a/uniprot.py	Wed May 22 21:18:15 2024 +0000
+++ b/uniprot.py	Tue Aug 06 14:49:45 2024 +0000
@@ -4,6 +4,7 @@
 import sys
 import time
 import zlib
+from time import sleep
 from urllib.parse import (
     parse_qs,
     urlencode,
@@ -18,7 +19,8 @@
 )
 
 
-POLLING_INTERVAL = 3
+BATCH_SIZE = 50000  # Limit at UniProt is 100k
+POLLING_INTERVAL = 5
 API_URL = "https://rest.uniprot.org"
 
 
@@ -31,7 +33,6 @@
     try:
         response.raise_for_status()
     except requests.HTTPError:
-        print(response.json())
         raise
 
 
@@ -59,7 +60,7 @@
         check_response(request)
         j = request.json()
         if "jobStatus" in j:
-            if j["jobStatus"] == "RUNNING":
+            if j["jobStatus"] in ["NEW", "RUNNING"]:
                 print(f"Retrying in {POLLING_INTERVAL}s")
                 time.sleep(POLLING_INTERVAL)
             else:
@@ -102,7 +103,7 @@
         if file_format == "json":
             j = json.loads(decompressed.decode("utf-8"))
             return j
-        elif file_format == "tsv":
+        elif file_format in ["tsv", "gff"]:
             return [line for line in decompressed.decode("utf-8").split("\n") if line]
         elif file_format == "xlsx":
             return [decompressed]
@@ -112,7 +113,7 @@
             return decompressed.decode("utf-8")
     elif file_format == "json":
         return response.json()
-    elif file_format == "tsv":
+    elif file_format in ["tsv", "gff"]:
         return [line for line in response.text.split("\n") if line]
     elif file_format == "xlsx":
         return [response.content]
@@ -141,7 +142,7 @@
     print(f"Fetched: {n_fetched} / {total}")
 
 
-def get_id_mapping_results_search(url):
+def get_id_mapping_results_search(url, first):
     parsed = urlparse(url)
     query = parse_qs(parsed.query)
     file_format = query["format"][0] if "format" in query else "json"
@@ -163,6 +164,8 @@
     for i, batch in enumerate(get_batch(request, file_format, compressed), 1):
         results = combine_batches(results, batch, file_format)
         print_progress_batches(i, size, total)
+    if len(results) > 1 and file_format == "tsv" and not first:
+        results = results[1:]
     if file_format == "xml":
         return merge_xml_results(results)
     return results
@@ -266,20 +269,27 @@
     query = set()
     for line in args.inp:
         query.add(line.strip())
-    query = sorted(query)
+    query = list(query)
+    results = []
+    first = True  # if False the header is removed
+    while len(query) > 0:
+        batch = query[:BATCH_SIZE]
+        query = query[BATCH_SIZE:]
+        print(f"processing {len(batch)} left {len(query)}")
+        if args.tool == "map":
+            job_id = submit_id_mapping(from_db=args.f, to_db=args.t, ids=batch)
+        elif args.tool == "retrieve":
+            job_id = submit_id_mapping(from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=batch)
 
-    if args.tool == "map":
-        job_id = submit_id_mapping(from_db=args.f, to_db=args.t, ids=query)
-    elif args.tool == "retrieve":
-        job_id = submit_id_mapping(
-            from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=query
-        )
-
-    if check_id_mapping_results_ready(job_id):
-        link = get_id_mapping_results_link(job_id)
-        link = f"{link}?format={args.format}"
-        print(link)
-        results = get_id_mapping_results_search(link)
+        if check_id_mapping_results_ready(job_id):
+            link = get_id_mapping_results_link(job_id)
+            link = f"{link}?format={args.format}"
+            print(link)
+            results.extend(get_id_mapping_results_search(link, first))
+            first = False
+        print(f"got {len(results)} results so far")
+        if len(query):
+            sleep(5)
 
     if not isinstance(results, str):
         results = "\n".join(results)
--- a/uniprot.xml	Wed May 22 21:18:15 2024 +0000
+++ b/uniprot.xml	Tue Aug 06 14:49:45 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="uniprot" name="UniProt" version="0.5" profile="23.1">
+<tool id="uniprot" name="UniProt" version="0.6" profile="23.1">
     <description>ID mapping and retrieval</description>
     <requirements>
         <requirement type="package" version="2.25.1">requests</requirement>
@@ -865,7 +865,12 @@
             <param name="id_column" value="c1"/>
             <param name="format" value="gff"/>
             <param name="tool_choice" value="retrieve"/>
-            <output name="outfile_retrieve_gff" file="test2_retrieve.gff" ftype="gff" compare="sim_size" />
+            <output name="outfile_retrieve_gff" ftype="gff">
+                <assert_contents>
+                    <has_n_lines min="80"/>
+                    <has_text text="UniProtKB"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="1">
             <param name="infile" value="id_uniprot.tab" ftype="tabular"/>
@@ -873,7 +878,7 @@
             <param name="tool_choice" value="map"/>
             <param name="from" value="UniProtKB_AC-ID"/>
             <param name="to" value="Gene_Name"/>
-            <output name="outfile_map" file="test1_map.tab" ftype="tabular"/>
+            <output name="outfile_map" file="test1_map.tab" ftype="tabular" sort="true"/>
         </test>
         <test expect_num_outputs="1">
             <param name="infile" value="id_map_refseq.txt" ftype="tabular"/>
@@ -881,7 +886,7 @@
             <param name="tool_choice" value="map"/>
             <param name="from" value="RefSeq_Nucleotide"/>
             <param name="to" value="UniProtKB"/>
-            <output name="outfile_map" file="test2_map.tab" ftype="tabular"/>
+            <output name="outfile_map" file="test2_map.tab" ftype="tabular" sort="true"/>
         </test>
     </tests>
     <help><![CDATA[