Next changeset 1:cd2a41c65447 (2016-10-14) |
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/uniprot_rest_interface commit 2b8ad1bbfe098129ae32cd8311a755dff58ae97b-dirty |
added:
macros.xml test-data/id_map_refseq.txt test-data/id_uniprot.tab test-data/test1_map.tab test-data/test1_retrieve.fasta test-data/test2_map.tab test-data/test2_retrieve.gff tool_dependencies.xml tool_dependencies.xml.orig uniprot.py uniprot.xml |
b |
diff -r 000000000000 -r 48522382b6a4 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Oct 09 16:42:22 2015 -0400 |
b |
b'@@ -0,0 +1,228 @@\n+<macros>\n+<token name="@EXECUTABLE@">1.0</token>\n+ <macro name="macro-category_FROM">\n+ <param name="category_FROM" type="select" label="Choose your database category (FROM):"\n+ help="Select a databse from which your ids are coming from">\n+ <option value="uniprot">UniProt</option>\n+ <option value="oseqdb">Other sequence databases</option>\n+ <option value="3Dstrdb">3D structure databases</option>\n+ <option value="ppidb">Protein-protein interaction databases</option>\n+ <option value="chemistry">Chemistry</option>\n+ <option value="protfgdb">Protein family/group databases</option>\n+ <option value="polymorphismANDmutation">Polymorphism and mutation databases</option>\n+ <option value="2DgelDB">2D gel databases</option>\n+ <option value="ProtocolsMaterialsDB">Protocols and materials databases</option>\n+ <option value="GenomeAnnotationDB">Genome annotation databases</option>\n+ <option value="OrganismSpecificGeneDB">Organism-specific gene databases</option>\n+ <option value="phylogenomic">Phylogenomic databases</option>\n+ <option value="EnzymePathwayDB">Enzyme and pathway databases</option>\n+ <option value="GeneExpression">Gene expression databases</option>\n+ <option value="other">Other</option>\n+ </param>\n+ </macro>\n+ <macro name="macro-category_TO">\n+ <param name="category_TO" type="select" label="Choose your database category (TO):"\n+ help="Select a database which will be used for mapping">\n+ <option value="uniprot">UniProt</option>\n+ <option value="oseqdb">Other sequence databases</option>\n+ <option value="3Dstrdb">3D structure databases</option>\n+ <option value="ppidb">Protein-protein interaction databases</option>\n+ <option value="chemistry">Chemistry</option>\n+ <option value="protfgdb">Protein family/group databases</option>\n+ <option value="polymorphismANDmutation">Polymorphism and mutation databases</option>\n+ <option value="2DgelDB">2D gel databases</option>\n+ <option value="ProtocolsMaterialsDB">Protocols and 
materials databases</option>\n+ <option value="GenomeAnnotationDB">Genome annotation databases</option>\n+ <option value="OrganismSpecificGeneDB">Organism-specific gene databases</option>\n+ <option value="phylogenomic">Phylogenomic databases</option>\n+ <option value="EnzymePathwayDB">Enzyme and pathway databases</option>\n+ <option value="GeneExpression">Gene expression databases</option>\n+ <option value="other">Other</option>\n+ </param>\n+ </macro>\n+ <macro name="macro-db_uniprot_FROM">\n+ <param name="db_uniprot_FROM" type="select" label="Choose a database:"\n+ help="">\n+ <option value="ACC+ID">UniProtKB AC/ID</option>\n+ <option value="ACC">UniProtKB AC</option>\n+ <option value="ID">UniProtKB ID</option>\n+ <option value="ACC">UniProtKB AC</option>\n+ <option value="UPARC">UniParc</option>\n+ <option value="NF50">UniRef50</option>\n+ <option value="NF90">UniRef90</option>\n+ <option value="NF100">UniRef100</option>\n+ <option value="GENENAME">Gene name</option>\n+ </param>\n+ </macro>\n+ <macro name="macro-db_uniprot_TO">\n+ <param name="db_uniprot_TO" type="select" label="Choose a database:"\n+ '..b'rganismSpecificGeneDB" type="select" label="Choose a database:"\n+ help="">\n+ <option value="ARACHNOSERVER_ID">ArachnoServer</option>\n+ <option value="CGD">CGD</option>\n+ <option value="CONOSERVER_ID">ConoServer</option>\n+ <option value="DICTYBASE_ID">dictyBase</option>\n+ <option value="ECHOBASE_ID">EchoBASE</option>\n+ <option value="ECOGENE_ID">EcoGene</option>\n+ <option value="EUHCVDB_ID">euHCVdb</option>\n+ <option value="EUPATHDB_ID">EuPathDB</option>\n+ <option value="FLYBASE_ID">FlyBase</option>\n+ <option value="GENECARDS_ID">GeneCards</option>\n+ <option value="GENEFARM_ID">GeneFarm</option>\n+ <option value="GENOLIST_ID">GenoList</option>\n+ <option value="H_INVDB_ID">H-InvDB</option>\n+ <option value="HGNC_ID">HGNC</option>\n+ <option value="HPA_ID">HPA</option>\n+ <option value="LEGIOLIST_ID">LegioList</option>\n+ <option 
value="MAIZEGDB_ID">MaizeGDB</option>\n+ <option value="MIM_ID">MIM</option>\n+ <option value="MGI_ID">MGI</option>\n+ <option value="NEXTPROT_ID">neXtProt</option>\n+ <option value="ORPHANET_ID">Orphanet</option>\n+ <option value="PHARMGKB_ID">PharmGKB</option>\n+ <option value="POMBASE_ID">PomBase</option>\n+ <option value="PSEUDOCAP_ID">PseudoCAP</option>\n+ <option value="RGD_ID">RGD</option>\n+ <option value="SGD_ID">SGD</option>\n+ <option value="TAIR_ID">TAIR</option>\n+ <option value="TUBERCULIST_ID">TubercuList</option>\n+ <option value="WORMBASE_ID">WormBase</option>\n+ <option value="WORMBASE_TRS_ID">WormBase Transcript</option>\n+ <option value="WORMBASE_PRO_ID">WormBase Protein</option>\n+ <option value="XENBASE_ID">Xenbase</option>\n+ <option value="ZFIN_ID">ZFIN</option>\n+ </param>\n+ </macro>\n+ <macro name="macro-db_phylogenomic">\n+ <param name="db_phylogenomic" type="select" label="Choose a database:"\n+ help="">\n+ <option value="EGGNOG_ID">eggNOG</option>\n+ <option value="GENETREE_ID">GeneTree</option>\n+ <option value="HOGENOM_ID">HOGENOM</option>\n+ <option value="HOVERGEN_ID">HOVERGEN</option>\n+ <option value="KO_ID">KO</option>\n+ <option value="OMA_ID">OMA</option>\n+ <option value="ORTHODB_ID">OrthoDB</option>\n+ <option value="PROTCLUSTDB_ID">ProtClustDB</option>\n+ <option value="TREEFAM_ID">TreeFam</option>\n+ </param>\n+ </macro>\n+ <macro name="macro-db_EnzymePathwayDB">\n+ <param name="db_EnzymePathwayDB" type="select" label="Choose a database:"\n+ help="">\n+ <option value="BIOCYC_ID">BioCyc</option>\n+ <option value="REACTOME_ID">Reactome</option>\n+ <option value="UNIPATHWAY_ID">UniPathWay</option>\n+ </param>\n+ </macro>\n+ <macro name="macro-db_GeneExpression">\n+ <param name="db_GeneExpression" type="select" label="Choose a database:"\n+ help="">\n+ <option value="CLEANEX_ID">CleanEx</option>\n+ </param>\n+ </macro>\n+ <macro name="macro-db_other">\n+ <param name="db_other" type="select" label="Choose a database:"\n+ 
help="">\n+ <option value="CHITARS_ID">ChiTaRS</option>\n+ <option value="GENOMERNAI_ID">GenomeRNAi</option>\n+ <option value="GENEWIKI_ID">GeneWiki</option>\n+ <option value="NEXTBIO_ID">NextBio</option>\n+ </param>\n+ </macro>\n+</macros>\n' |
b |
diff -r 000000000000 -r 48522382b6a4 test-data/id_map_refseq.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/id_map_refseq.txt Fri Oct 09 16:42:22 2015 -0400 |
b |
@@ -0,0 +1,7 @@ +LN734406.1 +CAD29848.1 +CAD29848.1 +CAB85965.1 +NM_130786 +P04217 +NM_001087 |
b |
diff -r 000000000000 -r 48522382b6a4 test-data/id_uniprot.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/id_uniprot.tab Fri Oct 09 16:42:22 2015 -0400 |
b |
@@ -0,0 +1,9 @@ +Q0P8A9 +A0A077ZHN8 +A0A077ZFY8 +M5B8V9 +M5BAG7 +S0DS17 +A0A077Z587 +Q13685 +O14639 |
b |
diff -r 000000000000 -r 48522382b6a4 test-data/test1_map.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_map.tab Fri Oct 09 16:42:22 2015 -0400 |
b |
@@ -0,0 +1,10 @@ +From To +Q0P8A9 fdhC +A0A077ZHN8 TTRE_0000819801 +A0A077ZFY8 TTRE_0000758701 +M5B8V9 CMN_01519 +M5BAG7 cydC +S0DS17 FFUJ_00006 +A0A077Z587 TTRE_0000309301 +Q13685 AAMP +O14639 ABLIM1 |
b |
diff -r 000000000000 -r 48522382b6a4 test-data/test1_retrieve.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_retrieve.fasta Fri Oct 09 16:42:22 2015 -0400 |
b |
@@ -0,0 +1,104 @@ +>tr|S0DS17|S0DS17_GIBF5 Related to cytochrom P450 OS=Gibberella fujikuroi (strain CBS 195.34 / IMI 58289 / NRRL A-6831) GN=FFUJ_00006 PE=3 SV=1 +MSYQSILLRQVNSLCDNLEEVARDENGGLIDMAMQSDYFTFDVMSEVIFGMAYNALKDTS +YRFVTGALGSSNIRIGTLVQSPLPAMCRIDKYLFPESIQGRNKFLGFIGSLLRDRSKASF +AGNGNVFSFLETAKDPDGGNQLSKSEIRAECATLVAAGTDTSSSTLAATLFYLSRNSKCY +SRVSEEVRNAFSSHQDIKIGPELNSCVYLRACIEETLRMSPPVGAALWREIGPGGMNIGP +LTLPAGVDVGTGIYSLHHNAAYHPEPFKYLPERWLVGEGSSTSESVELARSAFAPFSRGP +RSCVGKGFAYHELTLTIAHILHRFDFSATEEDFALRHGSEGPGGINEFLLHDHVTGARSG +PLLQFSMRR +>tr|M5BAG7|M5BAG7_9MICO ABC transporter, fused permease/ABC transporter involved in the biosynthesis of cytochrom bd, fused permease/ATP-binding protein OS=Clavibacter michiganensis subsp. nebraskensis NCPPB 2581 GN=cydC PE=3 SV=1 +MNRDGVLRLAQPPTRRTLPGLLAGLASAVGAVALLATSAWLITRASEQPPILFLGMAIVG +VRAFALGRAAFRYLERITSHDAAFRALATLRVGVFERLLPFAPAGLRDTRRGDLLARLVG +DVDRLQDLPLRVVQPLAVSVVVQAASVAVVGAVLPAAGIALAVVLGVALVVGIGATTALA +GRAETRIAPLRARLQDLVLDFVGGLDVLTSFGAVDDRLAAIDRAATELRRAELRSAAAAG +VTTGVVLAGTGAVAGWTVLQGVPGLASGTLDPAWLALAALVPLALVEQATAVPLAVQAWR +RVRTSAERVAGVVPETVPDEIPREPDDAADAQPVTADASPAGTTLEVRDLVTRWPGADED +ALAPVSLVVRPGETVVVRGPSGSGKSSLAAALARFLESRGAYELDGRDARSMPPSAVRRI +VGLCEQAPHLFDASIRQNLLFARDDATDDELVAVLARVGLADWTAGRGGLDARVGDRGGL +VSGGQAQRIALARALLADFPVLVLDEPTADVDAERARAVLRDVLTAARDRGPGVLLLTHT +DVPHDLVDRTVELRVAGDRVRTE +>tr|A0A077Z587|A0A077Z587_TRITR Kelch 3 and Kelch 4 and Cytochrom B561 domain con taining protein OS=Trichuris trichiura GN=TTRE_0000309301 PE=4 SV=1 +MGSQQAADETQKVVERIILNINVRKDKRSFGLGIKIKKGNVFVSSIRPGSIAEDHFKLYD +VIKDVNGSRIDSRELCRDLIRTHKVLTVTVERELSKNIEQPGQGDRKSSTECPYLETAQP +FSEMEKNQWSKLPADVREILKKQFATASQYGLQAPARTEQPTQTEHRKVSVLENIVRFEI +TSDVPRDKSLRKPSDGQQLYKIVASYQCISLLADQMIIYLWLRIGWLILTFNLFVTQAVS +LHWKRVAEYGKNPRPQARKHAAFGYDMLRHYVVLFGGQGERDENYNDTWIFDVLAGRWYA +VHRNVAPPAMHGAAFGLNDGKFYLVGGCDQTQCFDDVWVFLTSTFEWHKLAPKGELRPTG +RLGAIGGFYATGSHIIYGLGTTINDQFLEDIFFFDIPMQRWYKIIERLFVYSPFTPHPRR 
+HMSSLMVSPSEVLLFGGCSKHGQCPTGDAWLFNVQSHVWQSLPFCPSPRMEASAVTLLSS +DDVEPKPAAVLIYGGRRYTSQHLLGSPMLEPDEVVIYDLVGKSWSIRSSKYEDSSGLPEQ +RSAASTASTLTEVYMFGGEAYDGRLLDDFWMLAGDWRESATNQKCQQVNFNLLALHGLLM +SASFALILPAGALWALYKSARVTKQKKSGGWTMTHTIAQTCGMVIVAAGAVCSIQAKRDN +GKHFGSVHGVLGIIVIALLCVQVALGFSKSLIRTEAQRRTINRVHFWLAIVLLPLAFLNI +ILGLQLIAVPVGLLLGFFVHIFCLLAALGLILPILRFRKANRSVAFPPPNDD +>tr|A0A077ZFY8|A0A077ZFY8_TRITR PmbA TldD and Mur ligase M and Mur ligase and Mur ligase C and Cytochrom B562 domain containing protein OS=Trichuris trichiura GN=TTRE_0000758701 PE=4 SV=1 +MGGLAMLARQLGHEVTGSDANVYPPMSTLLEKQGIELIQGYDASQLDPQPDLVIIGNAMT +RGNPCVEAVLEKNIPYMSGPQWLHDFVLRDRWVLAVAGTHGKTTTAGMATWILEQCGYKP +GFVIGGVPGNFEVSARLGESNFFVIEADEYDCAFFDKRSKFVHYCPRTLILNNLEFDHAD +IFDDLKAIQKQFHHLVRIVPGQGRIIWPENDINLKQTMAMGCWSEQELVGEQGHWQAKKL +TTDASEWEVLLDGEKVGEVKWSLVGEHNMHNGLMAIAAARHVGVAPADAANALGSFINAR +RRLELRGEANGVTVYDDFAHHPTAILATLAALRGKVGGTARIIAVLEPRSNTMKMGICKD +DLAPSLGRADEVFLLQPAHIPWQVAEVAEACVQPAHWSGDVDTLADMVVKTAQPGDHILV +MSNGGFGGIHQKLLDDFRETLYIMALAMKVISQVEAQRKILEEAVSTALELASGKSDGAE +VAVSKTTGISVSTRYGEVENVEFNSDGALGITVYHQNRKGSASSTDLSPQAIARTVQAAL +DIARYTSPDPCAGVADKELLAFDAPDLDLFHPAEVSPDEAIELAARAEQAALQADKRITN +TEGGSFNSHYGVKVFGNSHGMLQGYCSTRHSLSSCVIAEENGDMERDYAYTIGRAMSDLQ +TPEWVGADCARRTLSRLSPRKLSTMKAPVIFANEVATGLFGHLVGAIAGGSVYRKSTFLL +DSLGKQILPDWLTIEEHPHLLKGLASTPFDSEGVRTERRDIIKDGILTQWLLTSYSARKL +GLKSTGHAGGIHNWRIAGQGLSFEQMLKEMGTGLVVTELMGQGVSAITGDYSRGAAGFWV +ENGEIQYPVSEITIAVSSLVFSSASFAADLEDNMETLNDNLKVVEKADNAAQVKDALTKM +RAAALDAQKATPPKLEGKSPDSPEMKDFRHGFDILVGQIDDALKLANEGKVKEAQAAAEQ +LKTTRNAYHQKYR +>sp|O14639|ABLM1_HUMAN Actin-binding LIM protein 1 OS=Homo sapiens GN=ABLIM1 PE=1 SV=3 +MPAFLGLKCLGKLCSSEKSKVTSSERTSARGSNRKRLIVEDRRVSGTSFTAHRRATITHL +LYLCPKDYCPRGRVCNSVDPFVAHPQDPHHPSEKPVIHCHKCGEPCKGEVLRVQTKHFHI +KCFTCKVCGCDLAQGGFFIKNGEYLCTLDYQRMYGTRCHGCGEFVEGEVVTALGKTYHPN +CFACTICKRPFPPGDRVTFNGRDCLCQLCAQPMSSSPKETTFSSNCAGCGRDIKNGQALL +ALDKQWHLGCFKCKSCGKVLTGEYISKDGAPYCEKDYQGLFGVKCEACHQFITGKVLEAG 
+DKHYHPSCARCSRCNQMFTEGEEMYLQGSTVWHPDCKQSTKTEEKLRPTRTSSESIYSRP +GSSIPGSPGHTIYAKVDNEILDYKDLAAIPKVKAIYDIERPDLITYEPFYTSGYDDKQER +QSLGESPRTLSPTPSAEGYQDVRDRMIHRSTSQGSINSPVYSRHSYTPTTSRSPQHFHRP +GNEPSSGRNSPLPYRPDSRPLTPTYAQAPKHFHVPDQGINIYRKPPIYKQHAALAAQSKS +SEDIIKFSKFPAAQAPDPSETPKIETDHWPGPPSFAVVGPDMKRRSSGREEDDEELLRRR +QLQEEQLMKLNSGLGQLILKEEMEKESRERSSLLASRYDSPINSASHIPSSKTASLPGYG +RNGLHRPVSTDFAQYNSYGDVSGGVRDYQTLPDGHMPAMRMDRGVSMPNMLEPKIFPYEM +LMVTNRGRNKILREVDRTRLERHLAPEVFREIFGMSIQEFDRLPLWRRNDMKKKAKLF +>tr|M5B8V9|M5B8V9_9MICO ABC transporter involved in the biosynthesis of cytochrom bd, fused permease/ATP-binding protein OS=Clavibacter michiganensis subsp. nebraskensis NCPPB 2581 GN=CMN_01519 PE=3 SV=1 +MKPLDPRLLRHSASARTMLAVGAVVSVVQTAALVAFCWSLTQLVVRAIGGADQAALAPVL +ALAVGSAVVRGAAAWLLDVTGARGAARVTAELRRRALRAIADLGPAWTAARSRGRLATIV +GPGLDALDPYFARYVPQLILTALATPIVVAVLLLSDPLTGVTVLVTLPVIPVFMVLVGWA +TQEVQRRQWSRLTELASSFLEVVDGLSTLLVFRRARRQTARIRRVTEEYRVETMRVLRIS +FLSGFVLELAASLSVALVAVSVGVRLIGGQLDLEVGLFVLLLAPEAFLPIRQVGVQFHAA +AEGVAAADDVLGILEEERAARATRPVPGPATATPPAGDALVIRDLAVARGDRAVLSGVSA +RFPRGRVTAVTGPSGVGKSSLLGAMLGHLPAGGAAGWIDDDASSLRPPVPTEIAWAGQRP +GLVAGTVRENVALGVADPDDALVRRALALAAADGIDPDLVLGVGGQGLSGGQAQRVAVAR +AVHRALALDCPLVLLDEPSSALDAATEERLAAGIRALADQGRAVVVVTHRGALVRAADAE +LRLGGASGEDDAPAAVGSSVGAGRVAPARIAPEPAWRAQVAP +>tr|A0A077ZHN8|A0A077ZHN8_TRITR HAMP and MCPsignal and TarH and Cytochrom B N dom ain containing protein OS=Trichuris trichiura GN=TTRE_0000819801 PE=4 SV=1 +MEFRGFFPRSDRPLINMVHVSCGISILVLMVVRLLLRLKYPTPPIIPKPKPMMTGLAHLG +HLVIYLLFIALPVIGLVMMYNRGNPWFAFGLTMPYASEANFERVDSLKSWHETLANLGYF +VIGSALAGYFLWQADRDQRDVTAEIEIRTGLANSSDFLRSARINMIQAGAASRIAEMEAM +KRNIAQAESEIKQSQQGYRAYQNRPVKTPADEALDTELNQRFQAYITGMQPMLKYAKNGM +FEAIINHESEQIRPLDNAYTDILNKAVKIRSTRANQLAELAHQRTRLGGMFMIGAFVLAL +VMTLITFMVLRRIVIRPLQHAAQRIEKIASGDLTMNDEPAGRNEIGRLSRHLQQMQHSLG +MTVGTVRQGAEEIYRGTSEISAGNADLSSRTEEQAAAIEQTAASMEQLTATVKQNADNAH +HASKLAQEASIKASDGGQTVSGVVKTMGAISTSSKKISEITAVINSIAFQTNILALNAAV 
+EAARAGEQGRGFAVVASEVRTLASRSAQAAKEIEGLISESVRLIDLGSDEVATAGKTMST +IVDAVASVTHIMQEIAAASDEQSRGITQVSQAISEMDKVTQQNASLVEEASAAAVSLEEQ +AARLTEAVDVFRLHKHSVSAEPRGAGEPVSFATV +>tr|Q0P8A9|Q0P8A9_CAMJE Putative formate dehydrogenase, cytochrom B subunit OS=Campylobacter jejuni subsp. jejuni serotype O:2 (strain NCTC 11168) GN=fdhC PE=4 SV=1 +MRKVFVTLLLSVVSLFAYGSERMGQDTQIWDFHRITNIPNYDTFGKLWTTLQGEYIATIA +LIAVIAVLSAFALHYMVIGPKQFSHDGKKIYAFTLFERLFHFIAAISWVILVPTGFVMMF +GEVFGGGVFVRVCKNLHAFATILFIISIIPMFLCWIKRMLPASYDIRWMMIVGGYLSKIK +RPVPAGKFNFGQKSWYYIAVFGGFLMIITGGFMYFLDFNSTAIQGLFGLTQIELLRISAI +VHNFLGIVCAVFFGVHIYMAVFAIKGSIHSMISGYKEEEEVYILHSYWYKELSNKKQIEP +SFSYDPNVKI +>sp|Q13685|AAMP_HUMAN Angio-associated migratory cell protein OS=Homo sapiens GN=AAMP PE=1 SV=2 +MESESESGAAADTPPLETLSFHGDEEIIEVVELDPGPPDPDDLAQEMEDVDFEEEEEEEG +NEEGWVLEPQEGVVGSMEGPDDSEVTFALHSASVFCVSLDPKTNTLAVTGGEDDKAFVWR +LSDGELLFECAGHKDSVTCAGFSHDSTLVATGDMSGLLKVWQVDTKEEVWSFEAGDLEWM +EWHPRAPVLLAGTADGNTWMWKVPNGDCKTFQGPNCPATCGRVLPDGKRAVVGYEDGTIR +IWDLKQGSPIHVLKGTEGHQGPLTCVAANQDGSLILTGSVDCQAKLVSATTGKVVGVFRP +ETVASQPSLGEGEESESNSVESLGFCSVMPLAAVGYLDGTLAIYDLATQTLRHQCQHQSG +IVQLLWEAGTAVVYTCSLDGIVRLWDARTGRLLTDYRGHTAEILDFALSKDASLVVTTSG +DHKAKVFCVQRPDR |
b |
diff -r 000000000000 -r 48522382b6a4 test-data/test2_map.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test2_map.tab Fri Oct 09 16:42:22 2015 -0400 |
b |
@@ -0,0 +1,5 @@ +From To +NM_130786 A1BG_HUMAN +NM_130786 V9HWD8_HUMAN +NM_001087 A0A024R410_HUMAN +NM_001087 AAMP_HUMAN |
b |
diff -r 000000000000 -r 48522382b6a4 test-data/test2_retrieve.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test2_retrieve.gff Fri Oct 09 16:42:22 2015 -0400 |
b |
@@ -0,0 +1,55 @@ +##gff-version 3 +##sequence-region S0DS17 1 369 +##sequence-region M5BAG7 1 563 +##sequence-region A0A077Z587 1 772 +##sequence-region A0A077ZFY8 1 973 +##sequence-region O14639 1 778 +O14639 UniProtKB Chain 1 778 . . . ID=PRO_0000075697;Note=Actin-binding LIM protein 1 +O14639 UniProtKB Domain 97 156 . . . Note=LIM zinc-binding 1;evidence=ECO:0000255|PROSITE-ProRule:PRU00125 +O14639 UniProtKB Domain 156 216 . . . Note=LIM zinc-binding 2;evidence=ECO:0000255|PROSITE-ProRule:PRU00125 +O14639 UniProtKB Domain 224 283 . . . Note=LIM zinc-binding 3;evidence=ECO:0000255|PROSITE-ProRule:PRU00125 +O14639 UniProtKB Domain 283 343 . . . Note=LIM zinc-binding 4;evidence=ECO:0000255|PROSITE-ProRule:PRU00125 +O14639 UniProtKB Domain 710 778 . . . Note=HP;evidence=ECO:0000255|PROSITE-ProRule:PRU00595 +O14639 UniProtKB Coiled coil 590 614 . . . evidence=ECO:0000255 +O14639 UniProtKB Modified residue 367 367 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:19690332 +O14639 UniProtKB Modified residue 373 373 . . . Note=Phosphotyrosine;evidence=ECO:0000244|PubMed:19690332 +O14639 UniProtKB Modified residue 396 396 . . . Note=Phosphotyrosine;evidence=ECO:0000244|PubMed:15592455 +O14639 UniProtKB Modified residue 426 426 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:19690332,ECO:0000244|PubMed:24275569 +O14639 UniProtKB Modified residue 431 431 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:19690332 +O14639 UniProtKB Modified residue 433 433 . . . Note=Phosphothreonine;evidence=ECO:0000244|PubMed:19690332 +O14639 UniProtKB Modified residue 435 435 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:19690332,ECO:0000244|PubMed:20068231,ECO:0000244|PubMed:21406692,ECO:0000244|PubMed:24275569 +O14639 UniProtKB Modified residue 439 439 . . . Note=Phosphotyrosine;evidence=ECO:0000244|PubMed:15144186 +O14639 UniProtKB Modified residue 455 455 . . . 
Note=Phosphoserine;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:24275569 +O14639 UniProtKB Modified residue 458 458 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:18669648 +O14639 UniProtKB Modified residue 587 587 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:18669648 +O14639 UniProtKB Modified residue 640 640 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:18669648,ECO:0000244|PubMed:20068231 +O14639 UniProtKB Modified residue 655 655 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:18669648 +O14639 UniProtKB Modified residue 706 706 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:24275569 +O14639 UniProtKB Alternative sequence 1 316 . . . ID=VSP_012099;Note=In isoform 3%2C isoform 4 and isoform 5.;evidence=ECO:0000303|PubMed:14702039,ECO:0000303|PubMed:15489334,ECO:0000303|PubMed:17974005 +O14639 UniProtKB Alternative sequence 1 81 . . . ID=VSP_012100;Note=In isoform 2 and isoform 6.;evidence=ECO:0000303|PubMed:14702039,ECO:0000303|PubMed:7584044 +O14639 UniProtKB Alternative sequence 347 347 . . . ID=VSP_041185;Note=In isoform 5 and isoform 6.;evidence=ECO:0000303|PubMed:14702039 +O14639 UniProtKB Alternative sequence 348 373 . . . ID=VSP_012101;Note=In isoform 4.;evidence=ECO:0000303|PubMed:15489334 +O14639 UniProtKB Alternative sequence 480 514 . . . ID=VSP_012102;Note=In isoform 3%2C isoform 4 and isoform 5.;evidence=ECO:0000303|PubMed:14702039,ECO:0000303|PubMed:15489334,ECO:0000303|PubMed:17974005 +O14639 UniProtKB Alternative sequence 531 531 . . . ID=VSP_057209;Note=In isoform 6.;evidence=ECO:0000303|PubMed:14702039 +O14639 UniProtKB Natural variant 434 434 . . . ID=VAR_050141;Dbxref=dbSNP:rs11593544 +O14639 UniProtKB Natural variant 637 637 . . . ID=VAR_050142;Dbxref=dbSNP:rs7091419 +O14639 UniProtKB Sequence conflict 499 499 . . . evidence=ECO:0000305 +O14639 UniProtKB Sequence conflict 532 532 . . . evidence=ECO:0000305 +O14639 UniProtKB Sequence conflict 563 563 . . . 
evidence=ECO:0000305 +O14639 UniProtKB Sequence conflict 578 578 . . . evidence=ECO:0000305 +##sequence-region M5B8V9 1 582 +##sequence-region A0A077ZHN8 1 634 +##sequence-region Q0P8A9 1 310 +##sequence-region Q13685 1 434 +Q13685 UniProtKB Chain 1 434 . . . ID=PRO_0000050832;Note=Angio-associated migratory cell protein +Q13685 UniProtKB Repeat 89 129 . . . Note=WD 1 +Q13685 UniProtKB Repeat 132 171 . . . Note=WD 2 +Q13685 UniProtKB Repeat 173 212 . . . Note=WD 3 +Q13685 UniProtKB Repeat 214 254 . . . Note=WD 4 +Q13685 UniProtKB Repeat 258 299 . . . Note=WD 5 +Q13685 UniProtKB Repeat 315 354 . . . Note=WD 6 +Q13685 UniProtKB Repeat 356 395 . . . Note=WD 7 +Q13685 UniProtKB Repeat 398 433 . . . Note=WD 8 +Q13685 UniProtKB Compositional bias 53 59 . . . Note=Poly-Glu +Q13685 UniProtKB Modified residue 20 20 . . . Note=Phosphoserine;evidence=ECO:0000244|PubMed:24275569 +Q13685 UniProtKB Natural variant 250 250 . . . ID=VAR_037061;Dbxref=dbSNP:rs2305835 |
b |
diff -r 000000000000 -r 48522382b6a4 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Oct 09 16:42:22 2015 -0400 |
b |
@@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="requests" version="2.7"> + <repository changeset_revision="ad6b0c21d92b" name="package_python_2_7_requests_2_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |
b |
diff -r 000000000000 -r 48522382b6a4 tool_dependencies.xml.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml.orig Fri Oct 09 16:42:22 2015 -0400 |
b |
@@ -0,0 +1,11 @@ +<?xml version="1.0"?> +<tool_dependency> +<<<<<<< HEAD:tools/uniprot_id_mapping/tool_dependencies.xml + <package name="requests" version="2.7"> + <repository name="package_requests_2_7" owner="iuc" /> +======= + <package name="request" version="2.7"> + <repository name="package_python_2_7_request_2_7" owner="iuc" /> +>>>>>>> dc07b37de57cdd358da4ecabd9558736d5b80f97:tools/uniprot_rest_interface/tool_dependencies.xml + </package> +</tool_dependency> |
b |
diff -r 000000000000 -r 48522382b6a4 uniprot.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/uniprot.py Fri Oct 09 16:42:22 2015 -0400 |
[ |
#!/usr/bin/env python
"""
uniprot python interface
to access the uniprot database

Based on work from Jan Rudolph: https://github.com/jdrudolph/uniprot
available services:
    map
    retrieve
"""

import argparse
import sys

import requests

# Base URL of the UniProt web services; the tool path ('batch/' or
# 'mapping/') is appended to it for every request.
# NOTE(review): this is a plain-http endpoint. If the server answers with a
# redirect, the POST body may not be re-sent -- confirm the endpoint.
url = 'http://www.uniprot.org/'

# Maximum number of identifiers sent in one batch-retrieval request.
BATCH_SIZE = 100


def _unique_ids(query):
    """Split `query` on any whitespace and drop duplicate identifiers.

    The first-seen order is kept so that the requests (and therefore the
    output) are deterministic. Blank lines and stray '\\r' characters do not
    produce empty identifiers.
    """
    seen = set()
    ids = []
    for identifier in query.split():
        if identifier not in seen:
            seen.add(identifier)
            ids.append(identifier)
    return ids


def _retrieve(query, format='txt'):
    """_retrieve is not meant for use with the python interface, use `retrieve`
    instead

    Args:
        query: whitespace/newline separated identifiers as one string
        format: output format requested from the batch service

    Returns:
        the concatenated response text of all batch requests ('' when
        `query` contains no identifier)

    Raises:
        requests.HTTPError: when the service answers with an error status
    """
    tool = 'batch/'

    ids = _unique_ids(query)
    data = {'format': format}

    pages = []
    for start in range(0, len(ids), BATCH_SIZE):
        chunk = ids[start:start + BATCH_SIZE]
        response = requests.post(url + tool, data=data,
                                 files={'file': ' '.join(chunk)})
        # Fail loudly instead of passing an HTML error page on as a result.
        response.raise_for_status()
        pages.append(response.text)
    return ''.join(pages)


def retrieve(ids, format='txt'):
    """ request entries by uniprot acc using batch retrieval

    Args:
        ids: id or list of ids to retrieve
        format: txt by default

    Help:
        possible formats:
        txt, xml, rdf, fasta, gff"""
    if not isinstance(ids, (list, tuple)):
        ids = [ids]
    return _retrieve(' '.join(ids), format)


def _map(query, f, t, format='tab'):
    """ _map is not meant for use with the python interface, use `map` instead

    Args:
        query: whitespace separated identifiers as one string
        f: abbreviation of the source database
        t: abbreviation of the target database
        format: output format requested from the mapping service

    Returns:
        the response text of the mapping service

    Raises:
        requests.HTTPError: when the service answers with an error status
    """
    tool = 'mapping/'

    data = {
        'from': f,
        'to': t,
        'format': format,
        'query': query,
    }
    response = requests.post(url + tool, data=data)
    # Fail loudly instead of passing an HTML error page on as a result.
    response.raise_for_status()
    return response.text


def _parse_mapping(page):
    """Turn a tab-separated mapping table into a dict of id -> set of targets.

    The first line is skipped as the header. Rows with fewer than two
    columns (e.g. blank lines) are ignored and only the first two columns
    of wider rows are used.
    """
    result = {}
    for row in page.splitlines()[1:]:
        fields = row.split('\t')
        if len(fields) < 2:
            continue
        result.setdefault(fields[0], set()).add(fields[1])
    return result


def map(ids, f, t, format='tab'):
    """ map a list of ids from one format onto another using uniprots mapping api

    Args:
        ids: id or list of ids to be mapped
        f: from ACC | P_ENTREZGENEID | ...
        t: to ...
        format: tab by default

    Returns:
        dict that maps every source id to the set of ids it was mapped to

    Help:
        for a list of all possible mappings visit
        'http://www.uniprot.org/faq/28'
    """
    if not isinstance(ids, (list, tuple)):
        ids = [ids]
    page = _map(' '.join(ids), f, t, format)
    return _parse_mapping(page)


def main(argv=None):
    """Command line entry point: `map` or `retrieve` ids read from a file.

    Args:
        argv: argument list to parse; None means sys.argv[1:]
    """
    parser = argparse.ArgumentParser(description='retrieve uniprot mapping')
    subparsers = parser.add_subparsers(dest='tool')

    mapping = subparsers.add_parser('map')
    mapping.add_argument('f', help='from')
    mapping.add_argument('t', help='to')
    mapping.add_argument('inp', nargs='?', type=argparse.FileType('r'),
                         default=sys.stdin, help='input file (default: stdin)')
    mapping.add_argument('out', nargs='?', type=argparse.FileType('w'),
                         default=sys.stdout, help='output file (default: stdout)')
    mapping.add_argument('--format', default='tab', help='output format')

    # Named *_parser so the module-level `retrieve` function is not rebound.
    retrieve_parser = subparsers.add_parser('retrieve')
    retrieve_parser.add_argument('inp', metavar='in', nargs='?',
                                 type=argparse.FileType('r'),
                                 default=sys.stdin,
                                 help='input file (default: stdin)')
    retrieve_parser.add_argument('out', nargs='?', type=argparse.FileType('w'),
                                 default=sys.stdout,
                                 help='output file (default: stdout)')
    retrieve_parser.add_argument('-f', '--format', default='txt',
                                 help='specify output format')

    args = parser.parse_args(argv)
    # Reject a missing sub-command before blocking on stdin.
    if args.tool not in ('map', 'retrieve'):
        parser.error('a tool must be given: map or retrieve')

    query = args.inp.read()

    if args.tool == 'map':
        args.out.write(_map(query, args.f, args.t, args.format))
    else:
        args.out.write(_retrieve(query, format=args.format))


if __name__ == '__main__':
    main()
b |
diff -r 000000000000 -r 48522382b6a4 uniprot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/uniprot.xml Fri Oct 09 16:42:22 2015 -0400 |
[ |
b'@@ -0,0 +1,355 @@\n+<tool id="uniprot" name="UniProt" version="0.1">\n+ <description>ID mapping and retrieval</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <requirements>\n+ <requirement type="package" version="2.7">requests</requirement>\n+ </requirements>\n+ <stdio>\n+ <exit_code range="1:" />\n+ <exit_code range=":-1" />\n+ <regex match="Error:" />\n+ <regex match="Exception:" />\n+ </stdio>\n+ <version_command>echo "UniProt ID mapping for Galaxy in version 0.1"</version_command>\n+ <command><![CDATA[\n+\n+ cut -f ${id_column} $infile > id_file.tabular &&\n+\n+ $__tool_directory__/uniprot.py\n+\n+ #if $tool.tool_choice == "retrieve":\n+ retrieve -f $tool.format $infile ./output\n+ #elif $tool.tool_choice == "map":\n+ map\n+\n+ #if $tool.from.category_FROM == "uniprot":\n+ $tool.from.db_uniprot_FROM\n+ #elif $tool.from.category_FROM == "oseqdb":\n+ $tool.from.db_oseqdb\n+ #elif $tool.from.category_FROM == "3Dstrdb":\n+ $tool.from.db_3Dstrdb\n+ #elif $tool.from.category_FROM == "ppidb":\n+ $tool.from.db_ppidb\n+ #elif $tool.from.category_FROM == "chemistry":\n+ $tool.from.db_chemistry\n+ #elif $tool.from.category_FROM == "protfgdb":\n+ $tool.from.db_protfgdb\n+ #elif $tool.from.category_FROM == "polymorphismANDmutation":\n+ $tool.from.db_polymorphismANDmutation\n+ #elif $tool.from.category_FROM == "db_2DgelDB":\n+ $tool.from.db_2DgelDB\n+ #elif $tool.from.category_FROM == "ProtocolsMaterialsDB":\n+ $tool.from.ProtocolsMaterialsDB\n+ #elif $tool.from.category_FROM == "db_GenomeAnnotationDB":\n+ $tool.from.db_GenomeAnnotationDB\n+ #elif $tool.from.category_FROM == "db_OrganismSpecificGeneDB":\n+ $tool.from.db_OrganismSpecificGeneDB\n+ #elif $tool.from.category_FROM == "db_phylogenomic":\n+ $tool.from.db_phylogenomic\n+ #elif $tool.from.category_FROM == "db_EnzymePathwayDB":\n+ $tool.from.db_EnzymePathwayDB\n+ #elif $tool.from.category_FROM == "db_GeneExpression":\n+ $tool.from.db_GeneExpression\n+ #elif $tool.from.category_FROM == 
"db_other":\n+ $tool.from.db_other\n+ #end if\n+\n+ #if $tool.to.category_TO == "uniprot":\n+ $tool.to.db_uniprot_TO\n+ #elif $tool.to.category_TO == "oseqdb":\n+ $tool.to.db_oseqdb\n+ #elif $tool.to.category_TO == "3Dstrdb":\n+ $tool.to.db_3Dstrdb\n+ #elif $tool.to.category_TO == "ppidb":\n+ $tool.to.db_ppidb\n+ #elif $tool.to.category_TO == "chemistry":\n+ $tool.to.db_chemistry\n+ #elif $tool.to.category_TO == "protfgdb":\n+ $tool.to.db_protfgdb\n+ #elif $tool.to.category_TO == "polymorphismANDmutation":\n+ $tool.to.db_polymorphismANDmutation\n+ #elif $tool.to.category_TO == "db_2DgelDB":\n+ $tool.to.db_2DgelDB\n+ #elif $tool.to.category_TO == "ProtocolsMaterialsDB":\n+ $tool.to.ProtocolsMaterialsDB\n+ #elif $tool.to.category_TO == "db_GenomeAnnotationDB":\n+ $tool.to.db_GenomeAnnotationDB\n+ #elif $tool.frtoom.category_TO == "db_OrganismSpecificGeneDB":\n+ $tool.to.db_OrganismSpecificGeneDB\n+ #elif $tool.to.category_TO == "db_phylogenomic":\n+ $tool.to.db_phylogenomic\n+ #elif $tool.to.category_TO == "db_EnzymePathwayDB":\n+ $tool.to.db_EnzymePathwayDB\n+ #elif $tool.to.category_TO == "db_GeneExpression":\n+ $tool.to.db_GeneExpression\n+ #elif $tool.to.category_TO == "db_other'..b'abular"/>\n+ <param name="id_column" value="c1"/>\n+ <param name="format" value="gff"/>\n+ <param name="tool_choice" value="retrieve"/>\n+ <output name="outfile_retrieve" file="test2_retrieve.gff" ftype="gff"/>\n+ </test>\n+ <test>\n+ <param name="infile" value="id_uniprot.tab" ftype="tabular"/>\n+ <param name="id_column" value="c1"/>\n+ <param name="tool_choice" value="map"/>\n+ <param name="category_FROM" value="uniprot"/>\n+ <param name="db_uniprot_FROM" value="ID"/>\n+ <param name="category_TO" value="uniprot"/>\n+ <param name="db_uniprot_TO" value="GENENAME"/>\n+ <output name="outfile_map" file="test1_map.tab" ftype="tabular"/>\n+ </test>\n+ <test>\n+ <param name="infile" value="id_map_refseq.txt" ftype="tabular"/>\n+ <param name="id_column" value="c1"/>\n+ <param 
name="tool_choice" value="map"/>\n+ <param name="category_FROM" value="oseqdb"/>\n+ <param name="db_oseqdb" value="REFSEQ_NT_ID"/>\n+ <param name="category_TO" value="uniprot"/>\n+ <param name="db_uniprot_TO" value="ID"/>\n+ <output name="outfile_map" file="test2_map.tab" ftype="tabular"/>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+\n+.. class:: infomark\n+\n+**What it does** \n+\n+This tool provides access to the UniProt API. You can retrieve sequence informations given a list of sequence identifiers or map\n+identifiers between different databases.\n+Hence, this tool offers you two modes: *map* and *retrieve*.\n+\n+-----\n+\n+**INPUT**\n+\n+The input is a list of IDs.\n+\n+*example*:\n+\n+Q0P8A9\n+A0A077ZHN8\n+A0A077ZFY8\n+M5B8V9\n+M5BAG7\n+S0DS17\n+....\n+\n+-----\n+\n+**MAP OUTPUT EXAMPLES**\n+\n+FROM refseq TO embl::\n+\n+ From To\n+ NM_130786 A1BG_HUMAN\n+ NM_130786 V9HWD8_HUMAN\n+ NM_001087 A0A024R410_HUMAN\n+ NM_001087 AAMP_HUMAN\n+ \n+FROM uniprot TO genename::\n+\n+ From To\n+ Q0P8A9 fdhC\n+ A0A077ZHN8 TTRE_0000819801\n+ A0A077ZFY8 TTRE_0000758701\n+ M5B8V9 CMN_01519\n+ M5BAG7 cydC\n+ S0DS17 FFUJ_00006\n+ A0A077Z587 TTRE_0000309301\n+ Q13685 AAMP\n+ O14639 ABLIM1\n+\n+-----\n+\n+**RETRIEVE OUTPUT EXAMPLES**\n+\n+retrieve gff::\n+\n+ #gff-version 3\n+ #sequence-region S0DS17 1 369\n+ #sequence-region M5BAG7 1 563\n+ #sequence-region A0A077Z587 1 772\n+ #sequence-region A0A077ZFY8 1 973\n+ #sequence-region O14639 1 778\n+ O14639\tUniProtKB\tChain\t1\t778\t.\t.\t.\tID=PRO_0000075697;Note=Actin-binding LIM protein 1\t\n+ O14639\tUniProtKB\tDomain\t97\t156\t.\t.\t.\tNote=LIM zinc-binding 1;evidence=ECO:0000255|PROSITE-ProRule:PRU00125\t\n+ O14639\tUniProtKB\tDomain\t156\t216\t.\t.\t.\tNote=LIM zinc-binding 2;evidence=ECO:0000255|PROSITE-ProRule:PRU00125\t\n+ O14639\tUn...\n+\n+retrieve fasta::\n+\n+ >tr|S0DS17|S0DS17_GIBF5 Related to cytochrom P450 OS=Gibberella fujikuroi (strain CBS 195.34 / IMI 58289 / NRRL A-6831) GN=FFUJ_00006 PE=3 SV=1\n+ 
MSYQSILLRQVNSLCDNLEEVARDENGGLIDMAMQSDYFTFDVMSEVIFGMAYNALKDTS\n+ YRFVTGALGSSNIRIGTLVQSPLPAMCRIDKY...\n+ >tr|M5BAG7|M5BAG7_9MICO ABC transporter, fused permease/ABC transporter involved in the biosynthesis of cytochrom bd, fused permease/ATP-binding protein OS=Clavibacter michiganensis subsp. nebraskensis NCPPB 2581 GN=cydC PE=3 SV=1\n+ MNRDGVLRLAQPPTRRTLPGLLAGLASAVGAVALLATSAWLITRASEQPPILFLGMAIVG\n+ VRAFALGRAAFRYLERITSHDAAFRALATLRV...\n+ >tr|A0A077Z587|A0A077Z587_TRITR Kelch 3 and Kelch 4 and Cytochrom B561 domain con taining protein OS=Trichuris trichiura GN=TTRE_0000309301 PE=4 SV=1\n+ MGSQQAADETQKVVERIILNINVRKDKRSFGLGIKIKKGNVFVSSIRPGSIAEDHFKLYD\n+ VIKDVNGSRIDSRELCRDLIRTHKVLTV...\n+\n+-----\n+\n+This tool is based on the work `Jan Rudolph`_ and the UniProt API.\n+\n+.. _Jan Rudolph: https://github.com/jdrudolph/uniprot\n+\n+ ]]></help>\n+ <citations>\n+ <citation type="doi">10.1093/nar/gku989</citation>\n+ </citations>\n+</tool>\n' |