Mercurial > repos > rsajulga > uniprot_id_mapper
changeset 2:d0311668d442 draft
Uploaded
author | rsajulga |
---|---|
date | Thu, 16 Jul 2015 16:37:08 -0400 |
parents | a9a1b182bc6d |
children | 6651ac4651f0 |
files | uniprot_id_mapping.xml |
diffstat | 1 files changed, 228 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!--
  uniprot_id_mapping.xml
  Added as a new file in this changeset (diff against /dev/null,
  Thu Jul 16 16:37:08 2015 -0400).

  Galaxy tool wrapper that runs uniprot_id_mapping.py. The command line is
  assembled from: the selected input dataset (proteomics file, tabular column,
  fasta, mzid or pepxml), one "from" ID type, one or more "to" ID types, an
  optional decoy pattern, and a single tabular output.
-->
<tool id="uniprot_id_mapping" name="Uniprot ID mapping" version="1.0.0">
    <description>retrieve alternative names</description>
    <macros>
        <!--
          Shared list of ID types used by both the "from_id" and "to_ids" selects.
          <yield/> marks where the child elements of an <expand> are inserted,
          which lets "from_id" prepend its extra ACC+ID option.
        -->
        <xml name="id_options">
            <!-- Category:UniProt -->
            <yield/>
            <option value="ACC">UniProtKB AC</option>
            <option value="ID">UniProtKB ID</option>
            <option value="UPARC">UniParc</option>
            <option value="NF50">UniRef50</option>
            <option value="NF90">UniRef90</option>
            <option value="NF100">UniRef100</option>
            <!-- Category:Other sequence databases -->
            <option value="EMBL_ID">EMBL/GenBank/DDBJ</option>
            <option value="EMBL">EMBL/GenBank/DDBJ CDS</option>
            <option value="PIR">PIR</option>
            <option value="UNIGENE_ID">UniGene</option>
            <option value="P_ENTREZGENEID">Entrez Gene (GeneID)</option>
            <option value="P_GI">GI number*</option>
            <option value="P_REFSEQ_AC">RefSeq Protein</option>
            <option value="REFSEQ_NT_ID">RefSeq Nucleotide</option>
            <!-- Category:3D structure databases -->
            <option value="PDB_ID">PDB</option>
            <option value="DISPROT_ID">DisProt</option>
            <!-- Category:Protein-protein interaction databases -->
            <option value="BIOGRID_ID">BioGrid</option>
            <option value="DIP_ID">DIP</option>
            <option value="MINT_ID">MINT</option>
            <option value="STRING_ID">STRING</option>
            <!-- Category:Chemistry -->
            <option value="CHEMBL_ID">ChEMBL</option>
            <option value="DRUGBANK_ID">DrugBank</option>
            <option value="GUIDETOPHARMACOLOGY_ID">GuidetoPHARMACOLOGY</option>
            <!-- Category:Protein family/group databases -->
            <option value="ALLERGOME_ID">Allergome</option>
            <option value="MEROPS_ID">MEROPS</option>
            <option value="MYCOCLAP_ID">mycoCLAP</option>
            <option value="PEROXIBASE_ID">PeroxiBase</option>
            <option value="REBASE_ID">REBASE</option>
            <option value="TCDB_ID">TCDB</option>
            <!-- Category:Polymorphism databases -->
            <option value="DMDM_ID">DMDM</option>
            <!-- Category:2D gel databases -->
            <option value="WORLD_2DPAGE_ID">World-2DPAGE</option>
            <!-- Category:Protocols and materials databases -->
            <option value="DNASU_ID">DNASU</option>
            <!-- Category:Genome annotation databases -->
            <option value="ENSEMBL_ID">Ensembl</option>
            <option value="ENSEMBL_PRO_ID">Ensembl Protein</option>
            <option value="ENSEMBL_TRS_ID">Ensembl Transcript</option>
            <option value="ENSEMBLGENOME_ID">Ensembl Genomes</option>
            <option value="ENSEMBLGENOME_PRO_ID">Ensembl Genomes Protein</option>
            <option value="ENSEMBLGENOME_TRS_ID">Ensembl Genomes Transcript</option>
            <!-- GeneID (P_ENTREZGENEID) is already listed above as "Entrez Gene (GeneID)";
                 it is not repeated here so the same value cannot be selected twice. -->
            <option value="KEGG_ID">KEGG</option>
            <option value="PATRIC_ID">PATRIC</option>
            <option value="UCSC_ID">UCSC</option>
            <option value="VECTORBASE_ID">VectorBase</option>
            <!-- Category:Organism-specific gene databases -->
            <option value="ARACHNOSERVER_ID">ArachnoServer</option>
            <option value="CGD">CGD</option>
            <option value="CONOSERVER_ID">ConoServer</option>
            <option value="CYGD_ID">CYGD</option>
            <option value="DICTYBASE_ID">dictyBase</option>
            <option value="ECHOBASE_ID">EchoBASE</option>
            <option value="ECOGENE_ID">EcoGene</option>
            <option value="EUHCVDB_ID">euHCVdb</option>
            <option value="EUPATHDB_ID">EuPathDB</option>
            <option value="FLYBASE_ID">FlyBase</option>
            <option value="GENECARDS_ID">GeneCards</option>
            <option value="GENEFARM_ID">GeneFarm</option>
            <option value="GENOLIST_ID">GenoList</option>
            <option value="H_INVDB_ID">H-InvDB</option>
            <option value="HGNC_ID">HGNC</option>
            <option value="HPA_ID">HPA</option>
            <option value="LEGIOLIST_ID">LegioList</option>
            <option value="LEPROMA_ID">Leproma</option>
            <option value="MAIZEGDB_ID">MaizeGDB</option>
            <option value="MIM_ID">MIM</option>
            <option value="MGI_ID">MGI</option>
            <option value="NEXTPROT_ID">neXtProt</option>
            <option value="ORPHANET_ID">Orphanet</option>
            <option value="PHARMGKB_ID">PharmGKB</option>
            <option value="POMBASE_ID">PomBase</option>
            <option value="PSEUDOCAP_ID">PseudoCAP</option>
            <option value="RGD_ID">RGD</option>
            <option value="SGD_ID">SGD</option>
            <option value="TAIR_ID">TAIR</option>
            <option value="TUBERCULIST_ID">TubercuList</option>
            <option value="WORMBASE_ID">WormBase</option>
            <option value="WORMBASE_TRS_ID">WormBase Transcript</option>
            <option value="WORMBASE_PRO_ID">WormBase Protein</option>
            <option value="XENBASE_ID">Xenbase</option>
            <option value="ZFIN_ID">ZFIN</option>
            <!-- Category:Phylogenomic databases -->
            <option value="EGGNOG_ID">eggNOG</option>
            <option value="GENETREE_ID">GeneTree</option>
            <option value="HOGENOM_ID">HOGENOM</option>
            <option value="HOVERGEN_ID">HOVERGEN</option>
            <option value="KO_ID">KO</option>
            <option value="OMA_ID">OMA</option>
            <option value="ORTHODB_ID">OrthoDB</option>
            <option value="PROTCLUSTDB_ID">ProtClustDB</option>
            <option value="TREEFAM_ID">TreeFam</option>
            <!-- Category:Enzyme and pathway databases -->
            <option value="BIOCYC_ID">BioCyc</option>
            <option value="REACTOME_ID">Reactome</option>
            <option value="UNIPATHWAY_ID">UniPathWay</option>
            <!-- Category:Gene expression databases -->
            <option value="CLEANEX_ID">CleanEx</option>
            <!-- Category:Other -->
            <option value="CHITARS_ID">ChiTaRS</option>
            <option value="GENOMERNAI_ID">GenomeRNAi</option>
            <option value="GENEWIKI_ID">GeneWiki</option>
            <option value="NEXTBIO_ID">NextBio</option>
        </xml>
    </macros>
    <requirements>
    </requirements>
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <command interpreter="python"><![CDATA[
        uniprot_id_mapping.py
        ## Pick the input option that matches the selected format. For the generic
        ## "proteomic" choice the option is derived from the dataset's extension.
        #if str($accession_src.fmt) == 'proteomic':
            #if $accession_src.input.datatype.file_ext == 'fasta':
                --fasta="$accession_src.input"
            #elif $accession_src.input.datatype.file_ext == 'mzid':
                --mzid="$accession_src.input"
            #elif $accession_src.input.datatype.file_ext == 'pepxml':
                --pepxml="$accession_src.input"
            #end if
        #elif str($accession_src.fmt) == 'tabular':
            --tabular="$accession_src.input_tsv"
            ## The selected column number is decremented by one before it is passed on
            ## (presumably the script expects a 0-based index; confirm against the script).
            #set $col = int(str($accession_src.column)) - 1
            --column=$col
        #elif str($accession_src.fmt) == 'fasta':
            --fasta="$accession_src.input_fasta"
        #elif str($accession_src.fmt) == 'mzid':
            --mzid="$accession_src.input_mzid"
        #elif str($accession_src.fmt) == 'pepxml':
            --pepxml="$accession_src.input_pepxml"
        #end if
        --from="$from_id"
        ## One --to option is emitted per selected target ID type.
        #for $to_id in $to_ids:
            --to="$to_id"
        #end for
        #if $decoy_pattern:
            --decoy='$decoy_pattern'
        #end if
        --output="$output"
    ]]></command>
    <inputs>
        <conditional name="accession_src">
            <param name="fmt" type="select" label="input format" >
                <option value="proteomic">proteomics formats: mzid, pepxml, fasta</option>
                <option value="tabular">tabular</option>
                <option value="fasta">fasta</option>
                <option value="mzid">mzid</option>
                <option value="pepxml">pepxml</option>
            </param>
            <when value="proteomic">
                <param name="input" type="data" format="mzid,pepxml,fasta" label="Protein IDs from Proteomics files" />
            </when>
            <when value="tabular">
                <param name="input_tsv" type="data" format="tabular" label="Tabular Input Containing Peptide column" />
                <param name="column" label="Select column with peptides" type="data_column" numerical="false" data_ref="input_tsv" />
            </when>
            <when value="fasta">
                <param name="input_fasta" type="data" format="fasta" label="Peptide Fasta Input" />
            </when>
            <when value="mzid">
                <param name="input_mzid" type="data" format="mzid" label="mzIdentML Input" />
            </when>
            <when value="pepxml">
                <param name="input_pepxml" type="data" format="pepxml" label="pepXML Input" />
            </when>
        </conditional>
        <!-- NOTE(review): from_id is optional, but the command always emits the "from"
             option; confirm how the script behaves when no source ID type is selected. -->
        <param name="from_id" type="select" optional="true" label="From ID type:">
            <expand macro="id_options">
                <option value="ACC+ID">UniProtKB AC/ID</option>
            </expand>
        </param>
        <param name="to_ids" type="select" multiple="true" label="To ID type:">
            <expand macro="id_options">
            </expand>
        </param>
        <param name="decoy_pattern" type="text" size="30" optional="true" label="decoy pattern"
               help="decoy pattern to be removed from ID before attempting to map the ID"/>
    </inputs>
    <outputs>
        <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Tabular input: map UniProtKB accessions in column 1 to RefSeq Protein IDs. -->
        <test>
            <param name="fmt" value="tabular"/>
            <param name="input_tsv" value="input.tsv" ftype="tabular"/>
            <param name="column" value="1"/>
            <param name="from_id" value="ACC"/>
            <param name="to_ids" value="P_REFSEQ_AC"/>
            <output name="output">
                <assert_contents>
                    <has_text text="NP_003395" />
                </assert_contents>
            </output>
        </test>
        <!-- Fasta input: the dataset must be supplied through the fasta branch's parameter. -->
        <test>
            <param name="fmt" value="fasta"/>
            <param name="input_fasta" value="peptide.fasta" ftype="fasta"/>
            <param name="from_id" value="ACC"/>
            <param name="to_ids" value="UNIGENE_ID"/>
            <output name="output">
                <assert_contents>
                    <has_text text="Homininae" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**Uniprot ID mapping**

Retrieve alternate ID names from Uniprot_

.. _Uniprot: http://www.uniprot.org/help/programmatic_access#id_mapping_examples

    ]]></help>

</tool>