changeset 2:d0311668d442 draft

Uploaded
author rsajulga
date Thu, 16 Jul 2015 16:37:08 -0400
parents a9a1b182bc6d
children 6651ac4651f0
files uniprot_id_mapping.xml
diffstat 1 files changed, 228 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uniprot_id_mapping.xml	Thu Jul 16 16:37:08 2015 -0400
@@ -0,0 +1,228 @@
+<tool id="uniprot_id_mapping" name="Uniprot ID mapping" version="1.0.0">
+    <description>retrieve alternative names</description>
+    <macros>
+        <!--
+            id_options: the database identifier types offered by the ID-type
+            select lists of this tool. The value of each option is the code
+            handed to the script on the command line; the element text is the
+            label shown to the user.
+            <yield/> is replaced by whatever child elements the caller places
+            inside its <expand macro="id_options"> element, which lets a caller
+            put additional options at the top of the list.
+            Every value must occur only once: a select parameter with repeated
+            option values is ambiguous.
+        -->
+        <xml name="id_options">
+            <!--  Category:UniProt  -->
+            <yield/>
+            <option value="ACC">UniProtKB AC</option>
+            <option value="ID">UniProtKB ID</option>
+            <option value="UPARC">UniParc</option>
+            <option value="NF50">UniRef50</option>
+            <option value="NF90">UniRef90</option>
+            <option value="NF100">UniRef100</option>
+            <!--  Category:Other sequence databases  -->
+            <option value="EMBL_ID">EMBL/GenBank/DDBJ</option>
+            <option value="EMBL">EMBL/GenBank/DDBJ CDS</option>
+            <option value="PIR">PIR</option>
+            <option value="UNIGENE_ID">UniGene</option>
+            <option value="P_ENTREZGENEID">Entrez Gene (GeneID)</option>
+            <option value="P_GI">GI number*</option>
+            <option value="P_REFSEQ_AC">RefSeq Protein</option>
+            <option value="REFSEQ_NT_ID">RefSeq Nucleotide</option>
+            <!--  Category:3D structure databases  -->
+            <option value="PDB_ID">PDB</option>
+            <option value="DISPROT_ID">DisProt</option>
+            <!--  Category:Protein-protein interaction databases  -->
+            <option value="BIOGRID_ID">BioGrid</option>
+            <option value="DIP_ID">DIP</option>
+            <option value="MINT_ID">MINT</option>
+            <option value="STRING_ID">STRING</option>
+            <!--  Category:Chemistry  -->
+            <option value="CHEMBL_ID">ChEMBL</option>
+            <option value="DRUGBANK_ID">DrugBank</option>
+            <option value="GUIDETOPHARMACOLOGY_ID">GuidetoPHARMACOLOGY</option>
+            <!--  Category:Protein family/group databases  -->
+            <option value="ALLERGOME_ID">Allergome</option>
+            <option value="MEROPS_ID">MEROPS</option>
+            <option value="MYCOCLAP_ID">mycoCLAP</option>
+            <option value="PEROXIBASE_ID">PeroxiBase</option>
+            <option value="REBASE_ID">REBASE</option>
+            <option value="TCDB_ID">TCDB</option>
+            <!--  Category:Polymorphism databases  -->
+            <option value="DMDM_ID">DMDM</option>
+            <!--  Category:2D gel databases  -->
+            <option value="WORLD_2DPAGE_ID">World-2DPAGE</option>
+            <!--  Category:Protocols and materials databases  -->
+            <option value="DNASU_ID">DNASU</option>
+            <!--  Category:Genome annotation databases  -->
+            <option value="ENSEMBL_ID">Ensembl</option>
+            <option value="ENSEMBL_PRO_ID">Ensembl Protein</option>
+            <option value="ENSEMBL_TRS_ID">Ensembl Transcript</option>
+            <option value="ENSEMBLGENOME_ID">Ensembl Genomes</option>
+            <option value="ENSEMBLGENOME_PRO_ID">Ensembl Genomes Protein</option>
+            <option value="ENSEMBLGENOME_TRS_ID">Ensembl Genomes Transcript</option>
+            <!-- GeneID (P_ENTREZGENEID) is offered once, as "Entrez Gene (GeneID)" under "Other sequence databases" -->
+            <option value="KEGG_ID">KEGG</option>
+            <option value="PATRIC_ID">PATRIC</option>
+            <option value="UCSC_ID">UCSC</option>
+            <option value="VECTORBASE_ID">VectorBase</option>
+            <!--  Category:Organism-specific gene databases  -->
+            <option value="ARACHNOSERVER_ID">ArachnoServer</option>
+            <option value="CGD">CGD</option>
+            <option value="CONOSERVER_ID">ConoServer</option>
+            <option value="CYGD_ID">CYGD</option>
+            <option value="DICTYBASE_ID">dictyBase</option>
+            <option value="ECHOBASE_ID">EchoBASE</option>
+            <option value="ECOGENE_ID">EcoGene</option>
+            <option value="EUHCVDB_ID">euHCVdb</option>
+            <option value="EUPATHDB_ID">EuPathDB</option>
+            <option value="FLYBASE_ID">FlyBase</option>
+            <option value="GENECARDS_ID">GeneCards</option>
+            <option value="GENEFARM_ID">GeneFarm</option>
+            <option value="GENOLIST_ID">GenoList</option>
+            <option value="H_INVDB_ID">H-InvDB</option>
+            <option value="HGNC_ID">HGNC</option>
+            <option value="HPA_ID">HPA</option>
+            <option value="LEGIOLIST_ID">LegioList</option>
+            <option value="LEPROMA_ID">Leproma</option>
+            <option value="MAIZEGDB_ID">MaizeGDB</option>
+            <option value="MIM_ID">MIM</option>
+            <option value="MGI_ID">MGI</option>
+            <option value="NEXTPROT_ID">neXtProt</option>
+            <option value="ORPHANET_ID">Orphanet</option>
+            <option value="PHARMGKB_ID">PharmGKB</option>
+            <option value="POMBASE_ID">PomBase</option>
+            <option value="PSEUDOCAP_ID">PseudoCAP</option>
+            <option value="RGD_ID">RGD</option>
+            <option value="SGD_ID">SGD</option>
+            <option value="TAIR_ID">TAIR</option>
+            <option value="TUBERCULIST_ID">TubercuList</option>
+            <option value="WORMBASE_ID">WormBase</option>
+            <option value="WORMBASE_TRS_ID">WormBase Transcript</option>
+            <option value="WORMBASE_PRO_ID">WormBase Protein</option>
+            <option value="XENBASE_ID">Xenbase</option>
+            <option value="ZFIN_ID">ZFIN</option>
+            <!--  Category:Phylogenomic databases  -->
+            <option value="EGGNOG_ID">eggNOG</option>
+            <option value="GENETREE_ID">GeneTree</option>
+            <option value="HOGENOM_ID">HOGENOM</option>
+            <option value="HOVERGEN_ID">HOVERGEN</option>
+            <option value="KO_ID">KO</option>
+            <option value="OMA_ID">OMA</option>
+            <option value="ORTHODB_ID">OrthoDB</option>
+            <option value="PROTCLUSTDB_ID">ProtClustDB</option>
+            <option value="TREEFAM_ID">TreeFam</option>
+            <!--  Category:Enzyme and pathway databases  -->
+            <option value="BIOCYC_ID">BioCyc</option>
+            <option value="REACTOME_ID">Reactome</option>
+            <option value="UNIPATHWAY_ID">UniPathWay</option>
+            <!--  Category:Gene expression databases  -->
+            <option value="CLEANEX_ID">CleanEx</option>
+            <!--  Category:Other  -->
+            <option value="CHITARS_ID">ChiTaRS</option>
+            <option value="GENOMERNAI_ID">GenomeRNAi</option>
+            <option value="GENEWIKI_ID">GeneWiki</option>
+            <option value="NEXTBIO_ID">NextBio</option>
+        </xml>
+    </macros>
+    <!-- No package requirements are declared for this tool. -->
+    <requirements/>
+    <!-- Flag any exit code of 1 or higher; no level is given, so Galaxy applies its default severity. -->
+    <stdio>
+        <exit_code range="1:"/>
+    </stdio>
+    <command interpreter="python"><![CDATA[
+      ## Cheetah template that assembles the uniprot_id_mapping.py command line.
+      uniprot_id_mapping.py 
+      #set $fmt = str($accession_src.fmt)
+      ## Input source: emit the option that matches the selected input format.
+      #if $fmt == 'proteomic':
+        ## Generic proteomics input: choose the option from the dataset's file extension.
+        #set $ext = $accession_src.input.datatype.file_ext
+        #if $ext == 'fasta':
+          --fasta="$accession_src.input"
+        #elif $ext == 'mzid':
+          --mzid="$accession_src.input"
+        #elif $ext == 'pepxml':
+          --pepxml="$accession_src.input"
+        #end if
+      #elif $fmt == 'tabular':
+        --tabular="$accession_src.input_tsv"
+        ## The selected column number is reduced by one before it is passed on.
+        #set $col = int(str($accession_src.column)) - 1
+        --column=$col
+      #elif $fmt == 'fasta':
+        --fasta="$accession_src.input_fasta"
+      #elif $fmt == 'mzid':
+        --mzid="$accession_src.input_mzid"
+      #elif $fmt == 'pepxml':
+        --pepxml="$accession_src.input_pepxml"
+      #end if
+      ## Source ID type, followed by one --to option per selected target ID type.
+      --from="$from_id"
+      #for $to_id in $to_ids:
+        --to="$to_id"
+      #end for
+      ## The decoy pattern is passed only when the user supplied one.
+      #if $decoy_pattern:
+        --decoy='$decoy_pattern'
+      #end if
+      --output="$output"
+    ]]></command>
+    <inputs>
+      <!-- Where the IDs come from: the chosen format decides which dataset parameter is shown
+           and which input option the command template emits. -->
+      <conditional name="accession_src">
+        <param name="fmt" type="select" label="input format" >
+          <option value="proteomic">proteomics formats:  mzid, pepxml, fasta</option>
+          <option value="tabular">tabular</option>
+          <option value="fasta">fasta</option>
+          <option value="mzid">mzid</option>
+          <option value="pepxml">pepxml</option>
+        </param>
+        <!-- Any of the three proteomics formats; the command template dispatches on the file extension. -->
+        <when value="proteomic">
+          <param name="input" type="data" format="mzid,pepxml,fasta" label="Protein IDs from Proteomics files" />
+        </when>
+        <!-- Tabular dataset plus the column to read from it. -->
+        <when value="tabular">
+          <param name="input_tsv" type="data" format="tabular" label="Tabular Input Containing Peptide column" />
+          <param name="column" label="Select column with peptides" type="data_column" numerical="false" data_ref="input_tsv" />
+        </when>
+        <when value="fasta">
+          <param name="input_fasta" type="data" format="fasta" label="Peptide Fasta Input" />
+        </when>
+        <when value="mzid">
+          <param name="input_mzid" type="data" format="mzid" label="mzIdentML Input" />
+        </when>
+        <when value="pepxml">
+          <param name="input_pepxml" type="data" format="pepxml" label="pepXML Input" />
+        </when>
+      </conditional>
+      <!-- Source ID type: the shared option list with the combined AC/ID option placed first
+           (it is substituted for the yield element of the macro).
+           NOTE(review): this parameter is optional, yet the command template always passes
+           the from option; confirm the script copes with an unset value. -->
+      <param name="from_id" type="select" optional="true" label="From ID type:">
+          <expand macro="id_options">
+               <option value="ACC+ID">UniProtKB AC/ID</option>
+          </expand>
+      </param>
+      <!-- Target ID types: several may be selected; the command template emits one to option each. -->
+      <param name="to_ids" type="select" multiple="true" label="To ID type:">
+          <expand macro="id_options">
+          </expand>
+      </param>
+      <param name="decoy_pattern" type="text" size="30" optional="true" label="decoy pattern" 
+             help="decoy pattern to be removed from ID before attempting to map the ID"/>
+    </inputs>
+    <outputs>
+      <!-- The one tabular dataset produced by the tool; its path is given to the script as the output option. -->
+      <data name="output"
+            format="tabular"
+            label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+      <!-- Tabular input: IDs are read from column 1 and mapped from UniProtKB AC to RefSeq Protein. -->
+      <test>
+        <param name="fmt" value="tabular"/>
+        <param name="input_tsv" value="input.tsv" ftype="tabular"/>
+        <param name="column" value="1"/>
+        <param name="from_id" value="ACC"/>
+        <param name="to_ids" value="P_REFSEQ_AC"/>
+        <output name="output">
+            <assert_contents>
+              <has_text text="NP_003395" />
+            </assert_contents>
+        </output>
+      </test>
+      <!-- Fasta input: the dataset goes to input_fasta, the parameter of the fasta branch. -->
+      <test>
+        <param name="fmt" value="fasta"/>
+        <param name="input_fasta" value="peptide.fasta"/>
+        <param name="from_id" value="ACC"/>
+        <param name="to_ids" value="UNIGENE_ID"/>
+        <output name="output">
+            <assert_contents>
+              <has_text text="Homininae" />
+            </assert_contents>
+        </output>
+      </test>
+    </tests>
+    <help><![CDATA[
+    **Uniprot ID mapping** 
+
+    Retrieve alternative ID names for the input identifiers from Uniprot_
+
+    .. _Uniprot: http://www.uniprot.org/help/programmatic_access#id_mapping_examples
+    
+    ]]></help>
+
+</tool>