view id_converter.xml @ 19:9d758344d36e draft

planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
author proteore
date Wed, 19 Jun 2019 04:38:12 -0400
parents 5252bbcfbdd7
children 77a2cd4162b7
line wrap: on
line source

<tool id="IDconverter" name="ID Converter" version="2019.06.18">
    <!-- Short description shown with the tool name: the species handled by the converter. -->
    <description>(Human, Mouse, Rat)</description>
    <!-- No package requirements are declared for this tool. -->
    <requirements/>
    <!-- Exit-code rule that covers every exit code from 1 upwards. -->
    <stdio>
        <exit_code range="1:"/>
    </stdio>
    <!--
        Cheetah template for the job command line: runs id_converter.py from the tool
        directory with the source ID type, the input (pasted list, or dataset plus column
        and header flag), the requested target ID types, the output dataset and the
        reference mapping file. A mapping_file value that has "id_mapping" as one of its
        "/"-separated components is passed as is; any other value is resolved relative
        to the tool directory.
        Every substituted value is single-quoted: the "txt" sanitizer maps the single
        quote to __sq__ but lets other printable characters through, and inside double
        quotes the shell would still expand $, backticks and backslashes from pasted text.
    -->
    <command><![CDATA[
        python '$__tool_directory__/id_converter.py'
        --id_type='$species.idtypein'
        #if $input.ids == "text"
            --input='$input.txt'
            --input_type='list'
        #else
            --input='$input.file'
            --column_number='$input.ncol'
            --header='$input.header'
            --input_type='file'
        #end if
        --target_ids='$species.idto.idtypeout'
        --output='$output'
        #if "id_mapping" in str($species.mapping_file).split("/")
            --ref_file='$species.mapping_file'
        #else
            --ref_file='$__tool_directory__/$species.mapping_file'
        #end if
    ]]></command>
    <inputs>
        <!-- Where the identifiers come from: text pasted into the form, or one column of a dataset. -->
        <conditional name="input">
            <param name="ids" type="select" label="Enter IDs" help="Copy/paste or from a file (e.g. table)">
                <option value="text">Copy/paste your identifiers</option>
                <option value="file" selected="true">Input file containing IDs</option>
            </param>
            <when value="text">
                <param name="txt" type="text" label="Copy/paste IDs" help='IDs must be separated by tab, space or carriage return into the form field, for example: P31946 P62258'>
                    <!--
                        Accepts every character of string.printable except the single quote,
                        which is rewritten to __sq__; characters outside that set are dropped
                        (invalid_char is empty).
                        NOTE(review): space, LF, CR and tab belong to string.printable, so the
                        four whitespace mappings below presumably never apply; confirm against
                        Galaxy's sanitizer before relying on them.
                    -->
                    <sanitizer invalid_char="">
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                            <add source="&#x20;" target=""/>
                            <add source="&#xA;" target=""/>
                            <add source="&#xD;" target=""/>
                            <add source="&#x9;" target=""/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="file" type="data" format="txt,tabular" label="Select your file" help=""/>
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?"/>
                <param name="ncol" type="text" value="c1" label="Column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'>
                    <!-- Anchored at both ends so that only an optional "c" followed by digits is accepted (e.g. "c1" or "1"). -->
                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
                </param>
            </when>
        </conditional>
        <conditional name="species">
            <!-- Selector of the conditional; each value has a matching "when" branch below. Human is the default. -->
            <param name="species"
                   type="select"
                   label="Species">
                <option value="Human" selected="true">Human (Homo sapiens)</option>
                <option value="Mouse">Mouse (Mus musculus)</option>
                <option value="Rat">Rat (Rattus norvegicus)</option>
            </param>
            <!-- Human branch: release selector, source ID type and target ID types. -->
            <when value="Human">
                <!-- Releases are read from the proteore_id_mapping_Human data table, sorted by column 0. -->
                <param name="mapping_file" type="select" label="Release">
                    <options from_data_table="proteore_id_mapping_Human">
                        <filter type="sort_by" column="0"/>
                    </options>
                </param>
                <!-- Type of the submitted identifiers; UniProt-AC is preselected. -->
                <param name="idtypein" type="select" label="Type/source of IDs" optional="false">
                    <option value="neXtProt">neXtProt ID (e.g. NX_P31946)</option>
                    <option value="UniProt-AC" selected="true">UniProt accession number (e.g. P31946)</option>
                    <option value="UniProt-ID">UniProt ID (e.g 1433B_HUMAN)</option>
                    <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
                    <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
                    <option value="GI">GI (NCBI GI number) (e.g. 21328448)</option>
                    <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
                    <option value="GO">GO terms (Gene Ontology) ID (e.g. GO:0070062)</option>
                    <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
                    <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                    <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
                    <option value="Ensembl_Gene">Ensembl gene ID (e.g. ENSG00000166913)</option>
                    <option value="Ensembl_Transcript">Ensembl transcript ID (e.g. ENST00000353703)</option>
                    <option value="Ensembl_Protein">Ensembl protein ID (e.g. ENSP00000300161)</option>
                    <option value="BioGrid">BioGrid (e.g. 113361)</option>
                    <option value="STRING">STRING (e.g. 9606.ENSP00000300161)</option>
                    <option value="KEGG">KEGG gene id (e.g. hsa:7529)</option>
                </param>
                <!-- Target identifier types, shown as checkboxes; several may be ticked and the field is mandatory. -->
                <section name="idto" title="Target type of IDs" expanded="true">
                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="true" optional="false">
                        <option value="neXtProt">neXtProt ID (e.g. NX_P31946)</option>
                        <option value="UniProt-AC">UniProt accession number (e.g. P31946 - reviewed entries only)</option>
                        <!--option value="UniProt-AC" >UniProt accession number (e.g. P31946 - reviewed and unreviewed entries)</option-->
                        <option value="UniProt-ID">UniProt ID (e.g 1433B_HUMAN)</option>
                        <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
                        <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
                        <option value="GI">GI (NCBI GI number) (e.g. 21328448)</option>
                        <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
                        <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                        <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
                        <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                        <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
                        <option value="Ensembl_Gene">Ensembl gene ID (e.g. ENSG00000166913)</option>
                        <option value="Ensembl_Transcript">Ensembl transcript ID (e.g. ENST00000353703)</option>
                        <option value="Ensembl_Protein">Ensembl protein ID (e.g. ENSP00000300161)</option>
                        <option value="BioGrid">BioGrid (e.g. 113361)</option>
                        <option value="STRING">STRING (e.g. 9606.ENSP00000300161)</option>
                        <option value="KEGG">KEGG gene id (e.g. hsa:7529)</option>
                    </param>
                </section>
            </when>
            <!-- Mouse branch: release selector, source ID type and target ID types. -->
            <when value="Mouse">
                <!-- Releases are read from the proteore_id_mapping_Mouse data table, sorted by column 0. -->
                <param name="mapping_file" type="select" label="Release">
                    <options from_data_table="proteore_id_mapping_Mouse">
                        <filter type="sort_by" column="0"/>
                    </options>
                </param>
                <!-- Type of the submitted identifiers; UniProt-AC is preselected. -->
                <param name="idtypein" type="select" label="Type/source of IDs" optional="false">
                    <option value="UniProt-AC" selected="true">UniProt accession number (e.g. Q8VC49)</option>
                    <option value="UniProt-ID">UniProt ID (e.g IF27B_MOUSE)</option>
                    <option value="GeneID">Entrez gene ID (e.g. 217845)</option>
                    <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_663424.1)</option>
                    <option value="GI">GI (NCBI GI number) (e.g. 148686879)</option>
                    <option value="PDB">Protein DataBank ID (e.g. 2BDU:A)</option>
                    <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0051607)</option>
                    <option value="PIR">Protein Information Resource ID (e.g. A93261)</option>
                    <option value="UniGene">Unigene ID (e.g. Mm.1293)</option>
                    <option value="Ensembl_Gene">Ensembl gene ID (e.g. ENSMUSG00000031239)</option>
                    <option value="Ensembl_Transcript">Ensembl transcript ID (e.g. ENSMUST00000033591)</option>
                    <option value="Ensembl_Protein">Ensembl protein ID (e.g. ENSMUSP00000033591)</option>
                    <option value="BioGrid">BioGrid (e.g. 201578)</option>
                    <option value="STRING">STRING (e.g. 10090.ENSMUSP00000041712)</option>
                    <option value="KEGG">KEGG gene id (e.g. mmu:217845)</option>
                </param>
                <!-- Target identifier types, shown as checkboxes; several may be ticked and the field is mandatory. -->
                <section name="idto" title="Target type of IDs" expanded="true">
                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="true" optional="false">
                        <option value="UniProt-AC">UniProt accession number (e.g. Q8VC49 - reviewed entries only)</option>
                        <!--option value="UniProt-AC" >UniProt accession number (e.g. Q8VC49 - reviewed and unreviewed entries)</option-->
                        <option value="UniProt-ID">UniProt ID (e.g IF27B_MOUSE)</option>
                        <option value="GeneID">Entrez gene ID (e.g. 217845)</option>
                        <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_663424.1)</option>
                        <option value="GI">GI (NCBI GI number) (e.g. 148686879)</option>
                        <option value="PDB">Protein DataBank ID (e.g. 2BDU:A)</option>
                        <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0051607)</option>
                        <option value="PIR">Protein Information Resource ID (e.g. A93261)</option>
                        <option value="UniGene">Unigene ID (e.g. Mm.1293)</option>
                        <option value="Ensembl_Gene">Ensembl gene ID (e.g. ENSMUSG00000031239)</option>
                        <option value="Ensembl_Transcript">Ensembl transcript ID (e.g. ENSMUST00000033591)</option>
                        <option value="Ensembl_Protein">Ensembl protein ID (e.g. ENSMUSP00000033591)</option>
                        <option value="BioGrid">BioGrid (e.g. 201578)</option>
                        <option value="STRING">STRING (e.g. 10090.ENSMUSP00000041712)</option>
                        <option value="KEGG">KEGG gene id (e.g. mmu:217845)</option>
                    </param>
                </section>
            </when>
            <!-- Rat branch: release selector, source ID type and target ID types. -->
            <when value="Rat">
                <!-- Releases are read from the proteore_id_mapping_Rat data table, sorted by column 0. -->
                <param name="mapping_file" type="select" label="Release">
                    <options from_data_table="proteore_id_mapping_Rat">
                        <filter type="sort_by" column="0"/>
                    </options>
                </param>
                <!-- Type of the submitted identifiers; UniProt-AC (the first option) is marked as the explicit default. -->
                <param name="idtypein" type="select" label="Type/source of IDs" optional="false">
                    <option value="UniProt-AC" selected="true">UniProt accession number (e.g. A0JPJ7 - reviewed entries only)</option>
                    <option value="UniProt-ID">UniProt ID (e.g OLA1_RAT)</option>
                    <option value="GeneID">Entrez gene ID (e.g. 296488)</option>
                    <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_001029099.1)</option>
                    <option value="GI">GI (NCBI GI number) (e.g. 117558623)</option>
                    <option value="PDB">Protein DataBank ID (e.g. 6EPD:O)</option>
                    <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0005737)</option>
                    <option value="PIR">Protein Information Resource ID (e.g. PT0204)</option>
                    <option value="UniGene">Unigene ID (e.g. Rn.34914)</option>
                    <option value="Ensembl_Gene">Ensembl gene ID (e.g. ENSRNOG00000019047)</option>
                    <option value="Ensembl_Transcript">Ensembl transcript ID (e.g. ENSRNOT00000026040)</option>
                    <option value="Ensembl_Protein">Ensembl protein ID (e.g. ENSRNOP00000026040)</option>
                    <option value="BioGrid">BioGrid (e.g. 253944)</option>
                    <option value="STRING">STRING (e.g. 10116.ENSRNOP00000054039)</option>
                    <option value="KEGG">KEGG gene id (e.g. rno:296488)</option>
                </param>
                <!-- Target identifier types, shown as checkboxes; several may be ticked and the field is mandatory. -->
                <section name="idto" title="Target type of IDs" expanded="true">
                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="true" optional="false">
                        <option value="UniProt-AC">UniProt accession number (e.g. A0JPJ7 - reviewed entries only)</option>
                        <!--option value="UniProt-AC" >UniProt accession number (e.g. A0JPJ7 - reviewed and unreviewed entries)</option-->
                        <option value="UniProt-ID">UniProt ID (e.g OLA1_RAT)</option>
                        <option value="GeneID">Entrez gene ID (e.g. 296488)</option>
                        <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_001029099.1)</option>
                        <option value="GI">GI (NCBI GI number) (e.g. 117558623)</option>
                        <option value="PDB">Protein DataBank ID (e.g. 6EPD:O)</option>
                        <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0005737)</option>
                        <option value="PIR">Protein Information Resource ID (e.g. PT0204)</option>
                        <option value="UniGene">Unigene ID (e.g. Rn.34914)</option>
                        <option value="Ensembl_Gene">Ensembl gene ID (e.g. ENSRNOG00000019047)</option>
                        <option value="Ensembl_Transcript">Ensembl transcript ID (e.g. ENSRNOT00000026040)</option>
                        <option value="Ensembl_Protein">Ensembl protein ID (e.g. ENSRNOP00000026040)</option>
                        <option value="BioGrid">BioGrid (e.g. 253944)</option>
                        <option value="STRING">STRING (e.g. 10116.ENSRNOP00000054039)</option>
                        <option value="KEGG">KEGG gene id (e.g. rno:296488)</option>
                    </param>
                </section>
            </when>
        </conditional>
    </inputs>
    <!-- Single result dataset, declared with the "tsv" format; the command template passes it as the output path. -->
    <outputs>
        <data name="output"
              format="tsv"/>
    </outputs>
    <tests>
        <!--
            File-mode run: IDs are read from column c1 of a file with a header, treated as
            UniProt accession numbers and mapped to five target types using the
            "human_id_mapping" release entry.
        -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="FKW_Lacombe_et_al_2017_OK.txt"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c1"/>
            </conditional>
            <conditional name="species">
                <!-- Selector stated explicitly so the test does not depend on the form's default species. -->
                <param name="species" value="Human"/>
                <param name="mapping_file" value="human_id_mapping"/>
                <param name="idtypein" value="UniProt-AC"/>
                <section name="idto">
                    <param name="idtypeout" value="neXtProt,UniProt-ID,GeneID,MIM,Ensembl_Gene"/>
                </section>
            </conditional>
            <output name="output" value="ID_Converted_FKW_Lacombe_et_al_2017_OK.txt"/>
        </test>
    </tests>
    <help><![CDATA[
**Description**

This tool converts a list of identifiers (IDs) into another type of ID. Currently the conversion of IDs applies to the following three species: Human (Homo sapiens), Mouse (Mus musculus), Rat (Rattus norvegicus).
Supported source and target type of IDs are listed below. After choosing the type of your input IDs, you can select one or more types of ID you may need. 

-----

**Input**

A list of IDs (entered in copy/paste mode) or a single-column file. The tool then returns a file containing the mapped IDs.
If your input is a multiple-column file, the mapped IDs column(s) will be added at the end of the input file.

.. class:: warningmark

Accession numbers containing a hyphen ("-"), which normally correspond to isoforms, are treated as their canonical form.
For example, "Q71U36-2" will be treated as "Q71U36". 

.. class:: warningmark

In copy/paste mode, the number of IDs considered in input is limited to 5000.

-----

**Parameters**

Target type of IDs currently supported:

* neXtProt ID (e.g. NX_P31946)

* UniProt accession number (e.g. P31946 - reviewed entries only)

* UniProt ID (e.g 1433B_HUMAN - reviewed entries only)

* Entrez gene ID (e.g. 7529)

* RefSeq protein (NCBI) (e.g.  NP_003395.1)

* GI (NCBI GI number) (e.g. 21328448)

* Protein DataBank ID (e.g. 2BR9:A)

* GOterms (Gene Ontology) ID (e.g. GO:0070062)

* Protein Information Resource ID (e.g. S34755)

* OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)

* Unigene ID (e.g. Hs.643544)

* Ensembl gene ID (e.g. ENSG00000166913)

* Ensembl transcript ID (e.g. ENST00000353703)

* Ensembl protein ID (e.g. ENSP00000300161)

* BioGrid (e.g. 113361)

* STRING (e.g. 9606.ENSP00000300161)

* KEGG gene id (e.g. hsa:7529)

.. class:: warningmark 

neXtProt and OMIM IDs are only applicable to the Human species.

.. class:: warningmark

For UniProt-AC and UniProt-ID, only reviewed entries are considered, except for releases before 08-05-2019, for which all UniProt-AC and UniProt-ID entries (at the time) are considered.

-----

**Output**

A text file containing the selected type(s) of IDs (in addition to the original column(s) provided).
Please note that "NA" is returned when there is no match between a source ID and the corresponding target ID.

-----

**Data sources (release date)**

This tool converts IDs using the following source files:

- Current release of UniProt, for idmapping_selected.tab and idmapping.dat for Human, Mouse and Rat: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism
- All previous releases of UniProt can be found here: ftp://ftp.uniprot.org/pub/databases/uniprot/previous_releases/
- **nextprot_ac_list_all.txt (neXtProt released on 13/02/2019 - current)**: ftp://ftp.nextprot.org/pub/current_release/ac_lists/
- All previous releases of **nextprot_ac_list_all.txt** can be found here: ftp://ftp.nextprot.org/pub/previous_releases/
- `Human uniprot-AC entries reviewed (05/06/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+created:[20120720%20TO%2020190605]&format=list>`_. 
- `Mouse uniprot-AC entries reviewed (05/06/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10090+AND+created:[20120720%20TO%2020190605]&format=list>`_. 
- `Rat uniprot-AC entries reviewed (05/06/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10116+AND+created:[20120720%20TO%2020190605]&format=list>`_.
- `Human uniprot-AC entries reviewed (08/05/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+created:[20120720%20TO%2020190508]&format=list>`_. 
- `Mouse uniprot-AC entries reviewed (08/05/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10090+AND+created:[20120720%20TO%2020190508]&format=list>`_. 
- `Rat uniprot-AC entries reviewed (08/05/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10116+AND+created:[20120720%20TO%2020190508]&format=list>`_.
- `Human uniprot-AC entries reviewed (10/10/2018) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+created:[20120720%20TO%2020181010]&format=list>`_. 
- `Mouse uniprot-AC entries reviewed (10/10/2018) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10090+AND+created:[20120720%20TO%2020181010]&format=list>`_. 
- `Rat uniprot-AC entries reviewed (10/10/2018) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10116+AND+created:[20120720%20TO%2020181010]&format=list>`_.

-----

.. class:: infomark

**Authors**

David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
 
    ]]></help>
    <!-- No citations are registered for this tool. -->
    <citations/>
</tool>