diff id_converter.xml @ 19:9d758344d36e draft

planemo upload commit 339ab77a83db03409c8001324f10b36ff5b13a39-dirty
author proteore
date Wed, 19 Jun 2019 04:38:12 -0400
parents 5252bbcfbdd7
children 77a2cd4162b7
line wrap: on
line diff
--- a/id_converter.xml	Fri May 10 10:38:46 2019 -0400
+++ b/id_converter.xml	Wed Jun 19 04:38:12 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="IDconverter" name="ID Converter" version="2019.05.10">
+<tool id="IDconverter" name="ID Converter" version="2019.06.18">
     <description>(Human, Mouse, Rat)
     </description>
     <requirements>
@@ -59,8 +59,8 @@
         <conditional name="species">
             <param name="species" type="select" label="Species">
                 <option value="Human" selected="True">Human (Homo sapiens)</option>
-                <option value="Mouse" selected="True">Mouse (Mus musculus)</option>
-                <option value="Rat" selected="True">Rat (Rattus norvegicus)</option>
+                <option value="Mouse">Mouse (Mus musculus)</option>
+                <option value="Rat">Rat (Rattus norvegicus)</option>
             </param>
             <when value="Human">
                 <param name="mapping_file" type="select" label="Release" >
@@ -90,8 +90,8 @@
                 <section name="idto" title="Target type" expanded="True" >
                     <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" >
                         <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option>
-                        <option value="UniProt-AC_reviewed" >UniProt accession number (e.g. P31946 - reviewed entries only)</option>
-                        <option value="UniProt-AC" >UniProt accession number (e.g. P31946 - reviewed and unreviewed entries)</option>
+                        <option value="UniProt-AC" >UniProt accession number (e.g. P31946 - reviewed entries only)</option>
+                        <!--option value="UniProt-AC" >UniProt accession number (e.g. P31946 - reviewed and unreviewed entries)</option-->
                         <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
                         <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
                         <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
@@ -117,41 +117,40 @@
                     </options>
                 </param>    
                 <param name="idtypein" type="select" label="Type/source of IDs" optional="false" >
-                    <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. P31946)</option>
-                    <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
-                    <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
-                    <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                    <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
-                    <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
-                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
-                    <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
-                    <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
-                    <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
-                    <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option>
-                    <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option>
-                    <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option>
-                    <option value="BioGrid" >BioGrid (e.g. 113361)</option>
-                    <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
-                    <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option>
+                    <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. Q8VC49)</option>
+                    <option value="UniProt-ID" >UniProt ID (e.g IF27B_MOUSE)</option>
+                    <option value="GeneID" >Entrez gene ID (e.g. 217845)</option>
+                    <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_663424.1)</option>
+                    <option value="GI" >GI (NCBI GI number) (e.g. 148686879)</option>
+                    <option value="PDB" >Protein DataBank ID (e.g. 2BDU:A)</option>
+                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0051607)</option>
+                    <option value="PIR" >Protein Information Resource ID (e.g. A93261)</option>
+                    <option value="UniGene" >Unigene ID (e.g. Mm.1293)</option>
+                    <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSMUSG00000031239)</option>
+                    <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENSMUST00000033591)</option>
+                    <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSMUSP00000033591)</option>
+                    <option value="BioGrid" >BioGrid (e.g. 201578)</option>
+                    <option value="STRING" >STRING (e.g. 10090.ENSMUSP00000041712)</option>
+                    <option value="KEGG" >KEGG gene id (e.g. mmu:217845)</option>
                 </param>
                 <section name="idto" title="Target type of IDs" expanded="True" >
                     <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" >
-                        <option value="UniProt-AC_reviewed" >UniProt accession number (e.g. P31946 - reviewed entries only)</option>
-                        <option value="UniProt-AC" >UniProt accession number (e.g. P31946 - reviewed and unreviewed entries)</option>
-                        <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
-                        <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
-                        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                        <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
-                        <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
-                        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
-                        <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
-                        <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
-                        <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option>
-                        <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option>
-                        <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option>
-                        <option value="BioGrid" >BioGrid (e.g. 113361)</option>
-                        <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
-                        <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option>
+                        <option value="UniProt-AC" >UniProt accession number (e.g. Q8VC49 - reviewed entries only)</option>
+                        <!--option value="UniProt-AC" >UniProt accession number (e.g. Q8VC49 - reviewed and unreviewed entries)</option-->
+                        <option value="UniProt-ID" >UniProt ID (e.g IF27B_MOUSE)</option>
+                        <option value="GeneID" >Entrez gene ID (e.g. 217845)</option>
+                        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_663424.1)</option>
+                        <option value="GI" >GI (NCBI GI number) (e.g. 148686879)</option>
+                        <option value="PDB" >Protein DataBank ID (e.g. 2BDU:A)</option>
+                        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0051607)</option>
+                        <option value="PIR" >Protein Information Resource ID (e.g. A93261)</option>
+                        <option value="UniGene" >Unigene ID (e.g. Mm.1293)</option>
+                        <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSMUSG00000031239)</option>
+                        <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENSMUST00000033591)</option>
+                        <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSMUSP00000033591)</option>
+                        <option value="BioGrid" >BioGrid (e.g. 201578)</option>
+                        <option value="STRING" >STRING (e.g. 10090.ENSMUSP00000041712)</option>
+                        <option value="KEGG" >KEGG gene id (e.g. mmu:217845)</option>
                     </param>
                 </section>
             </when>
@@ -162,41 +161,40 @@
                     </options>
                 </param>    
                 <param name="idtypein" type="select" label="Select type/source of identifier of your list" optional="false" >
-                    <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. P31946)</option>
-                    <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
-                    <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
-                    <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                    <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
-                    <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
-                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
-                    <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
-                    <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
-                    <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
-                    <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option>
-                    <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option>
-                    <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option>
-                    <option value="BioGrid" >BioGrid (e.g. 113361)</option>
-                    <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
-                    <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option>
+                    <option value="UniProt-AC" >UniProt accession number (e.g. A0JPJ7 - reviewed entries only)</option>
+                    <option value="UniProt-ID" >UniProt ID (e.g OLA1_RAT)</option>
+                    <option value="GeneID" >Entrez gene ID (e.g. 296488)</option>
+                    <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_001029099.1)</option>
+                    <option value="GI" >GI (NCBI GI number) (e.g. 117558623)</option>
+                    <option value="PDB" >Protein DataBank ID (e.g. 6EPD:O)</option>
+                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0005737)</option>
+                    <option value="PIR" >Protein Information Resource ID (e.g. PT0204)</option>
+                    <option value="UniGene" >Unigene ID (e.g. Rn.34914)</option>
+                    <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSRNOG00000019047)</option>
+                    <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENSRNOT00000026040)</option>
+                    <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSRNOP00000026040)</option>
+                    <option value="BioGrid" >BioGrid (e.g. 253944)</option>
+                    <option value="STRING" >STRING (e.g. 10116.ENSRNOP00000054039)</option>
+                    <option value="KEGG" >KEGG gene id (e.g. rno:296488)</option>
                 </param>
                 <section name="idto" title="Target type of IDs" expanded="True" >
                     <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" >
-                        <option value="UniProt-AC_reviewed" >UniProt accession number (e.g. P31946 - reviewed entries only)</option>
-                        <option value="UniProt-AC" >UniProt accession number (e.g. P31946 - reviewed and unreviewed entries)</option>
-                        <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option>
-                        <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
-                        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-                        <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option>
-                        <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
-                        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
-                        <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
-                        <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
-                        <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option>
-                        <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option>
-                        <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option>
-                        <option value="BioGrid" >BioGrid (e.g. 113361)</option>
-                        <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
-                        <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option>
+                        <option value="UniProt-AC" >UniProt accession number (e.g. A0JPJ7 - reviewed entries only)</option>
+                        <!--option value="UniProt-AC" >UniProt accession number (e.g. A0JPJ7 - reviewed and unreviewed entries)</option-->
+                        <option value="UniProt-ID" >UniProt ID (e.g OLA1_RAT)</option>
+                        <option value="GeneID" >Entrez gene ID (e.g. 296488)</option>
+                        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_001029099.1)</option>
+                        <option value="GI" >GI (NCBI GI number) (e.g. 117558623)</option>
+                        <option value="PDB" >Protein DataBank ID (e.g. 6EPD:O)</option>
+                        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0005737)</option>
+                        <option value="PIR" >Protein Information Resource ID (e.g. PT0204)</option>
+                        <option value="UniGene" >Unigene ID (e.g. Rn.34914)</option>
+                        <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSRNOG00000019047)</option>
+                        <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENSRNOT00000026040)</option>
+                        <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSRNOP00000026040)</option>
+                        <option value="BioGrid" >BioGrid (e.g. 253944)</option>
+                        <option value="STRING" >STRING (e.g. 10116.ENSRNOP00000054039)</option>
+                        <option value="KEGG" >KEGG gene id (e.g. rno:296488)</option>
                     </param>
                 </section>
             </when>
@@ -217,7 +215,7 @@
                 <param name="mapping_file" value="human_id_mapping"/>
                 <param name="idtypein" value="UniProt-AC"/>
                 <section name="idto">
-                    <param name="idtypeout" value="neXtProt,UniProt-ID,GeneID,MIM,Ensembl" />
+                    <param name="idtypeout" value="neXtProt,UniProt-ID,GeneID,MIM,Ensembl_Gene" />
                 </section>
             </conditional>
             <output name="output" value="ID_Converted_FKW_Lacombe_et_al_2017_OK.txt" />
@@ -238,7 +236,11 @@
 .. class:: warningmark
 
 Accession numbers with an hyphen ("-") that normally correspond to isoform are considered as similar to its canonical form.
-For example, "Q71U36-2" will be treated as "Q71U36".
+For example, "Q71U36-2" will be treated as "Q71U36". 
+
+.. class:: warningmark
+
+In copy/paste mode, at most 5000 input IDs are taken into account.
 
 -----
 
@@ -249,9 +251,7 @@
 
 * UniProt accession number (e.g. P31946 - reviewed entries only)
 
-* UniProt accession number (e.g. P31946 - reviewed and unreviewed entries)
-
-* UniProt ID (e.g 1433B_HUMAN)
+* UniProt ID (e.g. 1433B_HUMAN - reviewed entries only)
 
 * Entrez gene ID (e.g. 7529)
 
@@ -285,6 +285,10 @@
 
 Nextprot and OMIM only applicable to Human species.
 
+.. class:: warningmark
+
+For UniProt-AC and UniProt-ID, only reviewed entries are considered, except for releases prior to 08-05-2019, for which all UniProt-AC and UniProt-ID entries available at that time are considered.
+
 -----
 
 **Output**
@@ -298,16 +302,19 @@
 
 This tool converts human IDs using the following source files:
 
-- **HUMAN_9606_idmapping_selected.tab (Uniprot 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-- **HUMAN_9606_idmapping.dat (Uniprot 23/10/18)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-- `Human uniprot-AC entries reviewed  <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606&format=list>`_. 
-- **nextprot_ac_list_all.txt (Nextprot released on 10/10/2018)**: ftp://ftp.nextprot.org/pub/current_release/ac_lists/
-- **MOUSE_10090_idmapping_selected.tab (Uniprot 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-- **MOUSE_10090_idmapping.dat (Uniprot 23/10/18)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-- `Mouse uniprot-AC entries reviewed  <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10090&format=list>`_. 
-- **RAT_10116_idmapping.dat (Uniprot 23/10/18)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-- **RAT_10116_idmapping_selected.tab (Uniprot 23/10/18)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
-- `Rat uniprot-AC entries reviewed  <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10116&format=list>`_.
+- Current UniProt release of idmapping_selected.tab and idmapping.dat for Human, Mouse and Rat: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism
+- All previous releases of UniProt can be found here: ftp://ftp.uniprot.org/pub/databases/uniprot/previous_releases/
+- **nextprot_ac_list_all.txt (Nextprot released on 13/02/2019 - current)**: ftp://ftp.nextprot.org/pub/current_release/ac_lists/
+- All previous releases of **nextprot_ac_list_all.txt** can be found here: ftp://ftp.nextprot.org/pub/previous_releases/
+- `Human uniprot-AC entries reviewed (05/06/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+created:[20120720%20TO%2020190605]&format=list>`_. 
+- `Mouse uniprot-AC entries reviewed (05/06/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10090+AND+created:[20120720%20TO%2020190605]&format=list>`_. 
+- `Rat uniprot-AC entries reviewed (05/06/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10116+AND+created:[20120720%20TO%2020190605]&format=list>`_.
+- `Human uniprot-AC entries reviewed (08/05/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+created:[20120720%20TO%2020190508]&format=list>`_. 
+- `Mouse uniprot-AC entries reviewed (08/05/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10090+AND+created:[20120720%20TO%2020190508]&format=list>`_. 
+- `Rat uniprot-AC entries reviewed (08/05/2019) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10116+AND+created:[20120720%20TO%2020190508]&format=list>`_.
+- `Human uniprot-AC entries reviewed (10/10/2018) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+created:[20120720%20TO%2020181010]&format=list>`_. 
+- `Mouse uniprot-AC entries reviewed (10/10/2018) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10090+AND+created:[20120720%20TO%2020181010]&format=list>`_. 
+- `Rat uniprot-AC entries reviewed (10/10/2018) <https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:10116+AND+created:[20120720%20TO%2020181010]&format=list>`_.
 
 -----