diff id_converter.xml @ 14:659f1248f535 draft

planemo upload commit 688c456ca57914a63c20eba942ec5fe81e896099-dirty
author proteore
date Wed, 19 Sep 2018 04:45:04 -0400
parents e30234018582
children b50d913ec067
line wrap: on
line diff
--- a/id_converter.xml	Mon Aug 27 06:12:21 2018 -0400
+++ b/id_converter.xml	Wed Sep 19 04:45:04 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="IDconverter" name="ID Converter" version="0.1.2">
+<tool id="IDconverter" name="ID Converter" version="2018.09.18">
     <description>convert public database identifiers
     </description>
     <requirements>
@@ -9,7 +9,7 @@
     </stdio>
     <command interpreter="Rscript">
         $__tool_directory__/id_converter_UniProt.R
-        --id_type="$idti.idtypein"
+        --id_type="$species.idtypein"
         #if $input.ids == "text"
             --input="$input.txt"
             --input_type="list"
@@ -19,9 +19,9 @@
             --header="$input.header"
             --input_type="file"
         #end if
-        --target_ids="$idto.idtypeout"
+        --target_ids="$species.idto.idtypeout"
         --output="$output"
-        --ref_file="$human_id_mapping"
+        --ref_file="$__tool_directory__/${ filter( lambda x: str( x[0] ) == str( $species.mapping_file ), $__app__.tool_data_tables['id_mapping_tab'].get_fields() )[0][-1] }"
         
     </command>
     <inputs>
@@ -31,13 +31,17 @@
                 <option value="file" selected="true">Input file containing your identifiers</option>
             </param>
             <when value="text" >
-                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: P31946 P62258' >
-                    <sanitizer>
+                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by "," into the form field, for example: P31946,P62258' >
+                    <sanitizer invalid_char=""> <!-- The four mapping entries added below use numeric character references: x20 = space, xA = line feed, xD = carriage return, x9 = tab; each is mapped to an empty target. -->
                         <valid initial="string.printable">
                             <remove value="&apos;"/>
                         </valid>
                         <mapping initial="none">
                             <add source="&apos;" target="__sq__"/>
+                            <add source="&#x20;" target=""/>
+                            <add source="&#xA;" target=""/>
+                            <add source="&#xD;" target=""/>
+                            <add source="&#x9;" target=""/>
                         </mapping>
                     </sanitizer>
                 </param>
@@ -48,93 +52,115 @@
                 <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' />                
             </when>
         </conditional>
-        <param name="human_id_mapping" type="select" label="Select species for ID conversion" >
-            <options from_data_table="human_id_mapping" >
-            </options>
-        </param>
-	    <conditional name="idti" >
-            <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >
-		        <option value="neXtProt_ID" >neXtProt ID (e.g. NX_P31946)</option>
-		        <option value="UniProt.AC" selected="True" >Uniprot accession number (e.g. P31946)</option>
-		        <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
-		        <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
-		        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-		        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
-		        <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
-		        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
-		        <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
-		        <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
-		        <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
-		        <option value="Ensembl" >Ensembl gene ID (e.g. ENSG00000166913)</option>
-		        <option value="Ensembl_TRS" >Ensembl transcript ID (e.g. ENST00000353703)</option>
-		        <option value="Ensembl_PRO" >Ensembl protein ID (e.g. ENSP00000300161)</option>
-	        </param>
-	        <when value="neXtProt_ID" >
-	        </when>
-	        <when value="UniProt.AC" >
-	        </when>
-	        <when value="UniProt.ID" >
-	        </when>
-	        <when value="GeneID" >
-	        </when>
-	        <when value="RefSeq" >
-	        </when>
-	        <when value="GI" >
-	        </when>
-	        <when value="PDB" >
-	        </when>
-	        <when value="GO" >
-	        </when>
-	        <when value="PIR" >
-	        </when>
-	        <when value="MIM" >
-	        </when>
-	        <when value="UniGene" >
-	        </when>
-	        <when value="Ensembl" >
-	        </when>
-	        <when value="Ensembl_TRS" >
-	        </when>
-	        <when value="Ensembl_PRO" >
-	        </when>
-	    </conditional>
-	    <section name="idto" title="Target type of IDs" expanded="True" >
-            <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" >
-		        <option value="neXtProt_ID" >neXtProt ID (e.g. NX_P31946)</option>
-		        <option value="UniProt.AC" >Uniprot accession number (e.g. P31946)</option>
-		        <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
-		        <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
-		        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
-		        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
-		        <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
-		        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
-		        <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
-		        <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
-		        <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
-		        <option value="Ensembl" >Ensembl gene ID (e.g. ENSG00000166913)</option>
-		        <option value="Ensembl_TRS" >Ensembl transcript ID (e.g. ENST00000353703)</option>
-		        <option value="Ensembl_PRO" >Ensembl protein ID (e.g. ENSP00000300161)</option>
-	        </param>
-	    </section>
+        <conditional name="species"> <!-- Species selector: each 'when' branch declares its own 'idtypein' source-type select and an 'idto' section holding the multi-select 'idtypeout'. The mouse branch lists the same option values as the human branch except that it has no neXtProt entry. -->
+            <param name="mapping_file" type="select" label="Select species for ID conversion" >
+                <options from_data_table="id_mapping_tab"/>
+                <option value="human_id_mapping"></option>
+                <option value="mouse_id_mapping"></option>
+            </param>
+            <when value="human_id_mapping">
+                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >
+                    <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option>
+                    <option value="UniProt.AC" selected="True" >Uniprot accession number (e.g. P31946)</option>
+                    <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                    <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
+                    <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                    <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                    <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
+                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                    <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
+                    <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                    <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
+                    <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option>
+                    <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option>
+                    <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option>
+                    <option value="BioGrid" >BioGrid (e.g. 113361)</option>
+                    <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
+                    <option value="KEGG" >KEGG (e.g. hsa:7529)</option>
+                </param>
+                <section name="idto" title="Target type of IDs" expanded="True" >
+                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" >
+                        <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option>
+                        <option value="UniProt.AC" >Uniprot accession number (e.g. P31946)</option>
+                        <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                        <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
+                        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                        <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
+                        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                        <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
+                        <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                        <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
+                        <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option>
+                        <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option>
+                        <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option>
+                        <option value="BioGrid" >BioGrid (e.g. 113361)</option>
+                        <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
+                        <option value="KEGG" >KEGG (e.g. hsa:7529)</option>
+                    </param>
+                </section>
+            </when>
+            <when value="mouse_id_mapping">
+                <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >
+                    <option value="UniProt.AC" selected="True" >Uniprot accession number (e.g. P31946)</option>
+                    <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                    <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
+                    <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                    <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                    <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
+                    <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                    <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
+                    <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                    <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
+                    <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option>
+                    <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option>
+                    <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option>
+                    <option value="BioGrid" >BioGrid (e.g. 113361)</option>
+                    <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
+                    <option value="KEGG" >KEGG (e.g. hsa:7529)</option>
+                </param>
+                <section name="idto" title="Target type of IDs" expanded="True" >
+                    <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" >
+                        <option value="UniProt.AC" >Uniprot accession number (e.g. P31946)</option>
+                        <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option>
+                        <option value="GeneID" >Entrez gene ID (e.g. 7529)</option>
+                        <option value="RefSeq" >RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                        <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                        <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option>
+                        <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                        <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option>
+                        <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                        <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option>
+                        <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option>
+                        <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option>
+                        <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option>
+                        <option value="BioGrid" >BioGrid (e.g. 113361)</option>
+                        <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option>
+                        <option value="KEGG" >KEGG (e.g. hsa:7529)</option>
+                    </param>
+                </section>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
         <data name="output" format="tabular" />
     </outputs>
     <tests>
         <test>
-            <conditional name="input">
-                <param name="ids" value="file" />
+            <conditional name="input" >
+                <param name="ids" value="file"/>
                 <param name="file" value="FKW_Lacombe_et_al_2017_OK.txt" />
-                <param name="header" value="True" />
-                <param name="ncol" value="c1" />
+                <param name="header" value="true" />
+                <param name="ncol" value="c1"/>
             </conditional>
-            <conditional name="idti">
-                <param name="idtypein" value="UniProt.AC" />
+            <conditional name="species">
+                <param name="mapping_file" value="human_id_mapping"/>
+                <param name="idtypein" value="UniProt.AC"/>
+                <section name="idto">
+                    <param name="idtypeout" value="neXtProt,UniProt.ID,GeneID,MIM,Ensembl_Gene" /> <!-- Values must match the human 'idtypeout' option values; the gene-level Ensembl option is named Ensembl_Gene. -->
+                </section>
             </conditional>
-            <section name="idto">
-                <param name="idtypeout" value="neXtProt_ID,UniProt.ID,GeneID,MIM,Ensembl" />
-            </section>
-                <output name="output" file="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt" />
+            <output name="output" value="ID_Converted_FKW_Lacombe_et_al_2017_OK.txt" />
         </test>
     </tests>
     <help><![CDATA[
@@ -182,7 +208,10 @@
 
 This tool converts human IDs using the following source files:
 
-* HUMAN_9606_idmapping_selected.tab (Uniprot 15/05/2018)
+* HUMAN_9606_idmapping_selected.tab (Uniprot 02/07/2018)
+    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+
+* HUMAN_9606_idmapping.dat (Uniprot 02/07/2018)
     Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
 
 * nextprot_ac_list_all.txt (Nextprot released on 17/01/2018)
@@ -203,7 +232,7 @@
 
 **Authors**
 
-T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
 
 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform