diff id_converter.xml @ 0:f2d0b13d9615 draft

planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
author proteore
date Fri, 16 Feb 2018 03:16:29 -0500
parents
children 3b654cb3f6ad
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/id_converter.xml	Fri Feb 16 03:16:29 2018 -0500
@@ -0,0 +1,210 @@
+<tool id="IDconverter" name="ID Converter" version="0.1.0">
+    <!-- One-line summary of what the tool does. -->
+    <description>convert public database identifiers (Homo sapiens only for now)
+    </description>
+    <!-- NOTE(review): no requirement is declared although the command runs an R
+         script through Rscript; confirm how R is provided on the Galaxy host. -->
+    <requirements>
+    </requirements>
+    <!-- Error detection rule: matches every exit code greater than or equal to 1. -->
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <!-- Builds the Rscript command line.
+         Positional arguments, in order: source ID type; the IDs (pasted text, or
+         "dataset path,column,header flag"); the input kind ("list" or "file");
+         the selected target ID types; the output path; the ID mapping file.
+         Every substituted value is wrapped in single quotes: the sanitizer of the
+         "txt" parameter removes single quotes but lets the rest of
+         string.printable through, so inside double quotes the shell would still
+         expand dollar signs, backticks and embedded double quotes coming from
+         user input. The script is called explicitly through $__tool_directory__
+         instead of the deprecated "interpreter" attribute. -->
+    <command><![CDATA[
+        Rscript '$__tool_directory__/id_converter_UniProt.R'
+        '$idti.idtypein'
+        #if $input.ids == "text"
+            '$input.txt'
+            'list'
+        #else
+            '$input.file,$input.ncol,$input.header'
+            'file'
+        #end if
+        '$idto.idtypeout'
+        '$output'
+        '$__tool_directory__/../../utils/human_id_mapping_file.txt'
+    ]]></command>
+    <inputs>
+        <!-- Where the identifiers come from: pasted text or a column of a dataset. -->
+        <conditional name="input">
+            <param name="ids"
+                   type="select"
+                   label="Provide your identifiers"
+                   help="Copy/paste or ID list from a file (e.g. table)">
+                <option value="text">Copy/paste your identifiers</option>
+                <option value="file">Input file containing your identifiers</option>
+            </param>
+            <!-- Branch "text": identifiers typed or pasted into a text field. -->
+            <when value="text">
+                <param name="txt"
+                       type="text"
+                       label="Copy/paste your identifiers"
+                       help="IDs must be separated by spaces into the form field, for example: P31946 P62258">
+                    <!-- Accepts every character of string.printable except the
+                         single quote, which is rewritten to __sq__. -->
+                    <sanitizer>
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source="&apos;" target="__sq__"/>
+                        </mapping>
+                    </sanitizer>
+                </param>
+            </when>
+            <!-- Branch "file": a txt/tabular dataset, a header flag and the name
+                 of the column that holds the identifiers. -->
+            <when value="file">
+                <param name="file"
+                       type="data"
+                       format="txt,tabular"
+                       label="Choose a file that contains your list of IDs"
+                       help=""/>
+                <param name="header"
+                       type="boolean"
+                       checked="true"
+                       truevalue="true"
+                       falsevalue="false"
+                       label="Does your input file contain header?"/>
+                <param name="ncol"
+                       type="text"
+                       label="The column number of IDs to map"
+                       help="For example, fill in &quot;c1&quot; if it is the first column, &quot;c2&quot; if it is the second column and so on"/>
+            </when>
+        </conditional>
+        <!-- Source identifier type. Declared as a conditional whose branches are
+             all empty; the chosen value is read as $idti.idtypein. -->
+        <conditional name="idti">
+            <param name="idtypein"
+                   type="select"
+                   label="Select type/source of identifier of your list"
+                   help="Please see example of IDs in help section">
+                <option value="neXtProt_ID">neXtProt ID (e.g. NX_P31946)</option>
+                <option value="UniProt.AC" selected="True">Uniprot accession number (e.g. P31946)</option>
+                <option value="UniProt.ID">Uniprot ID (e.g 1433B_HUMAN)</option>
+                <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
+                <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                <option value="GI">GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
+                <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
+                <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
+                <option value="Ensembl">Ensembl gene ID (e.g. ENSG00000166913)</option>
+                <option value="Ensembl_TRS">Ensembl transcript ID (e.g. ENST00000353703)</option>
+                <option value="Ensembl_PRO">Ensembl protein ID (e.g. ENSP00000300161)</option>
+            </param>
+            <!-- One empty branch per option value. -->
+            <when value="neXtProt_ID"/>
+            <when value="UniProt.AC"/>
+            <when value="UniProt.ID"/>
+            <when value="GeneID"/>
+            <when value="RefSeq"/>
+            <when value="GI"/>
+            <when value="PDB"/>
+            <when value="GO"/>
+            <when value="PIR"/>
+            <when value="MIM"/>
+            <when value="UniGene"/>
+            <when value="Ensembl"/>
+            <when value="Ensembl_TRS"/>
+            <when value="Ensembl_PRO"/>
+        </conditional>
+        <!-- Target identifier types, shown as checkboxes with multiple selection
+             allowed; the selection is read as $idto.idtypeout.
+             NOTE(review): nothing visible here forces at least one box to be
+             ticked; confirm how the R script handles an empty selection. -->
+        <section name="idto" title="Target type of IDs" expanded="True">
+            <param name="idtypeout"
+                   type="select"
+                   multiple="True"
+                   display="checkboxes"
+                   label="Target type of IDs you would like to map to"
+                   help="Please see example of IDs in help section">
+                <option value="neXtProt_ID">neXtProt ID (e.g. NX_P31946)</option>
+                <option value="UniProt.AC">Uniprot accession number (e.g. P31946)</option>
+                <option value="UniProt.ID">Uniprot ID (e.g 1433B_HUMAN)</option>
+                <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
+                <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
+                <option value="GI">GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
+                <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
+                <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
+                <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
+                <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
+                <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
+                <option value="Ensembl">Ensembl gene ID (e.g. ENSG00000166913)</option>
+                <option value="Ensembl_TRS">Ensembl transcript ID (e.g. ENST00000353703)</option>
+                <option value="Ensembl_PRO">Ensembl protein ID (e.g. ENSP00000300161)</option>
+            </param>
+        </section>
+    </inputs>
+    <!-- A single tabular dataset; the command template refers to it as $output. -->
+    <outputs>
+        <data format="tabular" name="output"/>
+    </outputs>
+    <!-- Functional test: takes UniProt accession numbers from column c1 of a
+         file flagged as having a header, requests five target ID types and
+         compares the result with the stored expected file. -->
+    <tests>
+        <test>
+            <conditional name="input">
+                <param name="ids" value="file"/>
+                <param name="file" value="FKW_Lacombe_et_al_2017_OK.txt"/>
+                <param name="header" value="True"/>
+                <param name="ncol" value="c1"/>
+            </conditional>
+            <conditional name="idti">
+                <param name="idtypein" value="UniProt.AC"/>
+            </conditional>
+            <section name="idto">
+                <param name="idtypeout" value="neXtProt_ID,UniProt.ID,GeneID,MIM,Ensembl"/>
+            </section>
+            <output name="output" file="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+This tool converts a list of IDs to another identifier type. Select the source and target types from the menus above (the supported source and target types are listed below).
+
+After choosing the type of input IDs, you can choose one or more types of IDs you would like to map to. 
+
+If your input is a list of IDs or a single-column file, the tool will return a file containing the mapped IDs. Please note that "NA" is returned when there is no corresponding ID.
+
+If your input is a multiple-column file, the mapped IDs column(s) will be added at the end of the input file.
+
+**Available databases**
+
+* neXtProt ID (e.g. NX_P31946)
+
+* Uniprot accession number (e.g. P31946)
+
+* Uniprot ID (e.g. 1433B_HUMAN)
+
+* Entrez gene ID (e.g. 7529)
+
+* RefSeq (NCBI) protein (e.g.  NP_003395.1)
+
+* GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)
+
+* Protein DataBank ID (e.g. 2BR9:A)
+
+* GOterms (Gene Ontology) ID (e.g. GO:0070062)
+
+* Protein Information Resource ID (e.g. S34755)
+
+* OMIM (Online Mendelian Inheritance in Man database) ID (e.g. 601289)
+
+* Unigene ID (e.g. Hs.643544)
+
+* Ensembl gene ID (e.g. ENSG00000166913)
+
+* Ensembl transcript ID (e.g. ENST00000353703)
+
+* Ensembl protein ID (e.g. ENSP00000300161)
+
+-----
+
+.. class:: infomark
+
+This tool converts human IDs using the following source files:
+
+* HUMAN_9606_idmapping_selected.tab
+    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/
+
+* nextprot_ac_list_all.txt 
+    Downloaded from ftp://ftp.nextprot.org/pub/current_release/ac_lists/
+
+-----
+
+.. class:: warningmark
+
+Accession numbers containing a hyphen ("-"), which normally correspond to isoforms, are
+treated as their canonical form.
+
+For example, "Q71U36-2" will be treated as "Q71U36".
+
+-----
+
+.. class:: infomark
+
+**Authors**
+
+T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+ 
+    ]]></help>
+    <!-- No citation entries are declared for this tool. -->
+    <citations>
+    </citations>
+</tool>