view id_converter.xml @ 1:3b654cb3f6ad draft

planemo upload commit 74f72978b11230744b116d34fe3f2aa7934bf545-dirty
author proteore
date Tue, 27 Feb 2018 08:42:18 -0500
parents f2d0b13d9615
children 9b3a4ee0dcc6
line wrap: on
line source

<tool id="IDconverter" name="ID Converter" version="0.1.0">
    <!-- Short summary of what the tool does. -->
    <description>convert public database identifiers (Homo sapiens only for now)</description>
    <!-- The wrapper only needs the R runtime, pinned to version 3.4.1. -->
    <requirements>
        <requirement type="package" version="3.4.1">R</requirement>
    </requirements>
    <!-- Exit codes of 1 and above are matched by this rule. -->
    <stdio>
        <exit_code range="1:"/>
    </stdio>
    <!--
        Runs id_converter_UniProt.R with six positional arguments:
          1. source identifier type
          2. the identifiers: either the pasted text, or "file,column,header"
          3. "list" or "file", telling the script how to read argument 2
          4. selected target identifier type(s)
          5. output dataset path
          6. bundled human ID mapping table

        The deprecated "interpreter" attribute is replaced by an explicit Rscript call.
        Every substituted value is wrapped in single quotes so the shell cannot expand
        anything inside it. This matters for the pasted text, whose sanitizer lets all
        printable characters through except the single quote.
    -->
    <command><![CDATA[
        Rscript '$__tool_directory__/id_converter_UniProt.R'
            '$idti.idtypein'
            #if $input.ids == "text"
                '$input.txt'
                'list'
            #else
                '$input.file,$input.ncol,$input.header'
                'file'
            #end if
            '$idto.idtypeout'
            '$output'
            '$__tool_directory__/tool-data/human_id_mapping_file.txt'
    ]]></command>
    <inputs>
        <!-- Where the identifiers come from: pasted into a text box or read from a dataset column. -->
        <conditional name="input">
            <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)">
                <option value="text">Copy/paste your identifiers</option>
                <option value="file">Input file containing your identifiers</option>
            </param>
            <when value="text">
                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: P31946 P62258'>
                    <!-- Accept every printable character except the single quote, which is rewritten to __sq__. -->
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs"/>
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?"/>
                <!-- Defaults to the first column and rejects an empty value, so the command line never carries an empty column reference. -->
                <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'>
                    <validator type="empty_field" message="Please enter the column that contains the IDs, for example c1"/>
                </param>
            </when>
        </conditional>
        <!-- Source identifier type. Kept as a conditional with empty branches so the value stays addressable as $idti.idtypein. -->
        <conditional name="idti">
            <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section">
                <option value="neXtProt_ID">neXtProt ID (e.g. NX_P31946)</option>
                <option value="UniProt.AC" selected="true">Uniprot accession number (e.g. P31946)</option>
                <option value="UniProt.ID">Uniprot ID (e.g 1433B_HUMAN)</option>
                <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
                <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
                <option value="GI">GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
                <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
                <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
                <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
                <option value="Ensembl">Ensembl gene ID (e.g. ENSG00000166913)</option>
                <option value="Ensembl_TRS">Ensembl transcript ID (e.g. ENST00000353703)</option>
                <option value="Ensembl_PRO">Ensembl protein ID (e.g. ENSP00000300161)</option>
            </param>
            <when value="neXtProt_ID"/>
            <when value="UniProt.AC"/>
            <when value="UniProt.ID"/>
            <when value="GeneID"/>
            <when value="RefSeq"/>
            <when value="GI"/>
            <when value="PDB"/>
            <when value="GO"/>
            <when value="PIR"/>
            <when value="MIM"/>
            <when value="UniGene"/>
            <when value="Ensembl"/>
            <when value="Ensembl_TRS"/>
            <when value="Ensembl_PRO"/>
        </conditional>
        <!-- Target identifier type(s); several may be ticked at once. -->
        <section name="idto" title="Target type of IDs" expanded="true">
            <!-- A multiple select is optional unless told otherwise; at least one target is required so the script never receives an empty selection. -->
            <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="true" optional="false" help="Please see example of IDs in help section">
                <option value="neXtProt_ID">neXtProt ID (e.g. NX_P31946)</option>
                <option value="UniProt.AC">Uniprot accession number (e.g. P31946)</option>
                <option value="UniProt.ID">Uniprot ID (e.g 1433B_HUMAN)</option>
                <option value="GeneID">Entrez gene ID (e.g. 7529)</option>
                <option value="RefSeq">RefSeq (NCBI) protein (e.g.  NP_003395.1)</option>
                <option value="GI">GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option>
                <option value="PDB">Protein DataBank ID (e.g. 2BR9:A)</option>
                <option value="GO">GOterms (Gene Ontology) ID (e.g. GO:0070062)</option>
                <option value="PIR">Protein Information Resource ID (e.g. S34755)</option>
                <option value="MIM">OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option>
                <option value="UniGene">Unigene ID (e.g. Hs.643544)</option>
                <option value="Ensembl">Ensembl gene ID (e.g. ENSG00000166913)</option>
                <option value="Ensembl_TRS">Ensembl transcript ID (e.g. ENST00000353703)</option>
                <option value="Ensembl_PRO">Ensembl protein ID (e.g. ENSP00000300161)</option>
            </param>
        </section>
    </inputs>
    <!-- Single tabular dataset; its path is handed to the R script as "$output". -->
    <outputs>
        <data format="tabular" name="output"/>
    </outputs>
    <tests>
        <!-- File input, IDs in column c1 with a header row, UniProt accessions mapped to five target types. -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="FKW_Lacombe_et_al_2017_OK.txt"/>
                <param name="header" value="True"/>
                <param name="ncol" value="c1"/>
            </conditional>
            <conditional name="idti">
                <param name="idtypein" value="UniProt.AC"/>
            </conditional>
            <section name="idto">
                <param name="idtypeout" value="neXtProt_ID,UniProt.ID,GeneID,MIM,Ensembl"/>
            </section>
            <output name="output" file="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt"/>
        </test>
    </tests>
    <help><![CDATA[
This tool converts a list of IDs to another identifier type. Select the source and target types from the options above (the supported source and target types are listed below).

After choosing the type of input IDs, you can choose one or more types of IDs you would like to map to. 

If your input is a list of IDs or a single-column file, the tool will return a file containing the mapped IDs. Please note that "NA" is returned when there is no corresponding ID.

If your input is a multiple-column file, the mapped IDs column(s) will be added at the end of the input file.

**Available databases**

* neXtProt ID (e.g. NX_P31946)

* Uniprot accession number (e.g. P31946)

* Uniprot ID (e.g. 1433B_HUMAN)

* Entrez gene ID (e.g. 7529)

* RefSeq (NCBI) protein (e.g. NP_003395.1)

* GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)

* Protein DataBank ID (e.g. 2BR9:A)

* GOterms (Gene Ontology) ID (e.g. GO:0070062)

* Protein Information Resource ID (e.g. S34755)

* OMIM (Online Mendelian Inheritance in Man database) ID (e.g. 601289)

* Unigene ID (e.g. Hs.643544)

* Ensembl gene ID (e.g. ENSG00000166913)

* Ensembl transcript ID (e.g. ENST00000353703)

* Ensembl protein ID (e.g. ENSP00000300161)

-----

.. class:: infomark

This tool converts human IDs using the following source files:

* HUMAN_9606_idmapping_selected.tab
    Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/

* nextprot_ac_list_all.txt 
    Downloaded from ftp://ftp.nextprot.org/pub/current_release/ac_lists/

-----

.. class:: warningmark

Accession numbers containing a hyphen ("-"), which normally denote isoforms, are treated
as their canonical form.

For example, "Q71U36-2" will be treated as "Q71U36".

-----

.. class:: infomark

**Authors**

T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
 
    ]]></help>
    <!-- No citations are registered for this tool. -->
    <citations/>
</tool>