Mercurial > repos > proteore > proteore_id_converter
diff id_converter.xml @ 15:b50d913ec067 draft
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
author | proteore |
---|---|
date | Tue, 18 Dec 2018 09:57:21 -0500 |
parents | 659f1248f535 |
children | b6607b7e683f |
line wrap: on
line diff
--- a/id_converter.xml Wed Sep 19 04:45:04 2018 -0400 +++ b/id_converter.xml Tue Dec 18 09:57:21 2018 -0500 @@ -1,5 +1,5 @@ -<tool id="IDconverter" name="ID Converter" version="2018.09.18"> - <description>convert public database identifiers +<tool id="IDconverter" name="ID Converter" version="2018.12.18"> + <description>(Human, Mouse, Rat) </description> <requirements> <requirement type="package" version="3.4.1">R</requirement> @@ -8,7 +8,7 @@ <exit_code range="1:" /> </stdio> <command interpreter="Rscript"> - $__tool_directory__/id_converter_UniProt.R + $__tool_directory__/id_converter.R --id_type="$species.idtypein" #if $input.ids == "text" --input="$input.txt" @@ -21,17 +21,17 @@ #end if --target_ids="$species.idto.idtypeout" --output="$output" - --ref_file="$__tool_directory__/${ filter( lambda x: str( x[0] ) == str( $species.mapping_file ), $__app__.tool_data_tables['id_mapping_tab'].get_fields() )[0][-1] }" + --ref_file="$__tool_directory__/${ filter( lambda x: str( x[0] ) == str( $species.mapping_file ), $__app__.tool_data_tables['proteore_id_mapping'].get_fields() )[0][-1] }" </command> <inputs> <conditional name="input" > - <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)" > + <param name="ids" type="select" label="Enter IDs" help="Copy/paste or from a file (e.g. 
table)" > <option value="text">Copy/paste your identifiers</option> - <option value="file" selected="true">Input file containing your identifiers</option> + <option value="file" selected="true">Input file containing IDs</option> </param> <when value="text" > - <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by "," into the form field, for example: P31946,P62258' > + <param name="txt" type="text" label="Copy/paste IDs" help='IDs must be separated by tab, space or carriage return into the form field, for example: P31946 P62258' > <sanitizer invalid_char=""> <valid initial="string.printable"> <remove value="'"/> @@ -47,27 +47,28 @@ </param> </when> <when value="file" > - <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs" help="" /> - <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" /> - <param name="ncol" type="text" value="c1" label="The column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' /> + <param name="file" type="data" format="txt,tabular" label="Select your file" help="" /> + <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" 
/> + <param name="ncol" type="text" value="c1" label="Column number of IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' /> </when> </conditional> <conditional name="species"> - <param name="mapping_file" type="select" label="Select species for ID conversion" > - <options from_data_table="id_mapping_tab"/> + <param name="mapping_file" type="select" label="Species" > + <options from_data_table="proteore_id_mapping"/> <option value="human_id_mapping"></option> <option value="mouse_id_mapping"></option> + <option value="rat_id_mapping"></option> </param> <when value="human_id_mapping"> - <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" > + <param name="idtypein" type="select" label="Type/source of IDs" optional="false" > <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option> - <option value="UniProt.AC" selected="True" >Uniprot accession number (e.g. P31946)</option> - <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option> + <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. P31946)</option> + <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option> <option value="GeneID" >Entrez gene ID (e.g. 7529)</option> <option value="RefSeq" >RefSeq (NCBI) protein (e.g. NP_003395.1)</option> - <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option> + <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option> <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option> - <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option> + <option value="GO" >GO terms (Gene Ontology) ID (e.g. GO:0070062)</option> <option value="PIR" >Protein Information Resource ID (e.g. 
S34755)</option> <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option> <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option> @@ -76,16 +77,16 @@ <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option> <option value="BioGrid" >BioGrid (e.g. 113361)</option> <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option> - <option value="KEGG" >KEGG (e.g. hsa:7529)</option> + <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option> </param> - <section name="idto" title="Target type of IDs" expanded="True" > - <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" > + <section name="idto" title="Target type" expanded="True" > + <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" > <option value="neXtProt" >neXtProt ID (e.g. NX_P31946)</option> - <option value="UniProt.AC" >Uniprot accession number (e.g. P31946)</option> - <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option> + <option value="UniProt-AC" >UniProt accession number (e.g. P31946)</option> + <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option> <option value="GeneID" >Entrez gene ID (e.g. 7529)</option> <option value="RefSeq" >RefSeq (NCBI) protein (e.g. NP_003395.1)</option> - <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option> + <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option> <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option> <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option> <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option> @@ -96,17 +97,17 @@ <option value="Ensembl_Protein" >Ensembl protein ID (e.g. 
ENSP00000300161)</option> <option value="BioGrid" >BioGrid (e.g. 113361)</option> <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option> - <option value="KEGG" >KEGG (e.g. hsa:7529)</option> + <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option> </param> </section> </when> <when value="mouse_id_mapping"> - <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" > - <option value="UniProt.AC" selected="True" >Uniprot accession number (e.g. P31946)</option> - <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option> + <param name="idtypein" type="select" label="Type/source of IDs" optional="false" > + <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. P31946)</option> + <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option> <option value="GeneID" >Entrez gene ID (e.g. 7529)</option> <option value="RefSeq" >RefSeq (NCBI) protein (e.g. NP_003395.1)</option> - <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option> + <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option> <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option> <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option> <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option> @@ -117,33 +118,71 @@ <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option> <option value="BioGrid" >BioGrid (e.g. 113361)</option> <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option> - <option value="KEGG" >KEGG (e.g. hsa:7529)</option> + <option value="KEGG" >KEGG gene id (e.g. 
hsa:7529)</option> </param> <section name="idto" title="Target type of IDs" expanded="True" > - <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" help="Please see example of IDs in help section" > - <option value="UniProt.AC" >Uniprot accession number (e.g. P31946)</option> - <option value="UniProt.ID" >Uniprot ID (e.g 1433B_HUMAN)</option> + <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" > + <option value="UniProt-AC" >UniProt accession number (e.g. P31946)</option> + <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option> <option value="GeneID" >Entrez gene ID (e.g. 7529)</option> <option value="RefSeq" >RefSeq (NCBI) protein (e.g. NP_003395.1)</option> - <option value="GI" >GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448)</option> + <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option> <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option> <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option> <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option> - <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option> <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option> <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option> <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option> <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option> <option value="BioGrid" >BioGrid (e.g. 113361)</option> <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option> - <option value="KEGG" >KEGG (e.g. hsa:7529)</option> + <option value="KEGG" >KEGG gene id (e.g. 
hsa:7529)</option> + </param> + </section> + </when> + <when value="rat_id_mapping"> + <param name="idtypein" type="select" label="Select type/source of identifier of your list" optional="false" > + <option value="UniProt-AC" selected="True" >UniProt accession number (e.g. P31946)</option> + <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option> + <option value="GeneID" >Entrez gene ID (e.g. 7529)</option> + <option value="RefSeq" >RefSeq (NCBI) protein (e.g. NP_003395.1)</option> + <option value="GI" >GI (NCBI GI number) (e.g. 21328448)</option> + <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option> + <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option> + <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option> + <option value="MIM" >OMIM (Online Mendelian Inheritance in Man database) ID (e.g: 601289)</option> + <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option> + <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option> + <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option> + <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option> + <option value="BioGrid" >BioGrid (e.g. 113361)</option> + <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option> + <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option> + </param> + <section name="idto" title="Target type of IDs" expanded="True" > + <param name="idtypeout" type="select" label="Target type of IDs you would like to map to" display="checkboxes" multiple="True" optional="false" > + <option value="UniProt-AC" >UniProt accession number (e.g. P31946)</option> + <option value="UniProt-ID" >UniProt ID (e.g 1433B_HUMAN)</option> + <option value="GeneID" >Entrez gene ID (e.g. 7529)</option> + <option value="RefSeq" >RefSeq (NCBI) protein (e.g. NP_003395.1)</option> + <option value="GI" >GI (NCBI GI number) (e.g. 
21328448)</option> + <option value="PDB" >Protein DataBank ID (e.g. 2BR9:A)</option> + <option value="GO" >GOterms (Gene Ontology) ID (e.g. GO:0070062)</option> + <option value="PIR" >Protein Information Resource ID (e.g. S34755)</option> + <option value="UniGene" >Unigene ID (e.g. Hs.643544)</option> + <option value="Ensembl_Gene" >Ensembl gene ID (e.g. ENSG00000166913)</option> + <option value="Ensembl_Transcript" >Ensembl transcript ID (e.g. ENST00000353703)</option> + <option value="Ensembl_Protein" >Ensembl protein ID (e.g. ENSP00000300161)</option> + <option value="BioGrid" >BioGrid (e.g. 113361)</option> + <option value="STRING" >STRING (e.g. 9606.ENSP00000300161)</option> + <option value="KEGG" >KEGG gene id (e.g. hsa:7529)</option> </param> </section> </when> </conditional> </inputs> <outputs> - <data name="output" format="tabular" /> + <data name="output" format="tsv" /> </outputs> <tests> <test> @@ -155,36 +194,47 @@ </conditional> <conditional name="species"> <param name="mapping_file" value="human_id_mapping"/> - <param name="idtypein" value="UniProt.AC"/> + <param name="idtypein" value="UniProt-AC"/> <section name="idto"> - <param name="idtypeout" value="neXtProt,UniProt.ID,GeneID,MIM,Ensembl" /> + <param name="idtypeout" value="neXtProt,UniProt-ID,GeneID,MIM,Ensembl" /> </section> </conditional> <output name="output" value="ID_Converted_FKW_Lacombe_et_al_2017_OK.txt" /> </test> </tests> <help><![CDATA[ -This tool converts a list of IDs to another identifier type, select the source and target type from the dropdown menus above (see below supported source and target types). +**Description** +This tool converts a list of identifiers (IDs) into another type of ID. Currently the conversion of IDs applies to the following three species: Human (Homo sapiens), Mouse (Mus musculus), Rat (Rattus norvegicus). +Supported source and target types of IDs are listed below. After choosing the type of your input IDs, you can select one or more types of ID you may need. 
-After choosing the type of input IDs, you can choose one or more types of IDs you would like to map to. +----- -If your input is a list of IDs or a single-column file, the tool will return a file containing the mapped IDs. Please, note that a "NA" is returned when there is no corresponding ID. +**Input** +Provide a list of IDs (entered in copy/paste mode) or a single-column file; the tool will then return a file containing the mapped IDs. If your input is a multiple-column file, the mapped IDs column(s) will be added at the end of the input file. -**Available databases** +.. class:: warningmark + +Accession numbers with a hyphen ("-"), which normally correspond to isoforms, are treated as their canonical form. +For example, "Q71U36-2" will be treated as "Q71U36". + +----- + +**Parameters** +Target type of IDs currently supported: * neXtProt ID (e.g. NX_P31946) -* Uniprot accession number (e.g. P31946) +* UniProt accession number (e.g. P31946) -* Uniprot ID (e.g 1433B_HUMAN) +* UniProt ID (e.g. 1433B_HUMAN) * Entrez gene ID (e.g. 7529) -* RefSeq (NCBI) protein (e.g. NP_003395.1) +* RefSeq protein (NCBI) (e.g. NP_003395.1) -* GI (NCBI GI number) ID assigned to each sequence record processed by NCBI (e.g. 21328448) +* GI (NCBI GI number) (e.g. 21328448) * Protein DataBank ID (e.g. 2BR9:A) @@ -202,29 +252,36 @@ * Ensembl protein ID (e.g. ENSP00000300161) +* BioGrid (e.g. 113361) + +* STRING (e.g. 9606.ENSP00000300161) + +* KEGG gene id (e.g. hsa:7529) + +.. class:: warningmark + +neXtProt and OMIM IDs are only applicable to the Human species. + ----- -.. class:: infomark +**Output** + +A text file containing the selected type of IDs (in addition to the original column(s) provided). +Please note that "NA" is returned when there is no match between a source ID and the corresponding target ID. 
+ +----- + +**Data sources (release date)** This tool converts human IDs using the following source files: -* HUMAN_9606_idmapping_selected.tab (Uniprot 02/07/2018) - Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ - -* HUMAN_9606_idmapping.dat (Uniprot 02/07/18) - Tarball downloaded from ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ - -* nextprot_ac_list_all.txt (Nextprot released on 17/01/2018) - Downloaded from ftp://ftp.nextprot.org/pub/current_release/ac_lists/ - ------ - -.. class:: warningmark - -Accession numbers with an hyphen ("-") that normally correspond to isoform are considered -(and will therefore be treated) as similar to its canonical form. - -For example, "Q71U36-2" will be treated as "Q71U36". +- **HUMAN_9606_idmapping_selected.tab (Uniprot 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ +- **HUMAN_9606_idmapping.dat (Uniprot 23/10/18)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ +- **nextprot_ac_list_all.txt (Nextprot released on 10/10/2018)**: ftp://ftp.nextprot.org/pub/current_release/ac_lists/ +- **MOUSE_10090_idmapping_selected.tab (Uniprot 23/10/2018)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ +- **MOUSE_10090_idmapping.dat (Uniprot 23/10/18)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ +- **RAT_10116_idmapping.dat (Uniprot 23/10/18)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ +- **RAT_10116_idmapping_selected.tab (Uniprot 23/10/18)**: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ ----- @@ -232,9 +289,9 @@ **Authors** -T.P. 
Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR +David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR -Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform +Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR This work has been partially funded through the French National Agency for Research (ANR) IFB project.