Mercurial > repos > pieterlukasse > primo_multiomics
changeset 7:ce9228263148
renamed to TermMapper
author | pieter.lukasse@wur.nl |
---|---|
date | Mon, 23 Mar 2015 21:02:01 +0100 |
parents | 8fa07f40d2eb |
children | 97e10319d86f |
files | LICENSE README.rst Results2O.jar TermMapperTool.jar results2o.xml term_mapper.xml |
diffstat | 6 files changed, 209 insertions(+), 108 deletions(-) [+] |
line wrap: on
line diff
--- a/LICENSE Fri Aug 01 17:21:30 2014 +0200 +++ b/LICENSE Mon Mar 23 21:02:01 2015 +0100 @@ -1,7 +1,7 @@ Apache License Version 2.0, January 2004 - http://www.apache.org/licenses/ + http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
--- a/README.rst Fri Aug 01 17:21:30 2014 +0200 +++ b/README.rst Mon Mar 23 21:02:01 2015 +0100 @@ -20,6 +20,7 @@ ============== ====================================================================== Date Changes -------------- ---------------------------------------------------------------------- +August 2014 * improvements release May 2014 * first release via Tool Shed ============== ======================================================================
--- a/results2o.xml Fri Aug 01 17:21:30 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,107 +0,0 @@ -<tool name="Results2O" id="results2o1" version="0.0.1"> - <description>use ontology mapping to annotate results (e.g. annotate protein identifications with Gene Ontology[GO] terms)</description> - <!-- - For remote debugging start you listener on port 8000 and use the following as command interpreter: - java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 - --> - <!-- similar to "join two datasets" tool http://galaxy.wur.nl/galaxy_production/root?tool_id=join1 - but this one is probably having more powerful features like supporting multiple ';' codes in key fields - and the feature in ontologyTermColName(s) supporting direct hierarchy like annotation --> - <command interpreter="java -jar "> - Results2O.jar - -inputFileName $inputFileName - -inputIdColumnName "$inputIdColumnName" - -inputIdPrefix "$inputIdPrefix" - -quantifColumn "$quantifColumn" - - -ontologyMappingFileName $ontologyMappingFileName - -mappingFileIdColName "$mappingFileIdColName" - -mappingIdPrefix "$mappingIdPrefix" - -mappingFileOntologyTermColName "$mappingFileOntologyTermColName" - -removeWhiteSpacesFromOterms $removeWhiteSpacesFromOterms - - -outputFileName $outputFileName - -outputObservationsFileName $outputObservationsFileName - - </command> - - <inputs> - - <param name="inputFileName" type="data" format="tabular,csv" label="Input file (TSV/CSV)" /> - <param name="inputIdColumnName" type="text" size="50" value="" label="ID column name" help="Name of the column containing the identification codes (in the given input file)"/> - <param name="inputIdPrefix" type="text" size="50" value="" label="(Optional) Prefix in ID column" - help="Fill in if any prefix is found in the ID column values (e.g. 
in some - files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this - example one would fill in 'lipidmaps:' as prefix)"/> - <param name="quantifColumn" type="text" size="50" value="" label="(Optional) Values column name" help="Name of the column containing the quantification values (in the given input file)"/> - - <!-- =================== ONTOLOGY part ============== --> - <param name="ontologyMappingFileName" type="data" format="obo" label="ID to Ontology mapping file (TSV/CSV)" help="Simple file linking the coding scheme used for the identifications in the given input file to one or more ontology terms."/> - <param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name (in ontology mapping file)" help="Name of the column containing the identification codes (which will in fact link the input file records to the ontology records)"/> - <param name="mappingIdPrefix" type="text" size="50" value="" label="(Optional) Prefix in mapping ID column" - help="Fill in if any prefix is found in the ID column values (e.g. in some - files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this - example one would fill in 'lipidmaps:' as prefix)"/> - - <param name="mappingFileOntologyTermColName" type="text" size="50" value="" label="Ontology term column name(s)" - help="Name(s) of the column(s) containing the ontology terms in the ontology mapping file (and which will be transfered to the input file). - For using multiple columns, set the names separated by comma (,). If multiple columns are specified, the algorithm will look for an annotation in the first one, if none - found it will try the second one, and so forth. 
"/> - - <param name="removeWhiteSpacesFromOterms" type="boolean" checked="false" - label="Remove white spaces from ontology terms" - help="This could be needed for some ontologies, like the current custom one for Lipidmaps."/> - - </inputs> - <outputs> - #if isinstance( $inputFileName.datatype, $__app__.datatypes_registry.get_datatype_by_extension('tabular').__class__): - <data name="outputFileName" format="tabular" label="${tool.name} on ${on_string}: annotated file " ></data> - #else: - <data name="outputFileName" format="csv" label="${tool.name} on ${on_string}: annotated file " ></data> - #end if - - <data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: ontology observations file (TSV)"></data> - </outputs> - <tests> - <!-- find out how to use --> - <test> - </test> - </tests> - <help> - -.. class:: infomark - -This tool is responsible for annotating quantifications result file -with the ontology terms given in a mapping file. This mapping file links the items found in the result file -(e.g. protein identifications coded in common protein coding formats such as UniProt ) -to their respective ontology terms (e.g. GO terms). It enables users to use the cross-reference -information now available in different repositories (like uniprot and KEGG - see for example -http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ ) -to map their results to other useful coding schemes such as ontologies for functional annotations. - -As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to -see if their organism has been mapped to GO terms by Uniprot. For example the link -http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the Uniprot repository and cross-references -for the taxonomy 2850. -When the organism being studied is not available, then other strategies -could be tried (like Blast2GO for example). 
- - -Despite the specific examples above, this class is generic and can be used to map any -results file to an Ontology according to a given mapping file. One example would be mapping metabolomics -identifications to the CheBI ontology. - - ------ - -**Output** - -This method will read in the given input file and for each line it will add a new column -containing the Ontology terms found for the ID in that line. So the output file is the same as the -input file + extra Ontology terms column (separated by ; ). - -A second summarized "ontology observations" file is also generated which can be used for visualizing the results -in an ontology viewer (e.g. see OntologyAndObservationsViewer). - - </help> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/term_mapper.xml Mon Mar 23 21:02:01 2015 +0100 @@ -0,0 +1,207 @@ +<tool name="TermMapperTool" id="TermMapperTool1" version="0.0.2"> + <description>use cross-reference lookup tables to annotate results</description> + <!-- + For remote debugging start you listener on port 8000 and use the following as command interpreter: + java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 + --> + <!-- similar to "join two datasets" tool http://galaxy.wur.nl/galaxy_production/root?tool_id=join1 + but this one is probably having more powerful features like supporting multiple ';' codes in key fields + and the feature in termColName(s) supporting direct hierarchy like annotation --> + <command interpreter="java -jar "> + TermMapperTool.jar + -inputFileName $inputFileName + -inputIdColumnName "$inputIdColumnName" + #if $inputIdCol.inputIdHasPrefix == True + -inputIdPrefix "$inputIdCol.inputIdPrefix" + #end if + + -mappingFileName $mappingFileName + -mappingFileIdColName "$mappingFileIdColName" + + #if $mappingIdCol.mappingIdHasPrefix == True + -mappingIdPrefix "$mappingIdCol.mappingIdPrefix" + #end if + + -mappingFileTermColName "$mappingFileTermColName" + + -outputFileName $outputFileName + + #if $genObservations.genObservationsFile == True + -outputObservationsFileName $outputObservationsFileName + -quantifColumn "$genObservations.quantifColumn" + #end if + + -mappedTermsColName $mappedTermsColName + + </command> + + <inputs> + + <param name="inputFileName" type="data" format="tabular,csv" label="Target file (TSV/CSV)" /> + + <param name="inputIdColumnName" type="text" size="50" value="" label="ID column name" + help="Name of the column containing the identification codes (in the given input file)"/> + + <conditional name="inputIdCol"> + <param name="inputIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false" + label="ID values have a prefix"/> + <when value="Yes"> + <param 
name="inputIdPrefix" type="text" size="50" value="" label="Prefix in ID column" + help="Fill in if any prefix is found in the ID column values (e.g. in some + files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this + example one would fill in 'lipidmaps:' as prefix)"/> + </when> + <when value="No"> + </when> + </conditional> + + <!-- =================== cross-reference part ============== --> + <param name="mappingFileName" type="data" format="tabular,csv" label="Lookup table (TSV/CSV)" help="Simple mapping file between the coding scheme used to another scheme"/> + <param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name (in lookup table)" help="Name of the ID column for the lookup"/> + + <conditional name="mappingIdCol"> + <param name="mappingIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false" + label="ID values have a prefix"/> + <when value="Yes"> + <param name="mappingIdPrefix" type="text" size="50" value="" label="Prefix in ID column" + help="Fill in if any prefix is found in the ID column values (e.g. in some + files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this + example one would fill in 'lipidmaps:' as prefix)"/> + </when> + <when value="No"> + </when> + </conditional> + + <param name="mappingFileTermColName" type="text" size="50" value="" label="Term column name(s) or number(s)" + help="Name(s) or number(s) of the column(s) containing the term(s) in the lookup table (and which will be transfered to the target file based on ID match in 'ID column name'). + For using multiple term column names, set the names separated by comma (,). + If multiple columns are specified, the algorithm will look for an annotation in the first one, if none + found it will try the second one, and so forth. 
"/> + + + <param name="mappedTermsColName" type="text" size="50" value="Mapped terms" label="Name to give to the new column:" + help="Name to give to the new column that will be added to the target file. This new column is the one + that will contain the respectively mapped terms."/> + + <conditional name="genObservations"> + <param name="genObservationsFile" type="boolean" truevalue="Yes" falsevalue="No" checked="false" + label="Generate also observations file"/> + <when value="Yes"> + <param name="quantifColumn" type="text" size="50" value="" + label="(Optional) Values column name" + help="Name of the column containing the quantification values (in the given input file)"/> + </when> + <when value="No"> + </when> + </conditional> + + </inputs> + <outputs> + #if isinstance( $inputFileName.datatype, $__app__.datatypes_registry.get_datatype_by_extension('tabular').__class__): + <data name="outputFileName" format="tabular" label="${tool.name} on ${on_string}: annotated file " ></data> + #else: + <data name="outputFileName" format="csv" label="${tool.name} on ${on_string}: annotated file " ></data> + #end if + + <data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: term observations file (TSV)"> + <!-- If the expression is false, the file is not created --> + <filter>( genObservations.genObservationsFile == True )</filter> + </data> + </outputs> + <tests> + <!-- find out how to use --> + <test> + </test> + </tests> + <help> + +.. class:: infomark + + +This tool is responsible for annotating the given target file +with the terms given in a lookup table. This lookup table maps the items found in the target file +(e.g. protein identifications coded in common protein coding formats such as UniProt ) +to their respective terms (e.g. GO terms). 
It enables users to use the cross-reference +information now available from different repositories (like uniprot and KEGG - see for example +http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ ) +to map their data to other useful coding schemes or to ontologies and functional annotations. + +.. class:: infomark + +**NB:** Currently the tool will do "smart parsing" of hierarchy based fields in the target file ID column. + This means that if the column contains a ".", the trailing part of the ID after the "." is ignored if the full + ID does not get a match in the lookup table while the part before the "." does. + +.. class:: infomark + +Examples of usage: + + annotate protein identifications with Gene Ontology[GO] terms + + annotate metabolite CAS identifications with chebi codes + + add KEGG gene codes to a file containing UNIPROT codes + + add KEGG compound codes to a file containing chebi codes + + etc + +As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to +see if their organism has been mapped to GO terms by Uniprot. For example the link +http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the Uniprot repository and cross-references +for the taxonomy 2850. +When the organism being studied is not available, then other strategies +could be tried (like Blast2GO for example). + +Despite the specific examples above, this class is generic and can be used to map any +values to new terms according to a given lookup table. + +.. class:: infomark + +*Omics cross-reference resources on the web:* + +LinkDB: http://www.genome.jp/linkdb/ + +*Ready to use metabolomics links:* + +http://rest.genome.jp/link/compound/chebi + +http://rest.genome.jp/link/compound/lipidmaps + +http://rest.genome.jp/link/compound/lipidbank + +http://rest.genome.jp/link/compound/hmdb + + +*Ready to use proteomics links:* + +http://rest.genome.jp/link/uniprot/pti (Phaeodactylum Tri.) 
+ +http://rest.genome.jp/link/uniprot/hsa (Homo Sapiens) + +(for organism code list see: ) + + +Uniprot to GO + +http://www.uniprot.org/taxonomy/ + + +----- + +**Output** + +This method will read in the given input file and for each line it will add a new column +containing the terms found for the ID in that line. So the output file is the same as the +input file + extra terms column (separated by ; ). + +----- + +**Link to ontology viewer** + +A second summarized "terms observations" file can also be generated. +In case the terms are ontology terms, this file can be used for visualizing the results +in the ontology viewer "OntologyAndObservationsViewer". + + </help> +</tool>