changeset 7:ce9228263148

renamed to TermMapper
author pieter.lukasse@wur.nl
date Mon, 23 Mar 2015 21:02:01 +0100
parents 8fa07f40d2eb
children 97e10319d86f
files LICENSE README.rst Results2O.jar TermMapperTool.jar results2o.xml term_mapper.xml
diffstat 6 files changed, 209 insertions(+), 108 deletions(-) [+]
line wrap: on
line diff
--- a/LICENSE	Fri Aug 01 17:21:30 2014 +0200
+++ b/LICENSE	Mon Mar 23 21:02:01 2015 +0100
@@ -1,7 +1,7 @@
 
                                  Apache License
                            Version 2.0, January 2004
-                        http://www.apache.org/licenses/ 
+                        http://www.apache.org/licenses/
 
    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 
--- a/README.rst	Fri Aug 01 17:21:30 2014 +0200
+++ b/README.rst	Mon Mar 23 21:02:01 2015 +0100
@@ -20,6 +20,7 @@
 ============== ======================================================================
 Date            Changes
 -------------- ----------------------------------------------------------------------
+August 2014    * improvements release
 May 2014       * first release via Tool Shed
 ============== ======================================================================
 
Binary file Results2O.jar has changed
Binary file TermMapperTool.jar has changed
--- a/results2o.xml	Fri Aug 01 17:21:30 2014 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,107 +0,0 @@
-<tool name="Results2O" id="results2o1" version="0.0.1">
-	<description>use ontology mapping to annotate results (e.g. annotate protein identifications with Gene Ontology[GO] terms)</description>
-	<!-- 
-	   For remote debugging start you listener on port 8000 and use the following as command interpreter:
-	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
-	    -->
-	     <!--  similar to "join two datasets" tool http://galaxy.wur.nl/galaxy_production/root?tool_id=join1 
-	           but this one is probably having more powerful features like supporting multiple ';' codes in key fields 
-	           and the feature in ontologyTermColName(s) supporting direct hierarchy like annotation -->
-	<command interpreter="java -jar ">
-	    Results2O.jar 
-		-inputFileName $inputFileName
- 		-inputIdColumnName "$inputIdColumnName"
- 		-inputIdPrefix "$inputIdPrefix"  
-		-quantifColumn "$quantifColumn" 
-		
-		-ontologyMappingFileName $ontologyMappingFileName
-		-mappingFileIdColName "$mappingFileIdColName"  
-		-mappingIdPrefix "$mappingIdPrefix"  
-		-mappingFileOntologyTermColName "$mappingFileOntologyTermColName"
-		-removeWhiteSpacesFromOterms $removeWhiteSpacesFromOterms
-		
-		-outputFileName $outputFileName
-		-outputObservationsFileName $outputObservationsFileName
-        	    
-	</command>
-	
-	<inputs>
-	 	
-  		<param name="inputFileName" type="data" format="tabular,csv" label="Input file (TSV/CSV)" />
-  		<param name="inputIdColumnName" type="text" size="50" value="" label="ID column name" help="Name of the column containing the identification codes (in the given input file)"/>
-  		<param name="inputIdPrefix" type="text" size="50" value="" label="(Optional) Prefix in ID column" 
-  				help="Fill in if any prefix is found in the ID column values (e.g. in some 
-					 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this 
-					 example one would fill in 'lipidmaps:' as prefix)"/>
-  		<param name="quantifColumn" type="text" size="50" value="" label="(Optional) Values column name" help="Name of the column containing the quantification values (in the given input file)"/>
-  		
-  		<!-- =================== ONTOLOGY part ============== -->
-  		<param name="ontologyMappingFileName" type="data" format="obo" label="ID to Ontology mapping file (TSV/CSV)" help="Simple file linking the coding scheme used for the identifications in the given input file to one or more ontology terms."/>
-  		<param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name (in ontology mapping file)" help="Name of the column containing the identification codes (which will in fact link the input file records to the ontology records)"/>
-  		<param name="mappingIdPrefix" type="text" size="50" value="" label="(Optional) Prefix in mapping ID column" 
-  				help="Fill in if any prefix is found in the ID column values (e.g. in some 
-					 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this 
-					 example one would fill in 'lipidmaps:' as prefix)"/>
-
-  		<param name="mappingFileOntologyTermColName" type="text" size="50" value="" label="Ontology term column name(s)" 
-  		       help="Name(s) of the column(s) containing the ontology terms in the ontology mapping file (and which will be transfered to the input file). 
-  		             For using multiple columns, set the names separated by comma (,). If multiple columns are specified, the algorithm will look for an annotation in the first one, if none
-  		             found it will try the second one, and so forth. "/>
-  		
-  		<param name="removeWhiteSpacesFromOterms" type="boolean" checked="false" 
-		  label="Remove white spaces from ontology terms" 
-     	  help="This could be needed for some ontologies, like the current custom one for Lipidmaps."/>
-     	
-	</inputs>
-	<outputs>
-		#if isinstance( $inputFileName.datatype, $__app__.datatypes_registry.get_datatype_by_extension('tabular').__class__):
-			<data name="outputFileName" format="tabular" label="${tool.name} on ${on_string}: annotated file " ></data>
-		#else:
-       		<data name="outputFileName" format="csv" label="${tool.name} on ${on_string}: annotated file " ></data>
-   		#end if
-	  
-	  <data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: ontology observations file (TSV)"></data>
-	</outputs>
-	<tests>
-	  <!--  find out how to use -->
-	  <test>
-	  </test>
-	</tests>
-  <help>
-  
-.. class:: infomark
-  
-This tool is responsible for annotating quantifications result file 
-with the ontology terms given in a mapping file. This mapping file links the items found in the result file
-(e.g. protein identifications coded in common protein coding formats such as UniProt )
-to their respective ontology terms (e.g. GO terms). It enables users to use the cross-reference 
-information now available in different repositories (like uniprot and KEGG - see for example
-http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ )
-to map their results to other useful coding schemes such as ontologies for functional annotations.  
- 
-As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to
-see if their organism has been mapped to GO terms by Uniprot. For example the link 
-http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the Uniprot repository and cross-references
-for the taxonomy 2850.
-When the organism being studied is not available, then other strategies 
-could be tried (like Blast2GO for example).
-
-
-Despite the specific examples above, this class is generic and can be used to map any 
-results file to an Ontology according to a given mapping file. One example would be mapping metabolomics
-identifications to the CheBI ontology.    
-  
-
------
-
-**Output**
-
-This method will read in the given input file and for each line it will add a new column 
-containing the Ontology terms found for the ID in that line. So the output file is the same as the 
-input file + extra Ontology terms column (separated by ; ).
-
-A second summarized "ontology observations" file is also generated which can be used for visualizing the results
-in an ontology viewer (e.g. see OntologyAndObservationsViewer). 
-
-  </help>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/term_mapper.xml	Mon Mar 23 21:02:01 2015 +0100
@@ -0,0 +1,207 @@
+<tool name="TermMapperTool" id="TermMapperTool1" version="0.0.2">
+	<description>use cross-reference lookup tables to annotate results</description>
+	<!-- 
+	   For remote debugging start your listener on port 8000 and use the following as command interpreter:
+	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
+	    -->
+	     <!--  similar to "join two datasets" tool http://galaxy.wur.nl/galaxy_production/root?tool_id=join1 
+	           but this one probably has more powerful features like supporting multiple ';' codes in key fields 
+	           and the feature in termColName(s) supporting direct hierarchy like annotation -->
+	<command interpreter="java -jar ">
+	    TermMapperTool.jar 
+		-inputFileName $inputFileName
+ 		-inputIdColumnName "$inputIdColumnName"
+ 		#if $inputIdCol.inputIdHasPrefix == True
+ 			-inputIdPrefix "$inputIdCol.inputIdPrefix"  
+ 		#end if
+		
+		-mappingFileName $mappingFileName
+		-mappingFileIdColName "$mappingFileIdColName"  
+		
+		#if $mappingIdCol.mappingIdHasPrefix == True
+ 			-mappingIdPrefix "$mappingIdCol.mappingIdPrefix"  
+ 		#end if
+		
+		-mappingFileTermColName "$mappingFileTermColName"
+
+		-outputFileName $outputFileName
+		
+		#if $genObservations.genObservationsFile == True
+			-outputObservationsFileName $outputObservationsFileName
+        	-quantifColumn "$genObservations.quantifColumn" 
+ 		#end if
+		
+		-mappedTermsColName $mappedTermsColName
+        	    
+	</command>
+	
+	<inputs>
+	 	
+  		<param name="inputFileName" type="data" format="tabular,csv" label="Target file (TSV/CSV)" />
+  		
+  		<param name="inputIdColumnName" type="text" size="50" value="" label="ID column name" 
+  			help="Name of the column containing the identification codes (in the given input file)"/>
+  		
+  		<conditional name="inputIdCol">
+     		<param name="inputIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
+     			label="ID values have a prefix"/>
+     		<when value="Yes">
+  				<param name="inputIdPrefix" type="text" size="50" value="" label="Prefix in ID column" 
+  					help="Fill in if any prefix is found in the ID column values (e.g. in some 
+						 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this 
+						 example one would fill in 'lipidmaps:' as prefix)"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+  		
+  		<!-- =================== cross-reference part ============== -->
+  		<param name="mappingFileName" type="data" format="tabular,csv" label="Lookup table (TSV/CSV)" help="Simple mapping file between the coding scheme used to another scheme"/>
+  		<param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name (in lookup table)" help="Name of the ID column for the lookup"/>
+  		
+  		<conditional name="mappingIdCol">
+     		<param name="mappingIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
+     			label="ID values have a prefix"/>
+     		<when value="Yes">
+  				<param name="mappingIdPrefix" type="text" size="50" value="" label="Prefix in ID column" 
+  					help="Fill in if any prefix is found in the ID column values (e.g. in some 
+						files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this 
+					 	example one would fill in 'lipidmaps:' as prefix)"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+
+  		<param name="mappingFileTermColName" type="text" size="50" value="" label="Term column name(s) or number(s)" 
+  		       help="Name(s) or number(s) of the column(s) containing the term(s) in the lookup table (and which will be transferred to the target file based on ID match in 'ID column name'). 
+  		             For using multiple term column names, set the names separated by comma (,). 
+  		             If multiple columns are specified, the algorithm will look for an annotation in the first one, if none
+  		             found it will try the second one, and so forth. "/>
+  		
+  		
+  		<param name="mappedTermsColName" type="text" size="50" value="Mapped terms" label="Name to give to the new column:" 
+  		       help="Name to give to the new column that will be added to the target file. This new column is the one
+  		             that will contain the respectively mapped terms."/>
+  		
+		<conditional name="genObservations">
+     		<param name="genObservationsFile" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
+     			label="Generate also observations file"/>
+     		<when value="Yes">     	
+     			<param name="quantifColumn" type="text" size="50" value="" 
+     				label="(Optional) Values column name" 
+     				help="Name of the column containing the quantification values (in the given input file)"/>
+     		</when>
+     		<when value="No">
+			</when>
+		</conditional>
+     	
+	</inputs>
+	<outputs>
+		#if isinstance( $inputFileName.datatype, $__app__.datatypes_registry.get_datatype_by_extension('tabular').__class__):
+			<data name="outputFileName" format="tabular" label="${tool.name} on ${on_string}: annotated file " ></data>
+		#else:
+       		<data name="outputFileName" format="csv" label="${tool.name} on ${on_string}: annotated file " ></data>
+   		#end if
+	  
+	  <data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: term observations file (TSV)">
+	  	<!-- If the expression is false, the file is not created -->
+	  	<filter>( genObservations.genObservationsFile == True )</filter>
+	  </data>
+	</outputs>
+	<tests>
+	  <!--  find out how to use -->
+	  <test>
+	  </test>
+	</tests>
+  <help>
+  
+.. class:: infomark
+
+  
+This tool is responsible for annotating the given target file 
+with the terms given in a lookup table. This lookup table maps the items found in the target file
+(e.g. protein identifications coded in common protein coding formats such as UniProt )
+to their respective terms (e.g. GO terms). It enables users to use the cross-reference 
+information now available from different repositories (like uniprot and KEGG - see for example
+http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ )
+to map their data to other useful coding schemes or to ontologies and functional annotations.  
+
+.. class:: infomark
+
+**NB:** Currently the tool will do "smart parsing" of hierarchy based fields in the target file ID column. 
+ This means that if the column contains a ".", the trailing part of the ID after the "." is ignored if the full
+ ID does not get a match in the lookup table while the part before the "." does. 
+ 
+.. class:: infomark
+
+Examples of usage:
+
+  annotate protein identifications with Gene Ontology[GO] terms
+  
+  annotate metabolite CAS identifications with chebi codes
+  
+  add KEGG gene codes to a file containing UNIPROT codes
+  
+  add KEGG compound codes to a file containing chebi codes
+  
+  etc
+ 
+As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to
+see if their organism has been mapped to GO terms by Uniprot. For example the link 
+http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the Uniprot repository and cross-references
+for the taxonomy 2850.
+When the organism being studied is not available, then other strategies 
+could be tried (like Blast2GO for example).
+
+Despite the specific examples above, this class is generic and can be used to map any 
+values to new terms according to a given lookup table.    
+  
+.. class:: infomark
+
+*Omics cross-reference resources on the web:*
+
+LinkDB: http://www.genome.jp/linkdb/
+
+*Ready to use metabolomics links:*
+
+http://rest.genome.jp/link/compound/chebi
+
+http://rest.genome.jp/link/compound/lipidmaps
+
+http://rest.genome.jp/link/compound/lipidbank
+
+http://rest.genome.jp/link/compound/hmdb
+
+
+*Ready to use proteomics links:*
+
+http://rest.genome.jp/link/uniprot/pti  (Phaeodactylum Tri.)
+
+http://rest.genome.jp/link/uniprot/hsa  (Homo Sapiens)
+
+(for organism code list see: http://www.genome.jp/kegg/catalog/org_list.html )
+
+
+Uniprot to GO
+
+http://www.uniprot.org/taxonomy/
+
+
+-----
+
+**Output**
+
+This method will read in the given input file and for each line it will add a new column 
+containing the terms found for the ID in that line. So the output file is the same as the 
+input file + extra terms column (separated by ; ).
+
+-----
+
+**Link to ontology viewer**
+
+A second summarized "terms observations" file can also be generated.
+In case the terms are ontology terms, this file can be used for visualizing the results
+in the ontology viewer "OntologyAndObservationsViewer". 
+
+  </help>
+</tool>