view results2o.xml @ 6:8fa07f40d2eb

added more parameters/options and made Ontology observations file optional;
author pieter.lukasse@wur.nl
date Fri, 01 Aug 2014 17:21:30 +0200
parents eb0b7889dd08
children
line wrap: on
line source

<tool name="Results2O" id="results2o1" version="0.0.1">
	<description>use ontology mapping to annotate results (e.g. annotate protein identifications with Gene Ontology[GO] terms)</description>
	<!--
	    Remote debugging: start your listener on port 8000 and use the following as command interpreter:
	        java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
	-->
	<!--
	    Similar to the "join two datasets" tool (http://galaxy.wur.nl/galaxy_production/root?tool_id=join1),
	    but this one probably has more powerful features, like supporting multiple ';' codes in key fields
	    and the feature in ontologyTermColName(s) supporting direct hierarchy like annotation.
	-->
	<!--
	    Command line: Results2O.jar is started through the "java -jar" interpreter.
	    Text parameters and dataset paths are double-quoted so that a value containing
	    whitespace is passed to the jar as one single argument.
	-->
	<command interpreter="java -jar ">
		Results2O.jar
		-inputFileName "$inputFileName"
		-inputIdColumnName "$inputIdColumnName"
		-inputIdPrefix "$inputIdPrefix"
		-quantifColumn "$quantifColumn"

		-ontologyMappingFileName "$ontologyMappingFileName"
		-mappingFileIdColName "$mappingFileIdColName"
		-mappingIdPrefix "$mappingIdPrefix"
		-mappingFileOntologyTermColName "$mappingFileOntologyTermColName"
		-removeWhiteSpacesFromOterms $removeWhiteSpacesFromOterms

		-outputFileName "$outputFileName"
		-outputObservationsFileName "$outputObservationsFileName"
	</command>
	
	<inputs>

		<!-- =================== INPUT (results) part ============== -->
		<param name="inputFileName" type="data" format="tabular,csv" label="Input file (TSV/CSV)" />
		<param name="inputIdColumnName" type="text" size="50" value="" label="ID column name" help="Name of the column containing the identification codes (in the given input file)"/>
		<param name="inputIdPrefix" type="text" size="50" value="" label="(Optional) Prefix in ID column" 
				help="Fill in if any prefix is found in the ID column values (e.g. in some 
					 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this 
					 example one would fill in 'lipidmaps:' as prefix)"/>
		<param name="quantifColumn" type="text" size="50" value="" label="(Optional) Values column name" help="Name of the column containing the quantification values (in the given input file)"/>

		<!-- =================== ONTOLOGY part ============== -->
		<!--
		    The mapping file is a TSV/CSV file that is addressed by column name, so tabular and csv
		    datasets must be selectable. "obo" is kept in the list so that datasets that were
		    previously typed as obo to satisfy the old format restriction can still be chosen.
		-->
		<param name="ontologyMappingFileName" type="data" format="tabular,csv,obo" label="ID to Ontology mapping file (TSV/CSV)" help="Simple file linking the coding scheme used for the identifications in the given input file to one or more ontology terms."/>
		<param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name (in ontology mapping file)" help="Name of the column containing the identification codes (which will in fact link the input file records to the ontology records)"/>
		<param name="mappingIdPrefix" type="text" size="50" value="" label="(Optional) Prefix in mapping ID column" 
				help="Fill in if any prefix is found in the ID column values (e.g. in some 
					 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this 
					 example one would fill in 'lipidmaps:' as prefix)"/>

		<!-- One or more comma-separated column names; see the help text for the lookup order. -->
		<param name="mappingFileOntologyTermColName" type="text" size="50" value="" label="Ontology term column name(s)" 
		       help="Name(s) of the column(s) containing the ontology terms in the ontology mapping file (and which will be transfered to the input file). 
		             For using multiple columns, set the names separated by comma (,). If multiple columns are specified, the algorithm will look for an annotation in the first one, if none
		             found it will try the second one, and so forth. "/>

		<param name="removeWhiteSpacesFromOterms" type="boolean" checked="false" 
		  label="Remove white spaces from ontology terms" 
		  help="This could be needed for some ontologies, like the current custom one for Lipidmaps."/>

	</inputs>
	<outputs>
		<!--
		    Annotated copy of the input file. Its datatype follows the selected input dataset
		    through format_source; format="tabular" only acts as the fallback value.
		    Cheetah directives (#if / #else) are not evaluated inside the outputs section, so the
		    format cannot be chosen with a template condition here.
		-->
		<data name="outputFileName" format="tabular" format_source="inputFileName" label="${tool.name} on ${on_string}: annotated file " />

		<!-- Summarized ontology observations, always written as TSV. -->
		<data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: ontology observations file (TSV)" />
	</outputs>
	<tests>
	  <!-- Placeholder: no functional test is defined yet. The test element below declares
	       no inputs and no expected outputs. TODO: find out how to use this section and add real test cases. -->
	  <test>
	  </test>
	</tests>
  <help>
  
.. class:: infomark
  
This tool annotates a quantification results file 
with the ontology terms given in a mapping file. This mapping file links the items found in the results file
(e.g. protein identifications coded in common protein coding formats such as UniProt)
to their respective ontology terms (e.g. GO terms). It enables users to use the cross-reference 
information now available in different repositories (like UniProt and KEGG - see for example
http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ )
to map their results to other useful coding schemes such as ontologies for functional annotations.
 
As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to
see if their organism has been mapped to GO terms by Uniprot. For example the link 
http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the Uniprot repository and cross-references
for the taxonomy 2850.
When the organism being studied is not available, then other strategies 
could be tried (like Blast2GO for example).


Despite the specific examples above, this tool is generic and can be used to map any 
results file to an ontology according to a given mapping file. One example would be mapping metabolomics
identifications to the ChEBI ontology.
  

-----

**Output**

This tool reads the given input file and, for each line, adds a new column 
containing the ontology terms found for the ID in that line. So the output file is the same as the 
input file plus an extra ontology terms column (terms separated by ; ).

A second summarized "ontology observations" file is also generated which can be used for visualizing the results
in an ontology viewer (e.g. see OntologyAndObservationsViewer). 

  </help>
</tool>