comparison term_mapper.xml @ 7:ce9228263148

renamed to TermMapper
author pieter.lukasse@wur.nl
date Mon, 23 Mar 2015 21:02:01 +0100
parents
children 97e10319d86f
comparison
equal deleted inserted replaced
6:8fa07f40d2eb 7:ce9228263148
1 <tool name="TermMapperTool" id="TermMapperTool1" version="0.0.2">
2 <description>use cross-reference lookup tables to annotate results</description>
3 <!--
4 For remote debugging start your listener on port 8000 and use the following as command interpreter:
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
6 -->
7 <!-- similar to "join two datasets" tool http://galaxy.wur.nl/galaxy_production/root?tool_id=join1
8 but this one offers more powerful features, such as support for multiple ';'-separated codes in key fields
9 and the feature in termColName(s) supporting direct hierarchy-like annotation -->
10 <command interpreter="java -jar ">
11 TermMapperTool.jar
12 -inputFileName $inputFileName
13 -inputIdColumnName "$inputIdColumnName"
14 #if str($inputIdCol.inputIdHasPrefix) == "Yes"
15 -inputIdPrefix "$inputIdCol.inputIdPrefix"
16 #end if
17 ## lookup table and its ID column
18 -mappingFileName $mappingFileName
19 -mappingFileIdColName "$mappingFileIdColName"
20 ## boolean params render as their truevalue/falsevalue ("Yes"/"No"), hence the string comparison
21 #if str($mappingIdCol.mappingIdHasPrefix) == "Yes"
22 -mappingIdPrefix "$mappingIdCol.mappingIdPrefix"
23 #end if
24
25 -mappingFileTermColName "$mappingFileTermColName"
26
27 -outputFileName $outputFileName
28 ## the observations file arguments are only passed when that output was requested
29 #if str($genObservations.genObservationsFile) == "Yes"
30 -outputObservationsFileName $outputObservationsFileName
31 -quantifColumn "$genObservations.quantifColumn"
32 #end if
33 ## quoted because the column name may contain spaces (the default is "Mapped terms")
34 -mappedTermsColName "$mappedTermsColName"
35
36 </command>
37
38 <inputs>
39 <!-- Target file to annotate and the name of its column holding the IDs to look up -->
40 <param name="inputFileName" type="data" format="tabular,csv" label="Target file (TSV/CSV)" />
41
42 <param name="inputIdColumnName" type="text" size="50" value="" label="ID column name"
43 help="Name of the column containing the identification codes (in the given input file)"/>
44 <!-- Optional prefix on the target file IDs; passed as -inputIdPrefix in the command when enabled -->
45 <conditional name="inputIdCol">
46 <param name="inputIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
47 label="ID values have a prefix"/>
48 <when value="Yes">
49 <param name="inputIdPrefix" type="text" size="50" value="" label="Prefix in ID column"
50 help="Fill in if any prefix is found in the ID column values (e.g. in some
51 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this
52 example one would fill in 'lipidmaps:' as prefix)"/>
53 </when>
54 <when value="No">
55 </when>
56 </conditional>
57
58 <!-- =================== cross-reference part ============== -->
59 <param name="mappingFileName" type="data" format="tabular,csv" label="Lookup table (TSV/CSV)" help="Simple mapping file between the coding scheme used to another scheme"/>
60 <param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name (in lookup table)" help="Name of the ID column for the lookup"/>
61 <!-- Optional prefix on the lookup table IDs; passed as -mappingIdPrefix in the command when enabled -->
62 <conditional name="mappingIdCol">
63 <param name="mappingIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
64 label="ID values have a prefix"/>
65 <when value="Yes">
66 <param name="mappingIdPrefix" type="text" size="50" value="" label="Prefix in ID column"
67 help="Fill in if any prefix is found in the ID column values (e.g. in some
68 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this
69 example one would fill in 'lipidmaps:' as prefix)"/>
70 </when>
71 <when value="No">
72 </when>
73 </conditional>
74 <!-- Lookup table column(s) supplying the terms; passed as -mappingFileTermColName in the command -->
75 <param name="mappingFileTermColName" type="text" size="50" value="" label="Term column name(s) or number(s)"
76 help="Name(s) or number(s) of the column(s) containing the term(s) in the lookup table (and which will be transfered to the target file based on ID match in 'ID column name').
77 For using multiple term column names, set the names separated by comma (,).
78 If multiple columns are specified, the algorithm will look for an annotation in the first one, if none
79 found it will try the second one, and so forth. "/>
80
81 <!-- Header of the new column added to the target file; passed as -mappedTermsColName in the command -->
82 <param name="mappedTermsColName" type="text" size="50" value="Mapped terms" label="Name to give to the new column:"
83 help="Name to give to the new column that will be added to the target file. This new column is the one
84 that will contain the respectively mapped terms."/>
85 <!-- Optional second output: enabling this reveals the quantification values column setting -->
86 <conditional name="genObservations">
87 <param name="genObservationsFile" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
88 label="Generate also observations file"/>
89 <when value="Yes">
90 <param name="quantifColumn" type="text" size="50" value=""
91 label="(Optional) Values column name"
92 help="Name of the column containing the quantification values (in the given input file)"/>
93 </when>
94 <when value="No">
95 </when>
96 </conditional>
97
98 </inputs>
99 <outputs>
100 <!-- The annotated file takes the datatype (tabular or csv) of the target file. Cheetah
101 directives are not evaluated inside outputs, so format_source is used for this. -->
102 <data name="outputFileName" format_source="inputFileName"
103 label="${tool.name} on ${on_string}: annotated file " ></data>
104
105
106 <data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: term observations file (TSV)">
107 <!-- If the expression is false, the file is not created. Conditional values are reached by key. -->
108 <filter>genObservations['genObservationsFile']</filter>
109 </data>
110 </outputs>
111 <tests>
112 <!-- TODO: no functional test is defined yet; the test element below is an empty placeholder -->
113 <test>
114 </test>
115 </tests>
116 <help>
117
118 .. class:: infomark
119
120
121 This tool is responsible for annotating the given target file
122 with the terms given in a lookup table. This lookup table maps the items found in the target file
123 (e.g. protein identifications coded in common protein coding formats such as UniProt )
124 to their respective terms (e.g. GO terms). It enables users to use the cross-reference
125 information now available from different repositories (like uniprot and KEGG - see for example
126 http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ )
127 to map their data to other useful coding schemes or to ontologies and functional annotations.
128
129 .. class:: infomark
130
131 **NB:** Currently the tool will do "smart parsing" of hierarchy based fields in the target file ID column.
132 This means that if the column contains a ".", the trailing part of the ID after the "." is ignored if the full
133 ID does not get a match in the lookup table while the part before the "." does.
134
135 .. class:: infomark
136
137 Examples of usage:
138
139 annotate protein identifications with Gene Ontology[GO] terms
140
141 annotate metabolite CAS identifications with chebi codes
142
143 add KEGG gene codes to a file containing UNIPROT codes
144
145 add KEGG compound codes to a file containing chebi codes
146
147 etc
148
149 As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to
150 see if their organism has been mapped to GO terms by Uniprot. For example the link
151 http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the Uniprot repository and cross-references
152 for the taxonomy 2850.
153 When the organism being studied is not available, then other strategies
154 could be tried (like Blast2GO for example).
155
156 Despite the specific examples above, this tool is generic and can be used to map any
157 values to new terms according to a given lookup table.
158
159 .. class:: infomark
160
161 *Omics cross-reference resources on the web:*
162
163 LinkDB: http://www.genome.jp/linkdb/
164
165 *Ready to use metabolomics links:*
166
167 http://rest.genome.jp/link/compound/chebi
168
169 http://rest.genome.jp/link/compound/lipidmaps
170
171 http://rest.genome.jp/link/compound/lipidbank
172
173 http://rest.genome.jp/link/compound/hmdb
174
175
176 *Ready to use proteomics links:*
177
178 http://rest.genome.jp/link/uniprot/pti (Phaeodactylum Tri.)
179
180 http://rest.genome.jp/link/uniprot/hsa (Homo Sapiens)
181
182 (for organism code list see: )
183
184
185 Uniprot to GO
186
187 http://www.uniprot.org/taxonomy/
188
189
190 -----
191
192 **Output**
193
194 This tool reads the given input file and adds a new column to each line,
195 containing the terms found for the ID in that line. So the output file is the same as the
196 input file plus an extra terms column (terms separated by ; ).
197
198 -----
199
200 **Link to ontology viewer**
201
202 A second summarized "terms observations" file can also be generated.
203 In case the terms are ontology terms, this file can be used for visualizing the results
204 in the ontology viewer "OntologyAndObservationsViewer".
205
206 </help>
207 </tool>