<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for TermMapperTool.jar.
  The tool annotates a target file (TSV/CSV) with terms taken from a
  cross-reference lookup table and can optionally write a second
  "term observations" file plus an HTML report.
-->
<tool name="TermMapperTool" id="TermMapperTool1" version="0.0.2">
    <description>use cross-reference lookup tables to annotate results</description>
    <!--
      For remote debugging start your listener on port 8000 and use the following as command interpreter:
        java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
    -->
    <!-- similar to "join two datasets" tool http://galaxy.wur.nl/galaxy_production/root?tool_id=join1
         but this one is probably having more powerful features like supporting multiple ';' codes in key fields
         and the feature in termColName(s) supporting direct hierarchy like annotation -->
    <command interpreter="java -jar ">
        ## Free-text parameters are double-quoted so that values containing
        ## spaces (e.g. the default "Mapped terms") reach the jar as one argument.
        TermMapperTool.jar
        -inputFileName $inputFileName
        -inputIdColumnName "$inputIdColumnName"
        ## Boolean params below declare truevalue="Yes"/falsevalue="No";
        ## compare the rendered string with the declared truevalue.
        #if str($inputIdCol.inputIdHasPrefix) == "Yes"
            -inputIdPrefix "$inputIdCol.inputIdPrefix"
        #end if

        -mappingFileName $mappingFileName
        -mappingFileIdColName "$mappingFileIdColName"

        #if str($mappingIdCol.mappingIdHasPrefix) == "Yes"
            -mappingIdPrefix "$mappingIdCol.mappingIdPrefix"
        #end if

        -mappingFileTermColName "$mappingFileTermColName"

        -outputFileName $outputFileName

        ## The observations output only exists when the user asked for it
        ## (see the filter on outputObservationsFileName in the outputs section).
        #if str($genObservations.genObservationsFile) == "Yes"
            -outputObservationsFileName $outputObservationsFileName
            -quantifColumn "$genObservations.quantifColumn"
            -multipleMappingSolution $genObservations.multipleMappingSolution
            -filterZeros $genObservations.filterZeros
        #end if

        -mappedTermsColName "$mappedTermsColName"
        -numberOfHeaderLines $numberOfHeaderLines

        -htmlReportFile $htmlReportFile
        -htmlReportFilesPath $htmlReportFile.files_path
    </command>

    <inputs>
        <!-- =================== target file part ============== -->
        <param name="inputFileName" type="data" format="tabular,csv,txt" label="Target file (TSV/CSV)"/>

        <param name="inputIdColumnName" type="text" size="50" value="" label="ID column name"
               help="Name of the column containing the identification codes (in the given input file)"/>

        <!-- The prefix field is only shown (and only passed to the jar) when the checkbox is ticked. -->
        <conditional name="inputIdCol">
            <param name="inputIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
                   label="ID values have a prefix"/>
            <when value="Yes">
                <param name="inputIdPrefix" type="text" size="50" value="" label="Prefix in ID column"
                       help="Fill in if any prefix is found in the ID column values (e.g. in some files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this example one would fill in 'lipidmaps:' as prefix)"/>
            </when>
            <when value="No"/>
        </conditional>

        <!-- =================== cross-reference part ============== -->
        <param name="mappingFileName" type="data" format="tabular,csv" label="Lookup table (TSV/CSV)"
               help="Simple mapping file between the coding scheme used to another scheme"/>

        <param name="numberOfHeaderLines" type="select" label="Number of header lines in mapping file"
               help="If this is '0', use the column numbers starting from 1 as the 'names' in the parameters below.">
            <option value="0">0</option>
            <option value="1" selected="true">1</option>
        </param>

        <param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name or number (in lookup table)"
               help="Name (or number) of the ID column for the lookup"/>

        <conditional name="mappingIdCol">
            <param name="mappingIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
                   label="ID values have a prefix"/>
            <when value="Yes">
                <param name="mappingIdPrefix" type="text" size="50" value="" label="Prefix in ID column"
                       help="Fill in if any prefix is found in the ID column values (e.g. in some files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this example one would fill in 'lipidmaps:' as prefix)"/>
            </when>
            <when value="No"/>
        </conditional>

        <param name="mappingFileTermColName" type="text" size="50" value="" label="Term column name(s) or number(s)"
               help="Name(s) or number(s) of the column(s) containing the term(s) in the lookup table (and which will be transferred to the target file based on ID match in 'ID column name'). For using multiple term column names, set the names separated by comma (,). If multiple columns are specified, the algorithm will look for an annotation in the first one, if none found it will try the second one, and so forth. "/>

        <param name="mappedTermsColName" type="text" size="50" value="Mapped terms" label="Name to give to the new column"
               help="Name to give to the new column that will be added to the target file. This new column is the one that will contain the respectively mapped terms."/>

        <!-- =================== optional observations file ============== -->
        <conditional name="genObservations">
            <param name="genObservationsFile" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
                   label="Generate also observations file"/>
            <when value="Yes">
                <param name="quantifColumn" type="text" size="50" value=""
                       label="(Optional) Values column name"
                       help="Name of the column containing the quantification values (in the given input file)"/>
                <param name="multipleMappingSolution" type="select"
                       label="(when using values column above) What to do when multiple items map to the same term"
                       help="When e.g. two Uniprot codes map to the same KEGG code, which quantification value to use">
                    <option value="not" selected="true">Do nothing, leave as is</option>
                    <option value="max">Use max value</option>
                    <option value="min">Use min value</option>
                    <option value="avg">Use avg value</option>
                </param>
                <param name="filterZeros" type="boolean" checked="false"
                       label="Filter zeros"
                       help="Filter out the items that have quantification value = 0"/>
            </when>
            <when value="No"/>
        </conditional>
    </inputs>

    <outputs>
        <!--
          Cheetah directives are not evaluated inside the outputs section, so the
          format switch and the optional output are expressed declaratively.
          The annotated file is csv by default and becomes tabular when the
          target file has the 'tabular' extension.
          NOTE(review): this matches the extension exactly; confirm whether
          subtypes of tabular also need to map to a tabular output.
        -->
        <data name="outputFileName" format="csv" label="${tool.name} on ${on_string}: annotated file ">
            <change_format>
                <when input_dataset="inputFileName" attribute="ext" value="tabular" format="tabular"/>
            </change_format>
        </data>
        <!-- Only created when "Generate also observations file" is checked. -->
        <data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: term observations file (TSV)">
            <filter>genObservations['genObservationsFile']</filter>
        </data>
        <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report"/>
    </outputs>

    <tests>
        <!-- find out how to use -->
        <!-- TODO: add a functional test (input files, parameters and expected outputs);
             the former empty placeholder test could not be executed. -->
    </tests>

    <help><![CDATA[

.. class:: infomark


This tool is responsible for annotating the given target file
with the terms given in a lookup table. This lookup table maps the items found in the target file
(e.g. protein identifications coded in common protein coding formats such as UniProt )
to their respective terms (e.g. GO terms). It enables users to use the cross-reference
information now available from different repositories (like uniprot and KEGG - see for example
http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ )
to map their data to other useful coding schemes or to ontologies and functional annotations.

.. class:: infomark

**NB:** Currently the tool will do "smart parsing" of hierarchy based fields in the target file ID column.
This means that if the column contains a ".", the trailing part of the ID after the "." is ignored if the full
ID does not get a match in the lookup table while the part before the "." does.

.. class:: infomark

Examples of usage:

annotate protein identifications with Gene Ontology[GO] terms

annotate metabolite CAS identifications with chebi codes

add KEGG gene codes to a file containing UNIPROT codes

add KEGG compound codes to a file containing chebi codes

etc

As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to
see if their organism has been mapped to GO terms by Uniprot. For example the link
http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the Uniprot repository and cross-references
for the taxonomy 2850.
When the organism being studied is not available, then other strategies
could be tried (like Blast2GO for example).

Despite the specific examples above, this class is generic and can be used to map any
values to new terms according to a given lookup table.

.. class:: infomark

*Omics cross-reference resources on the web:*

LinkDB: http://www.genome.jp/linkdb/

*Ready to use metabolomics links:*

http://rest.genome.jp/link/compound/chebi

http://rest.genome.jp/link/compound/lipidmaps

http://rest.genome.jp/link/compound/lipidbank

http://rest.genome.jp/link/compound/hmdb


*Ready to use proteomics links:*

http://rest.genome.jp/link/uniprot/pti (Phaeodactylum Tricornutum)
http://rest.genome.jp/link/pti/uniprot

http://rest.genome.jp/link/uniprot/hsa (Homo Sapiens)

(for organism code list see: )


Uniprot to GO

http://www.uniprot.org/taxonomy/

http://www.uniprot.org/uniprot/?sort=&desc=&query=proteome:UP000000759&fil=&format=tab&force=yes&columns=id,go-id (Phaeodactylum Tricornutum)


-----

**Output**

This method will read in the given input file and for each line it will add a new column
containing the terms found for the ID in that line. So the output file is the same as the
input file + extra terms column (separated by ; ).

-----

**Link to ontology viewer**

A second summarized "terms observations" file can also be generated.
In case the terms are ontology terms, this file can be used for visualizing the results
in the ontology viewer "OntologyAndObservationsViewer".

    ]]></help>
</tool>