<tool name="TermMapperTool" id="TermMapperTool1" version="0.0.2">
<description>use cross-reference lookup tables to annotate results</description>
<!--
For remote debugging, start your listener on port 8000 and use the following as the command interpreter:
java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
-->
<!-- Similar to the "join two datasets" tool (http://galaxy.wur.nl/galaxy_production/root?tool_id=join1),
but this one probably has more powerful features, such as supporting multiple ';' codes in key fields
and the termColName(s) feature supporting direct, hierarchy-like annotation. -->
<!--
  Command line executed through the "java -jar" interpreter.
  The body is a Cheetah template: each #if section adds its arguments only
  when the corresponding checkbox of the form is ticked.
  Every substituted value is double-quoted so that a value containing a space
  (for instance the default new-column name "Mapped terms") reaches the jar
  as one single argument.
-->
<command interpreter="java -jar ">
  TermMapperTool.jar
  -inputFileName "$inputFileName"
  -inputIdColumnName "$inputIdColumnName"
  #if $inputIdCol.inputIdHasPrefix == True
    -inputIdPrefix "$inputIdCol.inputIdPrefix"
  #end if

  -mappingFileName "$mappingFileName"
  -mappingFileIdColName "$mappingFileIdColName"

  #if $mappingIdCol.mappingIdHasPrefix == True
    -mappingIdPrefix "$mappingIdCol.mappingIdPrefix"
  #end if

  -mappingFileTermColName "$mappingFileTermColName"

  -outputFileName "$outputFileName"

  #if $genObservations.genObservationsFile == True
    -outputObservationsFileName "$outputObservationsFileName"
    -quantifColumn "$genObservations.quantifColumn"
  #end if

  -mappedTermsColName "$mappedTermsColName"

</command>

<inputs>

  <!-- =================== target file part ============== -->
  <!-- File to annotate; both tab-separated and comma-separated files are accepted. -->
  <param name="inputFileName" type="data" format="tabular,csv" label="Target file (TSV/CSV)"/>

  <param name="inputIdColumnName"
         type="text"
         label="ID column name"
         value=""
         size="50"
         help="Name of the column containing the identification codes (in the given input file)"/>

  <!-- The prefix text box is only shown when the checkbox is ticked. -->
  <conditional name="inputIdCol">
    <param name="inputIdHasPrefix"
           type="boolean"
           label="ID values have a prefix"
           checked="false"
           truevalue="Yes"
           falsevalue="No"/>
    <when value="Yes">
      <param name="inputIdPrefix"
             type="text"
             label="Prefix in ID column"
             value=""
             size="50"
             help="Fill in if any prefix is found in the ID column values (e.g. in some files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this example one would fill in 'lipidmaps:' as prefix)"/>
    </when>
    <when value="No"/>
  </conditional>

  <!-- =================== cross-reference part ============== -->
  <param name="mappingFileName"
         type="data"
         format="tabular,csv"
         label="Lookup table (TSV/CSV)"
         help="Simple mapping file between the coding scheme used to another scheme"/>
  <param name="mappingFileIdColName"
         type="text"
         label="ID column name (in lookup table)"
         value=""
         size="50"
         help="Name of the ID column for the lookup"/>

  <!-- Same prefix option as above, this time for the ID column of the lookup table. -->
  <conditional name="mappingIdCol">
    <param name="mappingIdHasPrefix"
           type="boolean"
           label="ID values have a prefix"
           checked="false"
           truevalue="Yes"
           falsevalue="No"/>
    <when value="Yes">
      <param name="mappingIdPrefix"
             type="text"
             label="Prefix in ID column"
             value=""
             size="50"
             help="Fill in if any prefix is found in the ID column values (e.g. in some files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this example one would fill in 'lipidmaps:' as prefix)"/>
    </when>
    <when value="No"/>
  </conditional>

  <param name="mappingFileTermColName"
         type="text"
         label="Term column name(s) or number(s)"
         value=""
         size="50"
         help="Name(s) or number(s) of the column(s) containing the term(s) in the lookup table (and which will be transfered to the target file based on ID match in 'ID column name'). For using multiple term column names, set the names separated by comma (,). If multiple columns are specified, the algorithm will look for an annotation in the first one, if none found it will try the second one, and so forth. "/>

  <param name="mappedTermsColName"
         type="text"
         label="Name to give to the new column:"
         value="Mapped terms"
         size="50"
         help="Name to give to the new column that will be added to the target file. This new column is the one that will contain the respectively mapped terms."/>

  <!-- Optional second output: the values column is only asked for when the observations file is requested. -->
  <conditional name="genObservations">
    <param name="genObservationsFile"
           type="boolean"
           label="Generate also observations file"
           checked="false"
           truevalue="Yes"
           falsevalue="No"/>
    <when value="Yes">
      <param name="quantifColumn"
             type="text"
             label="(Optional) Values column name"
             value=""
             size="50"
             help="Name of the column containing the quantification values (in the given input file)"/>
    </when>
    <when value="No"/>
  </conditional>

</inputs>
<outputs>
  <!--
    Annotated copy of the target file.
    Cheetah directives are not evaluated inside the outputs section, so the
    format switch is expressed declaratively: the output is tabular by default
    and becomes csv when the target file itself has the csv extension.
  -->
  <data name="outputFileName" format="tabular" label="${tool.name} on ${on_string}: annotated file ">
    <change_format>
      <when input_dataset="inputFileName" attribute="ext" value="csv" format="csv"/>
    </change_format>
  </data>

  <data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: term observations file (TSV)">
    <!--
      If the expression is false, the file is not created.
      Parameters nested in a conditional are reached with dictionary syntax.
    -->
    <filter>( genObservations['genObservationsFile'] == True )</filter>
  </data>
</outputs>
<tests>
  <!-- TODO: find out how to use this section; the single test below is still an empty placeholder -->
  <test/>
</tests>
<help>

.. class:: infomark


This tool is responsible for annotating the given target file
with the terms given in a lookup table. This lookup table maps the items found in the target file
(e.g. protein identifications coded in common protein coding formats such as UniProt)
to their respective terms (e.g. GO terms). It enables users to use the cross-reference
information now available from different repositories (like UniProt and KEGG - see for example
http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ )
to map their data to other useful coding schemes or to ontologies and functional annotations.

.. class:: infomark

**NB:** Currently the tool will do "smart parsing" of hierarchy based fields in the target file ID column.
This means that if a value in the column contains a ".", the trailing part of the ID after the "." is ignored if the full
ID does not get a match in the lookup table while the part before the "." does.

.. class:: infomark

Examples of usage:

annotate protein identifications with Gene Ontology (GO) terms

annotate metabolite CAS identifications with ChEBI codes

add KEGG gene codes to a file containing UniProt codes

add KEGG compound codes to a file containing ChEBI codes

etc.

As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to
see if their organism has been mapped to GO terms by UniProt. For example the link
http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the UniProt repository and cross-references
for the taxonomy 2850.
When the organism being studied is not available, other strategies
could be tried (like Blast2GO for example).

Despite the specific examples above, this tool is generic and can be used to map any
values to new terms according to a given lookup table.

.. class:: infomark

*Omics cross-reference resources on the web:*

LinkDB: http://www.genome.jp/linkdb/

*Ready to use metabolomics links:*

http://rest.genome.jp/link/compound/chebi

http://rest.genome.jp/link/compound/lipidmaps

http://rest.genome.jp/link/compound/lipidbank

http://rest.genome.jp/link/compound/hmdb


*Ready to use proteomics links:*

http://rest.genome.jp/link/uniprot/pti (Phaeodactylum tricornutum)

http://rest.genome.jp/link/uniprot/hsa (Homo sapiens)

(for the organism code list see the KEGG organisms catalogue: http://www.genome.jp/kegg/catalog/org_list.html )


UniProt to GO

http://www.uniprot.org/taxonomy/


-----

**Output**

This tool reads the given input file and, for each line, adds a new column
containing the terms found for the ID in that line. So the output file is the same as the
input file plus an extra terms column (terms separated by ";").

-----

**Link to ontology viewer**

A second, summarized "terms observations" file can also be generated.
In case the terms are ontology terms, this file can be used for visualizing the results
in the ontology viewer "OntologyAndObservationsViewer".

</help>
</tool>