Mercurial > repos > pieterlukasse > primo_multiomics
comparison term_mapper.xml @ 7:ce9228263148
renamed to TermMapper
author | pieter.lukasse@wur.nl |
---|---|
date | Mon, 23 Mar 2015 21:02:01 +0100 |
parents | |
children | 97e10319d86f |
comparison
equal
deleted
inserted
replaced
6:8fa07f40d2eb | 7:ce9228263148 |
---|---|
1 <tool name="TermMapperTool" id="TermMapperTool1" version="0.0.2"> | |
2 <description>use cross-reference lookup tables to annotate results</description> | |
3 <!-- | |
4 For remote debugging start your listener on port 8000 and use the following as command interpreter: | |
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 | |
6 --> | |
7 <!-- Similar to the "join two datasets" tool http://galaxy.wur.nl/galaxy_production/root?tool_id=join1 | |
8 but this one probably has more powerful features, such as supporting multiple ';'-separated codes in key fields | |
9 and the termColName(s) feature that supports direct hierarchy-like annotation --> | |
10 <command interpreter="java -jar "> | |
11 TermMapperTool.jar | |
12 -inputFileName $inputFileName | |
13 -inputIdColumnName "$inputIdColumnName" | |
14 #if str($inputIdCol.inputIdHasPrefix) == "Yes" | |
15 -inputIdPrefix "$inputIdCol.inputIdPrefix" | |
16 #end if | |
17 ## The boolean inputs declare truevalue="Yes", so each #if guard compares the rendered string to "Yes". | |
18 -mappingFileName $mappingFileName | |
19 -mappingFileIdColName "$mappingFileIdColName" | |
20 | |
21 #if str($mappingIdCol.mappingIdHasPrefix) == "Yes" | |
22 -mappingIdPrefix "$mappingIdCol.mappingIdPrefix" | |
23 #end if | |
24 | |
25 -mappingFileTermColName "$mappingFileTermColName" | |
26 | |
27 -outputFileName $outputFileName | |
28 ## The observations file and its values column are passed only when that output was requested. | |
29 #if str($genObservations.genObservationsFile) == "Yes" | |
30 -outputObservationsFileName $outputObservationsFileName | |
31 -quantifColumn "$genObservations.quantifColumn" | |
32 #end if | |
33 ## Quoted like the other text parameters: the default value "Mapped terms" contains a space. | |
34 -mappedTermsColName "$mappedTermsColName" | |
35 | |
36 </command> | |
37 | |
38 <inputs> | |
39 <!-- Target file to be annotated; handed to the jar as -inputFileName. --> | |
40 <param name="inputFileName" type="data" format="tabular,csv" label="Target file (TSV/CSV)" /> | |
41 <!-- Name of the column in the target file that holds the IDs to look up. --> | |
42 <param name="inputIdColumnName" type="text" size="50" value="" label="ID column name" | |
43 help="Name of the column containing the identification codes (in the given input file)"/> | |
44 <!-- Optional ID prefix for the target file: the prefix field exists only in the "Yes" branch. --> | |
45 <conditional name="inputIdCol"> | |
46 <param name="inputIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false" | |
47 label="ID values have a prefix"/> | |
48 <when value="Yes"> | |
49 <param name="inputIdPrefix" type="text" size="50" value="" label="Prefix in ID column" | |
50 help="Fill in if any prefix is found in the ID column values (e.g. in some | |
51 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this | |
52 example one would fill in 'lipidmaps:' as prefix)"/> | |
53 </when> | |
54 <when value="No"> | |
55 </when> | |
56 </conditional> | |
57 | |
58 <!-- =================== cross-reference part ============== --> | |
59 <param name="mappingFileName" type="data" format="tabular,csv" label="Lookup table (TSV/CSV)" help="Simple mapping file between the coding scheme used to another scheme"/> | |
60 <param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name (in lookup table)" help="Name of the ID column for the lookup"/> | |
61 <!-- Optional ID prefix for the lookup table; mirrors the inputIdCol conditional above. --> | |
62 <conditional name="mappingIdCol"> | |
63 <param name="mappingIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false" | |
64 label="ID values have a prefix"/> | |
65 <when value="Yes"> | |
66 <param name="mappingIdPrefix" type="text" size="50" value="" label="Prefix in ID column" | |
67 help="Fill in if any prefix is found in the ID column values (e.g. in some | |
68 files the value is preceded by a fixed value like for example 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007' - in this | |
69 example one would fill in 'lipidmaps:' as prefix)"/> | |
70 </when> | |
71 <when value="No"> | |
72 </when> | |
73 </conditional> | |
74 <!-- One or more lookup-table columns (comma-separated) that supply the terms; tried in the order given. --> | |
75 <param name="mappingFileTermColName" type="text" size="50" value="" label="Term column name(s) or number(s)" | |
76 help="Name(s) or number(s) of the column(s) containing the term(s) in the lookup table (and which will be transfered to the target file based on ID match in 'ID column name'). | |
77 For using multiple term column names, set the names separated by comma (,). | |
78 If multiple columns are specified, the algorithm will look for an annotation in the first one, if none | |
79 found it will try the second one, and so forth. "/> | |
80 | |
81 <!-- Header of the new column added to the target file; the default value contains a space. --> | |
82 <param name="mappedTermsColName" type="text" size="50" value="Mapped terms" label="Name to give to the new column:" | |
83 help="Name to give to the new column that will be added to the target file. This new column is the one | |
84 that will contain the respectively mapped terms."/> | |
85 <!-- Optional second output: the values column is only asked for when the observations file is requested. --> | |
86 <conditional name="genObservations"> | |
87 <param name="genObservationsFile" type="boolean" truevalue="Yes" falsevalue="No" checked="false" | |
88 label="Generate also observations file"/> | |
89 <when value="Yes"> | |
90 <param name="quantifColumn" type="text" size="50" value="" | |
91 label="(Optional) Values column name" | |
92 help="Name of the column containing the quantification values (in the given input file)"/> | |
93 </when> | |
94 <when value="No"> | |
95 </when> | |
96 </conditional> | |
97 | |
98 </inputs> | |
99 <outputs> | |
100 <data name="outputFileName" format="tabular" label="${tool.name} on ${on_string}: annotated file " > | |
101 <!-- Annotated copy of the target file: tabular by default, csv when the target file itself is csv. --> | |
102 <change_format> | |
103 <when input_dataset="inputFileName" attribute="ext" value="csv" format="csv" /> | |
104 </change_format> | |
105 </data> | |
106 <data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: term observations file (TSV)"> | |
107 <!-- If the expression is false, the file is not created; conditional values are read by key. --> | |
108 <filter>genObservations['genObservationsFile']</filter> | |
109 </data> | |
110 </outputs> | |
111 <tests> | |
112 <!-- TODO: no functional test is defined yet; the empty test element below is only a placeholder. --> | |
113 <test> | |
114 </test> | |
115 </tests> | |
116 <help> | |
117 | |
118 .. class:: infomark | |
119 | |
120 | |
121 This tool is responsible for annotating the given target file | |
122 with the terms given in a lookup table. This lookup table maps the items found in the target file | |
123 (e.g. protein identifications coded in common protein coding formats such as UniProt ) | |
124 to their respective terms (e.g. GO terms). It enables users to use the cross-reference | |
125 information now available from different repositories (like uniprot and KEGG - see for example | |
126 http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ ) | |
127 to map their data to other useful coding schemes or to ontologies and functional annotations. | |
128 | |
129 .. class:: infomark | |
130 | |
131 **NB:** Currently the tool will do "smart parsing" of hierarchy based fields in the target file ID column. | |
132 This means that if the column contains a ".", the trailing part of the ID after the "." is ignored if the full | |
133 ID does not get a match in the lookup table while the part before the "." does. | |
134 | |
135 .. class:: infomark | |
136 | |
137 Examples of usage: | |
138 | |
139 annotate protein identifications with Gene Ontology[GO] terms | |
140 | |
141 annotate metabolite CAS identifications with chebi codes | |
142 | |
143 add KEGG gene codes to a file containing UNIPROT codes | |
144 | |
145 add KEGG compound codes to a file containing chebi codes | |
146 | |
147 etc | |
148 | |
149 As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to | |
150 see if their organism has been mapped to GO terms by Uniprot. For example the link | |
151 http://www.uniprot.org/uniprot/?query=taxonomy:2850 will show the Uniprot repository and cross-references | |
152 for the taxonomy 2850. | |
153 When the organism being studied is not available, then other strategies | |
154 could be tried (like Blast2GO for example). | |
155 | |
156 Despite the specific examples above, this tool is generic and can be used to map any | |
157 values to new terms according to a given lookup table. | |
158 | |
159 .. class:: infomark | |
160 | |
161 *Omics cross-reference resources on the web:* | |
162 | |
163 LinkDB: http://www.genome.jp/linkdb/ | |
164 | |
165 *Ready to use metabolomics links:* | |
166 | |
167 http://rest.genome.jp/link/compound/chebi | |
168 | |
169 http://rest.genome.jp/link/compound/lipidmaps | |
170 | |
171 http://rest.genome.jp/link/compound/lipidbank | |
172 | |
173 http://rest.genome.jp/link/compound/hmdb | |
174 | |
175 | |
176 *Ready to use proteomics links:* | |
177 | |
178 http://rest.genome.jp/link/uniprot/pti (Phaeodactylum Tri.) | |
179 | |
180 http://rest.genome.jp/link/uniprot/hsa (Homo Sapiens) | |
181 | |
182 (for organism code list see: http://www.genome.jp/kegg/catalog/org_list.html ) | |
183 | |
184 | |
185 Uniprot to GO | |
186 | |
187 http://www.uniprot.org/taxonomy/ | |
188 | |
189 | |
190 ----- | |
191 | |
192 **Output** | |
193 | |
194 The tool reads the given input file and, for each line, adds a new column | |
195 containing the terms found for the ID in that line. So the output file is the same as the | |
196 input file + extra terms column (separated by ; ). | |
197 | |
198 ----- | |
199 | |
200 **Link to ontology viewer** | |
201 | |
202 A second summarized "terms observations" file can also be generated. | |
203 In case the terms are ontology terms, this file can be used for visualizing the results | |
204 in the ontology viewer "OntologyAndObservationsViewer". | |
205 | |
206 </help> | |
207 </tool> |