1
|
1 <tool id="unipept" name="Unipept" version="1.1.0">
|
0
|
2 <description>retrieve taxonomy for peptides</description>
|
|
3 <macros>
|
|
<!-- Macro "equate_il": a boolean option that puts "-e" on the command line when
     checked (the default) and nothing when unchecked. It is expanded in all
     three branches of the "unipept" conditional. -->
4 <xml name="equate_il">
|
1
|
5 <param name="equate_il" type="boolean" truevalue="-e" falsevalue="" checked="true" label="Equate isoleucine and leucine">
|
0
|
6 <help>isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records</help>
|
|
7 </param >
|
|
8 </xml>
|
|
<!-- Macro "extra": a boolean option that puts "-x" on the command line when
     checked (unchecked by default). The yield placeholder is filled with the
     children of each expand element, which is how every API branch supplies
     its own help text for this option. -->
9 <xml name="extra">
|
|
10 <param name="extra" type="boolean" truevalue="-x" falsevalue="" checked="false" label="retrieve extra information">
|
|
11 <yield/>
|
|
12 </param >
|
|
13 </xml>
|
|
<!-- Macro "names": two boolean options, "names" (emits "-n", checked by
     default) and "allfields" (emits "-A", unchecked by default). It is expanded
     only in the pept2lca and pept2taxa branches; the command template likewise
     references these two values only when the selected API is not pept2prot. -->
14 <xml name="names">
|
|
15 <param name="names" type="boolean" truevalue="-n" falsevalue="" checked="true" label="names" >
|
1
|
16 <help>return the names in complete taxonomic lineage</help>
|
|
17 </param >
|
|
18 <param name="allfields" type="boolean" truevalue="-A" falsevalue="" checked="false" label="allfields" >
|
|
19 <help>include fields for most specific taxonomic classification: taxon_rank,taxon_id,taxon_name before lineage</help>
|
0
|
20 </param >
|
|
21 </xml>
|
|
22 </macros>
|
|
23 <requirements>
|
|
24 </requirements>
|
|
<!-- Job outcome detection: a single rule matching exit codes from 1 upward.
     No level attribute is given, so Galaxy's default severity applies
     (presumably fatal; confirm against the Galaxy stdio documentation). -->
25 <stdio>
|
|
26 <exit_code range="1:" />
|
|
27 </stdio>
|
|
28 <command interpreter="python"><![CDATA[
|
|
29 unipept.py
|
|
30 --api=$unipept.api
|
|
31 $unipept.equate_il $unipept.extra
|
|
32 #if $unipept.api != 'pept2prot':
|
1
|
33 $unipept.names $unipept.allfields
|
0
|
34 #end if
|
|
35 $strict
|
|
36 #if str($peptide_src.fmt) == 'proteomic':
|
|
37 #if $peptide_src.input.datatype.file_ext == 'fasta':
|
|
38 --fasta="$peptide_src.input"
|
|
39 #elif $peptide_src.input.datatype.file_ext == 'mzid':
|
|
40 --mzid="$peptide_src.input"
|
|
41 #elif $peptide_src.input.datatype.file_ext == 'pepxml':
|
|
42 --pepxml="$peptide_src.input"
|
|
43 #end if
|
|
44 #elif str($peptide_src.fmt) == 'tabular':
|
|
45 --tabular="$peptide_src.input_tsv"
|
|
46 #set $col = int(str($peptide_src.column)) - 1
|
|
47 --column=$col
|
|
48 #elif str($peptide_src.fmt) == 'fasta':
|
|
49 --fasta="$peptide_src.input_fasta"
|
|
50 #elif str($peptide_src.fmt) == 'mzid':
|
|
51 --mzid="$peptide_src.input_mzid"
|
|
52 #elif str($peptide_src.fmt) == 'pepxml':
|
|
53 --pepxml="$peptide_src.input_pepxml"
|
|
54 #end if
|
|
## One output flag per format ticked in the "outputs" multi-select.
## Output paths are double-quoted, matching the quoting already used for
## every input path in this template.
55 #if 'json' in str($outputs).split(','):
|
|
56 --json "$output_json"
|
|
57 #end if
|
|
58 #if 'tsv' in str($outputs).split(','):
|
|
59 --tsv "$output_tsv"
|
|
60 #end if
|
|
61 #if 'csv' in str($outputs).split(','):
|
|
62 --csv "$output_csv"
|
|
63 #end if
|
1
|
64 #if 'unmatched' in str($outputs).split(','):
|
|
65 --unmatched "$output_unmatched"
|
0
|
66 #end if
|
|
67 ]]></command>
|
|
68 <inputs>
|
|
69 <conditional name="unipept">
|
|
70 <param name="api" type="select" label="Unipept application" >
|
1
|
71 <option value="pept2lca" selected="true">pept2lca: lowest common ancestor</option>
|
|
72 <option value="pept2taxa">pept2taxa: organisms associated with the UniProt entries containing a given tryptic peptide</option>
|
0
|
73 <option value="pept2prot">pept2prot: UniProt entries containing a given tryptic peptide</option>
|
|
74 </param>
|
1
|
75 <when value="pept2lca">
|
|
76 <expand macro="equate_il" />
|
|
77 <expand macro="extra">
|
|
78 <help>Return the complete lineage of the taxonomic lowest common ancestor, and include ID fields.</help>
|
|
79 </expand>
|
|
80 <expand macro="names" />
|
|
81 </when>
|
0
|
82 <when value="pept2taxa">
|
|
83 <expand macro="equate_il" />
|
|
84 <expand macro="extra">
|
|
85 <checked>true</checked>
|
1
|
86 <help>Return the complete lineage of each organism, and include ID fields.</help>
|
0
|
87 </expand>
|
|
88 <expand macro="names" />
|
|
89 </when>
|
|
90 <when value="pept2prot">
|
|
91 <expand macro="equate_il" />
|
|
92 <expand macro="extra">
|
|
93 <help>Return additional information fields: taxon_name, ec_references, go_references, refseq_ids, refseq_protein_ids, insdc_ids, insdc_protein_ids
|
|
94 WARNING: Huge performance penalty! Only use for a small number of peptides when the extra information is required.
|
|
95 </help>
|
|
96 </expand>
|
|
97 </when>
|
|
98 </conditional>
|
|
99 <conditional name="peptide_src">
|
|
100 <param name="fmt" type="select" label="Peptides input format" >
|
|
101 <option value="proteomic">proteomics formats: mzid, pepxml, fasta</option>
|
|
102 <option value="tabular">tabular</option>
|
|
103 <option value="fasta">fasta</option>
|
|
104 <option value="mzid">mzid</option>
|
|
105 <option value="pepxml">pepxml</option>
|
|
106 </param>
|
|
107 <when value="proteomic">
|
|
108 <param name="input" type="data" format="mzid,pepxml,fasta" label="Peptide Input" />
|
|
109 </when>
|
|
110 <when value="tabular">
|
|
111 <param name="input_tsv" type="data" format="tabular" label="Tabular Input Containing Peptide column" />
|
|
112 <param name="column" label="Select column with peptides" type="data_column" numerical="false" data_ref="input_tsv" />
|
|
113 </when>
|
|
114 <when value="fasta">
|
|
115 <param name="input_fasta" type="data" format="fasta" label="Peptide Fasta Input" />
|
|
116 </when>
|
|
<!-- Shown when the peptide input format is "mzid": selects one mzid dataset. -->
117 <when value="mzid">
|
|
118 <param name="input_mzid" type="data" format="mzid" label="mzIdentML Input" />
|
|
119 </when>
|
|
<!-- Shown when the peptide input format is "pepxml": selects one pepxml
     dataset. The label previously repeated the mzid branch's text. -->
120 <when value="pepxml">
|
|
121 <param name="input_pepxml" type="data" format="pepxml" label="pepXML Input" />
|
|
122 </when>
|
|
123 </conditional>
|
|
124 <param name="outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">
|
|
125 <option value="tsv" selected="true">tabular</option>
|
|
126 <option value="csv">Comma Separated Values (.csv)</option>
|
|
127 <option value="json">JSON</option>
|
1
|
128 <option value="unmatched">Unmatched peptides</option>
|
0
|
129 </param>
|
|
130 <param name="strict" type="boolean" truevalue="--strict" falsevalue="" checked="false" label="Exit with error on invalid peptides, otherwise ignore them"/>
|
|
131 </inputs>
|
|
132 <outputs>
|
|
133 <data name="output_json" format="json" label="${tool.name} ${unipept.api} on ${on_string} json">
|
|
134 <filter>'json' in outputs</filter>
|
|
135 </data>
|
|
136 <data name="output_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} tsv">
|
|
137 <filter>'tsv' in outputs</filter>
|
|
138 </data>
|
|
139 <data name="output_csv" format="csv" label="${tool.name} ${unipept.api} on ${on_string} csv">
|
|
140 <filter>'csv' in outputs</filter>
|
|
141 </data>
|
1
|
142 <data name="output_unmatched" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} unmatched">
|
|
143 <filter>'unmatched' in outputs</filter>
|
0
|
144 </data>
|
|
145 </outputs>
|
|
146 <tests>
|
|
147 <test>
|
1
|
148 <param name="api" value="pept2lca"/>
|
0
|
149 <param name="fmt" value="tabular"/>
|
1
|
150 <param name="input_tsv" value="tryptic.tsv"/>
|
0
|
151 <param name="column" value="2"/>
|
|
152 <param name="extra" value="True"/>
|
|
153 <param name="names" value="True"/>
|
1
|
154 <param name="outputs" value="tsv,unmatched"/>
|
0
|
155 <output name="output_tsv">
|
|
156 <assert_contents>
|
1
|
157 <has_text text="Homininae" />
|
0
|
158 </assert_contents>
|
|
159 </output>
|
1
|
160 <output name="output_unmatched">
|
0
|
161 <assert_contents>
|
1
|
162 <has_text text="QTAMAV" />
|
0
|
163 </assert_contents>
|
|
164 </output>
|
|
165 </test>
|
|
166 <test>
|
1
|
167 <param name="api" value="pept2lca"/>
|
0
|
168 <param name="fmt" value="fasta"/>
|
1
|
169 <param name="input_fasta" value="peptide.fa"/>
|
0
|
170 <param name="equate_il" value="True"/>
|
|
171 <param name="extra" value="True"/>
|
|
172 <param name="names" value="True"/>
|
1
|
173 <param name="outputs" value="json,tsv"/>
|
0
|
174 <output name="output_json">
|
|
175 <assert_contents>
|
1
|
176 <has_text text="VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPHIPIDDLTMVVYDPDKGSNGTFLLSLGGPDAEAFSVSPERAAGSASVQVLVRVSALVDYERQTAMAV" />
|
|
177 </assert_contents>
|
|
178 </output>
|
|
179 <output name="output_tsv">
|
|
180 <assert_contents>
|
|
181 <has_text text="9606" />
|
|
182 <has_text text="9598" />
|
0
|
183 </assert_contents>
|
|
184 </output>
|
1
|
185 </test>
|
|
186 <test>
|
|
187 <param name="api" value="pept2taxa"/>
|
|
188 <param name="fmt" value="fasta"/>
|
|
189 <param name="input_fasta" value="peptide.fa"/>
|
|
190 <param name="equate_il" value="True"/>
|
|
191 <param name="extra" value="False"/>
|
|
192 <param name="names" value="False"/>
|
|
193 <param name="outputs" value="tsv"/>
|
|
194 <output name="output_tsv">
|
0
|
195 <assert_contents>
|
1
|
196 <has_text text="sapiens" />
|
|
197 <has_text text="troglodytes" />
|
|
198 <has_text text="Gorilla" />
|
|
199 <has_text text="Macaca" />
|
0
|
200 </assert_contents>
|
|
201 </output>
|
|
202 </test>
|
|
203 </tests>
|
|
204 <help><![CDATA[
|
|
205 **Unipept**
|
|
206
|
|
207 Retrieve UniProt and taxonomic information for tryptic peptides.
|
1
|
208
|
|
209 Unipept API documentation - http://unipept.ugent.be/apidocs
|
0
|
210
|
1
|
211 **Input**
|
|
212
|
|
213 Input peptides can be retrieved from tabular, fasta, mzid, or pepxml datasets.
|
|
214
|
|
215 Processing details::
|
|
216
|
|
217 The input peptides are split into tryptic peptide fragments in order to match the Unipept records.
|
|
218 Only fragments that are complete tryptic peptides between 5 and 50 amino acids in length will be matched by Unipept.
|
|
219 The match to the most specific tryptic fragment is reported.
|
|
220
|
|
221
|
|
222 **Unipept APIs**
|
|
223
|
|
224 **pept2prot** - http://unipept.ugent.be/apidocs/pept2prot
|
|
225
|
0
|
226 Returns the list of UniProt entries containing a given tryptic peptide. This is the same information as provided on the Protein matches tab when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
227
|
|
228 By default, each object contains the following information fields extracted from the UniProt record::
|
|
229
|
|
230 peptide: the peptide that matched this record
|
|
231 uniprot_id: the UniProt accession number of the matching record
|
|
232 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
233
|
|
234 When the extra parameter is set to true, objects contain the following additional fields extracted from the UniProt record::
|
|
235
|
|
236 taxon_name: the name of the organism associated with the matching UniProt record
|
|
237 ec_references: a space separated list of associated EC numbers
|
|
238 go_references: a space separated list of associated GO terms
|
|
239 refseq_ids: a space separated list of associated RefSeq accession numbers
|
|
240 refseq_protein_ids: a space separated list of associated RefSeq protein accession numbers
|
|
241 insdc_ids: a space separated list of associated insdc accession numbers
|
|
242 insdc_protein_ids: a space separated list of associated insdc protein accession numbers
|
|
243
|
|
244
|
1
|
245 **pept2taxa** - http://unipept.ugent.be/apidocs/pept2taxa
|
|
246
|
0
|
247 Returns the set of organisms associated with the UniProt entries containing a given tryptic peptide. This is the same information as provided on the Lineage table tab when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
248
|
|
249 By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::
|
|
250
|
|
251 peptide: the peptide that matched this record
|
|
252 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
253 taxon_name: the name of the organism associated with the matching record
|
|
254 taxon_rank: the taxonomic rank of the organism associated with the matching record
|
|
255
|
|
256 When the extra parameter is set to true, objects contain additional information about the lineages of the organism extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::
|
|
257
|
|
258 superkingdom_id
|
|
259 kingdom_id
|
|
260 subkingdom_id
|
|
261 superphylum_id
|
|
262 phylum_id
|
|
263 subphylum_id
|
|
264 superclass_id
|
|
265 class_id
|
|
266 subclass_id
|
|
267 infraclass_id
|
|
268 superorder_id
|
|
269 order_id
|
|
270 suborder_id
|
|
271 infraorder_id
|
|
272 parvorder_id
|
|
273 superfamily_id
|
|
274 family_id
|
|
275 subfamily_id
|
|
276 tribe_id
|
|
277 subtribe_id
|
|
278 genus_id
|
|
279 subgenus_id
|
|
280 species_group_id
|
|
281 species_subgroup_id
|
|
282 species_id
|
|
283 subspecies_id
|
|
284 varietas_id
|
|
285 forma_id
|
|
286
|
|
287
|
1
|
288 **pept2lca** - http://unipept.ugent.be/apidocs/pept2lca
|
|
289
|
0
|
290 Returns the taxonomic lowest common ancestor for a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
291
|
|
292 By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::
|
|
293
|
|
294 peptide: the peptide that matched this record
|
|
295 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
296 taxon_name: the name of the organism associated with the matching record
|
|
297 taxon_rank: the taxonomic rank of the organism associated with the matching record
|
|
298
|
|
299 When the extra parameter is set to true, objects contain additional information about the lineage of the taxonomic lowest common ancestor extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::
|
|
300
|
|
301 superkingdom_id
|
|
302 kingdom_id
|
|
303 subkingdom_id
|
|
304 superphylum_id
|
|
305 phylum_id
|
|
306 subphylum_id
|
|
307 superclass_id
|
|
308 class_id
|
|
309 subclass_id
|
|
310 infraclass_id
|
|
311 superorder_id
|
|
312 order_id
|
|
313 suborder_id
|
|
314 infraorder_id
|
|
315 parvorder_id
|
|
316 superfamily_id
|
|
317 family_id
|
|
318 subfamily_id
|
|
319 tribe_id
|
|
320 subtribe_id
|
|
321 genus_id
|
|
322 subgenus_id
|
|
323 species_group_id
|
|
324 species_subgroup_id
|
|
325 species_id
|
|
326 subspecies_id
|
|
327 varietas_id
|
|
328 forma_id
|
|
329
|
|
330
|
|
331 **Attributions**
|
|
332
|
|
333 The Unipept metaproteomics analysis pipeline
|
|
334 Bart Mesuere1,*, Griet Debyser2, Maarten Aerts3, Bart Devreese2, Peter Vandamme3 and Peter Dawyndt1
|
|
335 Article first published online: 11 FEB 2015
|
|
336 DOI: 10.1002/pmic.201400361
|
|
337 http://onlinelibrary.wiley.com/doi/10.1002/pmic.201400361/abstract
|
|
338
|
|
339 ]]></help>
|
|
<!-- Citation for the Unipept metaproteomics analysis pipeline article.
     The doi citation type takes the bare DOI, without a "doi:" prefix. -->
340 <citations>
|
|
341 <citation type="doi">10.1002/pmic.201400361</citation>
|
|
342 </citations>
|
|
343
|
|
344 </tool>
|