0
|
1 <tool id="unipept" name="Unipept" version="0.1.0">
|
|
2 <description>retrieve taxonomy for peptides</description>
|
|
<macros>
    <!-- Reusable boolean flags, expanded inside the branches of the "unipept" conditional. -->

    <!-- Emits "-e" on the command line when ticked. -->
    <xml name="equate_il">
        <param name="equate_il" type="boolean" checked="false" truevalue="-e" falsevalue=""
               label="Equate isoleucine and leucine"
               help="isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records"/>
    </xml>

    <!-- Emits "-x" when ticked; the caller supplies the help text through the yield slot. -->
    <xml name="extra">
        <param name="extra" type="boolean" checked="false" truevalue="-x" falsevalue=""
               label="retrieve extra information">
            <yield/>
        </param>
    </xml>

    <!-- Emits "-n" when ticked; ticked by default. -->
    <xml name="names">
        <param name="names" type="boolean" checked="true" truevalue="-n" falsevalue=""
               label="names"
               help="return the names of taxons"/>
    </xml>
</macros>
|
|
<requirements>
    <!-- No packaged dependencies are declared for this tool. -->
</requirements>
<stdio>
    <!-- Any exit status of 1 or higher is reported as a fatal job error. -->
    <exit_code range="1:" level="fatal"/>
</stdio>
|
|
<command interpreter="python"><![CDATA[
    ## Build the unipept.py command line from the chosen API, the peptide source and the requested outputs.
    unipept.py
    --api=$unipept.api
    $unipept.equate_il $unipept.extra
    ## "names" is not offered in the pept2prot branch of the conditional, so only reference it
    ## for the other APIs. Compare through str() like every other condition in this template.
    #if str($unipept.api) != 'pept2prot':
        $unipept.names
    #end if
    $strict
    #if str($peptide_src.fmt) == 'proteomic':
        ## Generic proteomics input: choose the option from the dataset's datatype extension.
        #if $peptide_src.input.datatype.file_ext == 'fasta':
            --fasta="$peptide_src.input"
        #elif $peptide_src.input.datatype.file_ext == 'mzid':
            --mzid="$peptide_src.input"
        #elif $peptide_src.input.datatype.file_ext == 'pepxml':
            --pepxml="$peptide_src.input"
        #end if
    #elif str($peptide_src.fmt) == 'tabular':
        --tabular="$peptide_src.input_tsv"
        ## Pass the selected column number minus one (presumably unipept.py expects a 0-based index).
        #set $col = int(str($peptide_src.column)) - 1
        --column=$col
    #elif str($peptide_src.fmt) == 'fasta':
        --fasta="$peptide_src.input_fasta"
    #elif str($peptide_src.fmt) == 'mzid':
        --mzid="$peptide_src.input_mzid"
    #elif str($peptide_src.fmt) == 'pepxml':
        --pepxml="$peptide_src.input_pepxml"
    #end if
    ## One option per output ticked in the multi-select; paths are quoted like the input paths above.
    #if 'json' in str($outputs).split(','):
        --json "$output_json"
    #end if
    #if 'tsv' in str($outputs).split(','):
        --tsv "$output_tsv"
    #end if
    #if 'csv' in str($outputs).split(','):
        --csv "$output_csv"
    #end if
    #if 'mismatch' in str($outputs).split(','):
        --mismatch "$output_mismatch"
    #end if
]]></command>
|
|
<inputs>
    <!-- Which Unipept API call to make; each branch exposes the flags offered for that call. -->
    <conditional name="unipept">
        <param name="api" type="select" label="Unipept application">
            <option value="pept2taxa" selected="true">pept2taxa: organisms associated with the UniProt entries containing a given tryptic peptide</option>
            <option value="pept2lca">pept2lca: lowest common ancestor</option>
            <option value="pept2prot">pept2prot: UniProt entries containing a given tryptic peptide</option>
        </param>
        <when value="pept2taxa">
            <expand macro="equate_il" />
            <!-- NOTE(review): a <checked>true</checked> child used to be passed here. "checked" is an
                 attribute of <param>, not a child element, so it did not change the default. It was
                 removed and the option stays unchecked by default, as before. -->
            <expand macro="extra">
                <help>Return the complete lineage of each organism.</help>
            </expand>
            <expand macro="names" />
        </when>
        <when value="pept2lca">
            <expand macro="equate_il" />
            <expand macro="extra">
                <help>Return the complete lineage of the taxonomic lowest common ancestor.</help>
            </expand>
            <expand macro="names" />
        </when>
        <!-- pept2prot has no "names" option. -->
        <when value="pept2prot">
            <expand macro="equate_il" />
            <expand macro="extra">
                <help>Return additional information fields: taxon_name, ec_references, go_references, refseq_ids, refseq_protein_ids, insdc_ids, insdc_protein_ids
                    WARNING: Huge performance penalty! Only use for small number of peptides when the extra information is required.
                </help>
            </expand>
        </when>
    </conditional>

    <!-- Where the peptides come from; each branch defines its own dataset parameter. -->
    <conditional name="peptide_src">
        <param name="fmt" type="select" label="Peptides input format">
            <option value="proteomic">proteomics formats: mzid, pepxml, fasta</option>
            <option value="tabular">tabular</option>
            <option value="fasta">fasta</option>
            <option value="mzid">mzid</option>
            <option value="pepxml">pepxml</option>
        </param>
        <when value="proteomic">
            <param name="input" type="data" format="mzid,pepxml,fasta" label="Peptide Input" />
        </when>
        <when value="tabular">
            <param name="input_tsv" type="data" format="tabular" label="Tabular Input Containing Peptide column" />
            <param name="column" label="Select column with peptides" type="data_column" numerical="false" data_ref="input_tsv" />
        </when>
        <when value="fasta">
            <param name="input_fasta" type="data" format="fasta" label="Peptide Fasta Input" />
        </when>
        <when value="mzid">
            <param name="input_mzid" type="data" format="mzid" label="mzIdentML Input" />
        </when>
        <when value="pepxml">
            <param name="input_pepxml" type="data" format="pepxml" label="pepXML Input" />
        </when>
    </conditional>

    <!-- Each ticked entry enables the matching dataset in the outputs section. -->
    <param name="outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">
        <option value="tsv" selected="true">tabular</option>
        <option value="csv">Comma Separated Values (.csv)</option>
        <option value="json">JSON</option>
        <option value="mismatch">Mismatches</option>
    </param>
    <param name="strict" type="boolean" truevalue="--strict" falsevalue="" checked="false" label="Exit with error on invalid peptides, otherwise ignore them"/>
</inputs>
|
|
<outputs>
    <!-- One dataset per format; each is created only when its value is ticked in the "outputs" parameter. -->
    <data name="output_json" label="${tool.name} ${unipept.api} on ${on_string} json" format="json">
        <filter>'json' in outputs</filter>
    </data>
    <data name="output_tsv" label="${tool.name} ${unipept.api} on ${on_string} tsv" format="tabular">
        <filter>'tsv' in outputs</filter>
    </data>
    <data name="output_csv" label="${tool.name} ${unipept.api} on ${on_string} csv" format="csv">
        <filter>'csv' in outputs</filter>
    </data>
    <data name="output_mismatch" label="${tool.name} ${unipept.api} on ${on_string} mismatch" format="tabular">
        <filter>'mismatch' in outputs</filter>
    </data>
</outputs>
|
|
<tests>
    <!-- Tabular input, peptides in column 2; checks the tabular and mismatch outputs. -->
    <test>
        <param name="fmt" value="tabular"/>
        <param name="input_tsv" value="input.tsv"/>
        <param name="column" value="2"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="outputs" value="tsv,mismatch"/>
        <output name="output_tsv">
            <assert_contents>
                <has_text text="AIPQLEVARPADAYETAEAYR" />
            </assert_contents>
        </output>
        <output name="output_mismatch">
            <assert_contents>
                <has_text text="DQIAHEGK" />
            </assert_contents>
        </output>
    </test>
    <!-- FASTA input; checks the JSON and mismatch outputs. -->
    <test>
        <param name="fmt" value="fasta"/>
        <!-- The fasta branch of "peptide_src" reads its dataset from input_fasta, not input_tsv. -->
        <param name="input_fasta" value="input.fasta"/>
        <param name="equate_il" value="True"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="outputs" value="json,mismatch"/>
        <output name="output_json">
            <assert_contents>
                <has_text text="AIPQLEVARPADAYETAEAYR" />
            </assert_contents>
        </output>
        <output name="output_mismatch">
            <assert_contents>
                <has_text text="DQIAHEGK" />
            </assert_contents>
        </output>
    </test>
</tests>
|
|
181 <help><![CDATA[
|
|
182 **Unipept**
|
|
183
|
|
184 Retrieve UniProt and taxonomic information for tryptic peptides.
|
|
185
|
|
186 **pept2prot**
|
|
187 Returns the list of UniProt entries containing a given tryptic peptide. This is the same information as provided on the Protein matches tab when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
188
|
|
189 By default, each object contains the following information fields extracted from the UniProt record::
|
|
190
|
|
191 peptide: the peptide that matched this record
|
|
192 uniprot_id: the UniProt accession number of the matching record
|
|
193 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
194
|
|
195 When the extra parameter is set to true, objects contain the following additional fields extracted from the UniProt record::
|
|
196
|
|
197 taxon_name: the name of the organism associated with the matching UniProt record
|
|
198 ec_references: a space separated list of associated EC numbers
|
|
199 go_references: a space separated list of associated GO terms
|
|
200 refseq_ids: a space separated list of associated RefSeq accession numbers
|
|
201 refseq_protein_ids: a space separated list of associated RefSeq protein accession numbers
|
|
202 insdc_ids: a space separated list of associated insdc accession numbers
|
|
203 insdc_protein_ids: a space separated list of associated insdc protein accession numbers
|
|
204
|
|
205 http://unipept.ugent.be/apidocs/pept2prot
|
|
206
|
|
207 **pept2taxa**
|
|
208 Returns the set of organisms associated with the UniProt entries containing a given tryptic peptide. This is the same information as provided on the Lineage table tab when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
209
|
|
210 By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::
|
|
211
|
|
212 peptide: the peptide that matched this record
|
|
213 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
214 taxon_name: the name of the organism associated with the matching record
|
|
215 taxon_rank: the taxonomic rank of the organism associated with the matching record
|
|
216
|
|
217 When the extra parameter is set to true, objects contain additional information about the lineages of the organism extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::
|
|
218
|
|
219 superkingdom_id
|
|
220 kingdom_id
|
|
221 subkingdom_id
|
|
222 superphylum_id
|
|
223 phylum_id
|
|
224 subphylum_id
|
|
225 superclass_id
|
|
226 class_id
|
|
227 subclass_id
|
|
228 infraclass_id
|
|
229 superorder_id
|
|
230 order_id
|
|
231 suborder_id
|
|
232 infraorder_id
|
|
233 parvorder_id
|
|
234 superfamily_id
|
|
235 family_id
|
|
236 subfamily_id
|
|
237 tribe_id
|
|
238 subtribe_id
|
|
239 genus_id
|
|
240 subgenus_id
|
|
241 species_group_id
|
|
242 species_subgroup_id
|
|
243 species_id
|
|
244 subspecies_id
|
|
245 varietas_id
|
|
246 forma_id
|
|
247
|
|
248 http://unipept.ugent.be/apidocs/pept2taxa
|
|
249
|
|
250 **pept2lca**
|
|
251 Returns the taxonomic lowest common ancestor for a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.
|
|
252
|
|
253 By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::
|
|
254
|
|
255 peptide: the peptide that matched this record
|
|
256 taxon_id: the NCBI taxon id of the organism associated with the matching record
|
|
257 taxon_name: the name of the organism associated with the matching record
|
|
258 taxon_rank: the taxonomic rank of the organism associated with the matching record
|
|
259
|
|
260 When the extra parameter is set to true, objects contain additional information about the lineage of the taxonomic lowest common ancestor extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::
|
|
261
|
|
262 superkingdom_id
|
|
263 kingdom_id
|
|
264 subkingdom_id
|
|
265 superphylum_id
|
|
266 phylum_id
|
|
267 subphylum_id
|
|
268 superclass_id
|
|
269 class_id
|
|
270 subclass_id
|
|
271 infraclass_id
|
|
272 superorder_id
|
|
273 order_id
|
|
274 suborder_id
|
|
275 infraorder_id
|
|
276 parvorder_id
|
|
277 superfamily_id
|
|
278 family_id
|
|
279 subfamily_id
|
|
280 tribe_id
|
|
281 subtribe_id
|
|
282 genus_id
|
|
283 subgenus_id
|
|
284 species_group_id
|
|
285 species_subgroup_id
|
|
286 species_id
|
|
287 subspecies_id
|
|
288 varietas_id
|
|
289 forma_id
|
|
290
|
|
291 http://unipept.ugent.be/apidocs/pept2lca
|
|
292
|
|
293 **Attributions**
|
|
294
|
|
295 The Unipept metaproteomics analysis pipeline
|
|
296 Bart Mesuere, Griet Debyser, Maarten Aerts, Bart Devreese, Peter Vandamme and Peter Dawyndt
|
|
297 Article first published online: 11 FEB 2015
|
|
298 DOI: 10.1002/pmic.201400361
|
|
299 http://onlinelibrary.wiley.com/doi/10.1002/pmic.201400361/abstract
|
|
300
|
|
301 ]]></help>
|
|
<citations>
    <!-- A DOI citation takes the bare DOI, without a "doi:" prefix. -->
    <citation type="doi">10.1002/pmic.201400361</citation>
</citations>
|
|
305
|
|
306 </tool>
|