Mercurial > repos > iuc > metaphlan
comparison metaphlan.xml @ 0:f5df500fcc3c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
author | iuc |
---|---|
date | Mon, 19 Apr 2021 20:56:20 +0000 |
parents | |
children | b89b0765695d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f5df500fcc3c |
---|---|
1 <tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>to profile the composition of microbial communities</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="edam_ontology"/> | |
7 <expand macro="requirements"/> | |
8 <version_command>metaphlan -v</version_command> | |
9 <command detect_errors="aggressive"><![CDATA[ | |
10 #if $inputs.in.selector == "raw" | |
11 #if $inputs.in.raw_in.selector == "single" | |
12 #set full_ext=$inputs.in.raw_in.in.datatype.file_ext | |
13 #if $full_ext.endswith("gz") | |
14 #set $file_path="in" | |
15 zcat '$inputs.in.raw_in.in' > '$file_path' | |
16 && | |
17 #else if $full_ext.endswith("bz2") | |
18 #set $file_path="in" | |
19 bzcat '$inputs.in.raw_in.in' > '$file_path' | |
20 && | |
21 #else | |
22 #set $file_path=$inputs.in.raw_in.in | |
23 #end if | |
24 #else if $inputs.in.raw_in.selector == "multiple" | |
25 #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext | |
26 #set file_path="" | |
27 #set sep="" | |
28 #for $i, $f in enumerate($inputs.in.raw_in.in) | |
29 #if $f.datatype.file_ext != $full_ext | |
30 echo "Different datatypes for input files" | |
31 && | |
32 exit 1 | |
33 #end if | |
34 #if $full_ext.endswith("gz") | |
35 #set fp="input_%s" % ($i) | |
36 zcat '$f' > '$fp' | |
37 && | |
38 #else if $full_ext.endswith("bz2") | |
39 #set fp="input_%s" % ($i) | |
40 bzcat '$f' > '$fp' | |
41 && | |
42 #else | |
43 #set fp=$f | |
44 #end if | |
45 #set $file_path+="%s%s" % ($sep, $fp) | |
46 #set $sep="," | |
47 #end for | |
48 #else if $inputs.in.raw_in.selector == "paired" | |
49 #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext | |
50 #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext | |
51 echo "Different datatypes for input paired-end files" | |
52 && | |
53 exit 1 | |
54 #end if | |
55 #if $full_ext.endswith("gz") | |
56 zcat '$inputs.in.raw_in.in_f' > 'in_f' | |
57 && | |
58 zcat '$inputs.in.raw_in.in_r' > 'in_r' | |
59 && | |
60 #set file_path="in_f,in_r" | |
61 #else if $full_ext.endswith("bz2") | |
62 bzcat '$inputs.in.raw_in.in_f' > 'in_f' | |
63 && | |
64 bzcat '$inputs.in.raw_in.in_r' > 'in_r' | |
65 && | |
66 #set file_path="in_f,in_r" | |
67 #else | |
68 #set file_path="%s,%s" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r) | |
69 #end if | |
70 #end if | |
71 | |
72 #if $full_ext.startswith("fastq") | |
73 #set ext='fastq' | |
74 #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2")) | |
75 #set ext='fasta' | |
76 #else | |
77 #set ext=$full_ext | |
78 #end if | |
79 #end if | |
80 | |
81 #if $inputs.db.db_selector == "history" | |
82 mkdir 'ref_db' | |
83 && | |
84 bowtie2-build '$inputs.db.bowtie2db' 'ref_db/custom_db' | |
85 && | |
86 python '$__tool_directory__/customizemetadata.py' | |
87 transform_json_to_pkl | |
88 --json '$inputs.db.mpa_pkl' | |
89 --pkl 'ref_db/custom_db.pkl' | |
90 && | |
91 #end if | |
92 | |
93 metaphlan | |
94 #if $inputs.in.selector == "raw" | |
95 '$file_path' | |
96 --input_type '$ext' | |
97 --read_min_len $inputs.in.read_min_len | |
98 --bt2_ps '$inputs.in.mapping.bt2_ps' | |
99 --min_mapq_val $inputs.in.mapping.min_mapq_val | |
100 #else | |
101 '$inputs.in.in' | |
102 --input_type '$inputs.in.selector' | |
103 #end if | |
104 #if $inputs.db.db_selector == "cached" | |
105 --bowtie2db '$inputs.db.cached_db.fields.path' | |
106 --index '$inputs.db.cached_db.fields.dbkey' | |
107 #else | |
108 --bowtie2db 'ref_db/' | |
109 --index 'custom_db' | |
110 #end if | |
111 -t '$analysis.analysis_type.t' | |
112 #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats" | |
113 --tax_lev '$analysis.analysis_type.tax_lev' | |
114 #else if $analysis.analysis_type.t == "clade_specific_strain_tracker" | |
115 --clade '$analysis.analysis_type.clade' | |
116 #if str($analysis.analysis_type.min_ab) != '' | |
117 --min_ab $analysis.analysis_type.min_ab | |
118 #end if | |
119 #else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != '' | |
120 --nreads $analysis.analysis_type.nreads | |
121 #else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != '' | |
122 --pres_th $analysis.analysis_type.pres_th | |
123 #end if | |
124 --min_cu_len $analysis.min_cu_len | |
125 #if str($analysis.min_alignment_len) != '' | |
126 --min_alignment_len $analysis.min_alignment_len | |
127 #end if | |
128 #if 'add_viruses' in $analysis.organism_profiling | |
129 --add_viruses | |
130 #end if | |
131 #if 'ignore_eukaryotes' in $analysis.organism_profiling | |
132 --ignore_eukaryotes | |
133 #end if | |
134 #if 'ignore_bacteria' in $analysis.organism_profiling | |
135 --ignore_bacteria | |
136 #end if | |
137 #if 'ignore_archaea' in $analysis.organism_profiling | |
138 --ignore_archaea | |
139 #end if | |
140 --stat '$analysis.stat' --stat_q $analysis.stat_q | |
141 --perc_nonzero $analysis.perc_nonzero | |
142 #if $analysis.ignore_markers | |
143 --ignore_markers '$analysis.ignore_markers' | |
144 #end if | |
145 $analysis.avoid_disqm | |
146 --sample_id_key '$out.sample_id_key' | |
147 --sample_id '$out.sample_id' | |
148 $out.use_group_representative | |
149 $out.legacy_output | |
150 $out.CAMI_format_output | |
151 $out.unknown_estimation | |
152 -o '$output_file' | |
153 --bowtie2out 'bowtie2out' | |
154 -s '$sam_output_file' | |
155 --biom '$biom_output_file' | |
156 --nproc \${GALAXY_SLOTS:-4} | |
157 | |
158 #if $inputs.in.selector == "raw" | |
159 && | |
160 mv 'bowtie2out' '$bowtie2out' | |
161 #end if | |
162 ]]></command> | |
163 <inputs> | |
164 <section name="inputs" title="Inputs" expanded="true"> | |
165 <conditional name="in"> | |
166 <param name="selector" type="select" label="Input(s)"> | |
167 <option value="raw" selected="true">Fasta/FastQ file(s) with metagenomic reads</option> | |
168 <option value="sam">Externally BowTie2-mapped SAM file</option> | |
169 <option value="bowtie2out">Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run</option> | |
170 </param> | |
171 <when value="raw"> | |
172 <conditional name="raw_in"> | |
173 <param name="selector" type="select" label="Fasta/FastQ file(s) with metagenomic reads"> | |
174 <option value="single" selected="true">One single-end file</option> | |
175 <option value="multiple">Multiple single-end files</option> | |
176 <option value="paired">Paired-end files</option> | |
177 </param> | |
178 <when value="single"> | |
179 <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with metagenomic reads"/> | |
180 </when> | |
181 <when value="multiple"> | |
182 <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with metagenomic reads"/> | |
183 </when> | |
184 <when value="paired"> | |
185 <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with metagenomic reads"/> | |
186 <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with metagenomic reads"/> | |
187 </when> | |
188 </conditional> | |
189 <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/> | |
190 <section name="mapping" title="Mapping" expanded="true"> | |
191 <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files"> | |
192 <option value="sensitive">Sensitive</option> | |
193 <option value="very-sensitive" selected="true">Very sensitive</option> | |
194 <option value="sensitive-local">Sensitive local</option> | |
195 <option value="very-sensitive-local">Very sensitive local</option> | |
196 </param> | |
197 <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/> | |
198 </section> | |
199 </when> | |
200 <when value="sam"> | |
201 <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map metagenomic reads"/> | |
202 </when> | |
203 <when value="bowtie2out"> | |
204 <param name="in" type="data" format="tabular" label="Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run" | |
205 help="File needs to be generated with MetaPhlAn versions >3.0"/> | |
206 </when> | |
207 </conditional> | |
208 <conditional name="db"> | |
209 <param name="db_selector" type="select" label="Database with clade-specific marker genes"> | |
210 <option value="cached" selected="true">Locally cached</option> | |
211 <option value="history">From history</option> | |
212 </param> | |
213 <when value="cached"> | |
214 <param name="cached_db" label="Cached database with clade-specific marker genes" type="select"> | |
215 <options from_data_table="metaphlan_database"> | |
216 <validator message="No MetaPhlAn database is available" type="no_options" /> | |
217 </options> | |
218 </param> | |
219 </when> | |
220 <when value="history"> | |
221 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> | |
222 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated with the database with clade-specific marker genes from history"/> | |
223 </when> | |
224 </conditional> | |
225 </section> | |
226 <section name="analysis" title="Analysis" expanded="true"> | |
227 <conditional name="analysis_type"> | |
228 <param argument="-t" type="select" label="Type of analysis to perform"> | |
229 <option value="rel_ab" selected="true">rel_ab: Profiling a metagenome in terms of relative abundances</option> | |
230 <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a metagenome in terms of relative abundances and estimating the number of reads coming from each clade</option> | |
231 <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option> | |
232 <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option> | |
233 <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option> | |
234 <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by metagenome size if number of reads is specified)</option> | |
235 <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option> | |
236 <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option> | |
237 </param> | |
238 <when value="rel_ab"> | |
239 <expand macro="tax_lev"/> | |
240 </when> | |
241 <when value="rel_ab_w_read_stats"> | |
242 <expand macro="tax_lev"/> | |
243 </when> | |
244 <when value="reads_map"/> | |
245 <when value="clade_profiles"/> | |
246 <when value="clade_specific_strain_tracker"> | |
247 <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" | |
248 help="Markers are also extracted for subclades" /> | |
249 <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/> | |
250 </when> | |
251 <when value="marker_ab_table"> | |
252 <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original metagenome" | |
253 help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/> | |
254 </when> | |
255 <when value="marker_counts"/> | |
256 <when value="marker_pres_table"> | |
257 <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/> | |
258 </when> | |
259 </conditional> | |
260 <param argument="--min_cu_len" type="integer" value="2000" | |
261 label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/> | |
262 <param argument="--min_alignment_len" type="integer" optional="true" | |
263 label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/> | |
264 <param name="organism_profiling" type="select" multiple="true" optional="true" label="Organisms to profile"> | |
265 <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option> | |
266 <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option> | |
267 <option value="ignore_bacteria">Ignore bacterial organisms (ignore_bacteria)</option> | |
268 <option value="ignore_archaea">Ignore archaeal organisms (ignore_archaea)</option> | |
269 </param> | |
270 <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances"> | |
271 <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option> | |
272 <option value="avg_l">avg_l: Average of length-normalized marker counts</option> | |
273 <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option> | |
274 <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option> | |
275 <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option> | |
276 <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option> | |
277 <option value="med">med: Median of length-normalized marker counts</option> | |
278 </param> | |
279 <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/> | |
280 <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/> | |
281 <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/> | |
282 <param argument="--avoid_disqm" type='boolean' checked="true" truevalue='--avoid_disqm' falsevalue='' | |
283 label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" | |
284 help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/> | |
285 </section> | |
286 <section name="out" title="Outputs" expanded="true"> | |
287 <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/> | |
288 <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/> | |
289 <param argument="--use_group_representative" type='boolean' checked="false" truevalue='--use_group_representative' falsevalue='' | |
290 label="Use a species as representative for species groups?"/> | |
291 <param name="legacy_output" argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue='' | |
292 label="Old MetaPhlAn2 two columns output?"/> | |
293 <param argument="--CAMI_format_output" type='boolean' checked="false" truevalue='--CAMI_format_output' falsevalue='' | |
294 label="Report the profiling using the CAMI output format?"/> | |
295 <param argument="--unknown_estimation" type='boolean' checked="false" truevalue='--unknown_estimation' falsevalue='' | |
296 label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/> | |
297 </section> | |
298 </inputs> | |
299 <outputs> | |
300 <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances" /> | |
301 <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output"> | |
302 <filter>inputs['in']['selector'] == "raw"</filter> | |
303 </data> | |
304 <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file"> | |
305 <filter>inputs['in']['selector'] == "raw"</filter> | |
306 </data> | |
307 <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file" /> | |
308 </outputs> | |
309 <tests> | |
310 <test expect_num_outputs="4"> | |
311 <section name="inputs"> | |
312 <conditional name="in"> | |
313 <param name="selector" value="raw"/> | |
314 <conditional name="raw_in"> | |
315 <!-- Single GZ file --> | |
316 <param name="selector" value="single"/> | |
317 <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/> | |
318 </conditional> | |
319 <param name="read_min_len" value="70"/> | |
320 <section name="mapping"> | |
321 <param name="bt2_ps" value="sensitive"/> | |
322 <param name="min_mapq_val" value="5"/> | |
323 </section> | |
324 </conditional> | |
325 <conditional name="db"> | |
326 <!-- Cached db --> | |
327 <param name="db_selector" value="cached"/> | |
328 <param name="cached_db" value="test-db-20210409"/> | |
329 </conditional> | |
330 </section> | |
331 <section name="analysis"> | |
332 <param name="min_cu_len" value="2000"/> | |
333 <param name="organism_profiling" value="add_viruses"/> | |
334 <param name="stat" value="avg_g"/> | |
335 <param name="stat_q" value="0.2"/> | |
336 <param name="perc_nonzero" value="0.33"/> | |
337 <param name="avoid_disqm" value="true"/> | |
338 </section> | |
339 <section name="out"> | |
340 <param name="sample_id_key" value="SampleID"/> | |
341 <param name="sample_id" value="Metaphlan_Analysis"/> | |
342 <param name="use_group_representative" value="false"/> | |
343 <param name="legacy_output" value="false"/> | |
344 <param name="CAMI_format_output" value="false"/> | |
345 <param name="unknown_estimation" value="false"/> | |
346 </section> | |
347 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
348 <assert_contents> | |
349 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
350 </assert_contents> | |
351 </output> | |
352 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size"> | |
353 <assert_contents> | |
354 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> | |
355 <has_text text="37637__U2I1U8__N579_01580"/> | |
356 </assert_contents> | |
357 </output> | |
358 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size"> | |
359 <assert_contents> | |
360 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> | |
361 </assert_contents> | |
362 </output> | |
363 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
364 <assert_contents> | |
365 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
366 </assert_contents> | |
367 </output> | |
368 </test> | |
369 <test expect_num_outputs="4"> | |
370 <section name="inputs"> | |
371 <conditional name="in"> | |
372 <param name="selector" value="raw"/> | |
373 <conditional name="raw_in"> | |
374 <!-- Multiple GZ file --> | |
375 <param name="selector" value="multiple"/> | |
376 <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/> | |
377 </conditional> | |
378 <param name="read_min_len" value="70"/> | |
379 <section name="mapping"> | |
380 <param name="bt2_ps" value="sensitive"/> | |
381 <param name="min_mapq_val" value="5"/> | |
382 </section> | |
383 </conditional> | |
384 <conditional name="db"> | |
385 <!-- Local db --> | |
386 <param name="db_selector" value="history"/> | |
387 <param name="bowtie2db" value="test-db.fasta"/> | |
388 <param name="mpa_pkl" value="test-db.json"/> | |
389 </conditional> | |
390 </section> | |
391 <section name="analysis"> | |
392 <param name="min_cu_len" value="2000"/> | |
393 <param name="organism_profiling" value="add_viruses"/> | |
394 <param name="stat" value="avg_g"/> | |
395 <param name="stat_q" value="0.2"/> | |
396 <param name="perc_nonzero" value="0.33"/> | |
397 <param name="avoid_disqm" value="true"/> | |
398 </section> | |
399 <section name="out"> | |
400 <param name="sample_id_key" value="SampleID"/> | |
401 <param name="sample_id" value="Metaphlan_Analysis"/> | |
402 <param name="use_group_representative" value="false"/> | |
403 <param name="legacy_output" value="false"/> | |
404 <param name="CAMI_format_output" value="false"/> | |
405 <param name="unknown_estimation" value="false"/> | |
406 </section> | |
407 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
408 <assert_contents> | |
409 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
410 <has_text text="relative_abundance"/> | |
411 <has_text text="NCBI_tax_id"/> | |
412 <has_text text="clade_name"/> | |
413 </assert_contents> | |
414 </output> | |
415 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> | |
416 <assert_contents> | |
417 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> | |
418 <has_text text="37637__U2I1U8__N579_01580"/> | |
419 </assert_contents> | |
420 </output> | |
421 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size"> | |
422 <assert_contents> | |
423 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> | |
424 </assert_contents> | |
425 </output> | |
426 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
427 <assert_contents> | |
428 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
429 </assert_contents> | |
430 </output> | |
431 </test> | |
432 <test expect_num_outputs="4"> | |
433 <section name="inputs"> | |
434 <conditional name="in"> | |
435 <param name="selector" value="raw"/> | |
436 <conditional name="raw_in"> | |
437 <!-- Paired GZ file --> | |
438 <param name="selector" value="paired"/> | |
439 <param name="in_f" value="SRS014464-Anterior_nares.fasta.gz"/> | |
440 <param name="in_r" value="SRS014464-Anterior_nares.fasta.gz"/> | |
441 </conditional> | |
442 <param name="read_min_len" value="70"/> | |
443 <section name="mapping"> | |
444 <param name="bt2_ps" value="sensitive"/> | |
445 <param name="min_mapq_val" value="5"/> | |
446 </section> | |
447 </conditional> | |
448 <conditional name="db"> | |
449 <!-- Cached db --> | |
450 <param name="db_selector" value="cached"/> | |
451 <param name="cached_db" value="test-db-20210409"/> | |
452 </conditional> | |
453 </section> | |
454 <section name="analysis"> | |
455 <param name="min_cu_len" value="2000"/> | |
456 <param name="organism_profiling" value="add_viruses"/> | |
457 <param name="stat" value="avg_g"/> | |
458 <param name="stat_q" value="0.2"/> | |
459 <param name="perc_nonzero" value="0.33"/> | |
460 <param name="avoid_disqm" value="true"/> | |
461 </section> | |
462 <section name="out"> | |
463 <param name="sample_id_key" value="SampleID"/> | |
464 <param name="sample_id" value="Metaphlan_Analysis"/> | |
465 <param name="use_group_representative" value="false"/> | |
466 <param name="legacy_output" value="false"/> | |
467 <param name="CAMI_format_output" value="false"/> | |
468 <param name="unknown_estimation" value="false"/> | |
469 </section> | |
470 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
471 <assert_contents> | |
472 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
473 <has_text text="relative_abundance"/> | |
474 <has_text text="NCBI_tax_id"/> | |
475 <has_text text="clade_name"/> | |
476 </assert_contents> | |
477 </output> | |
478 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> | |
479 <assert_contents> | |
480 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> | |
481 <has_text text="37637__U2I1U8__N579_01580"/> | |
482 </assert_contents> | |
483 </output> | |
484 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size"> | |
485 <assert_contents> | |
486 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> | |
487 </assert_contents> | |
488 </output> | |
489 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
490 <assert_contents> | |
491 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
492 </assert_contents> | |
493 </output> | |
494 </test> | |
495 <test expect_num_outputs="2"> | |
496 <section name="inputs"> | |
497 <conditional name="in"> | |
498 <!-- SAM --> | |
499 <param name="selector" value="sam"/> | |
500 <param name="in" value="SRS014464-Anterior_nares.sam"/> | |
501 </conditional> | |
502 <conditional name="db"> | |
503 <!-- Cached db --> | |
504 <param name="db_selector" value="cached"/> | |
505 <param name="cached_db" value="test-db-20210409"/> | |
506 </conditional> | |
507 </section> | |
508 <section name="analysis"> | |
509 <param name="min_cu_len" value="2000"/> | |
510 <param name="organism_profiling" value="add_viruses"/> | |
511 <param name="stat" value="avg_g"/> | |
512 <param name="stat_q" value="0.2"/> | |
513 <param name="perc_nonzero" value="0.33"/> | |
514 <param name="avoid_disqm" value="true"/> | |
515 </section> | |
516 <section name="out"> | |
517 <param name="sample_id_key" value="SampleID"/> | |
518 <param name="sample_id" value="Metaphlan_Analysis"/> | |
519 <param name="use_group_representative" value="false"/> | |
520 <param name="legacy_output" value="false"/> | |
521 <param name="CAMI_format_output" value="false"/> | |
522 <param name="unknown_estimation" value="false"/> | |
523 </section> | |
524 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
525 <assert_contents> | |
526 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
527 <has_text text="relative_abundance"/> | |
528 <has_text text="NCBI_tax_id"/> | |
529 <has_text text="clade_name"/> | |
530 </assert_contents> | |
531 </output> | |
532 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
533 <assert_contents> | |
534 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
535 </assert_contents> | |
536 </output> | |
537 </test> | |
538 <test expect_num_outputs="2"> | |
539 <section name="inputs"> | |
540 <conditional name="in"> | |
541 <!-- bowtie2out --> | |
542 <param name="selector" value="bowtie2out"/> | |
543 <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/> | |
544 </conditional> | |
545 <conditional name="db"> | |
546 <!-- Cached db --> | |
547 <param name="db_selector" value="cached"/> | |
548 <param name="cached_db" value="test-db-20210409"/> | |
549 </conditional> | |
550 </section> | |
559 <section name="analysis"> | |
560 <param name="min_cu_len" value="2000"/> | |
561 <param name="organism_profiling" value="add_viruses"/> | |
562 <param name="stat" value="avg_g"/> | |
563 <param name="stat_q" value="0.2"/> | |
564 <param name="perc_nonzero" value="0.33"/> | |
565 <param name="avoid_disqm" value="true"/> | |
566 </section> | |
567 <section name="out"> | |
568 <param name="sample_id_key" value="SampleID"/> | |
569 <param name="sample_id" value="Metaphlan_Analysis"/> | |
570 <param name="use_group_representative" value="false"/> | |
571 <param name="legacy_output" value="false"/> | |
572 <param name="CAMI_format_output" value="false"/> | |
573 <param name="unknown_estimation" value="false"/> | |
574 </section> | |
575 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> | |
576 <assert_contents> | |
577 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
578 <has_text text="relative_abundance"/> | |
579 <has_text text="NCBI_tax_id"/> | |
580 <has_text text="clade_name"/> | |
581 </assert_contents> | |
582 </output> | |
583 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
584 <assert_contents> | |
585 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
586 </assert_contents> | |
587 </output> | |
588 </test> | |
589 <test expect_num_outputs="4"> | |
590 <section name="inputs"> | |
591 <conditional name="in"> | |
592 <param name="selector" value="raw"/> | |
593 <conditional name="raw_in"> | |
594 <!-- Test case: raw reads supplied as a single FASTA file, legacy output format enabled, 4 outputs expected --> | |
595 <param name="selector" value="single"/> | |
596 <param name="in" value="SRS014464-Anterior_nares.fasta"/> | |
597 </conditional> | |
598 <param name="read_min_len" value="70"/> | |
599 <section name="mapping"> | |
600 <param name="bt2_ps" value="sensitive"/> | |
601 <param name="min_mapq_val" value="5"/> | |
602 </section> | |
603 </conditional> | |
604 <conditional name="db"> | |
605 <!-- Reference database selected by name from the cached databases --> | |
606 <param name="db_selector" value="cached"/> | |
607 <param name="cached_db" value="test-db-20210409"/> | |
608 </conditional> | |
609 </section> | |
610 <section name="analysis"> | |
611 <param name="min_cu_len" value="2000"/> | |
612 <param name="organism_profiling" value="add_viruses"/> | |
613 <param name="stat" value="avg_g"/> | |
614 <param name="stat_q" value="0.2"/> | |
615 <param name="perc_nonzero" value="0.33"/> | |
616 <param name="ignore_markers" value="marker.txt"/> | |
617 <param name="avoid_disqm" value="true"/> | |
618 </section> | |
619 <section name="out"> | |
620 <param name="sample_id_key" value="SampleID"/> | |
621 <param name="sample_id" value="Metaphlan_Analysis"/> | |
622 <param name="use_group_representative" value="false"/> | |
623 <param name="legacy_output" value="true"/> | |
624 <param name="CAMI_format_output" value="false"/> | |
625 <param name="unknown_estimation" value="false"/> | |
626 </section> | |
627 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> | |
628 <assert_contents> | |
629 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> | |
630 <has_text text="SampleID"/> | |
631 <has_text text="Metaphlan_Analysis"/> | |
632 </assert_contents> | |
633 </output> | |
634 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size"> | |
635 <assert_contents> | |
636 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> | |
637 <has_text text="37637__U2I1U8__N579_01580"/> | |
638 </assert_contents> | |
639 </output> | |
640 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size"> | |
641 <assert_contents> | |
642 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> | |
643 </assert_contents> | |
644 </output> | |
645 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> | |
646 <assert_contents> | |
647 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> | |
648 </assert_contents> | |
649 </output> | |
650 </test> | |
651 </tests> | |
652 <help><![CDATA[ | |
653 What it does | |
654 ============ | |
655 | |
656 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, | |
657 Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level resolution. | |
658 | |
659 MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes | |
660 (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing: | |
661 | |
662 - unambiguous taxonomic assignments; | |
663 - accurate estimation of organismal relative abundance; | |
664 - species-level resolution for bacteria, archaea, eukaryotes and viruses; | |
665 - strain identification and tracking; | |
666 - orders of magnitude speedups compared to existing methods; | |
667 - metagenomic strain-level population genomics. | |
668 | |
669 MetaPhlAn clade-abundance estimation | |
670 ------------------------------------ | |
671 | |
672 The basic usage of MetaPhlAn consists of identifying the clades (from phyla to species and | |
673 strains in particular cases) present in the metagenome obtained from a microbiome sample and | |
674 estimating their relative abundance. | |
675 | |
676 Marker level analysis | |
677 --------------------- | |
678 | |
679 MetaPhlAn introduces the capability of characterizing organisms at the strain level using | |
680 non-aggregated marker information. This capability comes in several slightly different flavours and | |
681 is a way to perform strain tracking and comparison across multiple samples. | |
682 | |
683 Usually, MetaPhlAn is first run with the default parameter for the type of analysis to profile the | |
684 species present in the community, and then strain-level profiling can be performed to zoom in on | |
685 specific species of interest. This operation can be performed quickly as it exploits the bowtie2out | |
686 intermediate file saved during the execution of the default analysis type. | |
687 | |
688 Inputs | |
689 ====== | |
690 | |
691 MetaPhlAn takes as input either: | |
692 | |
693 - one or several sequence files in FASTA or FASTQ format (compressed or not) | |
694 - a SAM file produced by Bowtie2 | |
695 - an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run | |
696 | |
697 It also needs the reference database, which can be locally installed or customized using the dedicated tools. | |
698 | |
699 Outputs | |
700 ======= | |
701 | |
702 The main output file is a tab-separated file with the predicted taxon relative abundances. | |
703 | |
704 It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs. | |
705 | |
706 | |
707 More help and use cases | |
708 ======================= | |
709 | |
710 To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_. | |
711 | |
712 .. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage | |
713 | |
714 ]]></help> | |
715 <expand macro="citations"/> | |
716 </tool> |