comparison metaphlan.xml @ 0:f5df500fcc3c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
author iuc
date Mon, 19 Apr 2021 20:56:20 +0000
parents
children b89b0765695d
comparison
equal deleted inserted replaced
-1:000000000000 0:f5df500fcc3c
1 <tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>to profile the composition of microbial communities</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="edam_ontology"/>
7 <expand macro="requirements"/>
8 <version_command>metaphlan -v</version_command>
9 <command detect_errors="aggressive"><![CDATA[
## Stage raw read inputs (only when the "raw" input mode is selected).
## Compressed datasets (datatype extension ending in gz or bz2) are decompressed into
## the job working directory; uncompressed datasets are used in place.
## Results: $file_path (a single path, or a comma-separated list for several files),
## $full_ext (Galaxy datatype extension) and $ext (value later given to --input_type).
10 #if $inputs.in.selector == "raw"
## One single-end file.
11 #if $inputs.in.raw_in.selector == "single"
12 #set full_ext=$inputs.in.raw_in.in.datatype.file_ext
13 #if $full_ext.endswith("gz")
14 #set $file_path="in"
15 zcat '$inputs.in.raw_in.in' > '$file_path'
16 &&
17 #else if $full_ext.endswith("bz2")
18 #set $file_path="in"
19 bzcat '$inputs.in.raw_in.in' > '$file_path'
20 &&
21 #else
22 #set $file_path=$inputs.in.raw_in.in
23 #end if
## Several single-end files: all must share the datatype of the first one.
24 #else if $inputs.in.raw_in.selector == "multiple"
25 #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext
26 #set file_path=""
27 #set sep=""
28 #for $i, $f in enumerate($inputs.in.raw_in.in)
29 #if $f.datatype.file_ext != $full_ext
## Report on stderr and stop. The trailing && keeps "exit 1" a complete command:
## the rendered lines are joined into one command line, so without it the words of
## the next command would be handed to exit as extra arguments.
30 echo "Different datatypes for input files" >&2
31 &&
32 exit 1 &&
33 #end if
34 #if $full_ext.endswith("gz")
35 #set fp="input_%s" % ($i)
36 zcat '$f' > '$fp'
37 &&
38 #else if $full_ext.endswith("bz2")
39 #set fp="input_%s" % ($i)
40 bzcat '$f' > '$fp'
41 &&
42 #else
43 #set fp=$f
44 #end if
## Append the (possibly decompressed) path; $sep is empty for the first element only.
45 #set $file_path+="%s%s" % ($sep, $fp)
46 #set $sep=","
47 #end for
## Paired-end files: forward and reverse must share the same datatype.
48 #else if $inputs.in.raw_in.selector == "paired"
49 #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext
50 #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext
## Same stderr / "exit 1 &&" handling as for the multiple-file case.
51 echo "Different datatypes for input paired-end files" >&2
52 &&
53 exit 1 &&
54 #end if
55 #if $full_ext.endswith("gz")
56 zcat '$inputs.in.raw_in.in_f' > 'in_f'
57 &&
58 zcat '$inputs.in.raw_in.in_r' > 'in_r'
59 &&
60 #set file_path="in_f,in_r"
61 #else if $full_ext.endswith("bz2")
62 bzcat '$inputs.in.raw_in.in_f' > 'in_f'
63 &&
64 bzcat '$inputs.in.raw_in.in_r' > 'in_r'
65 &&
66 #set file_path="in_f,in_r"
67 #else
68 #set file_path="%s,%s" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r)
69 #end if
70 #end if
71
## Reduce the Galaxy datatype extension to the value used for --input_type:
## any fastq* extension becomes "fastq", compressed fasta becomes "fasta",
## everything else is passed through unchanged.
72 #if $full_ext.startswith("fastq")
73 #set ext='fastq'
74 #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2"))
75 #set ext='fasta'
76 #else
77 #set ext=$full_ext
78 #end if
79 #end if
80
## Reference database taken from the history: create ref_db/, build a Bowtie2 index
## named custom_db from the supplied FASTA dataset, then run the customizemetadata.py
## script from the tool directory (transform_json_to_pkl) on the JSON metadata dataset
## to write ref_db/custom_db.pkl. Each step is chained with && so a failure stops the job
## before metaphlan is started.
81 #if $inputs.db.db_selector == "history"
82 mkdir 'ref_db'
83 &&
84 bowtie2-build '$inputs.db.bowtie2db' 'ref_db/custom_db'
85 &&
86 python '$__tool_directory__/customizemetadata.py'
87 transform_json_to_pkl
88 --json '$inputs.db.mpa_pkl'
89 --pkl 'ref_db/custom_db.pkl'
90 &&
91 #end if
92
## Main metaphlan call. The argument list is assembled from the tool parameters;
## text parameters are single-quoted, numeric ones are passed bare.
93 metaphlan
## Input: staged raw reads (with read filtering and mapping options), or an
## already mapped SAM / bowtie2out dataset whose selector value is the input type.
94 #if $inputs.in.selector == "raw"
95 '$file_path'
96 --input_type '$ext'
97 --read_min_len $inputs.in.read_min_len
98 --bt2_ps '$inputs.in.mapping.bt2_ps'
99 --min_mapq_val $inputs.in.mapping.min_mapq_val
100 #else
101 '$inputs.in.in'
102 --input_type '$inputs.in.selector'
103 #end if
## Database: cached data-table entry, or the ref_db/custom_db index built from history datasets.
104 #if $inputs.db.db_selector == "cached"
105 --bowtie2db '$inputs.db.cached_db.fields.path'
106 --index '$inputs.db.cached_db.fields.dbkey'
107 #else
108 --bowtie2db 'ref_db/'
109 --index 'custom_db'
110 #end if
## Analysis type and the options that only exist for some analysis types.
111 -t '$analysis.analysis_type.t'
112 #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
113 --tax_lev '$analysis.analysis_type.tax_lev'
114 #else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
115 --clade '$analysis.analysis_type.clade'
116 #if str($analysis.analysis_type.min_ab) != ''
117 --min_ab $analysis.analysis_type.min_ab
118 #end if
119 #else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != ''
## Single $ so the parameter value itself is substituted.
120 --nreads $analysis.analysis_type.nreads
121 #else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != ''
122 --pres_th $analysis.analysis_type.pres_th
123 #end if
124 --min_cu_len $analysis.min_cu_len
## Optional parameters are only emitted when a value was supplied.
125 #if str($analysis.min_alignment_len) != ''
126 --min_alignment_len $analysis.min_alignment_len
127 #end if
## organism_profiling is a multi-select; each selected value maps to one flag.
128 #if 'add_viruses' in $analysis.organism_profiling
129 --add_viruses
130 #end if
131 #if 'ignore_eukaryotes' in $analysis.organism_profiling
132 --ignore_eukaryotes
133 #end if
134 #if 'ignore_bacteria' in $analysis.organism_profiling
135 --ignore_bacteria
136 #end if
137 #if 'ignore_archaea' in $analysis.organism_profiling
138 --ignore_archaea
139 #end if
## Forward the selected statistical approach; the "stat" select parameter was
## previously defined in the inputs but never reached the command line.
--stat '$analysis.stat'
140 --stat_q $analysis.stat_q
141 --perc_nonzero $analysis.perc_nonzero
142 #if $analysis.ignore_markers
143 --ignore_markers '$analysis.ignore_markers'
144 #end if
## Boolean parameters render as their truevalue flag or as an empty string.
145 $analysis.avoid_disqm
146 --sample_id_key '$out.sample_id_key'
147 --sample_id '$out.sample_id'
148 $out.use_group_representative
149 $out.legacy_output
150 $out.CAMI_format_output
151 $out.unknown_estimation
152 -o '$output_file'
153 --bowtie2out 'bowtie2out'
154 -s '$sam_output_file'
155 --biom '$biom_output_file'
## \$ keeps the shell variable out of Cheetah's hands; defaults to 4 when GALAXY_SLOTS is unset.
156 --nproc \${GALAXY_SLOTS:-4}
157
## The intermediate mapping file is written to a fixed working-directory name and
## moved to the Galaxy output afterwards; that output only exists for raw input.
158 #if $inputs.in.selector == "raw"
159 &&
160 mv 'bowtie2out' '$bowtie2out'
161 #end if
162 ]]></command>
163 <inputs>
164 <section name="inputs" title="Inputs" expanded="true">
165 <conditional name="in">
166 <param name="selector" type="select" label="Input(s)">
167 <option value="raw" selected="true">Fasta/FastQ file(s) with metagenomic reads</option>
168 <option value="sam">Externally BowTie2-mapped SAM file</option>
169 <option value="bowtie2out">Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run</option>
170 </param>
171 <when value="raw">
172 <conditional name="raw_in">
173 <param name="selector" type="select" label="Fasta/FastQ file(s) with metagenomic reads">
174 <option value="single" selected="true">One single-end file</option>
175 <option value="multiple">Multiple single-end files</option>
176 <option value="paired">Paired-end files</option>
177 </param>
178 <when value="single">
179 <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with metagenomic reads"/>
180 </when>
181 <when value="multiple">
182 <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with metagenomic reads"/>
183 </when>
184 <when value="paired">
185 <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with metagenomic reads"/>
186 <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with metagenomic reads"/>
187 </when>
188 </conditional>
189 <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/>
190 <section name="mapping" title="Mapping" expanded="true">
<!-- Bowtie2 preset; the selected option value is passed verbatim as the argument of bt2_ps on the metaphlan command line -->
191 <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files">
192 <option value="sensitive">Sensitive</option>
193 <option value="very-sensitive" selected="true">Very sensitive</option>
194 <option value="sensitive-local">Sensitive local</option>
195 <option value="very-sensitive-local">Very sensitive local</option>
196 </param>
197 <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/>
198 </section>
199 </when>
200 <when value="sam">
201 <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map metagenomic reads"/>
202 </when>
203 <when value="bowtie2out">
204 <param name="in" type="data" format="tabular" label="Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run"
205 help="File needs to be generated with MetaPhlAn versions >3.0"/>
206 </when>
207 </conditional>
208 <conditional name="db">
209 <param name="db_selector" type="select" label="Database with clade-specific marker genes">
210 <option value="cached" selected="true">Locally cached</option>
211 <option value="history">From history</option>
212 </param>
213 <when value="cached">
214 <param name="cached_db" label="Cached database with clade-specific marker genes" type="select">
215 <options from_data_table="metaphlan_database">
216 <validator message="No MetaPhlAn database is available" type="no_options" />
217 </options>
218 </param>
219 </when>
220 <when value="history">
221 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
222 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated with the database with clade-specific marker genes from history"/>
223 </when>
224 </conditional>
225 </section>
226 <section name="analysis" title="Analysis" expanded="true">
227 <conditional name="analysis_type">
228 <param argument="-t" type="select" label="Type of analysis to perform">
229 <option value="rel_ab" selected="true">rel_ab: Profiling a metagenome in terms of relative abundances</option>
230 <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a metagenome in terms of relative abundances and estimate the number of reads coming from each clade</option>
231 <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
232 <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
233 <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
234 <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by metagenome size if number of reads is specified)</option>
235 <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
236 <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
237 </param>
238 <when value="rel_ab">
239 <expand macro="tax_lev"/>
240 </when>
241 <when value="rel_ab_w_read_stats">
242 <expand macro="tax_lev"/>
243 </when>
244 <when value="reads_map"/>
245 <when value="clade_profiles"/>
246 <when value="clade_specific_strain_tracker">
247 <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present"
248 help="Markers are also extracted for subclades" />
249 <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/>
250 </when>
251 <when value="marker_ab_table">
252 <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original metagenome"
253 help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/>
254 </when>
255 <when value="marker_counts"/>
256 <when value="marker_pres_table">
257 <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/>
258 </when>
259 </conditional>
260 <param argument="--min_cu_len" type="integer" value="2000"
261 label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
262 <param argument="--min_alignment_len" type="integer" optional="true"
263 label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
264 <param name="organism_profiling" type="select" multiple="true" optional="true" label="Organisms to profile">
265 <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option>
266 <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option>
267 <option value="ignore_bacteria">Ignore bacterial organisms (ignore_bacteria)</option>
268 <option value="ignore_archaea">Ignore archaeal organisms (ignore_archaea)</option>
269 </param>
270 <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances">
271 <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option>
272 <option value="avg_l">avg_l: Average of length-normalized marker counts</option>
273 <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option>
274 <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option>
275 <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option>
276 <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option>
277 <option value="med">med: Median of length-normalized marker counts</option>
278 </param>
279 <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/>
280 <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/>
281 <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/>
282 <param argument="--avoid_disqm" type='boolean' checked="true" truevalue='--avoid_disqm' falsevalue=''
283 label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?"
284 help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/>
285 </section>
286 <section name="out" title="Outputs" expanded="true">
287 <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/>
288 <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/>
289 <param argument="--use_group_representative" type='boolean' checked="false" truevalue='--use_group_representative' falsevalue=''
290 label="Use a species as representative for species groups?"/>
291 <param name="legacy_output" argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue=''
292 label="Old MetaPhlAn2 two columns output?"/>
293 <param argument="--CAMI_format_output" type='boolean' checked="false" truevalue='--CAMI_format_output' falsevalue=''
294 label="Report the profiling using the CAMI output format?"/>
295 <param argument="--unknown_estimation" type='boolean' checked="false" truevalue='--unknown_estimation' falsevalue=''
296 label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
297 </section>
298 </inputs>
<!-- Datasets produced by the tool. output_file and biom_output_file have no filter;
     bowtie2out and sam_output_file are only created when the "raw" input mode is selected. -->
299 <outputs>
<!-- Receives the path given to metaphlan's -o option -->
300 <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances" />
<!-- Filled by moving the working-directory file 'bowtie2out' after the run -->
301 <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output">
302 <filter>inputs['in']['selector'] == "raw"</filter>
303 </data>
<!-- Receives the path given to metaphlan's -s option -->
304 <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file">
305 <filter>inputs['in']['selector'] == "raw"</filter>
306 </data>
<!-- Receives the path given to metaphlan's biom option -->
307 <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file" />
308 </outputs>
309 <tests>
310 <test expect_num_outputs="4">
311 <section name="inputs">
312 <conditional name="in">
313 <param name="selector" value="raw"/>
314 <conditional name="raw_in">
315 <!-- Single GZ file -->
316 <param name="selector" value="single"/>
317 <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/>
318 </conditional>
319 <param name="read_min_len" value="70"/>
320 <section name="mapping">
321 <param name="bt2_ps" value="sensitive"/>
322 <param name="min_mapq_val" value="5"/>
323 </section>
324 </conditional>
325 <conditional name="db">
326 <!-- Cached db -->
327 <param name="db_selector" value="cached"/>
328 <param name="cached_db" value="test-db-20210409"/>
329 </conditional>
330 </section>
331 <section name="analysis">
332 <param name="min_cu_len" value="2000"/>
333 <param name="organism_profiling" value="add_viruses"/>
334 <param name="stat" value="avg_g"/>
335 <param name="stat_q" value="0.2"/>
336 <param name="perc_nonzero" value="0.33"/>
337 <param name="avoid_disqm" value="true"/>
338 </section>
339 <section name="out">
340 <param name="sample_id_key" value="SampleID"/>
341 <param name="sample_id" value="Metaphlan_Analysis"/>
342 <param name="use_group_representative" value="false"/>
343 <param name="legacy_output" value="false"/>
344 <param name="CAMI_format_output" value="false"/>
345 <param name="unknown_estimation" value="false"/>
346 </section>
347 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
348 <assert_contents>
349 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
350 </assert_contents>
351 </output>
352 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
353 <assert_contents>
354 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
355 <has_text text="37637__U2I1U8__N579_01580"/>
356 </assert_contents>
357 </output>
358 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
359 <assert_contents>
360 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
361 </assert_contents>
362 </output>
363 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
364 <assert_contents>
365 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
366 </assert_contents>
367 </output>
368 </test>
369 <test expect_num_outputs="4">
370 <section name="inputs">
371 <conditional name="in">
372 <param name="selector" value="raw"/>
373 <conditional name="raw_in">
374 <!-- Multiple GZ file -->
375 <param name="selector" value="multiple"/>
376 <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/>
377 </conditional>
378 <param name="read_min_len" value="70"/>
379 <section name="mapping">
380 <param name="bt2_ps" value="sensitive"/>
381 <param name="min_mapq_val" value="5"/>
382 </section>
383 </conditional>
384 <conditional name="db">
385 <!-- Local db -->
386 <param name="db_selector" value="history"/>
387 <param name="bowtie2db" value="test-db.fasta"/>
388 <param name="mpa_pkl" value="test-db.json"/>
389 </conditional>
390 </section>
391 <section name="analysis">
392 <param name="min_cu_len" value="2000"/>
393 <param name="organism_profiling" value="add_viruses"/>
394 <param name="stat" value="avg_g"/>
395 <param name="stat_q" value="0.2"/>
396 <param name="perc_nonzero" value="0.33"/>
397 <param name="avoid_disqm" value="true"/>
398 </section>
399 <section name="out">
400 <param name="sample_id_key" value="SampleID"/>
401 <param name="sample_id" value="Metaphlan_Analysis"/>
402 <param name="use_group_representative" value="false"/>
403 <param name="legacy_output" value="false"/>
404 <param name="CAMI_format_output" value="false"/>
405 <param name="unknown_estimation" value="false"/>
406 </section>
407 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
408 <assert_contents>
409 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
410 <has_text text="relative_abundance"/>
411 <has_text text="NCBI_tax_id"/>
412 <has_text text="clade_name"/>
413 </assert_contents>
414 </output>
415 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
416 <assert_contents>
417 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
418 <has_text text="37637__U2I1U8__N579_01580"/>
419 </assert_contents>
420 </output>
421 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size">
422 <assert_contents>
423 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
424 </assert_contents>
425 </output>
426 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
427 <assert_contents>
428 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
429 </assert_contents>
430 </output>
431 </test>
432 <test expect_num_outputs="4">
433 <section name="inputs">
434 <conditional name="in">
435 <param name="selector" value="raw"/>
436 <conditional name="raw_in">
437 <!-- Paired GZ file -->
438 <param name="selector" value="paired"/>
439 <param name="in_f" value="SRS014464-Anterior_nares.fasta.gz"/>
440 <param name="in_r" value="SRS014464-Anterior_nares.fasta.gz"/>
441 </conditional>
442 <param name="read_min_len" value="70"/>
443 <section name="mapping">
444 <param name="bt2_ps" value="sensitive"/>
445 <param name="min_mapq_val" value="5"/>
446 </section>
447 </conditional>
448 <conditional name="db">
449 <!-- Cached db -->
450 <param name="db_selector" value="cached"/>
451 <param name="cached_db" value="test-db-20210409"/>
452 </conditional>
453 </section>
454 <section name="analysis">
455 <param name="min_cu_len" value="2000"/>
456 <param name="organism_profiling" value="add_viruses"/>
457 <param name="stat" value="avg_g"/>
458 <param name="stat_q" value="0.2"/>
459 <param name="perc_nonzero" value="0.33"/>
460 <param name="avoid_disqm" value="true"/>
461 </section>
462 <section name="out">
463 <param name="sample_id_key" value="SampleID"/>
464 <param name="sample_id" value="Metaphlan_Analysis"/>
465 <param name="use_group_representative" value="false"/>
466 <param name="legacy_output" value="false"/>
467 <param name="CAMI_format_output" value="false"/>
468 <param name="unknown_estimation" value="false"/>
469 </section>
470 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
471 <assert_contents>
472 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
473 <has_text text="relative_abundance"/>
474 <has_text text="NCBI_tax_id"/>
475 <has_text text="clade_name"/>
476 </assert_contents>
477 </output>
478 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
479 <assert_contents>
480 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
481 <has_text text="37637__U2I1U8__N579_01580"/>
482 </assert_contents>
483 </output>
484 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size">
485 <assert_contents>
486 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
487 </assert_contents>
488 </output>
489 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
490 <assert_contents>
491 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
492 </assert_contents>
493 </output>
494 </test>
495 <test expect_num_outputs="2">
496 <section name="inputs">
497 <conditional name="in">
498 <!-- SAM -->
499 <param name="selector" value="sam"/>
500 <param name="in" value="SRS014464-Anterior_nares.sam"/>
501 </conditional>
502 <conditional name="db">
503 <!-- Cached db -->
504 <param name="db_selector" value="cached"/>
505 <param name="cached_db" value="test-db-20210409"/>
506 </conditional>
507 </section>
508 <section name="analysis">
509 <param name="min_cu_len" value="2000"/>
510 <param name="organism_profiling" value="add_viruses"/>
511 <param name="stat" value="avg_g"/>
512 <param name="stat_q" value="0.2"/>
513 <param name="perc_nonzero" value="0.33"/>
514 <param name="avoid_disqm" value="true"/>
515 </section>
516 <section name="out">
517 <param name="sample_id_key" value="SampleID"/>
518 <param name="sample_id" value="Metaphlan_Analysis"/>
519 <param name="use_group_representative" value="false"/>
520 <param name="legacy_output" value="false"/>
521 <param name="CAMI_format_output" value="false"/>
522 <param name="unknown_estimation" value="false"/>
523 </section>
524 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
525 <assert_contents>
526 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
527 <has_text text="relative_abundance"/>
528 <has_text text="NCBI_tax_id"/>
529 <has_text text="clade_name"/>
530 </assert_contents>
531 </output>
532 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
533 <assert_contents>
534 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
535 </assert_contents>
536 </output>
537 </test>
<!-- Profile from a bowtie2out mapping file with the cached test database.
     Only two outputs are expected: the abundance table and the BIOM file.
     Mapping parameters are not set here because they only exist for raw input. -->
538 <test expect_num_outputs="2">
539 <section name="inputs">
540 <conditional name="in">
541 <!-- bowtie2out -->
542 <param name="selector" value="bowtie2out"/>
543 <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/>
544 </conditional>
545 <conditional name="db">
546 <!-- Cached db -->
547 <param name="db_selector" value="cached"/>
548 <param name="cached_db" value="test-db-20210409"/>
549 </conditional>
550 </section>
559 <section name="analysis">
560 <param name="min_cu_len" value="2000"/>
561 <param name="organism_profiling" value="add_viruses"/>
562 <param name="stat" value="avg_g"/>
563 <param name="stat_q" value="0.2"/>
564 <param name="perc_nonzero" value="0.33"/>
565 <param name="avoid_disqm" value="true"/>
566 </section>
567 <section name="out">
568 <param name="sample_id_key" value="SampleID"/>
569 <param name="sample_id" value="Metaphlan_Analysis"/>
570 <param name="use_group_representative" value="false"/>
571 <param name="legacy_output" value="false"/>
572 <param name="CAMI_format_output" value="false"/>
573 <param name="unknown_estimation" value="false"/>
574 </section>
575 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
576 <assert_contents>
577 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
578 <has_text text="relative_abundance"/>
579 <has_text text="NCBI_tax_id"/>
580 <has_text text="clade_name"/>
581 </assert_contents>
582 </output>
583 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
584 <assert_contents>
585 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
586 </assert_contents>
587 </output>
588 </test>
589 <test expect_num_outputs="4">
590 <section name="inputs">
591 <conditional name="in">
592 <param name="selector" value="raw"/>
593 <conditional name="raw_in">
594 <!-- Single FASTA file -->
595 <param name="selector" value="single"/>
596 <param name="in" value="SRS014464-Anterior_nares.fasta"/>
597 </conditional>
598 <param name="read_min_len" value="70"/>
599 <section name="mapping">
600 <param name="bt2_ps" value="sensitive"/>
601 <param name="min_mapq_val" value="5"/>
602 </section>
603 </conditional>
604 <conditional name="db">
605 <!-- Cached db -->
606 <param name="db_selector" value="cached"/>
607 <param name="cached_db" value="test-db-20210409"/>
608 </conditional>
609 </section>
610 <section name="analysis">
611 <param name="min_cu_len" value="2000"/>
612 <param name="organism_profiling" value="add_viruses"/>
613 <param name="stat" value="avg_g"/>
614 <param name="stat_q" value="0.2"/>
615 <param name="perc_nonzero" value="0.33"/>
616 <param name="ignore_markers" value="marker.txt"/>
617 <param name="avoid_disqm" value="true"/>
618 </section>
619 <section name="out">
620 <param name="sample_id_key" value="SampleID"/>
621 <param name="sample_id" value="Metaphlan_Analysis"/>
622 <param name="use_group_representative" value="false"/>
623 <param name="legacy_output" value="true"/>
624 <param name="CAMI_format_output" value="false"/>
625 <param name="unknown_estimation" value="false"/>
626 </section>
627 <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
628 <assert_contents>
629 <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
630 <has_text text="SampleID"/>
631 <has_text text="Metaphlan_Analysis"/>
632 </assert_contents>
633 </output>
634 <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
635 <assert_contents>
636 <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
637 <has_text text="37637__U2I1U8__N579_01580"/>
638 </assert_contents>
639 </output>
640 <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
641 <assert_contents>
642 <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
643 </assert_contents>
644 </output>
645 <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
646 <assert_contents>
647 <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
648 </assert_contents>
649 </output>
650 </test>
651 </tests>
652 <help><![CDATA[
653 What it does
654 ============
655
656 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria,
657 Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level resolution.
658
659 MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes
660 (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:
661
662 - unambiguous taxonomic assignments;
663 - accurate estimation of organismal relative abundance;
664 - species-level resolution for bacteria, archaea, eukaryotes and viruses;
665 - strain identification and tracking
666 - orders of magnitude speedups compared to existing methods.
667 - metagenomic strain-level population genomics
668
669 MetaPhlAn clade-abundance estimation
670 ------------------------------------
671
672 The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and
673 strains in particular cases) present in the metagenome obtained from a microbiome sample and their
674 relative abundance.
675
676 Marker level analysis
677 ---------------------
678
679 MetaPhlAn introduces the capability of characterizing organisms at the strain level using non
680 aggregated marker information. Such capability comes in several slightly different flavours and
681 is a way to perform strain tracking and comparison across multiple samples.
682
683 Usually, MetaPhlAn is first run with the default parameter for the type of analysis to profile the
684 species present in the community, and then a strain-level profiling can be performed to zoom-in into
685 specific species of interest. This operation can be performed quickly as it exploits the bowtie2out
686 intermediate file saved during the execution of the default analysis type.
687
688 Inputs
689 ======
690
691 Metaphlan takes as input either:
692
693 - one or several sequence files in Fasta or FastQ format (compressed or not)
694 - a BowTie2 produced SAM file
695 - an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run
696
697 It also needs the reference database, which can be locally installed or customized using the dedicated tools.
698
699 Outputs
700 =======
701
702 The main output file is a tab-separated file with the predicted taxon relative abundances.
703
704 It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.
705
706
707 More help and use cases
708 =======================
709
710 To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.
711
712 .. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage
713
714 ]]></help>
715 <expand macro="citations"/>
716 </tool>