comparison metaphlan2.xml @ 0:8c82c4d90cc6 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan2/ commit 345fb7ef485456ae833be5ad2d2ce4f8765652c8
author iuc
date Sat, 04 Mar 2017 12:23:45 -0500
parents
children 9be4beda6482
comparison
equal deleted inserted replaced
-1:000000000000 0:8c82c4d90cc6
1 <tool id="metaphlan2" name="MetaPhlAn2" version="@WRAPPER_VERSION@.0">
2
3 <description>to profile the composition of microbial communities</description>
4
5 <macros>
6 <import>metaphlan2_macros.xml</import>
7 </macros>
8
9 <expand macro="requirements">
10 <requirement type="package" version="2.3.0">bowtie2</requirement>
11 <requirement type="package" version="2.7.10">python</requirement>
12 </expand>
13
14 <expand macro="stdio"/>
15
16 <version_command>
17 <![CDATA[
18 metaphlan2.py -v
19 ]]>
20 </version_command>
21
22 <command>
23 <![CDATA[
24 #if $db.db_selector == "history"
25 mkdir ref_db
26 &&
27 bowtie2-build '$db.bowtie2db' 'ref_db/ref_db'
28 &&
29 python '$__tool_directory__/transform_json_to_pkl.py'
30 --json_input '$db.mpa_pkl'
31 --pkl_output 'ref_db/metadata.pkl'
32 &&
33 #end if
34
35 metaphlan2.py
36 '$input_file'
37 -o '$output_file'
38 --input_type '${input_file.datatype.file_ext}'
39 --bowtie2_exe `which bowtie2`
40
41 #if $db.db_selector == "cached"
42 #set $path = $db.cached_db.fields.path
43 #set $value = $db.cached_db.fields.value
44 --bowtie2db '${path}/${value}'
45 --mpa_pkl '${path}/${value}.pkl'
46 #else
47 --bowtie2db 'ref_db/ref_db'
48 --mpa_pkl 'ref_db/metadata.pkl'
49 #end if
50
51 --no_map
52
53 -t '$analysis_type.analysis_type_select'
54 #if $analysis_type.analysis_type_select == "rel_ab" or $analysis_type.analysis_type_select == "rel_ab_w_read_stats"
55 --tax_lev '$analysis_type.tax_lev'
56 #else if $analysis_type.analysis_type_select == "marker_ab_table"
57 --nreads '$analysis_type.nreads'
58 #else if $analysis_type.analysis_type_select == "marker_pres_table"
59 --pres_th '$analysis_type.pres_th'
60 #end if
61
62 --min_cu_len '$min_cu_len'
63 --min_alignment_len '$min_alignment_len'
64
65 $ignore_viruses
66 $ignore_eukaryotes
67 $ignore_bacteria
68 $ignore_archaea
69
70 --stat_q '$stat_q'
71 -s '$sam_output_file'
72 --biom '$biom_output_file'
73 ]]>
74 </command>
75
76 <inputs>
77 <param name="input_file" type="data" format="fastq,fasta,sam" label="Input file"/>
78 <conditional name="db">
79 <param name="db_selector" type="select" label="Database with clade-specific marker genes">
80 <option value="cached" selected="true">Locally cached</option>
81 <option value="history">From history</option>
82 </param>
83 <when value="cached">
84 <param name="cached_db" label="Cached database with clade-specific marker genes" type="select">
85 <options from_data_table="metaphlan2_database"/>
86 </param>
87 </when>
88 <when value="history">
89 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
90 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated with the database with clade-specific marker genes from history"/>
91 </when>
92 </conditional>
93 <conditional name="analysis_type">
94 <param name="analysis_type_select" type="select" label="Type of analysis to perform" argument="-t">
95 <option value="rel_ab" selected="true">Profiling a metagenome in terms of relative abundances</option>
96 <option value="rel_ab_w_read_stats">Profiling a metagenome in terms of relative abundances and estimating the number of reads coming from each clade</option>
97 <option value="reads_map">Mapping from reads to clades (only reads hitting a marker)</option>
98 <option value="clade_profiles">Normalized marker counts for clades with at least a non-null marker</option>
99 <option value="marker_ab_table">Normalized marker counts (only when > 0.0 and normalized by metagenome size if --nreads is specified)</option>
100 <option value="marker_counts">Non-normalized marker counts (use with extreme caution)</option>
101 <option value="marker_pres_table">List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
102 </param>
103 <when value="rel_ab">
104 <expand macro="tax_lev"/>
105 </when>
106 <when value="rel_ab_w_read_stats">
107 <expand macro="tax_lev"/>
108 </when>
109 <when value="reads_map"/>
110 <when value="clade_profiles"/>
111 <when value="marker_ab_table">
112 <param argument="--nreads" type="integer" value="0" label="Total number of reads in the original metagenome" help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/>
113 </when>
114 <when value="marker_counts"/>
115 <when value="marker_pres_table">
116 <param argument="--pres_th" type="integer" value="0" label="Threshold for calling a marker present"/>
117 </when>
118 </conditional>
119 <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
120 <param argument="--min_alignment_len" type="integer" value="0" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
121 <param argument="--ignore_viruses" type="boolean" checked="true" truevalue="" falsevalue="--ignore_viruses" label="Profile viral organisms?"/>
122 <param argument="--ignore_eukaryotes" type="boolean" checked="true" truevalue="" falsevalue="--ignore_eukaryotes" label="Profile eukaryotic organisms?"/>
123 <param argument="--ignore_bacteria" type="boolean" checked="true" truevalue="" falsevalue="--ignore_bacteria" label="Profile bacterial organisms?"/>
124 <param argument="--ignore_archaea" type="boolean" checked="true" truevalue="" falsevalue="--ignore_archaea" label="Profile archaeal organisms?"/>
125 <param argument="--stat_q" type="float" value="0.1" label="Quantile value for the robust average"/>
126 </inputs>
127
128 <outputs>
129 <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Community profile"/>
130 <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file"/>
131 <data name="biom_output_file" format="biom" label="${tool.name} on ${on_string}: BIOM file"/>
132 </outputs>
133
134 <tests>
135 <test>
136 <param name="input_file" value="input_sequences.fasta"/>
137 <param name="db_selector" value="history" />
138 <param name="mpa_pkl" value="marker_metadata.json" />
139 <param name="bowtie2db" value="marker_sequences.fasta" />
140 <param name="analysis_type_select" value="rel_ab" />
141 <param name="tax_lev" value="a" />
142 <param name="min_cu_len" value="2000" />
143 <param name="min_alignment_len" value="0" />
144 <param name="ignore_viruses" value="" />
145 <param name="ignore_eukaryotes" value="" />
146 <param name="ignore_bacteria" value="" />
147 <param name="ignore_archaea" value="" />
148 <param name="stat_q" value="0.1" />
149 <output name="output_file" file="community_profile.tabular"/>
150 </test>
151 </tests>
152
153 <help><![CDATA[
154 **What it does**
155
156 MetaPhlAn is a computational tool to profile the structure and the composition of microbial communities (Bacteria, Archaea, Eukaryotes and Viruses) from metagenomic shotgun sequencing data with species level resolution. For more information, check the `user manual <https://bitbucket.org/biobakery/metaphlan2/>`_.
157
158 **Inputs**
159
160 MetaPhlAn2 takes as input a sequence file in FASTA or FASTQ format, or a SAM file.
161
162 It is also possible to use a custom database with clade-specific marker genes. In this case, a FASTA file with the marker gene sequences is required, together with a file containing the metadata. The metadata file is a JSON file with the following structure:
163
164 ::
165
166 {
167 "taxonomy": {
168 "taxonomy of genome1": genome1_length,
169 "taxonomy of genome2": genome2_length,
170 ...
171 },
172 "markers": {
173 "marker1_name": {
174 "clade": the clade that the marker belongs to,
175 "ext": [list of external genomes where the marker appears],
176 "len": length of the marker,
177 "score": score of the marker,
178 "taxon": the taxon of the marker
179 }
180 ...
181 }
182 }
183
184 Each marker name corresponds to the name of the matching sequence in the FASTA file containing the marker gene sequences.
185
186 **Outputs**
187
188 The main output file is a tab-separated file with the predicted taxon relative abundances.
189
190 ]]></help>
191
192 <expand macro="citations"/>
193 </tool>