Mercurial > repos > iuc > checkm_lineage_wf
comparison lineage_wf.xml @ 0:760dc0c0e689 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
author | iuc |
---|---|
date | Fri, 29 Jul 2022 20:30:08 +0000 |
parents | |
children | f0107b9f2dc3 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:760dc0c0e689 |
---|---|
1 <tool id="checkm_lineage_wf" name="CheckM lineage_wf" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description> | |
3 Assessing the completeness and contamination of genome bins using lineage-specific marker sets | |
4 </description> | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 <expand macro="biotools"/> | |
9 <expand macro="requirements"/> | |
10 <expand macro="version"/> | |
11 <command detect_errors="exit_code"><![CDATA[ | |
12 @BIN_INPUTS@ | |
13 | |
14 checkm lineage_wf | |
15 'bins' | |
16 'output' | |
17 $tree_analyze.reduced_tree | |
18 $tree_analyze.ali | |
19 $tree_analyze.nt | |
20 $tree_analyze.genes | |
21 --unique '$lineage_set.unique' | |
22 --multi '$lineage_set.multi' | |
23 $lineage_set.force_domain | |
24 $lineage_set.no_refinement | |
25 $qa.individual_markers | |
26 $qa.skip_adj_correction | |
27 $qa.skip_pseudogene_correction | |
28 --aai_strain $qa.aai_strain | |
29 $qa.ignore_thresholds | |
30 --e_value $qa.e_value | |
31 --length $qa.length | |
32 --file '$results' | |
33 --tab_table | |
34 --extension 'fasta' | |
35 --threads \${GALAXY_SLOTS:-1} | |
36 --pplacer_threads \${GALAXY_SLOTS:-1} | |
37 ]]></command> | |
38 <inputs> | |
39 <expand macro="bin_inputs" /> | |
40 <section name="tree_analyze" title="Bin placement in the genome tree and marker gene identification"> | |
41 <expand macro="tree_params" /> | |
42 </section> | |
43 <section name="lineage_set" title="Bin lineage-specific marker set inference"> | |
44 <expand macro="lineage_set_params" /> | |
45 </section> | |
46 <section name="qa" title="Bin assessment"> | |
47 <expand macro="qa_params" /> | |
48 </section> | |
49 <param name="extra_outputs" type="select" multiple="true" optional="true" label="Extra outputs"> | |
50 <option value="phylo_hmm_info">Phylogenetic HMM model info for each bin</option> | |
51 <option value="bin_stats_tree">Phylogenetic bin stats</option> | |
52 <option value="hmmer_tree">Phylogenetic HMM hits to each bin</option> | |
53 <option value="concatenated_tre">Concatenated tree</option> | |
54 <option value="concatenated_fasta">Concatenated masked sequences</option> | |
55 <expand macro="tree_extra_output_options" /> | |
56 <option value="marker_file">Marker genes</option> | |
57 <option value="hmmer_analyze">Marker gene HMM hits to each bin</option> | |
58 <option value="bin_stats_analyze">Marker gene bin stats</option> | |
59 <option value="checkm_hmm_info">Marker gene HMM info for each bin</option> | |
60 <expand macro="analyze_extra_output_options" /> | |
61 <option value="bin_stats_ext">Marker gene bin extensive stats</option> | |
62 <expand macro="qa_extra_output_options" /> | |
63 </param> | |
64 </inputs> | |
65 <outputs> | |
66 <data name="results" format="tabular" label="${tool.name} on ${on_string}: Bin statistics"/> | |
67 <!--tree outputs--> | |
68 <data name="phylo_hmm_info" format="zip" from_work_dir="output/storage/phylo_hmm_info.pkl.gz" label="${tool.name} on ${on_string}: Phylogenetic HMM model info for each bin"> | |
69 <filter>'phylo_hmm_info' in extra_outputs</filter> | |
70 </data> | |
71 <data name="bin_stats_tree" format="tabular" from_work_dir="output/storage/bin_stats.tree.tsv" label="${tool.name} on ${on_string}: Phylogenetic bin stats"> | |
72 <filter>'bin_stats_tree' in extra_outputs</filter> | |
73 </data> | |
74 <collection name="hmmer_tree" type="list" label="${tool.name} on ${on_string}: Phylogenetic HMM hits to each bin"> | |
75 <filter>'hmmer_tree' in extra_outputs</filter> | |
76 <discover_datasets pattern="(?P<designation>.*)/hmmer\.tree\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/> | |
77 </collection> | |
78 <data name="concatenated_fasta" format="fasta" from_work_dir="output/storage/tree/concatenated.fasta" label="${tool.name} on ${on_string}: Concatenated masked sequences"> | |
79 <filter>'concatenated_fasta' in extra_outputs</filter> | |
80 </data> | |
81 <data name="concatenated_tre" format="phyloxml" from_work_dir="output/storage/tree/concatenated.tre" label="${tool.name} on ${on_string}: Concatenated tree"> | |
82 <filter>'concatenated_tre' in extra_outputs</filter> | |
83 </data> | |
84 <collection name="hmmer_tree_ali" type="list" label="${tool.name} on ${on_string}: Phylogenetic HMMER alignment file for each bin"> | |
85 <filter>tree_analyze['ali'] and 'hmmer_tree_ali' in extra_outputs</filter> | |
86 <discover_datasets pattern="(?P<designation>.*)/hmmer\.tree\.ali\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/> | |
87 </collection> | |
88 <data name="concatenated_pplacer_json" format="json" from_work_dir="output/storage/tree/concatenated.pplacer.json" label="${tool.name} on ${on_string}: Concatenated pplacer JSON"> | |
89 <filter>'concatenated_pplacer_json' in extra_outputs</filter> <!-- key matches this output's name, like every other filter here; the option itself presumably comes from the tree_extra_output_options macro (confirm) --> | |
90 </data> | |
91 <collection name="genes_fna" type="list" label="${tool.name} on ${on_string}: Nucleotide gene sequences for each bin"> <!-- genes.fna is only collected when the 'nt' option is set; presumably the nucleotide FASTA of called genes --> | |
92 <filter>not tree_analyze['genes'] and tree_analyze['nt'] and 'genes_fna' in extra_outputs</filter> | |
93 <discover_datasets pattern="(?P<designation>.*)/genes\.fna" format="fasta" directory="output/bins/" recurse="true" match_relative_path="true"/> | |
94 </collection> | |
95 <collection name="genes_faa" type="list" label="${tool.name} on ${on_string}: Protein gene sequences for each bin"> <!-- genes.faa: presumably the amino-acid FASTA of called genes --> | |
96 <filter>'genes_faa' in extra_outputs</filter> | |
97 <discover_datasets pattern="(?P<designation>.*)/genes\.faa" format="fasta" directory="output/bins/" recurse="true" match_relative_path="true"/> | |
98 </collection> | |
99 <collection name="genes_gff" type="list" label="${tool.name} on ${on_string}: Gene feature files for each bin"> | |
100 <filter>not tree_analyze['genes'] and 'genes_gff' in extra_outputs</filter> | |
101 <discover_datasets pattern="(?P<designation>.*)/genes\.gff" format="gff" directory="output/bins/" recurse="true" match_relative_path="true"/> | |
102 </collection> | |
103 <!--lineage_set outputs--> | |
104 <data name="marker_file" format="tabular" from_work_dir="output/lineage.ms" label="${tool.name} on ${on_string}: Marker genes"> | |
105 <filter>'marker_file' in extra_outputs</filter> | |
106 </data> | |
107 <!--analyze outputs--> | |
108 <collection name="hmmer_analyze" type="list" label="${tool.name} on ${on_string}: Marker gene HMM hits to each bin"> | |
109 <filter>'hmmer_analyze' in extra_outputs</filter> | |
110 <discover_datasets pattern="(?P<designation>.*)/hmmer\.analyze\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/> | |
111 </collection> | |
112 <data name="bin_stats_analyze" format="tabular" from_work_dir="output/storage/bin_stats.analyze.tsv" label="${tool.name} on ${on_string}: Marker gene bin stats"> | |
113 <filter>'bin_stats_analyze' in extra_outputs</filter> | |
114 </data> | |
115 <data name="checkm_hmm_info" format="zip" from_work_dir="output/storage/checkm_hmm_info.pkl.gz" label="${tool.name} on ${on_string}: Marker gene HMM info for each bin" > | |
116 <filter>'checkm_hmm_info' in extra_outputs</filter> | |
117 </data> | |
118 <collection name="hmmer_analyze_ali" type="list" label="${tool.name} on ${on_string}: HMMER alignment file for each bin"> | |
119 <filter>tree_analyze['ali'] and 'hmmer_analyze_ali' in extra_outputs</filter> | |
120 <discover_datasets pattern="(?P<designation>.*)/hmmer\.analyze\.ali\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/> | |
121 </collection> | |
122 <!--qa outputs--> | |
123 <data name="bin_stats_ext" format="tabular" from_work_dir="output/storage/bin_stats_ext.tsv" label="${tool.name} on ${on_string}: Marker gene bin extensive stats"> | |
124 <filter>'bin_stats_ext' in extra_outputs</filter> | |
125 </data> | |
126 <expand macro="qa_extra_outputs" /> | |
127 | |
128 </outputs> | |
129 <tests> | |
130 <test expect_num_outputs="1"> | |
131 <conditional name="bins"> | |
132 <param name="select" value="individual"/> | |
133 <param name="bins_ind" ftype="fasta" value="637000110.fna"/> | |
134 </conditional> | |
135 <section name="tree_analyze"> | |
136 <param name="reduced_tree" value="true"/> | |
137 <param name="ali" value="false"/> | |
138 <param name="nt" value="false"/> | |
139 <param name="genes" value="false"/> | |
140 </section> | |
141 <section name="lineage_set"> | |
142 <param name="unique" value="10"/> | |
143 <param name="multi" value="10"/> | |
144 <param name="force_domain" value="false"/> | |
145 <param name="no_refinement" value="false"/> | |
146 </section> | |
147 <section name="qa"> | |
148 <param name="individual_markers" value="false"/> | |
149 <param name="skip_adj_correction" value="false"/> | |
150 <param name="skip_pseudogene_correction" value="false"/> | |
151 <param name="aai_strain" value="0.9"/> | |
152 <param name="ignore_thresholds" value="false"/> | |
153 <param name="e_value" value="1e-10"/> | |
154 <param name="length" value="0.7"/> | |
155 </section> | |
156 <param name="extra_outputs" value=""/> | |
157 <output name="results" ftype="tabular"> | |
158 <assert_contents> | |
159 <has_text text="637000110"/> | |
160 <has_text text="Marker lineage"/> | |
161 <has_text text="k__Bacteria"/> | |
162 </assert_contents> | |
163 </output> | |
164 </test> | |
165 <test expect_num_outputs="12"> | |
166 <conditional name="bins"> | |
167 <param name="select" value="collection"/> | |
168 <param name="bins_coll"> | |
169 <collection type="list"> | |
170 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
171 </collection> | |
172 </param> | |
173 </conditional> | |
174 <section name="tree_analyze"> | |
175 <param name="reduced_tree" value="true"/> | |
176 <param name="ali" value="true"/> | |
177 <param name="nt" value="false"/> | |
178 <param name="genes" value="false"/> | |
179 </section> | |
180 <section name="lineage_set"> | |
181 <param name="unique" value="10"/> | |
182 <param name="multi" value="10"/> | |
183 <param name="force_domain" value="false"/> | |
184 <param name="no_refinement" value="false"/> | |
185 </section> | |
186 <section name="qa"> | |
187 <param name="individual_markers" value="false"/> | |
188 <param name="skip_adj_correction" value="false"/> | |
189 <param name="skip_pseudogene_correction" value="false"/> | |
190 <param name="aai_strain" value="0.9"/> | |
191 <param name="ignore_thresholds" value="false"/> | |
192 <param name="e_value" value="1e-10"/> | |
193 <param name="length" value="0.7"/> | |
194 </section> | |
195 <param name="extra_outputs" value="phylo_hmm_info,bin_stats_tree,hmmer_tree,concatenated_tre,concatenated_fasta,marker_file,hmmer_analyze,bin_stats_analyze,bin_stats_ext,checkm_hmm_info,marker_gene_stats"/> | |
196 <output name="results" ftype="tabular"> | |
197 <assert_contents> | |
198 <has_text text="637000110"/> | |
199 <has_text text="Marker lineage"/> | |
200 <has_text text="k__Bacteria"/> | |
201 </assert_contents> | |
202 </output> | |
203 <output name="phylo_hmm_info" ftype="zip"> | |
204 <assert_contents> | |
205 <has_size value="1575" delta="10"/> | |
206 </assert_contents> | |
207 </output> | |
208 <output name="bin_stats_tree" ftype="tabular"> | |
209 <assert_contents> | |
210 <has_text text="637000110"/> | |
211 <has_text text="Mean scaffold length"/> | |
212 <has_text text="Translation table"/> | |
213 </assert_contents> | |
214 </output> | |
215 <output_collection name="hmmer_tree" count="1"> | |
216 <element name="637000110" ftype="txt"> | |
217 <assert_contents> | |
218 <has_text text="target name"/> | |
219 <has_text text="AC_000091_79"/> | |
220 </assert_contents> | |
221 </element> | |
222 </output_collection> | |
223 <output name="concatenated_fasta" ftype="fasta"> | |
224 <assert_contents> | |
225 <has_text text="637000110"/> | |
226 <has_text text="MLKAGVHFGHQ"/> | |
227 </assert_contents> | |
228 </output> | |
229 <output name="concatenated_tre" ftype="phyloxml"> | |
230 <assert_contents> | |
231 <has_text text="IMG_646564547"/> | |
232 <has_text text="g__Methanocaldococcus"/> | |
233 </assert_contents> | |
234 </output> | |
235 <output name="marker_file" ftype="tabular"> | |
236 <assert_contents> | |
237 <has_text text="Lineage Marker File"/> | |
238 <has_text text="637000110"/> | |
239 <has_text text="k__Bacteria"/> | |
240 </assert_contents> | |
241 </output> | |
242 <output_collection name="hmmer_analyze" count="1"> | |
243 <element name="637000110" ftype="txt"> | |
244 <assert_contents> | |
245 <has_text text="target name"/> | |
246 <has_text text="AC_000091_859"/> | |
247 </assert_contents> | |
248 </element> | |
249 </output_collection> | |
250 <output name="bin_stats_analyze" ftype="tabular"> | |
251 <assert_contents> | |
252 <has_text text="637000110"/> | |
253 <has_text text="GC"/> | |
254 <has_text text="GC std"/> | |
255 </assert_contents> | |
256 </output> | |
257 <output name="bin_stats_ext" ftype="tabular"> | |
258 <assert_contents> | |
259 <has_text text="637000110"/> | |
260 <has_text text="marker lineage"/> | |
261 </assert_contents> | |
262 </output> | |
263 <output name="checkm_hmm_info" ftype="zip"> | |
264 <assert_contents> | |
265 <has_size value="17052" delta="200"/> | |
266 </assert_contents> | |
267 </output> | |
268 <output name="marker_gene_stats" ftype="tabular"> | |
269 <assert_contents> | |
270 <has_text text="637000110"/> | |
271 <has_text text="AC_000091_79"/> | |
272 <has_text text="PF00318.15"/> | |
273 </assert_contents> | |
274 </output> | |
275 </test> | |
276 </tests> | |
277 <help><![CDATA[ | |
278 @HELP_HEADER@ | |
279 | |
280 This command runs the recommended workflow for assessing the completeness and contamination of genome bins using lineage-specific marker sets. | |
281 This workflow consists of 4 mandatory (M) steps and 1 recommended (R) step: | |
282 | |
283 - (M) The tree command places genome bins into a reference genome tree | |
284 - (R) The tree_qa command indicates the number of phylogenetically informative marker genes found in each genome bin along with a taxonomic string indicating its approximate placement in the tree. | |
285 | |
286 If desired, genome bins with few phylogenetically informative marker genes may be removed in order to reduce the computational requirements of the following commands. | |
287 Alternatively, if only genomes from a particular taxonomic group are of interest these can be moved to a new directory and analyzed separately. | |
288 | |
289 - (M) The lineage_set command creates a marker file indicating lineage-specific marker sets suitable for evaluating each genome. | |
290 - (M) The analyze command identifies marker genes and estimates the completeness and contamination of each genome bin. | |
291 - (M) The qa command can be used to produce different tables summarizing the quality of each genome bin. | |
292 | |
293 ]]></help> | |
294 <expand macro="citations"/> | |
295 </tool> |