Mercurial > repos > galaxy-australia > panaroo
comparison panaroo.xml @ 0:01864c78c5a5 draft default tip
planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/panaroo commit 3be367228b531c346c10700f07d57ae44394be36-dirty
author | galaxy-australia |
---|---|
date | Tue, 27 Aug 2024 05:51:12 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:01864c78c5a5 |
---|---|
1 <tool id="panaroo" name="Panaroo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>A Bacterial Pangenome Analysis Pipeline</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="edam_ontology"/> | |
7 <expand macro="biotools"/> | |
8 <expand macro="requirements"/> | |
9 <stdio> | |
10 <exit_code range="1:" /> | |
11 <regex match="System..*Exception" | |
12 source="both" | |
13 level="fatal" | |
14 description="Error encountered" /> | |
15 </stdio> | |
16 <command><![CDATA[ | |
17 ## Stage the GFF collection as symlinks, run panaroo into outdir, then rename the gene presence/absence Rtab file. | |
18 mkdir outdir && | |
19 | |
20 #import re | |
21 #set input_directory = 'input_directory' | |
22 mkdir $input_directory && | |
23 #for $gff in $gff_input_collection: | |
24 #set identifier = re.sub('[^\s\w\-\\.]','_',str($gff.element_identifier)) | |
25 ## panaroo is pointed at *.gff below, so every link must carry that suffix or it is silently skipped | |
26 #if not $identifier.endswith('.gff') | |
27 #set identifier = $identifier + '.gff' | |
28 #end if | |
29 ln -fs '$gff' '$input_directory/$identifier' && | |
30 #end for | |
31 ## stdout and stderr of panaroo are both redirected into the log dataset (see the end of the call) | |
32 panaroo | |
33 -t \${GALAXY_SLOTS:-2} | |
34 #if str($gen_code) != 'None': | |
35 --codon-table $gen_code | |
36 #end if | |
37 #if str($advanced.adv_options_selector) == "set": | |
38 #if $advanced.remove_invalid_gene | |
39 $advanced.remove_invalid_gene | |
40 #end if | |
41 -c '$advanced.matching_option.seq_threshold' | |
42 -f '$advanced.matching_option.peptide_threshold' | |
43 --len_dif_percent '$advanced.matching_option.length_diff_cutoff' | |
44 $advanced.matching_option.merge_paralogs | |
45 --search_radius '$advanced.refind_option.search_radius' | |
46 --refind_prop_match '$advanced.refind_option.refind_prop_match' | |
47 --refind-mode '$advanced.refind_option.refind_mode' | |
48 --min_trailing_support '$advanced.graph_correction_option.min_trailing_support' | |
49 --trailing_recursive '$advanced.graph_correction_option.trailing_recursive' | |
50 --edge_support_threshold '$advanced.graph_correction_option.edge_support_threshold' | |
51 --remove_by_consensus '$advanced.graph_correction_option.remove_by_consensus' | |
52 --high_var_flag '$advanced.graph_correction_option.high_var_flag' | |
53 --min_edge_support_sv '$advanced.graph_correction_option.min_edge_support_sv' | |
54 $advanced.graph_correction_option.all_seq_in_graph | |
55 $advanced.graph_correction_option.no_clean_edges | |
56 | |
57 #if $advanced.gene_alignment_option.a != 'None' | |
58 -a '$advanced.gene_alignment_option.a' | |
59 #end if | |
60 | |
61 --aligner '$advanced.gene_alignment_option.aligner' | |
62 #if $advanced.gene_alignment_option.core_subset != '' | |
63 --core_subset $advanced.gene_alignment_option.core_subset | |
64 #end if | |
65 ## NOTE(review): len_outlier_proportion, codons, core_threshold and core_entropy are declared as inputs but are not passed to panaroo here | |
66 #end if | |
67 -i $input_directory/*.gff | |
68 -o outdir | |
69 --clean-mode $mode | |
70 > '$log' 2>&1 && | |
71 mv outdir/gene_presence_absence.Rtab outdir/gene_presence_absence_rtab.Rtab | |
72 | |
73 ]]></command> | |
74 <inputs> | |
75 <param name="gff_input_collection" type="data_collection" format="gff" collection_type="list" label="GFF Input Collection" help="A list of gff files (i.e prokka)"/> | |
76 <param name="mode" type="select" label="The stringency mode at which to run panaroo" help="--clean-mode"> | |
77 <expand macro="clean_mode"/> | |
78 </param> | |
79 <param name="gen_code" type="select" label="the codon table used for translation" help="default: 11"> | |
80 <expand macro="genetic_code"/> | |
81 </param> | |
82 <conditional name="advanced"> | |
83 <param name="adv_options_selector" type="select" label="Set advanced options?" help="Provides additional controls"> | |
84 <option value="set">Set</option> | |
85 <option value="do_not_set" selected="True">Do not set</option> | |
86 </param> | |
87 <when value="set"> | |
88 <param name="remove_invalid_gene" argument="--remove-invalid-genes" type="boolean" truevalue="--remove-invalid-genes" falsevalue="" label="removes annotations that do not conform to the expected Prokka format such as those including premature stop codons" help="--remove-invalid-genes"/> | |
89 | |
90 <section name="matching_option" title="Matching" expanded="false"> | |
91 <param name="seq_threshold" argument="--threshold" type="float" value="0.98" label="sequence identity threshold" help="default: 0.98"/> | |
92 <param name="peptide_threshold" argument="--family_threshold" type="float" value="0.7" label="protein family sequence identity threshold" help="default: 0.7"/> | |
93 <param name="length_diff_cutoff" argument="--len_dif_percent" type="float" value="0.98" label="length difference cutoff" help="default: 0.98"/> | |
94 <param name="merge_paralogs" type="boolean" truevalue="--merge_paralogs" falsevalue="" checked="false" label="do not split paralogs" help="--merge_paralogs"/> | |
95 </section> | |
96 | |
97 <section name="refind_option" title="Refind" expanded="false"> | |
98 <param argument="--search_radius" type="integer" value="5000" label="Search radius" help="--search_radius (default: 5000)"/> | |
99 <param argument="--refind_prop_match" type="float" value="0.75" label="Gene proportion match" help="default: 0.75"/> | |
100 <param argument="--refind_mode" type="select" label="The stringency mode at which to re-find genes" help="default: default"> | |
101 <expand macro="refind_mode_option"/> | |
102 </param> | |
103 </section> | |
104 | |
105 <section name="graph_correction_option" title="Graph Correction" expanded="false"> | |
106 <param argument="--min_trailing_support" type="integer" value="2" label="Minimum cluster size to keep a gene called at the end of a contig" help="--min_trailing_support [relaxed mode: 2 is used]"/> | |
107 <param argument="--trailing_recursive" type="integer" value="1" label="Number of times to perform recursive trimming of low support nodes near the end of contigs" help="--trailing_recursive [relaxed mode: 1 is used]"/> | |
108 <param name="edge_support_threshold" type="integer" value="1" label="Edge support threshold" help="--edge_support_threshold [ Minimal edge 1 is used ]"/> | |
109 <param name="len_outlier_proportion" type="float" value="0.01" label="Length outlier support proportion" help="--length_outlier_support_proportion [default: 0.01]"/> | |
110 <param name="remove_by_consensus" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Remove consensus" help="--remove_by_consensus [default: False]"/> | |
111 <param name="high_var_flag" type="integer" value="5" label="Highly variable gene region" help="--high_var_flag [default: 5]"/> | |
112 <param name="min_edge_support_sv" type="integer" value="2" label="Minimum edge support structural variants" help="--min_edge_support_sv [relaxed mode: 2 is used]"/> | |
113 <param argument="--all_seq_in_graph" type="boolean" truevalue="--all_seq_in_graph" falsevalue="" label="Retains all DNA sequence" help="--all_seq_in_graph [default: off]"/> | |
114 <param argument="--no_clean_edges" type="boolean" truevalue="--no_clean_edges" falsevalue="" label="Edge filtering in the final output graph" help="--no_clean_edges [default: off]"/> | |
115 </section> | |
116 | |
117 <section name="gene_alignment_option" title="Gene Alignment" expanded="false"> | |
118 <param argument="-a" type="select" label="Output alignments of core genes or all genes." help="-a [optional: core or pan; default: None]"> | |
119 <expand macro="gene_alignment"/> | |
120 </param> | |
121 <param argument="--aligner" type="select" label="Specify an aligner" help="--aligner [mafft|prank|clustal][default: mafft]"> | |
122 <expand macro="gene_aligner"/> | |
123 </param> | |
124 <param name="codons" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate codon alignments" help="--codons"/> | |
125 <param name="core_threshold" type="float" value="0.95" label="Core-genome sample threshold" help="--core_threshold [default: 0.95]"/> | |
126 <param argument="--core_subset" type="integer" value="" optional="true" label="Subset of the core genome to these many genes" help="--core_subset [default: all]"/> | |
127 <param name="core_entropy" type="float" value="0.1" label="Set the Block Mapping and Gathering with Entropy" help="--core_entropy_filter (threshold can be between 0.0 and 1.0) [default: Tukey outlier method]"/> | |
128 </section> | |
129 </when> | |
130 <when value="do_not_set"/> | |
131 </conditional> | |
132 </inputs> | |
133 <outputs> | |
134 <collection name="output" type="list" label="${tool.name} on ${on_string}: Pangenome output"> | |
135 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> | |
136 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> | |
137 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> | |
138 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> | |
139 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> | |
140 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> | |
141 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> | |
142 <filter>advanced['adv_options_selector'] != 'set'</filter> | |
143 </collection> | |
144 <collection name="output_advance" type="list" label="${tool.name} on ${on_string}: Pangenome output (advance)"> | |
145 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> | |
146 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> | |
147 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> | |
148 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> | |
149 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> | |
150 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> | |
151 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> | |
152 <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] == 'None'</filter> | |
153 </collection> | |
154 <collection name="output_pangenome" type="list" label="${tool.name} on ${on_string}: Pangenome alignment output"> | |
155 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> | |
156 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> | |
157 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> | |
158 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> | |
159 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> | |
160 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> | |
161 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> | |
162 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>aln)" directory="outdir" format="aln" visible="false" /> | |
163 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>embl)" directory="outdir" format="embl" visible="false" /> | |
164 <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] != 'None' </filter> | |
165 </collection> | |
166 <collection name="output_pangenome_fasta" type="list" label="${tool.name} on ${on_string}: Pangenome alignment fasta"> | |
167 <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fas)" directory="outdir/aligned_gene_sequences" format="fasta" visible="false" /> | |
168 <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] != 'None'</filter> | |
169 </collection> | |
170 <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/> | |
171 </outputs> | |
172 <tests> | |
173 <!-- run panaroo without advanced options (i.e. panaroo -t 2 \-\-codon-table 11 -i *.gff -o outdir \-\-clean-mode strict) --> | |
174 <test expect_num_outputs="2"> | |
175 <param name="gen_code" value="11"/> | |
176 <param name="mode" value="strict"/> | |
177 <param name="adv_options_selector" value="do_not_set"/> | |
178 <param name="gff_input_collection"> | |
179 <collection type="list"> | |
180 <element name="gff10.gff" value="10_small.gff"/> | |
181 <element name="gff11.gff" value="11_small.gff"/> | |
182 </collection> | |
183 </param> | |
184 <output_collection name="output" count="13"/> | |
185 <output name="log"> | |
186 <assert_contents> | |
187 <has_text text="pre-processing gff3 files..."/> | |
188 </assert_contents> | |
189 </output> | |
190 </test> | |
191 <test expect_num_outputs="2"> | |
192 <param name="gen_code" value="11"/> | |
193 <param name="mode" value="strict"/> | |
194 <param name="adv_options_selector" value="set"/> | |
195 <param name="a" value="None"/> | |
196 <param name="gff_input_collection"> | |
197 <collection type="list"> | |
198 <element name="gff10.gff" value="10_small.gff"/> | |
199 <element name="gff11.gff" value="11_small.gff"/> | |
200 </collection> | |
201 </param> | |
202 <output_collection name="output_advance" count="13"/> | |
203 <output name="log"> | |
204 <assert_contents> | |
205 <has_text text="pre-processing gff3 files..."/> | |
206 </assert_contents> | |
207 </output> | |
208 </test> | |
209 <test expect_num_outputs="3"> | |
210 <param name="gen_code" value="11"/> | |
211 <param name="mode" value="strict"/> | |
212 <param name="adv_options_selector" value="set"/> | |
213 <param name="a" value="core"/> | |
214 <param name="gff_input_collection"> | |
215 <collection type="list"> | |
216 <element name="gff10.gff" value="10_small.gff"/> | |
217 <element name="gff11.gff" value="11_small.gff"/> | |
218 </collection> | |
219 </param> | |
220 <output_collection name="output_pangenome" count="18"/> | |
221 <output_collection name="output_pangenome_fasta" count="251"/> | |
222 <output name="log"> | |
223 <assert_contents> | |
224 <has_text text="pre-processing gff3 files..."/> | |
225 </assert_contents> | |
226 </output> | |
227 </test> | |
228 </tests> | |
229 <help><![CDATA[ | |
230 Panaroo_ is a bacterial pangenome analysis pipeline. | |
231 | |
232 **INPUTS** | |
233 Panaroo now supports multiple input formats. To use non-standard GFF3 files you must provide the input files as a list in a text file (one per line). Separate GFF and FASTA files can be provided per isolate by providing each file delimited by a space or a tab. Genbank file formats are also supported with extensions '.gbk', '.gb' or '.gbff'. These must be compliant with Genbank/ENA/DDBJ. This can be forced in Prokka by specifying the --compliant parameter. | |
234 | |
235 - a list of gff format in a collection | |
236 | |
237 **OUTPUTS** | |
238 | |
239 - combined_protein_cdhit_out.txt | |
240 - combined_protein_cdhit_out.txt.clstr | |
241 - pre_filt_graph.gml | |
242 - gene_data.csv | |
243 - combined_protein_CDS.fasta | |
244 - combined_DNA_CDS.fasta | |
245 - gene_presence_absence_rtab.Rtab | |
246 - gene_presence_absence_roary.csv | |
247 - gene_presence_absence.csv | |
248 - summary_statistics.txt | |
249 - pan_genome_reference.fa | |
250 - struct_presence_absence.Rtab | |
251 - final_graph.gml | |
252 | |
253 **OUTPUTS with Advanced parameters** | |
254 | |
255 - combined_protein_cdhit_out.txt | |
256 - combined_protein_cdhit_out.txt.clstr | |
257 - pre_filt_graph.gml | |
258 - gene_data.csv | |
259 - combined_protein_CDS.fasta | |
260 - combined_DNA_CDS.fasta | |
261 - gene_presence_absence_rtab.Rtab | |
262 - gene_presence_absence_roary.csv | |
263 - gene_presence_absence.csv | |
264 - summary_statistics.txt | |
265 - pan_genome_reference.fa | |
266 - struct_presence_absence.Rtab | |
267 - final_graph.gml | |
268 - core_gene_alignment | |
269 - core_gene_alignment_filtered | |
270 - core_alignment_filtered_header | |
271 - core_alignment_header | |
272 - a collection of fasta files | |
273 | |
274 .. _Panaroo: https://gthlab.au/panaroo/#/gettingstarted/quickstart | |
275 | |
276 ]]></help> | |
277 <citations> | |
278 <citation type="doi">10.1186/s13059-020-02090-4</citation> | |
279 </citations> | |
280 </tool> | |
281 |