Mercurial > repos > galaxy-australia > panaroo
diff panaroo.xml @ 0:01864c78c5a5 draft default tip
planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/panaroo commit 3be367228b531c346c10700f07d57ae44394be36-dirty
author | galaxy-australia |
---|---|
date | Tue, 27 Aug 2024 05:51:12 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/panaroo.xml Tue Aug 27 05:51:12 2024 +0000 @@ -0,0 +1,281 @@ +<tool id="panaroo" name="Panaroo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>A Bacterial Pangenome Analysis Pipeline</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="edam_ontology"/> + <expand macro="biotools"/> + <expand macro="requirements"/> + <stdio> + <exit_code range="1:" /> + <regex match="System..*Exception" + source="both" + level="fatal" + description="Error encountered" /> + </stdio> + <command><![CDATA[ + + mkdir outdir && + + #import re + #set input_directory = 'input_directory' + mkdir $input_directory && + #for $gff in $gff_input_collection: + #set identifier = re.sub('[^\s\w\-\\.]','_',str($gff.element_identifier)) + ln -fs '$gff' '$input_directory/$identifier' && + #end for + + panaroo + -t \${GALAXY_SLOTS:-2} + #if str($gen_code) != 'None': + --codon-table $gen_code + #end if + #if str($advanced.adv_options_selector) == "set": + #if $advanced.remove_invalid_gene + $advanced.remove_invalid_gene + #end if + -c '$advanced.matching_option.seq_threshold' + -f '$advanced.matching_option.peptide_threshold' + --len_dif_percent '$advanced.matching_option.length_diff_cutoff' + $advanced.matching_option.merge_paralogs + --search_radius '$advanced.refind_option.search_radius' + --refind_prop_match '$advanced.refind_option.refind_prop_match' + --refind-mode '$advanced.refind_option.refind_mode' + --min_trailing_support '$advanced.graph_correction_option.min_trailing_support' + --trailing_recursive '$advanced.graph_correction_option.trailing_recursive' + --edge_support_threshold '$advanced.graph_correction_option.edge_support_threshold' + --remove_by_consensus '$advanced.graph_correction_option.remove_by_consensus' + --high_var_flag '$advanced.graph_correction_option.high_var_flag' + --min_edge_support_sv '$advanced.graph_correction_option.min_edge_support_sv' + $advanced.graph_correction_option.all_seq_in_graph + $advanced.graph_correction_option.no_clean_edges + + #if $advanced.gene_alignment_option.a != 'None' + -a '$advanced.gene_alignment_option.a' + #end if + + #if '$advanced.gene_alignment_option.aligner' == 'mafft' + --aligner mafft + #else + --aligner '$advanced.gene_alignment_option.aligner' + #end if + #if $advanced.gene_alignment_option.core_subset != '' + --core_subset $advanced.gene_alignment_option.core_subset + #end if + #end if + -i $input_directory/*.gff + -o outdir + --clean-mode $mode + > '$log' && + mv outdir/gene_presence_absence.Rtab outdir/gene_presence_absence_rtab.Rtab && + 2>&1 + + ]]></command> + <inputs> + <param name="gff_input_collection" type="data_collection" format="gff" collection_type="list" label="GFF Input Collection" help="A list of gff files (i.e prokka)"/> + <param name="mode" type="select" label="The stringency mode at which to run panaroo" help="--clean-mode"> + <expand macro="clean_mode"/> + </param> + <param name="gen_code" type="select" label="the codon table user for translation" help="default: 11"> + <expand macro="genetic_code"/> + </param> + <conditional name="advanced"> + <param name="adv_options_selector" type="select" label="Set advanced options?" help="Provides additional controls"> + <option value="set">Set</option> + <option value="do_not_set" selected="True">Do not set</option> + </param> + <when value="set"> + <param name="remove_invalid_gene" argument="--remove-invalid-genes" type="boolean" truevalue="--remove-invalid-genes" falsevalue="" label="removes annotations that do not conform to the expected Prokka format such as those including premature stop codons" help="--remove-invalid-genes"/> + + <section name="matching_option" title="Matching" expanded="false"> + <param name="seq_threshold" argument="--threshold" type="float" value="0.98" label="sequence identity threshold" help="default: 0.98"/> + <param name="peptide_threshold" argument="--family_threshold" type="float" value="0.7" label="protein family sequence identity threshold" help="default: 0.7"/> + <param name="length_diff_cutoff" argument="--len_dif_percent" type="float" value="0.98" label="length difference cutoff" help="default: 0.98"/> + <param name="merge_paralogs" type="boolean" truevalue="--merge_paralogs" falsevalue="" checked="false" label="do not split paralogs" help="--merge_paralogs"/> + </section> + + <section name="refind_option" title="Refind" expanded="false"> + <param argument="--search_radius" type="integer" value="5000" label="Search radius" help="--search_radius (default: 5000)"/> + <param argument="--refind_prop_match" type="float" value="0.75" label="Gene proportion match" help="default: 0.75"/> + <param argument="--refind_mode" type="select" label="The stringency mode at which to re-find genes" help="default: default"> + <expand macro="refind_mode_option"/> + </param> + </section> + + <section name="graph_correction_option" title="Graph Correction" expanded="false"> + <param argument="--min_trailing_support" type="integer" value="2" label="Minimum cluster size to keep a gene called at the end of a contig" help="--min_traiiing_support [relexed mode : 2 is used]"/> + <param argument="--trailing_recursive" type="integer" value="1" label="Number of times to perform recursive trimming of low support nodes near the end of contigs" help="--trailing_recursive [relaxed mode: 1 is used]"/> + <param name="edge_support_threshold" type="integer" value="1" label="Edge support threshold" help="--edge_support_threshold [ Minimal edge 1 is used ]"/> + <param name="len_outlier_proportion" type="float" value="0.01" label="Length outlier support proportion" help="--length_outlier_support_proportion [default: 0.01]"/> + <param name="remove_by_consensus" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Remove consensus" help="--remove_by_consensus [default: False]"/> + <param name="high_var_flag" type="integer" value="5" label="Highly variable gene region" help="--high_var_flag [default: 5]"/> + <param name="min_edge_support_sv" type="integer" value="2" label="Minimum edge support structural variants" help="--min_edge_support_sv [relaxed mode: 2 is used]"/> + <param argument="--all_seq_in_graph" type="boolean" truevalue="--all_seq_in_graph" falsevalue="" label="Retains all DNA sequence" help="--all_seq_in_graph [default: off]"/> + <param argument="--no_clean_edges" type="boolean" truevalue="--no_clean_edges" falsevalue="" label="Edge filtering in the final output graph" help="--no_clean_edges [default: off]"/> + </section> + + <section name="gene_alignment_option" title="Gene Alignment" expanded="false"> + <param argument="-a" type="select" label="Output alignments of core genes or all genes." help="-a [optional: core or pan; default: None"> + <expand macro="gene_alignment"/> + </param> + <param argument="--aligner" type="select" label="Specify an aligner" help="--aligner [mafft|prank|clustal][default: mafft]"> + <expand macro="gene_aligner"/> + </param> + <param name="codons" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate codon alignments" help="--codons"/> + <param name="core_threshold" type="float" value="0.95" label="Core-genome sample threshold" help="--core_threshold [default: 0.95]"/> + <param argument="--core_subset" type="integer" value="" optional="true" label="Subset of the core genome to these many genes" help="--core_subset [default: all]"/> + <param name="core_entropy" type="float" value="0.1" label="Set the Block Mapping and Gathering with Entropy" help="--core_entropy_filter (threshold can be between 0.0 and 1.0) [default: Tukey outlier method]"/> + </section> + </when> + <when value="do_not_set"/> + </conditional> + </inputs> + <outputs> + <collection name="output" type="list" label="${tool.name} on ${on_string}: Pangenome output"> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> + <filter>advanced['adv_options_selector'] != 'set'</filter> + </collection> + <collection name="output_advance" type="list" label="${tool.name} on ${on_string}: Pangenome output (advance)"> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> + <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] == 'None'</filter> + </collection> + <collection name="output_pangenome" type="list" label="${tool.name} on ${on_string}: Pangenome alignment output"> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>aln)" directory="outdir" format="aln" visible="false" /> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>embl)" directory="outdir" format="embl" visible="false" /> + <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] != 'None' </filter> + </collection> + <collection name="output_pangenome_fasta" type="list" label="${tool.name} on ${on_string}: Pangenom alignment fasta"> + <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fas)" directory="outdir/aligned_gene_sequences" format="fasta" visible="false" /> + <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] != 'None'</filter> + </collection> + <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/> + </outputs> + <tests> + <!-- run panaroo with default parameters (i.e panaroo -t 2 -i *.gff -o default \-\-clean-mode strict \-\-remove-invalid-genes) --> + <test expect_num_outputs="2"> + <param name="gen_code" value="11"/> + <param name="mode" value="strict"/> + <param name="adv_options_selector" value="do_not_set"/> + <param name="gff_input_collection"> + <collection type="list"> + <element name="gff10.gff" value="10_small.gff"/> + <element name="gff11.gff" value="11_small.gff"/> + </collection> + </param> + <output_collection name="output" count="13"/> + <output name="log"> + <assert_contents> + <has_text text="pre-processing gff3 files..."/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <param name="gen_code" value="11"/> + <param name="mode" value="strict"/> + <param name="adv_options_selector" value="set"/> + <param name="a" value="None"/> + <param name="gff_input_collection"> + <collection type="list"> + <element name="gff10.gff" value="10_small.gff"/> + <element name="gff11.gff" value="11_small.gff"/> + </collection> + </param> + <output_collection name="output_advance" count="13"/> + <output name="log"> + <assert_contents> + <has_text text="pre-processing gff3 files..."/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="3"> + <param name="gen_code" value="11"/> + <param name="mode" value="strict"/> + <param name="adv_options_selector" value="set"/> + <param name="a" value="core"/> + <param name="gff_input_collection"> + <collection type="list"> + <element name="gff10.gff" value="10_small.gff"/> + <element name="gff11.gff" value="11_small.gff"/> + </collection> + </param> + <output_collection name="output_pangenome" count="18"/> + <output_collection name="output_pangenome_fasta" count="251"/> + <output name="log"> + <assert_contents> + <has_text text="pre-processing gff3 files..."/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +Panaroo_ is A Bacterial Pangenome Analysis Pipeline. + +**INPUTS** +Panaroo now supports multiple input formats. To use non-standard GFF3 files you must profile the input file as a list in a text file (one per line). Separate GFF and FASTA files can be provided per isolate by providing each file delimited by a space or a tab. Genbank file formats are also supported with extensions '.gbk', '.gb' or '.gbff'. These must compliant with Genbank/ENA/DDJB. This can be forced in Prokka by specifying the --compliance parameter. + + - a list of gff format in a collection + +**OUTPUTS** + + - combined_protein_cdhit_out.txt + - combined_protein_cdhit_out.txt.clstr + - pre_filt_graph.gml + - gene_data.csv + - combined_protein_CDS.fasta + - combined_DNA_CDS.fasta + - gene_presence_absence_rtab.Rtab + - gene_presence_absence_roary.csv + - gene_presence_absence.csv + - summary_statistics.txt + - pan_genome_reference.fa + - struct_presence_absence.Rtab + - final_graph.gml + +**OUTPUTS with Advance parameters** + + - combined_protein_cdhit_out.txt + - combined_protein_cdhit_out.txt.clstr + - pre_filt_graph.gml + - gene_data.csv + - combined_protein_CDS.fasta + - combined_DNA_CDS.fasta + - gene_presence_absence_rtab.Rtab + - gene_presence_absence_roary.csv + - gene_presence_absence.csv + - summary_statistics.txt + - pan_genome_reference.fa + - struct_presence_absence.Rtab + - final_graph.gml + - core_gene_alignment + - core_gene_alignment_filtered + - core_alignment_filtered_header + - core_alignment_header + - a collection of fasta files + +.. _Panaroo: https://gthlab.au/panaroo/#/gettingstarted/quickstart + + ]]></help> + <citations> + <citation type="doi">10.1186/s13059-020-02090-4</citation> + </citations> +</tool> +