Mercurial > repos > iuc > squirrel_phylo
view squirrel-phylo.xml @ 1:eaa0b1e114eb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/squirrel commit d684b71bf5129645fe8eb349a56fcb29c321a7ab
author | iuc |
---|---|
date | Tue, 11 Feb 2025 19:01:12 +0000 |
parents | b64098b87ea0 |
children | 46a0e11a7607 |
line wrap: on
line source
<tool id="squirrel_phylo" name="Squirrel Phylo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <description>Phylogenetic and APOBEC3 analysis of MPXV (Mpox virus)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Names of the files this wrapper expects squirrel to write for input.fasta;
        ## they are moved onto the Galaxy output datasets once the run has finished.
        #set $alignment_output = 'input.aln.fasta'
        #set $tree_output = 'input.tree'
        #if $apobec3
            #set $aa_recon_output = "input.tree.amino_acid.reconstruction.csv"
            #set $branch_snps_output = "input.tree.branch_snps.reconstruction.csv"
            #set $svg_output = "input.tree.svg"
            #set $png_output = "input.tree.png"
        #end if

        ## Link the input under a fixed name so the output file names are predictable.
        ln -s '${sequences}' input.fasta &&

        squirrel
            #if $apobec3
                --run-apobec3-phylo
                ## Both figure dimensions are optional: only pass a flag when a value is set,
                ## otherwise the option would be emitted without an argument.
                #if str($other_settings.fig_height) != ''
                    --fig-height $other_settings.fig_height
                #end if
                #if str($other_settings.fig_width) != ''
                    --fig-width $other_settings.fig_width
                #end if
            #else
                --run-phylo
            #end if
            --clade '$clade'
            ## Parameters of the "other_settings" section are referenced by their
            ## fully qualified name; paths and free text are quoted for the shell.
            #if $other_settings.mask_file
                --additional-mask '$other_settings.mask_file'
            #end if
            #if $other_settings.bg_file
                --background-file '$other_settings.bg_file'
            #else
                --include-background
            #end if
            #if $other_settings.out_group
                --outgroups '$other_settings.out_group'
            #end if
            $other_settings.no_mask
            $other_settings.no_iter_mask
            --threads \${GALAXY_SLOTS:-1}
            input.fasta

        && mv '${alignment_output}' '$alignment'
        && mv '${tree_output}' '$tree'
        #if $apobec3
            && mv '${aa_recon_output}' '$aa_recon'
            && mv '${branch_snps_output}' '$branch_snps'
            && mv '${svg_output}' '$svg'
            && mv '${png_output}' '$png'
        #end if
    ]]></command>
    <inputs>
        <param name="sequences" type="data" format="fasta" label="Sequences in fasta format" help="You can upload a FASTA sequence to the history and use it as reference" />
        <param name="apobec3" type="boolean" checked="false" label="Run additional APOBEC3-mutation reconstruction pipeline" />
        <param name="clade" type="select" label="Select MPXV Clade">
            <option value="cladei">Clade I</option>
            <option value="cladeia">Clade Ia</option>
            <option value="cladeib">Clade Ib</option>
            <option value="cladeii">Clade II</option>
            <option value="cladeiia">Clade IIa</option>
            <option value="cladeiib">Clade IIb</option>
        </param>
        <section name="other_settings" expanded="false" title="Additional Settings">
            <param name="no_mask" type="boolean" truevalue="--no-mask" falsevalue="" label="SKIP masking repeat regions?" help="Set to True to Skip masking of repetitive regions. Default: masks repeat regions." />
            <param name="no_iter_mask" type="boolean" truevalue="--no-itr-mask" falsevalue="" label="SKIP masking of end ITR?" help="Set to True to skip masking of end ITR. Default: masks ITR" />
            <param name="mask_file" type="data" format="csv" optional="true" label="Mask additional sites" help="Run squirrel in alignment with QC to generate the SNP mask file." />
            <param name="bg_file" type="data" format="fasta" optional="true" label="Background file - leave empty for automatic background sequences." help="Include a default background set of sequences for the phylogenetics pipeline. The set will be determined by previous 'clade' setting"/>
            <param name="out_group" type="text" label="Specify outgroup(s)" help="Specify which MPXV outgroup(s) in the alignment to use in the phylogeny. These will get pruned out from the final tree."/>
            <!-- The figure dimensions are only passed to squirrel when the APOBEC3 pipeline is enabled. -->
            <param name="fig_height" label="Overwrite tree figure default height" type="integer" min="0" value="25" optional="true"/>
            <param name="fig_width" label="Overwrite tree figure default width" type="integer" min="0" value="40" optional="true"/>
        </section>
    </inputs>
    <outputs>
        <!-- standard outputs -->
        <!-- NOTE(review): the tests below assert a "#NEXUS" header in this file while the
             declared format is newick; confirm which datatype is intended. -->
        <data name="tree" format="newick" label="${tool.name} - phylogenetic tree" />
        <data name="alignment" format="fasta" label="${tool.name} - aligned sequences" />
        <!-- apobec3 outputs: only created when the APOBEC3 pipeline is enabled -->
        <data name="svg" format="svg" label="${tool.name} - phylotree svg image">
            <filter>apobec3</filter>
        </data>
        <data name="png" format="png" label="${tool.name} - phylotree png image">
            <filter>apobec3</filter>
        </data>
        <!-- The two reconstruction outputs are the *.reconstruction.csv files moved by the command. -->
        <data name="aa_recon" format="csv" label="${tool.name} - aa mutations ancestral reconstruction">
            <filter>apobec3</filter>
        </data>
        <data name="branch_snps" format="csv" label="${tool.name} - apobec3 nt mutations">
            <filter>apobec3</filter>
        </data>
    </outputs>
    <tests>
        <!-- Phylogeny only: tree and alignment are produced. -->
        <test expect_num_outputs="2">
            <param name="sequences" value="test-sequences.fasta" />
            <param name="bg_file" value="test-background.fasta" />
            <param name="out_group" value="KJ642615" />
            <param name="apobec3" value="false" />
            <output name="alignment" file="sequences.aln.fasta" />
            <output name="tree">
                <assert_contents>
                    <has_line_matching expression="#NEXUS"/>
                </assert_contents>
            </output>
        </test>
        <!-- Phylogeny plus APOBEC3 reconstruction: all six outputs are produced. -->
        <test expect_num_outputs="6">
            <param name="sequences" value="test-sequences.fasta" />
            <param name="bg_file" value="test-background.fasta" />
            <param name="out_group" value="KJ642615" />
            <param name="apobec3" value="true" />
            <output name="alignment" file="sequences.aln.fasta" />
            <output name="tree">
                <assert_contents>
                    <has_line_matching expression="#NEXUS"/>
                </assert_contents>
            </output>
            <output name="svg">
                <assert_contents>
                    <has_text text="svg xmlns:"/>
                    <has_text text="DQ011155"/>
                </assert_contents>
            </output>
            <output name="png" file="sequences.tree.png" ftype="png" compare="sim_size" delta="1000" />
            <output name="aa_recon" file="sequences.tree.amino_acid.reconstruction.csv" ftype="csv" />
            <output name="branch_snps" file="sequences.tree.branch_snps.reconstruction.csv" ftype="csv" />
        </test>
    </tests>
    <help><![CDATA[
squirrel allows for rapidly producing reliable alignments for MPXV and also enables maximum-likelihood phylogenetics pipeline tree estimation.

Ensure your input sequences are of a singular clade and not mixed CladeI/CladeII. CladeI and CladeIa/b are fine to combine.

**Alignment**

Squirrel maps each query genome in the input file against a reference genome specific to each clade using minimap2. Using gofasta, the mapping file is then converted into a multiple sequence alignment.

For Clade II, the reference used is NC_063383 and for Clade I, we use NC_003310. This means that all coordinates within an alignment will be relative to these references. A benefit of this is that within a clade, alignment files can be combined without having to recalculate the alignment. Note however that insertions relative to the reference sequence will not be included in the alignment.

Squirrel by default creates a single alignment fasta file. Using the genbank coordinates for NC_063383 it also has the ability to extract the aligned coding sequences either as separate records or as a concatenated alignment. This can facilitate codon-aware phylogenetic or sequence analysis.

**APOBEC3**

Enrichment of APOBEC3-mutations in the MPXV population are a signature of sustained human-to-human transmission. Identifying APOBEC3-like mutations in MPXV genomes from samples in a new outbreak can be a piece of evidence to support sustained human transmission of mpox. Squirrel can run an APOBEC3-reconstruction and map these mutations onto the phylogeny.

**Default Masking**

Squirrel performs masking (replacement with N) on low-complexity or repetitive regions that have been characterised for Clade I and II. These regions are defined in to_mask.cladeii.csv and to_mask.cladei.csv (see github: https://github.com/aineniamh/squirrel/blob/main/squirrel/data/).

**Additional Masking**

Additional mask file can be provided to mask sites in addition to default masking. To generate additional masking file, run the galaxy tool *squirrel-qc*
    ]]></help>
    <expand macro="citations" />
</tool>