Mercurial > repos > galaxy-australia > cactus_cactus
view cactus_cactus.xml @ 2:8b67dd48fec1 draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit b2235e7e7970f74e5d55ce150e3380d93fcdd70f
author | galaxy-australia |
---|---|
date | Tue, 13 Sep 2022 00:17:17 +0000 |
parents | 1bc1199f0ff4 |
children | 9422c5a87ee2 |
line wrap: on
line source
<tool id="cactus_cactus" name="Cactus" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
    <description>whole-genome multiple sequence alignment.</description>
    <!-- Version tokens, xrefs, requirements and citations are defined in macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!--
        Command template (Cheetah + bash). Three stages:
          1. (within-species mode only) reject FASTA headers that contain whitespace;
          2. build seqfile.txt: the guide tree (between-species mode only) followed by
             one "label path" line per input genome;
          3. run minigraph + cactus-graphmap + cactus-align (within-species mode)
             or plain cactus (between-species mode), writing alignment.hal.
        Every dataset path and generated file name is single-quoted for the shell.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## Check the FASTA headers
        ## This is only necessary in pangenome mode
        #if $aln_mode.aln_mode_select == 'intraspecies':
            #for $seq in $in_seqs:
                if
                #if $seq.fasta.is_of_type('fasta.gz'):
                    zgrep
                #else
                    grep
                #end if
                "^>" '$seq.fasta' | grep -q "[[:space:]]" ; then
                    echo "Error parsing input FASTA." ;
                    echo "Pangenome mode fails if there are spaces in the header." ;
                    echo "Please remove them with the NormalizeFasta tool." ;
                    exit 1 ;
                fi &&
            #end for
        #end if

        ## Set up seqfile
        ## In between-species mode the seqfile starts with the guide tree
        #if $aln_mode.aln_mode_select == 'interspecies':
            cat '$aln_mode.in_tree' >> seqfile.txt &&
        #end if

        ## Link every input as <label>.<ext> and add a "label path" line to the seqfile.
        ## seq_line collects the (shell-quoted) link names for minigraph.
        #set seq_line = ''
        #for $seq in $in_seqs:
            #set seq_fn = str($seq.label) + '.' + $seq.fasta.ext
            ln -s '$seq.fasta' '$seq_fn' &&
            printf '%s %s\n' '$seq.label' '$seq_fn' >> seqfile.txt
            #set seq_line += "'" + $seq_fn + "' "
            &&
        #end for

        ## Run cactus
        #if $aln_mode.aln_mode_select == 'intraspecies':
            ## If we're doing a pangenome, we need to run the steps manually
            minigraph
                -xggs
                -t \${GALAXY_SLOTS:-4}
                $seq_line
                > pangenome.gfa &&
            cactus-graphmap
                --maxCores \${GALAXY_SLOTS:-4}
                --maxMemory \${GALAXY_MEMORY_MB:-8192}M
                ./jobStore
                ./seqfile.txt
                pangenome.gfa
                pangenome.paf
                --outputFasta pangenome.gfa.fa
                --binariesMode local
                --workDir ./ &&
            cactus-align
                --maxCores \${GALAXY_SLOTS:-4}
                --maxMemory \${GALAXY_MEMORY_MB:-8192}M
                ./jobStore
                ./seqfile.txt
                pangenome.paf
                alignment.hal
                --pangenome
                --binariesMode local
                --workDir ./
        #else if $aln_mode.aln_mode_select == 'interspecies':
            ## Run cactus normally
            cactus
                --maxCores \${GALAXY_SLOTS:-4}
                --maxMemory \${GALAXY_MEMORY_MB:-8192}M
                jobStore
                seqfile.txt
                alignment.hal
                --binariesMode local
                --workDir ./
        #end if
    ]]></command>
    <inputs>
        <!-- The guide tree is only requested in between-species mode. -->
        <conditional name="aln_mode">
            <param name="aln_mode_select" type="select" label="Alignment mode" help="The taxonomic relationship between input genomes. If genomes are from multiple individuals of the same species, select 'Within-species'">
                <option value="interspecies" selected="true">Between-species</option>
                <option value="intraspecies">Within-species</option>
            </param>
            <when value="interspecies">
                <param name="in_tree" type="data" format="nhx" label="Guide tree" help="Phylogenetic tree in Newick format. Required by Cactus to achieve linear scaling with number of input genomes" />
            </when>
            <when value="intraspecies">
            </when>
        </conditional>
        <!-- One entry per genome: a label plus its (optionally gzipped) FASTA. -->
        <repeat name="in_seqs" title="Input genome">
            <param name="label" type="text" value="" label="Genome Label" help="NO SPACES. Must match a label in the guide tree.">
                <!-- The label is written into the space-delimited seqfile and used as a
                     file name, so it must be non-empty and free of whitespace. -->
                <validator type="regex" message="Genome labels must not be empty and must not contain whitespace.">^\S+$</validator>
            </param>
            <param name="fasta" type="data" format="fasta,fasta.gz" label="Genome Sequence" help="Input genome"/>
        </repeat>
        <!-- add an option for root -->
        <!-- root mr -->
    </inputs>
    <outputs>
        <data name="out_hal" format="h5" from_work_dir="alignment.hal" label="${tool.name} on ${on_string} (HAL file)" />
    </outputs>
    <tests>
        <!-- test interspecies mode -->
        <test expect_num_outputs="1">
            <conditional name="aln_mode">
                <param name="aln_mode_select" value="interspecies"/>
                <param name="in_tree" value="test_tree.nhx"/>
            </conditional>
            <repeat name="in_seqs">
                <param name="label" value="simCow_chr6"/>
                <param name="fasta" value="simCow_chr6.fasta"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="simDog_chr6"/>
                <param name="fasta" value="simDog_chr6.fasta"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="simHuman_chr6"/>
                <param name="fasta" value="simHuman_chr6.fasta"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="simMouse_chr6"/>
                <param name="fasta" value="simMouse_chr6.fasta"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="simRat_chr6"/>
                <param name="fasta" value="simRat_chr6.fasta"/>
            </repeat>
            <output name="out_hal">
                <assert_contents>
                    <has_size value="4783905" delta="200000" />
                </assert_contents>
            </output>
        </test>
        <!-- within-species mode -->
        <test expect_num_outputs="1">
            <conditional name="aln_mode">
                <param name="aln_mode_select" value="intraspecies"/>
            </conditional>
            <repeat name="in_seqs">
                <param name="label" value="simCow_chr6"/>
                <param name="fasta" value="simCow_chr6.fasta"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="simDog_chr6"/>
                <param name="fasta" value="simDog_chr6.fasta"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="simHuman_chr6"/>
                <param name="fasta" value="simHuman_chr6.fasta"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="simMouse_chr6"/>
                <param name="fasta" value="simMouse_chr6.fasta"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="simRat_chr6"/>
                <param name="fasta" value="simRat_chr6.fasta"/>
            </repeat>
            <output name="out_hal">
                <assert_contents>
                    <has_size value="1349620" delta="200000" />
                </assert_contents>
            </output>
        </test>
        <!-- compressed input -->
        <test expect_num_outputs="1">
            <conditional name="aln_mode">
                <param name="aln_mode_select" value="intraspecies"/>
            </conditional>
            <repeat name="in_seqs">
                <param name="label" value="germ_25"/>
                <param name="fasta" value="germ_25.fasta.gz"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="vulg_25"/>
                <param name="fasta" value="vulg_25.fasta.gz"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="pens_25"/>
                <param name="fasta" value="pens_25.fasta.gz"/>
            </repeat>
            <output name="out_hal">
                <assert_contents>
                    <has_size value="7420424" delta="200000" />
                </assert_contents>
            </output>
        </test>
        <!-- FASTA header: whitespace in a header must abort the job with exit code 1 -->
        <test expect_exit_code="1" expect_failure="true">
            <conditional name="aln_mode">
                <param name="aln_mode_select" value="intraspecies"/>
            </conditional>
            <repeat name="in_seqs">
                <param name="label" value="badheader1"/>
                <param name="fasta" value="bh1.fasta.gz"/>
            </repeat>
            <repeat name="in_seqs">
                <param name="label" value="badheader2"/>
                <param name="fasta" value="bh2.fasta.gz"/>
            </repeat>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

`Cactus <https://github.com/ComparativeGenomicsToolkit/cactus>`__ is a reference-free whole-genome multiple alignment program. It can be used to progressively align a large number of genomes.

**Usage**

**Between-species mode**

If you are aligning genomes from **multiple species**, you need to provide a guide tree in Newick format. Cactus uses the guide tree to progressively align genomes, meaning that it doesn’t need to align all possible pairs of genomes.

A Newick-formatted tree for human, chimp, gorilla and orangutan genomes looks like this:

::

    (((human:0.006,chimp:0.006667):0.0022,gorilla:0.008825):0.0096,orang:0.01831);

The numbers are the branch lengths.

**Beta: Within-species mode**

You can also run Cactus in `pangenome mode <https://github.com/ComparativeGenomicsToolkit/cactus/blob/master/doc/pangenome.md>`__ to align genomes of multiple individuals from the **same species**. In this mode you will not use a guide tree. Cactus will use `minigraph <https://github.com/lh3/minigraph>`__ to generate a graph of the input genomes and then use the graph to order the alignments.

To use pangenome mode, select ‘Within-species’ in the ‘Alignment mode’ dropdown.

⚠️ To use pangenome mode, you will have to remove spaces from the headers in your FASTA file. You can do this with the NormalizeFasta tool.

**Input**

The developers recommend soft-masking your genomes with RepeatMasker before running Cactus. RepeatMasker is available on Galaxy.

If you’re using Between-species mode, you need to provide labels for the fasta files that match the leaves on the guide tree. In the example above, you would use the label ‘human’ for the human fasta file.

**Output**

The main output of Cactus is in `HAL format <https://github.com/ComparativeGenomicsToolkit/cactus#using-the-output>`__.

You can use the `Cactus: export <root?tool_id=cactus_export>`__ tool to convert the Cactus output to a VG or Multiple Alignment Format (MAF) file.
    ]]></help>
    <expand macro="citations"/>
</tool>