Mercurial > repos > devteam > bwa
changeset 26:2477830927ec draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bwa commit 6e9628b1d92fdb358b79959ad54a456cfa46fa33
| author | iuc | 
|---|---|
| date | Fri, 17 May 2024 21:09:07 +0000 | 
| parents | e188dc7a68e6 | 
| children | 4a196b9c72c2 | 
| files | bwa-mem.xml bwa.xml bwa_macros.xml test-data/bwa-aln-test1-fasta.bam test-data/bwa-aln-test1.bam test-data/bwa-aln-test2.bam test-data/bwa-mem-test1-fasta.bam test-data/bwa-mem-test1.bam test-data/bwa-mem-test2.bam test-data/bwa-mem-test3.bam test-data/bwa-mem-test4.bam | 
| diffstat | 11 files changed, 121 insertions(+), 102 deletions(-) [+] | 
line wrap: on
 line diff
--- a/bwa-mem.xml Fri Aug 19 18:51:56 2022 +0000 +++ b/bwa-mem.xml Fri May 17 21:09:07 2024 +0000 @@ -1,13 +1,11 @@ <?xml version="1.0"?> -<tool id="bwa_mem" name="Map with BWA-MEM" version="@VERSION@.2"> +<tool id="bwa_mem" name="Map with BWA-MEM" version="@TOOL_VERSION@" profile="22.05"> <description>- map medium and long reads (> 100 bp) against reference genome</description> - <xrefs> - <xref type="bio.tools">bwa</xref> - </xrefs> <macros> <import>read_group_macros.xml</import> <import>bwa_macros.xml</import> </macros> + <expand macro="bio_tools"/> <expand macro="requirements"/> <expand macro="stdio"/> <command><![CDATA[ @@ -100,7 +98,8 @@ #end if '${reference_fasta_filename}' - '${fastq_input.fastq_input1}' '${fastq_input.fastq_input2}' + '${fastq_input.fastq_input1}' + '${fastq_input.fastq_input2}' #elif str( $fastq_input.fastq_input_selector ) == "paired_collection": ## check that insert statistics is used #if str( $fastq_input.iset_stats ): @@ -108,7 +107,8 @@ #end if '${reference_fasta_filename}' - '${fastq_input.fastq_input1.forward}' '${fastq_input.fastq_input1.reverse}' + '${fastq_input.fastq_input1.forward}' + '${fastq_input.fastq_input1.reverse}' #else: '${reference_fasta_filename}' '${fastq_input.fastq_input1}' @@ -167,7 +167,7 @@ <expand macro="read_group_conditional" /> <conditional name="analysis_type"> - <param name="analysis_type_selector" type="select" label="Select analysis mode"> + <param name="analysis_type_selector" type="select" label="Select analysis mode" help="Please note that minimap2 is recommended over BWA as the aligner for long-read or contig data, for which it outperforms BWA in speed and typically in accuracy (see tool help below)."> <option value="illumina">1.Simple Illumina mode</option> <option value="pacbio">2.PacBio mode (-x pacbio)</option> <option value="ont2d">3.Nanopore 2D-reads mode (-x ont2d)</option> @@ -279,35 +279,35 @@ </change_format> </data> </outputs> - <tests> - <test> + <!-- `samtools sort` in the new update 
adds PG lines to the output so the lines_diff is changed from "2" to "4" --> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="fastq_input_selector" value="paired"/> <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> + <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="fastq_input_selector" value="single"/> <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="2" /> + <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="fastq_input_selector" value="paired"/> <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/> <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> + <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param 
name="index_a" value="is"/> @@ -319,9 +319,9 @@ <param name="PL" value="CAPILLARY"/> <param name="LB" value="AARDVARK-1" /> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" /> + <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="fastq_input_selector" value="paired"/> @@ -329,9 +329,9 @@ <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> <param name="analysis_type_selector" value="illumina"/> <param name="output_sort" value="unsorted"/> - <output name="bam_output" ftype="qname_input_sorted.bam" file="bwa-mem-test3.bam" lines_diff="2" /> + <output name="bam_output" ftype="qname_input_sorted.bam" file="bwa-mem-test3.bam" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="fastq_input_selector" value="paired"/> @@ -339,11 +339,34 @@ <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> <param name="analysis_type_selector" value="illumina"/> <param name="output_sort" value="name"/> - <output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="2" /> + <output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="4" /> + </test> + <test expect_num_outputs="1"> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <conditional name="fastq_input"> + <param name="fastq_input_selector" value="paired_collection"/> + <param name="fastq_input1"> + <collection type="paired"> + <element name="forward" value="bwa-mem-fastq1.fq" /> + <element 
name="reverse" value="bwa-mem-fastq2.fq" /> + </collection> + </param> + </conditional> + <conditional name="analysis_type"> + <param name="analysis_type_selector" value="illumina"/> + </conditional> + <param name="output_sort" value="name"/> + <output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="4" /> </test> </tests> <help><![CDATA[ -**What is does** + +**What it does** + +This Galaxy tool wraps the bwa-mem module of the BWA_ read mapping tool. For more details about the different modules of the BWA package see the `BWA manual`_. + +The Galaxy implementation takes fastq files as input and produces output in BAM format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard). From http://arxiv.org/abs/1303.3997: @@ -351,34 +374,37 @@ It automatically chooses between local and end-to-end alignments, supports paired-end reads and performs chimeric alignment. The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases. -This Galaxy tool wraps bwa-mem module of bwa read mapping tool. The Galaxy implementation takes fastq files as input and produces output in BAM format, which can be further processed using various BAM utilities exiting in Galaxy (BAMTools, SAMTools, Picard). +----- + +@ref_genomes@ ----- -**Indices: Selecting reference genomes for BWA** +**Analysis modes** + +The tool supports different preconfigured analysis modes optimized for different types of input data. Alternatively, it allows you to take full control over all available options. -Galaxy wrapper for BWA allows you select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options: +The preconfigured modes are: + + 1. 
*Simple Illumina mode* + + This corresponds to the simplest possible and standard bwa mem application in which it aligns single or paired-end data to a reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2] + 2. *PacBio mode* - 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against. - 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa mem`. + This mode is adjusted specifically for mapping of long PacBio subreads. It is running bwa mem with the `-x pacbio` option. + 3. *Nanopore 2D-reads mode* + + This mode is running bwa mem with the `-x ont2d` option. + 4. *Intra-species contigs mode* - If your genome of interest is not listed here you have two choices: + This mode is running bwa mem with the `-x intractg` option. - 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added - 2. Upload your genome of interest as a FASTA file to Galaxy history and selected **Use a genome from the history and build index** option. +.. class:: infomark + + Please note: minimap2_ is recommended over and outperforms BWA-MEM for most types of input data except for Illumina short reads. For Illumina short-read mapping you may also consider using `BWA-MEM2`_, which is about twice as fast as BWA-MEM. 
----- -**Galaxy-specific option** - -Galaxy allows four levels of control over bwa-mem options provided by **Select analysis mode** menu option. These are: - - 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it alignes single or paired-end data to reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2] - 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 <reference index> <PacBio dataset in fastq format> - 3. *Full list of options*: Allows access to all options through Galaxy interface. - - ----- - **Bam sorting mode** The generated bam files can be sorted according to three criteria: coordinates, names and input order. @@ -392,7 +418,9 @@ @RG@ -@info@ +@links@ +.. _minimap2: https://github.com/lh3/minimap2 +.. _`BWA-MEM2`: https://github.com/bwa-mem2/bwa-mem2 ]]></help> <citations> <citation type="doi">10.1093/bioinformatics/btp324</citation>
--- a/bwa.xml Fri Aug 19 18:51:56 2022 +0000 +++ b/bwa.xml Fri May 17 21:09:07 2024 +0000 @@ -1,9 +1,6 @@ <?xml version="1.0"?> -<tool id="bwa" name="Map with BWA" version="@VERSION@.5"> +<tool id="bwa" name="Map with BWA" version="@TOOL_VERSION@" profile="22.05"> <description>- map short reads (< 100 bp) against reference genome</description> - <xrefs> - <xref type="bio.tools">bwa</xref> - </xrefs> <macros> <import>read_group_macros.xml</import> <import>bwa_macros.xml</import> @@ -77,6 +74,7 @@ </when> </xml> </macros> + <expand macro="bio_tools"/> <expand macro="requirements"/> <expand macro="stdio"/> <command> @@ -314,41 +312,42 @@ </data> </outputs> <tests> - <test> + <!-- `samtools sort` in the new update adds PG lines to the output so the lines_diff is changed from "2" to "4" --> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history"/> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="input_type_selector" value="single"/> <param name="fastq_input1" ftype="fasta" value="bwa-mem-fasta1.fa"/> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test1-fasta.bam" lines_diff="2"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test1-fasta.bam" lines_diff="4"/> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history"/> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="input_type_selector" value="paired"/> <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="4"/> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" 
value="history"/> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="input_type_selector" value="paired"/> <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/> <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="4"/> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history"/> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="input_type_selector" value="paired_bam"/> <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="4"/> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history"/> <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> <param name="input_type_selector" value="paired"/> @@ -358,57 +357,34 @@ <param name="ID" value="rg1"/> <param name="PL" value="CAPILLARY"/> <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="5"/> </test> </tests> <help><![CDATA[ -**What is does** +**What it does** -BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the -human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use -the separate BWA-MEM Galaxy tool. 
+BWA_ is a software package for mapping low-divergent sequences against a large reference genome, such as the human genome. +The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use the separate BWA-MEM Galaxy tool. -This Galaxy tool wraps bwa-aln, bwa-samse and -sampe modules of bwa read mapping tool: +This Galaxy tool wraps the bwa-aln, bwa-samse and -sampe modules of the BWA read mapping tool: - **bwa aln** - actual mapper placing reads onto the reference sequence - **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for single reads - **bwa sampe** - post-processor for paired reads +For more details about the different modules of the BWA package see the `BWA manual`_. The Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM format, -which can be further processed using various BAM utilities exiting in Galaxy (BAMTools, SAMTools, Picard). +which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard). ----- -**Indices: Selecting reference genomes for BWA** - -The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes -using the **Will you select a reference genome from your history or use a built-in index?** select box. - -This select box has two options: - - 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select - reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility - and are ready to be mapped against. - 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select - reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your - current history. 
If your genome of interest is uploaded into history it will be shown there. Selecting a genome - from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run - mapping with `bwa aln`. - - -If your genome of interest is not listed here you have two choices: - - 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index - needs to be added - 2. Upload your genome of interest as a FASTA file to Galaxy history and selected **Use a genome from the history - and build index** option. - +@ref_genomes@ @RG@ -@info@ +@links@ ]]></help> <citations> <citation type="doi">10.1093/bioinformatics/btp324</citation>
--- a/bwa_macros.xml Fri Aug 19 18:51:56 2022 +0000 +++ b/bwa_macros.xml Fri May 17 21:09:07 2024 +0000 @@ -1,7 +1,7 @@ <macros> <import>read_group_macros.xml</import> - <token name="@VERSION@">0.7.17</token> + <token name="@TOOL_VERSION@">0.7.18</token> <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token> @@ -36,11 +36,17 @@ <xml name="requirements"> <requirements> - <requirement type="package" version="@VERSION@">bwa</requirement> - <requirement type="package" version="1.6">samtools</requirement> + <requirement type="package" version="@TOOL_VERSION@">bwa</requirement> + <requirement type="package" version="1.20">samtools</requirement> </requirements> </xml> + <xml name="bio_tools"> + <xrefs> + <xref type="bio.tools">bwa</xref> + </xrefs> + </xml> + <xml name="stdio"> <stdio> <exit_code range="1:" /> @@ -103,6 +109,26 @@ </actions> </macro> + <token name="@ref_genomes@"> +**Indices: Selecting reference genomes for BWA** + +The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes +using the **Will you select a reference genome from your history or use a built-in index?** select box. + +This select box has two options: + + 1. **Use a built-in genome index** + + With this option (which is the default), Galaxy provides you with a dropdown select menu populated with genomes that have been pre-indexed with the bwa index utility and are ready to map sequenced reads against. + + The collection of pre-indexed genomes is managed by the administrators of your Galaxy instance. If your genome of interest is missing and it's impractical to use the second option below to work with it, consider contacting the support team for the Galaxy server you are working on to let them know that you would like to have an additional genome indexed. + + 2. 
**Use a genome from history and build index** + + With this option, Galaxy provides you with a dropdown select menu populated with all FASTA formatted files listed in your current history. If you have uploaded your genome of interest into your history it will be shown there. + + Selecting a genome from this dropdown will cause Galaxy to index it transparently first using the `bwa index` command, and then map against it with `bwa aln` or `bwa mem`, depending on the tool. + </token> <token name="@RG@"> ----- @@ -155,19 +181,8 @@ Note the hierarchical relationship between read groups (unique for each lane) to libraries (sequenced on two lanes) and samples (across four lanes, two lanes for each library). </token> - <token name="@info@"> ------ - -.. class:: infomark - -**More info** - -To obtain more information about BWA and ask questions use these resources: - - 1. https://biostar.usegalaxy.org/ - 2. https://www.biostars.org/ - 3. https://github.com/lh3/bwa - 4. http://bio-bwa.sourceforge.net/ - + <token name="@links@"> +.. _BWA: https://github.com/lh3/bwa +.. _`BWA manual`: https://bio-bwa.sourceforge.net/bwa.shtml </token> </macros>
