view coverm_contig.xml @ 3:c2a5823e4763 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/coverm commit 2d02165f40a9f8206a69716b2302bc58f5364982
author iuc
date Wed, 26 Jul 2023 07:35:24 +0000
parents f8cb3f0a19fa
children 1dcea261abbe
line wrap: on
line source

<tool id="coverm_contig" name="CoverM contig" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Calculate read coverage per contig</description>
    <!-- macros.xml supplies the macros that are expanded throughout this wrapper. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <command><![CDATA[
## Cheetah template that stages the inputs and then builds a single "coverm contig" call.
## The two modules below are not referenced directly in this template; presumably the
## macro tokens expanded further down need them - confirm against macros.xml.
#import re
#import os

## One path list per kind of input. They start empty here and are read by the coverm
## call below; the macro tokens in between are expected to fill them (defined in
## macros.xml, not visible here - confirm).
#set $single_fp = []
#set $fw_fp = []
#set $rv_fp = []
#set $interl_fp = []
#set $ref_fp = []
#set $bam_fp = []

## Working sub-directories, one per path list above.
mkdir 'single/' &&
mkdir 'fw/' &&
mkdir 'rv/' &&
mkdir 'interl/' &&
mkdir 'ref/' &&
mkdir 'bam/' &&

## Input staging: BAM files when the reads are already mapped, otherwise reads plus
## reference for the selected assembly mode.
#if $mapped.mapped == 'mapped'
    @BAMS@
#else
    #if $mapped.mode.mode == 'individual'
        @INDIVIDUAL_ASSEMBLY_READS@
        #set $ref = $mapped.mode.reference
        @INDIVIDUAL_ASSEMBLY_REF@
    #else
        @CO_ASSEMBLY_ALL_READS@
        #set $refs = $mapped.mode.reference
        @CO_ASSEMBLY_REF@
    #end if
#end if

coverm
    contig
#if $mapped.mapped == 'mapped'
    -b
    #for $bam in $bam_fp
        '$bam'
    #end for
    $mapped.sharded
#else
    ## Each read kind is emitted independently: a co-assembly run can combine paired,
    ## single and interleaved inputs through the "extra_reads" repeat, and an
    ## if / else-if chain would silently drop every kind after the first non-empty one.
    #if $fw_fp
    -1
        #for $read in $fw_fp
        '$read'
        #end for
    -2
        #for $read in $rv_fp
        '$read'
        #end for
    #end if
    #if $single_fp
    --single
        #for $read in $single_fp
        '$read'
        #end for
    #end if
    #if $interl_fp
    --interleaved
        #for $read in $interl_fp
        '$read'
        #end for
    #end if
    --reference
    #for $ref in $ref_fp
        '$ref'
    #end for
    #if $mapped.mode.mode == 'co'
    $mapped.mode.sharded
    #end if
    --mapper '$mapped.mapper'
#end if

    ## Alignment thresholds. The pair-specific thresholds are only passed when the
    ## proper-pairs selector carries a non-empty value.
    ## NOTE(review): the tests set alignment.exclude_supplementary, but no such value is
    ## forwarded to coverm here - confirm whether the alignment macro defines it and
    ## whether it should be added to this command.
    --min-read-aligned-length $alignment.min_read_aligned_length
    --min-read-percent-identity $alignment.min_read_percent_identity
    --min-read-aligned-percent $alignment.min_read_aligned_percent
    $alignment.proper_pairs_only.proper_pairs_only
#if $alignment.proper_pairs_only.proper_pairs_only != ''
    --min-read-aligned-length-pair $alignment.proper_pairs_only.min_read_aligned_length_pair
    --min-read-percent-identity-pair $alignment.proper_pairs_only.min_read_percent_identity_pair
    --min-read-aligned-percent-pair $alignment.proper_pairs_only.min_read_aligned_percent_pair
#end if

    ## Coverage calculation: every selected method becomes its own quoted argument.
    --methods
#for $method in $cov.methods
        '$method'
#end for
    --min-covered-fraction $cov.min_covered_fraction
    --contig-end-exclusion $cov.contig_end_exclusion
    --trim-min $cov.trim_min
    --trim-max $cov.trim_max

    --output-format '$output_format'
    --output-file '$output'
    ## Falls back to a single thread when GALAXY_SLOTS is not set.
    --threads \${GALAXY_SLOTS:-1}
    ]]></command>
    <inputs>
        <!-- Top-level switch: "mapped" takes existing alignments, "not-mapped" takes reads plus a reference that still need mapping. -->
        <conditional name="mapped">
            <expand macro="mapped"/>
            <when value="mapped">
                <expand macro="mapped_params"/>
            </when>
            <when value="not-mapped">
                <!-- Assembly mode, "individual" or "co"; only the co-assembly branch offers the repeatable "extra_reads" block. -->
                <conditional name="mode">
                    <expand macro="assembly_mode"/>
                    <when value="individual">
                        <expand macro="individual_assembly_reads"/>
                        <expand macro="individual_assembly_reference"/> 
                    </when>
                    <when value="co">
                        <expand macro="co_assembly_reads"/>
                        <repeat name="extra_reads" title="Additional reads">
                            <expand macro="co_assembly_reads"/>
                        </repeat>
                        <expand macro="co_assembly_reference"/>
                    </when>
                </conditional>
                <expand macro="mapping"/>
            </when>
        </conditional>
        <expand macro="alignment"/>
        <section name="cov" title="Coverage calculation options" expanded="false">
            <!-- "mean" is declared and pre-selected here; the remaining method options come from the cov_method_options macro. -->
            <param argument="--methods" type="select" multiple="true" label="Method(s) for calculating coverage">
                <option value="mean" selected="true">mean: Average number of aligned reads overlapping each position on the contig</option>
                <expand macro="cov_method_options"/>
            </param>
            <expand macro="coverage_params"/>
        </section>
        <expand macro="output_format"/>
    </inputs>
    <outputs>
        <!-- Single tabular dataset; the command hands its path to coverm as the output file. -->
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <!-- Unmapped reads, co-assembly mode: one list:paired collection with reference 7seqs.fna, mean coverage, dense output. -->
        <test expect_num_outputs="1">
            <conditional name="mapped">
                <param name="mapped" value="not-mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <conditional name="read_type">
                        <param name="type" value="paired_collection"/>
                        <param name="paired_reads">
                            <collection type="list:paired">
                                <element name="reads_for_seq1_and_seq2">
                                    <collection type="paired">
                                        <element name="forward" value="reads_for_seq1_and_seq2.1.fq.gz" ftype="fastqsanger.gz"/>
                                        <element name="reverse" value="reads_for_seq1_and_seq2.2.fq.gz" ftype="fastqsanger.gz"/>
                                    </collection>
                                </element>
                            </collection>
                        </param>
                    </conditional>
                    <param name="reference" value="7seqs.fna" />
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <param name="output_format" value="dense"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Contig"/>
                    <has_text text="Mean"/>
                    <has_text text="7seqs.fna"/>
                    <has_text text="genome1~random_sequence_length_11000"/>
                    <has_text text="genome2~seq1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Unmapped reads, individual assembly mode: one forward/reverse pair with reference 7seqs.fna, mean coverage, dense output. -->
        <test expect_num_outputs="1">
            <conditional name="mapped">
                <param name="mapped" value="not-mapped" />
                <conditional name="mode">
                    <param name="mode" value="individual"/>
                    <conditional name="read_type">
                        <param name="type" value="paired"/>
                        <param name="read1" value="reads_for_seq1_and_seq2.1.fq.gz" ftype="fastqsanger.gz"/>
                        <param name="read2" value="reads_for_seq1_and_seq2.2.fq.gz" ftype="fastqsanger.gz"/>
                    </conditional>
                    <param name="reference" value="7seqs.fna" />
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <param name="output_format" value="dense"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Contig"/>
                    <has_text text="Mean"/>
                    <has_text text="7seqs.fna"/>
                    <has_text text="genome1~random_sequence_length_11000"/>
                    <has_text text="genome2~seq1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Unmapped reads, co-assembly mode: one file passed as single-end reads with reference 2seqs.fasta, sparse output; the sample name is asserted. -->
        <test expect_num_outputs="1">
            <conditional name="mapped">
                <param name="mapped" value="not-mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <conditional name="read_type">
                        <param name="type" value="single"/>
                        <param name="single" value="bad_reads.interleaved.fq"/>
                    </conditional>
                    <param name="reference" value="2seqs.fasta" />
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="0"/>
            </section>
            <param name="output_format" value="sparse"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Contig"/>
                    <has_text text="Mean"/>
                    <has_text text="Sample"/>
                    <has_text text="2seqs.fasta_0/bad_reads.interleaved.fq_single_0"/>
                    <has_text text="seq2"/>
                </assert_contents>
            </output>
        </test>
        <!-- Unmapped reads, co-assembly mode: one interleaved reads file with reference 2seqs.fasta, sparse output; the sample name is asserted. -->
        <test expect_num_outputs="1">
            <conditional name="mapped">
                <param name="mapped" value="not-mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <conditional name="read_type">
                        <param name="type" value="interleaved"/>
                        <param name="interleaved" value="bad_reads.all.interleaved.fa"/>
                    </conditional>
                    <param name="reference" value="2seqs.fasta" />
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="0"/>
            </section>
            <param name="output_format" value="sparse"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Contig"/>
                    <has_text text="Mean"/>
                    <has_text text="Sample"/>
                    <has_text text="2seqs.fasta_0/bad_reads.all.interleaved.fa_interleaved_0"/>
                    <has_text text="seq1"/>
                    <has_text text="seq2"/>
                </assert_contents>
            </output>
        </test>
        <!-- Unmapped reads, co-assembly mode: one forward/reverse pair plus an extra list:paired collection supplied through the "extra_reads" repeat, reference 2seqs.fasta, sparse output; both sample names are asserted. -->
        <test expect_num_outputs="1">
            <conditional name="mapped">
                <param name="mapped" value="not-mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <conditional name="read_type">
                        <param name="type" value="paired"/>
                        <param name="read1" value="reads_for_seq1_and_seq2.1.fq.gz"/>
                        <param name="read2" value="reads_for_seq1_and_seq2.2.fq.gz"/>
                    </conditional>
                    <repeat name="extra_reads">
                        <conditional name="read_type">
                            <param name="type" value="paired_collection"/>
                            <param name="paired_reads">
                                <collection type="list:paired">
                                    <element name="reads_for_seq1_and_seq2">
                                        <collection type="paired">
                                            <element name="forward" value="reads_for_seq1_and_seq2.1.fq.gz" ftype="fastqsanger.gz"/>
                                            <element name="reverse" value="reads_for_seq1_and_seq2.2.fq.gz" ftype="fastqsanger.gz"/>
                                        </collection>
                                    </element>
                                </collection>
                            </param>
                        </conditional>
                    </repeat>
                    <param name="reference" value="2seqs.fasta" />
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0.95" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <!-- The selector is named proper_pairs_only (underscores), the name the command template reads. -->
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="0"/>
            </section>
            <param name="output_format" value="sparse"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Contig"/>
                    <has_text text="Mean"/>
                    <has_text text="Sample"/>
                    <has_text text="2seqs.fasta_0/reads_for_seq1_and_seq2.1.fq.gz_paired_0"/>
                    <has_text text="2seqs.fasta_0/reads_for_seq1_and_seq2_paired_collection_00"/>
                    <has_text text="seq1"/>
                    <has_text text="seq2"/>
                </assert_contents>
            </output>
        </test>
        <!-- Already-mapped input: one BAM file (tpm_test.bam), mean and tpm methods, sparse output. -->
        <test expect_num_outputs="1">
            <conditional name="mapped">
                <param name="mapped" value="mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <param name="bam_files" value="tpm_test.bam"/>
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0.95" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <!-- The selector is named proper_pairs_only (underscores), the name the command template reads. -->
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean,tpm"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <param name="output_format" value="sparse"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Contig"/>
                    <has_text text="Mean"/>
                    <has_text text="Sample"/>
                    <has_text text="TPM"/>
                    <has_text text="tpm_test"/>
                    <has_text text="genome1~random_sequence_length_11000"/>
                    <has_text text="genome2~seq1"/>
                    <has_text text="genome3~random_sequence_length_11001"/>
                </assert_contents>
            </output>
        </test>
        <!-- Already-mapped input: one BAM file (tpm_test.bam), mean and tpm methods, sparse output. -->
        <!-- NOTE(review): this test appears to repeat the inputs and assertions of the preceding mapped-BAM test; consider removing it or varying a parameter so it covers something new. -->
        <test expect_num_outputs="1">
            <conditional name="mapped">
                <param name="mapped" value="mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <param name="bam_files" value="tpm_test.bam"/>
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0.95" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <!-- The selector is named proper_pairs_only (underscores), the name the command template reads. -->
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean,tpm"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <param name="output_format" value="sparse"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Contig"/>
                    <has_text text="Mean"/>
                    <has_text text="Sample"/>
                    <has_text text="TPM"/>
                    <has_text text="tpm_test"/>
                    <has_text text="genome1~random_sequence_length_11000"/>
                    <has_text text="genome2~seq1"/>
                    <has_text text="genome3~random_sequence_length_11001"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark
    
**Method for calculating coverage**

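This tool runs CoverM in contig mode, so coverage is calculated and reported per contig. Calculation of genome-wise coverage (genome mode) is similar to calculating contig-wise (contig mode) coverage, except that the unit of reporting is per-genome rather than per-contig and, for calculation methods which exclude base positions based on their coverage, all positions from all contigs are considered together. The method descriptions below are worded for genomes; in contig mode they apply to each contig instead.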

- Relative abundance: Percentage relative abundance of each genome, and the unmapped read percentage
- Mean: Average number of aligned reads overlapping each position on the genome
- Trimmed mean: Average number of aligned reads overlapping each position after removing the most deeply and shallowly covered positions.
- Covered fraction: Fraction of genome covered by 1 or more reads
- Covered bases: Number of bases covered by 1 or more reads
- Variance: Variance of coverage depths
- Length: Length of each genome in base pairs
- Count: Number of reads aligned to each genome. Note that a single read may be aligned to multiple genomes with supplementary alignments
- Reads per base: Number of reads aligned divided by the length of the genome
- MetaBAT: Reproduction of the `MetaBAT <https://bitbucket.org/berkeleylab/metabat>`_ tool output
- Coverage histogram: Histogram of coverage depths
- RPKM: Reads mapped per kilobase of genome, per million mapped reads
- TPM: Transcripts Per Million as described in `Li et al 2010 <https://doi.org/10.1093/bioinformatics/btp692>`_

    ]]></help>
    <expand macro="citation"/>
</tool>