view blobtoolkit.xml @ 4:392811378515 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/blobtoolkit commit c25f0422af4155a6ed5a456ebf6181b869ccefe9
author bgruening
date Tue, 19 Nov 2024 17:08:56 +0000
parents 8c2167481e72
children
line wrap: on
line source

<tool id="blobtoolkit" name="BlobToolKit" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <!-- Short summary shown next to the tool name. -->
    <description>Genome Assembly QC</description>
    <!-- Shared definitions are pulled in from macros.xml; presumably the version tokens
         and every macro expanded in this file are defined there (verify against macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Cheetah command template. Exactly one branch is rendered, chosen by the mode selector:
        ##   create : build a new BlobDir from an assembly and pack it into Blobdir.tgz
        ##   plot   : unpack a BlobDir archive and render a single blobtk view
        ##   add    : unpack a BlobDir archive, add the supplied datasets, optionally plot, repack
        ##   filter : unpack a BlobDir archive, run blobtools filter and optionally remove, optionally plot, repack
        ## Parameters are always referenced through their full path under mode_conditional.
        #if $mode_conditional.selector == 'create'
            mkdir -p './Blobdir' &&
            mkdir -p './taxdump' &&
            ## Unpack the taxdump archive into a directory that is handed to blobtools.
            ln -s '${mode_conditional.taxdump}' 'taxdump.tar' &&
            tar -xf 'taxdump.tar' -C './taxdump' &&
            ln -s '${mode_conditional.fasta}' 'assembly.fasta' &&
            #if $mode_conditional.meta
                ln -s '${mode_conditional.meta}' 'assembly.yaml' &&
            #end if
            blobtools create
            --fasta 'assembly.fasta'
            #if $mode_conditional.meta
                --meta 'assembly.yaml'
            #end if
            --taxid $mode_conditional.taxid
            --taxdump './taxdump'
            './Blobdir'

            && tar -C ./Blobdir -zcvf Blobdir.tgz .

        #else if $mode_conditional.selector == 'plot'
            mkdir -p blobtk_files &&
            tar -xvf '${mode_conditional.blobdir_input}' -C blobtk_files &&
            blobtk plot --blobdir blobtk_files --view '${mode_conditional.plot_view}'

        #else if $mode_conditional.selector == 'add'
            mkdir -p './Blobdir' &&
            mkdir -p './taxdump' &&
            ln -s '${mode_conditional.taxdump}' 'taxdump.tar' &&
            tar -xf 'taxdump.tar' -C './taxdump' &&
            #if $mode_conditional.blobdir.is_of_type("tgz")
                tar -zxf '${mode_conditional.blobdir}' -C './Blobdir' &&
            #else
                tar -xf '${mode_conditional.blobdir}' -C './Blobdir' &&
            #end if
            ## Stage the optional inputs under fixed local names.
            #if $mode_conditional.busco
                ln -s '${mode_conditional.busco}' './busco_results.tab' &&
            #end if
            #if $mode_conditional.blast_input.selector == 'enabled'
                ln -s '${mode_conditional.blast_input.hits}' './blast_results.tab' &&
            #end if
            #if $mode_conditional.cov
                ln -s '${mode_conditional.cov}' './input.bam' &&
            #end if
            #if $mode_conditional.bed
                    mkdir -p './bed_files' &&
                    #import re
                    ## Link every BED dataset under a sanitised name; the index suffix keeps names unique.
                    #for $i, $input in enumerate($mode_conditional.bed):
                        #set $safename = re.sub('[^\w\-_]', '_', $input.element_identifier) + "." + str($i)
                        ln -sf '${input}' './bed_files/${safename}.bed' &&
                    #end for
            #end if
            blobtools add
                --threads \${GALAXY_SLOTS:-8}
                --taxdump './taxdump'
                #if $mode_conditional.busco
                    --busco './busco_results.tab'
                #end if
                #if $mode_conditional.blast_input.selector == 'enabled'
                    --hits './blast_results.tab'
                    #if $mode_conditional.blast_input.hits_cols
                        --hits-cols '${mode_conditional.blast_input.hits_cols}'
                    #end if
                    --taxrule $mode_conditional.blast_input.taxrule
                    --evalue $mode_conditional.blast_input.evalue
                    ## hit_count is optional: emit the flag only when a value is present, otherwise
                    ## the next option would be consumed as its argument.
                    #if str($mode_conditional.blast_input.hit_count) != ''
                        --hit-count $mode_conditional.blast_input.hit_count
                    #end if
                    --bitscore $mode_conditional.blast_input.bitscore
                #end if
                #if $mode_conditional.bed
                    --beddir './bed_files'
                #end if
                #if $mode_conditional.cov
                    --cov './input.bam'
                #end if
                #if $mode_conditional.fasta
                    --fasta '${mode_conditional.fasta}'
                #end if
                #if $mode_conditional.trnascan
                    --trnascan '${mode_conditional.trnascan}'
                #end if
                #if $mode_conditional.text_input.selector == 'enabled'
                    --text '${mode_conditional.text_input.text}'
                    --text-cols '${mode_conditional.text_input.text_cols}'
                    $mode_conditional.text_input.text_header
                    $mode_conditional.text_input.text_no_array
                    --text-delimiter $mode_conditional.text_input.text_delimiter
                #end if
                #if $mode_conditional.advanced_options.blobdb
                    --blobdb '${mode_conditional.advanced_options.blobdb}'
                #end if
                #if $mode_conditional.advanced_options.synonyms
                    --synonyms '${mode_conditional.advanced_options.synonyms}'
                #end if
                $mode_conditional.advanced_options.update_plot
                #if $mode_conditional.advanced_options.pileup_args
                    --pileup-args '${mode_conditional.advanced_options.pileup_args}'
                #end if
                $mode_conditional.advanced_options.replace
                './Blobdir'

            #if $mode_conditional.blobtk_plot_options.blobtk_plot == 'yes'
                && blobtk plot --blobdir './Blobdir' --view '${mode_conditional.blobtk_plot_options.plot_view}'
            #end if

            && tar -C ./Blobdir -zcvf Blobdir.tgz .

        #else if $mode_conditional.selector == 'filter'
            mkdir -p './Blobdir' &&
            #if $mode_conditional.blobdir.is_of_type("tgz")
                tar -zxf '${mode_conditional.blobdir}' -C './Blobdir' &&
            #else
                tar -xf '${mode_conditional.blobdir}' -C './Blobdir' &&
            #end if
            #if $mode_conditional.filter_options.list
                ## Quoted printf keeps the identifiers literal; echo would treat a leading -n or -e as an option.
                printf '%s\n' '${mode_conditional.filter_options.list}' > id_list.txt &&
            #end if
            ## NOTE(review): the archive is unpacked a second time, into the working directory itself.
            ## This looks redundant with the extraction into ./Blobdir above; confirm before removing.
            #if $mode_conditional.blobdir.is_of_type("tgz")
                tar -zxf '${mode_conditional.blobdir}' -C './' &&
            #else
                tar -xf '${mode_conditional.blobdir}' -C './' &&
            #end if
            #if $mode_conditional.filter_options.fasta
                ln -s '${mode_conditional.filter_options.fasta}' './input.fasta' &&
            #end if
            #if $mode_conditional.filter_options.fastq_filter.selector == 'enabled'
                ln -s '${mode_conditional.filter_options.fastq_filter.fastq}' './input.fastq' &&
                ln -s '${mode_conditional.filter_options.fastq_filter.cov}' './input.bam' &&
            #end if
            blobtools filter
            #if $mode_conditional.filter_options.param
                --param '${mode_conditional.filter_options.param}'
            #end if
            #if $mode_conditional.filter_options.json
                --json '${mode_conditional.filter_options.json}'
            #end if
            #if $mode_conditional.filter_options.list
                --list id_list.txt
            #end if
            $mode_conditional.filter_options.invert
            #if $mode_conditional.filter_options.fasta
                --fasta './input.fasta'
            #end if
            #if $mode_conditional.filter_options.fastq_filter.selector == 'enabled'
                --fastq './input.fastq'
                --cov './input.bam'
            #end if
            #if $mode_conditional.filter_options.text_input.selector == 'enabled'
                --text '${mode_conditional.filter_options.text_input.text}'
                --text-cols '${mode_conditional.filter_options.text_input.text_cols}'
                $mode_conditional.filter_options.text_input.text_header
                --text-delimiter $mode_conditional.filter_options.text_input.text_delimiter
            #end if
            ./Blobdir
            ## blobtools remove only runs when at least one removal flag or a field ID was given.
            #if $mode_conditional.remove_options.remove_selector or $mode_conditional.remove_options.field
                && blobtools remove
                #for $param in $mode_conditional.remove_options.remove_selector
                    $param
                #end for
                #if $mode_conditional.remove_options.field
                    --field '${mode_conditional.remove_options.field}'
                #end if
                ./Blobdir
            #end if

            #if $mode_conditional.blobtk_plot_options.blobtk_plot == 'yes'
                && blobtk plot --blobdir ./Blobdir --view '${mode_conditional.blobtk_plot_options.plot_view}'
            #end if

            && tar -C ./Blobdir -zcvf Blobdir.tgz .
        #end if

    ]]></command>
    <inputs>
        <conditional name="mode_conditional">
            <!-- Mode switch: the chosen value decides which <when> branch of this conditional is active. -->
            <param name="selector" type="select"
                   label="Select mode"
                   help="Select a BlobToolKit module">
                <option value="create">Create a BlobToolKit dataset</option>
                <option value="add">Add data to a BlobToolKit dataset</option>
                <option value="filter">Edit a BlobToolKit dataset</option>
                <option value="plot">Plot a BlobToolKit dataset</option>
            </param>
            <when value="create">
                <!-- Inputs for building a new BlobDir: assembly, optional metadata, taxon ID and
                     whatever the macro_taxdump expansion contributes. -->
                <param argument="--fasta" type="data" format="fasta"
                       label="Genome assembly file"
                       help="FASTA sequence file"/>
                <param argument="--meta" type="data" format="yaml" optional="true"
                       label="Metadata file"
                       help="Optional metadata dataset"/>
                <param argument="--taxid" type="integer" value=""
                       label="NCBI taxonomy ID"
                       help="Add ranks to metadata for a taxid"/>
                <expand macro="macro_taxdump"/>
            </when>
            <when value="add">
                <!-- Inputs for adding data to an existing BlobDir archive. -->
                <param name="blobdir" type="data" format="tgz" label="Blobdir.tgz file" help="This file should be generated by the BlobToolKit create mode" />
                <expand macro="macro_taxdump"/>
                <param argument="--busco" type="data" format="tsv,tabular,txt" optional="true" label="BUSCO full table file"/>
                <!-- BLAST/Diamond hit options are only offered when the selector is set to enabled. -->
                <conditional name="blast_input">
                    <param name="selector" type="select" label="BLAST/Diamond hits">
                        <option value="enabled">Enabled</option>
                        <option value="disabled" selected="true">Disabled</option>
                    </param>
                    <when value="enabled">
                        <param argument="--hits" type="data" format="tsv,tabular" optional="true" label="BLAST/Diamond hits dataset" help="Tabular BLAST/Diamond output file"/>
                        <param argument="--taxrule" type="select" label="BLAST hits to taxa rule" help="Rule to use when assigning BLAST hits to taxa">
                            <option value="bestsum">Bestsum</option>
                            <option value="bestsumorder">Bestsumorder</option>
                            <option value="bestdistsum">Bestdistsum</option>
                            <option value="bestdistsumorder">Bestdistsumorder</option>
                            <option value="blastp">Blastp</option>
                        </param>
                        <param argument="--evalue" type="float" min="0" value="1" label="E-value cutoff"
                            help="The smaller the E-value, the better the match. Any hits with an evalue weaker than the value specified will be excluded"/>
                        <param argument="--bitscore" type="float" min="0" value="1" label="Bitscore cutoff"
                            help="The higher the bit-score, the better the sequence similarity. Any hits with a bitscore lower than the value specified will be excluded"/>
                        <param argument="--hit-count" type="integer" min="1" value="10" label="Hits to a given taxon" optional="true"
                            help="By default the 10 highest scoring hits to a given taxon will be used when applying the --taxrule"/>
                        <!-- The sanitizer drops every character other than letters, digits, '=' and ','. -->
                        <param argument="--hits-cols" type="text" value="1=qseqid,2=staxids,3=bitscore,5=sseqid,10=qstart,11=qend,14=evalue" optional="true"
                            label="BLAST/Diamond file column order" help="More information in the help section">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits">
                                    <add value="=" />
                                    <add value="," />
                                </valid>
                            </sanitizer>
                            <validator type="regex">[0-9a-z=,]+</validator>
                        </param>
                    </when>
                    <when value="disabled"/>
                </conditional>
                <param argument="--bed" type="data" format="bed" multiple="true" optional="true" label="BED file of coverages per scaffold"/>
                <param argument="--cov" type="data" format="bam,sam,cram" optional="true" label="BAM/SAM/CRAM read alignment file"/>
                <param argument="--fasta" type="data" format="fasta" optional="true" label="FASTA sequence" help="FASTA sequence file" />
                <param argument="--trnascan" type="data" format="tsv,tabular" optional="true" label="tRNAscan2-SE" help="tRNAscan-SE is employed for identifying and annotating tRNA genes in genomes"/>
                <!-- The text-input macro is expanded with one extra flag that is passed into it here. -->
                <expand macro="macro_text_input">
                    <param argument="--text-no-array" type="boolean" truevalue="--text-no-array" falsevalue="" checked="false" label="Prevent duplicated identifiers"
                        help="Prevent fields in files with duplicate identifiers being loaded as array fields" />
                </expand>
                <expand macro="blobtk_plotting"></expand>
                <section name="advanced_options" title="Advanced options">
                    <param argument="--blobdb" type="data" format="json" optional="true" label="Blobtools v1 database" help="This file should have been generated with the previous Blobtools tool version"/>
                    <param argument="--synonyms" type="data" format="tsv" optional="true" label="Identifier and synonyms" help="TSV file containing current identifiers and synonyms"/>
                    <param argument="--update-plot" type="boolean" truevalue="--update-plot" falsevalue="" checked="false" label="Update plot" help="Flag to use new taxrule as default category" />
                    <!-- The sanitizer drops every character other than letters, digits, '=', ',' and '-'. -->
                    <param argument="--pileup-args" type="text" value="" optional="true" label="Samtools Pileup" help="Key/value pairs to pass to samtools pileup">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits">
                                <add value="=" />
                                <add value="," />
                                <add value="-" />
                            </valid>
                        </sanitizer>
                        <validator type="regex">[0-9a-zA-Z=,-]+</validator>
                    </param>
                    <param argument="--replace" type="boolean" truevalue="--replace" falsevalue="" checked="false" label="Replace fields" help="Allow existing fields to be overwritten" />
                </section>
            </when>
            <when value="filter">
                <!-- Inputs for filtering an existing BlobDir archive and/or removing fields from it. -->
                <param name="blobdir" type="data" format="tgz" label="Blobdir file" help="This file should be generated by the module create" />
                <section name="filter_options" title="Filter mode options" expanded="true">
                    <!-- The sanitizer drops every character other than letters, digits, '=', ',' and '-'. -->
                    <param argument="--param" type="text" value="" optional="true" label="Parameter value" help="String of type param=value. Individual param=value pairs can be specified to filter based on Variable or Category fields">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits">
                                <add value="=" />
                                <add value="," />
                                <add value="-" />
                            </valid>
                        </sanitizer>
                        <validator type="regex">[0-9a-zA-Z=,-]+</validator>
                    </param>
                    <!-- The sanitizer drops every character other than letters, digits, space, '-' and '_'. -->
                    <param argument="--list" type="text" value="" optional="true" label="List of identifiers" help="Space separated list of identifiers">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits">
                                <add value=" " />
                                <add value="-" />
                                <add value="_" />
                            </valid>
                        </sanitizer>
                        <validator type="regex">[0-9a-zA-Z _-]+</validator>
                    </param>
                    <param argument="--json" type="data" format="json" optional="true" label="JSON format list file" help="JSON format list file as generated by BlobtoolKit Viewer"/>
                    <param argument="--fasta" type="data" format="fasta" multiple="true" optional="true" label="Assembly to be filtered" help="FASTA format assembly file to be filtered"/>
                    <conditional name="fastq_filter">
                        <param name="selector" type="select" label="Filter FASTQ reads">
                            <option value="disabled">Disabled</option>
                            <option value="enabled">Enabled</option>
                        </param>
                        <when value="disabled"/>
                        <when value="enabled">
                            <param argument="--fastq" type="data" format="fastq" optional="true" label="Reads to be filtered" help="FASTQ format read file to be filtered"/>
                            <param argument="--cov" type="data" format="bam,sam,cram" optional="true" label="BAM/SAM/CRAM read alignment file" />
                        </when>
                    </conditional>
                    <expand macro="macro_text_input"/>
                    <!-- NOTE(review): this value is not referenced by the command template in this file; confirm whether it should be passed to blobtools filter. -->
                    <param argument="--summary-rank" type="text" value="phylum" label="Summary rank" help="Taxonomic level for summary">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters"/>
                        </sanitizer>
                        <validator type="regex">[a-zA-Z]+</validator>
                    </param>
                    <param argument="--invert" type="boolean" truevalue="--invert" falsevalue="" checked="false" label="Invert filter" help="Invert the filter (exclude matching records)"/>
                </section>
                <section name="remove_options" title="Remove mode options" expanded="true">
                    <!-- Each option value is itself the command-line flag written into the blobtools remove call. -->
                    <param name="remove_selector" type="select" multiple="true" display="checkboxes" label="Fields to remove">
                        <option value="--all">Remove all fields except identifiers (--all)</option>
                        <option value="--busco">Remove all BUSCO fields (--busco)</option>
                        <option value="--cov">Remove all cov and read_cov fields (--cov)</option>
                        <option value="--fasta">Remove gc, length and ncount fields (--fasta)</option>
                        <option value="--hits">Remove all taxonomy fields (--hits)</option>
                    </param>
                    <param argument="--field" type="text" value="" optional="true" label="Remove fields by ID">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits"/>
                        </sanitizer>
                        <validator type="regex">[0-9a-zA-Z]+</validator>
                    </param>
                </section>
                <expand macro="blobtk_plotting"></expand>
            </when>
            <when value="plot">
                <!-- Stand-alone plotting: a BlobDir archive plus the view to render. -->
                <param name="blobdir_input" type="data" format="tgz"
                       label="Select the blobdir tar.gz file generated from blobtools"/>
                <param name="plot_view" type="select"
                       label="Select the view type for the plot">
                    <option value="blob" selected="true">Blob view</option>
                    <option value="cumulative">Cumulative view</option>
                    <option value="legend">Legend view</option>
                    <option value="snail">Snail view</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Re-packed BlobDir archive, collected from Blobdir.tgz; created in every mode except plot. -->
        <data name="blobdir" format="tgz" from_work_dir="./Blobdir.tgz" label="${tool.name} on ${on_string}: Blobdir.tgz">
            <filter>mode_conditional['selector'] in ('create', 'add', 'filter')</filter>
        </data>
        <!-- SVG collected from output.svg; created in plot mode, or in add/filter mode when plotting is set to yes. -->
        <data name="output_svg" format="svg" from_work_dir="output.svg" label="${tool.name} on ${on_string}: Output Plot">
            <filter>mode_conditional['selector'] == 'plot' or (mode_conditional['selector'] in ('add', 'filter') and mode_conditional['blobtk_plot_options']['blobtk_plot'] == 'yes')</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 01: Testing blobtoolkit with create with no plotting enabled -->
        <test expect_num_outputs="1">
            <conditional name="mode_conditional">
                <param name="selector" value="create"/>
                <param name="fasta" value="assembly.fasta.gz"/>
                <param name="meta" value="assembly.yaml"/>
                <param name="taxid" value="801"/>
                <param name="taxdump" value="small_taxdump.tar.gz"/>
            </conditional>
            <output name="blobdir" file="test01_blobdir.tar.gz" compare="sim_size" delta="100"/>
        </test>
        <!-- Test 02: Testing blobtoolkit with add with plotting enabled -->
        <test expect_num_outputs="2">
            <conditional name="mode_conditional">
                <param name="selector" value="add"/>
                <param name="blobdir" value="test01_blobdir.tar.gz"/>
                <param name="taxdump" value="small_taxdump.tar.gz"/>
                <param name="busco" value="busco_full_table.tabular"/>
                <param name="cov" value="bam_file.bam"/>
                <param name="fasta" value="contig.fasta"/>
                <section name="advanced_options">
                    <param name="update_plot" value="false"/>
                </section>
                <conditional name="blast_input">
                    <param name="selector" value="enabled"/>
                    <param name="hits" value="blast_results.tabular"/>
                    <param name="taxrule" value="bestsum"/>
                    <param name="evalue" value="1"/>
                    <param name="bitscore" value="1"/>
                    <param name="hit_count" value="10"/>
                    <param name="hits_cols" value="1=qseqid,2=staxids,3=bitscore,5=sseqid,10=qstart,11=qend,14=evalue"/>
                </conditional>
                <conditional name="blobtk_plot_options">
                    <param name="blobtk_plot" value="yes"/>
                    <param name="plot_view" value="blob"/>
                </conditional>
            </conditional>
            <output name="blobdir" file="test02_blobdir.tar.gz"  compare="sim_size" delta="100"/>
            <output name="output_svg" file="test02_blob.svg"  compare="sim_size" delta="100"/>
        </test>
        <!-- Test 03: Testing blobtoolkit with filtering and plotting disabled -->
        <test expect_num_outputs="1">
            <conditional name="mode_conditional">
                <param name="selector" value="filter"/>
                <param name="blobdir" value="test02_blobdir.tar.gz"/>
                <section name="filter_options">
                    <param name="fasta" value="contig.fasta"/>
                    <param name="summary_rank" value="phylum"/>
                    <param name="invert" value="false"/>
                </section>
                <section name="remove_options">
                    <param name="remove_selector" value="--all"/>
                </section>
                <!-- The plotting conditional lives inside mode_conditional, so it is nested here as in Test 02. -->
                <conditional name="blobtk_plot_options">
                    <param name="blobtk_plot" value="no"/>
                </conditional>
            </conditional>
            <output name="blobdir">
                <assert_contents>
                    <has_size value="620" delta="50"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 04: Testing blobtoolkit with filter with list and fasta filtering -->
        <test expect_num_outputs="1">
            <conditional name="mode_conditional">
                <param name="selector" value="filter"/>
                <param name="blobdir" value="test02_blobdir.tar.gz"/>
                <section name="filter_options">
                    <param name="list" value="801 802"/>
                    <param name="fasta" value="contig.fasta"/>
                </section>
                <section name="remove_options">
                    <param name="remove_selector" value="--all"/>
                </section>
                <!-- The plotting conditional lives inside mode_conditional, so it is nested here as in Test 02. -->
                <conditional name="blobtk_plot_options">
                    <param name="blobtk_plot" value="no"/>
                </conditional>
            </conditional>
            <output name="blobdir">
                <assert_contents>
                    <has_size value="620" delta="50"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 05: Testing blobtoolkit blob plot -->
        <test expect_num_outputs="1">
            <conditional name="mode_conditional">
                <param name="selector" value="plot"/>
                <param name="blobdir_input" value="blobdir.tgz"/>
                <param name="plot_view" value="blob"/>
            </conditional>
            <output name="output_svg" ftype="svg" file="output.svg" compare="sim_size"/>
        </test>
        <!-- Test 06: Testing blobtoolkit Snail plot -->
        <test expect_num_outputs="1">
            <conditional name="mode_conditional">
                <param name="selector" value="plot"/>
                <param name="blobdir_input" value="blobdir.tgz"/>
                <param name="plot_view" value="snail"/>
            </conditional>
            <output name="output_svg" ftype="svg">
                <assert_contents>
                    <has_text text="Log10 scaffold count (total 242)"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 07: Testing blobtoolkit legend plot -->
        <test expect_num_outputs="1">
            <conditional name="mode_conditional">
                <param name="selector" value="plot"/>
                <param name="blobdir_input" value="blobdir.tgz"/>
                <param name="plot_view" value="legend"/>
            </conditional>
            <output name="output_svg" ftype="svg" file="output_legend.svg" compare="sim_size"/>
        </test>
        <!-- Test 08: Testing blobtoolkit Cumulative plot -->
        <test expect_num_outputs="1">
            <conditional name="mode_conditional">
                <param name="selector" value="plot"/>
                <param name="blobdir_input" value="blobdir.tgz"/>
                <param name="plot_view" value="cumulative"/>
            </conditional>
            <output name="output_svg" ftype="svg" file="output_cumulative.svg" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
BlobToolKit is a software suite to aid researchers in identifying and isolating non-target data in draft and publicly available genome assemblies. It can be used to process assembly,
read and analysis files for fully reproducible interactive exploration in the browser-based Viewer. BlobToolKit can be used during assembly to filter non-target DNA, helping researchers produce assemblies with high biological credibility.

.. class:: infomark
    
**NCBI taxdump directory**

The taxdump database, provided by NCBI, includes the taxonomic lineage of taxa, information on type strains and material, and host information. The file **new_taxdump.tar.gz** can be downloaded from the taxonomy directory on the `FTP site <https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/>`_. 
    
    ]]></help>
    <expand macro="citations"/>
</tool>