view create.xml @ 20:1dc3f0c61817 draft

Uploaded 20190304
author fabio
date Mon, 04 Mar 2019 09:14:04 -0500
parents 7f712cc0d3d5
children
line wrap: on
line source

<?xml version="1.0"?>
<tool name="BloomTree Manager - Create" id="btman_create" version="1.0.0">
    <!-- Short description displayed together with the tool name. -->
    <description>a Sequence Bloom Tree</description>
    <!-- Imports macros.xml so that the <expand> elements in this file can reference the macros it defines. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "requirements" macro; its content is defined outside this file. -->
    <expand macro="requirements" />
    <command detect_errors="exit_code">
<![CDATA[
    ## Cheetah template that builds the create.py command line.
    ##
    ## Each per-experiment option is a single string holding one '|'-terminated
    ## entry per instance of the "experiments" repeat. Inside one entry, the
    ## files of an experiment are separated by ','.
    ##
    ## Dataset names are user-controlled and end up inside single-quoted shell
    ## arguments, so the characters ' | and , are replaced with '_': a name can
    ## then neither close the shell quoting nor be mistaken for a list separator.
    #import re
    python '$__tool_directory__/create.py'

    #set formats = ''
    #set filepaths = ''
    #set filenames = ''
    #set compressed = ''
    #set minab = ''
    #set qthres = ''
    #for $exp in $experiments:
        #set formats += str( $exp.conditional_format.format ) + '|'
        #if $exp.conditional_format.format == 'accessions':
            ## A single tabular dataset listing accession numbers; never flagged as compressed.
            #set filepaths += str( $exp.conditional_format.accession_numbers ) + '|'
            #set filenames += re.sub( "['|,]", '_', str( $exp.conditional_format.accession_numbers.name ) ) + '|'
            #set compressed += '0|'
        #elif $exp.conditional_format.format == 'fasta':
            #set compressed += str( $exp.conditional_format.conditional_fasta_compressed.fasta_compressed ) + '|'
            ## Boolean parameters render as their truevalue/falsevalue strings ('1'/'0'),
            ## so they are compared as strings like the other conditions in this template.
            #if str( $exp.conditional_format.conditional_fasta_compressed.fasta_compressed ) == '0':
                #set filepaths += ','.join( [ str( $f ) for $f in $exp.conditional_format.conditional_fasta_compressed.fastafiles ] ) + '|'
                #set filenames += ','.join( [ re.sub( "['|,]", '_', str( $f.name ) ) for $f in $exp.conditional_format.conditional_fasta_compressed.fastafiles ] ) + '|'
            #else:
                #set filepaths += ','.join( [ str( $f ) for $f in $exp.conditional_format.conditional_fasta_compressed.fastagzfiles ] ) + '|'
                #set filenames += ','.join( [ re.sub( "['|,]", '_', str( $f.name ) ) for $f in $exp.conditional_format.conditional_fasta_compressed.fastagzfiles ] ) + '|'
            #end if
        #elif $exp.conditional_format.format == 'fastq':
            #set compressed += str( $exp.conditional_format.conditional_fastq_compressed.fastq_compressed ) + '|'
            #if str( $exp.conditional_format.conditional_fastq_compressed.fastq_compressed ) == '0':
                #set filepaths += ','.join( [ str( $f ) for $f in $exp.conditional_format.conditional_fastq_compressed.fastqfiles ] ) + '|'
                #set filenames += ','.join( [ re.sub( "['|,]", '_', str( $f.name ) ) for $f in $exp.conditional_format.conditional_fastq_compressed.fastqfiles ] ) + '|'
            #else:
                #set filepaths += ','.join( [ str( $f ) for $f in $exp.conditional_format.conditional_fastq_compressed.fastqgzfiles ] ) + '|'
                #set filenames += ','.join( [ re.sub( "['|,]", '_', str( $f.name ) ) for $f in $exp.conditional_format.conditional_fastq_compressed.fastqgzfiles ] ) + '|'
            #end if
        #end if
        #set minab += str( $exp.min_abundance ) + '|'
        ## -1.0 is emitted when quality control is switched off for the experiment.
        #if $exp.conditional_quality.quality_control == '1':
            #set qthres += str( $exp.conditional_quality.quality_threshold ) + '|'
        #else:
            #set qthres += '-1.0|'
        #end if
    #end for
    ## -1 is emitted unless the user disabled automatic estimation and typed a size.
    #set bfsize = -1
    #if $bloomsize_condition.bloomsize_control == '0':
        #set bfsize = $bloomsize_condition.bloom_filter_size
    #end if

    --formats '${formats}'
    --filepaths '${filepaths}'
    --filenames '${filenames}'
    --compressed '${compressed}'
    --minabundances '${minab}'
    --qualitythresholds '${qthres}'

    --klen ${kmer_len}
    --bfsize ${bfsize}

    --outfile '${resulttxt}'
    --outdir 'sbt'
    --tooldir '$__tool_directory__'
]]>
    </command>
    <inputs>
        <!-- One instance per experiment; at least one is required. Each instance carries its own
             input selection, minimum abundance and optional quality-control threshold. -->
        <repeat name="experiments" title="Select a list of experiments" help="Select a set of experiments on which the Sequence Bloom Tree will be built." min="1">
            <!-- Input kind: FASTA files, FASTQ files, or a tabular list of SRA accession numbers. -->
            <conditional name="conditional_format">
                <param name="format" type="select" label="Select the experiment format" help="FASTA and FASTQ are the supported formats">
                    <option value="fasta">FASTA Experiments</option>
                    <option value="fastq">FASTQ Experiments</option>
                    <option value="accessions">SRA Accession Numbers</option>
                </param>
                <when value="fasta">
                    <!-- The checkbox selects between the plain and the gzip-compressed dataset selector. -->
                    <!-- NOTE(review): "fastagz" is not a stock Galaxy datatype name (the stock one is
                         "fasta.gz"); confirm that a custom datatype is declared for this tool. -->
                    <conditional name="conditional_fasta_compressed">
                        <param name="fasta_compressed" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Are your experiments compressed?" />
                        <when value="0">
                            <param format="fasta" name="fastafiles" multiple="true" type="data" label="Select one or more FASTA experiments" />
                        </when>
                        <when value="1">
                            <param format="fastagz" name="fastagzfiles" multiple="true" type="data" label="Select one or more FASTA .gz experiments" />
                        </when>
                    </conditional>
                </when>
                <when value="fastq">
                    <!-- NOTE(review): "fastqgz" is not a stock Galaxy datatype name (the stock one is
                         "fastq.gz"); confirm that a custom datatype is declared for this tool. -->
                    <conditional name="conditional_fastq_compressed">
                        <param name="fastq_compressed" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Are your experiments compressed?" />
                        <when value="0">
                            <param format="fastq" name="fastqfiles" multiple="true" type="data" label="Select one or more FASTQ experiments" />
                        </when>
                        <when value="1">
                            <param format="fastqgz" name="fastqgzfiles" multiple="true" type="data" label="Select one or more FASTQ .gz experiments" />
                        </when>
                    </conditional>
                </when>
                <when value="accessions">
                    <param name="accession_numbers" type="data" format="tabular" label="Select a list of SRA Accession Numbers" help="Select a tabular file with a list of accession numbers in the first column." />
                </when>
            </conditional>

            <param name="min_abundance" type="integer" value="2" min="0" label="Insert a Bloom filter minimum abundance" help="This value is the minimum abundance cutoff for the creation of the Bloom filter." />

            <!-- The threshold is only requested when quality control is enabled; the command template
                 passes -1.0 for the experiment otherwise. -->
            <conditional name="conditional_quality">
                <param name="quality_control" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Apply a quality control procedure" />
                <when value="1">
                    <param name="quality_threshold" size="1" type="float" value="0.8" min="0.0" max="1.0" label="Quality threshold" help="If the number of sequences flagged as poor quality on the total number of sequences in a file is less than this threshold, the whole experiment will be excluded." />
                </when>
            </conditional>
        </repeat>

        <!-- A k-mer must contain at least one base, hence the lower bound of 1. -->
        <param name="kmer_len" type="integer" value="21" min="1" label="K-mer length" />

        <!-- When the checkbox is ticked (the default) no size is asked for and the command template
             passes -1; unticking it reveals the explicit size field. -->
        <conditional name="bloomsize_condition">
            <param name="bloomsize_control" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Automatically estimate the Bloom filter size" />
            <when value="0">
                <param name="bloom_filter_size" size="1" type="integer" value="1" min="1" label="Bloom Filter size" help="Disable this field to let the tool estimate an appropriate Bloom filter size." />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- List collection filled from the "sbt" directory, which is the output directory the
             command hands to create.py. The pattern splits each file name at its last dot:
             the part before it becomes the element identifier, the part after it the extension. -->
        <collection name="list_output"
                    type="list"
                    label="${tool.name} SBT Collection">
            <discover_datasets directory="sbt"
                               ext="auto"
                               pattern="(?P&lt;identifier_0&gt;.*(?=\.)).(?P&lt;ext&gt;[^\.]*$)" />
        </collection>
        <!-- Text result of the run. -->
        <!-- NOTE(review): the command also passes this dataset's path to create.py as its outfile
             option while from_work_dir expects "sbtres.txt" in the working directory; confirm
             which of the two the script actually writes. -->
        <data name="resulttxt"
              format="txt"
              from_work_dir="sbtres.txt"
              label="${tool.name} SBT: Result" />
    </outputs>

    <help><![CDATA[
This tool creates Sequence Bloom Trees starting from a set of FASTA or FASTQ files.
It can also check the quality of the input dataset and exclude the files that do not reach a specified quality level.

-----

**Input file**

The input of this tool is a set of FASTA or FASTQ experiments, or a list of SRA accession numbers.
For each of the selected experiments, the minimum abundance for the corresponding Bloom filter is required.
Additionally, a quality control procedure can be applied to guarantee that the quality of every experiment exceeds a
specified threshold. Experiments with a lower quality level will be discarded.

The k-mer length must also be specified, together with the Bloom filter size. The Bloom filter size is optional and
is estimated automatically if not provided.

-----

**Output**

This tool returns a collection containing the Sequence Bloom Tree nodes and a file representing the organization of the tree.

Take a look at the Query tool documentation for a detailed description about how
to query a Sequence Bloom Tree.

-----

.. class:: infomark

**Notes**

This Galaxy tool has been developed by Fabio Cumbo.

Please visit the `GitHub repository`_ for more information about the BloomTree Manager.

.. _GitHub repository: https://github.com/fabio-cumbo/bloomtree-manager
    ]]></help>

    <expand macro="citations" />
</tool>