diff create.xml @ 19:7f712cc0d3d5 draft

Uploaded 20190304.2
author fabio
date Mon, 04 Mar 2019 08:31:28 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/create.xml	Mon Mar 04 08:31:28 2019 -0500
@@ -0,0 +1,173 @@
+<?xml version="1.0"?>
+<tool name="BloomTree Manager - Create" id="btman_create" version="1.0.0">
+    <description>a Sequence Bloom Tree</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code">
+<![CDATA[
+    python '$__tool_directory__/create.py'
+    ## One entry per experiment is appended to each '|'-terminated list; files of one experiment are ','-joined.
+    #set formats = ''
+    #set filepaths = ''
+    #set filenames = ''
+    #set compressed = ''
+    #set minab = ''
+    #set qthres = ''
+    #for $exp in $experiments:
+        #set formats += str( $exp.conditional_format.format ) + '|'
+        #if $exp.conditional_format.format == 'accessions':
+            #set filepaths += str( $exp.conditional_format.accession_numbers ) + '|'
+            #set filenames += str( $exp.conditional_format.accession_numbers.name ) + '|'
+            #set compressed += '0|'
+        #elif $exp.conditional_format.format == 'fasta':
+            #set compressed += str( $exp.conditional_format.conditional_fasta_compressed.fasta_compressed ) + '|'
+            #if str( $exp.conditional_format.conditional_fasta_compressed.fasta_compressed ) == '0':
+                #set filepaths += ','.join( [ str( $f ) for $f in $exp.conditional_format.conditional_fasta_compressed.fastafiles ] ) + '|'
+                #set filenames += ','.join( [ str( $f.name ) for $f in $exp.conditional_format.conditional_fasta_compressed.fastafiles ] ) + '|'
+            #else:
+                #set filepaths += ','.join( [ str( $f ) for $f in $exp.conditional_format.conditional_fasta_compressed.fastagzfiles ] ) + '|'
+                #set filenames += ','.join( [ str( $f.name ) for $f in $exp.conditional_format.conditional_fasta_compressed.fastagzfiles ] ) + '|'
+            #end if
+        #elif $exp.conditional_format.format == 'fastq':
+            #set compressed += str( $exp.conditional_format.conditional_fastq_compressed.fastq_compressed ) + '|'
+            #if str( $exp.conditional_format.conditional_fastq_compressed.fastq_compressed ) == '0':
+                #set filepaths += ','.join( [ str( $f ) for $f in $exp.conditional_format.conditional_fastq_compressed.fastqfiles ] ) + '|'
+                #set filenames += ','.join( [ str( $f.name ) for $f in $exp.conditional_format.conditional_fastq_compressed.fastqfiles ] ) + '|'
+            #else:
+                #set filepaths += ','.join( [ str( $f ) for $f in $exp.conditional_format.conditional_fastq_compressed.fastqgzfiles ] ) + '|'
+                #set filenames += ','.join( [ str( $f.name ) for $f in $exp.conditional_format.conditional_fastq_compressed.fastqgzfiles ] ) + '|'
+            #end if
+        #end if
+        #set minab += str( $exp.min_abundance ) + '|'
+        ## A threshold of -1.0 is passed for experiments whose quality control is switched off.
+        #if str( $exp.conditional_quality.quality_control ) == '1':
+            #set qthres += str( $exp.conditional_quality.quality_threshold ) + '|'
+        #else:
+            #set qthres += '-1.0|'
+        #end if
+    #end for
+    #set klen = $kmer_len
+    ## bfsize stays -1 unless automatic estimation of the Bloom filter size is switched off.
+    #set bfsize = -1
+    #if str( $bloomsize_condition.bloomsize_control ) == '0':
+        #set bfsize = $bloomsize_condition.bloom_filter_size
+    #end if
+    ## NOTE(review): a dataset name containing a single quote, ',' or '|' would break the quoting or the list parsing below; confirm names are sanitized.
+    --formats '${formats}'
+    --filepaths '${filepaths}'
+    --filenames '${filenames}'
+    --compressed '${compressed}'
+    --minabundances '${minab}'
+    --qualitythresholds '${qthres}'
+
+    --klen ${klen}
+    --bfsize ${bfsize}
+
+    --outfile '${resulttxt}'
+    --outdir 'sbt'
+    --tooldir '$__tool_directory__'
+]]>
+    </command>
+    <inputs>
+        <repeat name="experiments" title="Select a list of experiments" help="Select a set of experiments on which the Sequence Bloom Tree will be built." min="1">
+            <conditional name="conditional_format">
+                <param name="format" type="select" label="Select the experiment format" help="FASTA files, FASTQ files and lists of SRA accession numbers are supported">
+                    <option value="fasta">FASTA Experiments</option>
+                    <option value="fastq">FASTQ Experiments</option>
+                    <option value="accessions">SRA Accession Numbers</option>
+                </param>
+                <when value="fasta">
+                    <conditional name="conditional_fasta_compressed">
+                        <param name="fasta_compressed" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Are your experiments compressed?" />
+                        <when value="0">
+                            <param name="fastafiles" type="data" format="fasta" multiple="true" label="Select one or more FASTA experiments" />
+                        </when>
+                        <when value="1">
+                            <param name="fastagzfiles" type="data" format="fastagz" multiple="true" label="Select one or more FASTA .gz experiments" />
+                        </when>
+                    </conditional>
+                </when>
+                <when value="fastq">
+                    <conditional name="conditional_fastq_compressed">
+                        <param name="fastq_compressed" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Are your experiments compressed?" />
+                        <when value="0">
+                            <param name="fastqfiles" type="data" format="fastq" multiple="true" label="Select one or more FASTQ experiments" />
+                        </when>
+                        <when value="1">
+                            <param name="fastqgzfiles" type="data" format="fastqgz" multiple="true" label="Select one or more FASTQ .gz experiments" />
+                        </when>
+                    </conditional>
+                </when>
+                <when value="accessions">
+                    <param name="accession_numbers" type="data" format="tabular" label="Select a list of SRA Accession Numbers" help="Select a tabular file with a list of accession numbers in the first column." />
+                </when>
+            </conditional>
+            <!-- NOTE(review): stock Galaxy names the gzipped datatypes fasta.gz and fastq.gz; confirm fastagz and fastqgz are registered. -->
+            <param name="min_abundance" type="integer" value="2" min="0" label="Insert a Bloom filter minimum abundance" help="This value is the minimum abundance cutoff for the creation of the Bloom filter." />
+            <!-- When quality control is off, the command section passes -1.0 as the quality threshold. -->
+            <conditional name="conditional_quality">
+                <param name="quality_control" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Apply a quality control procedure" />
+                <when value="1">
+                    <param name="quality_threshold" size="1" type="float" value="0.8" min="0.0" max="1.0" label="Quality threshold" help="If the number of sequences flagged as poor quality on the total number of sequences in a file is less than this threshold, the whole experiment will be excluded." />
+                </when>
+            </conditional>
+        </repeat>
+        <!-- Settings below sit outside the repeat. A k-mer length of 0 is not meaningful, so the minimum is 1. -->
+        <param name="kmer_len" type="integer" value="21" min="1" label="K-mer length" />
+        <!-- With automatic estimation on, no size is requested and the command section passes bfsize -1. -->
+        <conditional name="bloomsize_condition">
+            <param name="bloomsize_control" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Automatically estimate the Bloom filter size" />
+            <when value="0">
+                <param name="bloom_filter_size" size="1" type="integer" value="1" min="1" label="Bloom Filter size" help="Disable this field to let the tool estimate an appropriate Bloom filter size." />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <collection name="list_output" type="list" label="${tool.name} SBT Collection">
+            <discover_datasets directory="sbt" pattern="(?P&lt;identifier_0&gt;.*(?=\.)).(?P&lt;ext&gt;[^\.]*$)" ext="auto" />
+        </collection>
+        <data name="resulttxt" format="txt" from_work_dir="sbtres.txt" label="${tool.name} SBT: Result" />
+    </outputs>
+
+    <help><![CDATA[
+This tool allows you to create Sequence Bloom Trees starting from a set of FASTA or FASTQ files.
+It also allows you to control the quality of the input dataset and exclude the files that do not reach a specified quality level.
+
+-----
+
+**Input file**
+
+The input of this tool is a set of FASTA or FASTQ experiments, as well as lists of SRA accession numbers.
+For each of the selected experiments, the minimum abundance for the corresponding Bloom filter is required.
+Additionally, a quality control procedure can be applied to guarantee that the quality of every experiment exceeds a
+specified threshold. Experiments with a lower quality level will be discarded.
+
+The k-mer length must also be specified, in addition to the Bloom filter size. The latter is optional and will be
+automatically estimated if not provided.
+
+-----
+
+**Output**
+
+This tool returns a collection containing the Sequence Bloom Tree nodes and a file representing the organization of the tree.
+
+Take a look at the Query tool documentation for a detailed description about how
+to query a Sequence Bloom Tree.
+
+-----
+
+.. class:: infomark
+
+**Notes**
+
+This Galaxy tool has been developed by Fabio Cumbo.
+
+Please visit this GitHub_repository_ for more information about the BloomTree Manager.
+
+.. _GitHub_repository: https://github.com/fabio-cumbo/bloomtree-manager
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>