view macros.xml @ 1:da90c37031bb draft default tip

Uploaded
author brenninc
date Tue, 21 Jun 2016 03:37:10 -0400
parents 40c86e14b674
children
line wrap: on
line source

<?xml version="1.0"?>
<macros>
    <!-- Dependency declaration: requests the "tagdust" package at version 2.31. -->
    <xml name="requirements">
        <requirements>
            <requirement version="2.31" type="package">tagdust</requirement>
        </requirements>
    </xml>
    <!-- Version probe: asks tagdust for its version, keeps only the first line
         of the output and prints the second whitespace-separated field of it. -->
    <xml name="version_command">
        <version_command>tagdust --version | head -n 1 | awk '{ print $2 }'</version_command>
    </xml>
    <!-- Exit-status handling: every exit code of 1 or above is reported as a
         fatal problem with the description "Error". -->
    <xml name="stdio">
        <stdio>
            <exit_code level="fatal" range="1:" description="Error"/>
        </stdio>
    </xml>
    <!-- Command fragment that prints a heading, the content of the selected
         architecture file and an empty line to standard output.
         The token name is misspelled ("archiecture"); it is left unchanged
         because anything that uses this token must refer to it by this exact name.
         The path is single-quoted so that a path containing spaces or shell
         metacharacters reaches cat as a single argument. -->
    <token name="@cat-archiecture@">
        echo using architecture ;
        cat '$architecture.fields.path' ;
        echo ;
    </token>
    <!-- Command fragment that creates the "output" directory and begins the
         tagdust call: thread count (GALAXY_SLOTS, falling back to 4), output
         prefix output/data, the architecture file and, depending on the
         reference source selector, a reference fasta from the history or from
         the cached data table.
         The fragment intentionally ends without a command terminator;
         presumably the caller appends the input files (see the token name).
         mkdir uses -p so an already existing directory is not an error, and
         all paths are single-quoted so that spaces or shell metacharacters in
         a path cannot split it into several arguments. -->
    <token name="@tagdust-call-minus-files@">
        mkdir -p output ;
        tagdust -t\${GALAXY_SLOTS:-4} -o output/data
            -arch '$architecture.fields.path'
            #if $reference_source.reference_source_selector=='history':
                -ref '$reference_source.ref_file'
            #elif $reference_source.reference_source_selector=='cached':
                -ref '$reference_source.ref_path.fields.path'
            #end if
    </token>
    <!-- Command fragment that lists the "output" directory and then deals with
         the barcode file listing:
         * listing requested and the architecture is flagged as producing
           barcodes: the names of all files matching *_BC_* in "output" are
           written to the barcode_files dataset;
         * listing requested but the architecture is not flagged: an
           explanatory note is written to the dataset instead;
         * listing not requested although the architecture is flagged: a hint
           is printed to standard output.
         When no file matches *_BC_*, ls exits with a non-zero status. Because
         it can be the last command of the job, that status would become the
         job's exit code and mark the job as failed. The error output is
         therefore discarded and the status forced to success, so the dataset
         is simply left empty, as the tool help describes.
         Note that this fragment changes the working directory to "output". -->
    <token name="@ls_and_barcode@">
        echo files in output folder ;
        ls -al output ;
        #if $include.barcode_files=="yes"
            #if $architecture.fields.barcode=="yes"
                cd output ;
                ls *_BC_* > $barcode_files 2> /dev/null || true ;
            #else
                echo Selected architecture not expected to generate any barcode files > $barcode_files ;
                echo To avoid this output set Choose to find the barcode files to No >> $barcode_files ;
            #end if
        #else
            #if $architecture.fields.barcode=="yes"
                echo ;
                echo Tagdust not included in output at users request! ;
                echo To get then set Choose to find the barcode files to Yes and run the tool again ;
            #end if
        #end if
    </token>
    <!-- Select parameter "architecture": its choices come from the
         "tagdust_architecture" data table, and validation reports
         "No architecture found" when there are no choices.
         The commented-out filter inside the parameter is inactive and kept as found. -->
    <xml name="architecture">
        <param type="select" name="architecture" label="Using architecture">
            <options from_data_table="tagdust_architecture"/>
            <validator type="no_options" message="No architecture found"/>
            <!--filter name="barcode" value="yes" column="3"/-->
        </param>
    </xml>
    <!-- Conditional "reference_source": lets the user pick where an optional
         reference fasta comes from.
         * none (default): no further parameter;
         * cached: select "ref_path" filled from the "all_fasta" data table;
         * history: dataset parameter "ref_file" in fasta format. -->
    <xml name="reference_source">
        <conditional name="reference_source">
            <param type="select" name="reference_source_selector" label="Choose the source for the reference fasta (If any)">
                <option selected="true" value="none">Do not include a reference fasta</option>
                <option value="cached">Locally cached</option>
                <option value="history">History</option>
            </param>
            <when value="none"/>
            <when value="cached">
                <param type="select" name="ref_path" label="Using reference fasta">
                    <options from_data_table="all_fasta"/>
                    <validator type="no_options" message="A built-in reference fasta is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param type="data" name="ref_file" format="fasta" label="Using reference file"/>
            </when>
        </conditional>
    </xml>
    <!-- Conditional "include": a yes/no select named "barcode_files" (default
         "no") that records whether barcode files should be looked for.
         Neither branch adds further parameters. -->
    <xml name="include_barcode_file">
        <conditional name="include">
            <param type="select" name="barcode_files" label="Choose to find the barcode files (If any)">
                <option selected="true" value="no">No. Ignores all barcode files even if these are generated.</option>
                <option value="yes">Yes. Look for possible barcode separated files.</option>
            </param>
            <when value="yes"/>
            <when value="no"/>
        </conditional>
    </xml>
    <!-- Output "barcode_files" (txt): only produced when the "include"
         conditional has barcode_files set to "yes". Files in the "output"
         directory named data_BC_<designation>.fq are additionally collected
         as visible fastq datasets. -->
    <xml name="output_barcode">
        <data name="barcode_files" format="txt" label="Tagdust barcoded reads.">
            <discover_datasets directory="output" pattern="data_BC_(?P&lt;designation&gt;.+)\.fq" ext="fastq" visible="true"/>
            <filter>(include['barcode_files']=='yes')</filter>
        </data>
    </xml>
    <token name="@tool-documentation@">
<![CDATA[

Note: If the selected architecture could contain a Barcode HMM building block, choose to find the barcode files.
....If no barcode files are found this output will simply be empty.
....You may have to refresh the history for all barcode files to show.

Please contact the admin to add architectures / HMM building blocks.
(There is a Data Manager that they can use)

]]>
    </token>
    <token name="@tagdust-documentation@">
<![CDATA[
====

Taken from The TagDust2 Manual http://tagdust.sourceforge.net (part of Version 2_31 download)

Raw sequences produced by next generation sequencing (NGS) machines can contain adapter, linker, 
barcode and fingerprint sequences. TagDust2 is a program to extract and correctly label the sequences
to be mapped in downstream pipelines.
TagDust allows users to specify the expected architecture of a read and converts it into a hidden
Markov model. The latter can assign sequences to a particular barcode (or index) even in the presence
of sequencing errors. Sequences not matching the architecture (primer dimers, contaminants etc.) are
automatically discarded.

TagDust requires an input file containing sequences and a user defined HMM architecture used to
extract the reads. The architecture is composed of a selection of pre-defined building blocks representing
indices, barcodes, spacers and other sequences one might encounter in the raw output of a sequenced
sample.

HMM Building Blocks

TagDust comes with a set of pre-defined HMM building blocks. Each includes a silent state at the
beginning and end used to link blocks together. Each block is specified by a unique letter followed
by a colon and some information about the sequence.

Read
Segment modeling the read.
Code: R:N

Optional
Segment modeling an optional single or short stretch of nucleotides.
Code: O:N

G addition
Segment modeling the occasional addition of guanines to the reads. 
(89.3% chance of a single G , 19.5% chance of 2 Gs..).
Code: G:G

Barcode or Index
Segment modeling a set of barcode sequences. For each sequence a separate HMM is created. The
barcode sequences must be given as a comma separated list. A null model of the same length as the
barcode is automatically added and initialized to the background nucleotide frequencies.
Code: B:GTA,AAC

Fingerprint or Unique Molecular Identifier - UMI
Segment modeling a fingerprint (or unique molecular identifiers). Insertions and deletions are by
default not allowed within a fingerprint segment.
Code: F:NNN

Spacer
Segment modeling a pre-defined sequence.
Code: S:GTA

Partial
This segment is used to model sequences that may only be partially present at the 5' or 3' end of
the read. The transition probabilities (orange and blue) are set automatically based on the length
distribution of exactly matching adapters.
Code: P:CCTTAA
]]>
    </token>
    <!-- Citations: one bibtex entry for the TagDust sourceforge site followed
         by two DOI citations. -->
    <xml name="citations">
        <citations>
            <citation type="bibtex">
                @misc{
                    TagDust,
                    author = {Timo Lassmann},
                    title = {TagDust on sourceforge},
                    url = {http://tagdust.sourceforge.net/}
                }
            </citation>
            <citation type="doi">10.1093/bioinformatics/btp527</citation>
            <citation type="doi">10.1186/s12859-015-0454-y</citation>
        </citations>
    </xml>
</macros>