diff macros.xml @ 0:40c86e14b674 draft

Uploaded first version of tagdust
author brenninc
date Mon, 09 May 2016 04:08:01 -0400
parents
children da90c37031bb
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon May 09 04:08:01 2016 -0400
@@ -0,0 +1,179 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2.31">tagdust</requirement>
+        </requirements>
+    </xml>
+    <xml name="version_command">
+        <version_command>tagdust --version | head -n 1 | awk '{ print $2 }'</version_command>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Error" />
+        </stdio>
+    </xml>
+    <token name="@cat-archiecture@">
+        echo using architecture ;
+        cat $architecture.fields.path ;
+        echo ;
+    </token>
+    <token name="@tagdust-call-minus-files@">
+        mkdir output ;
+        tagdust -t\${GALAXY_SLOTS:-4} -o output/data 
+            -arch $architecture.fields.path
+            #if $reference_source.reference_source_selector=='history':
+                -ref $reference_source.ref_file
+            #end if
+            #if $reference_source.reference_source_selector=='cached':
+                -ref $reference_source.ref_path.fields.path
+            #end if
+    </token>
+    <token name="@ls_and_barcode@">
+        echo files in output folder ;
+        ls -al output ;
+        #if $include.barcode_files=="yes"
+            #if $architecture.fields.barcode=="yes"
+                cd output ;
+                ls *_BC_* > $barcode_files
+            #else
+                echo Selected architecture not expected to generate any barcode files > $barcode_files ;
+                echo To avoid this output set Choose to find the barcode files to No >> $barcode_files ;
+            #end if
+        #else 
+            #if $architecture.fields.barcode=="yes"
+                echo ;
+                echo Tagdust not included in output at users request! ;
+                echo To get then set Choose to find the barcode files to Yes and run the tool again ;
+            #end if
+        #end if
+    </token>
+    <xml name="architecture">
+         <param name="architecture" type="select" label="Using architecture">
+            <options from_data_table="tagdust_architecture"/>
+            <validator type="no_options" message="No architecture found"/>
+            <!--filter name="barcode" value="yes" column="3"/-->
+        </param>
+    </xml>
+    <xml name="reference_source">
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Choose the source for the reference fasta (If any)">
+                <option value="none"  selected="true">Do not include a reference fasta</option>
+                <option value="cached">Locally cached</option>
+                <option value="history">History</option>
+            </param>
+            <when value="none"/>
+            <when value="cached">
+                <param name="ref_path" type="select" label="Using reference fasta">
+                <options from_data_table="all_fasta"/>
+                    <validator type="no_options" message="A built-in reference fasta is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history"> 
+                <param name="ref_file" type="data" format="fasta" label="Using reference file" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="include_barcode_file">
+        <conditional name="include">
+            <param name="barcode_files" type="select" label="Choose to find the barcode files (If any)">
+                <option value="yes" selected="true">Yes. Look for possible barcode separated files.</option>
+                <option value="no" >No. Ignores all barcode files even if these are generated.</option>
+            </param>
+            <when value="yes"/>
+            <when value="no"/>
+        </conditional>
+    </xml>
+    <xml name="output_barcode">
+        <data format="txt" name="barcode_files" label="Tagdust barcoded reads.">
+            <discover_datasets pattern="data_BC_(?P&lt;designation&gt;.+)\.fq" ext="fastq" directory="output" visible="true" />
+            <filter>(include['barcode_files']=='yes')</filter>
+        </data>
+    </xml>
+    <token name="@tool-documentation@">
+<![CDATA[
+
+Note: Output from Architecture with could have a Barcode HMM building block choose to find the barcode files.
+....If no barcode files are found this will simply be empty.
+....You may have refresh the history for all barcode files to show.
+
+Please contact the admin to add an architecture / HMM building blocks. 
+(There is Data Manager that they can use)
+
+]]>
+    </token>
+    <token name="@tagdust-documentation@">
+<![CDATA[
+====
+
+Taken from The TagDust2 Manual http://tagdust.sourceforge.net (part of Version 2_31 download)
+
+Raw sequences produced by next generation sequencing (NGS) machines can contain adapter, linker, 
+barcode and fingerprint sequences. TagDust2 is a program to extract and correctly label the sequences
+to be mapped in downstream pipelines.
+TagDust allows users to specify the expected architecture of a read and converts it into a hidden
+Markov model. The latter can assign sequences to a particular barcode (or index) even in the presence
+of sequencing errors. Sequences not matching the architecture (primer dimers, contaminants etc.) are
+automatically discarded
+
+TagDust requires an input file containing sequences and a user defined HMM architecture used to ex-
+tract the reads. The architecture is composed of a selection of pre-defined building blocks representing
+indices, barcodes, spacers and other sequences one might encounter in the raw output of a sequenced
+sample.
+
+HMM Building Blocks
+
+TagDust comes with a set of pre-defined HMM building blocks. Each includes a silent state at the
+beginning and end used to link blocks together. Each block is specified by a unique letter following
+by a colon and some information about the sequence.
+
+Read
+Segment modeling the read.
+Code: R:N
+
+Optional
+Segment modeling an optional single or short stretch of nucleotides.
+Code: O:N
+
+G addition
+Segment modeling the occasional addition of guanines to the reads. 
+(89.3% chance of a single G , 19.5% chance of 2 Gs..).
+Code: G:G
+
+Barcode or Index
+Segment modeling a set of barcode sequences. For each sequence a separate HMM is created. The
+barcode sequences must be given as a comma separated list. A null model of the same length as the
+barcode is automatically added and initialized to the background nucleotide frequencies.
+Code: B:GTA,AAC
+
+Fingerprint or Unique Molecular Identifier - UMI
+Segment modeling a fingerprint (or unique molecular identifiers). Insertions and deletions are by
+default not allowed within a fingerprint segment.
+Code: F:NNN
+
+Spacer
+Segment modeling a pre-defined sequence.
+Code: S:GTA
+
+Partial
+This segment is used to model sequences that may only be partially present at the 5‘ or 3‘ end of
+the read. The transition probabilities (orange and blue) are set automatically based on the length
+distribution of exactly matching adapters.
+Code: P:CCTTAA
+]]>
+    </token>
+    <xml name="citations">
+        <citations>
+        <citation type="bibtex">
+            @misc{
+                TagDust,
+                author = {Timo Lassmann},
+                title = {TagDust on sourceforge},
+                url = {http://tagdust.sourceforge.net/}
+            }
+        </citation>
+            <citation type="doi">10.1093/bioinformatics/btp527</citation>
+            <citation type="doi">10.1186/s12859-015-0454-y</citation>
+        </citations>
+    </xml>
+</macros>