comparison macros.xml @ 0:40c86e14b674 draft

Uploaded first version of tagdust
author brenninc
date Mon, 09 May 2016 04:08:01 -0400
parents
children da90c37031bb
comparison
equal deleted inserted replaced
-1:000000000000 0:40c86e14b674
1 <?xml version="1.0"?>
2 <macros>
<!-- Package dependency: tagdust, pinned to version 2.31. -->
3 <xml name="requirements">
4 <requirements>
5 <requirement version="2.31" type="package">tagdust</requirement>
6 </requirements>
7 </xml>
<!-- Version probe: runs tagdust with its version flag, keeps the first line of output and prints that line's second whitespace-separated field. -->
8 <xml name="version_command">
9 <version_command>tagdust --version | head -n 1 | awk '{ print $2 }'</version_command>
10 </xml>
<!-- Exit-code handling: exit codes in the range 1: are flagged as fatal with the description Error. -->
11 <xml name="stdio">
12 <stdio>
13 <exit_code description="Error" level="fatal" range="1:"/>
14 </stdio>
15 </xml>
<!--
  Shell snippet that prints a heading, then the contents of the file named by the
  selected architecture's path field, then an empty line.
  NOTE(review): the token name is spelled "archiecture"; it is left as is because
  the tools that reference this token are not visible here.
-->
16 <token name="@cat-archiecture@">
17 echo using architecture ;
18 cat $architecture.fields.path ;
19 echo ;
20 </token>
<!--
  Start of the tagdust command line. Creates the output directory, then calls tagdust with:
    -t     value taken from GALAXY_SLOTS, falling back to 4
    -o     output prefix output/data
    -arch  path field of the selected architecture
    -ref   only when a reference was chosen: the history dataset or the cached path field
  The token does not end with a command separator; presumably the including tool
  appends the input files after it (TODO confirm against the tool XML).
-->
21 <token name="@tagdust-call-minus-files@">
22 mkdir output ;
23 tagdust -t\${GALAXY_SLOTS:-4} -o output/data
24 -arch $architecture.fields.path
25 #if $reference_source.reference_source_selector=='history':
26 -ref $reference_source.ref_file
27 #end if
28 #if $reference_source.reference_source_selector=='cached':
29 -ref $reference_source.ref_path.fields.path
30 #end if
31 </token>
<!--
  Shell/Cheetah snippet run after tagdust. It lists the output folder and then:
    * barcode files requested, architecture flagged as barcoded: changes into the output
      folder and writes the names of files matching *_BC_* to $barcode_files;
    * barcode files requested, architecture not flagged as barcoded: writes an explanatory
      note to $barcode_files instead;
    * barcode files not requested but architecture flagged as barcoded: echoes a hint on
      how to get them.
  The listing is followed by "|| true" because ls exits with a non-zero status when nothing
  matches, and the stdio macro in this file marks exit codes in the range 1: as fatal.
  A barcoded architecture that yields no barcode files should give an empty list (as the
  tool help states), not a failed job.
-->
32 <token name="@ls_and_barcode@">
33 echo files in output folder ;
34 ls -al output ;
35 #if $include.barcode_files=="yes"
36 #if $architecture.fields.barcode=="yes"
37 cd output ;
38 ls *_BC_* > $barcode_files || true
39 #else
40 echo Selected architecture not expected to generate any barcode files > $barcode_files ;
41 echo To avoid this output set Choose to find the barcode files to No >> $barcode_files ;
42 #end if
43 #else
44 #if $architecture.fields.barcode=="yes"
45 echo ;
46 echo Barcode files not included in output at users request! ;
47 echo To get them set Choose to find the barcode files to Yes and run the tool again ;
48 #end if
49 #end if
50 </token>
<!-- Select parameter for the architecture; choices come from the tagdust_architecture data table and an empty table is rejected with a message. -->
51 <xml name="architecture">
52 <param type="select" name="architecture" label="Using architecture">
53 <options from_data_table="tagdust_architecture"></options>
54 <validator message="No architecture found" type="no_options"></validator>
55 <!--filter name="barcode" value="yes" column="3"/-->
56 </param>
57 </xml>
<!--
  Conditional choosing where the optional reference fasta comes from:
    none     no reference (default)
    cached   an entry of the all_fasta data table
    history  a fasta dataset from the history
  The cached list is not filtered by any input dataset, so the no_options message
  only states that no cached reference is available.
-->
58 <xml name="reference_source">
59 <conditional name="reference_source">
60 <param name="reference_source_selector" type="select" label="Choose the source for the reference fasta (If any)">
61 <option value="none" selected="true">Do not include a reference fasta</option>
62 <option value="cached">Locally cached</option>
63 <option value="history">History</option>
64 </param>
65 <when value="none"/>
66 <when value="cached">
67 <param name="ref_path" type="select" label="Using reference fasta">
68 <options from_data_table="all_fasta"/>
69 <validator type="no_options" message="No locally cached reference fasta is available; choose History or ask the admin to add one"/>
70 </param>
71 </when>
72 <when value="history">
73 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
74 </when>
75 </conditional>
76 </xml>
<!-- Yes/No choice (default Yes) on whether barcode separated files should be looked for; neither branch adds further parameters. -->
77 <xml name="include_barcode_file">
78 <conditional name="include">
79 <param type="select" name="barcode_files" label="Choose to find the barcode files (If any)">
80 <option selected="true" value="yes">Yes. Look for possible barcode separated files.</option>
81 <option value="no">No. Ignores all barcode files even if these are generated.</option>
82 </param>
83 <when value="yes"></when>
84 <when value="no"></when>
85 </conditional>
86 </xml>
<!-- Text output named barcode_files, with discovery of files in the output directory matching data_BC_NAME.fq as fastq datasets; only present when include['barcode_files'] is 'yes'. -->
87 <xml name="output_barcode">
88 <data name="barcode_files" format="txt" label="Tagdust barcoded reads.">
89 <discover_datasets directory="output" pattern="data_BC_(?P&lt;designation&gt;.+)\.fq" ext="fastq" visible="true"/>
90 <filter>(include['barcode_files']=='yes')</filter>
91 </data>
92 </xml>
93 <token name="@tool-documentation@">
94 <![CDATA[
95
96 Note: If the selected architecture could contain a Barcode HMM building block, choose to find the barcode files.
97 ....If no barcode files are found the barcode output will simply be empty.
98 ....You may have to refresh the history for all barcode files to show.
99
100 Please contact the admin to add an architecture / HMM building blocks.
101 (There is a Data Manager that they can use)
102
103 ]]>
104 </token>
105 <token name="@tagdust-documentation@">
106 <![CDATA[
107 ====
108
109 Taken from The TagDust2 Manual http://tagdust.sourceforge.net (part of Version 2_31 download)
110
111 Raw sequences produced by next generation sequencing (NGS) machines can contain adapter, linker,
112 barcode and fingerprint sequences. TagDust2 is a program to extract and correctly label the sequences
113 to be mapped in downstream pipelines.
114 TagDust allows users to specify the expected architecture of a read and converts it into a hidden
115 Markov model. The latter can assign sequences to a particular barcode (or index) even in the presence
116 of sequencing errors. Sequences not matching the architecture (primer dimers, contaminants etc.) are
117 automatically discarded.
118
119 TagDust requires an input file containing sequences and a user defined HMM architecture used to
120 extract the reads. The architecture is composed of a selection of pre-defined building blocks representing
121 indices, barcodes, spacers and other sequences one might encounter in the raw output of a sequenced
122 sample.
123
124 HMM Building Blocks
125
126 TagDust comes with a set of pre-defined HMM building blocks. Each includes a silent state at the
127 beginning and end used to link blocks together. Each block is specified by a unique letter followed
128 by a colon and some information about the sequence.
129
130 Read
131 Segment modeling the read.
132 Code: R:N
133
134 Optional
135 Segment modeling an optional single or short stretch of nucleotides.
136 Code: O:N
137
138 G addition
139 Segment modeling the occasional addition of guanines to the reads.
140 (89.3% chance of a single G , 19.5% chance of 2 Gs..).
141 Code: G:G
142
143 Barcode or Index
144 Segment modeling a set of barcode sequences. For each sequence a separate HMM is created. The
145 barcode sequences must be given as a comma separated list. A null model of the same length as the
146 barcode is automatically added and initialized to the background nucleotide frequencies.
147 Code: B:GTA,AAC
148
149 Fingerprint or Unique Molecular Identifier - UMI
150 Segment modeling a fingerprint (or unique molecular identifiers). Insertions and deletions are by
151 default not allowed within a fingerprint segment.
152 Code: F:NNN
153
154 Spacer
155 Segment modeling a pre-defined sequence.
156 Code: S:GTA
157
158 Partial
159 This segment is used to model sequences that may only be partially present at the 5‘ or 3‘ end of
160 the read. The transition probabilities (orange and blue) are set automatically based on the length
161 distribution of exactly matching adapters.
162 Code: P:CCTTAA
163 ]]>
164 </token>
<!-- Citations for the tool: a BibTeX entry pointing at the TagDust SourceForge page, followed by two DOI references. -->
165 <xml name="citations">
166 <citations>
167 <citation type="bibtex">
168 @misc{
169 TagDust,
170 author = {Timo Lassmann},
171 title = {TagDust on sourceforge},
172 url = {http://tagdust.sourceforge.net/}
173 }
174 </citation>
175 <citation type="doi">10.1093/bioinformatics/btp527</citation>
176 <citation type="doi">10.1186/s12859-015-0454-y</citation>
177 </citations>
178 </xml>
179 </macros>