0
|
1 <?xml version="1.0"?>
|
|
2 <macros>
|
|
<!-- Macro "requirements": declares the single package dependency, tagdust at version 2.31. -->
<xml name="requirements">
    <requirements>
        <requirement version="2.31" type="package">tagdust</requirement>
    </requirements>
</xml>
|
|
<!-- Macro "version_command": runs tagdust with its version flag, keeps only the first
     output line and prints the second whitespace-separated field of that line. -->
<xml name="version_command">
    <version_command>tagdust --version | head -n 1 | awk '{ print $2 }'</version_command>
</xml>
|
|
<!-- Macro "stdio": every exit code from 1 upwards is classified as a fatal error. -->
<xml name="stdio">
    <stdio>
        <exit_code level="fatal" range="1:" description="Error"/>
    </stdio>
</xml>
|
|
<!-- Token "@cat-archiecture@": writes a heading, the content of the selected architecture
     file and a blank line to standard output.
     The token name is misspelled ("archiecture") but is left untouched, because anything
     that expands this token has to use exactly this spelling.
     The path is single-quoted so that a data table entry containing whitespace is still
     passed to cat as one argument. -->
<token name="@cat-archiecture@">
    echo using architecture ;
    cat '$architecture.fields.path' ;
    echo ;
</token>
|
|
<!-- Token "@tagdust-call-minus-files@": creates the output directory and starts the tagdust
     command line up to, but not including, the input files. The token deliberately ends
     without a command separator so the expanding tool can append its input files.
     * mkdir uses -p so an already existing "output" directory is not an error.
     * The thread count comes from GALAXY_SLOTS (default 4); the dollar sign is escaped so
       that the shell, not Cheetah, expands it.
     * All substituted paths are single-quoted so whitespace in a path cannot split an argument.
     * A reference fasta is only passed for the "history" and "cached" selector values;
       "none" adds no -ref argument. -->
<token name="@tagdust-call-minus-files@">
    mkdir -p output ;
    tagdust -t\${GALAXY_SLOTS:-4} -o output/data
    -arch '$architecture.fields.path'
    #if $reference_source.reference_source_selector == 'history'
        -ref '$reference_source.ref_file'
    #elif $reference_source.reference_source_selector == 'cached'
        -ref '$reference_source.ref_path.fields.path'
    #end if
</token>
|
|
<!-- Token "@ls_and_barcode@": lists the output directory on standard output and then fills
     the barcode_files output, or explains why it was not produced.
     * Barcode files requested and architecture has a barcode block: the names in "output"
       containing _BC_ are written to the barcode_files dataset. The listing is filtered with
       grep and guarded with "|| true" so that a run producing no barcode files yields an
       empty dataset instead of a non-zero exit status (a bare "ls" with an unmatched glob
       exits non-zero).
     * Barcode files requested but architecture has no barcode block: an explanatory note is
       written to the barcode_files dataset.
     * Barcode files not requested but architecture has a barcode block: a hint is echoed to
       standard output.
     The echoed messages are unquoted shell words, so they must not contain apostrophes. -->
<token name="@ls_and_barcode@">
    echo files in output folder ;
    ls -al output ;
    #if $include.barcode_files == "yes"
        #if $architecture.fields.barcode == "yes"
            cd output ;
            ls | grep '_BC_' > '$barcode_files' || true ;
        #else
            echo Selected architecture not expected to generate any barcode files > '$barcode_files' ;
            echo To avoid this output set Choose to find the barcode files to No >> '$barcode_files' ;
        #end if
    #else
        #if $architecture.fields.barcode == "yes"
            echo ;
            echo Tagdust barcode files not included in output at users request! ;
            echo To get them set Choose to find the barcode files to Yes and run the tool again ;
        #end if
    #end if
</token>
|
|
<!-- Macro "architecture": select parameter populated from the tagdust_architecture data
     table; validation fails with a message when the table offers no entries. -->
<xml name="architecture">
    <param type="select" name="architecture" label="Using architecture">
        <options from_data_table="tagdust_architecture"/>
        <validator message="No architecture found" type="no_options"/>
        <!--filter name="barcode" value="yes" column="3"/-->
    </param>
</xml>
|
|
<!-- Macro "reference_source": conditional choosing where an optional reference fasta comes
     from. "none" (the default) adds no parameter, "cached" offers the all_fasta data table,
     "history" asks for a fasta dataset. -->
<xml name="reference_source">
    <conditional name="reference_source">
        <param type="select"
               name="reference_source_selector"
               label="Choose the source for the reference fasta (If any)">
            <option selected="true" value="none">Do not include a reference fasta</option>
            <option value="cached">Locally cached</option>
            <option value="history">History</option>
        </param>
        <when value="none"/>
        <when value="cached">
            <param type="select" name="ref_path" label="Using reference fasta">
                <options from_data_table="all_fasta"/>
                <validator type="no_options"
                           message="A built-in reference fasta is not available for the build associated with the selected input file"/>
            </param>
        </when>
        <when value="history">
            <param type="data" name="ref_file" format="fasta" label="Using reference file"/>
        </when>
    </conditional>
</xml>
|
|
<!-- Macro "include_barcode_file": yes/no conditional (default "yes") deciding whether the
     tool looks for barcode separated files. Neither branch adds further parameters. -->
<xml name="include_barcode_file">
    <conditional name="include">
        <param type="select"
               name="barcode_files"
               label="Choose to find the barcode files (If any)">
            <option selected="true" value="yes">Yes. Look for possible barcode separated files.</option>
            <option value="no">No. Ignores all barcode files even if these are generated.</option>
        </param>
        <when value="yes"/>
        <when value="no"/>
    </conditional>
</xml>
|
|
<!-- Macro "output_barcode": text output that is only created when the user chose to look
     for barcode files. Files in "output" named data_BC_<designation>.fq are additionally
     discovered as visible fastq datasets.
     The angle brackets of the named regex group are written as entities: a literal
     less-than sign is not allowed inside an XML attribute value. -->
<xml name="output_barcode">
    <data name="barcode_files" format="txt" label="Tagdust barcoded reads.">
        <discover_datasets pattern="data_BC_(?P&lt;designation&gt;.+)\.fq"
                           ext="fastq"
                           directory="output"
                           visible="true"/>
        <filter>(include['barcode_files']=='yes')</filter>
    </data>
</xml>
|
|
93 <token name="@tool-documentation@">
|
|
94 <![CDATA[
|
|
95
|
|
Note: If the selected architecture could contain a Barcode HMM building block, choose to find the barcode files.
|
|
97 ....If no barcode files are found this will simply be empty.
|
|
....You may have to refresh the history for all barcode files to show.
|
|
99
|
|
100 Please contact the admin to add an architecture / HMM building blocks.
|
|
(There is a Data Manager that they can use)
|
|
102
|
|
103 ]]>
|
|
104 </token>
|
|
105 <token name="@tagdust-documentation@">
|
|
106 <![CDATA[
|
|
107 ====
|
|
108
|
|
109 Taken from The TagDust2 Manual http://tagdust.sourceforge.net (part of Version 2_31 download)
|
|
110
|
|
111 Raw sequences produced by next generation sequencing (NGS) machines can contain adapter, linker,
|
|
112 barcode and fingerprint sequences. TagDust2 is a program to extract and correctly label the sequences
|
|
113 to be mapped in downstream pipelines.
|
|
114 TagDust allows users to specify the expected architecture of a read and converts it into a hidden
|
|
115 Markov model. The latter can assign sequences to a particular barcode (or index) even in the presence
|
|
116 of sequencing errors. Sequences not matching the architecture (primer dimers, contaminants etc.) are
|
|
automatically discarded.
|
|
118
|
|
TagDust requires an input file containing sequences and a user defined HMM architecture used to
extract the reads. The architecture is composed of a selection of pre-defined building blocks representing
|
|
121 indices, barcodes, spacers and other sequences one might encounter in the raw output of a sequenced
|
|
122 sample.
|
|
123
|
|
124 HMM Building Blocks
|
|
125
|
|
126 TagDust comes with a set of pre-defined HMM building blocks. Each includes a silent state at the
|
|
beginning and end used to link blocks together. Each block is specified by a unique letter followed
|
|
128 by a colon and some information about the sequence.
|
|
129
|
|
130 Read
|
|
131 Segment modeling the read.
|
|
132 Code: R:N
|
|
133
|
|
134 Optional
|
|
135 Segment modeling an optional single or short stretch of nucleotides.
|
|
136 Code: O:N
|
|
137
|
|
138 G addition
|
|
139 Segment modeling the occasional addition of guanines to the reads.
|
|
140 (89.3% chance of a single G , 19.5% chance of 2 Gs..).
|
|
141 Code: G:G
|
|
142
|
|
143 Barcode or Index
|
|
144 Segment modeling a set of barcode sequences. For each sequence a separate HMM is created. The
|
|
145 barcode sequences must be given as a comma separated list. A null model of the same length as the
|
|
146 barcode is automatically added and initialized to the background nucleotide frequencies.
|
|
147 Code: B:GTA,AAC
|
|
148
|
|
149 Fingerprint or Unique Molecular Identifier - UMI
|
|
150 Segment modeling a fingerprint (or unique molecular identifiers). Insertions and deletions are by
|
|
151 default not allowed within a fingerprint segment.
|
|
152 Code: F:NNN
|
|
153
|
|
154 Spacer
|
|
155 Segment modeling a pre-defined sequence.
|
|
156 Code: S:GTA
|
|
157
|
|
158 Partial
|
|
This segment is used to model sequences that may only be partially present at the 5' or 3' end of
|
|
160 the read. The transition probabilities (orange and blue) are set automatically based on the length
|
|
161 distribution of exactly matching adapters.
|
|
162 Code: P:CCTTAA
|
|
163 ]]>
|
|
164 </token>
|
|
<!-- Macro "citations": one BibTeX entry for the TagDust project page plus two DOI citations. -->
<xml name="citations">
    <citations>
        <citation type="bibtex">
            @misc{
                TagDust,
                author = {Timo Lassmann},
                title = {TagDust on sourceforge},
                url = {http://tagdust.sourceforge.net/}
            }
        </citation>
        <citation type="doi">10.1093/bioinformatics/btp527</citation>
        <citation type="doi">10.1186/s12859-015-0454-y</citation>
    </citations>
</xml>
|
|
179 </macros>
|