comparison jemultiplexer.xml @ 1:9764802ffae8 draft

Uploaded
author gbcs-embl-heidelberg
date Wed, 03 Sep 2014 04:11:49 -0400
parents
children 321b695b1a33
comparison
equal deleted inserted replaced
0:687ced68db46 1:9764802ffae8
1 <tool id="jedebarcoding" name="Jemultiplexer">
2 <description>Demultiplexes multiplexed data</description>
3 <requirements>
4 <requirement type="package" version=">=1.6">java</requirement>
5 </requirements>
6 <!-- Positional command line for the jemultiplexer.py wrapper. Every branch of the template below ends with the same five trailing arguments, in this order: barcodeReadPos, barcodeForSampleMatching, redundantBarcode, strict, MpxData2. They are read from whichever nesting level of the singlePaired conditional defines them. --> <command interpreter="python">
7 jemultiplexer.py
8 $MpxData1
9 $output1
10 $output1.id
11 $bsinputtype.barcodes
12 "$bsinputtype.barcode_list"
13 $__new_file_path__
14 $MpxData1.ext
15 $bcodelen
16 $qualityFormat
17 $maxMismatches
18 $minBaseQuality
19 $minMismatchingDelta
20 $clipBarcodeCon.xTrimLen
21 $zTrimLen
22 $clipBarcodeCon.clipBarcode
23 $addBarcodeToHeader
24 $gzipOutput
25 $barcodeDiagFile
26 $rChar
27 #if $singlePaired.sPaired == "paired":
28 $singlePaired.barcodeReadPosCon.barcodeReadPos
29 #if $singlePaired.barcodeReadPosCon.barcodeReadPos == "BOTH":
30 $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching
31 #if $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching == "BOTH":
32 $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.redundantBarcode
33 $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.strict
34 $singlePaired.MpxData2
35 #else:
36 $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcode
37 $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.strict
38 $singlePaired.MpxData2
39 #end if
40 #else:
41 $singlePaired.barcodeReadPosCon.barcodeForSampleMatching
42 $singlePaired.barcodeReadPosCon.redundantBarcode
43 $singlePaired.barcodeReadPosCon.strict
44 $singlePaired.MpxData2
45 #end if
46 #else:
47 $singlePaired.barcodeReadPos
48 $singlePaired.barcodeForSampleMatching
49 $singlePaired.redundantBarcode
50 $singlePaired.strict
51 $singlePaired.MpxData2
52 #end if
53 </command>
54 <inputs>
55 <param type="data" format="gz,fastq" name="MpxData1" label="Compressed (or not) FASTQ file" />
56 <!-- Library layout switch (single-end vs paired-end). Each branch defines MpxData2, barcodeReadPos, barcodeForSampleMatching, redundantBarcode and strict at some nesting level, because the command template references all five in every case. --> <conditional name="singlePaired">
57 <param name="sPaired" type="select" label="Is this library mate-paired?">
58 <option value="single">Single-end</option>
59 <option value="paired">Paired-end</option>
60 </param>
61 <!-- Single-end: hidden placeholder values stand in for the paired-end options. --> <when value="single">
62 <param name="MpxData2" type="hidden" value="single" />
63 <param name="barcodeReadPos" type="hidden" value="none" />
64 <param name="redundantBarcode" type="hidden" value="none" />
65 <param name="barcodeForSampleMatching" type="hidden" value="none" />
66 <param name="strict" type="hidden" value="none" />
67 </when>
68 <when value="paired">
69 <param name="MpxData2" type="data" format="gz,fastq" label="Compressed (or not) FASTQ file" />
70 <conditional name="barcodeReadPosCon">
71 <param name="barcodeReadPos" type="select" label="Barcode read position (BPOS)" help="where are the barcodes">
72 <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
73 <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
74 <option value="BOTH">BOTH (beginning of both reads)</option>
75 </param>
76 <!-- Barcode on one read only: the remaining options are fixed through hidden values. --> <when value="READ_1">
77 <param name="redundantBarcode" type="hidden" value="true" />
78 <param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
79 <param name="strict" type="hidden" value="false" />
80 </when>
81 <when value="READ_2">
82 <param name="redundantBarcode" type="hidden" value="true" />
83 <param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
84 <param name="strict" type="hidden" value="false" />
85 </when>
86 <!-- Barcodes on both reads: the user chooses which barcode(s) are used for sample look up. --> <when value="BOTH">
87 <conditional name="barcodeForSampleMatchingCon">
88 <param name="barcodeForSampleMatching" type="select" label="Barcode for sample matching (BM)" help="which barcode should be used for sample look up (BM option)." >
89 <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
90 <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
91 <option value="BOTH">BOTH (beginning of both reads)</option>
92 </param>
93 <when value="READ_1">
94 <param name="redundantBarcode" type="hidden" value="true" />
95 <param name="strict" type="hidden" value="false" />
96 </when>
97 <when value="READ_2">
98 <param name="redundantBarcode" type="hidden" value="true" />
99 <param name="strict" type="hidden" value="false" />
100 </when>
101 <!-- Both barcodes used for matching: redundancy (BRED) becomes selectable, and strictness (S) is selectable only when the barcodes are redundant. --> <when value="BOTH">
102 <conditional name="redundantBarcodeCon">
103 <param name="redundantBarcode" type="select" label="Redundant barcodes (BRED)" help="are the barcode REDUNDANT i.e. do they both resolve to the same sample (BRED option).">
104 <option value="true">True</option>
105 <option value="false" selected="true">False</option>
106 </param>
107 <when value="true">
108 <param name="strict" type="select" label="Strict (S)" help="tells whether both barcodes should resolve to the same sample." >
109 <option value="true" selected="true">True</option>
110 <option value="false">False</option>
111 </param>
112 </when>
113 <when value="false">
114 <param name="strict" type="hidden" value="false" />
115 </when>
116 </conditional>
117 </when>
118 </conditional>
119 </when>
120 </conditional>
121 </when>
122 </conditional>
123
124 <!-- Barcode set source: a dataset from the history or text pasted into the form. The branch that is not chosen supplies a hidden "none" value, so both $bsinputtype.barcodes and $bsinputtype.barcode_list are always defined for the command line. --> <conditional name="bsinputtype">
125 <param name="bsinputtype_selector" type="select" label="Barcode set input type" help="You can either submit a barcode file or paste the list in a text field. Note: one sample per line.">
126 <option value="bs_file" selected="true">Use a .bs tab-delimited file in the history</option>
127 <option value="bs_textfield">Paste the barcodes list in a text field within the form</option>
128 </param>
129 <when value="bs_file">
130 <param name="barcodes" type="data" format="bs" label="Barcode Set" />
131 <param name="barcode_list" type="hidden" value="none" />
132 </when>
133 <when value="bs_textfield">
134 <param name="barcode_list" type="text" area="True" size="10x30" label="Barcode Set (&#60;span style=&#34;color:brown;&#34;&#62;one sample per line: &#38;#60&#59;sample_name&#38;#62&#59;&#38;#60&#59;tab or space&#38;#62&#59;&#38;#60&#59;barcode&#38;#62&#59;&#60;/span&#62;)"/>
135 <param name="barcodes" type="hidden" value="none" />
136 </when>
137 </conditional>
138
139 <!-- Barcode matching parameters. The numeric ones are free text rather than integers because, per the help section, LEN, MM, Q, MMD and ZT may also be given as an X:Z pair for paired-end data. --> <param name="bcodelen" type="text" value="6" label="Barcode Length (LEN)" />
140
141 <param name="qualityFormat" type="select" label="Fastq Quality Format (V)" help="if you need other quality coding format, contact the galaxy administrators.">
142 <option value="Standard" selected="true">Fastq - Illumina Casava V1.8 with Sanger coding quality (phred scaling + 33)</option>
143 <option value="Illumina">Fastq - Illumina v1.3 or above coding of quality (phred scaling + 64)</option>
144 <option value="Solexa">Fastq - Solexa-style quality (solexa scaling + 66)</option>
145 </param>
146 <param name="maxMismatches" type="text" value="1" label="Maximum Mismatches (MM)" help="maximum mismatches for a barcode to be considered a match." />
147 <param name="minBaseQuality" type="text" value="10" label="Minimum base quality (Q)" help="any barcode bases falling below this quality will be considered a mismatch even if the bases match." />
148 <param name="minMismatchingDelta" type="text" value="1" label="Minimum mismatch difference (MMD)" help="Minimum difference between number of mismatches in the best and second best barcodes for a barcode to be considered a match." />
149 <param name="zTrimLen" type="text" value="0" label="Extra number of bases to be trimmed from the barcode end (ZT)" />
150 <!-- XT is only editable when the barcode is clipped; otherwise a hidden value of 1 is supplied so that $clipBarcodeCon.xTrimLen is still defined for the command line. --> <conditional name="clipBarcodeCon">
151 <param name="clipBarcode" type="select" label="Remove barcode sequence from read (C)" >
152 <option value="true" selected="true">True</option>
153 <option value="false">False</option>
154 </param>
155 <when value="true">
156 <param name="xTrimLen" type="text" value="1" label="Extra number of base to be trimmed right after the barcode (XT)" />
157 </when>
158 <when value="false">
159 <param name="xTrimLen" type="hidden" value="1" />
160 </when>
161 </conditional>
162 <!-- Boolean switches are modelled as true/false selects; both default to true. --> <param name="addBarcodeToHeader" type="select" label="Add matched barcode at the end of the read header (ADD)" >
163 <option value="true" selected="true">True</option>
164 <option value="false">False</option>
165 </param>
166 <param name="gzipOutput" type="select" label="Compress output (GZ)" >
167 <option value="true" selected="true">True</option>
168 <option value="false">False</option>
169 </param>
170 <!-- The barcode match report (DIAG) is off by default; the default option is marked explicitly instead of relying on option order. --> <param name="barcodeDiagFile" type="select" label="Output barcode match reporting file (DIAG)" >
171 <option value="false" selected="true">False</option>
172 <option value="true">True</option>
173 </param>
174 <!-- Option values are numeric codes (1 to 4) and the option text shows the character each code stands for; presumably the wrapper script maps the code to the character (verify in jemultiplexer.py). --> <param name="rChar" type="select" label="Replace white space in the read name/header with the specified symbol (RCHAR)" >
175 <option value="1" selected="true"> </option>
176 <option value="2">:</option>
177 <option value="3">_</option>
178 <option value="4">-</option>
179 </param>
180 </inputs>
181 <outputs>
182 <!-- The HTML statistics report is the only output declared here. --> <data format="html" name="output1" label="Demultiplexing stats on ${on_string}"/>
183 <!-- Keep this as a nice example of reformatting;
184 <data format="fastqsanger" name="output1" metadata_source="MpxData1" label="Demultiplexing stats on ${on_string}">
185 <change_format>
186 <when input="MpxData1.ext" value="fastqillumina" format="fastqillumina" />
187 </change_format>
188 </data> -->
189 </outputs>
190
191 <tests>
192 <!-- Paired-end case: barcode set read from a file, barcodes on both reads, both used for sample matching, non-redundant. --> <test>
193 <param name="MpxData1" value="C1WLBACXX_lane7_1_sequence.txt" />
194 <param name="MpxData2" value="C1WLBACXX_lane7_2_sequence.txt" />
195 <param name="sPaired" value="paired" />
196 <param name="bsinputtype_selector" value="bs_file" />
197 <param name="barcodes" value="correct_barcodes_PE_both-ends_with_fnames.txt" />
198 <param name="barcodeReadPos" value="BOTH" />
199 <param name="barcodeForSampleMatching" value="BOTH" />
200 <param name="redundantBarcode" value="false" />
201 <param name="bcodelen" value="6" />
202 <param name="qualityFormat" value="Standard" />
203 <param name="maxMismatches" value="3" />
204 <param name="minBaseQuality" value="20" />
205 <param name="minMismatchingDelta" value="2" />
206 <param name="zTrimLen" value="0" />
207 <param name="clipBarcode" value="true" />
208 <param name="xTrimLen" value="1" />
209 <param name="addBarcodeToHeader" value="true" />
210 <param name="gzipOutput" value="true" />
211 <param name="barcodeDiagFile" value="true" />
212 <param name="rChar" value="1" />
213 <output name="output1" file="result.html" ftype="html"/>
214 </test>
215 </tests>
216
217 <help>
218
219 **What it does**
220
221 Jemultiplexer : A fastq files demultiplexer with many neat options. Input files are fastq files, and can be in gzip compressed format (end in .gz).
222
223 Author: Charles Girardot (charles.girardot@embl.de).
224
225 Version: 1.0.3
226
227 ------
228
229 **Know what you are doing**
230
231 .. class:: warningmark
232
233 You will want to read the `documentation`__.
234
235 .. __: http://gbcs.embl.de/tikiwiki/JemultiplexerDocHome
236
237 ------
238
239 **Jemultiplexer parameter list**
240
241 This is an exhaustive list of Jemultiplexer options::
242
243 FASTQ_FILE1=File
244 F1=File Input fastq file (optionally gzipped) for single end data, or first read in paired end data.
245 Required.
246
247 FASTQ_FILE2=File
248 F2=File Input fastq file (optionally gzipped) for the second read of paired end data.
249 Default value: null.
250
251 BARCODE_FILE=File
252 BF=File Barcode file describing sequence list and sample names. Tab-delimited file with 2
253 columns, with the sample in col1 and the corresponding barcode in col2.
254 Note, make sure one sample per line.
255 If multiple barcode map to the same sample, barcodes can be combined using the OR operator '|'.
256 i.e. the file above can be re-written like
257 sample1 ATAT|GAGG
258 sample2 CCAA|TGTG
259 Finally, for the special situation of paired-end data in which barcodes differ at both
260 ends (ie BPOS=BOTH BRED=false BM=BOTH , see BRED option description), barcodes for read_1
261 and read_2 can be distinguished using a ':' separator i.e.
262 sample1 ATAT:GAGG
263 sample2 CCAA:TGTG
264 Here understand that sample 1 is encoded with ATAT barcode at read_1 AND GAGG barcode at
265 read_2. Note that you can still combine barcodes using | e.g.
266 sample1 ATAT|GAGG:CCAA|TGTG
267 would mean that sample 1 is mapped by the combination of barcode: ATAT OR GAGG at read_1
268 AND CCAA OR TGTG at read_2.
269 Required.
270
271 BARCODE_READ_POS=BarcodePosition
272 BPOS=BarcodePosition For paired-end data, where to expect the barcode(s) : READ_1 (beginning of read from
273 FASTQ_FILE_1), READ_2 (beginning of read from FASTQ_FILE_2), BOTH (beginning of both
274 reads). Automatically set to READ_1 in single end mode.
275 Default value: BOTH. This option can be set to 'null' to clear the default value.
276 Possible values: {READ_1, READ_2, BOTH}
277
278 REDUNDANT_BARCODES=Boolean
279 BRED=Boolean For paired-end data and when BARCODE_READ_POS == BOTH, this option indicates if both
280 read's barcodes encode redundant information, which is the usual situation
281 (REDUNDANT_BARCODES=true) i.e. barcodes are supposed to be the same at both ends or to
282 resolve to the same sample (when a pool of barcodes has been used for each sample).
283 When REDUNDANT_BARCODES=false, the 2 barcodes potentially encode different
284 information. For example, only one of the barcodes encodes the sample the read belongs to
285 while the second barcode might be a random barcode to tell apart PCR artefacts from real
286 duplicates. Another example is when both barcodes should be used in a combined fashion
287 to resolve the sample. In the first example, you should use BPOS=BOTH BRED=false
288 BM=READ_1 while in the second example, you should have BPOS=BOTH BRED=false BM=BOTH (note
289 that with BPOS=BOTH BRED=true BM=BOTH, the behavior would be different as Jemultiplexer
290 would then check the STRICT option to perform sample resolution).
291 Importantly, when BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, LEN, barcode
292 matching options (MM, MMD, Q) and read trimming/clipping options (XT, ZT) accept
293 different values for both barcodes in the form X:Z where X and Z are 2 integers.
294 Default value: true. This option can be set to 'null' to clear the default value.
295 Possible values: {true, false}
296
297 BARCODE_FOR_SAMPLE_MATCHING=BarcodePosition
298 BM=BarcodePosition Automatically set to READ_1 in single end mode.
299 For paired-end data and when BARCODE_READ_POS == BOTH, which barcode should be used to
300 resolve sample :
301 - use BM=READ_1 (beginning of read from FASTQ_FILE_1) if only this read should be used
302 for sample matching,
303 - use BM=READ_2 (beginning of read from FASTQ_FILE_2) if only this read should be used
304 for sample matching,
305 - use BM=BOTH (beginning of both reads) if both should be used ; when BM=BOTH, the
306 behaviour of Jemultiplexer is different based on the value of REDUNDANT_BARCODES.
307 If REDUNDANT_BARCODES=true, the two barcodes are considered to map to the same sample
308 and Jemultiplexer uses the two barcodes according to the STRICT value.
309 If REDUNDANT_BARCODES=false, the barcode file should map a couple of barcode to each
310 sample (e.g. sample1 => AGAGTG:TTGATA) and Jemultiplexer needs both barcodes to find the
311 relevant sample. Note that this is the only situation in which all barcode matching
312 options (MM, MMD, Q) accept different values for both barcodes in the form X:Z where X
313 and Z are 2 integers.
314 Default value: BOTH. This option can be set to 'null' to clear the default value.
315 Possible values: {READ_1, READ_2, BOTH}
316
317 STRICT=Boolean
318 S=Boolean For paired-end data and when BARCODE_READ_POS == BOTH and BM=BOTH, tells whether both
319 barcodes should resolve to the same sample. When true and if only one of the two reads
320 has a barcode match, the read pair is ignored. When false and if only one of the two
321 reads has a barcode match, the read pair is assigned to the corresponding sample ; in
322 cases where reads resolve to different samples, the read pair is ignored.
323 Default value: false. This option can be set to 'null' to clear the default value.
324 Possible values: {true, false}
325
326 BCLEN=String
327 LEN=String Length of the barcode sequences, optional. Taken from barcode file when not given.
328 In situations where BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, two distinct
329 length can be provided using the syntax LEN=X:Z where X and Z are 2 integers representing
330 the barcode length for read_1 and read_2 respectively.
331 Default value: null.
332
333 MAX_MISMATCHES=String
334 MM=String Maximum mismatches for a barcode to be considered a match. MM=null is like MM=0
335 In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH
336 (note that most likely BRED=false as it does not make great sense otherwise), two
337 distinct values can be given here using the syntax MM=X:Z where X and Z are 2 integers to
338 use for read_1 and read_2 respectively.
339 Default value: 1. This option can be set to 'null' to clear the default value.
340
341 MIN_MISMATCH_DELTA=String
342 MMD=String Minimum difference between number of mismatches in the best and second best barcodes for
343 a barcode to be considered a match. MMD=null is like MMD=0
344 In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH
345 (note that most likely BRED=false as it does not make great sense otherwise), two
346 distinct values can be given here using the syntax MMD=X:Z where X and Z are 2 integers
347 to use for read_1 and read_2 respectively.
348 Default value: 1. This option can be set to 'null' to clear the default value.
349
350 MIN_BASE_QUALITY=String
351 Q=String Minimum base quality. Any barcode bases falling below this quality will be considered a
352 mismatch even if the bases match. Q=null is like Q=0.
353 In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH
354 (note that most likely BRED=false as it does not make great sense otherwise), two
355 distinct values can be given here using the syntax Q=X:Z where X and Z are 2 integers to
356 use for read_1 and read_2 respectively.
357 Default value: 10. This option can be set to 'null' to clear the default value.
358
359 XTRIMLEN=String
360 XT=String Extra number of base to be trimmed right after the barcode (only used if
361 CLIP_BARCODE=true). Default is 1 as an extra 'T' (or 'A' depending how you see it) is
362 added for barcode ligation but this default will be adapted according to the rules
363 below. XT=null is like XT=0.
364 When running paired-end, two distinct values can be given using the syntax XT=X:Z where X
365 and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when
366 BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode as to
367 end up with reads of the same length (note that this can also be operated using ZT). If a
368 unique value is given, e.g. XT=1, while running paired-end the following rule applies :
369 (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode ; (2)
370 BPOS=BOTH, the value is used for both reads.
371 Default value: 1. This option can be set to 'null' to clear the default value.
372
373 ZTRIMLEN=String
374 ZT=String Extra number of bases to be trimmed from the barcode end i.e. 3' end. Pretty handy when a
375 pipeline is set and you already know you'll trim read at a given size. ZT=null is like
376 ZT=0.
377 When running paired-end, two distinct values can be given here using the syntax ZT=X:Z
378 where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even
379 when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode
380 as to end up with reads of the same length (note that this can also be operated using
381 XT). Note that if a single value is passed, the value always applies to both reads in
382 paired-end mode without further consideration.
383 Default value: 0. This option can be set to 'null' to clear the default value.
384
385 CLIP_BARCODE=Boolean
386 C=Boolean Remove barcode sequence from read, as well as XTRIMLEN (and ZTRIMLEN) bases if
387 applicable, before writing to output file. If false, reads are written without
388 modification to output file. Apply to both barcodes when BPOS=BOTH.
389 Default value: true. This option can be set to 'null' to clear the default value.
390 Possible values: {true, false}
391
392 ADD_BARCODE_TO_HEADER=Boolean
393 ADD=Boolean Add matched barcode at the end of the read header. Apply to both barcodes when BPOS=BOTH.
394 If true, the string ':barcode' is added at the end of the read header with a ':' added
395 only if current read header does not end with ':'.
396 If both reads of the pair have a barcode (i.e. BARCODE_READ_POS == BOTH), then the second
397 read also has its own matched barcode written. Else, the read without a barcode receives
398 the barcode from the barcoded read.
399 For example :
400 '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:'
401 becomes
402 '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE'
403 Default value: true. This option can be set to 'null' to clear the default value.
404 Possible values: {true, false}
405
406 QUALITY_FORMAT=FastqQualityFormat
407 V=FastqQualityFormat A value describing how the quality values are encoded in the fastq. Either 'Solexa' for
408 pre-pipeline 1.3 style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and
409 above (phred scaling + 64) or 'Standard' for phred scaled scores with a character shift
410 of 33. If this value is not specified (or 'null' is given), the quality format will be
411 detected automatically.
412 Default value: Standard. This option can be set to 'null' to clear the default value.
413 Possible values: {Solexa, Illumina, Standard}
414
415 GZIP_OUTPUTS=Boolean
416 GZ=Boolean Compress output s_l_t_barcode.txt files using gzip and append a .gz extension to the filenames.
417 Default value: true. This option can be set to 'null' to clear the default value.
418 Possible values: {true, false}
419 BARCODE_DIAG_FILE=String
420 DIAG=String Name for a barcode match reporting file (not generated by default). Either a name
421 (in which case the file will be created in the output dir) or full path. This file will contain
422 a line per read pair with the barcode best matching the read subsequence or 'null' when no match
423 is found according to matching parameters and the final selected sample. This file is useful for
424 debugging or further processing in case both ends are barcoded.
425 Default value: null
426
427 READ_NAME_REPLACE_CHAR=String
428 RCHAR=String Replace spaces in read name/header using provided character. This is particularly handy
429 when you need to retain ADDed barcode in read name/header during mapping (everything
430 after space in read name is usually clipped in BAM files). For example, with RCHAR=':' :
431 '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:'
432 becomes
433 '@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:2:N:0:BARCODE'
434 Default value: null.
435 </help>
436 </tool>