comparison je-clip.xml @ 0:101525093ba1 draft

Initial upload
author gbcs-embl-heidelberg
date Wed, 25 Nov 2015 12:37:01 -0500
parents
children b61628ae2371
comparison
equal deleted inserted replaced
-1:000000000000 0:101525093ba1
1 <tool id="je_clip" name="Je-Clip" version="1.0">
2 <description>clips Unique Molecular Identifiers (UMIs) from fastq files</description>
3 <macros> <!-- macros.xml is expected to provide the expand macros and @...@ tokens referenced further down (not visible in this file) -->
4 <import>macros.xml</import>
5 </macros>
6 <stdio> <!-- any exit code of 1 or higher is reported as a fatal "Tool exception" -->
7 <exit_code range="1:" level="fatal" description="Tool exception" />
8 </stdio>
9 <version_command>echo '1.0'</version_command> <!-- NOTE(review): the reported version is a hard-coded string, it is not queried from je -->
10 <command interpreter="bash">
11 <![CDATA[
12 je clip
13
14 ## Fastq inputs: the token below is not defined in this file (presumably supplied by macros.xml); BPOS is only passed for non-single libraries
15 @single_or_paired_cmd@
16 #if str( $library.type ) != "single":
17 BPOS=${library.BPOS}
18 #end if
19 ## Shared options and barcode length tokens (defined outside this file), then ADD; the barcode result file is only requested when ADD is false
20 @common_options_cmd@
21 @barcode_len_cmd@
22 ADD=${ADD}
23 #if str($ADD) == "false":
24 BARCODE_RESULT_FILENAME=$BARCODE_RESULT_FILENAME
25 #end if
26 ## Output fastq files: OF1 is always passed, OF2 only for non-single libraries
27 OF1=${OF1}
28 #if str( $library.type ) != "single":
29 OF2=${OF2}
30 #end if
31 ## FORCE is always set to true by this wrapper
32 FORCE=true
33 ]]>
34 </command>
35 <inputs>
36 <!-- Single/paired library selection comes from the single_or_paired_general macro (defined outside this file); the BPOS select is handed to that macro as nested content and is read as $library.BPOS by the command for non-single libraries -->
37 <expand macro="single_or_paired_general">
38 <param name="BPOS" type="select" label="Barcode read position (BPOS)" help="where are the barcodes.">
39 <option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
40 <option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
41 <option value="BOTH">BOTH (beginning of both reads)</option>
42 </param>
43 </expand>
44 <expand macro="barcode_len_option"/>
45 <param name="ADD" type="boolean"
46 label="Add matched barcode at the end of the read header (ADD)"
47 truevalue="true"
48 falsevalue="false"
49 checked="true"
50 />
51 <!-- Remaining options come from the common_options macro (defined outside this file) -->
52 <expand macro="common_options"/>
53
54
55 </inputs>
56 <outputs> <!-- Datasets produced by je clip; each filter mirrors the condition under which the command block passes the matching argument -->
57 <data name="BARCODE_RESULT_FILENAME" format="tabular" label="Je-Clipped Barcodes"><!-- only requested from je when ADD is unchecked --><filter>not ADD</filter></data>
58 <data name="OF1" format_source="input_1" label="Je-Clipped ${on_string}"/>
59 <data name="OF2" format_source="input_1" label="Je-Clipped ${on_string}"> <!-- second read file; "type" lives inside the "library" conditional, a bare "type" would resolve to the Python builtin and always pass -->
60 <filter>library['type'] != "single"</filter>
61 </data>
62 </outputs>
63
64 <tests>
65 <test>
66 <!-- simple test on single end data: LEN=6 with ADD=false, compares the barcode result file and the clipped fastq -->
67 <param name="type" value="single"/>
68 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
69 <param name="LEN" value="6"/>
70 <param name="ADD" value="false"/>
71 <output name="BARCODE_RESULT_FILENAME" file="clip_barcode_result_file.txt"/>
72 <output name="OF1" file="clip_dataset1_SE.fastq"/>
73 </test>
74 <test>
75 <!-- more complex test on paired end data with different barcode for fwd/rev: LEN=6 with BPOS=BOTH, ADD is not set here; compares both clipped fastq files -->
76 <param name="type" value="paired"/>
77 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
78 <param name="input_2" value="file_2_sequence.txt" ftype="fastqsanger"/>
79 <param name="LEN" value="6"/>
80 <param name="BPOS" value="BOTH"/>
81 <output name="OF1" file="clip_dataset1_PE.fastq"/>
82 <output name="OF2" file="clip_dataset2_PE.fastq"/>
83 </test>
84 </tests>
85
86
87 <help>
88 <![CDATA[
89 **What it does**
90
91 Je clip: Clips barcodes or Unique Molecular Identifiers (UMIs) from the input fastq files.
92 Input files are fastq files, and can be in gzip compressed format.
93
94 Author: Charles Girardot (charles.girardot@embl.de).
95
96 Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de).
97
98 ------
99
100 **Know what you are doing**
101
102 .. class:: warningmark
103
104 You will want to read the `documentation`__.
105
106 .. __: http://gbcs.embl.de/portal/Je
107
108 ------
109
110 **Parameter list**
111
112 This is an exhaustive list of options::
113
114 FASTQ_FILE1=File
115 F1=File
116
117 Input fastq file (optionally gzipped) for single end data, or first read in paired end data.
118 Required.
119
120 FASTQ_FILE2=File
121 F2=File
122
123 Input fastq file (optionally gzipped) for the second read of paired end data.
124 Default value: null.
125
126 BCLEN=String
127 LEN=String
128
129 Length of the barcode sequences. When BARCODE_READ_POS == BOTH, two distinct lengths can
130 be provided using the syntax LEN=X:Z where X and Z are 2 integers representing the
131 barcode length for read_1 and read_2 respectively.
132 Required.
133
134 BARCODE_READ_POS=BarcodePosition
135 BPOS=BarcodePosition
136
137 Reads containing the sequence (i.e. UMIs) to clip:
138 READ_1 (beginning of read from FASTQ_FILE1),
139 READ_2 (beginning of read from FASTQ_FILE2),
140 BOTH (beginning of both reads).
141
142 Automatically set to READ_1 in single end mode and BOTH in paired end mode. Actually not
143 relevant for single end data
144 Default value: BOTH. This option can be set to 'null' to clear the default value.
145 Possible values: {READ_1, READ_2, BOTH, NONE}
146
147 ADD_BARCODE_TO_HEADER=Boolean
148 ADD=Boolean
149
150 Should clipped UMIs be added to the read header (at the end); apply to both barcodes when
151 BPOS=BOTH.
152 If ADD=true, the string ':barcode' is added at the end of the read header with a ':'
153 added only if current read header does not end with ':'.
154 If both reads of the pair contain a UMI (i.e. BARCODE_READ_POS == BOTH), the UMI from
155 the second read is also added to the read header.
156 Else, the header of the read without UMI receives the UMI from the other read.
157 For example:
158 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:
159 becomes
160 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE
161 Default value: true. This option can be set to 'null' to clear the default value.
162 Possible values: {true, false}
163
164 ENSURE_IDENTICAL_HEADER_NAMES=Boolean
165 SAME_HEADERS=Boolean
166
167 Makes sure headers of both reads of a pair are identical.
168 Read name (or headers) will follow the pattern (for both reads of a pair):
169 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 CLIPPED_SEQ_FROMREAD1:CLIPPED_SEQ_FROMREAD2
170 This option only makes sense in paired end mode and ADD=true. Some (if not all) mappers
171 will indeed complain when read headers of a read pair are not identical.
172 When SAME_HEADERS=FALSE and the RCHAR is used, read headers look like this:
173 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:1:N:0:TGGAGTAG
174 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:3:N:0:CGTTGTAT
175
176 SAME_HEADERS=true will instead generate the following identical header for both reads:
177 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:TGGAGTAG:CGTTGTAT
178 Note that we also clipped the useless '1:N:0' and '3:N:0' as they also result in
179 different headers.
180 Important : this option will force RCHAR=: UNLESS you specify RCHAR=null ; in which case
181 a space will be preserved i.e.:
182 HISEQ:44:C6KC0ANXX:5:1101:1491:1994 TAGAACAC:TGGAGTAG:CGTTGTAT
183
184 Default value: true.
185 This option can be set to 'null' to clear the default value. Possible values: {true,
186 false}
187
188 READ_NAME_REPLACE_CHAR=String
189 RCHAR=String
190
191 Replace spaces in read name/header using provided character.
192 This is needed when you need to retain ADDed barcode in read name/header during mapping
193 as everything after space in read name is usually clipped in BAM files.
194 For example, with RCHAR=':':
195 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 1:N:0:
196 becomes
197 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:1:N:0:BARCODE
198
199 Default value: ':'. This option can be set to 'null' to clear the default value.
200
201 XTRIMLEN=String
202 XT=String
203
204 Optional extra number of base(s) to be trimmed right after the barcode. These extra bases
205 are not added to read headers.
206 When running paired-end, two distinct values can be given using the syntax XT=X:Z where X
207 and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when
208 BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode to
209 end up with reads of identical length (note that this can also be operated using ZT). If
210 a unique value is given, e.g. XT=1, while running paired-end the following rule applies :
211 (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode
212 (2) BPOS=BOTH, the value is used for both reads.
213 Note that XT=null is like XT=0.
214 Default value: 0. This option can be set to 'null' to clear the default value.
215
216 ZTRIMLEN=String
217 ZT=String
218
219 Optional extra number of bases to be trimmed from the read end i.e. 3' end. These extra
220 bases are not added to read headers.
221 When running paired-end, two distinct values can be given here using the syntax ZT=X:Z
222 where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even
223 when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode
224 so as to end up with reads of the same length (note that this can also be operated using
225 XT). Note that if a single value is passed, the value always applies to both reads in
226 paired-end mode without further consideration.
227
228 Default value: 0. This option can be set to 'null' to clear the default value.
229
230 BARCODE_RESULT_FILENAME=String
231 BF=String
232
233 Optional file name where to write clipped barcodes, default name is clipped_barcodes.txt.
234 This file is automatically created if ADD=FALSE i.e. even if this option is not provided
235 by user (and always created if this option is given).
236 File format is tab delimited with:
237 ``read header (col 1) barcode from read_1 (col 2) barcode quality from read_1 (col 3)``
238 + barcode + quality from read_2 (col 4 and 5 respectively) when relevant.
239 Can either be a name (in which case the file will be created in the output dir) or a full path.
240 Default value: null.
241
242 ]]>
243 </help>
244
245 </tool>