comparison umi-tools_extract.xml @ 0:418b961e0576 draft

planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
author iuc
date Thu, 10 Aug 2017 06:37:09 -0400
parents
children 79436b3019e9
comparison
equal deleted inserted replaced
-1:000000000000 0:418b961e0576
<tool id="umi_tools_extract" name="UMI-tools extract" version="0.4.4.0">
    <!--
        Galaxy wrapper for "umi_tools extract": moves the UMI described by a
        barcode pattern from each read into the read name.
        Supports single-end and paired-end FASTQ input, plain or gzipped;
        every output inherits the datatype of the input it is derived from.
    -->
    <description>Extract UMI from fastq files</description>
    <requirements>
        <requirement type="package" version="0.4.4">umi_tools</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Gzipped inputs are symlinked to *.gz names and their outputs are
        ## first written to *.gz names, then moved onto the Galaxy dataset
        ## paths (presumably because umi_tools selects gzip handling from the
        ## file name extension; confirm against the umi_tools I/O helpers).
        ## $gz tracks the single-end input or read 1, $gz2 tracks read 2, so
        ## each mate of a pair is handled according to its own datatype.
        #set $gz = False
        #set $gz2 = False
        #if $input_type.type == 'single':
            #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"):
                ln -s '$input_type.input_single' input_single.gz &&
                #set $gz = True
            #end if
        #else:
            #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"):
                ln -s '$input_type.input_read1' input_read1.gz &&
                #set $gz = True
            #end if
            #if $input_type.input_read2.is_of_type("fastq.gz", "fastqsanger.gz"):
                ln -s '$input_type.input_read2' input_read2.gz &&
                #set $gz2 = True
            #end if
        #end if

        umi_tools extract
            --bc-pattern='$bc_pattern'

        #if $input_type.type == 'single':
            #if $gz:
                --stdin=input_single.gz
                --stdout out.gz
            #else:
                --stdin='$input_type.input_single'
                --stdout '$out'
            #end if
        #else:
            ## Read 1 travels through stdin/stdout.
            #if $gz:
                --stdin=input_read1.gz
                --stdout out1.gz
            #else:
                --stdin='$input_type.input_read1'
                --stdout '$out1'
            #end if
            ## Read 2 travels through the dedicated read2 options.
            #if $gz2:
                --read2-in=input_read2.gz
                --read2-out=out2.gz
            #else:
                --read2-in='$input_type.input_read2'
                --read2-out='$out2'
            #end if
            #if $input_type.barcode.split == "1":
                --split-barcode
                --bc-pattern2='$input_type.barcode.bc_pattern2'
            #end if
        #end if

        ## The checkbox asks "is the barcode at the 5' end?", so the 3' flag
        ## is emitted only when the box is unticked.
        #if not $prime3:
            --3prime
        #end if

        #if $quality.quality_selector == 'true':
            --quality-filter-threshold '$quality.quality_filter_threshold'
            --quality-encoding '$quality.quality_encoding'
        #end if

        #if $print_log == "1":
            --log='$out_log'
        #else:
            --supress-stats
        #end if

        ## Move temporary *.gz outputs onto the real output datasets.
        #if $input_type.type == 'single':
            #if $gz:
                && mv out.gz '$out'
            #end if
        #else:
            #if $gz:
                && mv out1.gz '$out1'
            #end if
            #if $gz2:
                && mv out2.gz '$out2'
            #end if
        #end if
    ]]></command>
    <inputs>
        <conditional name="input_type">
            <param name="type" type="select" label="Library type">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
            </when>
            <when value="paired">
                <param name="input_read1" type="data" format="fastq,fastq.gz" label="Read 1 (forward reads) in FASTQ format" />
                <param name="input_read2" type="data" format="fastq,fastq.gz" label="Read 2 (reverse reads) in FASTQ format" />
                <conditional name="barcode">
                    <param name="split" argument="--split-barcode" type="select" label="Barcode on both reads?">
                        <option value="0">Barcode on first read only</option>
                        <option value="1">Barcode on both reads</option>
                    </param>
                    <when value="0">
                    </when>
                    <when value="1">
                        <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"
                            help="Use this option to specify the format of the UMI/barcode for the second read pair if required." />
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
            help="Use this option to specify the format of the UMI/barcode. Use Ns to represent the random positions and Xs to indicate the bc positions. Bases with Ns will be extracted and added to the read name. Remaining bases, marked with an X will be reattached to the read." />
        <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
            truevalue="1" falsevalue="0" checked="true"
            help="Leave this enabled if the barcode is on the 5' end of the read (the default). Disable it to specify that the barcode is on the 3' end instead." />
        <param name="print_log" argument="-L" type="boolean" label="Output log?"
            truevalue="1" falsevalue="0" checked="true"
            help="Choose if you want to generate a text file containing logging information." />
        <conditional name="quality">
            <param name="quality_selector" type="select" label="Enable quality filter?">
                <option value="false">No</option>
                <option value="true">Yes</option>
            </param>
            <when value="false">
            </when>
            <when value="true">
                <param name="quality_filter_threshold" label="Phred score threshold"
                    type="integer" value="20" argument="--quality-filter-threshold"
                    help="Remove reads where any UMI base quality score falls below this threshold." />
                <param name="quality_encoding" argument="--quality-encoding" type="select" label="Quality score encoding"
                    help="Quality score encoding. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106].">
                    <option value="phred33">phred33 [33-77]</option>
                    <option value="phred64">phred64 [64-106]</option>
                    <option value="solexa">solexa [59-106]</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Each output is created only for the matching library type and inherits its datatype from its own input. -->
        <data name="out" format_source="input_single">
            <filter>input_type['type'] == "single"</filter>
        </data>
        <data name="out1" format_source="input_read1">
            <filter>input_type['type'] == "paired"</filter>
        </data>
        <data name="out2" format_source="input_read2">
            <filter>input_type['type'] == "paired"</filter>
        </data>
        <data name="out_log" format="txt">
            <filter>print_log == True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Single-end, uncompressed input, 3' barcode, quality filter enabled. -->
        <test>
            <param name="type" value="single" />
            <param name="input_single" value="t_R1.fastq" ftype="fastq" />
            <param name="bc_pattern" value="XXXNNN" />
            <param name="prime3" value="0" />
            <param name="quality_selector" value="true" />
            <param name="quality_filter_threshold" value="10" />
            <param name="quality_encoding" value="phred33" />
            <output name="out" file="out_SE.fastq" />
            <output name="out_log" file="out_single.log" lines_diff="15"/>
        </test>
        <!-- Paired-end, gzipped input, barcode on the first read only. -->
        <test>
            <param name="type" value="paired" />
            <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" />
            <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" />
            <param name="bc_pattern" value="NNNXXX" />
            <output name="out1" file="out_R1.fastq.gz" decompress="true" />
            <output name="out2" file="out_R2.fastq.gz" decompress="true" />
            <output name="out_log" file="out_paired.log" lines_diff="10"/>
        </test>
    </tests>
    <help><![CDATA[

UMI-tools extract.py - Extract UMI from fastq
=============================================

Purpose
-------

Extract UMI barcode from a read and add it to the read name, leaving
any sample barcode in place. Can deal with paired end reads and UMIs
split across the paired ends.

Options
-------

--split-barcode
    The barcode is split across the read pair.
    By default the UMI is assumed to be on the first read. Use this
    option if the UMI is contained on both reads and specify the
    pattern of the barcode/UMI on the second read using the option
    ``--bc-pattern2``

--bc-pattern
    Use this option to specify the format of the UMI/barcode. Use Ns to
    represent the random positions and Xs to indicate the bc positions.
    Bases with Ns will be extracted and added to the read name. Remaining
    bases, marked with an X will be reattached to the read.

    E.g. if the pattern is NNXXNN, then the read::

        @HISEQ:87:00000000 read1
        AAGGTTGCTGATTGGATGGGCTAG
        +
        DA1AEBFGGCG01DFH00B1FF0B

    will become::

        @HISEQ:87:00000000_AATT read1
        GGGCTGATTGGATGGGCTAG
        +
        1AFGGCG01DFH00B1FF0B

--bc-pattern2
    Use this option to specify the format of the UMI/barcode for
    the second read pair if required. If --bc-pattern2 is not
    supplied, this defaults to the same pattern as --bc-pattern

--3prime
    By default the barcode is assumed to be on the 5' end of the read, but
    use this option to specify that it is on the 3' end instead

-L
    Specify a log file to retain logging information and final statistics

--quality-filter-threshold=QUALITY_FILTER_THRESHOLD
    Remove reads where any UMI base quality score falls
    below this threshold

--quality-encoding=QUALITY_ENCODING
    Quality score encoding. Choose from phred33 [33-77],
    phred64 [64-106] or solexa [59-106]

Usage:
------

For single ended reads::

    umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]

reads from stdin and outputs to stdout.

For paired end reads::

    umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS]

reads end one from stdin and end two from FASTQIN and outputs end one to stdout
and end two to FASTQOUT.

    ]]></help>
    <citations>
        <citation type="doi">10.1101/gr.209601.116</citation>
        <citation type="bibtex">
            @misc{githubUMI-tools,
            title = {UMI-tools},
            publisher = {GitHub},
            journal = {GitHub repository},
            url = {https://github.com/CGATOxford/UMI-tools},
            }
        </citation>
    </citations>
</tool>