Mercurial > repos > iuc > umi_tools_extract
comparison umi-tools_extract.xml @ 0:418b961e0576 draft
planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
author | iuc |
---|---|
date | Thu, 10 Aug 2017 06:37:09 -0400 |
parents | |
children | 79436b3019e9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:418b961e0576 |
---|---|
1 <tool id="umi_tools_extract" name="UMI-tools extract" version="0.4.4.0"> | |
2 <description>Extract UMI from fastq files</description> | |
3 <requirements> | |
4 <requirement type="package" version="0.4.4">umi_tools</requirement> | |
5 </requirements> | |
6 <command detect_errors="exit_code"><![CDATA[ | |
7 #set $gz = False | |
8 #if $input_type.type == 'single': | |
9 #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"): | |
10 ln -s '$input_type.input_single' input_single.gz && | |
11 #set $gz = True | |
12 #end if | |
13 #else | |
14 #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"): | |
15 ln -s '$input_type.input_read1' input_read1.gz && | |
16 ln -s '$input_type.input_read2' input_read2.gz && | |
17 #set $gz = True | |
18 #end if | |
19 #end if | |
20 umi_tools extract | |
21 --bc-pattern='$bc_pattern' | |
22 #if $input_type.type == 'single': | |
23 #if $gz: | |
24 --stdin=input_single.gz | |
25 --stdout out.gz | |
26 #else | |
27 --stdin='$input_type.input_single' | |
28 --stdout '$out' | |
29 #end if | |
30 #else: | |
31 #if $gz: | |
32 --stdin=input_read1.gz | |
33 --read2-in=input_read2.gz | |
34 --stdout out1.gz | |
35 --read2-out=out2.gz | |
36 #else: | |
37 --stdin='$input_type.input_read1' | |
38 --read2-in='$input_type.input_read2' | |
39 --stdout '$out1' | |
40 --read2-out='$out2' | |
41 #end if | |
42 #if $input_type.barcode.split == "1": | |
43 --split-barcode | |
44 --bc-pattern2='$input_type.barcode.bc_pattern2' | |
45 #end if | |
46 #end if | |
47 #if not $prime3: | |
48 --3prime | |
49 #end if | |
50 #if $quality.quality_selector =='true': | |
51 --quality-filter-threshold '$quality.quality_filter_threshold' | |
52 --quality-encoding '$quality.quality_encoding' | |
53 #end if | |
54 #if $print_log == "1": | |
55 --log='$out_log' | |
56 #else | |
57 --supress-stats | |
58 #end if | |
59 #if $gz: | |
60 #if $input_type.type == 'single': | |
61 && mv out.gz '$out' | |
62 #else | |
63 && mv out1.gz '$out1' | |
64 && mv out2.gz '$out2' | |
65 #end if | |
66 #end if | |
67 ]]></command> | |
68 <inputs> | |
69 <conditional name="input_type"> | |
70 <param name="type" type="select" label="Library type"> | |
71 <option value="single">Single-end</option> | |
72 <option value="paired">Paired-end</option> | |
73 </param> | |
74 <when value="single"> | |
75 <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> | |
76 </when> | |
77 <when value="paired"> | |
78 <param name="input_read1" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> | |
79 <param name="input_read2" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> | |
80 <conditional name="barcode"> | |
81 <param name="split" argument="--split-barcode" type="select" label="Barcode on both reads?"> | |
82 <option value="0">Barcode on first read only</option> | |
83 <option value="1">Barcode on both reads</option> | |
84 </param> | |
85 <when value="0"> | |
86 </when> | |
87 <when value="1"> | |
88 <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read" | |
89 help="Use this option to specify the format of the UMI/barcode for | |
90 the second read pair if required."> | |
91 </param> | |
92 </when> | |
93 </conditional> | |
94 </when> | |
95 </conditional> | |
96 <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read" | |
97 help="Use this option to specify the format of the UMI/barcode. Use Ns to | |
98 represent the random positions and Xs to indicate the bc positions. | |
99 Bases with Ns will be extracted and added to the read name. Remaining | |
100 bases, marked with an X will be reattached to the read."> | |
101 </param> | |
102 <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?" | |
103 truevalue="1" falsevalue="0" checked="true" | |
104 help="By default the barcode is assumed to be on the 5' end of the read, but | |
105 use this option to specify that it is on the 3' end instead." /> | |
106 <param name="print_log" argument="-L" type="boolean" label="Output log?" | |
107 truevalue="1" falsevalue="0" checked="true" | |
108 help="Choose if you want to generate a text file containing logging information." /> | |
109 <conditional name="quality"> | |
110 <param name="quality_selector" type="select" label="Enable quality filter?" > | |
111 <option value="false">No</option> | |
112 <option value="true">Yes</option> | |
113 </param> | |
114 <when value="false"> | |
115 </when> | |
116 <when value="true"> | |
117 <param name="quality_filter_threshold" label="Phred score threshold" | |
118 type="integer" value="20" argument="--quality-filter-threshold" | |
119 help="Remove reads where any UMI base quality score falls below this threshold." /> | |
120 <param name="quality_encoding" argument="--quality-encoding" type="select" label="Library type" | |
121 help="Quality score encoding. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106]."> | |
122 <option value="phred33">phred33 [33-77]</option> | |
123 <option value="phred64">phred64 [64-106]</option> | |
124 <option value="solexa">solexa [59-106]</option> | |
125 </param> | |
126 </when> | |
127 </conditional> | |
128 </inputs> | |
129 <outputs> | |
130 <data name="out" format_source="input_single"> | |
131 <filter>input_type['type'] == "single"</filter> | |
132 </data> | |
133 <data name="out1" format_source="input_read1"> | |
134 <filter>input_type['type'] == "paired"</filter> | |
135 </data> | |
136 <data name="out2" format_source="input_read2"> | |
137 <filter>input_type['type'] == "paired"</filter> | |
138 </data> | |
139 <data name="out_log" format="txt"> | |
140 <filter>print_log == True</filter> | |
141 </data> | |
142 </outputs> | |
143 <tests> | |
144 <test> | |
145 <param name="type" value="single" /> | |
146 <param name="input_single" value="t_R1.fastq" ftype="fastq" /> | |
147 <param name="bc_pattern" value="XXXNNN" /> | |
148 <param name="prime3" value="0" /> | |
149 <param name="quality_selector" value="true" /> | |
150 <param name="quality_filter_threshold" value="10" /> | |
151 <param name="quality_encoding" value="phred33" /> | |
152 <output name="out" file="out_SE.fastq" /> | |
153 <output name="out_log" file="out_single.log" lines_diff="15"/> | |
154 </test> | |
155 <test> | |
156 <param name="type" value="paired" /> | |
157 <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" /> | |
158 <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" /> | |
159 <param name="bc_pattern" value="NNNXXX" /> | |
160 <output name="out1" file="out_R1.fastq.gz" decompress="true" /> | |
161 <output name="out2" file="out_R2.fastq.gz" decompress="true" /> | |
162 <output name="out_log" file="out_paired.log" lines_diff="10"/> | |
163 </test> | |
164 </tests> | |
165 <help><![CDATA[ | |
166 | |
167 | |
168 UMI-tools extract.py - Extract UMI from fastq | |
169 ============================================= | |
170 | |
171 Purpose | |
172 ------- | |
173 | |
174 Extract UMI barcode from a read and add it to the read name, leaving | |
175 any sample barcode in place. Can deal with paired end reads and UMIs | |
176 split across the paired ends. | |
177 | |
178 Options | |
179 ------- | |
180 | |
181 --split-barcode | |
182 By default the UMI is assumed to be on the first read. Use this | |
183 option if the UMI is contained on both reads and specify the | |
184 pattern of the barcode/UMI on the second read using the option | |
185 ``--bc-pattern2`` | |
186 | |
187 --bc-pattern | |
188 Use this option to specify the format of the UMI/barcode. Use Ns to | |
189 represent the random positions and Xs to indicate the bc positions. | |
190 Bases with Ns will be extracted and added to the read name. Remaining | |
191 bases, marked with an X will be reattached to the read. | |
192 | |
193 E.g. If the pattern is NNXXNN, | |
194 Then the read: | |
195 | |
196 @HISEQ:87:00000000 read1 | |
197 AAGGTTGCTGATTGGATGGGCTAG | |
198 DA1AEBFGGCG01DFH00B1FF0B | |
199 + | |
200 | |
201 will become: | |
202 @HISEQ:87:00000000_AATT read1 | |
203 GGGCTGATTGGATGGGCTAG | |
204 1AFGGCG01DFH00B1FF0B | |
205 + | |
206 | |
207 --bc-pattern2 | |
208 Use this option to specify the format of the UMI/barcode for | |
209 the second read pair if required. If --bc-pattern2 is not | |
210 supplied, this defaults to the same pattern as --bc-pattern | |
211 | |
212 --3prime | |
213 By default the barcode is assumed to be on the 5' end of the read, but | |
214 use this option to specify that it is on the 3' end instead | |
215 | |
216 -L | |
217 Specify a log file to retain logging information and final statistics | |
218 | |
219 --split-barcode | |
220 barcode is split across read pair | |
221 | |
222 --quality-filter-threshold=QUALITY_FILTER_THRESHOLD | |
223 Remove reads where any UMI base quality score falls | |
224 below this threshold | |
225 --quality-encoding=QUALITY_ENCODING | |
226 Quality score encoding. Choose from phred33 [33-77], | |
227 phred64 [64-106] or solexa [59-106] | |
228 | |
229 Usage: | |
230 ------ | |
231 | |
232 For single ended reads: | |
233 umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS] | |
234 | |
235 reads from stdin and outputs to stdout. | |
236 | |
237 For paired end reads: | |
238 umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS] | |
239 | |
240 reads end one from stdin and end two from FASTQIN and outputs end one to stdout | |
241 and end two to FASTQOUT. | |
242 | |
243 ]]></help> | |
244 <citations> | |
245 <citation type="doi">10.1101/gr.209601.116</citation> | |
246 <citation type="bibtex"> | |
247 @misc{githubUMI-tools, | |
248 title = {UMI-tools}, | |
249 publisher = {GitHub}, | |
250 journal = {GitHub repository}, | |
251 url = {https://github.com/CGATOxford/UMI-tools}, | |
252 } | |
253 </citation> | |
254 </citations> | |
255 </tool> |