annotate umi-tools_extract.xml @ 4:e73a22ff585c draft

planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
author iuc
date Mon, 16 Apr 2018 16:38:40 -0400
parents 79436b3019e9
children f77bc14eba31
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
1 <tool id="umi_tools_extract" name="UMI-tools extract" version="@VERSION@.1">
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
2 <description>Extract UMI from fastq files</description>
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
3 <macros>
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
4 <import>macros.xml</import>
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
5 </macros>
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
6 <expand macro="requirements" />
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
7 <command detect_errors="exit_code"><![CDATA[
4
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
8 @COMMAND_LINK@
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
9
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
10 umi_tools extract
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
11 --bc-pattern='$bc_pattern'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
12 #if $input_type.type == 'single':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
13 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
14 --stdin=input_single.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
15 --stdout out.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
16 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
17 --stdin='$input_type.input_single'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
18 --stdout '$out'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
19 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
20 #else:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
21 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
22 --stdin=input_read1.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
23 --read2-in=input_read2.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
24 --stdout out1.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
25 --read2-out=out2.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
26 #else:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
27 --stdin='$input_type.input_read1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
28 --read2-in='$input_type.input_read2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
29 --stdout '$out1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
30 --read2-out='$out2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
31 #end if
4
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
32 #if $input_type.barcode.barcode_select == "both_reads":
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
33 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
34 --bc-pattern2='$input_type.barcode.bc_pattern2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
35 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
36 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
37 #if not $prime3:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
38 --3prime
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
39 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
40 #if $quality.quality_selector =='true':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
41 --quality-filter-threshold '$quality.quality_filter_threshold'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
42 --quality-encoding '$quality.quality_encoding'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
43 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
44 #if $print_log == "1":
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
45 --log='$out_log'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
46 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
47 --supress-stats
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
48 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
49 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
50 #if $input_type.type == 'single':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
51 && mv out.gz '$out'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
52 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
53 && mv out1.gz '$out1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
54 && mv out2.gz '$out2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
55 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
56 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
57 ]]></command>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
58 <inputs>
4
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
59 <expand macro="input_types" />
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
60 <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
61 help="Use this option to specify the format of the UMI/barcode. Use Ns to
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
62 represent the random positions and Xs to indicate the bc positions.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
63 Bases with Ns will be extracted and added to the read name. Remaining
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
64 bases, marked with an X will be reattached to the read.">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
65 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
66 <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
67 truevalue="1" falsevalue="0" checked="true"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
68 help="By default the barcode is assumed to be on the 5' end of the read, but
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
69 use this option to specify that it is on the 3' end instead." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
70 <param name="print_log" argument="-L" type="boolean" label="Output log?"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
71 truevalue="1" falsevalue="0" checked="true"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
72 help="Choose if you want to generate a text file containing logging information." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
73 <conditional name="quality">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
74 <param name="quality_selector" type="select" label="Enable quality filter?" >
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
75 <option value="false">No</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
76 <option value="true">Yes</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
77 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
78 <when value="false">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
79 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
80 <when value="true">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
81 <param name="quality_filter_threshold" label="Phred score threshold"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
82 type="integer" value="20" argument="--quality-filter-threshold"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
83 help="Remove reads where any UMI base quality score falls below this threshold." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
84 <param name="quality_encoding" argument="--quality-encoding" type="select" label="Library type"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
85 help="Quality score encoding. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106].">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
86 <option value="phred33">phred33 [33-77]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
87 <option value="phred64">phred64 [64-106]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
88 <option value="solexa">solexa [59-106]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
89 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
90 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
91 </conditional>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
92 </inputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
93 <outputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
94 <data name="out" format_source="input_single">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
95 <filter>input_type['type'] == "single"</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
96 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
97 <data name="out1" format_source="input_read1">
4
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
98 <filter>input_type['type'] != "single"</filter> <!-- Python expression: keep this output for every non-single input type -->
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
99 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
100 <data name="out2" format_source="input_read2">
4
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
101 <filter>input_type['type'] != "single"</filter> <!-- Python expression: keep this output for every non-single input type -->
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
102 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
103 <data name="out_log" format="txt">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
104 <filter>print_log == True</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
105 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
106 </outputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
107 <tests>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
108 <test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
109 <param name="type" value="single" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
110 <param name="input_single" value="t_R1.fastq" ftype="fastq" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
111 <param name="bc_pattern" value="XXXNNN" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
112 <param name="prime3" value="0" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
113 <param name="quality_selector" value="true" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
114 <param name="quality_filter_threshold" value="10" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
115 <param name="quality_encoding" value="phred33" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
116 <output name="out" file="out_SE.fastq" />
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
117 <output name="out_log" file="out_single.log" lines_diff="22"/>
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
118 </test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
119 <test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
120 <param name="type" value="paired" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
121 <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
122 <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
123 <param name="bc_pattern" value="NNNXXX" />
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
124 <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" />
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
125 <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" />
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
126 <output name="out_log" file="out_paired.log" lines_diff="16"/>
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
127 </test>
4
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
128 <test>
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
129 <param name="type" value="paired_collection" />
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
130 <param name="input_readpair" >
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
131 <collection type="paired">
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
132 <element name="forward" ftype="fastq.gz" value="t_R1.fastq.gz" />
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
133 <element name="reverse" ftype="fastq.gz" value="t_R2.fastq.gz" />
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
134 </collection>
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
135 </param>
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
136 <param name="bc_pattern" value="NNNXXX" />
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
137 <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" />
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
138 <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" />
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
139 <output name="out_log" file="out_paired.log" lines_diff="16"/>
e73a22ff585c planemo upload commit 76cbd559320d2a639e35ed10cb2d9522a5a77ae0
iuc
parents: 1
diff changeset
140 </test>
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
141 </tests>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
142 <help><![CDATA[
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
143
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
144
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
145 UMI-tools extract.py - Extract UMI from fastq
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
146 =============================================
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
147
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
148 Purpose
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
149 -------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
150
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
151 Extract UMI barcode from a read and add it to the read name, leaving
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
152 any sample barcode in place. Can deal with paired end reads and UMIs
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
153 split across the paired ends
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
154
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
155 Options
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
156 -------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
157
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
158 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
159 By default the UMI is assumed to be on the first read. Use this
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
160 option if the UMI is contained on both reads and specify the
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
161 pattern of the barcode/UMI on the second read using the option
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
162 ``--bc-pattern2``
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
163
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
164 --bc-pattern
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
165 Use this option to specify the format of the UMI/barcode. Use Ns to
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
166 represent the random positions and Xs to indicate the bc positions.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
167 Bases with Ns will be extracted and added to the read name. Remaining
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
168 bases, marked with an X will be reattached to the read.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
169
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
170 E.g. If the pattern is NNXXNN,
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
171 Then the read:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
172
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
173 @HISEQ:87:00000000 read1
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
174 AAGGTTGCTGATTGGATGGGCTAG
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
175 +
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
176 DA1AEBFGGCG01DFH00B1FF0B
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
177
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
178 will become:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
179 @HISEQ:87:00000000_AATT read1
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
180 GGGCTGATTGGATGGGCTAG
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
181 +
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
182 1AFGGCG01DFH00B1FF0B
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
183
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
184 --bc-pattern2
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
185 Use this option to specify the format of the UMI/barcode for
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
186 the second read pair if required. If --bc-pattern2 is not
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
187 supplied, this defaults to the same pattern as --bc-pattern
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
188
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
189 --3prime
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
190 By default the barcode is assumed to be on the 5' end of the read, but
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
191 use this option to specify that it is on the 3' end instead
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
192
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
193 -L
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
194 Specify a log file to retain logging information and final statistics
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
195
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
196 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
197 barcode is split across read pair
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
198
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
199 --quality-filter-threshold=QUALITY_FILTER_THRESHOLD
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
200 Remove reads where any UMI base quality score falls
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
201 below this threshold
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
202 --quality-encoding=QUALITY_ENCODING
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
203 Quality score encoding. Choose from phred33 [33-77],
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
204 phred64 [64-106] or solexa [59-106]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
205
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
206 Usage:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
207 ------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
208
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
209 For single ended reads:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
210 umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
211
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
212 reads from stdin and outputs to stdout.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
213
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
214 For paired end reads:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
215 umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
216
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
217 reads end one from stdin and end two from FASTQIN and outputs end one to stdout
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
218 and end two to FASTQOUT.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
219
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
220 ]]></help>
1
79436b3019e9 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents: 0
diff changeset
221 <expand macro="citations" />
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
222 </tool>