annotate umi-tools_extract.xml @ 0:418b961e0576 draft

planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
author iuc
date Thu, 10 Aug 2017 06:37:09 -0400
parents
children 79436b3019e9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
1 <tool id="umi_tools_extract" name="UMI-tools extract" version="0.4.4.0">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
2 <description>Extract UMI from fastq files</description>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
3 <requirements>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
4 <requirement type="package" version="0.4.4">umi_tools</requirement>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
5 </requirements>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
6 <command detect_errors="exit_code"><![CDATA[
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
7 #set $gz = False
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
8 #if $input_type.type == 'single':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
9 #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"):
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
10 ln -s '$input_type.input_single' input_single.gz &&
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
11 #set $gz = True
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
12 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
13 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
14 #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"):
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
15 ln -s '$input_type.input_read1' input_read1.gz &&
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
16 ln -s '$input_type.input_read2' input_read2.gz &&
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
17 #set $gz = True
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
18 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
19 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
20 umi_tools extract
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
21 --bc-pattern='$bc_pattern'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
22 #if $input_type.type == 'single':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
23 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
24 --stdin=input_single.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
25 --stdout out.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
26 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
27 --stdin='$input_type.input_single'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
28 --stdout '$out'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
29 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
30 #else:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
31 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
32 --stdin=input_read1.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
33 --read2-in=input_read2.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
34 --stdout out1.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
35 --read2-out=out2.gz
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
36 #else:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
37 --stdin='$input_type.input_read1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
38 --read2-in='$input_type.input_read2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
39 --stdout '$out1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
40 --read2-out='$out2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
41 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
42 #if $input_type.barcode.split == "1":
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
43 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
44 --bc-pattern2='$input_type.barcode.bc_pattern2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
45 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
46 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
47 #if not $prime3:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
48 --3prime
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
49 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
50 #if $quality.quality_selector =='true':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
51 --quality-filter-threshold '$quality.quality_filter_threshold'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
52 --quality-encoding '$quality.quality_encoding'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
53 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
54 #if $print_log == "1":
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
55 --log='$out_log'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
56 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
57 --supress-stats
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
58 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
59 #if $gz:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
60 #if $input_type.type == 'single':
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
61 && mv out.gz '$out'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
62 #else
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
63 && mv out1.gz '$out1'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
64 && mv out2.gz '$out2'
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
65 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
66 #end if
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
67 ]]></command>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
68 <inputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
69 <conditional name="input_type">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
70 <param name="type" type="select" label="Library type">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
71 <option value="single">Single-end</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
72 <option value="paired">Paired-end</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
73 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
74 <when value="single">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
75 <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
76 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
77 <when value="paired">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
78 <param name="input_read1" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
79 <param name="input_read2" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
80 <conditional name="barcode">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
81 <param name="split" argument="--split-barcode" type="select" label="Barcode on both reads?">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
82 <option value="0">Barcode on first read only</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
83 <option value="1">Barcode on both reads</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
84 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
85 <when value="0">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
86 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
87 <when value="1">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
88 <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
89 help="Use this option to specify the format of the UMI/barcode for
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
90 the second read pair if required.">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
91 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
92 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
93 </conditional>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
94 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
95 </conditional>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
96 <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
97 help="Use this option to specify the format of the UMI/barcode. Use Ns to
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
98 represent the random positions and Xs to indicate the bc positions.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
99 Bases with Ns will be extracted and added to the read name. Remaining
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
100 bases, marked with an X will be reattached to the read.">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
101 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
102 <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
103 truevalue="1" falsevalue="0" checked="true"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
104 help="By default the barcode is assumed to be on the 5' end of the read, but
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
105 use this option to specify that it is on the 3' end instead." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
106 <param name="print_log" argument="-L" type="boolean" label="Output log?"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
107 truevalue="1" falsevalue="0" checked="true"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
108 help="Choose if you want to generate a text file containing logging information." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
109 <conditional name="quality">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
110 <param name="quality_selector" type="select" label="Enable quality filter?" >
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
111 <option value="false">No</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
112 <option value="true">Yes</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
113 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
114 <when value="false">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
115 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
116 <when value="true">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
117 <param name="quality_filter_threshold" label="Phred score threshold"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
118 type="integer" value="20" argument="--quality-filter-threshold"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
119 help="Remove reads where any UMI base quality score falls below this threshold." />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
120 <param name="quality_encoding" argument="--quality-encoding" type="select" label="Quality encoding"
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
121 help="Quality score encoding of the input reads. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106].">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
122 <option value="phred33">phred33 [33-77]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
123 <option value="phred64">phred64 [64-106]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
124 <option value="solexa">solexa [59-106]</option>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
125 </param>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
126 </when>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
127 </conditional>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
128 </inputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
129 <outputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
130 <data name="out" format_source="input_single">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
131 <filter>input_type['type'] == "single"</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
132 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
133 <data name="out1" format_source="input_read1">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
134 <filter>input_type['type'] == "paired"</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
135 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
136 <data name="out2" format_source="input_read2">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
137 <filter>input_type['type'] == "paired"</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
138 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
139 <data name="out_log" format="txt">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
140 <filter>print_log == True</filter>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
141 </data>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
142 </outputs>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
143 <tests>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
144 <test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
145 <param name="type" value="single" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
146 <param name="input_single" value="t_R1.fastq" ftype="fastq" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
147 <param name="bc_pattern" value="XXXNNN" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
148 <param name="prime3" value="0" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
149 <param name="quality_selector" value="true" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
150 <param name="quality_filter_threshold" value="10" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
151 <param name="quality_encoding" value="phred33" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
152 <output name="out" file="out_SE.fastq" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
153 <output name="out_log" file="out_single.log" lines_diff="15"/>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
154 </test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
155 <test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
156 <param name="type" value="paired" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
157 <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
158 <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
159 <param name="bc_pattern" value="NNNXXX" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
160 <output name="out1" file="out_R1.fastq.gz" decompress="true" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
161 <output name="out2" file="out_R2.fastq.gz" decompress="true" />
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
162 <output name="out_log" file="out_paired.log" lines_diff="10"/>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
163 </test>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
164 </tests>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
165 <help><![CDATA[
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
166
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
167
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
168 UMI-tools extract.py - Extract UMI from fastq
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
169 =============================================
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
170
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
171 Purpose
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
172 -------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
173
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
174 Extract UMI barcode from a read and add it to the read name, leaving
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
175 any sample barcode in place. Can deal with paired end reads and UMIs
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
176 split across the paired ends.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
177
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
178 Options
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
179 -------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
180
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
181 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
182 By default the UMI is assumed to be on the first read. Use this
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
183 option if the UMI is contained on both reads and specify the
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
184 pattern of the barcode/UMI on the second read using the option
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
185 ``--bc-pattern2``
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
186
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
187 --bc-pattern
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
188 Use this option to specify the format of the UMI/barcode. Use Ns to
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
189 represent the random positions and Xs to indicate the bc positions.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
190 Bases with Ns will be extracted and added to the read name. Remaining
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
191 bases, marked with an X will be reattached to the read.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
192
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
193 E.g. If the pattern is NNXXNN,
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
194 Then the read:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
195
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
196 @HISEQ:87:00000000 read1
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
197 AAGGTTGCTGATTGGATGGGCTAG
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
198 DA1AEBFGGCG01DFH00B1FF0B
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
199 +
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
200
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
201 will become:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
202 @HISEQ:87:00000000_AATT read1
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
203 GGGCTGATTGGATGGGCTAG
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
204 1AFGGCG01DFH00B1FF0B
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
205 +
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
206
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
207 --bc-pattern2
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
208 Use this option to specify the format of the UMI/barcode for
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
209 the second read pair if required. If --bc-pattern2 is not
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
210 supplied, this defaults to the same pattern as --bc-pattern
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
211
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
212 --3prime
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
213 By default the barcode is assumed to be on the 5' end of the read, but
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
214 use this option to specify that it is on the 3' end instead
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
215
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
216 -L
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
217 Specify a log file to retain logging information and final statistics
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
218
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
219 --split-barcode
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
220 barcode is split across read pair
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
221
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
222 --quality-filter-threshold=QUALITY_FILTER_THRESHOLD
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
223 Remove reads where any UMI base quality score falls
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
224 below this threshold
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
225 --quality-encoding=QUALITY_ENCODING
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
226 Quality score encoding. Choose from phred33 [33-77],
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
227 phred64 [64-106] or solexa [59-106]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
228
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
229 Usage:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
230 ------
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
231
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
232 For single ended reads:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
233 umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
234
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
235 reads from stdin and outputs to stdout.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
236
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
237 For paired end reads:
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
238 umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS]
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
239
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
240 reads end one from stdin and end two from FASTQIN and outputs end one to stdout
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
241 and end two to FASTQOUT.
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
242
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
243 ]]></help>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
244 <citations>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
245 <citation type="doi">10.1101/gr.209601.116</citation>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
246 <citation type="bibtex">
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
247 @misc{githubUMI-tools,
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
248 title = {UMI-tools},
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
249 publisher = {GitHub},
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
250 journal = {GitHub repository},
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
251 url = {https://github.com/CGATOxford/UMI-tools},
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
252 }
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
253 </citation>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
254 </citations>
418b961e0576 planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
iuc
parents:
diff changeset
255 </tool>