comparison split_libraries_fastq.xml @ 4:1327fee2bf93 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit 3b54163c4f7daff76fcc589c4a9057bb03904380
author iuc
date Sat, 05 Aug 2017 07:23:17 -0400
parents 20194da2549d
children c2ffcfff57f6
comparison
equal deleted inserted replaced
3:7a4fb6fbff08 4:1327fee2bf93
1 <tool id="qiime_split_libraries_fastq" name="Split fastq libraries" version="@WRAPPER_VERSION@.0"> 1 <tool id="qiime_split_libraries_fastq" name="Split fastq libraries" version="@WRAPPER_VERSION@.0">
2 <description>to performs demultiplexing of Fastq sequence data</description> 2 <description> to performs demultiplexing of Fastq sequence data (split_libraries_fastq)</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements"/>
7 <version_command>split_libraries_fastq.py --version</version_command> 7 <version_command>split_libraries_fastq.py --version</version_command>
8 <command detect_errors="aggressive"><![CDATA[ 8 <command detect_errors="aggressive"><![CDATA[
9 split_libraries_fastq.py 9 split_libraries_fastq.py
10 #set $seq_files = '' 10 #set $seq_files = ''
11 #set $sep = '' 11 #set $sep = ''
12 #for $file in $sequence_read_fps 12 #for $file in $sequence_read_fps
13 #set $seq_files += $sep + str($file) 13 #set $seq_files += $sep + str($file)
14 #set $sep = ',' 14 #set $sep = ','
15 #end for 15 #end for
16 --sequence_read_fps '$seq_files' 16 --sequence_read_fps '$seq_files'
17 17
18 -o split_libraries 18 -o split_libraries
19 19
20 #set $mapping_files = '' 20 #set $mapping_files = ''
21 #set $sep = '' 21 #set $sep = ''
22 #for $file in $mapping_fps 22 #for $file in $mapping_fps
23 #set $mapping_files += $sep + str($file) 23 #set $mapping_files += $sep + str($file)
24 #set $sep = ',' 24 #set $sep = ','
25 #end for 25 #end for
26 --mapping_fps '$mapping_files' 26 #if $mapping_files != 'None'
27 27 --mapping_fps '$mapping_files'
28 #set $barcode_files = '' 28 #end if
29 #set $sep = '' 29
30 #for $file in $barcode_read_fps 30 #set $barcode_files = ''
31 #set $barcode_files += $sep + str($file) 31 #set $sep = ''
32 #set $sep = ',' 32 #for $file in $barcode_read_fps
33 #end for 33 #set $barcode_files += $sep + str($file)
34 --barcode_read_fps '$barcode_files' 34 #set $sep = ','
35 35 #end for
36 $store_qual_scores 36 #if $barcode_files != 'None'
37 #if str($sample_ids): 37 --barcode_read_fps '$barcode_files'
38 --sample_ids '$sample_ids' 38 #end if
39 #end if 39
40 $store_demultiplexed_fastq 40 $store_qual_scores
41 $retain_unassigned_reads 41 #if str($sample_ids) != ''
42 42 --sample_ids '$sample_ids'
43 --max_bad_run_length '$max_bad_run_length' 43 #end if
44 --min_per_read_length_fraction '$min_per_read_length_fraction' 44 $store_demultiplexed_fastq
45 --sequence_max_n '$sequence_max_n' 45 $retain_unassigned_reads
46 --start_seq_id '$start_seq_id' 46
47 $rev_comp_barcode 47 --max_bad_run_length '$max_bad_run_length'
48 $rev_comp_mapping_barcodes 48 --min_per_read_length_fraction '$min_per_read_length_fraction'
49 $rev_comp 49 --sequence_max_n '$sequence_max_n'
50 --phred_quality_threshold '$phred_quality_threshold' 50 --start_seq_id '$start_seq_id'
51 #if str( $barcode.barcode_type ) != "custom_length" 51 $rev_comp_barcode
52 --barcode_type '$barcode.barcode_type' 52 $rev_comp_mapping_barcodes
53 #else 53 $rev_comp
54 --barcode_type '$barcode.barcode_length' 54 --phred_quality_threshold '$phred_quality_threshold'
55 #end if 55 #if str( $barcode.barcode_type ) != "custom_length"
56 --max_barcode_errors '$max_barcode_errors' 56 --barcode_type '$barcode.barcode_type'
57 $phred_offset 57 #else
58 --barcode_type '$barcode.barcode_length'
59 #end if
60 --max_barcode_errors '$max_barcode_errors'
61 $phred_offset
58 ]]></command> 62 ]]></command>
59 <inputs> 63 <inputs>
60 <param argument="--sequence_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" label="Input fastq files" multiple="True"/> 64 <param argument="--sequence_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" multiple="true" label="Input fastq files"/>
61 <param argument="--mapping_fps" type="data" format="txt,tabular,tsv,csv" label="Metadata mapping files (optional)" multiple="True" optional="True"/> 65 <param argument="--mapping_fps" type="data" format="txt,tabular,tsv,csv" multiple="true" optional="true" label="Metadata mapping files"/>
62 <param argument="--barcode_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" label="Barcode read files (optional)" multiple="True" optional="True"/> 66 <param argument="--barcode_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" multiple="true" optional="true" label="Barcode read files"/>
63 <param argument="--store_qual_scores" type="boolean" label="Store quality strings in files?" truevalue="--store_qual_scores" falsevalue="" checked="False"/> 67 <param argument="--store_qual_scores" type="boolean" truevalue="--store_qual_scores" falsevalue="" checked="false" label="Store quality strings in files?"/>
64 <param argument="--sample_ids" type="text" label="Comma-separated list of samples ids to be applied to all sequences (optional)" optional="True" help="It must be one per input file path (used when data is not multiplexed)"/> 68 <param argument="--sample_ids" type="text" optional="true" label="Comma-separated list of samples ids to be applied to all sequences" help="It must be one per input file path (used when data is not multiplexed)"/>
65 <param argument="--store_demultiplexed_fastq" type="boolean" label="Write demultiplexed fastq files?" truevalue="--store_demultiplexed_fastq" falsevalue="" checked="False"/> 69 <param argument="--store_demultiplexed_fastq" type="boolean" truevalue="--store_demultiplexed_fastq" falsevalue="" checked="false" label="Write demultiplexed fastq files?"/>
66 <param argument="--retain_unassigned_reads" type="boolean" label="Retain sequences which don’t map to a barcode in the mapping file?" truevalue="--retain_unassigned_reads" falsevalue="" checked="False" help="Sample ID will be 'Unassigned'"/> 70 <param argument="--retain_unassigned_reads" type="boolean" truevalue="--retain_unassigned_reads" falsevalue="" checked="false" label="Retain sequences which don’t map to a barcode in the mapping file?" help="Sample ID will be 'Unassigned'"/>
67 <param argument="--max_bad_run_length" type="integer" value="3" label="Maximum number of consecutive low quality base calls allowed before truncating a read"/> 71 <param argument="--max_bad_run_length" type="integer" value="3" label="Maximum number of consecutive low quality base calls allowed before truncating a read"/>
68 <param argument="--min_per_read_length_fraction" type="float" value="0.75" label="Minimum number of consecutive high quality base calls to include a read (per single end read) as a fraction of the input read length"/> 72 <param argument="--min_per_read_length_fraction" type="float" value="0.75" label="Minimum number of consecutive high quality base calls to include a read (per single end read) as a fraction of the input read length"/>
69 <param argument="--sequence_max_n" type="integer" value="0" label="Maximum number of N characters allowed in a sequence to retain it" help="This is applied after quality trimming, and is total over combined paired end reads if applicable"/> 73 <param argument="--sequence_max_n" type="integer" value="0" label="Maximum number of N characters allowed in a sequence to retain it" help="This is applied after quality trimming, and is total over combined paired end reads if applicable"/>
70 <param argument="--start_seq_id" type="integer" value="0" label="Start seq_ids as ascending integers beginning with start_seq_id"/> 74 <param argument="--start_seq_id" type="integer" value="0" label="Start seq_ids as ascending integers beginning with start_seq_id"/>
71 <param argument="--rev_comp_barcode" type="boolean" label="Reverse complement barcode reads before lookup?" truevalue="--rev_comp_barcode" falsevalue="" checked="False"/> 75 <param argument="--rev_comp_barcode" type="boolean" truevalue="--rev_comp_barcode" falsevalue="" checked="false" label="Reverse complement barcode reads before lookup?"/>
72 <param argument="--rev_comp_mapping_barcodes" type="boolean" label="Reverse complement barcode in mapping before lookup?" truevalue="--rev_comp_mapping_barcodes" falsevalue="" checked="False" help="It is useful if barcodes in mapping file are reverse complements of golay codes"/> 76 <param argument="--rev_comp_mapping_barcodes" type="boolean" truevalue="--rev_comp_mapping_barcodes" falsevalue="" checked="false" label="Reverse complement barcode in mapping before lookup?" help="It is useful if barcodes in mapping file are reverse complements of golay codes"/>
73 <param argument="--rev_comp" type="boolean" label="Reverse omplement sequence before writing to output file?" truevalue="--rev_comp" falsevalue="" checked="False"/> 77 <param argument="--rev_comp" type="boolean" truevalue="--rev_comp" falsevalue="" checked="false" label="Reverse complement sequence before writing to output file?"/>
74 <param argument="--phred_quality_threshold" type="integer" value="3" label="Maximum unacceptable Phred quality score" help="E.g., for Q20 and better, 19 must be specified"/> 78 <param argument="--phred_quality_threshold" type="integer" value="3" label="Maximum unacceptable Phred quality score" help="E.g., for Q20 and better, 19 must be specified"/>
75 <conditional name="barcode"> 79 <conditional name="barcode">
76 <param argument="--barcode_type" type="select" label="Type of barcode"> 80 <param argument="--barcode_type" type="select" label="Type of barcode">
77 <option value="hamming_8">hamming_8</option> 81 <option value="hamming_8">hamming_8</option>
78 <option value="golay_12" selected="true">golay_12</option> 82 <option value="golay_12" selected="true">golay_12</option>
106 <filter>store_demultiplexed_fastq is True</filter> 110 <filter>store_demultiplexed_fastq is True</filter>
107 </data> 111 </data>
108 </outputs> 112 </outputs>
109 <tests> 113 <tests>
110 <test> 114 <test>
111 <param name="sequence_read_fps" value="split_libraries_fastq/forward_reads.fastq"/> 115 <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
112 <param name="mapping_fps" value="split_libraries_fastq/map.tsv"/> 116 <param name="mapping_fps" value="split_libraries_fastq/map.txt"/>
113 <param name="barcode_read_fps" value="split_libraries_fastq/barcodes.fastq"/> 117 <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz"/>
118 <param name="store_qual_scores" value=""/>
119 <param name="store_demultiplexed_fastq" value=""/>
120 <param name="retain_unassigned_reads" value=""/>
121 <param name="max_bad_run_length" value="3"/>
122 <param name="min_per_read_length_fraction" value="0.75"/>
123 <param name="sequence_max_n" value="0"/>
124 <param name="start_seq_id" value="0"/>
125 <param name="rev_comp_barcode" value=""/>
126 <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
127 <param name="rev_comp" value=""/>
128 <param name="phred_quality_threshold" value="19"/>
129 <conditional name="barcode">
130 <param name="barcode_type" value="golay_12"/>
131 </conditional>
132 <param name="max_barcode_errors" value="1.5"/>
133 <param name="phred_offset" value=""/>
134 <output name="log">
135 <assert_contents>
136 <has_text text="Median sequence length: 151.00"></has_text>
137 <has_text text="s41"></has_text>
138 <has_text text="s122"></has_text>
139 </assert_contents>
140 </output>
141 <output name="seqs" ftype="fasta">
142 <assert_contents>
143 <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
144 <has_text text="s26_84 M00176:17:000000000-A0CNA:1:1:18075:1844 1:N:0:0 orig_bc=AGGGTTCCAGTT new_bc=AGGGTTCCAGTT bc_diffs=0"></has_text>
145 </assert_contents>
146 </output>
147 <output name="histograms">
148 <assert_contents>
149 <has_text text="Length"></has_text>
150 <has_text text="127.0"></has_text>
151 <has_text text="157.0"></has_text>
152 </assert_contents>
153 </output>
154 </test>
155 <test>
156 <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
157 <param name="mapping_fps" value="split_libraries_fastq/map.txt"/>
158 <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz"/>
114 <param name="store_qual_scores" value="--store_qual_scores"/> 159 <param name="store_qual_scores" value="--store_qual_scores"/>
115 <param name="store_demultiplexed_fastq" value="--store_demultiplexed_fastq"/> 160 <param name="store_demultiplexed_fastq" value="--store_demultiplexed_fastq"/>
116 <param name="retain_unassigned_reads" value=""/> 161 <param name="retain_unassigned_reads" value=""/>
117 <param name="max_bad_run_length" value="3"/> 162 <param name="max_bad_run_length" value="3"/>
118 <param name="min_per_read_length_fraction" value="0.75"/> 163 <param name="min_per_read_length_fraction" value="0.75"/>
119 <param name="sequence_max_n" value="0"/> 164 <param name="sequence_max_n" value="0"/>
120 <param name="start_seq_id" value="0"/> 165 <param name="start_seq_id" value="0"/>
121 <param name="rev_comp_barcode" value=""/> 166 <param name="rev_comp_barcode" value=""/>
122 <param name="rev_comp_mapping_barcodes" value=""/> 167 <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
123 <param name="rev_comp" value=""/> 168 <param name="rev_comp" value=""/>
124 <param name="barcode_selector" value="golay_12"/> 169 <param name="phred_quality_threshold" value="19"/>
170 <conditional name="barcode">
171 <param name="barcode_type" value="golay_12"/>
172 </conditional>
125 <param name="max_barcode_errors" value="1.5"/> 173 <param name="max_barcode_errors" value="1.5"/>
126 <param name="phred_offset" value=""/> 174 <param name="phred_offset" value=""/>
127 <output name="log"> 175 <output name="log">
128 <assert_contents> 176 <assert_contents>
129 <has_line line="Median sequence length: 132.50"></has_line> 177 <has_text text="Median sequence length: 151.00"></has_text>
130 <has_text text="L1S76"></has_text> 178 <has_text text="s41"></has_text>
131 <has_text text="L1S281"></has_text> 179 <has_text text="s122"></has_text>
132 <has_text text="L1S8"></has_text> 180 </assert_contents>
133 </assert_contents> 181 </output>
134 </output> 182 <output name="seqs" ftype="fasta">
135 <output name="seqs" file="split_libraries_fastq/sequences.fasta"/> 183 <assert_contents>
136 <output name="histograms" file="split_libraries_fastq/histograms.tabular"/> 184 <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
137 <output name="seqs_qual" file="split_libraries_fastq/sequence_qualities.qual"/> 185 <has_text text="s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text>
138 <output name="seqs_fastq" file="split_libraries_fastq/demultiplexed_sequences.fastq"/> 186 </assert_contents>
187 </output>
188 <output name="seqs_fastq" ftype="fastq">
189 <assert_contents>
190 <has_text text="@s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
191 <has_text text="@s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text>
192 </assert_contents>
193 </output>
194 <output name="seqs_qual" ftype="qual">
195 <assert_contents>
196 <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
197 <has_text text="s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text>
198 </assert_contents>
199 </output>
200 <output name="histograms">
201 <assert_contents>
202 <has_text text="Length"></has_text>
203 <has_text text="127.0"></has_text>
204 <has_text text="157.0"></has_text>
205 </assert_contents>
206 </output>
207 </test>
208 <test>
209 <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz,split_libraries_fastq/lane2_read1.fastq.gz"/>
210 <param name="mapping_fps" value="split_libraries_fastq/map.txt,split_libraries_fastq/map.txt"/>
211 <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz,split_libraries_fastq/lane2_barcode.fastq.gz"/>
212 <param name="store_qual_scores" value=""/>
213 <param name="store_demultiplexed_fastq" value=""/>
214 <param name="retain_unassigned_reads" value=""/>
215 <param name="max_bad_run_length" value="3"/>
216 <param name="min_per_read_length_fraction" value="0.75"/>
217 <param name="sequence_max_n" value="0"/>
218 <param name="start_seq_id" value="0"/>
219 <param name="rev_comp_barcode" value=""/>
220 <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
221 <param name="rev_comp" value=""/>
222 <param name="phred_quality_threshold" value="19"/>
223 <conditional name="barcode">
224 <param name="barcode_type" value="golay_12"/>
225 </conditional>
226 <param name="max_barcode_errors" value="1.5"/>
227 <param name="phred_offset" value=""/>
228 <output name="log">
229 <assert_contents>
230 <has_text text="Median sequence length: 151.00"></has_text>
231 <has_text text="s41"></has_text>
232 <has_text text="s122"></has_text>
233 </assert_contents>
234 </output>
235 <output name="seqs" ftype="fasta">
236 <assert_contents>
237 <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
238 <has_text text="s26_84 M00176:17:000000000-A0CNA:1:1:18075:1844 1:N:0:0 orig_bc=AGGGTTCCAGTT new_bc=AGGGTTCCAGTT bc_diffs=0"></has_text>
239 </assert_contents>
240 </output>
241 <output name="histograms">
242 <assert_contents>
243 <has_text text="Length"></has_text>
244 <has_text text="127.0"></has_text>
245 <has_text text="157.0"></has_text>
246 </assert_contents>
247 </output>
248 </test>
249 <test>
250 <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
251 <param name="store_qual_scores" value=""/>
252 <param name="sample_ids" value="my.sample.1"/>
253 <param name="store_demultiplexed_fastq" value=""/>
254 <param name="retain_unassigned_reads" value=""/>
255 <param name="max_bad_run_length" value="3"/>
256 <param name="min_per_read_length_fraction" value="0.75"/>
257 <param name="sequence_max_n" value="0"/>
258 <param name="start_seq_id" value="0"/>
259 <param name="rev_comp_barcode" value=""/>
260 <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
261 <param name="rev_comp" value=""/>
262 <param name="phred_quality_threshold" value="19"/>
263 <conditional name="barcode">
264 <param name="barcode_type" value="not-barcoded"/>
265 </conditional>
266 <param name="phred_offset" value=""/>
267 <output name="log">
268 <assert_contents>
269 <has_text text="Median sequence length: 151.00"></has_text>
270 <has_text text="my.sample.1"></has_text>
271 <has_text text="Total number seqs written"></has_text>
272 </assert_contents>
273 </output>
274 <output name="seqs" ftype="fasta">
275 <assert_contents>
276 <has_text text="my.sample.1_0 M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text>
277 <has_text text="my.sample.1_12 M00176:17:000000000-A0CNA:1:1:14889:1778 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text>
278 </assert_contents>
279 </output>
280 <output name="histograms">
281 <assert_contents>
282 <has_text text="Length"></has_text>
283 <has_text text="124.0"></has_text>
284 <has_text text="154.0"></has_text>
285 </assert_contents>
286 </output>
287 </test>
288 <test>
289 <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz,split_libraries_fastq/lane2_read1.fastq.gz"/>
290 <param name="store_qual_scores" value=""/>
291 <param name="sample_ids" value="my.sample.1,my.sample.2"/>
292 <param name="store_demultiplexed_fastq" value=""/>
293 <param name="retain_unassigned_reads" value=""/>
294 <param name="max_bad_run_length" value="3"/>
295 <param name="min_per_read_length_fraction" value="0.75"/>
296 <param name="sequence_max_n" value="0"/>
297 <param name="start_seq_id" value="0"/>
298 <param name="rev_comp_barcode" value=""/>
299 <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
300 <param name="rev_comp" value=""/>
301 <param name="phred_quality_threshold" value="19"/>
302 <conditional name="barcode">
303 <param name="barcode_type" value="not-barcoded"/>
304 </conditional>
305 <param name="phred_offset" value=""/>
306 <output name="log">
307 <assert_contents>
308 <has_text text="Median sequence length: 151.00"></has_text>
309 <has_text text="my.sample.1"></has_text>
310 <has_text text="Total number seqs written"></has_text>
311 </assert_contents>
312 </output>
313 <output name="seqs" ftype="fasta">
314 <assert_contents>
315 <has_text text="my.sample.1_0 M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text>
316 <has_text text="my.sample.1_12 M00176:17:000000000-A0CNA:1:1:14889:1778 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text>
317 </assert_contents>
318 </output>
319 <output name="histograms">
320 <assert_contents>
321 <has_text text="Length"></has_text>
322 <has_text text="124.0"></has_text>
323 <has_text text="154.0"></has_text>
324 </assert_contents>
325 </output>
139 </test> 326 </test>
140 </tests> 327 </tests>
141 <help><![CDATA[ 328 <help><![CDATA[
142 **What it does** 329 **What it does**
143 330