comparison samtools_stats.xml @ 3:95a7ddf617e7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_stats commit 70b38ebad0d0936f102e3e7cb70f1060347ea8c4
author iuc
date Thu, 27 Sep 2018 09:05:10 -0400
parents 24c5d43cb545
children 793ad847121d
comparison
equal deleted inserted replaced
2:24c5d43cb545 3:95a7ddf617e7
1 <tool id="samtools_stats" name="Stats" version="2.0.1"> 1 <tool id="samtools_stats" name="Samtools stats" version="2.0.2">
2 <description>generate statistics for BAM dataset</description> 2 <description>generate statistics for BAM dataset</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements">
7 <requirement type="package" version="5.0.4">gnuplot</requirement>
8 </expand>
7 <expand macro="stdio"/> 9 <expand macro="stdio"/>
8 <expand macro="version_command"/> 10 <expand macro="version_command"/>
9 <command><![CDATA[ 11 <command><![CDATA[
10 #if $use_reference.use_ref_selector == "yes": 12 @PREPARE_IDX@
11 #if $use_reference.reference_source.reference_source_selector == "history": 13 @PREPARE_FASTA_IDX@
12 ln -s '${use_reference.reference_source.ref_file}' &&
13 samtools faidx `basename '${use_reference.reference_source.ref_file}'` &&
14 #end if
15 #end if
16
17 samtools stats 14 samtools stats
18 '${input_file}' 15 #if $coverage_cond.coverage_select == 'yes':
19 --coverage ${coverage_min},${coverage_max},${coverage_step} 16 --coverage ${coverage_cond.coverage_min},${coverage_cond.coverage_max},${coverage_cond.coverage_step}
17 #end if
20 ${remove_dups} 18 ${remove_dups}
21
22 #if str( $filter_by_flags.filter_flags ) == "filter": 19 #if str( $filter_by_flags.filter_flags ) == "filter":
23 #if $filter_by_flags.require_flags: 20 #if $filter_by_flags.require_flags:
24 --required-flag ${sum([int(flag) for flag in str($filter_by_flags.require_flags).split(',')])} 21 #set $filter = $filter_by_flags.require_flags
22 @FLAGS@
23 --required-flag $flags
25 #end if 24 #end if
26 #if $filter_by_flags.exclude_flags: 25 #if $filter_by_flags.exclude_flags:
27 --filtering-flag ${sum([int(flag) for flag in str($filter_by_flags.exclude_flags).split(',')])} 26 #set $filter = $filter_by_flags.exclude_flags
27 @FLAGS@
28 --filtering-flag $flags
28 #end if 29 #end if
29 #end if 30 #end if
30 31 #if str($gc_depth):
31 --GC-depth ${gc_depth} 32 --GC-depth ${gc_depth}
32 --insert-size ${insert_size} 33 #end if
33 34 #if str($insert_size):
34 ## The code below is commented out because using -I/--id options causes the following exception 35 --insert-size ${insert_size}
36 #end if
37 ## The code below is commented out because using the -I/--id option causes
38 ## the following exception in samtools up to 1.9:
35 ## Samtools-htslib: init_group_id() header parsing not yet implemented 39 ## Samtools-htslib: init_group_id() header parsing not yet implemented
36 ##if str($read_group) != "": 40 ##if str($read_group) != "":
37 ## -I "${read_group}" 41 ## -I "${read_group}"
38 ##end if 42 ##end if
39
40 #if str($read_length): 43 #if str($read_length):
41 --read-length ${read_length} 44 --read-length ${read_length}
42 #end if 45 #end if
43 46 #if str($most_inserts):
44 --most-inserts ${most_inserts} 47 --most-inserts ${most_inserts}
45 --trim-quality ${trim_quality} 48 #end if
46 49 #if str($trim_quality):
47 #if $use_reference.use_ref_selector == "yes": 50 --trim-quality ${trim_quality}
48 #if $use_reference.reference_source.reference_source_selector != "history": 51 #end if
49 --ref-seq '${use_reference.reference_source.ref_file.fields.path}' 52
50 #else: 53 #if $reffa != None:
51 --ref-seq '${use_reference.reference_source.ref_file}' 54 --ref-seq '$reffa'
52 #end if 55 #end if
53 #end if 56 ## TODO currently not implemented in Galaxy
54 > '${output}' 57 ## generates STR_VALUE.bamstat where STR is given by -P and VALUE is a value of the TAG given by -S
55 58 ## needs some discover data sets action...
56 #if $split_output.split_output_selector == "yes": 59 ## -P, --split-prefix STR
57 #set outputs_to_split = str($split_output.generate_tables).split(',') 60 ## -S, --split TAG
58 && mkdir split && 61 $sparse
59 echo ${split_output.generate_tables} 62 @REGIONS_FILE@
60 63 $remove_overlaps
61 #if 'sn' in $outputs_to_split: 64 #if str($cov_threshold):
62 && echo "# Summary Numbers" > 'split/Summary numbers.tab' && 65 -g $cov_threshold
63 echo "" >> 'split/Summary numbers.tab' && 66 #end if
64 if grep -q ^SN '${output}'; then 67 infile
65 grep ^SN '${output}' | cut -f 2- >> 'split/Summary numbers.tab'; 68 @REGIONS_MANUAL@
66 fi 69 > '$output'
67 #end if 70
68 71 #if $cond_plot.select_plot=='yes':
69 #if 'ffq' in $outputs_to_split: 72 && plot-bamstats '$output' $cond_plot.log -p '${html_file.files_path}'/
70 && echo "# Columns correspond to qualities and rows to cycles. First column is the cycle number\n" > 'split/First Fragment Qualities.tab' && 73 && mv '${html_file.files_path}'/index.html '${html_file}'
71 if grep -q ^FFQ '${output}'; then 74 #end if
72 grep ^FFQ '${output}' | cut -f 2- >> 'split/First Fragment Qualities.tab'; 75
73 fi 76 #if $split_output_cond.split_output_selector == "yes":
74 #end if 77 #set outputs_to_split = str($split_output_cond.generate_tables).split(',')
75 78 && mkdir split
76 #if 'lfq' in $outputs_to_split: 79 #for s in str($split_output_cond.generate_tables).split(','):
77 && echo "# Columns correspond to qualities and rows to cycles. First column is the cycle number" > 'split/Last Fragment Qualities.tab' && 80 && name=`cat '$output' | grep '\^$s' | cut -d'.' -f 1 | sed 's/^# //'`
78 if grep -q ^LFQ '${output}'; then 81 && awk '/\^/{out=0} /\^$s/{out=1} {if(out==1){print $0}}' '$output' | sed 's/Use `grep .*` to extract this part.//' | sed 's/$s\t//' > "split/\$name.tab"
79 grep ^LFQ '${output}' | cut -f 2- >> 'split/Last Fragment Qualities.tab'; 82 #end for
80 fi 83 #end if
81 #end if 84 ]]></command>
82
83 #if 'mpc' in $outputs_to_split:
84 && echo "# Columns correspond to qualities, rows to cycles. First column is the cycle number, second is the number of N's and the rest is the number of mismatches" > 'split/Mismatches per cycle.tab' &&
85 if grep -q ^MPC '${output}'; then
86 grep ^MPC '${output}' | cut -f 2- >> 'split/Mismatches per cycle.tab';
87 fi
88 #end if
89
90 #if 'gcf' in $outputs_to_split:
91 && echo "# GC Content of first fragments" > 'split/GC Content of first fragments.tab' &&
92 if grep -q ^GCF '${output}'; then
93 grep ^GCF '${output}' | cut -f 2- >> 'split/GC Content of first fragments.tab';
94 fi
95 #end if
96
97 #if 'gcl' in $outputs_to_split:
98 && echo "# GC Content of last fragments" > 'split/GC Content of last fragments.tab' &&
99 if grep -q ^GCL '${output}'; then
100 grep ^GCL '${output}' | cut -f 2- >> 'split/GC Content of last fragments.tab';
101 fi
102 #end if
103
104 #if 'gcc' in $outputs_to_split:
105 && echo "# ACGT content per cycle. The columns are: cycle, and A,C,G,T counts (percent)" > 'split/ACGT content per cycle.tab' &&
106 if grep -q ^GCC '${output}'; then
107 grep ^GCC '${output}' | cut -f 2- >> 'split/ACGT content per cycle.tab';
108 fi
109 #end if
110
111 #if 'is' in $outputs_to_split:
112 && echo "# Insert sizes. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs" > 'split/Insert sizes.tab' &&
113 if grep -q ^IS '${output}'; then
114 grep ^IS '${output}' | cut -f 2- >> 'split/Insert sizes.tab';
115 fi
116 #end if
117
118 #if 'rl' in $outputs_to_split:
119 && echo "# Read lengths. The columns are: read length, count" > 'split/Read lengths.tab' &&
120 if grep -q ^RL '${output}'; then
121 grep ^RL '${output}' | cut -f 2- >> 'split/Read lengths.tab';
122 fi
123 #end if
124
125 #if 'id' in $outputs_to_split:
126 && echo "# Indel distribution. The columns are: length, number of insertions, number of deletions" > 'split/Indel distribution.tab' &&
127 if grep -q ^ID '${output}'; then
128 grep ^ID '${output}' | cut -f 2- >> 'split/Indel distribution.tab';
129 fi
130 #end if
131
132 #if 'ic' in $outputs_to_split:
133 && echo "# Indels per cycle. The columns are: cycle, number of insertions (fwd), .. (rev) , number of deletions (fwd), .. (rev)" > 'split/Indels per cycle.tab' &&
134 if grep -q ^IC '${output}'; then
135 grep ^IC '${output}' | cut -f 2- >> 'split/Indels per cycle.tab';
136 fi
137 #end if
138
139 #if 'cov' in $outputs_to_split:
140 && echo "# Coverage distribution" > 'split/Coverage distribution.tab' &&
141 if grep -q ^COV '${output}'; then
142 grep ^COV '${output}' | cut -f 2- >> 'split/Coverage distribution.tab';
143 fi
144 #end if
145
146 #if 'gcd' in $outputs_to_split:
147 && echo "# GC-depth. The columns are: GC%, unique sequence percentiles, 10th, 25th, 50th, 75th and 90th depth percentile" > 'split/GC depth.tab' &&
148 if grep -q ^GCD '${output}'; then
149 grep ^GCD '${output}' | cut -f 2- >> 'split/GC depth.tab';
150 fi
151 #end if
152 #end if
153 ]]></command>
154 <inputs> 85 <inputs>
155 <param name="input_file" type="data" format="sam,bam" label="BAM file" /> 86 <param name="input" type="data" format="sam,bam,cram" label="BAM file" />
156 <param name="coverage_min" type="integer" value="1" label="Minimum coverage" help="Minimum coverage" /> 87 <conditional name="coverage_cond">
157 <param name="coverage_max" type="integer" value="1000" label="Maximum coverage" help="Maximum coverage" /> 88 <param name="coverage_select" type="select" label="Set coverage distribution">
158 <param name="coverage_step" type="integer" value="1" label="Coverage step" help="Step value for coverage" /> 89 <option value="no" selected="True">No</option>
159 <param name="remove_dups" argument="--remove-dups" type="boolean" truevalue="--remove-dups" falsevalue="" checked="False" 90 <option value="yes">Yes</option>
160 label="Exclude reads marked as duplicates" /> 91 </param>
161 <conditional name="split_output"> 92 <when value="no"/>
93 <when value="yes">
94 <param name="coverage_min" type="integer" value="1" label="Minimum coverage" help="Minimum coverage" />
95 <param name="coverage_max" type="integer" value="1000" label="Maximum coverage" help="Maximum coverage" />
96 <param name="coverage_step" type="integer" value="1" label="Coverage step" help="Step value for coverage" />
97 </when>
98 </conditional>
99 <param name="remove_dups" argument="--remove-dups" type="boolean" truevalue="--remove-dups" falsevalue="" checked="False" label="Exclude reads marked as duplicates" />
100 <conditional name="split_output_cond">
162 <param name="split_output_selector" type="select" label="Output" help="Select between one single output or separate outputs for each statistics"> 101 <param name="split_output_selector" type="select" label="Output" help="Select between one single output or separate outputs for each statistics">
163 <option value="no" selected="True">One single summary file</option> 102 <option value="no" selected="True">One single summary file</option>
164 <option value="yes">Separate datasets for each statistic</option> 103 <option value="yes">Separate datasets for each statistic</option>
165 </param> 104 </param>
166 <when value="no" /> 105 <when value="no" />
167 <when value="yes"> 106 <when value="yes">
168 <param name="generate_tables" type="select" display="checkboxes" multiple="True" label="Desired output files"> 107 <param name="generate_tables" type="select" display="checkboxes" multiple="True" label="Desired output files">
169 <option value="sn">Summary numbers</option> 108 <option value="SN">Summary numbers</option>
170 <option value="ffq">First Fragment Qualities</option> 109 <option value="FFQ">First Fragment Qualities</option>
171 <option value="lfq">Last Fragment Qualities</option> 110 <option value="LFQ">Last Fragment Qualities</option>
172 <option value="mpc">Mismatches per cycle</option> 111 <option value="MPC">Mismatches per cycle</option>
173 <option value="gcf">GC Content of first fragments</option> 112 <option value="GCF">GC Content of first fragments</option>
174 <option value="gcl">GC Content of last fragments</option> 113 <option value="GCL">GC Content of last fragments</option>
175 <option value="gcc">ACGT content per cycle</option> 114 <option value="GCC">ACGT content per cycle</option>
176 <option value="is">Insert sizes</option> 115 <option value="FBC">ACGT content per cycle for first fragments</option>
177 <option value="rl">Read lengths</option> 116 <option value="LBC">ACGT content per cycle for last fragments</option>
178 <option value="id">Indel distribution</option> 117 <option value="IS">Insert sizes</option>
179 <option value="ic">Indels per cycle</option> 118 <option value="RL">Read lengths</option>
180 <option value="cov">Coverage distribution</option> 119 <option value="FRL">Read lengths for first fragments</option>
181 <option value="gcd">GC depth</option> 120 <option value="LRL">Read lengths for last fragments</option>
121 <option value="ID">Indel distribution</option>
122 <option value="IC">Indels per cycle</option>
123 <option value="COV">Coverage distribution</option>
124 <option value="GCD">GC depth</option>
182 </param> 125 </param>
183 </when> 126 </when>
184 </conditional> 127 </conditional>
185 <conditional name="filter_by_flags"> 128 <conditional name="filter_by_flags">
186 <param name="filter_flags" type="select" label="Filter by SAM flags" help="More info on the flags: https://samtools.github.io/hts-specs/SAMv1.pdf"> 129 <param name="filter_flags" type="select" label="Filter by SAM flags" help="More info on the flags: https://samtools.github.io/hts-specs/SAMv1.pdf">
187 <option value="nofilter" selected="True">Do not filter</option> 130 <option value="nofilter" selected="True">Do not filter</option>
188 <option value="filter">Filter by flags to exclude or require</option> 131 <option value="filter">Filter by flags to exclude or require</option>
189 </param> 132 </param>
190 <when value="filter"> 133 <when value="filter">
191 <param name="require_flags" argument="-f" type="select" display="checkboxes" multiple="True" label="Require"> 134 <param name="require_flags" argument="-f" type="select" display="checkboxes" multiple="True" label="Require">
192 <option value="1">Read is paired</option> 135 <expand macro="flag_options" />
193 <option value="2">Read is mapped in a proper pair</option>
194 <option value="4">The read is unmapped</option>
195 <option value="8">The mate is unmapped</option>
196 <option value="16">Read strand</option>
197 <option value="32">Mate strand</option>
198 <option value="64">Read is the first in a pair</option>
199 <option value="128">Read is the second in a pair</option>
200 <option value="256">The alignment or this read is not primary</option>
201 <option value="512">The read fails platform/vendor quality checks</option>
202 <option value="1024">The read is a PCR or optical duplicate</option>
203 </param> 136 </param>
204 <param name="exclude_flags" argument="-F" type="select" display="checkboxes" multiple="True" label="Exclude"> 137 <param name="exclude_flags" argument="-F" type="select" display="checkboxes" multiple="True" label="Exclude">
205 <option value="1">Read is paired</option> 138 <expand macro="flag_options" />
206 <option value="2">Read is mapped in a proper pair</option>
207 <option value="4">The read is unmapped</option>
208 <option value="8">The mate is unmapped</option>
209 <option value="16">Read strand</option>
210 <option value="32">Mate strand</option>
211 <option value="64">Read is the first in a pair</option>
212 <option value="128">Read is the second in a pair</option>
213 <option value="256">The alignment or this read is not primary</option>
214 <option value="512">The read fails platform/vendor quality checks</option>
215 <option value="1024">The read is a PCR or optical duplicate</option>
216 </param> 139 </param>
217 </when> 140 </when>
218 <when value="nofilter" /> 141 <when value="nofilter" />
219 142
220 </conditional> 143 </conditional>
221 <param name="gc_depth" argument="--GC-depth" type="float" value="20000" label="Size of GC-depth bins" help="Decreasing bin size increases memory requirement" /> 144 <!-- TODO I would like to set the default values of float and int parameters as on the samtools stats help page, but then the tests don't work. Hence I leave them optional and give the defaults in the help -->
222 <param name="insert_size" argument="--insert-size" type="integer" value="8000" label="Maximum insert size" /> 145 <param name="gc_depth" argument="--GC-depth" type="float" optional="True" label="Size of GC-depth bins" help="Decreasing bin size increases memory requirement. default=2e4" />
146 <param name="insert_size" argument="--insert-size" type="integer" optional="True" label="Maximum insert size" help="default=8000" />
223 <!-- 147 <!--
224 148 The -I option of samtools stats returns the following message up to version 1.9:
225 The -I option of samtools stats returns the following message in version 1.2:
226
227 Samtools-htslib: init_group_id() header parsing not yet implemented 149 Samtools-htslib: init_group_id() header parsing not yet implemented
228 Abort trap: 6
229
230 Because of this the section below is commented out until this stats bug is fixed 150 Because of this the section below is commented out until this stats bug is fixed
231
232 <param name="read_group" type="select" optional="true" label="Limit to a specific read group name" > 151 <param name="read_group" type="select" optional="true" label="Limit to a specific read group name" >
233 <options> 152 <options>
234 <filter type="data_meta" ref="input_file" key="read_groups" /> 153 <filter type="data_meta" ref="input" key="read_groups" />
235 </options> 154 </options>
236 </param> 155 </param>
237
238 --> 156 -->
239 157 <param name="read_length" argument="--read-length" type="integer" optional="true" label="Minimum read length to generate statistics for" help="No cutoff if left empty" />
240 <param name="read_length" argument="--read-length" type="integer" value="" optional="true" label="Minimum read length to generate statistics for" help="No cutoff if left empty"/> 158 <param name="most_inserts" argument="--most-inserts" type="float" optional="true" label="Report only the main part of inserts" help="default=0.99" />
241 <param name="most_inserts" argument="--most-inserts" type="float" value="0.99" label="Report only the main part of inserts" /> 159 <param name="trim_quality" argument="--trim-quality" type="integer" optional="true" label="BWA trim parameter" help="default=0" />
242 <param name="trim_quality" argument="--trim-quality" type="integer" value="0" label="BWA trim parameter" /> 160
243 161 <conditional name="addref_cond">
244 <conditional name="use_reference"> 162 <param name="addref_select" type="select" label="Use a reference sequence" help="Required for GC-depth and mismatches-per-cycle calculation">
245 <param name="use_ref_selector" argument="--ref-seq" type="select" label="Use reference sequence" help="Required for GC-depth and mismatches-per-cycle calculation"> 163 <option value="no">No</option>
246 <option value="yes">Use reference</option> 164 <option value="cached">Locally cached</option>
247 <option selected="True" value="no">Do not use reference</option> 165 <option value="history">History</option>
248 </param> 166 </param>
167 <when value="no"/>
168 <when value="cached">
169 <param name="ref" type="select" label="Using genome">
170 <options from_data_table="fasta_indexes">
171 <filter type="data_meta" ref="input" key="dbkey" column="1" />
172 </options>
173 </param>
174 </when>
175 <when value="history">
176 <param name="ref" type="data" format="fasta" label="Using file" />
177 </when>
178 </conditional>
179 <!-- unfortunately -t takes tabular and not bed like view (otherwise a macro might have simplified this) -->
180
181 <expand macro="regions_macro"/>
182
183 <param name="sparse" argument="-x/--sparse" type="boolean" truevalue="-x" falsevalue="" checked="False" label="Suppress absence of insertions" help="Suppress outputting IS rows where there are no insertions."/>
184 <param name="remove_overlaps" argument="-p/--remove-overlaps" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Remove overlaps of paired-end reads from coverage and base count computations" />
185 <param name="cov_threshold" argument="-g/--cov-threshold" optional="true" type="integer" label="Only bases with coverage above this value will be included in the target percentage computation" />
186 <conditional name="cond_plot">
187 <param name="select_plot" type="select" label="Generate plots with plot-bamstats">
188 <option value="no" selected="True">No</option>
189 <option value="yes">Yes</option>
190 </param>
191 <when value="no"/>
249 <when value="yes"> 192 <when value="yes">
250 <conditional name="reference_source"> 193 <param name="log" argument="-l/--log-y" type="boolean" truevalue="-l" falsevalue="" checked="False" label="log scale insert size plot" help="Set the Y axis scale of the Insert Size plot to log 10"/>
251 <param name="reference_source_selector" type="select" label="Choose a reference sequence for GC depth"> 194 </when>
252 <option value="cached">Locally cached</option> 195 </conditional>
253 <option value="history">History</option>
254 </param>
255 <when value="cached">
256 <param name="ref_file" type="select" label="Using genome">
257 <options from_data_table="fasta_indexes">
258 <filter type="data_meta" ref="input_file" key="dbkey" column="1" />
259 </options>
260 </param>
261 </when>
262 <when value="history">
263 <param name="ref_file" type="data" format="fasta" label="Using file" />
264 </when>
265 </conditional>
266 </when>
267 <when value="no" />
268 </conditional>
269
270 </inputs> 196 </inputs>
271 197
272 <outputs> 198 <outputs>
273 <data name="output" format="tabular" label="${tool.name} on ${on_string}"> 199 <data name="output" format="tabular" label="${tool.name} on ${on_string}">
274 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tab" ext="tabular" visible="true" directory="split" /> 200 <discover_datasets directory="split" pattern="(?P&lt;designation&gt;.+)\.tab" format="tabular" visible="true" assign_primary_output="true"/>
201 <filter>split_output_cond['split_output_selector'] == 'no'</filter>
202 </data>
203 <collection name="output_collection" type="list" label="${tool.name} on ${on_string}">
204 <discover_datasets directory="split" pattern="(?P&lt;designation&gt;.+)\.tab" format="tabular" visible="false"/>
205 <filter>split_output_cond['split_output_selector'] == 'yes'</filter>
206 </collection>
207 <data format="html" name="html_file" label="${tool.name} on ${on_string}: plot-bamstats">
208 <filter>cond_plot['select_plot']=='yes'</filter>
275 </data> 209 </data>
276 </outputs> 210 </outputs>
277 <tests> 211 <tests>
278 <test> 212 <!-- https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl#L2402 -->
279 <param name="input_file" value="samtools_stats_input.bam" ftype="bam" /> 213 <test>
280 <param name="use_ref_selector" value="yes" /> 214 <param name="input" value="1_map_cigar.sam" ftype="sam" />
281 <param name="reference_source_selector" value="history" /> 215 <conditional name="addref_cond">
282 <param name="ref_file" value="samtools_stats_ref.fa" ftype="fasta" /> 216 <param name="addref_select" value="history" />
283 <output name="output" file="samtools_stats_out1.tab" ftype="tabular" lines_diff="4" /> 217 <param name="ref" value="test.fa" ftype="fasta" />
284 </test> 218 </conditional>
285 <test> 219 <output name="output" file="1.stats.expected" ftype="tabular" lines_diff="3" />
286 <param name="input_file" value="samtools_stats_input.bam" ftype="bam" /> 220 </test>
287 <param name="use_ref_selector" value="yes" /> 221 <!-- test_cmd($opts,out=>'stat/1.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/1_map_cigar.sam | tail -n+4", exp_fix=>$efix);-->
288 <param name="reference_source_selector" value="history" /> 222 <test>
289 <param name="ref_file" value="samtools_stats_ref.fa" ftype="fasta" /> 223 <param name="input" value="2_equal_cigar_full_seq.sam" ftype="sam" />
290 <param name="split_output_selector" value="yes" /> 224 <conditional name="addref_cond">
291 <param name="generate_tables" value="sn,mpc,gcc" /> 225 <param name="addref_select" value="history" />
292 <output name="output" file="samtools_stats_out2.tab" lines_diff="4"> 226 <param name="ref" value="test.fa" ftype="fasta" />
293 <discovered_dataset designation="Summary numbers" ftype="tabular" file="samtools_stats_out2__sn.tab" /> 227 </conditional>
294 <discovered_dataset designation="ACGT content per cycle" ftype="tabular" file="samtools_stats_out2__gcc.tab" /> 228 <output name="output" file="2.stats.expected" ftype="tabular" lines_diff="3" />
295 <discovered_dataset designation="Mismatches per cycle" ftype="tabular" file="samtools_stats_out2__mpc.tab" /> 229 </test>
296 </output> 230 <!-- test_cmd($opts,out=>'stat/2.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/2_equal_cigar_full_seq.sam | tail -n+4", exp_fix=>$efix);-->
231 <!-- test_cmd($opts,out=>'stat/3.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/3_map_cigar_equal_seq.sam | tail -n+4", exp_fix=>$efix);-->
232 <!-- test_cmd($opts,out=>'stat/4.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/4_X_cigar_full_seq.sam | tail -n+4", exp_fix=>$efix);-->
233 <!-- test_cmd($opts,out=>'stat/5.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/5_insert_cigar.sam | tail -n+4", exp_fix=>$efix); -->
234 <test>
235 <param name="input" value="5_insert_cigar.sam" ftype="sam" />
236 <param name="insert_size" value="0" />
237 <conditional name="addref_cond">
238 <param name="addref_select" value="history" />
239 <param name="ref" value="test.fa" ftype="fasta" />
240 </conditional>
241 <output name="output" file="6.stats.expected" ftype="tabular" lines_diff="3" />
242 </test>
243 <!-- test_cmd($opts,out=>'stat/6.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa -i 0 $$opts{path}/stat/5_insert_cigar.sam | tail -n+4", exp_fix=>$efix); -->
244 <!-- test_cmd($opts,out=>'stat/7.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/7_supp.sam | tail -n+4", exp_fix=>$efix); -->
245 <!-- test_cmd($opts,out=>'stat/8.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/8_secondary.sam | tail -n+4", exp_fix=>$efix);-->
246 <!-- test_cmd($opts,out=>'stat/9.stats.expected',cmd=>"$$opts{bin}/samtools stats -S RG -r $$opts{path}/stat/test.fa $$opts{path}/stat/1_map_cigar.sam | tail -n+4", exp_fix=>$efix,out_map=>{"stat/1_map_cigar.sam_s1_a_1.bamstat"=>"stat/1_map_cigar.sam_s1_a_1.expected.bamstat"},hskip=>3);-->
247 <!-- test_cmd($opts,out=>'stat/10.stats.expected',cmd=>"$$opts{bin}/samtools stats -S RG -r $$opts{path}/stat/test.fa $$opts{path}/stat/10_map_cigar.sam | tail -n+4", exp_fix=>$efix,out_map=>{"stat/10_map_cigar.sam_s1_a_1.bamstat"=>"stat/10_map_cigar.sam_s1_a_1.expected.bamstat", "stat/10_map_cigar.sam_s1_b_1.bamstat"=>"stat/10_map_cigar.sam_s1_b_1.expected.bamstat"},hskip=>3);-->
248 <test>
249 <param name="input" value="11_target.sam" ftype="sam" />
250 <conditional name="addref_cond">
251 <param name="addref_select" value="no" />
252 </conditional>
253 <conditional name="cond_region">
254 <param name="select_region" value="tab"/>
255 <param name="targetregions" value="11.stats.targets" ftype="tabular" />
256 </conditional>
257 <output name="output" file="11.stats.expected" ftype="tabular" lines_diff="3" />
258 </test>
259 <!-- test_cmd($opts,out=>'stat/11.stats.expected',cmd=>"$$opts{bin}/samtools stats -t $$opts{path}/stat/11.stats.targets $$opts{path}/stat/11_target.sam | tail -n+4", exp_fix=>$efix); -->
260
261 <test>
262 <param name="input" value="11_target.bam" ftype="bam" />
263 <conditional name="addref_cond">
264 <param name="addref_select" value="no" />
265 </conditional>
266 <conditional name="cond_region">
267 <param name="select_region" value="text"/>
268 <param name="regions_repeat_0|region" value="ref1:10-24"/>
269 <param name="regions_repeat_1|region" value="ref1:30-46"/>
270 <param name="regions_repeat_2|region" value="ref1:39-56"/>
271 </conditional>
272 <output name="output" file="11.stats.expected" ftype="tabular" lines_diff="3" />
273 </test>
274 <!-- test_cmd($opts,out=>'stat/11.stats.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/11_target.bam ref1:10-24 ref1:30-46 ref1:39-56 | tail -n+4", exp_fix=>$efix);
275 -->
276 <test>
277 <param name="input" value="11_target.sam" ftype="sam" />
278 <conditional name="addref_cond">
279 <param name="addref_select" value="no" />
280 </conditional>
281 <conditional name="cond_region">
282 <param name="select_region" value="tab"/>
283 <param name="targetregions" value="11.stats.targets" ftype="tabular" />
284 </conditional>
285 <param name="cov_threshold" value="4" />
286 <output name="output" file="11.stats.g4.expected" ftype="tabular" lines_diff="3" />
287 </test>
288 <!-- test_cmd($opts,out=>'stat/11.stats.g4.expected',cmd=>"$$opts{bin}/samtools stats -g 4 -t $$opts{path}/stat/11.stats.targets $$opts{path}/stat/11_target.sam | tail -n+4", exp_fix=>$efix);-->
289 <test>
290 <param name="input" value="11_target.bam" ftype="bam" />
291 <conditional name="addref_cond">
292 <param name="addref_select" value="no" />
293 </conditional>
294 <conditional name="cond_region">
295 <param name="select_region" value="text"/>
296 <param name="regions_repeat_0|region" value="ref1:10-24"/>
297 <param name="regions_repeat_1|region" value="ref1:30-46"/>
298 <param name="regions_repeat_2|region" value="ref1:39-56"/>
299 </conditional>
300 <param name="cov_threshold" value="4" />
301 <output name="output" file="11.stats.g4.expected" ftype="tabular" lines_diff="3" />
302 </test>
303 <!-- test_cmd($opts,out=>'stat/11.stats.g4.expected',cmd=>"$$opts{bin}/samtools stats -g 4 $$opts{path}/stat/11_target.bam ref1:10-24 ref1:30-46 ref1:39-56 | tail -n+4", exp_fix=>$efix); -->
304 <test>
305 <param name="input" value="12_overlaps.bam" ftype="bam" />
306 <conditional name="addref_cond">
307 <param name="addref_select" value="no" />
308 </conditional>
309 <conditional name="cond_region">
310 <param name="select_region" value="tab"/>
311 <param name="targetregions" value="12_3reads.bed" ftype="tabular" />
312 </conditional>
313 <output name="output" file="12.3reads.overlap.expected" ftype="tabular" lines_diff="3" />
314 </test>
315 <!-- test_cmd($opts,out=>'stat/12.3reads.overlap.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/12_overlaps.bam -t $$opts{path}/stat/12_3reads.bed | tail -n+4", exp_fix=>$efix);-->
316 <test>
317 <param name="input" value="12_overlaps.bam" ftype="bam" />
318 <conditional name="addref_cond">
319 <param name="addref_select" value="no" />
320 </conditional>
321 <conditional name="cond_region">
322 <param name="select_region" value="tab"/>
323 <param name="targetregions" value="12_3reads.bed" ftype="tabular" />
324 </conditional>
325 <param name="remove_overlaps" value="-p"/>
326 <output name="output" file="12.3reads.nooverlap.expected" ftype="tabular" lines_diff="3" />
327 </test>
328 <!-- test_cmd($opts,out=>'stat/12.3reads.nooverlap.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/12_overlaps.bam -p -t $$opts{path}/stat/12_3reads.bed | tail -n+4", exp_fix=>$efix);-->
329 <test>
330 <param name="input" value="12_overlaps.bam" ftype="bam" />
331 <conditional name="addref_cond">
332 <param name="addref_select" value="no" />
333 </conditional>
334 <conditional name="cond_region">
335 <param name="select_region" value="tab"/>
336 <param name="targetregions" value="12_2reads.bed" ftype="tabular" />
337 </conditional>
338 <output name="output" file="12.2reads.overlap.expected" ftype="tabular" lines_diff="3" />
339 </test>
340 <!-- test_cmd($opts,out=>'stat/12.2reads.overlap.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/12_overlaps.bam -t $$opts{path}/stat/12_2reads.bed | tail -n+4", exp_fix=>$efix);-->
341 <test>
342 <param name="input" value="12_overlaps.bam" ftype="bam" />
343 <conditional name="addref_cond">
344 <param name="addref_select" value="no" />
345 </conditional>
346 <conditional name="cond_region">
347 <param name="select_region" value="tab"/>
348 <param name="targetregions" value="12_2reads.bed" ftype="tabular" />
349 </conditional>
350 <param name="remove_overlaps" value="-p"/>
351 <output name="output" file="12.2reads.nooverlap.expected" ftype="tabular" lines_diff="3" />
352 </test>
353 <!-- test_cmd($opts,out=>'stat/12.2reads.nooverlap.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/12_overlaps.bam -p -t $$opts{path}/stat/12_2reads.bed | tail -n+4", exp_fix=>$efix);-->
354 <test>
355 <param name="input" value="samtools_stats_input.bam" ftype="bam" />
356 <conditional name="addref_cond">
357 <param name="addref_select" value="history" />
358 <param name="ref" value="samtools_stats_ref.fa" ftype="fasta" />
359 </conditional>
360 <conditional name="cond_plot">
361 <param name="select_plot" value="yes"/>
362 </conditional>
363 <output name="output" file="samtools_stats_out1.tab" ftype="tabular" lines_diff="2" />
364 </test>
365 <test>
366 <param name="input" value="samtools_stats_input.bam" ftype="bam" />
367 <conditional name="addref_cond">
368 <param name="addref_select" value="history" />
369 <param name="ref" value="samtools_stats_ref.fa" ftype="fasta" />
370 </conditional>
371 <conditional name="split_output_cond">
372 <param name="split_output_selector" value="yes" />
373 <param name="generate_tables" value="SN,MPC,GCC" />
374 </conditional>
375 <output_collection name="output_collection" type="list">
376 <element name="ACGT content per cycle" ftype="tabular" file="samtools_stats_out1__gcc.tab"/>
377 <element name="Mismatches per cycle and quality" ftype="tabular" file="samtools_stats_out1__mpc.tab" />
378 <element name="Summary Numbers" ftype="tabular" file="samtools_stats_out1__sn.tab" />
379 </output_collection>
297 </test> 380 </test>
298 </tests> 381 </tests>
299 <help><![CDATA[ 382 <help><![CDATA[
300 **What it does** 383 **What it does**
301 384