comparison fastq_filter.xml @ 2:06934412f56d draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_filter commit f2582539542b33240234e8ea6093e25d0aee9b6a
author devteam
date Sat, 30 Sep 2017 14:57:54 -0400
parents b957f55f3955
children 10b75444feae
comparison
equal deleted inserted replaced
1:b957f55f3955 2:06934412f56d
1 <tool id="fastq_filter" name="Filter FASTQ" version="1.0.0"> 1 <tool id="fastq_filter" name="Filter FASTQ" version="1.1.1">
2 <description>reads by quality score and length</description> 2 <description>reads by quality score and length</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> 4 <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement>
5 </requirements> 5 </requirements>
6 <command interpreter="python">fastq_filter.py $input_file $fastq_filter_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command> 6 <command><![CDATA[
7 <inputs> 7 gx-fastq-filter '$input_file' '$fastq_filter_file' '$output_file' '$output_file.files_path' '${input_file.extension[len( 'fastq' ):]}'
8 <page> 8 ]]></command>
9 <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/> 9 <configfiles>
10 <param name="min_size" label="Minimum Size" value="0" type="integer"> 10 <configfile name="fastq_filter_file"><![CDATA[
11 <validator type="in_range" message="Minimum size must be positive" min="0"/> 11 def fastq_read_pass_filter(fastq_read):
12 </param> 12
13 <param name="max_size" label="Maximum Size" value="0" type="integer" help="A maximum size less than 1 indicates no limit."/> 13 def mean(score_list):
14 <param name="min_quality" label="Minimum Quality" value="0" type="float"/> 14 return float(sum(score_list)) / float(len(score_list))
15 <param name="max_quality" label="Maximum Quality" value="0" type="float" help="A maximum quality less than 1 indicates no limit."/> 15
16 <param name="max_num_deviants" label="Maximum number of bases allowed outside of quality range" value="0" type="integer"> 16 if len(fastq_read) < $min_size:
17 <validator type="in_range" message="Maximum number of deviate bases must be positive" min="0"/>
18 </param>
19 <param name="paired_end" label="This is paired end data" type="boolean" truevalue="paired_end" falsevalue="single_end" checked="False"/>
20 <repeat name="fastq_filters" title="Quality Filter on a Range of Bases" help="The above settings do not apply to these filters.">
21 <conditional name="offset_type">
22 <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
23 <option value="offsets_absolute" selected="true">Absolute Values</option>
24 <option value="offsets_percent">Percentage of Read Length</option>
25 </param>
26 <when value="offsets_absolute">
27 <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
28 <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
29 <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
30 </param>
31 <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
32 <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
33 <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
34 </param>
35 </when>
36 <when value="offsets_percent">
37 <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
38 <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
39 </param>
40 <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
41 <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
42 </param>
43 </when>
44 </conditional>
45 <param name="score_operation" type="select" label="Aggregate read score for specified range">
46 <option value="min" selected="True">min score</option>
47 <option value="max">max score</option>
48 <option value="sum">sum of scores</option>
49 <option value="mean">mean of scores</option>
50 </param>
51 <param name="score_comparison" type="select" label="Keep read when aggregate score is">
52 <option value="&gt;">&gt;</option>
53 <option value="&gt;=" selected="true">&gt;=</option>
54 <option value="==">==</option>
55 <option value="&lt;">&lt;</option>
56 <option value="&lt;=">&lt;=</option>
57 <sanitizer sanitize="False"/>
58 </param>
59 <param name="score" label="Quality Score" value="0" type="float" />
60 </repeat>
61 </page>
62 </inputs>
63 <configfiles>
64 <configfile name="fastq_filter_file">
65 def fastq_read_pass_filter( fastq_read ):
66 def mean( score_list ):
67 return float( sum( score_list ) ) / float( len( score_list ) )
68 if len( fastq_read ) &lt; $min_size:
69 return False 17 return False
70 if $max_size &gt; 0 and len( fastq_read ) &gt; $max_size: 18 if $max_size > 0 and len(fastq_read) > $max_size:
71 return False 19 return False
72 num_deviates = $max_num_deviants 20 num_deviates = $max_num_deviants
73 qual_scores = fastq_read.get_decimal_quality_scores() 21 qual_scores = fastq_read.get_decimal_quality_scores()
74 for qual_score in qual_scores: 22 for qual_score in qual_scores:
75 if qual_score &lt; $min_quality or ( $max_quality &gt; 0 and qual_score &gt; $max_quality ): 23 if qual_score < $min_quality or ($max_quality > 0 and qual_score > $max_quality):
76 if num_deviates == 0: 24 if num_deviates == 0:
77 return False 25 return False
78 else: 26 else:
79 num_deviates -= 1 27 num_deviates -= 1
80 #if not $paired_end: 28 #if not $paired_end:
81 qual_scores_split = [ qual_scores ] 29 qual_scores_split = [qual_scores]
82 #else: 30 #else:
83 qual_scores_split = [ qual_scores[ 0:int( len( qual_scores ) / 2 ) ], qual_scores[ int( len( qual_scores ) / 2 ): ] ] 31 qual_scores_split = [qual_scores[0:int(len(qual_scores) / 2)], qual_scores[int(len(qual_scores) / 2): ]]
84 #end if 32 #end if
85 #for $fastq_filter in $fastq_filters: 33 #for $fastq_filter in $fastq_filters:
86 for split_scores in qual_scores_split: 34 for split_scores in qual_scores_split:
87 left_column_offset = $fastq_filter[ 'offset_type' ][ 'left_column_offset' ] 35 left_column_offset = $fastq_filter['offset_type']['left_column_offset']
88 right_column_offset = $fastq_filter[ 'offset_type' ][ 'right_column_offset' ] 36 right_column_offset = $fastq_filter['offset_type']['right_column_offset']
89 #if $fastq_filter[ 'offset_type' ]['base_offset_type'] == 'offsets_percent': 37 #if $fastq_filter['offset_type']['base_offset_type'] == 'offsets_percent':
90 left_column_offset = int( round( float( left_column_offset ) / 100.0 * float( len( split_scores ) ) ) ) 38 left_column_offset = int(round(float(left_column_offset) / 100.0 * float(len(split_scores))))
91 right_column_offset = int( round( float( right_column_offset ) / 100.0 * float( len( split_scores ) ) ) ) 39 right_column_offset = int(round(float(right_column_offset) / 100.0 * float(len(split_scores))))
92 #end if 40 #end if
93 if right_column_offset > 0: 41 if right_column_offset > 0:
94 split_scores = split_scores[ left_column_offset:-right_column_offset] 42 split_scores = split_scores[left_column_offset:-right_column_offset]
95 else: 43 else:
96 split_scores = split_scores[ left_column_offset:] 44 split_scores = split_scores[left_column_offset:]
97 if split_scores: ##if a read doesn't have enough columns, it passes by default 45 if split_scores: ##if a read doesn't have enough columns, it passes by default
98 if not ( ${fastq_filter[ 'score_operation' ]}( split_scores ) $fastq_filter[ 'score_comparison' ] $fastq_filter[ 'score' ] ): 46 if not (${fastq_filter['score_operation']}(split_scores) $fastq_filter['score_comparison'] $fastq_filter['score']):
99 return False 47 return False
100 #end for 48 #end for
101 return True 49 return True
102 </configfile> 50 ]]></configfile>
103 </configfiles> 51 </configfiles>
104 <outputs> 52 <inputs>
105 <data format="input" name="output_file" /> 53 <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer"/>
106 </outputs> 54 <param name="min_size" type="integer" min="0" value="0" label="Minimum size" />
107 <tests> 55 <param name="max_size" type="integer" value="0" label="Maximum size" help="A maximum size less than 1 indicates no limit"/>
108 <!-- Do nothing filter --> 56 <param name="min_quality" type="float" value="0" label="Minimum quality"/>
109 <test> 57 <param name="max_quality" type="float" value="0" label="Maximum quality" help="A maximum quality less than 1 indicates no limit"/>
110 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> 58 <param name="max_num_deviants" type="integer" min="0" value="0" label="Maximum number of bases allowed outside of quality range" />
111 <param name="min_size" value="0"/> 59 <param name="paired_end" type="boolean" truevalue="paired_end" falsevalue="single_end" checked="false" label="This is paired end data" />
112 <param name="max_size" value="0"/> 60 <repeat name="fastq_filters" title="Quality Filter on a Range of Bases" help="The above settings do not apply to these filters">
113 <param name="min_quality" value="0"/> 61 <conditional name="offset_type">
114 <param name="max_quality" value="0"/> 62 <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
115 <param name="max_num_deviants" value="0"/> 63 <option value="offsets_absolute" selected="true">Absolute Values</option>
116 <param name="paired_end" value="False"/> 64 <option value="offsets_percent">Percentage of Read Length</option>
117 <param name="base_offset_type" value="offsets_absolute"/> 65 </param>
118 <param name="left_column_offset" value="0"/> 66 <when value="offsets_absolute">
119 <param name="right_column_offset" value="0"/> 67 <param name="left_column_offset" type="integer" min="0" value="0" label="Offset from 5' end" help="Values start at 0, increasing from the left" />
120 <param name="score_operation" value="min"/> 68 <param name="right_column_offset" type="integer" min="0" value="0" label="Offset from 3' end" help="Values start at 0, increasing from the right" />
121 <param name="score_comparison" value="&gt;="/> 69 </when>
122 <param name="score" value="0"/> 70 <when value="offsets_percent">
123 <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> 71 <param name="left_column_offset" type="float" min="0" max="100" value="0" label="Offset from 5' end" />
124 </test> 72 <param name="right_column_offset" type="float" min="0" max="100" value="0" label="Offset from 3' end" />
125 <!-- crippled input types prevent this test <test> 73 </when>
126 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> 74 </conditional>
127 <param name="min_size" value="0"/> 75 <param name="score_operation" type="select" label="Aggregate read score for specified range">
128 <param name="max_size" value="0"/> 76 <option value="min" selected="true">min score</option>
129 <param name="min_quality" value="-5"/> 77 <option value="max">max score</option>
130 <param name="max_quality" value="0"/> 78 <option value="sum">sum of scores</option>
131 <param name="max_num_deviants" value="0"/> 79 <option value="mean">mean of scores</option>
132 <param name="paired_end" value="False"/> 80 </param>
133 <param name="base_offset_type" value="offsets_absolute"/> 81 <param name="score_comparison" type="select" label="Keep read when aggregate score is">
134 <param name="left_column_offset" value="0"/> 82 <option value="&gt;">&gt;</option>
135 <param name="right_column_offset" value="0"/> 83 <option value="&gt;=" selected="true">&gt;=</option>
136 <param name="score_operation" value="min"/> 84 <option value="==">==</option>
137 <param name="score_comparison" value="&gt;="/> 85 <option value="&lt;">&lt;</option>
138 <param name="score" value="-5"/> 86 <option value="&lt;=">&lt;=</option>
139 <output name="out_file1" file="solexa_full_range_original_solexa.fastqsolexa"/> 87 <sanitizer sanitize="false"/>
140 </test> --> 88 </param>
141 <!-- No trim, so does not remove Adapter from cssanger --> 89 <param name="score" type="float" value="0" label="Quality score" />
142 <test> 90 </repeat>
143 <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger"/> 91 </inputs>
144 <param name="min_size" value="0"/> 92 <outputs>
145 <param name="max_size" value="0"/> 93 <data name="output_file" format_source="input_file" />
146 <param name="min_quality" value="0"/> 94 </outputs>
147 <param name="max_quality" value="0"/> 95 <tests>
148 <param name="max_num_deviants" value="0"/> 96 <!-- Do nothing filter -->
149 <param name="paired_end" value="False"/> 97 <test>
150 <param name="base_offset_type" value="offsets_absolute"/> 98 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
151 <param name="left_column_offset" value="0"/> 99 <param name="min_size" value="0"/>
152 <param name="right_column_offset" value="0"/> 100 <param name="max_size" value="0"/>
153 <param name="score_operation" value="min"/> 101 <param name="min_quality" value="0"/>
154 <param name="score_comparison" value="&gt;="/> 102 <param name="max_quality" value="0"/>
155 <param name="score" value="0"/> 103 <param name="max_num_deviants" value="0"/>
156 <output name="out_file1" file="sanger_full_range_as_cssanger.fastqcssanger"/> 104 <param name="paired_end" value="false"/>
157 </test> 105 <param name="base_offset_type" value="offsets_absolute"/>
158 <!-- Remove all Filter --> 106 <param name="left_column_offset" value="0"/>
159 <test> 107 <param name="right_column_offset" value="0"/>
160 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> 108 <param name="score_operation" value="min"/>
161 <param name="min_size" value="0"/> 109 <param name="score_comparison" value="&gt;="/>
162 <param name="max_size" value="0"/> 110 <param name="score" value="0"/>
163 <param name="min_quality" value="1"/> 111 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
164 <param name="max_quality" value="0"/> 112 </test>
165 <param name="max_num_deviants" value="0"/> 113 <!-- Do nothing filter compressed -->
166 <param name="paired_end" value="False"/> 114 <test>
167 <param name="base_offset_type" value="offsets_absolute"/> 115 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger.gz" ftype="fastqsanger.gz"/>
168 <param name="left_column_offset" value="0"/> 116 <param name="min_size" value="0"/>
169 <param name="right_column_offset" value="0"/> 117 <param name="max_size" value="0"/>
170 <param name="score_operation" value="min"/> 118 <param name="min_quality" value="0"/>
171 <param name="score_comparison" value="&gt;="/> 119 <param name="max_quality" value="0"/>
172 <param name="score" value="0"/> 120 <param name="max_num_deviants" value="0"/>
173 <output name="out_file1" file="empty_file.dat"/> 121 <param name="paired_end" value="false"/>
174 </test> 122 <param name="base_offset_type" value="offsets_absolute"/>
175 <!-- crippled input types prevent this test <test> 123 <param name="left_column_offset" value="0"/>
176 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> 124 <param name="right_column_offset" value="0"/>
177 <param name="min_size" value="0"/> 125 <param name="score_operation" value="min"/>
178 <param name="max_size" value="0"/> 126 <param name="score_comparison" value="&gt;="/>
179 <param name="min_quality" value="-4"/> 127 <param name="score" value="0"/>
180 <param name="max_quality" value="0"/> 128 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger.gz" ftype="fastqsanger.gz" decompress="true"/>
181 <param name="max_num_deviants" value="0"/> 129 </test>
182 <param name="paired_end" value="False"/> 130 <!-- crippled input types prevent this test <test>
183 <param name="base_offset_type" value="offsets_absolute"/> 131 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/>
184 <param name="left_column_offset" value="0"/> 132 <param name="min_size" value="0"/>
185 <param name="right_column_offset" value="0"/> 133 <param name="max_size" value="0"/>
186 <param name="score_operation" value="min"/> 134 <param name="min_quality" value="-5"/>
187 <param name="score_comparison" value="&gt;="/> 135 <param name="max_quality" value="0"/>
188 <param name="score" value="-5"/> 136 <param name="max_num_deviants" value="0"/>
189 <output name="out_file1" file="empty_file.dat"/> 137 <param name="paired_end" value="false"/>
190 </test> --> 138 <param name="base_offset_type" value="offsets_absolute"/>
191 <!-- Keep all by allowing 1 deviant --> 139 <param name="left_column_offset" value="0"/>
192 <test> 140 <param name="right_column_offset" value="0"/>
193 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> 141 <param name="score_operation" value="min"/>
194 <param name="min_size" value="0"/> 142 <param name="score_comparison" value="&gt;="/>
195 <param name="max_size" value="0"/> 143 <param name="score" value="-5"/>
196 <param name="min_quality" value="1"/> 144 <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/>
197 <param name="max_quality" value="0"/> 145 </test> -->
198 <param name="max_num_deviants" value="1"/> 146 <!-- No trim, so does not remove Adapter from cssanger -->
199 <param name="paired_end" value="False"/> 147 <test>
200 <param name="base_offset_type" value="offsets_absolute"/> 148 <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger"/>
201 <param name="left_column_offset" value="0"/> 149 <param name="min_size" value="0"/>
202 <param name="right_column_offset" value="0"/> 150 <param name="max_size" value="0"/>
203 <param name="score_operation" value="min"/> 151 <param name="min_quality" value="0"/>
204 <param name="score_comparison" value="&gt;="/> 152 <param name="max_quality" value="0"/>
205 <param name="score" value="0"/> 153 <param name="max_num_deviants" value="0"/>
206 <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> 154 <param name="paired_end" value="false"/>
207 </test> 155 <param name="base_offset_type" value="offsets_absolute"/>
208 <!-- crippled input types prevent this test<test> 156 <param name="left_column_offset" value="0"/>
209 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> 157 <param name="right_column_offset" value="0"/>
210 <param name="min_size" value="0"/> 158 <param name="score_operation" value="min"/>
211 <param name="max_size" value="0"/> 159 <param name="score_comparison" value="&gt;="/>
212 <param name="min_quality" value="-5"/> 160 <param name="score" value="0"/>
213 <param name="max_quality" value="61"/> 161 <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger" />
214 <param name="max_num_deviants" value="1"/> 162 </test>
215 <param name="paired_end" value="False"/> 163 <!-- Remove all Filter -->
216 <param name="base_offset_type" value="offsets_absolute"/> 164 <test>
217 <param name="left_column_offset" value="0"/> 165 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
218 <param name="right_column_offset" value="0"/> 166 <param name="min_size" value="0"/>
219 <param name="score_operation" value="min"/> 167 <param name="max_size" value="0"/>
220 <param name="score_comparison" value="&gt;="/> 168 <param name="min_quality" value="1"/>
221 <param name="score" value="-5"/> 169 <param name="max_quality" value="0"/>
222 <output name="out_file1" file="solexa_full_range_original_solexa.fastqsolexa"/> 170 <param name="max_num_deviants" value="0"/>
223 </test> --> 171 <param name="paired_end" value="false"/>
224 <!-- Filter inner range --> 172 <param name="base_offset_type" value="offsets_absolute"/>
225 <test> 173 <param name="left_column_offset" value="0"/>
226 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> 174 <param name="right_column_offset" value="0"/>
227 <param name="min_size" value="0"/> 175 <param name="score_operation" value="min"/>
228 <param name="max_size" value="0"/> 176 <param name="score_comparison" value="&gt;="/>
229 <param name="min_quality" value="0"/> 177 <param name="score" value="0"/>
230 <param name="max_quality" value="0"/> 178 <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />
231 <param name="max_num_deviants" value="0"/> 179 </test>
232 <param name="paired_end" value="False"/> 180 <!-- crippled input types prevent this test <test>
233 <param name="base_offset_type" value="offsets_absolute"/> 181 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/>
234 <param name="left_column_offset" value="1"/> 182 <param name="min_size" value="0"/>
235 <param name="right_column_offset" value="1"/> 183 <param name="max_size" value="0"/>
236 <param name="score_operation" value="min"/> 184 <param name="min_quality" value="-4"/>
237 <param name="score_comparison" value="&gt;="/> 185 <param name="max_quality" value="0"/>
238 <param name="score" value="1"/> 186 <param name="max_num_deviants" value="0"/>
239 <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> 187 <param name="paired_end" value="false"/>
240 </test> 188 <param name="base_offset_type" value="offsets_absolute"/>
241 <test> 189 <param name="left_column_offset" value="0"/>
242 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> 190 <param name="right_column_offset" value="0"/>
243 <param name="min_size" value="0"/> 191 <param name="score_operation" value="min"/>
244 <param name="max_size" value="0"/> 192 <param name="score_comparison" value="&gt;="/>
245 <param name="min_quality" value="0"/> 193 <param name="score" value="-5"/>
246 <param name="max_quality" value="0"/> 194 <output name="output_file" file="empty_file.dat" ftype="fastqsolexa" />
247 <param name="max_num_deviants" value="0"/> 195 </test> -->
248 <param name="paired_end" value="False"/> 196 <!-- Keep all by allowing 1 deviant -->
249 <param name="base_offset_type" value="offsets_absolute"/> 197 <test>
250 <param name="left_column_offset" value="1"/> 198 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
251 <param name="right_column_offset" value="1"/> 199 <param name="min_size" value="0"/>
252 <param name="score_operation" value="max"/> 200 <param name="max_size" value="0"/>
253 <param name="score_comparison" value="&lt;="/> 201 <param name="min_quality" value="1"/>
254 <param name="score" value="92"/> 202 <param name="max_quality" value="0"/>
255 <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> 203 <param name="max_num_deviants" value="1"/>
256 </test> 204 <param name="paired_end" value="false"/>
257 <!-- percent based offsets --> 205 <param name="base_offset_type" value="offsets_absolute"/>
258 <test> 206 <param name="left_column_offset" value="0"/>
259 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> 207 <param name="right_column_offset" value="0"/>
260 <param name="min_size" value="0"/> 208 <param name="score_operation" value="min"/>
261 <param name="max_size" value="0"/> 209 <param name="score_comparison" value="&gt;="/>
262 <param name="min_quality" value="0"/> 210 <param name="score" value="0"/>
263 <param name="max_quality" value="0"/> 211 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
264 <param name="max_num_deviants" value="0"/> 212 </test>
265 <param name="paired_end" value="False"/> 213 <!-- crippled input types prevent this test<test>
266 <param name="base_offset_type" value="offsets_percent"/> 214 <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/>
267 <param name="left_column_offset" value="1.075"/> 215 <param name="min_size" value="0"/>
268 <param name="right_column_offset" value="1.075"/> 216 <param name="max_size" value="0"/>
269 <param name="score_operation" value="min"/> 217 <param name="min_quality" value="-5"/>
270 <param name="score_comparison" value="&gt;="/> 218 <param name="max_quality" value="61"/>
271 <param name="score" value="1"/> 219 <param name="max_num_deviants" value="1"/>
272 <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> 220 <param name="paired_end" value="false"/>
273 </test> 221 <param name="base_offset_type" value="offsets_absolute"/>
274 <test> 222 <param name="left_column_offset" value="0"/>
275 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> 223 <param name="right_column_offset" value="0"/>
276 <param name="min_size" value="0"/> 224 <param name="score_operation" value="min"/>
277 <param name="max_size" value="0"/> 225 <param name="score_comparison" value="&gt;="/>
278 <param name="min_quality" value="0"/> 226 <param name="score" value="-5"/>
279 <param name="max_quality" value="0"/> 227 <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa" />
280 <param name="max_num_deviants" value="0"/> 228 </test> -->
281 <param name="paired_end" value="True"/> 229 <!-- Filter inner range -->
282 <param name="base_offset_type" value="offsets_percent"/> 230 <test>
283 <param name="left_column_offset" value="1"/> 231 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
284 <param name="right_column_offset" value="1"/> 232 <param name="min_size" value="0"/>
285 <param name="score_operation" value="min"/> 233 <param name="max_size" value="0"/>
286 <param name="score_comparison" value="&gt;="/> 234 <param name="min_quality" value="0"/>
287 <param name="score" value="1"/> 235 <param name="max_quality" value="0"/>
288 <output name="out_file1" file="empty_file.dat"/> 236 <param name="max_num_deviants" value="0"/>
289 </test> 237 <param name="paired_end" value="false"/>
290 </tests> 238 <param name="base_offset_type" value="offsets_absolute"/>
291 <help> 239 <param name="left_column_offset" value="1"/>
240 <param name="right_column_offset" value="1"/>
241 <param name="score_operation" value="min"/>
242 <param name="score_comparison" value="&gt;="/>
243 <param name="score" value="1"/>
244 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
245 </test>
246 <test>
247 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
248 <param name="min_size" value="0"/>
249 <param name="max_size" value="0"/>
250 <param name="min_quality" value="0"/>
251 <param name="max_quality" value="0"/>
252 <param name="max_num_deviants" value="0"/>
253 <param name="paired_end" value="false"/>
254 <param name="base_offset_type" value="offsets_absolute"/>
255 <param name="left_column_offset" value="1"/>
256 <param name="right_column_offset" value="1"/>
257 <param name="score_operation" value="max"/>
258 <param name="score_comparison" value="&lt;="/>
259 <param name="score" value="92"/>
260 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
261 </test>
262 <!-- percent based offsets -->
263 <test>
264 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
265 <param name="min_size" value="0"/>
266 <param name="max_size" value="0"/>
267 <param name="min_quality" value="0"/>
268 <param name="max_quality" value="0"/>
269 <param name="max_num_deviants" value="0"/>
270 <param name="paired_end" value="false"/>
271 <param name="base_offset_type" value="offsets_percent"/>
272 <param name="left_column_offset" value="1.075"/>
273 <param name="right_column_offset" value="1.075"/>
274 <param name="score_operation" value="min"/>
275 <param name="score_comparison" value="&gt;="/>
276 <param name="score" value="1"/>
277 <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
278 </test>
279 <test>
280 <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
281 <param name="min_size" value="0"/>
282 <param name="max_size" value="0"/>
283 <param name="min_quality" value="0"/>
284 <param name="max_quality" value="0"/>
285 <param name="max_num_deviants" value="0"/>
286 <param name="paired_end" value="true"/>
287 <param name="base_offset_type" value="offsets_percent"/>
288 <param name="left_column_offset" value="1"/>
289 <param name="right_column_offset" value="1"/>
290 <param name="score_operation" value="min"/>
291 <param name="score_comparison" value="&gt;="/>
292 <param name="score" value="1"/>
293 <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />
294 </test>
295 </tests>
296 <help><![CDATA[
292 This tool allows you to build complex filters to be applied to each read in a FASTQ file. 297 This tool allows you to build complex filters to be applied to each read in a FASTQ file.
293 298
294 **Basic Options:** 299 **Basic Options:**
295 * You can specify minimum and maximum read lengths. 300 * You can specify minimum and maximum read lengths.
296 * You can specify minimum and maximum per-base quality scores, optionally specifying the number of bases that are allowed to deviate from this range (default of 0 deviant bases). 301 * You can specify minimum and maximum per-base quality scores, optionally specifying the number of bases that are allowed to deviate from this range (default of 0 deviant bases).
297 * If your data is paired-end, select the proper checkbox; this will cause each read to be internally split down the middle and filters applied to each half using the offsets specified. 302 * If your data is paired-end, select the proper checkbox; this will cause each read to be internally split down the middle and filters applied to each half using the offsets specified.
298 303
299 **Advanced Options:** 304 **Advanced Options:**
300 * You can specify any number of advanced filters. 305 * You can specify any number of advanced filters.
301 * 5' and 3' offsets are defined, starting at zero, increasing from the respective end of the reads. For example, given a quality string of "ABCDEFG", specifying 5' and 3' offsets of 1 and 1, respectively, will yield "BCDEF". 306 * 5' and 3' offsets are defined, starting at zero, increasing from the respective end of the reads. For example, given a quality string of "ABCDEFG", specifying 5' and 3' offsets of 1 and 1, respectively, will yield "BCDEF".
302 * You can specify either absolute offset values, or percentage offset values. *Absolute Values* based offsets are useful for fixed length reads (e.g. Illumina or SOLiD data). *Percentage of Read Length* based offsets are useful for variable length reads (e.g. 454 data). When using the percent-based method, offsets are rounded to the nearest integer. 307 * You can specify either absolute offset values, or percentage offset values. *Absolute Values* based offsets are useful for fixed length reads (e.g. Illumina or SOLiD data). *Percentage of Read Length* based offsets are useful for variable length reads (e.g. 454 data). When using the percent-based method, offsets are rounded to the nearest integer.
303 * The user specifies the aggregating action (min, max, sum, mean) to perform on the quality score values found between the specified offsets to be used with the user defined comparison operation and comparison value. 308 * The user specifies the aggregating action (min, max, sum, mean) to perform on the quality score values found between the specified offsets to be used with the user defined comparison operation and comparison value.
304 * If a set of offsets is specified that causes the remaining quality score list to be of length zero, then the read will **pass** the quality filter unless the size range filter is used to remove these reads. 309 * If a set of offsets is specified that causes the remaining quality score list to be of length zero, then the read will **pass** the quality filter unless the size range filter is used to remove these reads.
305 310
306 ----- 311 -----
307 312
308 .. class:: warningmark 313 .. class:: warningmark
309 314
310 Adapter bases in color space reads are excluded from filtering. 315 Adapter bases in color space reads are excluded from filtering.
311 316 ]]></help>
312 ------ 317 <citations>
313 318 <citation type="doi">10.1093/bioinformatics/btq281</citation>
314 319 </citations>
315 </help>
316
317 <citations>
318 <citation type="doi">10.1093/bioinformatics/btq281</citation>
319 </citations>
320
321 </tool> 320 </tool>