comparison filter_kw_val.xml @ 5:33ca9ba2495a draft

planemo upload commit 395d6aa47cce1fb7642b7c06133636c43d80f3c7-dirty
author proteore
date Tue, 05 Mar 2019 07:37:10 -0500
parents 2080e2a4f209
children b4641c0f8a82
comparison
equal deleted inserted replaced
4:2080e2a4f209 5:33ca9ba2495a
1 <tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.02.05"> 1 <tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.03.05">
2 <description></description> 2 <description></description>
3 <requirements> 3 <requirements>
4 </requirements> 4 </requirements>
5 <stdio> 5 <stdio>
6 <exit_code range="1:" /> 6 <exit_code range="1:" />
7 </stdio> 7 </stdio>
8 <command><![CDATA[ 8 <command><![CDATA[
9 python $__tool_directory__/filter_kw_val.py 9 python $__tool_directory__/filter_kw_val.py
10 -i "$input1,$header" 10 -i "$input1,$header"
11 -o "$output1" 11 -o "$kept_lines"
12 --filtered_file "$filtered_file" 12 --discarded_lines "$discarded_lines"
13 --operation "$operation"
13 --operator "$operator" 14 --operator "$operator"
14 15
15 ## Keywords 16 ## Keywords
16 #for $key in $keyword 17 #for $key in $keyword
17 #if $key.k.kw != "None" 18 #if $key.k.kw != "None"
23 #end if 24 #end if
24 #end for 25 #end for
25 26
26 ## value to filter 27 ## value to filter
27 #for $val in $value 28 #for $val in $value
28 #if $val.v.val != "None" 29 #if $val.value != "None"
29 --value 30 --value $val.value $val.ncol $val.operator
30 #if $val.v.val == "Equal"
31 $val.v.equal "$val.ncol" "="
32 #else if $val.v.val == "Higher"
33 $val.v.higher "$val.ncol" ">"
34 #else if $val.v.val == "Equal or higher"
35 $val.v.equal_higher "$val.ncol" ">="
36 #else if $val.v.val == "Lower"
37 $val.v.lower "$val.ncol" "<"
38 #else if $val.v.val == "Equal or lower"
39 $val.v.equal_lower "$val.ncol" "<="
40 #else
41 $val.v.different "$val.ncol" "!="
42 #end if
43 #end if 31 #end if
44 #end for 32 #end for
45 33
46 ##range of values to keep 34 ##range of values to keep
47 #for $vr in $values_range 35 #for $vr in $values_range
48 #if vr 36 #if vr
49 --values_range $vr.bottom_value $vr.top_value $vr.ncol $vr.inclusive 37 --values_range $vr.bottom_value $vr.top_value $vr.ncol $vr.inclusive
50 #end if 38 #end if
51 #end for 39 #end for
52 40
53 #if $sort_column != "" 41 #if $sort.sort_bool == "true"
54 --sort_col "$sort_column,$reversed_sort" 42 --sort_col "$sort.sort_column,$sort.reversed_sort"
55 #end if 43 #end if
56 44
57 ]]></command> 45 ]]></command>
58 <inputs> 46 <inputs>
59 <param type="data" name="input1" format="txt,tabular" label="Input file" /> 47 <param type="data" name="input1" format="txt,tabular" label="Input file" />
60 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> 48 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
49 <param name="operation" type="select" label="Operation" help="keep or discard word(s) or value(s) that match filters ?">
50 <option value="keep">Keep</option>
51 <option value="discard">Discard</option>
52 </param>
61 <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" > 53 <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" >
62 <option value="OR" selected="True">OR</option> 54 <option value="OR" selected="True">OR</option>
63 <option value="AND">AND</option> 55 <option value="AND">AND</option>
64 </param> 56 </param>
65 57
66 <repeat name="keyword" title="Filter by keywords" > 58 <repeat name="keyword" title="Filter by keywords" >
67 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> 59 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek keywords in the first column (and keep or discard them)'>
60 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
61 </param>
68 <param type="boolean" name="match" truevalue="True" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' /> 62 <param type="boolean" name="match" truevalue="True" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' />
69 <conditional name="k" > 63 <conditional name="k" >
70 <param name="kw" type="select" label="Enter keywords" > 64 <param name="kw" type="select" label="Enter keywords" >
71 <option value="text" selected="true">copy/paste</option> 65 <option value="text" selected="true">copy/paste</option>
72 <option value="file">File containing keywords</option> 66 <option value="file">File containing keywords</option>
73 </param> 67 </param>
74 <when value="text" > 68 <when value="text" >
75 <param name="txt" type="text" label="Copy/paste keywords to be filtered out" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' > 69 <param name="txt" type="text" label="Copy/paste keywords to find (keep or discard)" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' >
76 <sanitizer> 70 <sanitizer>
77 <valid initial="string.printable"> 71 <valid initial="string.printable">
78 <remove value="&apos;"/> 72 <remove value="&apos;"/>
79 </valid> 73 </valid>
80 <mapping initial="none"> 74 <mapping initial="none">
89 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> 83 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
90 </when> 84 </when>
91 </conditional> 85 </conditional>
92 </repeat> 86 </repeat>
93 <repeat name="value" title="Filter by numerical value" > 87 <repeat name="value" title="Filter by numerical value" >
94 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> 88 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'>
95 <conditional name="v" > 89 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
96 <param name="val" type="select" label="Select operator" > 90 </param>
97 <option value="Equal">=</option> 91 <param name="operator" type="select" label="Select operator" >
98 <option value="Higher">&gt;</option> 92 <option value="Equal">=</option>
99 <option value="Equal or higher">&gt;=</option> 93 <option value="Higher">&gt;</option>
100 <option value="Lower">&lt;</option> 94 <option value="Equal-or-higher">&gt;=</option>
101 <option value="Equal or lower">&lt;=</option> 95 <option value="Lower">&lt;</option>
102 <option value="Different">!=</option> 96 <option value="Equal-or-lower">&lt;=</option>
103 </param> 97 <option value="Different">!=</option>
104 <when value="Equal" > 98 </param>
105 <param name="equal" type="float" value="" label="Value" /> 99 <param name="value" type="float" value="" label="Value"></param>
106 </when>
107 <when value="Higher" >
108 <param type="float" name="higher" value="" label="Value" />
109 </when>
110 <when value="Equal or higher" >
111 <param type="float" name="equal_higher" value="" label="Value" />
112 </when>
113 <when value="Lower" >
114 <param type="float" name="lower" value="" label="Value" />
115 </when>
116 <when value="Equal or lower" >
117 <param type="float" name="equal_lower" value="" label="Value" />
118 </when>
119 <when value="Different">
120 <param type="float" name="different" value="" label="Value"/>
121 </when>
122 </conditional>
123 </repeat> 100 </repeat>
124 <repeat name="values_range" title="Filter by range of numerical values"> 101 <repeat name="values_range" title="Filter by range of numerical values">
125 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> 102 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'>
103 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
104 </param>
126 <param name="bottom_value" type="float" value="" label="Enter the bottom value" /> 105 <param name="bottom_value" type="float" value="" label="Enter the bottom value" />
127 <param name="top_value" type="float" value="" label="Enter the top value" /> 106 <param name="top_value" type="float" value="" label="Enter the top value" />
128 <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" /> 107 <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" />
129 </repeat> 108 </repeat>
130 <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values" /> 109 <conditional name="sort">
131 <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/> 110 <param name="sort_bool" type="boolean" label="Sort by column ?" checked="false" truevalue="true" falsevalue="false" />
111 <when value="true">
112 <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values">
113 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]</validator>
114 </param>
115 <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/>
116 </when>
117 <when value="false"/>
118 </conditional>
132 </inputs> 119 </inputs>
133 <outputs> 120 <outputs>
134 <data name="output1" format="tsv" label="${tool.name} on ${input1.name}" /> 121 <data name="kept_lines" format="tsv" label="Filtered_${input1.name}" />
135 <data name="filtered_file" format="tsv" label="${tool.name} on ${input1.name} - Filtered lines" /> 122 <data name="discarded_lines" format="tsv" label="Filtered_${input1.name} - discarded_lines" />
136 </outputs> 123 </outputs>
137 <tests> 124 <tests>
138 <test> 125 <test>
139 <param name="input1" value="Lacombe_et_al_2017_OK.txt" /> 126 <param name="input1" value="Lacombe_et_al_2017_OK.txt" />
140 <param name="header" value="true" /> 127 <param name="header" value="true" />
141 <param name="operator" value="OR"/> 128 <param name="operator" value="OR"/>
129 <param name="operation" value="discard"/>
142 <param name="sort_column" value="c1"/> 130 <param name="sort_column" value="c1"/>
143 <param name="reversed_sort" value="false"/> 131 <conditional name="sort">
132 <param name="sort_bool" value="false"/>
133 <param name="reversed_sort" value="false"/>
134 </conditional>
144 <repeat name="keyword"> 135 <repeat name="keyword">
145 <param name="ncol" value="c1" /> 136 <param name="ncol" value="c1" />
146 <param name="match" value="True" /> 137 <param name="match" value="True" />
147 <conditional name="k"> 138 <conditional name="k">
148 <param name="kw" value="text" /> 139 <param name="kw" value="text" />
149 <param name="txt" value="P04264 P35908 P13645 Q5D862 Q5T749 Q8IW75 P81605 P22531 P59666 P78386" /> 140 <param name="txt" value="P04264 P35908 P13645 Q5D862 Q5T749 Q8IW75 P81605 P22531 P59666 P78386" />
150 </conditional> 141 </conditional>
151 </repeat> 142 </repeat>
152 <repeat name="value"> 143 <repeat name="value">
153 <param name="ncol" value="c3"/> 144 <param name="ncol" value="c3"/>
154 <conditional name="v"> 145 <param name="operator" value="Higher"/>
155 <param name="val" value="Higher"/> 146 <param name="value" value="20" />
156 <param name="higher" value="20" />
157 </conditional>
158 </repeat> 147 </repeat>
159 <output name="output1" file="output.csv" /> 148 <output name="kept_lines" file="output.tsv" />
160 <output name="filtered_file" file="filtered_output.csv" /> 149 <output name="discarded_lines" file="discarded_lines.tsv" />
161 </test> 150 </test>
162 </tests> 151 </tests>
163 <help><![CDATA[ 152 <help><![CDATA[
164 **Description** 153 **Description**
165 154
166 This tool allows to filter out data according to different criteria such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold). 155 This tool allows you to keep/discard rows from your dataset according to different filters such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold).
167 A boolean operator "OR/AND" allows to combine different type of filters making this tool very powerful. 156 A boolean operator "OR/AND" allows to combine different type of filters making this tool very powerful.
168 157
169 ----- 158 -----
170 159
171 **Input** 160 **Input**
174 163
175 ----- 164 -----
176 165
177 **Parameters** 166 **Parameters**
178 167
179 **AND/OR operator** 168 **Operation**
180 169
181 As many filters as needed can be combined, you can choose how filters apply on your data by using the following boolean operators: 170 - **Keep**: only keep lines whose keyword(s) and/or value(s) match the defined filter(s)
182 171 - **Discard**: only keep lines whose keyword(s) and/or value(s) do NOT match the defined filter(s)
183 - OR: only one filter must be satisfied to remove one row 172
184 - AND: all filters must be satisfied to remove one row 173 .. class:: infomark
174
175 Two output files are created, one with kept lines and the other one with discarded lines.
176
177 **Select an operator to combine your filters (if more than one)**
178
179 Many filters (criteria) can be combined in a single execution making this tool quite powerful; this can be achieved using the following boolean operators:
180
181 - **OR**: only one of the filters must be satisfied to keep/discard one row
182 - **AND**: all filters must be satisfied to keep/discard one row
185 183
186 ----- 184 -----
187 185
188 **Filter by keyword(s)** 186 **Filter by keyword(s)**
189 187
203 201
204 Lines that contains these keywords will be removed from input file. 202 Lines that contains these keywords will be removed from input file.
205 203
206 "Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option: 204 "Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option:
207 205
208 - If you choose **Yes**, only the fields that contains exactly the same content will be removed. 206 - If you choose **Yes**, only the fields that contains exactly the same content will be removed (i.e. using the "discard" mode).
209 207
210 - If you choose **No**, all the fields containing the keyword will be removed. 208 - If you choose **No**, all the fields containing the keyword will be removed.
211 209
212 Example: 210 Example:
213 211
229 - <= (lower than or equal to) 227 - <= (lower than or equal to)
230 - > (greater than) 228 - > (greater than)
231 - >= (greater than or equal to) 229 - >= (greater than or equal to)
232 230
233 Then enter the numerical threshold to apply by filling the "Value" box. 231 Then enter the numerical threshold to apply by filling the "Value" box.
234 If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that correspond to your settings will be filtered out. 232 If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that corresponds to your settings will be kept or discarded (depending on the "Operation" parameter).
235 233
236 ----- 234 -----
237 235
238 **Filter by a range of values**: You can also set a range of values to filter your file. 236 **Filter by a range of values**: You can also set a range of values to filter your file.
239 Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be filtered out. 237 Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be discarded (or the other way around based on operation parameter).
240 238
241 ----- 239 -----
242 240
243 **Sort results files** 241 **Sort by column ?**
244 242 Clicking the "Yes" button enables the "Sort result files by:" field, where you enter the column number on which to sort the data. Sorting can be done in ascending (default value) or descending order.
245 You can sort your results by column in ascending (default value) or descending by entering the column number on which to sort the data.
246 243
247 ----- 244 -----
248 245
249 **Output** 246 **Output**
250 247
251 The tool returns two output files. 248 The tool returns two output files.
252 249
253 * A text file containing the results that pass your filters 250 * A text file containing the results that satisfy your filters (i.e. "keep" mode).
254 251
255 * A text file containing the rows removed from the input file (i.e. containing data taht do not pass your filter(s). 252 * A text file containing the rows removed from the input file (i.e. "discard" mode).
256 253
257 ----- 254 -----
258 255
259 .. class:: infomark 256 .. class:: infomark
260 257
261 **Authors** 258 **Authors**
262 259
263 T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR 260 David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
264 261
265 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR 262 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
266 263
267 This work has been partially funded through the French National Agency for Research (ANR) IFB project. 264 This work has been partially funded through the French National Agency for Research (ANR) IFB project.
268 265