comparison query_tabular.xml @ 0:3708ff0198b7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
author iuc
date Tue, 18 Jul 2017 09:07:07 -0400
parents
children 8a33b442ecd9
comparison
equal deleted inserted replaced
-1:000000000000 0:3708ff0198b7
1 <tool id="query_tabular" name="Query Tabular" version="1.0.0">
2 <description>using sqlite sql</description>
3
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7
8 <requirements>
9 </requirements>
10
11 <command detect_errors="exit_code"><![CDATA[
12 cat '$query_file' &&
13 #if $add_to_database.withdb:
14 #if $save_db:
15 cp '$add_to_database.withdb' '$sqlitedb' &&
16 #else:
17 cp '$add_to_database.withdb' '$workdb' &&
18 #end if
19 #end if
20 python '$__tool_directory__/query_tabular.py'
21 #if $save_db
22 -s '$sqlitedb'
23 #else
24 -s '$workdb'
25 #end if
26 -j '$table_json'
27 #if $sqlquery:
28 -Q '$query_file'
29 $no_header
30 -o '$output'
31 #end if
32 ]]></command>
33 <configfiles>
34 <configfile name="query_file">
35 $sqlquery
36 </configfile>
37 <configfile name="table_json">
38 #import json
39 #set $jtbldef = dict()
40 #set $jtbls = []
41 #set $jtbldef['tables'] = $jtbls
42 #for $i,$tbl in enumerate($tables):
43 #set $jtbl = dict()
44 #set $jtbl['file_path'] = str($tbl.table)
45 #if $tbl.tbl_opts.table_name:
46 #set $tname = str($tbl.tbl_opts.table_name)
47 #else
48 #set $tname = 't' + str($i + 1)
49 #end if
50 #set $jtbl['table_name'] = $tname
51 ## #if $tbl.tbl_opts.sel_cols:
52 ## #set $jtbl['sel_cols'] = $tbl.tbl_opts.sel_cols
53 ## #end if
54 #if $tbl.tbl_opts.pkey_autoincr:
55 #set $jtbl['pkey_autoincr'] = str($tbl.tbl_opts.pkey_autoincr)
56 #end if
57 #if $tbl.tbl_opts.col_names:
58 #set $col_names = str($tbl.tbl_opts.col_names)
59 #if $tbl.tbl_opts.load_named_columns:
60 #set $jtbl['load_named_columns'] = True
61 #end if
62 #else
63 #set $col_names = ''
64 #end if
65 #set $jtbl['column_names'] = $col_names
66 #set $idx_unique = []
67 #set $idx_non = []
68 #for $idx in $tbl.tbl_opts.indexes:
69 #if $idx.unique:
70 #silent $idx_unique.append(str($idx.index_columns))
71 #else:
72 #silent $idx_non.append(str($idx.index_columns))
73 #end if
74 #end for
75 #if len($idx_unique) > 0:
76 #set $jtbl['unique'] = $idx_unique
77 #end if
78 #if len($idx_non) > 0:
79 #set $jtbl['index'] = $idx_non
80 #end if
81 #set $linefilters = $tbl.input_opts.linefilters
82 @LINEFILTERS@
83 #if $input_filters:
84 #set $jtbl['filters'] = $input_filters
85 #end if
86 #set $jtbls += [$jtbl]
87 #end for
88 #echo $json.dumps($jtbldef)
89 </configfile>
90 </configfiles>
91 <inputs>
92 <param name="workdb" type="hidden" value="workdb.sqlite" label=""/>
93 <section name="add_to_database" expanded="false" title="Add tables to an existing database">
94 <param name="withdb" type="data" format="sqlite" optional="true" label="Add tables to this Database"
95 help="Make sure your added table names are not already in this database"/>
96 </section>
97 <repeat name="tables" title="Database Table" min="0">
98 <param name="table" type="data" format="tabular" label="Tabular Dataset for Table"/>
99 <section name="input_opts" expanded="false" title="Filter Dataset Input">
100 <expand macro="macro_line_filters" />
101 </section>
102 <section name="tbl_opts" expanded="false" title="Table Options">
103 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table">
104 <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help>
105 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator>
106 </param>
107 <param name="col_names" type="text" value="" optional="true" label="Specify Column Names (comma-separated list)">
108 <help>By default, table columns will be named: c1,c2,c3,...,cn (column names for a table must be unique)
109 You can override the default names by entering a comma-separated list of names, e.g. ',name1,,,name2' would rename the second and fifth columns.
110 </help>
111 <sanitizer sanitize="False"/> <!-- sanitizing is off; presumably so quoted, back-quoted and bracketed names reach the validator unchanged (confirm) -->
112 <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be enclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
113 </param>
114 <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load the columns you have named into database"/>
115 <param name="pkey_autoincr" type="text" value="" optional="true" label="Add an auto increment primary key column with this name"
116 help="Only creates this additional column when a name is entered. (This can not be the same name as any of the other columns in this table.)">
117 <validator type="regex" message="Column name">^([A-Za-z]\w*)?$</validator>
118 </param>
119 <repeat name="indexes" title="Table Index"> <!-- one entry per index to create on this table -->
120 <param name="unique" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="This is a unique index"/>
121 <param name="index_columns" type="text" value="" label="Index on Columns">
122 <help>Create an index on the column names: e.g. c1 or c2,c4</help>
123 <validator type="regex" message="Column name, separated by commas if more than one">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
124 </param>
125 </repeat>
126 </section>
127 </repeat>
128 <param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database in your history"
129 help="SQLite to tabular tool can run additional queries on this database"/>
130 <param name="sqlquery" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to generate tabular output">
131 <help>By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn</help>
132 <sanitizer sanitize="False"/>
133 <validator type="regex" message="The query must be a SELECT statement with a FROM clause">(?ims)^\s*select\s+.*\s+from\s+.*$</validator> <!-- the inline flags (?ims) must lead the pattern: Python's re warns about global flags placed after the ^ anchor since 3.6 and rejects them from 3.11 -->
134 </param>
135 <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers from tabular output"/>
136 </inputs>
137 <outputs>
138 <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}">
139 <filter>save_db</filter>
140 </data>
141 <data format="tabular" name="output" label="query results on ${on_string}">
142 <filter>not save_db or (sqlquery and len(sqlquery.strip()) > 0)</filter>
143 </data>
144 </outputs>
145 <tests>
146 <test>
147 <repeat name="tables">
148 <param name="table" ftype="tabular" value="customers.tsv"/>
149 <section name="input_opts">
150 <repeat name="linefilters">
151 <conditional name="filter">
152 <param name="filter_type" value="comment"/>
153 <param name="comment_char" value="35"/>
154 </conditional>
155 </repeat>
156 </section>
157 <section name="tbl_opts">
158 <param name="table_name" value="customers"/>
159 <param name="col_names" value="CustomerID,FirstName,LastName,Email,DOB,Phone"/>
160 </section>
161 </repeat>
162 <repeat name="tables">
163 <param name="table" ftype="tabular" value="sales.tsv"/>
164 <section name="input_opts">
165 <repeat name="linefilters">
166 <conditional name="filter">
167 <param name="filter_type" value="comment"/>
168 <param name="comment_char" value="35"/>
169 </conditional>
170 </repeat>
171 </section>
172 <section name="tbl_opts">
173 <param name="table_name" value="sales"/>
174 <param name="col_names" value="CustomerID,Date,SaleAmount"/>
175 </section>
176 </repeat>
177 <param name="sqlquery" value="SELECT FirstName,LastName,sum(SaleAmount) as &quot;TotalSales&quot; FROM customers join sales on customers.CustomerID = sales.CustomerID GROUP BY customers.CustomerID ORDER BY TotalSales DESC"/>
178 <output name="output" file="sales_results.tsv"/>
179 </test>
180
181 <test>
182 <repeat name="tables">
183 <param name="table" ftype="tabular" value="customers.tsv"/>
184 <section name="input_opts">
185 <repeat name="linefilters">
186 <conditional name="filter">
187 <param name="filter_type" value="comment"/>
188 <param name="comment_char" value="35"/>
189 </conditional>
190 </repeat>
191 </section>
192 <section name="tbl_opts">
193 <param name="col_names" value=",FirstName,LastName,,DOB,"/>
194 </section>
195 </repeat>
196 <repeat name="tables">
197 <param name="table" ftype="tabular" value="sales.tsv"/>
198 <section name="input_opts">
199 <repeat name="linefilters">
200 <conditional name="filter">
201 <param name="filter_type" value="skip"/>
202 <param name="skip_lines" value="1"/>
203 </conditional>
204 </repeat>
205 </section>
206 </repeat>
207 <param name="sqlquery" value="SELECT FirstName,LastName,sum(t2.c3) as &quot;TotalSales&quot; FROM t1 join t2 on t1.c1 = t2.c1 GROUP BY t1.c1 ORDER BY TotalSales DESC;"/>
208 <output name="output" file="sales_results.tsv"/>
209 </test>
210
211 <test>
212 <repeat name="tables">
213 <param name="table" ftype="tabular" value="customers.tsv"/>
214 <section name="input_opts">
215 <repeat name="linefilters">
216 <conditional name="filter">
217 <param name="filter_type" value="skip"/>
218 <param name="skip_lines" value="1"/>
219 </conditional>
220 </repeat>
221 </section>
222 <section name="tbl_opts">
223 <param name="col_names" value=",FirstName,LastName,,BirthDate,"/>
224 </section>
225 </repeat>
226 <param name="sqlquery" value="select FirstName,LastName,re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as &quot;DOB&quot; from t1 WHERE re_search('[hp]er',c4)"/>
227 <output name="output" file="regex_results.tsv"/>
228 </test>
229
230 <test>
231 <repeat name="tables">
232 <param name="table" ftype="tabular" value="IEDB.tsv"/>
233 <section name="input_opts">
234 <repeat name="linefilters">
235 <conditional name="filter">
236 <param name="filter_type" value="comment"/>
237 <param name="comment_char" value="35"/>
238 </conditional>
239 </repeat>
240 </section>
241 <section name="tbl_opts">
242 <param name="table_name" value="iedb"/>
243 <param name="col_names" value="ID,allele,seq_num,start,end,length,peptide,method,percentile_rank,ann_ic50,ann_rank,smm_ic50,smm_rank,comblib_sidney2008_score,comblib_sidney2008_rank,netmhcpan_ic50,netmhcpan_rank"/>
244 </section>
245 </repeat>
246 <repeat name="tables">
247 <param name="table" ftype="tabular" value="netMHC_summary.tsv"/>
248 <section name="input_opts">
249 <repeat name="linefilters">
250 <conditional name="filter">
251 <param name="filter_type" value="skip"/>
252 <param name="skip_lines" value="1"/>
253 </conditional>
254 </repeat>
255 </section>
256 <section name="tbl_opts">
257 <param name="table_name" value="mhc_summary"/>
258 <param name="col_names" value="pos,peptide,logscore,affinity,Bind_Level,Protein,Allele"/>
259 </section>
260 </repeat>
261 <param name="sqlquery" value="select iedb.ID,iedb.peptide,iedb.start,iedb.end,iedb.percentile_rank,mhc_summary.logscore,mhc_summary.affinity,mhc_summary.Bind_Level from iedb left outer join mhc_summary on iedb.peptide = mhc_summary.peptide order by affinity,Bind_Level,percentile_rank"/>
262 <output name="output" file="query_results.tsv"/>
263 </test>
264
265 <test>
266 <section name="add_to_database">
267 <param name="withdb" ftype="sqlite" value="testdb.sqlite"/>
268 </section>
269 <repeat name="tables">
270 <param name="table" ftype="tabular" value="pets.tsv"/>
271 <section name="input_opts">
272 <repeat name="linefilters">
273 <conditional name="filter">
274 <param name="filter_type" value="regex"/>
275 <param name="regex_pattern" value="^\d+"/>
276 <param name="regex_action" value="include_find"/>
277 </conditional>
278 </repeat>
279 <repeat name="linefilters">
280 <conditional name="filter">
281 <param name="filter_type" value="comment"/>
282 <param name="comment_char" value="35"/>
283 </conditional>
284 </repeat>
285 <repeat name="linefilters">
286 <conditional name="filter">
287 <param name="filter_type" value="append_line_num"/>
288 </conditional>
289 </repeat>
290 <repeat name="linefilters">
291 <conditional name="filter">
292 <param name="filter_type" value="select_columns"/>
293 <param name="columns" value="7,2,3,4,1"/>
294 </conditional>
295 </repeat>
296 <repeat name="linefilters">
297 <conditional name="filter">
298 <param name="filter_type" value="replace"/>
299 <param name="column" value="c4"/>
300 <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/>
301 <param name="regex_replace" value="19\3-\2-\1"/>
302 </conditional>
303 </repeat>
304 </section>
305 <section name="tbl_opts">
306 <param name="table_name" value="people"/>
307 <param name="col_names" value="id,first,last,dob,pets"/>
308 </section>
309 </repeat>
310 <param name="sqlquery" value="SELECT people.id,first,last,pets,quote FROM people JOIN contacts ON people.first = contacts.first_name"/>
311 <output name="output" file="add_to_db_results.tsv"/>
312 </test>
313
314 <test>
315 <repeat name="tables">
316 <param name="table" ftype="tabular" value="pets.tsv"/>
317 <section name="input_opts">
318 <repeat name="linefilters">
319 <conditional name="filter">
320 <param name="filter_type" value="regex"/>
321 <param name="regex_pattern" value="^\d+"/>
322 <param name="regex_action" value="include_find"/>
323 </conditional>
324 </repeat>
325 <repeat name="linefilters">
326 <conditional name="filter">
327 <param name="filter_type" value="comment"/>
328 <param name="comment_char" value="35"/>
329 </conditional>
330 </repeat>
331 <repeat name="linefilters">
332 <conditional name="filter">
333 <param name="filter_type" value="append_line_num"/>
334 </conditional>
335 </repeat>
336 <repeat name="linefilters">
337 <conditional name="filter">
338 <param name="filter_type" value="select_columns"/>
339 <param name="columns" value="7,2,3,4,1"/>
340 </conditional>
341 </repeat>
342 <repeat name="linefilters">
343 <conditional name="filter">
344 <param name="filter_type" value="replace"/>
345 <param name="column" value="c4"/>
346 <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/>
347 <param name="regex_replace" value="19\3-\2-\1"/>
348 </conditional>
349 </repeat>
350 </section>
351 <section name="tbl_opts">
352 <param name="table_name" value="people"/>
353 <param name="col_names" value="id,first,last,dob,pets"/>
354 </section>
355 </repeat>
356 <repeat name="tables">
357 <param name="table" ftype="tabular" value="pets.tsv"/>
358 <section name="input_opts">
359 <repeat name="linefilters">
360 <conditional name="filter">
361 <param name="filter_type" value="regex"/>
362 <param name="regex_pattern" value="^\d+"/>
363 <param name="regex_action" value="include_find"/>
364 </conditional>
365 </repeat>
366 <repeat name="linefilters">
367 <conditional name="filter">
368 <param name="filter_type" value="append_line_num"/>
369 </conditional>
370 </repeat>
371 <repeat name="linefilters">
372 <conditional name="filter">
373 <param name="filter_type" value="select_columns"/>
374 <param name="columns" value="c7,c5,c6"/>
375 </conditional>
376 </repeat>
377 <repeat name="linefilters">
378 <conditional name="filter">
379 <param name="filter_type" value="normalize"/>
380 <param name="columns" value="c2,c3"/>
381 <param name="separator" value=","/>
382 </conditional>
383 </repeat>
384 </section>
385 <section name="tbl_opts">
386 <param name="table_name" value="pet"/>
387 <param name="col_names" value="id,name,animal"/>
388 </section>
389 </repeat>
390 <param name="sqlquery" value="SELECT people.id,first,last,dob,name,animal,pets FROM people JOIN pet ON people.id = pet.id WHERE animal = 'cat'"/>
391 <output name="output" file="pet_normalized_query_results.tsv"/>
392 </test>
393
394 </tests>
395 <help><![CDATA[
396 =============
397 Query Tabular
398 =============
399
400 **Inputs**
401
402 Loads tabular datasets into a SQLite_ data base.
403
404 An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base.
405
406
407 @LINEFILTERS_HELP@
408
409
410 **Outputs**
411
412 The results of a SQL query are output to the history as a tabular file.
413
414 The SQLite_ data base can also be saved and output as a dataset in the history.
415
416 *(The* **SQLite to tabular** *tool can run additional queries on this database.)*
417
418
419 @QUERY_HELP@
420
421 @LINEFILTERS_HELP_EXAMPLE@
422
423
424 Table name: pets
425
426 Table columns: Pets,FirstName,LastName,Birthdate,PetNames,PetType,line_num,entry_num,row_num
427
428 Query: SELECT * FROM pets
429
430 Result:
431
432 ====== ========== ======== ========== ========= ======== ========= ========== ========
433 #Pets FirstName LastName BirthDate PetNames PetType line_num entry_num row_num
434 ====== ========== ======== ========== ========= ======== ========= ========== ========
435 2 Paula Brown 1978-05-24 Rex dog 3 1 1
436 2 Paula Brown 1978-05-24 Fluff cat 3 1 2
437 1 Steven Jones 1974-04-04 Allie cat 4 2 3
438 0 Jane Doe 1978-05-24 5 3 4
439 1 James Smith 1980-10-20 Spot 6 4 5
440 ====== ========== ======== ========== ========= ======== ========= ========== ========
441
442
443 **Normalizing by Line Filtering into 2 Tables**
444
445 *Relational database operations work with single-valued column entries.
446 To apply relational operations to tabular files that contain fields with lists of values,
447 we need to "normalize" those fields, duplicating lines for each item in the list.
448 In this example we create 2 tables, one for single-valued fields and a second with list-valued fields normalized.
449 Because we add a line number first for each table, we can join the 2 tables on the line number column.*
450 https://en.wikipedia.org/wiki/First_normal_form
451
452 *People Table*
453
454 ::
455
456 Filter 1 - by regex expression matching [include]: '^\d+' (include lines that start with a number)
457 Filter 2 - append a line number column:
458 Filter 3 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format)
459 Filter 4 - select columns 7,2,3,4,1
460
461 Table: People
462 Columns: id,FirstName,LastName,DOB,Pets
463
464 == ========= ======== ========== ====
465 id FirstName LastName DOB Pets
466 == ========= ======== ========== ====
467 1 Paula Brown 1978-05-24 2
468 2 Steven Jones 1974-04-04 1
469 3 Jane Doe 1978-05-24 0
470 4 James Smith 1980-10-20 1
471 == ========= ======== ========== ====
472
473
474 *Pet Table*
475
476 ::
477
478 Filter 1 - by regex expression matching [include]: '^\d+' (include lines that start with a number)
479 Filter 2 - append a line number column:
480 Filter 3 - by regex expression matching [exclude]: '^0\t' (exclude lines with no pets)
481 Filter 4 - normalize list columns[5,6]:
482 Filter 5 - select columns 7,5,6
483
484 Table: Pet
485 Columns: id,PetName,PetType
486
487 == ======== ========
488 id PetName PetType
489 == ======== ========
490 1 Rex dog
491 1 Fluff cat
492 2 Allie cat
493 4 Spot
494 == ======== ========
495
496
497 Query: SELECT FirstName,LastName,PetName FROM People JOIN Pet ON People.id = Pet.id WHERE PetType = 'cat';
498
499 Result:
500
501 ========= ======== ========
502 FirstName LastName PetName
503 ========= ======== ========
504 Paula Brown Fluff
505 Steven Jones Allie
506 ========= ======== ========
507
508
509 ]]></help>
510 </tool>