Mercurial > repos > jjohnson > query_tabular
changeset 21:357fe86f245d draft
Uploaded
author | jjohnson |
---|---|
date | Fri, 14 Jul 2017 17:34:22 -0400 |
parents | ab27c4bd14b9 |
children | bed5018e7ae3 |
files | filter_tabular.xml query_db.py query_tabular.xml sqlite_to_tabular.xml test-data/._add_to_db_results.tsv test-data/._filtered_people_results.tsv test-data/._filtered_pets_results.tsv test-data/._query_results.tsv test-data/add_to_db_results.tsv test-data/filtered_people_results.tsv test-data/filtered_pets_results.tsv test-data/pet_normalized_query_results.tsv test-data/query_results.tsv test-data/testdb.sqlite |
diffstat | 14 files changed, 290 insertions(+), 120 deletions(-) [+] |
line wrap: on
line diff
--- a/filter_tabular.xml Fri Jul 14 11:39:27 2017 -0400 +++ b/filter_tabular.xml Fri Jul 14 17:34:22 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="filter_tabular" name="Filter Tabular" version="5.0.0"> +<tool id="filter_tabular" name="Filter Tabular" version="1.0.0"> <description></description> <macros> @@ -7,11 +7,8 @@ <requirements> </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - <command><![CDATA[ - python $__tool_directory__/filter_tabular.py + <command detect_errors="exit_code"><![CDATA[ + python '$__tool_directory__/filter_tabular.py' -i '$input' -j '$filter_json' -o '$output' @@ -36,41 +33,72 @@ <test> <param name="input" ftype="tabular" value="pets.tsv"/> <repeat name="linefilters"> - <param name="filter_type" value="comment"/> - <param name="comment_char" value="35"/> + <conditional name="filter"> + <param name="filter_type" value="regex"/> + <param name="regex_pattern" value="^\d+"/> + <param name="regex_action" value="include_find"/> + </conditional> </repeat> <repeat name="linefilters"> - <param name="filter_type" value="append_line_num"/> + <conditional name="filter"> + <param name="filter_type" value="append_line_num"/> + </conditional> </repeat> <repeat name="linefilters"> - <param name="filter_type" value="select_columns"/> - <param name="columns" value="7,2,3,4,1"/> + <conditional name="filter"> + <param name="filter_type" value="select_columns"/> + <param name="columns" value="7,2,3,4,1"/> + </conditional> </repeat> <repeat name="linefilters"> - <param name="filter_type" value="replace"/> - <param name="column" value="c4"/> - <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/> - <param name="regex_replace" value="19\3-\2-\1"/> + <conditional name="filter"> + <param name="filter_type" value="replace"/> + <param name="column" value="c4"/> + <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/> + <param name="regex_replace" value="19\3-\2-\1"/> + </conditional> </repeat> <output name="output" file="filtered_people_results.tsv"/> </test> <test> <param name="input" ftype="tabular" value="pets.tsv"/> <repeat name="linefilters"> - <param name="filter_type" value="comment"/> - <param name="comment_char" value="35"/> + <conditional name="filter"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </conditional> </repeat> <repeat name="linefilters"> - <param name="filter_type" value="append_line_num"/> + <conditional name="filter"> + <param name="filter_type" value="regex"/> + <param name="regex_pattern" value="^\d+"/> + <param name="regex_action" value="include_find"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="append_line_num"/> + </conditional> </repeat> <repeat name="linefilters"> - <param name="filter_type" value="select_columns"/> - <param name="columns" value="c7,c5,c6"/> + <conditional name="filter"> + <param name="filter_type" value="select_columns"/> + <param name="columns" value="c7,c5,c6"/> + </conditional> </repeat> <repeat name="linefilters"> - <param name="filter_type" value="normalize"/> - <param name="columns" value="c2,c3"/> - <param name="separator" value=","/> + <conditional name="filter"> + <param name="filter_type" value="normalize"/> + <param name="columns" value="c2,c3"/> + <param name="separator" value=","/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="regex"/> + <param name="regex_pattern" value="^\d+\t\t"/> + <param name="regex_action" value="exclude_match"/> + </conditional> </repeat> <output name="output" file="filtered_pets_results.tsv"/> </test>
--- a/query_db.py Fri Jul 14 11:39:27 2017 -0400 +++ b/query_db.py Fri Jul 14 17:34:22 2017 -0400 @@ -28,7 +28,7 @@ return re.sub(expr, replace, item) -def get_connection(sqlitedb_path, addfunctions=False): +def get_connection(sqlitedb_path, addfunctions=True): conn = sqlite.connect(sqlitedb_path) if addfunctions: conn.create_function("re_match", 2, regex_match)
--- a/query_tabular.xml Fri Jul 14 11:39:27 2017 -0400 +++ b/query_tabular.xml Fri Jul 14 17:34:22 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="query_tabular" name="Query Tabular" version="5.0.0"> +<tool id="query_tabular" name="Query Tabular" version="1.0.0"> <description>using sqlite sql</description> <macros> @@ -7,29 +7,27 @@ <requirements> </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - <command><![CDATA[ - cat $query_file && + + <command detect_errors="exit_code"><![CDATA[ + cat '$query_file' && #if $add_to_database.withdb: #if $save_db: - cp "$add_to_database.withdb" "$sqlitedb" && + cp '$add_to_database.withdb' '$sqlitedb' && #else: - cp "$add_to_database.withdb" "$workdb" && + cp '$add_to_database.withdb' '$workdb' && #end if #end if - python $__tool_directory__/query_tabular.py + python '$__tool_directory__/query_tabular.py' #if $save_db - -s "$sqlitedb" + -s '$sqlitedb' #else - -s $workdb + -s '$workdb' #end if - -j $table_json + -j '$table_json' #if $sqlquery: - -Q "$query_file" + -Q '$query_file' $no_header - -o $output + -o '$output' #end if ]]></command> <configfiles> @@ -145,17 +143,36 @@ </data> </outputs> <tests> - <test> <repeat name="tables"> <param name="table" ftype="tabular" value="customers.tsv"/> - <param name="table_name" value="customers"/> - <param name="col_names" value="CustomerID,FirstName,LastName,Email,DOB,Phone"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="table_name" value="customers"/> + <param name="col_names" value="CustomerID,FirstName,LastName,Email,DOB,Phone"/> + </section> </repeat> <repeat name="tables"> <param name="table" ftype="tabular" value="sales.tsv"/> - <param name="table_name" value="sales"/> - <param name="col_names" value="CustomerID,Date,SaleAmount"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="table_name" value="sales"/> + <param name="col_names" value="CustomerID,Date,SaleAmount"/> + </section> </repeat> <param name="sqlquery" value="SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales" FROM customers join sales on customers.CustomerID = sales.CustomerID GROUP BY customers.CustomerID ORDER BY TotalSales DESC"/> <output name="output" file="sales_results.tsv"/> @@ -164,10 +181,28 @@ <test> <repeat name="tables"> <param name="table" ftype="tabular" value="customers.tsv"/> - <param name="col_names" value=",FirstName,LastName,,DOB,"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="col_names" value=",FirstName,LastName,,DOB,"/> + </section> </repeat> <repeat name="tables"> <param name="table" ftype="tabular" value="sales.tsv"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="skip"/> + <param name="skip_lines" value="1"/> + </conditional> + </repeat> + </section> </repeat> <param name="sqlquery" value="SELECT FirstName,LastName,sum(t2.c3) as "TotalSales" FROM t1 join t2 on t1.c1 = t2.c1 GROUP BY t1.c1 ORDER BY TotalSales DESC;"/> <output name="output" file="sales_results.tsv"/> @@ -176,7 +211,17 @@ <test> <repeat name="tables"> <param name="table" ftype="tabular" value="customers.tsv"/> - <param name="col_names" value=",FirstName,LastName,,BirthDate,"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="skip"/> + <param name="skip_lines" value="1"/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="col_names" value=",FirstName,LastName,,BirthDate,"/> + </section> </repeat> <param name="sqlquery" value="select FirstName,LastName,re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB" from t1 WHERE re_search('[hp]er',c4)"/> <output name="output" file="regex_results.tsv"/> @@ -185,62 +230,162 @@ <test> <repeat name="tables"> <param name="table" ftype="tabular" value="IEDB.tsv"/> - <param name="table_name" value="iedb"/> - <param name="col_names" value="ID,allele,seq_num,start,end,length,peptide,method,percentile_rank,ann_ic50,ann_rank,smm_ic50,smm_rank,comblib_sidney2008_score,comblib_sidney2008_rank,netmhcpan_ic50,netmhcpan_rank"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="table_name" value="iedb"/> + <param name="col_names" value="ID,allele,seq_num,start,end,length,peptide,method,percentile_rank,ann_ic50,ann_rank,smm_ic50,smm_rank,comblib_sidney2008_score,comblib_sidney2008_rank,netmhcpan_ic50,netmhcpan_rank"/> + </section> </repeat> <repeat name="tables"> <param name="table" ftype="tabular" value="netMHC_summary.tsv"/> - <param name="table_name" value="mhc_summary"/> - <param name="col_names" value="pos,peptide,logscore,affinity,Bind_Level,Protein,Allele"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="skip"/> + <param name="skip_lines" value="1"/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="table_name" value="mhc_summary"/> + <param name="col_names" value="pos,peptide,logscore,affinity,Bind_Level,Protein,Allele"/> + </section> </repeat> - <param name="sqlquery" value="select iedb.ID,iedb.peptide,iedb.start,iedb.end,iedb.percentile_rank,mhc_summary.logscore,mhc_summary.affinity,mhc_summary.Bind_Level from iedb left outer join mhc_summary on iedb.peptide = mhc_summary.peptide order by affinity,Bind_Level"/> + <param name="sqlquery" value="select iedb.ID,iedb.peptide,iedb.start,iedb.end,iedb.percentile_rank,mhc_summary.logscore,mhc_summary.affinity,mhc_summary.Bind_Level from iedb left outer join mhc_summary on iedb.peptide = mhc_summary.peptide order by affinity,Bind_Level,percentile_rank"/> <output name="output" file="query_results.tsv"/> </test> <test> + <section name="add_to_database"> + <param name="withdb" ftype="sqlite" value="testdb.sqlite"/> + </section> + <repeat name="tables"> + <param name="table" ftype="tabular" value="pets.tsv"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="regex"/> + <param name="regex_pattern" value="^\d+"/> + <param name="regex_action" value="include_find"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="append_line_num"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="select_columns"/> + <param name="columns" value="7,2,3,4,1"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="replace"/> + <param name="column" value="c4"/> + <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/> + <param name="regex_replace" value="19\3-\2-\1"/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="table_name" value="people"/> + <param name="col_names" value="id,first,last,dob,pets"/> + </section> + </repeat> + <param name="sqlquery" value="SELECT people.id,first,last,pets,quote FROM people JOIN contacts ON people.first = contacts.first_name"/> + <output name="output" file="add_to_db_results.tsv"/> + </test> + + <test> <repeat name="tables"> <param name="table" ftype="tabular" value="pets.tsv"/> - <repeat name="linefilters"> - <param name="filter_type" value="comment"/> - <param name="comment_char" value="35"/> - </repeat> - <repeat name="linefilters"> - <param name="filter_type" value="append_line_num"/> - </repeat> - <repeat name="linefilters"> - <param name="filter_type" value="select_columns"/> - <param name="columns" value="7,2,3,4,1"/> - </repeat> - <repeat name="linefilters"> - <param name="filter_type" value="replace"/> - <param name="column" value="c4"/> - <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/> - <param name="regex_replace" value="19\3-\2-\1"/> - </repeat> - <param name="table_name" value="people"/> - <param name="col_names" value="id,first,last,dob,pets"/> - + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="regex"/> + <param name="regex_pattern" value="^\d+"/> + <param name="regex_action" value="include_find"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="comment"/> + <param name="comment_char" value="35"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="append_line_num"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="select_columns"/> + <param name="columns" value="7,2,3,4,1"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="replace"/> + <param name="column" value="c4"/> + <param name="regex_pattern" value="(\d+)/(\d+)/(\d+)"/> + <param name="regex_replace" value="19\3-\2-\1"/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="table_name" value="people"/> + <param name="col_names" value="id,first,last,dob,pets"/> + </section> </repeat> <repeat name="tables"> <param name="table" ftype="tabular" value="pets.tsv"/> - <repeat name="linefilters"> - <param name="filter_type" value="comment"/> - <param name="comment_char" value="35"/> - </repeat> - <repeat name="linefilters"> - <param name="filter_type" value="append_line_num"/> - </repeat> - <repeat name="linefilters"> - <param name="filter_type" value="select_columns"/> - <param name="columns" value="c7,c5,c6"/> - </repeat> - <repeat name="linefilters"> - <param name="filter_type" value="normalize"/> - <param name="columns" value="c2,c3"/> - <param name="separator" value=","/> - </repeat> - <param name="table_name" value="pet"/> - <param name="col_names" value="id,name,animal"/> + <section name="input_opts"> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="regex"/> + <param name="regex_pattern" value="^\d+"/> + <param name="regex_action" value="include_find"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="append_line_num"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="select_columns"/> + <param name="columns" value="c7,c5,c6"/> + </conditional> + </repeat> + <repeat name="linefilters"> + <conditional name="filter"> + <param name="filter_type" value="normalize"/> + <param name="columns" value="c2,c3"/> + <param name="separator" value=","/> + </conditional> + </repeat> + </section> + <section name="tbl_opts"> + <param name="table_name" value="pet"/> + <param name="col_names" value="id,name,animal"/> + </section> </repeat> <param name="sqlquery" value="SELECT people.id,first,last,dob,name,animal,pets FROM people JOIN pet ON people.id = pet.id WHERE animal = 'cat'"/> <output name="output" file="pet_normalized_query_results.tsv"/>
--- a/sqlite_to_tabular.xml Fri Jul 14 11:39:27 2017 -0400 +++ b/sqlite_to_tabular.xml Fri Jul 14 17:34:22 2017 -0400 @@ -1,20 +1,17 @@ <?xml version="1.0"?> -<tool id="sqlite_to_tabular" name="SQLite to tabular" version="5.0.0"> +<tool id="sqlite_to_tabular" name="SQLite to tabular" version="1.0.0"> <description>for SQL query</description> <macros> <import>macros.xml</import> </macros> - <stdio> - <exit_code range="1:" level="fatal" description="Error" /> - </stdio> - <command><![CDATA[ - python $__tool_directory__/sqlite_to_tabular.py - --sqlitedb="$sqlitedb" - --query_file="$query_file" + <command detect_errors="exit_code"><![CDATA[ + python '$__tool_directory__/sqlite_to_tabular.py' + --sqlitedb='$sqlitedb' + --query_file='$query_file' $no_header - --output="$query_results" + --output='$query_results' ]]></command> <configfiles> <configfile name="query_file">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/add_to_db_results.tsv Fri Jul 14 17:34:22 2017 -0400 @@ -0,0 +1,3 @@ +#id first last pets quote +1 Paula Brown 2 Time flies like and arrow. Fruit flies like a banana. +2 Steven Jones 1 I would have wrtten less if I had more time
--- a/test-data/filtered_people_results.tsv Fri Jul 14 11:39:27 2017 -0400 +++ b/test-data/filtered_people_results.tsv Fri Jul 14 17:34:22 2017 -0400 @@ -1,5 +1,4 @@ -1 FirstName LastName DOB Pets -2 Paula Brown 1978-05-24 2 -3 Steven Jones 1974-04-04 1 -4 Jane Doe 1978-05-24 0 -5 James Smith 1980-10-20 1 +1 Paula Brown 1978-05-24 2 +2 Steven Jones 1974-04-04 1 +3 Jane Doe 1978-05-24 0 +4 James Smith 1980-10-20 1
--- a/test-data/filtered_pets_results.tsv Fri Jul 14 11:39:27 2017 -0400 +++ b/test-data/filtered_pets_results.tsv Fri Jul 14 17:34:22 2017 -0400 @@ -1,6 +1,4 @@ -1 PetNames PetType -2 Rex dog -2 Fluff cat -3 Allie cat -4 -5 Spot +1 Rex dog +1 Fluff cat +2 Allie cat +4 Spot
--- a/test-data/pet_normalized_query_results.tsv Fri Jul 14 11:39:27 2017 -0400 +++ b/test-data/pet_normalized_query_results.tsv Fri Jul 14 17:34:22 2017 -0400 @@ -1,3 +1,3 @@ #id first last dob name animal pets -2 Paula Brown 1978-05-24 Fluff cat 2 -3 Steven Jones 1974-04-04 Allie cat 1 +1 Paula Brown 1978-05-24 Fluff cat 2 +2 Steven Jones 1974-04-04 Allie cat 1
--- a/test-data/query_results.tsv Fri Jul 14 11:39:27 2017 -0400 +++ b/test-data/query_results.tsv Fri Jul 14 17:34:22 2017 -0400 @@ -5,13 +5,13 @@ ADAMTSL1 SLDMCISGL 1 9 30.5 0.698 26 SB PPAP2C FGMYCMVFL 2 10 2.3 0.501 222 WB PPAP2C FGMYCMVFL 2 10 6.55 0.501 222 WB -ADAMTSL1 MCISGLCQL 4 12 6.65 0.306 1823 None -ADAMTSL1 MCISGLCQL 4 12 14.0 0.306 1823 None -PPAP2C MYCMVFLVK 4 12 0.65 0.07 23399 None -PPAP2C MYCMVFLVK 4 12 54.0 0.07 23399 None -ADAMTSL1 LDMCISGLC 2 10 42.0 0.069 23677 None -ADAMTSL1 LDMCISGLC 2 10 76.0 0.069 23677 None -PPAP2C SFGMYCMVF 1 9 0.5 0.066 24390 None -PPAP2C SFGMYCMVF 1 9 45.0 0.066 24390 None -ADAMTSL1 DMCISGLCQ 3 11 64.5 0.042 31630 None -ADAMTSL1 DMCISGLCQ 3 11 97.0 0.042 31630 None +ADAMTSL1 MCISGLCQL 4 12 6.65 0.306 1823 +ADAMTSL1 MCISGLCQL 4 12 14.0 0.306 1823 +PPAP2C MYCMVFLVK 4 12 0.65 0.07 23399 +PPAP2C MYCMVFLVK 4 12 54.0 0.07 23399 +ADAMTSL1 LDMCISGLC 2 10 42.0 0.069 23677 +ADAMTSL1 LDMCISGLC 2 10 76.0 0.069 23677 +PPAP2C SFGMYCMVF 1 9 0.5 0.066 24390 +PPAP2C SFGMYCMVF 1 9 45.0 0.066 24390 +ADAMTSL1 DMCISGLCQ 3 11 64.5 0.042 31630 +ADAMTSL1 DMCISGLCQ 3 11 97.0 0.042 31630