changeset 4:973f03d82c86 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 6a362345c31764c28bb6328da1f0d81ef8f35d40
author iuc
date Thu, 03 May 2018 10:16:38 -0400
parents 1ea4e668bf73
children c84313fa6452
files macros.xml query_db.py query_tabular.py query_tabular.xml test-data/psm_dbmod_output.tsv test-data/psm_dbmod_output1.tsv
diffstat 6 files changed, 154 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Mon Nov 06 23:20:29 2017 -0500
+++ b/macros.xml	Thu May 03 10:16:38 2018 -0400
@@ -106,6 +106,13 @@
           <when value="no"/>
       </conditional>
   </xml>
+  <xml name="sql_query_input">
+        <param name="sqlquery" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to generate tabular output">
+            <help>By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn</help>
+            <sanitizer sanitize="False"/>
+            <validator type="regex" message="">^(?ims)\s*select\s+.*\s+from\s+.*$</validator>
+        </param>
+  </xml>
   <xml name="macro_line_filters">
                 <repeat name="linefilters" title="Filter Tabular Input Lines">
                     <conditional name="filter">
--- a/query_db.py	Mon Nov 06 23:20:29 2017 -0500
+++ b/query_db.py	Thu May 03 10:16:38 2018 -0400
@@ -59,9 +59,13 @@
 def run_query(conn, query, outputFile, no_header=False, comment_char='#'):
     cur = conn.cursor()
     results = cur.execute(query)
-    if not no_header:
-        outputFile.write("%s%s\n" % (comment_char, '\t'.join(
-            [str(col[0]) for col in cur.description])))
-    for i, row in enumerate(results):
-        outputFile.write("%s\n" % '\t'.join(
-            [str(val) if val is not None else '' for val in row]))
+    if outputFile is not None:
+        if not no_header:
+            outputFile.write("%s%s\n" % (comment_char, '\t'.join(
+                [str(col[0]) for col in cur.description])))
+        for i, row in enumerate(results):
+            outputFile.write("%s\n" % '\t'.join(
+                [str(val) if val is not None else '' for val in row]))
+    else:
+        conn.commit()
+        return results
--- a/query_tabular.py	Mon Nov 06 23:20:29 2017 -0500
+++ b/query_tabular.py	Thu May 03 10:16:38 2018 -0400
@@ -59,6 +59,9 @@
                       help='comment character to prefix column header line')
     parser.add_option('-o', '--output', dest='output', default=None,
                       help='Output file for query results')
+    parser.add_option('-d', '--debug', dest='debug', default=False,
+                      action='store_true',
+                      help='Output info to stderr')
     (options, args) = parser.parse_args()
 
     # determine output destination
@@ -109,9 +112,35 @@
         try:
             with open(options.jsonfile) as fh:
                 tdef = json.load(fh)
+                if options.debug:
+                    print('JSON: %s' % tdef, file=sys.stderr)
                 if 'tables' in tdef:
                     for ti, table in enumerate(tdef['tables']):
                         _create_table(ti, table)
+                if 'sql_stmts' in tdef:
+                    for si, stmt in enumerate(tdef['sql_stmts']):
+                        rowcount = run_query(get_connection(options.sqlitedb), stmt, None)
+                        if options.debug:
+                            print('\nDB modification: %s  \nrowcount: %s' %
+                                  (stmt, rowcount), file=sys.stderr)
+                if 'queries' in tdef:
+                    for qi, qstmt in enumerate(tdef['queries']):
+                        if 'header' in qstmt:
+                            no_header = False
+                            comment_char = qstmt['header']
+                        else:
+                            no_header = True
+                            comment_char = None
+                        with open(qstmt['result_file'], 'w') as fh:
+                            query = qstmt['query']
+                            rowcount = run_query(get_connection(options.sqlitedb),
+                                                 query,
+                                                 fh,
+                                                 no_header=no_header,
+                                                 comment_char=comment_char)
+                        if options.debug:
+                            print('\nSQL: %s  \nrowcount: %s' %
+                                  (query, rowcount), file=sys.stderr)
         except Exception as e:
             exit('Error: %s' % (e))
 
@@ -131,9 +160,13 @@
             exit('Error: %s' % (e))
     else:
         try:
-            run_query(get_connection(options.sqlitedb), query, outputFile,
-                      no_header=options.no_header,
-                      comment_char=options.comment_char)
+            rowcount = run_query(get_connection(options.sqlitedb),
+                                 query, outputFile,
+                                 no_header=options.no_header,
+                                 comment_char=options.comment_char)
+            if options.debug:
+                print('\nSQL: %s  \nrowcount: %s' %
+                      (query, rowcount), file=sys.stderr)
         except Exception as e:
             exit('Error: %s' % (e))
 
--- a/query_tabular.xml	Mon Nov 06 23:20:29 2017 -0500
+++ b/query_tabular.xml	Thu May 03 10:16:38 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="query_tabular" name="Query Tabular" version="2.0.0">
+<tool id="query_tabular" name="Query Tabular" version="3.0.0">
     <description>using sqlite sql</description>
 
     <macros>
@@ -17,7 +17,7 @@
                 cp '$add_to_database.withdb' '$workdb' &&
             #end if 
         #end if
-        python '$__tool_directory__/query_tabular.py'
+        python '$__tool_directory__/query_tabular.py' -d
         #if $save_db
         -s '$sqlitedb'
         #else
@@ -88,6 +88,30 @@
   #end if
   #set $jtbls += [$jtbl]
 #end for
+#set $jstmts = []
+#for $i,$stmt in enumerate($modify_database.sql_stmts):
+    #set $jstmts += [str($stmt.sqlstmt)]
+#end for
+#if len($jstmts) > 0:
+   #set $jtbldef['sql_stmts'] = $jstmts
+#end if
+#set $jqueries = []
+#for $i,$query in enumerate($addqueries.queries):
+    #set $jquery = dict()
+    #set $jquery['query'] = str($query.sqlquery)
+    #set $jquery['result_file'] = 'results' + str($i) + '.tsv'
+    #if $query.query_result.header == 'yes':
+        #set $header_prefix = ''
+        #if $query.query_result.header_prefix:
+            #set $header_prefix = chr(int(str($query.query_result.header_prefix)))
+        #end if
+        #set $jquery['header'] = $header_prefix
+    #end if
+    #set $jqueries += [$jquery]
+#end for
+#if len($jqueries) > 0:
+   #set $jtbldef['queries'] = $jqueries
+#end if
 #echo $json.dumps($jtbldef)
         </configfile>
     </configfiles>
@@ -132,14 +156,24 @@
                 </repeat>
             </section>
         </repeat>
+        <section name="modify_database" expanded="false" title="Modify the database">
+            <repeat name="sql_stmts" title="Database Manipulation SQL Statements" min="0">
+                <param name="sqlstmt" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to modify the database">
+                    <help>These modify the SQLite database</help>
+                    <sanitizer sanitize="False"/>
+                </param>
+            </repeat>
+        </section>
         <param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database in your history"
             help="SQLite to tabular tool can run additional queries on this database"/>
-        <param name="sqlquery" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to generate tabular output">
-                <help>By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn</help>
-                <sanitizer sanitize="False"/>
-                <validator type="regex" message="">^(?ims)\s*select\s+.*\s+from\s+.*$</validator>
-        </param>
+        <expand macro="sql_query_input"/>
         <expand macro="result_results_header_line" />
+        <section name="addqueries" expanded="false" title="Additional Queries">
+            <repeat name="queries" title="Database Manipulation SQL Statements" min="0" max="3">
+                <expand macro="sql_query_input"/>
+                <expand macro="result_results_header_line" />
+            </repeat>
+        </section>
     </inputs>
     <outputs>
         <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}">
@@ -148,6 +182,15 @@
         <data format="tabular" name="output" label="query results on ${on_string}">
             <filter>not save_db or (sqlquery and len(sqlquery.strip()) > 0)</filter>
         </data>
+        <data format="tabular" name="output1" label="query 1 results on ${on_string}" from_work_dir="results0.tsv">
+            <filter>len(addqueries['queries']) > 0</filter>
+        </data>
+        <data format="tabular" name="output2" label="query 2 results on ${on_string}" from_work_dir="results1.tsv">
+            <filter>len(addqueries['queries']) > 1</filter>
+        </data>
+        <data format="tabular" name="output3" label="query 3 results on ${on_string}" from_work_dir="results2.tsv">
+            <filter>len(addqueries['queries']) > 2</filter>
+        </data>
     </outputs>
     <tests>
         <test>
@@ -448,6 +491,46 @@
             <output name="output" file="psm_report_out2.tsv"/>
         </test>
 
+        <test>
+            <repeat name="tables">
+                <param name="table" ftype="tabular" value="psm_report.tsv"/>
+                <section name="input_opts">
+                    <repeat name="linefilters">
+                        <conditional name="filter">
+                            <param name="filter_type" value="select_columns"/>
+                            <param name="columns" value="1,3,2,6,14,19,23"/>
+                        </conditional>
+                    </repeat>
+                </section>
+                <section name="tbl_opts">
+                    <param name="table_name" value="PSMs"/>
+                    <param name="column_names_from_first_line" value="True"/>
+                    <param name="col_names" value="scan,,,,,,confidence"/>
+                </section>
+            </repeat>
+            <section name="modify_database">
+                <repeat name="sql_stmts">
+                    <param name="sqlstmt" value="UPDATE psms SET confidence = 99.999 WHERE confidence = 100.0"/>
+                </repeat>
+            </section>
+            <param name="sqlquery" value="SELECT scan,&quot;m/z&quot;, &quot;Precursor m/z Error [ppm]&quot;, Sequence, &quot;Protein(s)&quot;, confidence FROM PSMs WHERE NOT re_search(', ',&quot;Protein(s)&quot;)"/>
+            <conditional name="query_result">
+                <param name="header" value="yes"/>
+                <param name="header_prefix" value="#"/>
+            </conditional>
+            <section name="addqueries">
+                <repeat name="queries">
+                    <param name="sqlquery" value="SELECT * FROM psms WHERE confidence > 97.0"/>
+                    <conditional name="query_result">
+                        <param name="header" value="yes"/>
+                        <param name="header_prefix" value=""/>
+                    </conditional>
+                </repeat>
+            </section>
+            <output name="output" file="psm_dbmod_output.tsv"/>
+            <output name="output1" file="psm_dbmod_output1.tsv"/>
+        </test>
+
     </tests>
     <help><![CDATA[
 =============
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_dbmod_output.tsv	Thu May 03 10:16:38 2018 -0400
@@ -0,0 +1,5 @@
+#scan	m/z	Precursor m/z Error	Sequence	Protein(s)	confidence
+1	523.272583	-4.42404529799	PYANQPTVR	NP_116558	99.999
+3	652.843567	4.02947892238	SSWAGLQFPVGR	NP_066544_R21W	99.999
+4	788.87384	1.27277118312	AQACNLDQSGTNVAK	NP_112092_rs7285167:R182C	99.999
+7	908.008545	0.740229025429	DMTVPVLVSKPPVFTGK	NP_001909_rs72973763:I239V,rs12021720:S384G	94.7368421053
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_dbmod_output1.tsv	Thu May 03 10:16:38 2018 -0400
@@ -0,0 +1,6 @@
+scan	Sequence	Protein(s)	Position	m/z	Precursor m/z Error	confidence
+1	PYANQPTVR	NP_116558	2	523.272583	-4.42404529799	99.999
+3	SSWAGLQFPVGR	NP_066544_R21W	19	652.843567	4.02947892238	99.999
+4	AQACNLDQSGTNVAK	NP_112092_rs7285167:R182C	179	788.87384	1.27277118312	99.999
+6	LQLLPESFICK	NP_001028196, NP_001244919, NP_036311	198; 163; 277	674.370911	0.341203679555	99.3865030675
+8	FGVSSESKPEEVK	NP_001265138, NP_001265138_rs2274136:A858T,F879L, NP_001265139, NP_001265139_rs2274136:A785T,F806L, NP_005115, NP_005115_rs2274136:A827T,F848L	992; 992; 919; 919; 961; 961	711.858643	-0.766605567436	99.999