changeset 10:2e8f945f7285 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 67db58361546009a77b2cbd140967fbc634d425b"
author iuc
date Thu, 24 Sep 2020 11:26:46 +0000
parents a3aab6045663
children 83069b38aa85
files load_db.py query_tabular.xml test-data/netMHC_summary_out1.tsv test-data/netMHC_summary_out2.tsv
diffstat 4 files changed, 80 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/load_db.py	Sat Sep 12 01:22:05 2020 +0000
+++ b/load_db.py	Thu Sep 24 11:26:46 2020 +0000
@@ -189,6 +189,9 @@
             if linenum == 0 and firstlinenames:
                 col_names = [get_valid_column_name(name) or 'c%d' % (i + 1)
                              for i, name in enumerate(fields)]
+                # guarantee col_types in case of empty data
+                while len(col_types) < len(fields):
+                    col_types.append(None)
                 continue
             if linenum > max_lines:
                 break
@@ -210,23 +213,30 @@
     if not col_names:
         col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
     if column_names:
+        cnames = [cn.strip() for cn in column_names.split(',')]
         if load_named_columns:
             col_idx = []
-            cnames = []
-            for i, cname in enumerate(
-                    [cn.strip() for cn in column_names.split(',')]):
+            colnames = []
+            for i, cname in enumerate(cnames):
+                # guarantee col_types in case of empty data
+                if i >= len(col_types):
+                    col_types.append('TEXT')
                 if cname != '':
                     col_idx.append(i)
-                    cnames.append(cname)
+                    colnames.append(cname)
             col_types = [col_types[i] for i in col_idx]
-            col_names = cnames
+            col_names = colnames
         else:
-            for i, cname in enumerate(
-                    [cn.strip() for cn in column_names.split(',')]):
-                if cname and i < len(col_names):
-                    col_names[i] = cname
+            if col_names:
+                for i, cname in enumerate(cnames):
+                    if cname and i < len(col_names):
+                        col_names[i] = cname
+            else:
+                col_names = [x if x else 'c%d' % (i) for i, x in enumerate(cnames)]
     col_def = []
     for i, col_name in enumerate(col_names):
+        if i >= len(col_types):
+            col_types.append('TEXT')
         col_def.append('%s %s' % (col_names[i], col_types[i]))
     return col_names, col_types, col_def, col_idx
 
--- a/query_tabular.xml	Sat Sep 12 01:22:05 2020 +0000
+++ b/query_tabular.xml	Thu Sep 24 11:26:46 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="query_tabular" name="Query Tabular" version="3.1.0">
+<tool id="query_tabular" name="Query Tabular" version="3.1.1">
     <description>using sqlite sql</description>
 
     <macros>
@@ -528,6 +528,57 @@
             <output name="output1" file="psm_dbmod_output1.tsv" compare="re_match"/>
         </test>
 
+        <test>
+            <repeat name="tables">
+                <param name="table" ftype="tabular" value="netMHC_summary.tsv"/>
+                <section name="input_opts">
+                    <repeat name="linefilters">
+                        <conditional name="filter">
+                            <param name="filter_type" value="regex"/>
+                            <param name="regex_pattern" value="peptide"/>
+                            <param name="regex_action" value="include_find"/>
+                        </conditional>
+                    </repeat>
+                </section>
+                <section name="tbl_opts">
+                    <param name="table_name" value="epitope"/>
+                    <param name="column_names_from_first_line" value="True"/>
+                </section>
+            </repeat>
+            <param name="sqlquery" value="SELECT * FROM epitope"/>
+            <conditional name="query_result">
+                <param name="header" value="yes"/>
+                <param name="header_prefix" value=""/>
+            </conditional>
+            <output name="output" file="netMHC_summary_out1.tsv" ftype="tabular"/>
+        </test>
+
+        <test>
+            <repeat name="tables">
+                <param name="table" ftype="tabular" value="netMHC_summary.tsv"/>
+                <section name="input_opts">
+                    <repeat name="linefilters">
+                        <conditional name="filter">
+                            <param name="filter_type" value="regex"/>
+                            <param name="regex_pattern" value="NOT TO BE FOUND"/>
+                            <param name="regex_action" value="include_find"/>
+                        </conditional>
+                    </repeat>
+                </section>
+                <section name="tbl_opts">
+                    <param name="table_name" value="epitope"/>
+                    <param name="column_names_from_first_line" value="False"/>
+                    <param name="col_names" value="pos,peptide,logscore,,,,Allele"/>
+                </section>
+            </repeat>
+            <param name="sqlquery" value="SELECT pos,peptide,logscore,Allele FROM epitope"/>
+            <conditional name="query_result">
+                <param name="header" value="yes"/>
+                <param name="header_prefix" value=""/>
+            </conditional>
+            <output name="output" file="netMHC_summary_out2.tsv" ftype="tabular"/>
+        </test>
+
     </tests>
     <help><![CDATA[
 =============
@@ -540,6 +591,13 @@
 
   An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base.
 
+    **NOTE:** If there are no data rows in an input tabular dataset, query_tabular will fail unless one of the following holds:
+
+      - There is a row header line in the dataset and **Use first line as column names** is selected, or
+      - **Specify Column Names** is used to provide column names.
+
+    In either case, an empty table will be created with those columns.
+
 
 @LINEFILTERS_HELP@
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/netMHC_summary_out1.tsv	Thu Sep 24 11:26:46 2020 +0000
@@ -0,0 +1,1 @@
+#pos	peptide	logscore	affinity(nM)	Bind Level	Protein Name	Allele
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/netMHC_summary_out2.tsv	Thu Sep 24 11:26:46 2020 +0000
@@ -0,0 +1,1 @@
+pos	peptide	logscore	Allele