Repository 'query_tabular'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/query_tabular

Changeset 1:8a33b442ecd9 (2017-08-18)
Previous changeset 0:3708ff0198b7 (2017-07-18) Next changeset 2:fb8484ee54d8 (2017-10-23)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 81f69ad5f39223059c40501e55ac777d3feca845
modified:
filters.py
load_db.py
macros.xml
query_db.py
query_tabular.py
query_tabular.xml
sqlite_to_tabular.py
added:
test-data/psm_report.tsv
test-data/psm_report_out1.tsv
test-data/psm_report_out2.tsv
diff -r 3708ff0198b7 -r 8a33b442ecd9 filters.py
--- a/filters.py Tue Jul 18 09:07:07 2017 -0400
+++ b/filters.py Fri Aug 18 16:48:09 2017 -0400
@@ -33,7 +33,8 @@
             r = filter_dict['replace']
             c = int(filter_dict['column']) - 1
             self.func = lambda i, l: '\t'.join(
-                [x if j != c else re.sub(p, r, x) for j, x in enumerate(l.split('\t'))])
+                [x if j != c else re.sub(p, r, x)
+                 for j, x in enumerate(l.split('\t'))])
         elif filter_dict['filter'] == 'prepend_line_num':
             self.func = lambda i, l: '%d\t%s' % (i, l)
         elif filter_dict['filter'] == 'append_line_num':
diff -r 3708ff0198b7 -r 8a33b442ecd9 load_db.py
--- a/load_db.py Tue Jul 18 09:07:07 2017 -0400
+++ b/load_db.py Fri Aug 18 16:48:09 2017 -0400
b'@@ -2,11 +2,165 @@\n \n from __future__ import print_function\n \n+import re\n import sys\n \n from filters import TabularReader\n \n \n+SQLITE_KEYWORDS = [\n+    \'ABORT\',\n+    \'ACTION\',\n+    \'ADD\',\n+    \'AFTER\',\n+    \'ALL\',\n+    \'ALTER\',\n+    \'ANALYZE\',\n+    \'AND\',\n+    \'AS\',\n+    \'ASC\',\n+    \'ATTACH\',\n+    \'AUTOINCREMENT\',\n+    \'BEFORE\',\n+    \'BEGIN\',\n+    \'BETWEEN\',\n+    \'BY\',\n+    \'CASCADE\',\n+    \'CASE\',\n+    \'CAST\',\n+    \'CHECK\',\n+    \'COLLATE\',\n+    \'COLUMN\',\n+    \'COMMIT\',\n+    \'CONFLICT\',\n+    \'CONSTRAINT\',\n+    \'CREATE\',\n+    \'CROSS\',\n+    \'CURRENT_DATE\',\n+    \'CURRENT_TIME\',\n+    \'CURRENT_TIMESTAMP\',\n+    \'DATABASE\',\n+    \'DEFAULT\',\n+    \'DEFERRABLE\',\n+    \'DEFERRED\',\n+    \'DELETE\',\n+    \'DESC\',\n+    \'DETACH\',\n+    \'DISTINCT\',\n+    \'DROP\',\n+    \'EACH\',\n+    \'ELSE\',\n+    \'END\',\n+    \'ESCAPE\',\n+    \'EXCEPT\',\n+    \'EXCLUSIVE\',\n+    \'EXISTS\',\n+    \'EXPLAIN\',\n+    \'FAIL\',\n+    \'FOR\',\n+    \'FOREIGN\',\n+    \'FROM\',\n+    \'FULL\',\n+    \'GLOB\',\n+    \'GROUP\',\n+    \'HAVING\',\n+    \'IF\',\n+    \'IGNORE\',\n+    \'IMMEDIATE\',\n+    \'IN\',\n+    \'INDEX\',\n+    \'INDEXED\',\n+    \'INITIALLY\',\n+    \'INNER\',\n+    \'INSERT\',\n+    \'INSTEAD\',\n+    \'INTERSECT\',\n+    \'INTO\',\n+    \'IS\',\n+    \'ISNULL\',\n+    \'JOIN\',\n+    \'KEY\',\n+    \'LEFT\',\n+    \'LIKE\',\n+    \'LIMIT\',\n+    \'MATCH\',\n+    \'NATURAL\',\n+    \'NO\',\n+    \'NOT\',\n+    \'NOTNULL\',\n+    \'NULL\',\n+    \'OF\',\n+    \'OFFSET\',\n+    \'ON\',\n+    \'OR\',\n+    \'ORDER\',\n+    \'OUTER\',\n+    \'PLAN\',\n+    \'PRAGMA\',\n+    \'PRIMARY\',\n+    \'QUERY\',\n+    \'RAISE\',\n+    \'RECURSIVE\',\n+    \'REFERENCES\',\n+    \'REGEXP\',\n+    \'REINDEX\',\n+    \'RELEASE\',\n+    \'RENAME\',\n+    \'REPLACE\',\n+    \'RESTRICT\',\n+    \'RIGHT\',\n+    \'ROLLBACK\',\n+    \'ROW\',\n+    \'SAVEPOINT\',\n+    
\'SELECT\',\n+    \'SET\',\n+    \'TABLE\',\n+    \'TEMP\',\n+    \'TEMPORARY\',\n+    \'THEN\',\n+    \'TO\',\n+    \'TRANSACTION\',\n+    \'TRIGGER\',\n+    \'UNION\',\n+    \'UNIQUE\',\n+    \'UPDATE\',\n+    \'USING\',\n+    \'VACUUM\',\n+    \'VALUES\',\n+    \'VIEW\',\n+    \'VIRTUAL\',\n+    \'WHEN\',\n+    \'WHERE\',\n+    \'WITH\',\n+    \'WITHOUT\'\n+]\n+\n+\n+def get_valid_column_name(name):\n+    valid_name = name\n+    if not name or not name.strip():\n+        return None\n+    elif name.upper() in SQLITE_KEYWORDS:\n+        valid_name = \'"%s"\' % name\n+    elif re.match(\'^[a-zA-Z]\\w*$\', name):\n+        pass\n+    elif re.match(\'^"[^"]+"$\', name):\n+        pass\n+    elif re.match(\'^\\[[^\\[\\]]*\\]$\', name):\n+        pass\n+    elif re.match("^`[^`]+`$", name):\n+        pass\n+    elif name.find(\'"\') < 0:\n+        valid_name = \'"%s"\' % name\n+    elif name.find(\'[\') < 0 and name.find(\']\') < 0:\n+        valid_name = \'[%s]\' % name\n+    elif name.find(\'`\') < 0:\n+        valid_name = \'`%s`\' % name\n+    elif name.find("\'") < 0:\n+        valid_name = "\'%s\'" % name\n+    return valid_name\n+\n+\n def getValueType(val):\n     if val or 0. 
== val:\n         try:\n@@ -23,14 +177,19 @@\n \n def get_column_def(file_path, table_name, skip=0, comment_char=\'#\',\n                    column_names=None, max_lines=100, load_named_columns=False,\n-                   filters=None):\n+                   firstlinenames=False, filters=None):\n     col_pref = [\'TEXT\', \'REAL\', \'INTEGER\', None]\n     col_types = []\n     col_idx = None\n+    col_names = []\n     try:\n         tr = TabularReader(file_path, skip=skip, comment_char=comment_char,\n                            col_idx=None, filters=filters)\n         for linenum, fields in enumerate(tr):\n+            if linenum == 0 and firstlinenames:\n+                col_names = [get_valid_column_name(name) or \'c%d\' % (i + 1)\n+                             for i, name in enumerate(fields)]\n+                continue\n             if linenum > max_lines:\n                 break\n             try:\n@@ -48,24 +207,24 @@\n     for i, col_type in enumerate(col_types):\n         if not col_type:\n             col_types[i] = \'TEXT\'\n+    if not col_names:\n+        col_names = [\'c%d\' % i for i in range(1, len(col_types) + 1)]\n     if column_names:\n-        col_names = []\n         if load_named_columns:\n             col_idx = []\n+            cnames = []\n             f'..b'     [cn.strip() for cn in column_names.split(\',\')]):\n                 if cname != \'\':\n                     col_idx.append(i)\n-                    col_names.append(cname)\n+                    cnames.append(cname)\n             col_types = [col_types[i] for i in col_idx]\n+            col_names = cnames\n         else:\n-            col_names = [\'c%d\' % i for i in range(1, len(col_types) + 1)]\n             for i, cname in enumerate(\n                     [cn.strip() for cn in column_names.split(\',\')]):\n                 if cname and i < len(col_names):\n                     col_names[i] = cname\n-    else:\n-        col_names = [\'c%d\' % i for i in range(1, len(col_types) + 
1)]\n     col_def = []\n     for i, col_name in enumerate(col_names):\n         col_def.append(\'%s %s\' % (col_names[i], col_types[i]))\n@@ -74,12 +233,14 @@\n \n def create_table(conn, file_path, table_name, skip=0, comment_char=\'#\',\n                  pkey_autoincr=None, column_names=None,\n-                 load_named_columns=False, filters=None,\n-                 unique_indexes=[], indexes=[]):\n+                 load_named_columns=False, firstlinenames=False,\n+                 filters=None, unique_indexes=[], indexes=[]):\n     col_names, col_types, col_def, col_idx = \\\n         get_column_def(file_path, table_name, skip=skip,\n                        comment_char=comment_char, column_names=column_names,\n-                       load_named_columns=load_named_columns, filters=filters)\n+                       load_named_columns=load_named_columns,\n+                       firstlinenames=firstlinenames,\n+                       filters=filters)\n     col_func = [float if t == \'REAL\' else int\n                 if t == \'INTEGER\' else str for t in col_types]\n     table_def = \'CREATE TABLE %s (\\n    %s%s\\n);\' % (\n@@ -98,19 +259,13 @@\n         c.execute(table_def)\n         conn.commit()\n         c.close()\n-        for i, index in enumerate(unique_indexes):\n-            index_name = \'idx_uniq_%s_%d\' % (table_name, i)\n-            index_columns = index.split(\',\')\n-            create_index(conn, table_name, index_name, index_columns,\n-                         unique=True)\n-        for i, index in enumerate(indexes):\n-            index_name = \'idx_%s_%d\' % (table_name, i)\n-            index_columns = index.split(\',\')\n-            create_index(conn, table_name, index_name, index_columns)\n+\n         c = conn.cursor()\n         tr = TabularReader(file_path, skip=skip, comment_char=comment_char,\n                            col_idx=col_idx, filters=filters)\n         for linenum, fields in enumerate(tr):\n+            if linenum == 0 
and firstlinenames:\n+                continue\n             data_lines += 1\n             try:\n                 vals = [col_func[i](x)\n@@ -121,15 +276,28 @@\n                       file=sys.stderr)\n         conn.commit()\n         c.close()\n+        for i, index in enumerate(unique_indexes):\n+            index_name = \'idx_uniq_%s_%d\' % (table_name, i)\n+            index_columns = index.split(\',\')\n+            create_index(conn, table_name, index_name, index_columns,\n+                         unique=True)\n+        for i, index in enumerate(indexes):\n+            index_name = \'idx_%s_%d\' % (table_name, i)\n+            index_columns = index.split(\',\')\n+            create_index(conn, table_name, index_name, index_columns)\n     except Exception as e:\n         exit(\'Error: %s\' % (e))\n \n \n def create_index(conn, table_name, index_name, index_columns, unique=False):\n-    index_def = "CREATE %s INDEX %s on %s(%s)" % (\n+    index_def = \'CREATE %s INDEX %s on %s(%s)\' % (\n                 \'UNIQUE\' if unique else \'\', index_name,\n                 table_name, \',\'.join(index_columns))\n-    c = conn.cursor()\n-    c.execute(index_def)\n-    conn.commit()\n-    c.close()\n+    try:\n+        c = conn.cursor()\n+        c.execute(index_def)\n+        conn.commit()\n+        c.close()\n+    except Exception as e:\n+        print(\'Failed: %s err: %s\' % (index_def, e), file=sys.stderr)\n+        raise(e)\n'
diff -r 3708ff0198b7 -r 8a33b442ecd9 macros.xml
--- a/macros.xml Tue Jul 18 09:07:07 2017 -0400
+++ b/macros.xml Fri Aug 18 16:48:09 2017 -0400
@@ -60,6 +60,52 @@
   #end for
 ]]>
   </token>
+  <token name="@RESULT_HEADER@">
+<![CDATA[
+    #if $query_result.header == 'yes':
+        #if $query_result.header_prefix:
+            #set $header_prefix = chr(int(str($query_result.header_prefix)))
+            --comment_char='$header_prefix'
+        #end if
+    #else
+        --no_header
+    #end if
+]]>
+  </token>
+  <xml name="comment_char_options">
+        <option value="62">&gt;</option>
+        <option value="64">@</option>
+        <option value="43">+</option>
+        <option value="60">&lt;</option>
+        <option value="42">*</option>
+        <option value="45">-</option>
+        <option value="61">=</option>
+        <option value="124">|</option>
+        <option value="63">?</option>
+        <option value="36">$</option>
+        <option value="46">.</option>
+        <option value="58">:</option>
+        <option value="38">&amp;</option>
+        <option value="37">%</option>
+        <option value="94">^</option>
+        <option value="35" selected="true">&#35;</option>
+        <option value="33">!</option>
+  </xml>
+  <xml name="result_results_header_line">
+      <conditional name="query_result">
+          <param name="header" type="select" label="include query result column headers">
+              <option value="yes">Yes</option>
+              <option value="no">No</option>
+          </param>
+          <when value="yes">
+              <param name="header_prefix" type="select" optional="true" label="Prefix character for column_header line">
+                  <option value="">no comment character prefix</option>
+                  <expand macro="comment_char_options" />
+              </param>
+          </when>
+          <when value="no"/>
+      </conditional>
+  </xml>
   <xml name="macro_line_filters">
                 <repeat name="linefilters" title="Filter Tabular Input Lines">
                     <conditional name="filter">
@@ -81,23 +127,7 @@
                         </when>
                         <when value="comment">
                             <param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped">
-                                <option value="62">&gt;</option>
-                                <option value="64">@</option>
-                                <option value="43">+</option>
-                                <option value="60">&lt;</option>
-                                <option value="42">*</option>
-                                <option value="45">-</option>
-                                <option value="61">=</option>
-                                <option value="124">|</option>
-                                <option value="63">?</option>
-                                <option value="36">$</option>
-                                <option value="46">.</option>
-                                <option value="58">:</option>
-                                <option value="38">&amp;</option>
-                                <option value="37">%</option>
-                                <option value="94">^</option>
-                                <option value="35">&#35;</option>
-                                <option value="33">!</option>
+                                <expand macro="comment_char_options" />
                             </param>
                         </when>
                         <when value="prepend_line_num"/>
diff -r 3708ff0198b7 -r 8a33b442ecd9 query_db.py
--- a/query_db.py Tue Jul 18 09:07:07 2017 -0400
+++ b/query_db.py Fri Aug 18 16:48:09 2017 -0400
@@ -56,12 +56,12 @@
     exit(0)
 
 
-def run_query(conn, query, outputFile, no_header=False):
+def run_query(conn, query, outputFile, no_header=False, comment_char='#'):
     cur = conn.cursor()
     results = cur.execute(query)
     if not no_header:
-        outputFile.write("#%s\n" % '\t'.join(
-            [str(col[0]) for col in cur.description]))
+        outputFile.write("%s%s\n" % (comment_char, '\t'.join(
+            [str(col[0]) for col in cur.description])))
     for i, row in enumerate(results):
         outputFile.write("%s\n" % '\t'.join(
             [str(val) if val is not None else '' for val in row]))
diff -r 3708ff0198b7 -r 8a33b442ecd9 query_tabular.py
--- a/query_tabular.py Tue Jul 18 09:07:07 2017 -0400
+++ b/query_tabular.py Fri Aug 18 16:48:09 2017 -0400
@@ -56,6 +56,8 @@
     parser.add_option('-n', '--no_header', dest='no_header', default=False,
                       action='store_true',
                       help='Include a column headers line')
+    parser.add_option('-c', '--comment_char', dest='comment_char', default='',
+                      help='comment character to prefix column header line')
     parser.add_option('-o', '--output', dest='output', default=None,
                       help='Output file for query results')
     (options, args) = parser.parse_args()
@@ -80,6 +82,8 @@
             table['comment_char'] if 'comment_char' in table else None
         column_names =\
             table['column_names'] if 'column_names' in table else None
+        firstlinenames =\
+            table['firstlinenames'] if 'firstlinenames' in table else False
         if column_names:
             load_named_columns =\
                 table['load_named_columns']\
@@ -93,6 +97,7 @@
             table['pkey_autoincr'] if 'pkey_autoincr' in table else None
         create_table(get_connection(options.sqlitedb), path, table_name,
                      pkey_autoincr=pkey_autoincr,
+                     firstlinenames=firstlinenames,
                      column_names=column_names,
                      skip=comment_lines,
                      comment_char=comment_char,
@@ -128,7 +133,8 @@
     else:
         try:
             run_query(get_connection(options.sqlitedb), query, outputFile,
-                      no_header=options.no_header)
+                      no_header=options.no_header,
+                      comment_char=options.comment_char)
         except Exception as e:
             exit('Error: %s' % (e))
 
diff -r 3708ff0198b7 -r 8a33b442ecd9 query_tabular.xml
--- a/query_tabular.xml Tue Jul 18 09:07:07 2017 -0400
+++ b/query_tabular.xml Fri Aug 18 16:48:09 2017 -0400
b'@@ -1,4 +1,4 @@\n-<tool id="query_tabular" name="Query Tabular" version="1.0.0">\n+<tool id="query_tabular" name="Query Tabular" version="2.0.0">\n     <description>using sqlite sql</description>\n \n     <macros>\n@@ -26,7 +26,7 @@\n         -j \'$table_json\'\n         #if $sqlquery:\n           -Q \'$query_file\' \n-          $no_header\n+          @RESULT_HEADER@\n           -o \'$output\'\n         #end if\n     ]]></command>\n@@ -54,6 +54,9 @@\n   #if $tbl.tbl_opts.pkey_autoincr:\n     #set $jtbl[\'pkey_autoincr\'] = str($tbl.tbl_opts.pkey_autoincr)\n   #end if\n+  #if $tbl.tbl_opts.column_names_from_first_line:\n+    #set $jtbl[\'firstlinenames\'] = True\n+  #end if\n   #if $tbl.tbl_opts.col_names:\n   #set $col_names = str($tbl.tbl_opts.col_names)\n     #if $tbl.tbl_opts.load_named_columns:\n@@ -104,6 +107,9 @@\n                     <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help>\n                     <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\\w*$</validator>\n                 </param>\n+                <param name="column_names_from_first_line" type="boolean" truevalue="True" falsevalue="False" checked="false" \n+                    label="Use first line as column names"\n+                    help="The names will be quoted if they are not valid SQLite column names."/>\n                 <param name="col_names" type="text" value="" optional="true" label="Specify Column Names (comma-separated list)">\n                     <help>By default, table columns will be named: c1,c2,c3,...,cn  (column names for a table must be unique)\n                           You can override the default names by entering a comma -separated list of names, e.g. 
\',name1,,,name2\' would rename the second and fifth columns.\n@@ -119,8 +125,9 @@\n                 <repeat name="indexes" title="Table Index">\n                     <param name="unique" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="This is a unique index"/>\n                     <param name="index_columns" type="text" value="" label="Index on Columns">\n-                        <help>Create an index on the column names: e,g, c1  or c2,c4</help>\n+                        <help>Create an index on the column names: e.g. for default column names:  c1  or c2,c4 ( use the names you gave for columns)</help>\n                         <validator type="regex" message="Column name, separated by commes if more than one">^([A-Za-z]\\w*|"\\S+[^,"]*"|`\\S+[^,`]*`|[[]\\S+[^,"]*[]])(,([A-Za-z]\\w*|"\\S+.*"|`\\S+[^,`]*`|[[]\\S+[^,"]*[]])?)*$</validator>\n+                        <sanitizer sanitize="False"/>\n                     </param>\n                 </repeat>\n             </section>\n@@ -132,7 +139,7 @@\n                 <sanitizer sanitize="False"/>\n                 <validator type="regex" message="">^(?ims)\\s*select\\s+.*\\s+from\\s+.*$</validator>\n         </param>\n-        <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers from tabular output"/>\n+        <expand macro="result_results_header_line" />\n     </inputs>\n     <outputs>\n         <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}">\n@@ -391,6 +398,56 @@\n             <output name="output" file="pet_normalized_query_results.tsv"/>\n         </test>\n \n+        <test>\n+            <repeat name="tables">\n+                <param name="table" ftype="tabular" value="psm_report.tsv"/>\n+                <section name="input_opts">\n+                    <repeat name="linefilters">\n+                        <conditional name="filter">\n+                            <param name="filter_type" 
value="select_columns"/>\n+                            <param name="columns" value="1,3,2,6,14,19"/>\n+                        </conditional>\n+                    </repeat>\n+                </section>\n+                <section name="tbl_opts">\n+                    <param name="table_name" value="'..b'  <param name="column_names_from_first_line" value="True"/>\n+                    <param name="col_names" value="Scan"/>\n+                </section>\n+            </repeat>\n+            <param name="sqlquery" value="SELECT * from PSMs"/>\n+            <conditional name="query_result">\n+                <param name="header" value="yes"/>\n+                <param name="header_prefix" value=""/>\n+            </conditional>\n+            <output name="output" file="psm_report_out1.tsv"/>\n+        </test>\n+\n+        <test>\n+            <repeat name="tables">\n+                <param name="table" ftype="tabular" value="psm_report.tsv"/>\n+                <section name="input_opts">\n+                    <repeat name="linefilters">\n+                        <conditional name="filter">\n+                            <param name="filter_type" value="select_columns"/>\n+                            <param name="columns" value="1,3,2,6,14,19"/>\n+                        </conditional>\n+                    </repeat>\n+                </section>\n+                <section name="tbl_opts">\n+                    <param name="table_name" value="PSMs"/>\n+                    <param name="column_names_from_first_line" value="True"/>\n+                    <param name="col_names" value="Scan"/>\n+                </section>\n+            </repeat>\n+            <param name="sqlquery" value="SELECT Scan,&quot;m/z&quot;, &quot;Precursor m/z Error [ppm]&quot;, Sequence, &quot;Protein(s)&quot; FROM PSMs WHERE NOT re_search(\', \',&quot;Protein(s)&quot;)"/>\n+            <conditional name="query_result">\n+                <param name="header" value="yes"/>\n+                <param 
name="header_prefix" value=""/>\n+            </conditional>\n+            <output name="output" file="psm_report_out2.tsv"/>\n+        </test>\n+\n     </tests>\n     <help><![CDATA[\n =============\n@@ -406,6 +463,45 @@\n \n @LINEFILTERS_HELP@\n \n+**Table Options**\n+\n+  Specify Name for Table:\n+\n+    - By default tables will be named: t1, t2, t3, etc.\n+ \n+    - **Specify Name for Table** \n+      You can provide your own name for a database table, the name should begin with a letter and only contain letters, digits, or underscores. \n+      The name should not be a SQLite key word: https://sqlite.org/lang_keywords.html\n+\n+\n+  There are several ways to name columns in a table:  \n+\n+    - By default table columns will be named: c1, c2, c3, etc.\n+\n+    - If **Use first line as column names** is selected, the first line is interpreted as column names, and not loaded into the table as a data row.  \n+      Any missing column name will be assigned cn where *n* is the ordinal position of the column, \n+      e.g. a blank header for the second column would be named: c2.\n+      The column names will be quoted is they are not valid identifiers, \n+      e.g. if they are SQLite keywords, or start with a non letter character, or contain a character other than a letter, digit, or underscore.\n+      The precedent for quoting is to enclose the identifier in double quotes, \n+      else if it contains a double quote enclose in square brackets,\n+      else if it contains a square bracket enclose in grave accents.\n+      **NOTE:** that this is the first line after line filtering has been applied. \n+      If a line filter *prepend a line number column* had been used, the name of the first column would be "1".\n+      You could rename that column using **Specify Column Names**\n+\n+    - **Specify Column Names** (comma-separated list)\n+      This will override any previously assigned column names. 
\n+      You can also choose to only load those columns for which you provided a name, \n+      but that is better accomplished with the line filter: *select columns*. \n+\n+\n+  **Table Index**\n+\n+    Queries on larger tables can be much faster if indexes are specified.  \n+    In general, specifiy an index for table columns used in joins with other tables, \n+    or on columns used in SQL query WHERE clauses or in GROUP BY clauses.\n+\n \n **Outputs**\n \n'
diff -r 3708ff0198b7 -r 8a33b442ecd9 sqlite_to_tabular.py
--- a/sqlite_to_tabular.py Tue Jul 18 09:07:07 2017 -0400
+++ b/sqlite_to_tabular.py Fri Aug 18 16:48:09 2017 -0400
@@ -21,6 +21,8 @@
     parser.add_option('-n', '--no_header', dest='no_header', default=False,
                       action='store_true',
                       help='Include a column headers line')
+    parser.add_option('-c', '--comment_char', dest='comment_char', default='',
+                      help='comment character to prefix column header line')
     parser.add_option('-o', '--output', dest='output', default=None,
                       help='Output file for query results')
     (options, args) = parser.parse_args()
@@ -51,7 +53,8 @@
     else:
         try:
             run_query(get_connection(options.sqlitedb), query, outputFile,
-                      no_header=options.no_header)
+                      no_header=options.no_header,
+                      comment_char=options.comment_char)
         except Exception as e:
             exit('Error: %s' % (e))
 
diff -r 3708ff0198b7 -r 8a33b442ecd9 test-data/psm_report.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_report.tsv Fri Aug 18 16:48:09 2017 -0400
@@ -0,0 +1,10 @@
+
+ Protein(s) Sequence AAs Before AAs After Position Modified Sequence Variable Modifications Fixed Modifications Spectrum File Spectrum Title Spectrum Scan Number RT m/z Measured Charge Identification Charge Theoretical Mass Isotope Number Precursor m/z Error [ppm] Localization Confidence Probabilistic PTM score D-score Confidence [%] Validation
+1 NP_116558 PYANQPTVR M IT 2 NH2-PYANQPTVR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.04679.04679.2 4679 -1.0 523.272583 2+ 2+ 1044.53524305008 0 -4.4240452979909675 100.0 Doubtful
+2 NP_443137, NP_443137_S1016F DANTQVHTLR YK; YK KM; KM 443; 443 NH2-DANTQVHTLR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.03894.03894.2 3894 -1.0 577.799622 2+ 2+ 1153.5839841476504 0 0.6117338355350196 95.0 Doubtful
+3 NP_066544_R21W SSWAGLQFPVGR TR VH 19 NH2-SSWAGLQFPVGR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.20932.20932.2 20932 -1.0 652.843567 2+ 2+ 1303.6673198487902 0 4.029478922381296 100.0 Doubtful
+4 NP_112092_rs7285167:R182C AQACNLDQSGTNVAK AR VM 179 NH2-AQAC<cmm>NLDQSGTNVAK-COOH Carbamidomethylation of C(4) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.05257.05257.2 5257 -1.0 788.87384 2+ 2+ 1575.7311189571503 0 1.2727711831177586 100.0 Doubtful
+5 NP_001273432_F75C, NP_001273433_F77C, NP_001273434_F77C, NP_006635_F75C ACNDPFIQK GR; GR; GR; GR EK; EK; EK; EK 74; 76; 76; 74 NH2-AC<cmm>NDPFIQK-COOH Carbamidomethylation of C(2) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.09560.09560.2 9560 -1.0 546.760803 2+ 2+ 1091.50697969691 0 0.06709466946167521 94.73684210526316 Doubtful
+6 NP_001028196, NP_001244919, NP_036311 LQLLPESFICK KR; KR; KR EK; EK; EK 198; 163; 277 NH2-LQLLPESFIC<cmm>K-COOH Carbamidomethylation of C(10) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.22345.22345.2 22345 -1.0 674.370911 2+ 2+ 1346.7268088708604 0 0.3412036795550375 99.38650306748467 Doubtful
+7 NP_001909_rs72973763:I239V,rs12021720:S384G DMTVPVLVSKPPVFTGK PK DK 234 NH2-DMTVPVLVSKPPVFTGK-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.20464.20464.2 20464 -1.0 908.008545 2+ 2+ 1814.0011927988103 0 0.7402290254293575 94.73684210526316 Doubtful
+8 NP_001265138, NP_001265138_rs2274136:A858T,F879L, NP_001265139, NP_001265139_rs2274136:A785T,F806L, NP_005115, NP_005115_rs2274136:A827T,F848L FGVSSESKPEEVK FK; FK; FK; FK; FK; FK KD; KD; KD; KD; KD; KD 992; 992; 919; 919; 961; 961 NH2-FGVSSESKPEEVK-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.06665.06665.2 6665 -1.0 711.858643 2+ 2+ 1421.7038244968105 0 -0.7666055674357796 100.0 Doubtful
diff -r 3708ff0198b7 -r 8a33b442ecd9 test-data/psm_report_out1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_report_out1.tsv Fri Aug 18 16:48:09 2017 -0400
@@ -0,0 +1,9 @@
+Scan Sequence Protein(s) Position m/z Precursor m/z Error
+1 PYANQPTVR NP_116558 2 523.272583 -4.42404529799
+2 DANTQVHTLR NP_443137, NP_443137_S1016F 443; 443 577.799622 0.611733835535
+3 SSWAGLQFPVGR NP_066544_R21W 19 652.843567 4.02947892238
+4 AQACNLDQSGTNVAK NP_112092_rs7285167:R182C 179 788.87384 1.27277118312
+5 ACNDPFIQK NP_001273432_F75C, NP_001273433_F77C, NP_001273434_F77C, NP_006635_F75C 74; 76; 76; 74 546.760803 0.0670946694617
+6 LQLLPESFICK NP_001028196, NP_001244919, NP_036311 198; 163; 277 674.370911 0.341203679555
+7 DMTVPVLVSKPPVFTGK NP_001909_rs72973763:I239V,rs12021720:S384G 234 908.008545 0.740229025429
+8 FGVSSESKPEEVK NP_001265138, NP_001265138_rs2274136:A858T,F879L, NP_001265139, NP_001265139_rs2274136:A785T,F806L, NP_005115, NP_005115_rs2274136:A827T,F848L 992; 992; 919; 919; 961; 961 711.858643 -0.766605567436
diff -r 3708ff0198b7 -r 8a33b442ecd9 test-data/psm_report_out2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_report_out2.tsv Fri Aug 18 16:48:09 2017 -0400
@@ -0,0 +1,5 @@
+Scan m/z Precursor m/z Error Sequence Protein(s)
+1 523.272583 -4.42404529799 PYANQPTVR NP_116558
+3 652.843567 4.02947892238 SSWAGLQFPVGR NP_066544_R21W
+4 788.87384 1.27277118312 AQACNLDQSGTNVAK NP_112092_rs7285167:R182C
+7 908.008545 0.740229025429 DMTVPVLVSKPPVFTGK NP_001909_rs72973763:I239V,rs12021720:S384G