Galaxy |

Changeset 13:4d5aae46f850 (2021-08-19)

Previous changeset 12:37cde8134c6a (2021-06-19) Next changeset 14:557ec8d7087d (2023-09-13)

Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 35576d64a12fa664d72559172c5960c09da2b632"

modified:
filter_tabular.py
filter_tabular.xml
filters.py
macros.xml
test-data/psm_dbmod_output.tsv
test-data/psm_dbmod_output1.tsv
test-data/psm_report.tsv
test-data/psm_report_out2.tsv

added:
test-data/filtered_IEDB.tsv

diff -r 37cde8134c6a -r 4d5aae46f850 filter_tabular.py
--- a/filter_tabular.py Sat Jun 19 14:16:06 2021 +0000
+++ b/filter_tabular.py Thu Aug 19 19:39:58 2021 +0000

@@ -19,6 +19,8 @@
                       help='JSON array of filter specifications')
     parser.add_option('-o', '--output', dest='output', default=None,
                       help='Output file for query results')
+    parser.add_option('-c', '--comment_char', dest='comment_char', default=None,
+                      help='Line comment character')
     parser.add_option('-v', '--verbose', dest='verbose', default=False,
                       action='store_true',
                       help='verbose')
@@ -59,7 +61,7 @@
                   file=sys.stdout)

     try:
-        filter_file(inputFile, outputFile, filters=filters)
+        filter_file(inputFile, outputFile, comment_char=options.comment_char, filters=filters)
     except Exception as e:
         exit('Error: %s' % (e))

diff -r 37cde8134c6a -r 4d5aae46f850 filter_tabular.xml
--- a/filter_tabular.xml Sat Jun 19 14:16:06 2021 +0000
+++ b/filter_tabular.xml Thu Aug 19 19:39:58 2021 +0000

[

@@ -1,4 +1,4 @@
-<tool id="filter_tabular" name="Filter Tabular" version="3.1.2">
+<tool id="filter_tabular" name="Filter Tabular" version="3.3.0">
     <description></description>

     <macros>
@@ -8,12 +8,14 @@
     <command detect_errors="exit_code"><![CDATA[
         python '$__tool_directory__/filter_tabular.py'
         -i '$input'
+        $comment_char
         -j '$filter_json'
         -o '$output'
     ]]></command>
     <configfiles>
         <configfile name="filter_json">
#import json
+#set $dataset_name = $input.element_identifier
@LINEFILTERS@
#if $input_filters:
#echo $json.dumps($input_filters)
@@ -22,6 +24,7 @@
     </configfiles>
     <inputs>
         <param name="input" type="data" format="tabular" label="Tabular Dataset to filter"/>
+        <param name="comment_char" type="boolean" truevalue="--comment_char '#'" falsevalue="" checked="true" label="exclude lines starting with #"/>
         <expand macro="macro_line_filters" />
     </inputs>
     <outputs>
@@ -139,6 +142,30 @@
             </repeat>
             <output name="output" file="filtered_customers_results.tsv"/>
         </test>
+        <test>
+            <param name="input" ftype="tabular" value="IEDB.tsv"/>
+            <param name="comment_char" value="False"/>
+            <repeat name="linefilters">
+                <conditional name="filter">
+                    <param name="filter_type" value="replace"/>
+                    <param name="column" value="c1"/>
+                    <param name="regex_pattern" value="#ID"/>
+                    <param name="regex_replace" value="ID"/>
+                </conditional>
+            </repeat>
+            <repeat name="linefilters">
+                <conditional name="filter">
+                    <param name="filter_type" value="select_column_slices"/>
+                    <param name="columns" value="6,0:6,8,-3"/>
+                </conditional>
+            </repeat>
+            <repeat name="linefilters">
+                <conditional name="filter">
+                    <param name="filter_type" value="append_dataset_name"/>
+                </conditional>
+            </repeat>
+            <output name="output" file="filtered_IEDB.tsv"/>
+        </test>
     </tests>
     <help><![CDATA[
==============

diff -r 37cde8134c6a -r 4d5aae46f850 filters.py
--- a/filters.py Sat Jun 19 14:16:06 2021 +0000
+++ b/filters.py Thu Aug 19 19:39:58 2021 +0000

[

@@ -4,6 +4,7 @@

import re
import sys
+from itertools import chain

class LineFilter(object):
@@ -13,6 +14,15 @@
         self.func = lambda i, l: l.rstrip('\r\n') if l else None
         self.src_lines = []
         self.src_line_cnt = 0
+
+        def xint(x):
+            if isinstance(x, int):
+                return x
+            try:
+                return int(x)
+            except Exception:
+                return x if x else None
+
         if not filter_dict:
             return
         if filter_dict['filter'] == 'regex':
@@ -28,6 +38,13 @@
         elif filter_dict['filter'] == 'select_columns':
             cols = [int(c) - 1 for c in filter_dict['columns']]
             self.func = lambda i, l: self.select_columns(l, cols)
+        elif filter_dict['filter'] == 'select_column_slices':
+            cols = [x if isinstance(x, int) else [y if y is not None else None for y in [xint(k) for k in x.split(':')]] for x in [xint(c) for c in filter_dict['columns']]]
+            if all([isinstance(x, int) for x in cols]):
+                self.func = lambda i, l: self.select_columns(l, cols)
+            else:
+                cols = [slice(x[0], x[1], x[2] if len(x) > 2 else None) if isinstance(x, list) else x for x in cols]
+                self.func = lambda i, l: self.select_slices(l, cols)
         elif filter_dict['filter'] == 'replace':
             p = filter_dict['pattern']
             r = filter_dict['replace']
@@ -80,6 +97,10 @@
         fields = line.split('\t')
         return '\t'.join([fields[x] for x in cols])

+    def select_slices(self, line, cols):
+        fields = line.split('\t')
+        return '\t'.join(chain.from_iterable([y if isinstance(y, list) else [y] for y in [fields[x] for x in cols]]))
+
     def replace_add(self, line, pat, rep, col, pos):
         fields = line.rstrip('\r\n').split('\t')
         i = pos if pos is not None else len(fields)

diff -r 37cde8134c6a -r 4d5aae46f850 macros.xml
--- a/macros.xml Sat Jun 19 14:16:06 2021 +0000
+++ b/macros.xml Thu Aug 19 19:39:58 2021 +0000

[

b'@@ -32,7 +32,12 @@\n #elif $fi.filter.filter_type == \'select_columns\':\n #set $filter_dict = dict()\n #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n- #set $filter_dict[\'columns\'] = [int(str($ci).replace(\'c\',\'\')) for $ci in str($fi.filter.columns).split(\',\')]\n+ #set $filter_dict[\'columns\'] = [int($c) for $c in str($fi.filter.columns).replace(\'c\',\'\').split(\',\')]\n+ #silent $input_filters.append($filter_dict)\n+ #elif $fi.filter.filter_type == \'select_column_slices\':\n+ #set $filter_dict = dict()\n+ #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n+ #set $filter_dict[\'columns\'] = [$c for $c in str($fi.filter.columns).split(\',\')]\n #silent $input_filters.append($filter_dict)\n #elif $fi.filter.filter_type == \'replace\':\n #set $filter_dict = dict()\n@@ -53,6 +58,11 @@\n #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n #set $filter_dict[\'column_text\'] = str($fi.filter.column_text)\n #silent $input_filters.append($filter_dict)\n+ #elif str($fi.filter.filter_type).endswith(\'pend_dataset_name\'):\n+ #set $filter_dict = dict()\n+ #set $filter_dict[\'filter\'] = str($fi.filter.filter_type).replace(\'dataset_name\', \'text\')\n+ #set $filter_dict[\'column_text\'] = $dataset_name\n+ #silent $input_filters.append($filter_dict)\n #elif $fi.filter.filter_type == \'normalize\':\n #set $filter_dict = dict()\n #set $filter_dict[\'filter\'] = str($fi.filter.filter_type)\n@@ -129,9 +139,12 @@\n <option value="comment">comment char</option>\n <option value="regex">by regex expression matching</option>\n <option value="select_columns">select columns</option>\n+ <option value="select_column_slices">select columns by indices/slices</option>\n <option value="replace">regex replace value in column</option>\n <option value="prepend_line_num">prepend a line number column</option>\n <option value="append_line_num">append a line number column</option>\n+ <option value="prepend_dataset_name">prepend a column with the dataset 
name</option>\n+ <option value="append_dataset_name">append a column with the dataset name</option>\n <option value="prepend_text">prepend a column with the given text</option>\n <option value="append_text">append a column with the given text</option>\n <option value="normalize">normalize list columns, replicates row for each item in list</option>\n@@ -147,6 +160,8 @@\n </when>\n <when value="prepend_line_num"/>\n <when value="append_line_num"/>\n+ <when value="prepend_dataset_name"/>\n+ <when value="append_dataset_name"/>\n <when value="prepend_text">\n <param name="column_text" type="text" value="" label="text for column">\n </param>\n@@ -172,6 +187,26 @@\n <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\\d*)(,c?[1-9]\\d*)*$</validator>\n </param>\n </when>\n+ <when value="select_column_slices">\n+ <param name="columns" type="text" value="" label="enter indices or slices of the columns to keep">\n+ <help><![CDATA[ Python offset indexes or slices. Examples:\n+ <ul>\n+ <li>Column offset indexes: 0,3,1 (selects the first,fourth, and second columns)</li>\n+ <li>Negative column numbers: -1,-2 (selects the last, and second last columns)</l'..b'lace"\n help="example: 1 or c1 (selects the first column)">\n@@ -211,21 +246,24 @@\n <![CDATA[\n **Input Line Filters**\n \n- As a tabular file is being read, line filters may be applied. 
\n-\n- ::\n+ As a tabular file is being read, line filters may be applied:\n \n- - skip leading lines skip the first *number* of lines\n- - comment char omit any lines that start with the specified comment character \n- - by regex expression matching *include/exclude* lines the match the regex expression \n- - select columns choose to include only selected columns in the order specified \n- - regex replace value in column replace a field in a column using a regex substitution (good for date reformatting)\n- - regex replace value in column add a new column using a regex substitution of a column value\n- - prepend a line number column each line has the ordinal value of the line read by this filter as the first column\n- - append a line number column each line has the ordinal value of the line read by this filter as the last column\n- - prepend a text column each line has the text string as the first column\n- - append a text column each line has the text string as the last column\n- - normalize list columns replicates the line for each item in the specified list *columns*\n+ - skip leading lines - skip the first *number* of lines\n+ - comment char - omit any lines that start with the specified comment character \n+ - by regex expression matching - *include/exclude* lines that match the regex expression \n+ - select columns - choose to include only selected columns in the order specified \n+ - select columns by indices/slices - *indices or slices* of the columns to keep (python_list_ indexing)\n+ - regex replace value in column - replace a field in a column using a regex substitution (good for date reformatting)\n+ - regex replace value in column - add a new column using a regex substitution of a column value\n+ - prepend a line number column - each line has the *ordinal* value of the line read by this filter as the first column\n+ - append a line number column - each line has the *ordinal* value of the line read by this filter as the last column\n+ - prepend a text 
column - each line has the text string as the first column\n+ - append a text column - each line has the text string as the last column\n+ - prepend the dataset name - each line has the *dataset name* as the first column\n+ - append the dataset name - each line has the *dataset name* as the last column\n+ - normalize list columns - replicates the line for each item in the specified list *columns*\n+\n+.. _python_list: https://docs.python.org/3/library/stdtypes.html#common-sequence-operations\n ]]>\n </token>\n \n@@ -284,13 +322,13 @@\n 0 Jane Doe 1978-05-24 5 3\n 1 James Smith 1980-10-20 Spot 6 4\n \n- Filter 6 - append a line number column:\n+ Filter 6 - select columns by indices/slices: \'1:6\'\n \n- 2 Paula Brown 1978-05-24 Rex dog 3 1 1\n- 2 Paula Brown 1978-05-24 Fluff cat 3 1 2\n- 1 Steven Jones 1974-04-04 Allie cat 4 2 3\n- 0 Jane Doe 1978-05-24 5 3 4\n- 1 James Smith 1980-10-20 Spot 6 4 5\n+ Paula Brown 1978-05-24 Rex dog\n+ Paula Brown 1978-05-24 Fluff cat\n+ Steven Jones 1974-04-04 Allie cat\n+ Jane Doe 1978-05-24 \n+ James Smith 1980-10-20 Spot \n \n ]]>\n </token>\n'

diff -r 37cde8134c6a -r 4d5aae46f850 test-data/filtered_IEDB.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_IEDB.tsv Thu Aug 19 19:39:58 2021 +0000

@@ -0,0 +1,17 @@
+peptide ID allele seq_num start end length percentile_rank comblib_sidney2008_rank IEDB.tsv
+GMYCMVFLV PPAP2C HLA-A*02:01 1 3 11 9 0.2 0.5 IEDB.tsv
+SFGMYCMVF PPAP2C HLA-A*23:01 1 1 9 9 0.5 - IEDB.tsv
+MYCMVFLVK PPAP2C HLA-A*23:01 1 4 12 9 0.65 - IEDB.tsv
+FGMYCMVFL PPAP2C HLA-A*02:01 1 2 10 9 2.3 1.3 IEDB.tsv
+GMYCMVFLV PPAP2C HLA-A*23:01 1 3 11 9 4.95 - IEDB.tsv
+FGMYCMVFL PPAP2C HLA-A*23:01 1 2 10 9 6.55 - IEDB.tsv
+SFGMYCMVF PPAP2C HLA-A*02:01 1 1 9 9 45 91 IEDB.tsv
+MYCMVFLVK PPAP2C HLA-A*02:01 1 4 12 9 54 86 IEDB.tsv
+SLDMCISGL ADAMTSL1 HLA-A*02:01 1 1 9 9 1 1.7 IEDB.tsv
+MCISGLCQL ADAMTSL1 HLA-A*23:01 1 4 12 9 6.65 - IEDB.tsv
+MCISGLCQL ADAMTSL1 HLA-A*02:01 1 4 12 9 14 24 IEDB.tsv
+SLDMCISGL ADAMTSL1 HLA-A*23:01 1 1 9 9 30.5 - IEDB.tsv
+LDMCISGLC ADAMTSL1 HLA-A*02:01 1 2 10 9 42 71 IEDB.tsv
+DMCISGLCQ ADAMTSL1 HLA-A*23:01 1 3 11 9 64.5 - IEDB.tsv
+LDMCISGLC ADAMTSL1 HLA-A*23:01 1 2 10 9 76.0 - IEDB.tsv
+DMCISGLCQ ADAMTSL1 HLA-A*02:01 1 3 11 9 97 97 IEDB.tsv

diff -r 37cde8134c6a -r 4d5aae46f850 test-data/psm_dbmod_output.tsv
--- a/test-data/psm_dbmod_output.tsv Sat Jun 19 14:16:06 2021 +0000
+++ b/test-data/psm_dbmod_output.tsv Thu Aug 19 19:39:58 2021 +0000

[

@@ -1,4 +1,4 @@
-#scan m\/z Precursor m\/z Error Sequence Protein$s$ confidence
+#scan m\/z Precursor m\/z Error( \[ppm])? Sequence Protein$s$ confidence
1 523.27\d* -4.42\d* PYANQPTVR NP_116558 99.9\d*
3 652.84\d* 4.02\d* SSWAGLQFPVGR NP_066544_R21W 99.9\d*
4 788.87\d* 1.27\d* AQACNLDQSGTNVAK NP_112092_rs7285167:R182C 99.9\d*

diff -r 37cde8134c6a -r 4d5aae46f850 test-data/psm_dbmod_output1.tsv
--- a/test-data/psm_dbmod_output1.tsv Sat Jun 19 14:16:06 2021 +0000
+++ b/test-data/psm_dbmod_output1.tsv Thu Aug 19 19:39:58 2021 +0000

[

@@ -1,4 +1,4 @@
-scan Sequence Protein$s$ Position m\/z Precursor m\/z Error confidence
+scan Sequence Protein$s$ Position m\/z Precursor m\/z Error( \[ppm])? confidence
1 PYANQPTVR NP_116558 2 523.27\d* -4.42\d* 99.99\d*
3 SSWAGLQFPVGR NP_066544_R21W 19 652.84\d* 4.02\d* 99.99\d*
4 AQACNLDQSGTNVAK NP_112092_rs7285167:R182C 179 788.87\d* 1.27\d* 99.99\d*

diff -r 37cde8134c6a -r 4d5aae46f850 test-data/psm_report.tsv
--- a/test-data/psm_report.tsv Sat Jun 19 14:16:06 2021 +0000
+++ b/test-data/psm_report.tsv Thu Aug 19 19:39:58 2021 +0000

[

@@ -1,4 +1,3 @@
-
Protein(s) Sequence AAs Before AAs After Position Modified Sequence Variable Modifications Fixed Modifications Spectrum File Spectrum Title Spectrum Scan Number RT m/z Measured Charge Identification Charge Theoretical Mass Isotope Number Precursor m/z Error [ppm] Localization Confidence Probabilistic PTM score D-score Confidence [%] Validation
1 NP_116558 PYANQPTVR M IT 2 NH2-PYANQPTVR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.04679.04679.2 4679 -1.0 523.272583 2+ 2+ 1044.53524305008 0 -4.4240452979909675 100.0 Doubtful
2 NP_443137, NP_443137_S1016F DANTQVHTLR YK; YK KM; KM 443; 443 NH2-DANTQVHTLR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.03894.03894.2 3894 -1.0 577.799622 2+ 2+ 1153.5839841476504 0 0.6117338355350196 95.0 Doubtful

diff -r 37cde8134c6a -r 4d5aae46f850 test-data/psm_report_out2.tsv
--- a/test-data/psm_report_out2.tsv Sat Jun 19 14:16:06 2021 +0000
+++ b/test-data/psm_report_out2.tsv Thu Aug 19 19:39:58 2021 +0000

[

@@ -1,4 +1,4 @@
-Scan m\/z Precursor m\/z Error Sequence Protein$s$
+Scan m\/z Precursor m\/z Error( \[ppm])? Sequence Protein$s$
1 523.27\d* -4.42\d* PYANQPTVR NP_116558
3 652.84\d* 4.02\d* SSWAGLQFPVGR NP_066544_R21W
4 788.87\d* 1.27\d* AQACNLDQSGTNVAK NP_112092_rs7285167:R182C