# HG changeset patch
# User devteam
# Date 1609289415 0
# Node ID 9cd341095afde596b0e6599fdfebcd9e1b4fc74f
# Parent 6e8d945971392ca6837a8882ed9c4be0778a8278
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit a993d43d9d1702a6cf584683cf72527a3f999236"
diff -r 6e8d94597139 -r 9cd341095afd column_maker.py
--- a/column_maker.py Wed Jul 15 10:38:50 2020 -0400
+++ b/column_maker.py Wed Dec 30 00:50:15 2020 +0000
@@ -5,31 +5,48 @@
original file. The tool will skip over invalid lines within the file,
informing the user about the number of lines skipped.
"""
-from __future__ import print_function
+import argparse
+import json
import re
-import sys
-
-assert sys.version_info[:2] >= (2, 4)
-inp_file = sys.argv[1]
-out_file = sys.argv[2]
-expr = sys.argv[3]
-round_result = sys.argv[4]
+parser = argparse.ArgumentParser()
+parser.add_argument('input', type=argparse.FileType('r'), help="input file")
+parser.add_argument('output', type=argparse.FileType('wt'), help="output file")
+parser.add_argument('cond', nargs='?', type=str, help="expression")
+parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'],
+ help="round result")
+parser.add_argument('columns', nargs='?', type=int, help="number of columns")
+parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types")
+parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'],
+ help="avoid scientific notation")
+parser.add_argument('--load_json', default=None, type=argparse.FileType('r'),
+ help="overwrite parsed arguments from json file")
+args = parser.parse_args()
+
+argparse_dict = vars(args)
+if args.load_json:
+ json_dict = json.load(args.load_json)
+ argparse_dict.update(json_dict)
+
+fh = argparse_dict['input']
+out = argparse_dict['output']
+expr = argparse_dict['cond']
+round_result = argparse_dict['round']
try:
- in_columns = int(sys.argv[5])
+ in_columns = int(argparse_dict['columns'])
except Exception:
exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.")
if in_columns < 2:
# To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method.
exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.")
try:
- in_column_types = sys.argv[6].split(',')
+ in_column_types = argparse_dict['column_types'].split(',')
except Exception:
exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.")
if len(in_column_types) != in_columns:
exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.")
-avoid_scientific_notation = sys.argv[7]
+avoid_scientific_notation = argparse_dict['avoid_scientific_notation']
# Unescape if input has been escaped
mapped_str = {
@@ -74,7 +91,6 @@
invalid_line = None
lines_kept = 0
total_lines = 0
-out = open(out_file, 'wt')
# Read input file, skipping invalid lines, and perform computation that will result in a new column
code = '''
@@ -89,7 +105,6 @@
)
from numpy import format_float_positional
-fh = open(inp_file)
for i, line in enumerate(fh):
total_lines += 1
line = line.rstrip('\\r\\n')
diff -r 6e8d94597139 -r 9cd341095afd column_maker.xml
--- a/column_maker.xml Wed Jul 15 10:38:50 2020 -0400
+++ b/column_maker.xml Wed Dec 30 00:50:15 2020 +0000
@@ -1,164 +1,170 @@
-
- an expression on every row
-
- python
- sed
- numpy
-
- header &&
- sed '1,1d' '$input' > data &&
- #else:
- touch header &&
- ln -s '$input' data &&
- #end if
-
- python '$__tool_directory__/column_maker.py'
- data column_maker_output
- "$cond"
- $round
- ${input.metadata.columns}
- "${input.metadata.column_types}"
- $avoid_scientific_notation &&
- cat header column_maker_output > '$out_file1'
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- .. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
-
------
-
-**What it does**
-
-This tool computes an expression for every row of a dataset and appends the result as a new column (field).
-
-- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
-
-- **c3-c2** will add a length column to the dataset if **c2** and **c3** are start and end position
-
------
-
-**Example**
-
-If this is your input::
-
- chr1 151077881 151077918 2 200 -
- chr1 151081985 151082078 3 500 +
-
-computing "c4*c5" will produce::
-
- chr1 151077881 151077918 2 200 - 400.0
- chr1 151081985 151082078 3 500 + 1500.0
-
-if, at the same time, "Round result?" is set to **YES** results will look like this::
-
- chr1 151077881 151077918 2 200 - 400
- chr1 151081985 151082078 3 500 + 1500
-
-You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::
-
- chr1 151077881 151077918 2 200 - True
- chr1 151081985 151082078 3 500 + True
-
-or computing "type(c2)==type('') for Input will return::
-
- chr1 151077881 151077918 2 200 - False
- chr1 151081985 151082078 3 500 + False
-
-
-The following built-in functions are available::
-
- abs | all | any | bin | bool | chr | ceil | cmp | complex
-
- divmod | exp | float | log | log10 | floor | hex | int | len | long
-
- max | min | oct | ord | pow | range | reversed
-
- round | sorted | sqrt | str | sum | type | unichr | unicode |
-
-
-
-
+
+ an expression on every row
+
+ python
+ sed
+ numpy
+
+ header &&
+ sed '1,1d' '$input' > data &&
+ #else:
+ touch header &&
+ ln -s '$input' data &&
+ #end if
+
+ ## inject columns and column_types metadata into inputs json
+ #import json
+ #set inputs_dict = json.load(open($inputs))
+ #set inputs_dict['columns'] = $input.metadata.columns
+ #set inputs_dict['column_types'] = $input.metadata.column_types
+ #set x = json.dump($inputs_dict, open($inputs, 'w'))
+
+ python '$__tool_directory__/column_maker.py'
+ data column_maker_output
+ --load_json '$inputs'
+ && cat header column_maker_output > '$out_file1'
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
+
+-----
+
+**What it does**
+
+This tool computes an expression for every row of a dataset and appends the result as a new column (field).
+
+- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
+
+- **c3-c2** will add a length column to the dataset if **c2** and **c3** are start and end positions
+
+-----
+
+**Example**
+
+If this is your input::
+
+ chr1 151077881 151077918 2 200 -
+ chr1 151081985 151082078 3 500 +
+
+computing "c4*c5" will produce::
+
+ chr1 151077881 151077918 2 200 - 400.0
+ chr1 151081985 151082078 3 500 + 1500.0
+
+if, at the same time, "Round result?" is set to **YES** results will look like this::
+
+ chr1 151077881 151077918 2 200 - 400
+ chr1 151081985 151082078 3 500 + 1500
+
+You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::
+
+ chr1 151077881 151077918 2 200 - True
+ chr1 151081985 151082078 3 500 + True
+
+or computing "type(c2)==type('')" for Input will return::
+
+ chr1 151077881 151077918 2 200 - False
+ chr1 151081985 151082078 3 500 + False
+
+
+The following built-in functions are available::
+
+ abs | all | any | bin | bool | chr | ceil | cmp | complex
+
+ divmod | exp | float | log | log10 | floor | hex | int | len | long
+
+ max | min | oct | ord | pow | range | reversed
+
+ round | sorted | sqrt | str | sum | type | unichr | unicode |
+
+
+
+