Repository 'column_maker'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/column_maker

Changeset 5:9cd341095afd (2020-12-30)
Previous changeset 4:6e8d94597139 (2020-07-15) Next changeset 6:13b6f0007d9e (2021-01-25)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit a993d43d9d1702a6cf584683cf72527a3f999236"
modified:
column_maker.py
column_maker.xml
b
diff -r 6e8d94597139 -r 9cd341095afd column_maker.py
--- a/column_maker.py Wed Jul 15 10:38:50 2020 -0400
+++ b/column_maker.py Wed Dec 30 00:50:15 2020 +0000
[
@@ -5,31 +5,48 @@
 original file. The tool will skip over invalid lines within the file,
 informing the user about the number of lines skipped.
 """
-from __future__ import print_function
 
+import argparse
+import json
 import re
-import sys
-
-assert sys.version_info[:2] >= (2, 4)
 
-inp_file = sys.argv[1]
-out_file = sys.argv[2]
-expr = sys.argv[3]
-round_result = sys.argv[4]
+parser = argparse.ArgumentParser()
+parser.add_argument('input', type=argparse.FileType('r'), help="input file")
+parser.add_argument('output', type=argparse.FileType('wt'), help="output file")
+parser.add_argument('cond', nargs='?', type=str, help="expression")
+parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'],
+                    help="round result")
+parser.add_argument('columns', nargs='?', type=int, help="number of columns")
+parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types")
+parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'],
+                    help="avoid scientific notation")
+parser.add_argument('--load_json', default=None, type=argparse.FileType('r'),
+                    help="overwrite parsed arguments from json file")
+args = parser.parse_args()
+
+argparse_dict = vars(args)
+if args.load_json:
+    json_dict = json.load(args.load_json)
+    argparse_dict.update(json_dict)
+
+fh = argparse_dict['input']
+out = argparse_dict['output']
+expr = argparse_dict['cond']
+round_result = argparse_dict['round']
 try:
-    in_columns = int(sys.argv[5])
+    in_columns = int(argparse_dict['columns'])
 except Exception:
     exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data.")
 if in_columns < 2:
     # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method.
     exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data.")
 try:
-    in_column_types = sys.argv[6].split(',')
+    in_column_types = argparse_dict['column_types'].split(',')
 except Exception:
     exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data.")
 if len(in_column_types) != in_columns:
     exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data.")
-avoid_scientific_notation = sys.argv[7]
+avoid_scientific_notation = argparse_dict['avoid_scientific_notation']
 
 # Unescape if input has been escaped
 mapped_str = {
@@ -74,7 +91,6 @@
 invalid_line = None
 lines_kept = 0
 total_lines = 0
-out = open(out_file, 'wt')
 
 # Read input file, skipping invalid lines, and perform computation that will result in a new column
 code = '''
@@ -89,7 +105,6 @@
 )
 from numpy import format_float_positional
 
-fh = open(inp_file)
 for i, line in enumerate(fh):
     total_lines += 1
     line = line.rstrip('\\r\\n')
b
diff -r 6e8d94597139 -r 9cd341095afd column_maker.xml
--- a/column_maker.xml Wed Jul 15 10:38:50 2020 -0400
+++ b/column_maker.xml Wed Dec 30 00:50:15 2020 +0000
[
b'@@ -1,164 +1,170 @@\n-<tool id="Add_a_column1" name="Compute" version="1.3.1">\r\n-    <description>an expression on every row</description>\r\n-    <requirements>\r\n-        <requirement type="package" version="2.7.13">python</requirement>\r\n-        <requirement type="package" version="4.4">sed</requirement>\r\n-        <requirement type="package" version="1.14">numpy</requirement>\r\n-    </requirements>\r\n-    <command detect_errors="aggressive"><![CDATA[\r\n-        #if $header_lines_conditional.header_lines_select == "yes":\r\n-            (sed -n \'1,1p\' \'$input\' | sed  "s|$|%${header_lines_conditional.header_new_column_name}|" | tr "%" "\\t") > header &&\r\n-            sed \'1,1d\' \'$input\' > data &&\r\n-        #else:\r\n-            touch header &&\r\n-            ln -s \'$input\' data &&\r\n-        #end if\r\n-\r\n-        python \'$__tool_directory__/column_maker.py\'\r\n-            data column_maker_output\r\n-            "$cond"\r\n-            $round\r\n-            ${input.metadata.columns}\r\n-            "${input.metadata.column_types}"\r\n-            $avoid_scientific_notation &&\r\n-        cat header column_maker_output > \'$out_file1\'\r\n-    ]]></command>\r\n-    <inputs>\r\n-        <param name="cond" type="text" value="c3-c2" label="Add expression"/>\r\n-        <param format="tabular" name="input" type="data" label="as a new column to" help="Dataset missing? 
See TIP below"/>\r\n-        <param name="round" type="select" label="Round result?">\r\n-            <option value="no">NO</option>\r\n-            <option value="yes">YES</option>\r\n-        </param>\r\n-        <conditional name="header_lines_conditional">\r\n-            <param name="header_lines_select" type="select" label="Skip a header line" help="# characters are already considered as comments and kept" >\r\n-                <option value="no" >no</option>\r\n-                <option value="yes" >yes</option>\r\n-            </param>\r\n-            <when value="no">\r\n-            </when>\r\n-            <when value="yes">\r\n-                <param name="header_new_column_name" type="text" value="New Column" label="The new column name" />\r\n-            </when>\r\n-        </conditional>\r\n-        <param name="avoid_scientific_notation" type="select" label="Avoid scientific notation" help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers).">\r\n-            <option value="no">no</option>\r\n-            <option value="yes">yes</option>\r\n-        </param>\r\n-    </inputs>\r\n-    <outputs>\r\n-        <data format_source="input" name="out_file1" metadata_source="input"/>\r\n-    </outputs>\r\n-    <tests>\r\n-        <test>\r\n-            <param name="cond" value="c3-c2"/>\r\n-            <param name="input" value="1.bed"/>\r\n-            <param name="round" value="no"/>\r\n-            <output name="out_file1" file="column_maker_out1.interval"/>\r\n-        </test>\r\n-        <test>\r\n-            <param name="cond" value="c4*1"/>\r\n-            <param name="input" value="1.interval"/>\r\n-            <param name="round" value="no"/>\r\n-            <output name="out_file1" file="column_maker_out2.interval"/>\r\n-        </test>\r\n-        <test>\r\n-            <param name="cond" value="c4*1"/>\r\n-            <param name="input" value="1.header.tsv"/>\r\n-           
 <param name="round" value="no"/>\r\n-            <conditional name="header_lines_conditional">\r\n-                <param name="header_lines_select" value="yes" />\r\n-                <param name="header_new_column_name" value="value1_again" />\r\n-            </conditional>\r\n-            <output name="out_file1" file="column_maker_out2.header.tsv"/>\r\n-        </test>\r\n-        <test>\r\n-            <param name="cond" value="c4*1"/>\r\n-            <param name="input" value="1.interval"/>\r\n-            <param name="round" value="yes"/>\r\n-            <output name="out_file1" file="column_maker_out3.interval"/>\r\n-        </test>\r\n-        <test>\r\n-            <param name="cond" value="float(.0000000000001)"/>\r\n-            <param n'..b'" name="out_file1" metadata_source="input"/>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="cond" value="c3-c2"/>\n+            <param name="input" value="1.bed"/>\n+            <param name="round" value="no"/>\n+            <output name="out_file1" file="column_maker_out1.interval"/>\n+        </test>\n+        <test>\n+            <param name="cond" value="c4*1"/>\n+            <param name="input" value="1.interval"/>\n+            <param name="round" value="no"/>\n+            <output name="out_file1" file="column_maker_out2.interval"/>\n+        </test>\n+        <test>\n+            <param name="cond" value="c4*1"/>\n+            <param name="input" value="1.header.tsv"/>\n+            <param name="round" value="no"/>\n+            <conditional name="header_lines_conditional">\n+                <param name="header_lines_select" value="yes" />\n+                <param name="header_new_column_name" value="value1_again" />\n+            </conditional>\n+            <output name="out_file1" file="column_maker_out2.header.tsv"/>\n+        </test>\n+        <test>\n+            <param name="cond" value="c4*1"/>\n+            <param name="input" value="1.interval"/>\n+            <param 
name="round" value="yes"/>\n+            <output name="out_file1" file="column_maker_out3.interval"/>\n+        </test>\n+        <test>\n+            <param name="cond" value="float(.0000000000001)"/>\n+            <param name="input" value="1.bed"/>\n+            <param name="round" value="no"/>\n+            <output name="out_file1">\n+                <assert_contents>\n+                    <has_text text="CCDS10397" />\n+                    <has_text text="1e-13" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test>\n+            <param name="cond" value="float(.0000000000001)"/>\n+            <param name="input" value="1.bed"/>\n+            <param name="round" value="no"/>\n+            <param name="avoid_scientific_notation" value="yes"/>\n+            <output name="out_file1">\n+                <assert_contents>\n+                    <has_text text="CCDS10397" />\n+                    <has_text text=".0000000000001" />\n+                    <not_has_text text="1e-13" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help>\n+\n+ .. class:: infomark\n+\n+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*\n+\n+-----\n+\n+**What it does**\n+\n+This tool computes an expression for every row of a dataset and appends the result as a new column (field).\n+\n+- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file\n+\n+- **c3-c2** will add a length column to the dataset if **c2** and **c3** are start and end position\n+\n+-----\n+\n+**Example**\n+\n+If this is your input::\n+\n+   chr1  151077881  151077918  2  200  -\n+   chr1  151081985  151082078  3  500  +\n+\n+computing "c4*c5" will produce::\n+\n+   chr1  151077881  151077918  2  200  -   400.0\n+   chr1  151081985  151082078  3  500  +  1500.0\n+\n+if, at the same time, "Round result?" 
is set to **YES** results will look like this::\n+\n+   chr1  151077881  151077918  2  200  -   400\n+   chr1  151081985  151082078  3  500  +  1500\n+\n+You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::\n+\n+   chr1  151077881  151077918  2  200  -  True\n+   chr1  151081985  151082078  3  500  +  True\n+\n+or computing "type(c2)==type(\'\')" for Input will return::\n+\n+   chr1  151077881  151077918  2  200  -  False\n+   chr1  151081985  151082078  3  500  +  False\n+\n+\n+The following built-in functions are available::\n+\n+  abs | all | any | bin | bool | chr | ceil | cmp | complex\n+\n+  divmod | exp | float | log | log10 | floor | hex | int | len | long\n+\n+  max | min | oct | ord | pow | range | reversed\n+\n+  round | sorted | sqrt | str | sum | type | unichr | unicode |\n+\n+    </help>\n+    <citations />\n+</tool>\n'