changeset 0:372967836e98 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_remove_by_header commit 2150a3264364471090b650bdffde9f9c0b47ac39
author iuc
date Wed, 12 Apr 2017 17:17:29 -0400
parents
children 2040e4c2750a
files column_remove_by_header.py column_remove_by_header.xml test-data/in_1.tabular test-data/out_1.tabular test-data/out_2.tabular
diffstat 5 files changed, 116 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/column_remove_by_header.py	Wed Apr 12 17:17:29 2017 -0400
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+"""Remove (or keep) columns of a delimited file, selected by header name."""
+import subprocess
+import sys
+
+# gawk program template: input separator, output separator, fields to print.
+AWK_CMD = """BEGIN{FS="%s"; OFS="%s";} {print %s;}"""
+
+input_filename = sys.argv[1]
+output_filename = sys.argv[2]
+delimiter = sys.argv[3]
+# argv[4] is "--keep" to keep the named columns; anything else removes them.
+keep_columns = sys.argv[4] == "--keep"
+# Characters stripped from the left of the first header cell before matching.
+strip_characters = sys.argv[5]
+# Remaining arguments are the header names to match.
+names = sys.argv[6:]
+
+# Only the first line is read here; gawk processes the whole file itself.
+with open( input_filename, 'r' ) as fh:
+    header = fh.readline().strip( '\r\n' ).split( delimiter )
+columns = []
+for i, key in enumerate( header, 1 ):  # awk field numbers are 1-based
+    if i == 1 and strip_characters:
+        key = key.lstrip( strip_characters )
+    if ( key in names ) == keep_columns:
+        columns.append( i )
+print( "Kept", len( columns ), "of", len( header ), "columns." )
+# A bare awk `print` emits the whole record, so with no columns selected
+# print an empty string per row instead of leaking every column.
+fields = ",".join( "$%s" % x for x in columns ) or '""'
+awk_cmd = AWK_CMD % ( delimiter, delimiter, fields )
+# Close the output handle deterministically instead of leaking it.
+with open( output_filename, 'wb+' ) as out_fh:
+    sys.exit( subprocess.call( [ 'gawk', awk_cmd, input_filename ], stdout=out_fh, shell=False ) )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/column_remove_by_header.xml	Wed Apr 12 17:17:29 2017 -0400
@@ -0,0 +1,69 @@
+<tool id="column_remove_by_header" name="Remove columns" version="0.0.1"> <!-- Galaxy tool wrapper around column_remove_by_header.py -->
+    <description>
+        by heading
+    </description>
+    <requirements> <!-- python runs the script; the script itself invokes gawk -->
+        <requirement type="package" version="3.6.1">python</requirement>
+        <requirement type="package" version="4.1.3">gawk</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/column_remove_by_header.py' '${input_tabular}' '${output_tabular}' '${input_tabular.unsanitized.metadata.delimiter}' '${keep_columns}' '${strip_characters}'
+        #for $header in $headers:
+            '${header.name}'
+        #end for
+    ]]>
+    </command> <!-- argument order: input, output, delimiter, keep flag, strip characters, then one header name per repeat entry -->
+    <inputs>
+        <param name="input_tabular" type="data" format="tabular" multiple="False" optional="False" label="Tabular file"/>
+        <repeat name="headers" title="Header" min="1" default="1"> <!-- one entry per header name to match; at least one is required (min="1") -->
+            <param name="name" type="text" optional="False" label="Header name">
+                <sanitizer> <!-- single quotes are removed (mapped to ""), presumably because the command line wraps each value in single quotes -->
+                    <valid initial="string.printable">
+                        <remove value="&apos;"/>
+                    </valid>
+                    <mapping initial="none">
+                        <add source="&apos;" target=""/>
+                    </mapping>
+                </sanitizer>
+            </param>
+        </repeat>
+        <param label="Keep named columns" name="keep_columns" type="boolean" truevalue="--keep" falsevalue="" checked="False"/> <!-- checked passes "--keep" (keep only the named columns); unchecked passes "" (remove them) -->
+        <param name="strip_characters" type="text" optional="False" label="Characters to strip when doing name comparison in first column" value="#" help="Removes characters from the left of the first column only.">
+            <sanitizer> <!-- single quotes are removed from this value as well -->
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+                <mapping initial="none">
+                    <add source="&apos;" target=""/>
+                </mapping>
+            </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output_tabular"/>
+    </outputs>
+    <tests>
+        <test> <!-- remove mode: drops column "a"; expected result is out_1.tabular -->
+            <param name="input_tabular" value="in_1.tabular" ftype="tabular"/>
+            <param name="name" value="a"/>
+            <param name="keep_columns" value=""/>
+            <param name="strip_characters" value="#"/>
+            <output name="output_tabular" file="out_1.tabular" ftype="tabular"/>
+        </test>
+        <test> <!-- keep mode: keeps "a" and "KEY" (the leading "#" of "#KEY" is stripped before matching); expected result is out_2.tabular -->
+            <param name="input_tabular" value="in_1.tabular" ftype="tabular"/>
+            <param name="name" value="a"/>
+            <param name="name" value="KEY"/>
+            <param name="keep_columns" value="--keep"/>
+            <param name="strip_characters" value="#"/>
+            <output name="output_tabular" file="out_2.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+        Removes or keeps columns based upon user provided values.
+        ]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in_1.tabular	Wed Apr 12 17:17:29 2017 -0400
@@ -0,0 +1,4 @@
+#KEY	b	c	a
+one	1-1	1-2	1-3
+two	1-4	1-5	1-6
+three	1-7	1-8	1-9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_1.tabular	Wed Apr 12 17:17:29 2017 -0400
@@ -0,0 +1,4 @@
+#KEY	b	c
+one	1-1	1-2
+two	1-4	1-5
+three	1-7	1-8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_2.tabular	Wed Apr 12 17:17:29 2017 -0400
@@ -0,0 +1,4 @@
+#KEY	a
+one	1-3
+two	1-6
+three	1-9