Mercurial > repos > iuc > column_remove_by_header
changeset 0:372967836e98 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_remove_by_header commit 2150a3264364471090b650bdffde9f9c0b47ac39
author | iuc |
---|---|
date | Wed, 12 Apr 2017 17:17:29 -0400 |
parents | |
children | 2040e4c2750a |
files | column_remove_by_header.py column_remove_by_header.xml test-data/in_1.tabular test-data/out_1.tabular test-data/out_2.tabular |
diffstat | 5 files changed, 116 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/column_remove_by_header.py Wed Apr 12 17:17:29 2017 -0400 @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +import subprocess +import sys + +AWK_CMD = """BEGIN{FS="%s"; OFS="%s";} {print %s;}""" + +input_filename = sys.argv[1] +output_filename = sys.argv[2] +delimiter = sys.argv[3] +keep_columns = sys.argv[4] +strip_characters = sys.argv[5] + +if keep_columns == "--keep": + keep_columns = True +else: + keep_columns = False + +names = [] +for name in sys.argv[6:]: + names.append( name ) + +header = None +with open( input_filename, 'r' ) as fh: + header = fh.readline().strip( '\r\n' ) +header = header.split( delimiter ) +columns = [] +for i, key in enumerate( header, 1 ): + if i == 1 and strip_characters: + key = key.lstrip( strip_characters ) + if ( keep_columns and key in names ) or ( not keep_columns and key not in names ): + columns.append( i ) +print( "Kept", len( columns ), "of", len( header ), "columns." ) +awk_cmd = AWK_CMD % ( delimiter, delimiter, ",".join( map( lambda x: "$%s" % x, columns ) ) ) +sys.exit( subprocess.call( [ 'gawk', awk_cmd, input_filename ], stdout=open( output_filename, 'wb+' ), shell=False ) )
<!-- File: column_remove_by_header.xml (tool wrapper for column_remove_by_header.py) -->
<tool id="column_remove_by_header" name="Remove columns" version="0.0.1">
    <description>
        by heading
    </description>
    <requirements>
        <requirement type="package" version="3.6.1">python</requirement>
        <requirement type="package" version="4.1.3">gawk</requirement>
    </requirements>
    <!--
        Positional arguments handed to the script, in order: input file,
        output file, delimiter, keep flag, strip characters, then one
        single-quoted header name per entry of the "headers" repeat.
    -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/column_remove_by_header.py' '${input_tabular}' '${output_tabular}' '${input_tabular.unsanitized.metadata.delimiter}' '${keep_columns}' '${strip_characters}'
        #for $header in $headers:
            '${header.name}'
        #end for
    ]]>
    </command>
    <inputs>
        <param name="input_tabular" type="data" format="tabular" multiple="False" optional="False" label="Tabular file"/>
        <repeat name="headers" title="Header" min="1" default="1">
            <param name="name" type="text" optional="False" label="Header name">
                <!-- Single quotes are dropped because the command line wraps this value in single quotes. -->
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'"/>
                    </valid>
                    <mapping initial="none">
                        <add source="'" target=""/>
                    </mapping>
                </sanitizer>
            </param>
        </repeat>
        <!-- Unchecked (the default) passes an empty string, which the script treats as "remove the named columns". -->
        <param name="keep_columns" type="boolean" label="Keep named columns" truevalue="--keep" falsevalue="" checked="False"/>
        <param name="strip_characters" type="text" optional="False" value="#" label="Characters to strip when doing name comparison in first column" help="Removes characters from the left of the first column only.">
            <!-- Same single-quote handling as for the header names. -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target=""/>
                </mapping>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <data format="tabular" name="output_tabular"/>
    </outputs>
    <tests>
        <!-- Remove mode: drop column "a". -->
        <test>
            <param name="input_tabular" value="in_1.tabular" ftype="tabular"/>
            <param name="name" value="a"/>
            <param name="keep_columns" value=""/>
            <param name="strip_characters" value="#"/>
            <output name="output_tabular" file="out_1.tabular" ftype="tabular"/>
        </test>
        <!-- Keep mode: retain "a" and "KEY"; the leading "#" of the first header is stripped for the comparison. -->
        <test>
            <param name="input_tabular" value="in_1.tabular" ftype="tabular"/>
            <param name="name" value="a"/>
            <param name="name" value="KEY"/>
            <param name="keep_columns" value="--keep"/>
            <param name="strip_characters" value="#"/>
            <output name="output_tabular" file="out_2.tabular" ftype="tabular"/>
        </test>
    </tests>
    <help>
        <![CDATA[
        Removes or keeps columns based upon user provided values.
        ]]>
    </help>
    <citations>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in_1.tabular Wed Apr 12 17:17:29 2017 -0400 @@ -0,0 +1,4 @@ +#KEY b c a +one 1-1 1-2 1-3 +two 1-4 1-5 1-6 +three 1-7 1-8 1-9