Mercurial > repos > iuc > column_order_header_sort
changeset 0:6ae9724caf4d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_order_header_sort commit d562cc65926c8c95af21467177b253b6ac985cb4
author | iuc |
---|---|
date | Wed, 12 Apr 2017 17:17:18 -0400 |
parents | |
children | |
files | column_order_header_sort.py column_order_header_sort.xml test-data/in_1.tabular test-data/out_1.tabular |
diffstat | 4 files changed, 82 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# File: column_order_header_sort.py (added Wed Apr 12 17:17:18 2017 -0400)
"""Reorder the columns of a delimited text file by sorted header value.

Usage:
    column_order_header_sort.py INPUT OUTPUT DELIMITER KEY_COLUMN

The first line of INPUT is read as the header.  Columns are written to
OUTPUT ordered by their header text; when KEY_COLUMN is a valid 1-based
column number that column is emitted first and excluded from the sort.
Any KEY_COLUMN that is not a positive integer (e.g. "None" or "0") means
"no key column".  The actual column shuffling is delegated to gawk.
"""

import subprocess
import sys

AWK_CMD = """BEGIN{FS="%s"; OFS="%s";} {print %s;}"""


def parse_key_column(raw):
    """Return the 0-based key column index for *raw*, or None.

    *raw* is the 1-based column number as given on the command line.
    Values that are not integers, or that are smaller than 1, yield None
    so that they can never turn into a negative list index or awk's
    whole-record field ``$0``.
    """
    try:
        index = int(raw) - 1
    except (TypeError, ValueError):
        return None
    return index if index >= 0 else None


def read_header(input_filename, delimiter):
    """Return the fields of the first line of *input_filename*.

    Raises ValueError if *delimiter* is empty (from ``str.split``).
    """
    with open(input_filename, 'r') as fh:
        first_line = fh.readline().strip('\r\n')
    return first_line.split(delimiter)


def order_columns(header, key_column=None):
    """Return 0-based column indexes in output order.

    The key column (if any) comes first; every other column follows,
    sorted by header text.

    Raises ValueError if header values are not unique or if *key_column*
    does not refer to an existing column.
    """
    if len(header) != len(set(header)):
        raise ValueError("Header values must be unique")
    if key_column is not None and not 0 <= key_column < len(header):
        raise ValueError(
            "Key column %i is out of range for a header with %i columns"
            % (key_column + 1, len(header))
        )
    # Sorting the indexes by header text avoids a linear header.index()
    # lookup per column; uniqueness guarantees there are no ties.
    remaining = sorted(
        (i for i in range(len(header)) if i != key_column),
        key=header.__getitem__,
    )
    if key_column is None:
        return remaining
    return [key_column] + remaining


def escape_awk_string(text):
    """Escape *text* for use inside a double-quoted awk string literal."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


def build_awk_program(delimiter, columns):
    """Return the awk program printing *columns* (0-based) in order."""
    separator = escape_awk_string(delimiter)
    # awk fields are 1-based.
    fields = ",".join("$%i" % (column + 1) for column in columns)
    return AWK_CMD % (separator, separator, fields)


def main(argv=None):
    """Run the tool; return the process exit status.

    *argv* defaults to ``sys.argv[1:]`` and must hold at least
    INPUT, OUTPUT, DELIMITER and KEY_COLUMN (extra arguments are ignored).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 4:
        sys.stderr.write(
            "Usage: column_order_header_sort.py INPUT OUTPUT DELIMITER KEY_COLUMN\n"
        )
        return 2
    input_filename, output_filename, delimiter, key_column = args[:4]

    try:
        header = read_header(input_filename, delimiter)
        columns = order_columns(header, parse_key_column(key_column))
    except ValueError as err:
        sys.stderr.write("%s\n" % err)
        return 1

    awk_cmd = build_awk_program(delimiter, columns)
    # The context manager guarantees the output handle is flushed and
    # closed once gawk has finished writing to it.
    with open(output_filename, 'wb') as output:
        return subprocess.call(
            ['gawk', awk_cmd, input_filename], stdout=output, shell=False
        )


if __name__ == "__main__":
    sys.exit(main())
<!-- File: column_order_header_sort.xml (added Wed Apr 12 17:17:18 2017 -0400) -->
<!-- Galaxy tool wrapper: reorders the columns of a tabular dataset by header text. -->
<tool id="column_order_header_sort" name="Sort Column Order" version="0.0.1">
    <description>
        by heading
    </description>
    <!-- The script runs under python and shells out to gawk for the column shuffle. -->
    <requirements>
        <requirement type="package" version="3.6.1">python</requirement>
        <requirement type="package" version="4.1.3">gawk</requirement>
    </requirements>
    <!-- Positional arguments: input file, output file, delimiter, key column. -->
    <!-- Single quotes are removed from the delimiter so it cannot close the shell quoting. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/column_order_header_sort.py'
        '${input_tabular}'
        '${output_tabular}'
        '${ str($input_tabular.unsanitized.metadata.delimiter).replace( "'", "" ) }'
        '${key_column}'
    ]]>
    </command>
    <inputs>
        <param name="input_tabular" type="data" format="tabular" multiple="False" optional="False" label="Tabular file"/>
        <param name="key_column" type="data_column" data_ref="input_tabular" value="0" optional="True" label="Identifier column" help="This column will be made left-most."/>
    </inputs>
    <outputs>
        <data format="tabular" name="output_tabular"/>
    </outputs>
    <tests>
        <!-- Column 1 is kept left-most; the remaining columns are sorted by header. -->
        <test>
            <param name="input_tabular" value="in_1.tabular" ftype="tabular"/>
            <param name="key_column" value="1"/>
            <output name="output_tabular" file="out_1.tabular" ftype="tabular"/>
        </test>
    </tests>
    <help>
        <![CDATA[
        Reorders a file's columns by sorted value of header fields.
        Specify the optional Identifier column parameter to make a column left-most; generally used for a Key column that should not be sorted within the other columns.
        ]]>
    </help>
    <citations>
    </citations>
</tool>