Mercurial > repos > blankenberg > column_regex_substitution
view column_regex_substitution.py @ 0:12b740c4cbc1 draft default tip
planemo upload for repository https://github.com/blankenberg/tools-blankenberg/tree/master/tools/column_regex_substitution commit 78936dc6be1747303d4cbfd80d09e4cfd1cbf292
author | blankenberg |
---|---|
date | Fri, 07 Sep 2018 10:29:30 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
# Dan Blankenberg
"""
A script for using regex substitutions on columns.

Each input line is split on an optional delimiter, a regular expression
substitution is applied to the selected columns (or to every column when none
are selected), and the re-joined line is written to the output file.
"""
import optparse
import re
import string
import sys

VERSION = "0.0.1"

# Every printable character except digits and ','. Stripping these from both
# ends of a column token turns e.g. "c3" or " 3 " into "3".
COLUMN_STRIP_VALUES = "".join(set(string.printable) - set(string.digits) - set(','))


def get_provided_columns(provided_value, column_offset):
    """Parse a comma separated column specification into sorted indexes.

    Args:
        provided_value: String such as "c1,c3" or "1, 3"; may be None.
        column_offset: Integer added to every parsed column number (for
            example -1 to turn one-based numbers into zero-based indexes).

    Returns:
        A sorted list of integers, or None when nothing usable was provided
        (None, an empty string, or a token that contains no number).
    """
    if provided_value is None:
        return None
    try:
        rval = sorted(
            int(token.strip(COLUMN_STRIP_VALUES)) + column_offset
            for token in provided_value.split(',')
        )
    except (AttributeError, TypeError, ValueError):
        # An unparsable specification means "no explicit columns"; unlike a
        # bare except this lets KeyboardInterrupt/SystemExit propagate.
        return None
    return rval or None


def _open_text(path, mode):
    """Open *path* for 'r' or 'w' with lines terminated only by '\\n'.

    On Python 3 ``newline='\\n'`` turns off newline translation so that str
    lines are returned while still splitting only on '\\n'. On Python 2 the
    original modes are kept ('rb' for reading, 'w' for writing).
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='\n')
    return open(path, 'rb' if mode == 'r' else mode)


def __main__():
    """Command line entry point: parse options and rewrite the input file.

    Exits with status 0 after printing the version when --version is given,
    and with status 1 after printing the help when --pattern, --replacement,
    --input or --output is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('--pattern', action='store', default=None, help='pattern string')
    parser.add_option('--replacement', action='store', default=None, help='replacement string')
    parser.add_option('--input', action='store', default=None, help='Filename of input file')
    parser.add_option('--output', action='store', default=None, help='Filename of output file')
    parser.add_option('--delimiter', action='store', default=None, help='column delimiter')
    parser.add_option('--columns', action='store', default=None, help='columns to operate on')
    parser.add_option('--column_offset', action='store', type='int', default=0,
                      help='offset to apply to columns index to force to zero-based')
    # type='int' matters: a string value made the "i < skip" test below
    # compare an int with a str.
    parser.add_option('--skip', action='store', type='int', default=0,
                      help='Number of lines to skip')
    parser.add_option('--version', action='store_true', default=False, help='Show version')
    (options, args) = parser.parse_args()

    if options.version:
        print("blankenberg_python_regex_substitution %s" % (VERSION))
        sys.exit(0)

    # The input file is needed as much as the output file; without this
    # check a missing --input only failed later inside open().
    if None in [options.pattern, options.replacement, options.input, options.output]:
        parser.print_help()
        sys.exit(1)

    replacement = options.replacement
    column_offset = options.column_offset
    skip = options.skip

    print("Pattern: %s\nReplacement: %s" % (repr(options.pattern), repr(replacement)))
    pattern = re.compile(options.pattern)

    provided_columns = get_provided_columns(options.columns, column_offset)
    if provided_columns:
        # Report the columns the way the user numbered them.
        column_str = ", ".join(str(index - column_offset) for index in provided_columns)
    else:
        column_str = 'all'
    print("With delimiter %s, on columns: %s" % (repr(options.delimiter), column_str))

    # An empty delimiter cannot be passed to str.split(); treat it like a
    # missing one, i.e. the whole line is a single field.
    delimiter = options.delimiter or None
    join_char = delimiter if delimiter is not None else ""

    with _open_text(options.input, 'r') as fin:
        with _open_text(options.output, 'w') as fout:
            for i, line in enumerate(fin):
                if i < skip:
                    # Skipped lines are dropped; they are not copied to the output.
                    continue
                line = line.rstrip('\n\r')
                fields = [line] if delimiter is None else line.split(delimiter)
                field_count = len(fields)
                columns = provided_columns if provided_columns else range(field_count)
                for j in columns:
                    if j >= field_count:
                        # Columns are sorted, so every later one is out of range too.
                        break
                    fields[j] = pattern.sub(replacement, fields[j])
                fout.write("%s\n" % (join_char.join(fields)))


if __name__ == "__main__":
    __main__()