Mercurial > repos > jjohnson > split_tabular_columns
comparison split_tabular_columns.py @ 0:d43312f961cc draft default tip
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/split_tabular_columns commit 1d5750b99b90bb1d2730c816a95849e9b9a7d2f9-dirty
| author | jjohnson | 
|---|---|
| date | Wed, 01 Mar 2017 14:01:57 -0500 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:d43312f961cc | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 # | |
| 4 #------------------------------------------------------------------------------ | |
| 5 # University of Minnesota | |
| 6 # Copyright 2016, Regents of the University of Minnesota | |
| 7 #------------------------------------------------------------------------------ | |
| 8 # Author: | |
| 9 # | |
| 10 # James E Johnson | |
| 11 # | |
| 12 #------------------------------------------------------------------------------ | |
| 13 """ | |
| 14 | |
| 15 """ | |
| 16 Split selected columns on a literal separator string | |
| 17 and print one output line for each item produced by the split | |
| 18 | |
| 19 For example: | |
| 20 split_tabular_columns.py -c 3 -c 4 -s '; ' | |
| 21 with input line: | |
| 22 1 1.3 id1; id2 desc1; desc2 AMDLID | |
| 23 will be output as: | |
| 24 1 1.3 id1 desc1 AMDLID | |
| 25 1 1.3 id2 desc2 AMDLID | |
| 26 """ | |
| 27 | |
| 28 import sys | |
| 29 import os.path | |
| 30 import optparse | |
| 31 from optparse import OptionParser | |
| 32 | |
| 33 | |
def _expand_row(fields, split_cols, split_on):
    """Expand one tabular row into one row per item of its split columns.

    Args:
        fields: list of the row's column values (strings).
        split_cols: zero-based indexes of the columns to split.  Indexes
            that fall outside the row (negative, or beyond its last
            column) are ignored for that row.
        split_on: literal separator string passed to ``str.split``.

    Returns:
        A list of rows (each a list of strings).  If no selected column
        exists in the row, the row is returned unchanged as the only
        element.  Otherwise there is one row per item of the longest
        split; a split column that yielded fewer items is padded with
        empty strings, and columns that are not split are repeated.
    """
    pieces = {}
    for col in split_cols:
        # Guard the lower bound as well: a column ordinal of 0 or less
        # would otherwise wrap around to the end of the row.
        if 0 <= col < len(fields):
            pieces[col] = fields[col].split(split_on)
    if not pieces:
        return [fields]
    count = max(len(items) for items in pieces.values())
    rows = []
    for n in range(count):
        row = []
        for col, value in enumerate(fields):
            if col in pieces:
                items = pieces[col]
                # Pad ragged splits instead of raising IndexError.
                row.append(items[n] if n < len(items) else '')
            else:
                row.append(value)
        rows.append(row)
    return rows


def __main__():
    """Command-line entry point: split selected columns of a tabular file.

    Options (read from ``sys.argv`` via optparse):
        -i/--input     tabular input file (default: stdin)
        -o/--output    tabular output file (default: stdout)
        -c/--column    1-based column ordinal to split; may be repeated
        -s/--split_on  literal string on which to split the columns
        -d/--debug     accepted for compatibility; not used here

    Each input line is split on tabs and written back as one or more
    tab-separated lines, one per item of the split columns.

    Exits with status 2 if the input cannot be opened, 3 if the output
    cannot be opened, and 1 if processing fails.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', dest='input', default=None, help='Tabular input file')
    parser.add_option('-o', '--output', dest='output', default=None, help='Tabular output file')
    parser.add_option('-c', '--column', type='int', action='append', dest='column', default=[], help='column ordinal to split')
    parser.add_option('-s', '--split_on', dest='split_on', default=' ', help='String on which to split columns')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    (options, args) = parser.parse_args()
    # Input file
    if options.input is not None:
        try:
            inputFile = open(os.path.abspath(options.input), 'r')
        except Exception as e:
            sys.stderr.write("failed: %s\n" % e)
            sys.exit(2)
    else:
        inputFile = sys.stdin
    # Output file
    if options.output is not None:
        try:
            outputFile = open(os.path.abspath(options.output), 'w')
        except Exception as e:
            sys.stderr.write("failed: %s\n" % e)
            # Do not leak the already-opened input file on this exit path.
            if inputFile is not sys.stdin:
                inputFile.close()
            sys.exit(3)
    else:
        outputFile = sys.stdout
    # Command-line ordinals are 1-based; convert to list indexes.
    split_cols = [x - 1 for x in options.column]
    split_on = options.split_on
    try:
        for line in inputFile:
            fields = line.rstrip('\r\n').split('\t')
            for row in _expand_row(fields, split_cols, split_on):
                outputFile.write('\t'.join(row) + '\n')
    except Exception as e:
        sys.stderr.write("failed: Error reading %s - %s\n" % (options.input if options.input else 'stdin', e))
        sys.exit(1)
    finally:
        # Close only the handles this function opened, never stdin/stdout.
        if inputFile is not sys.stdin:
            inputFile.close()
        if outputFile is not sys.stdout:
            outputFile.close()


if __name__ == "__main__":
    __main__()
