Mercurial > repos > devteam > ucsc_custom_track
diff build_ucsc_custom_track.py @ 2:3d87079756e1 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
author | devteam |
---|---|
date | Mon, 28 Feb 2022 20:06:04 +0000 |
parents | 618e56c3109b |
children |
line wrap: on
line diff
--- a/build_ucsc_custom_track.py Fri Oct 09 17:18:58 2015 -0400 +++ b/build_ucsc_custom_track.py Mon Feb 28 20:06:04 2022 +0000 @@ -3,97 +3,86 @@ Build a UCSC genome browser custom track file """ -import sys, os +import sys -assert sys.version_info[:2] >= ( 2, 4 ) +FILE_TYPE_TO_TRACK_TYPE = {'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph': 'bedGraph', 'wig': 'wiggle_0'} +CHUNK_SIZE = 2**20 # 1 mb -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() -FILE_TYPE_TO_TRACK_TYPE = { 'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph':'bedGraph', 'wig':'wiggle_0' } -CHUNK_SIZE = 2**20 #1mb - -def get_track_line_is_interval( file_type, name, description, color, visibility ): +def get_track_line_is_interval(file_type, name, description, color, visibility): if file_type in FILE_TYPE_TO_TRACK_TYPE: - track_type = FILE_TYPE_TO_TRACK_TYPE[ file_type ] + track_type = FILE_TYPE_TO_TRACK_TYPE[file_type] is_interval = False else: track_type = None is_interval = True track_line = 'track ' if track_type: - track_line += 'type=%s ' % ( track_type ) - track_line += 'name="%s" description="%s" color=%s visibility=%s\n' % ( name, description, color, visibility ) + track_line += f"type={track_type} " + track_line += f'name="{name}" description="{description}" color={color} visibility={visibility}\n' return track_line, is_interval -args = sys.argv[1:] - -out_fname = args.pop(0) -out = open( out_fname, "w" ) num_tracks = 0 skipped_lines = 0 first_invalid_line = 0 -while args: - # Suck in one dataset worth of arguments - in_fname = args.pop(0) - file_type = args.pop(0) - colspec = args.pop(0) - name = args.pop(0) - description = args.pop(0) - color = args.pop(0).replace( '-', ',' ) - visibility = args.pop(0) - track_line, is_interval = get_track_line_is_interval( file_type, name, description, color, visibility ) - # Do the work - in_file = open( in_fname ) - out.write( track_line ) - if not is_interval: - while True: - chunk = in_file.read( CHUNK_SIZE ) - if chunk: - out.write( chunk ) +args = sys.argv[1:] +out_fname = args.pop(0) +with open(out_fname, "w") as out: + while args: + # Suck in one dataset worth of arguments + in_fname = args.pop(0) + file_type = args.pop(0) + colspec = args.pop(0) + name = args.pop(0) + description = args.pop(0) + color = args.pop(0).replace('-', ',') + visibility = args.pop(0) + track_line, is_interval = get_track_line_is_interval(file_type, name, description, color, visibility) + # Do the work + out.write(track_line) + with open(in_fname) as in_file: + if not is_interval: + while True: + chunk = in_file.read(CHUNK_SIZE) + if chunk: + out.write(chunk) + else: + break else: - break - else: - # Assume type is interval (don't pass this script anything else!) - try: - c, s, e, st = [ int( x ) - 1 for x in colspec.split( "," ) ] - except: - try: - c, s, e = [ int( x ) - 1 for x in colspec.split( "," )[:3] ] - st = -1 #strand column is absent - except: - stop_err( "Columns in interval file invalid for UCSC custom track." ) - - i = 0 - for i, line in enumerate( in_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ): - fields = line.split( "\t" ) - if st > 0: - #strand column is present + # Assume type is interval (don't pass this script anything else!) + try: + c, s, e, st = (int(x) - 1 for x in colspec.split(",")) + except ValueError: try: - out.write( "%s\t%s\t%s\t%d\t0\t%s\n" % ( fields[c], fields[s], fields[e], i, fields[st] ) ) - except: - skipped_lines += 1 - if not first_invalid_line: - first_invalid_line = i+1 - else: - try: - out.write( "%s\t%s\t%s\n" % ( fields[c], fields[s], fields[e] ) ) - except: - skipped_lines += 1 - if not first_invalid_line: - first_invalid_line = i+1 - out.write( "\n" ) #separating newline - num_tracks += 1 - -out.close() + c, s, e = (int(x) - 1 for x in colspec.split(",")[:3]) + st = -1 # strand column is absent + except Exception: + sys.exit("Columns in interval file invalid for UCSC custom track.") -print "Generated a custom track containing %d subtracks." % num_tracks + i = 0 + for i, line in enumerate(in_file): + line = line.rstrip('\r\n') + if line and not line.startswith('#'): + fields = line.split("\t") + if st > 0: + # strand column is present + try: + out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\t{i}\t0\t{fields[st]}\n") + except Exception: + skipped_lines += 1 + if not first_invalid_line: + first_invalid_line = i + 1 + else: + try: + out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\n") + except Exception: + skipped_lines += 1 + if not first_invalid_line: + first_invalid_line = i + 1 + out.write("\n") # separating newline + num_tracks += 1 + +print(f"Generated a custom track containing {num_tracks} subtracks.") if skipped_lines: - print "Skipped %d invalid lines starting at #%d" % ( skipped_lines, first_invalid_line ) - - - + print(f"Skipped {skipped_lines} invalid lines starting at #{first_invalid_line}")