# HG changeset patch # User devteam # Date 1646078764 0 # Node ID 3d87079756e1465a423a8e94a2eb38bcc4951c65 # Parent 760f588e8a26ff222f87fc92613564fafdc828ac "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9" diff -r 760f588e8a26 -r 3d87079756e1 build_ucsc_custom_track.py --- a/build_ucsc_custom_track.py Fri Oct 09 17:18:58 2015 -0400 +++ b/build_ucsc_custom_track.py Mon Feb 28 20:06:04 2022 +0000 @@ -3,97 +3,86 @@ Build a UCSC genome browser custom track file """ -import sys, os +import sys -assert sys.version_info[:2] >= ( 2, 4 ) +FILE_TYPE_TO_TRACK_TYPE = {'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph': 'bedGraph', 'wig': 'wiggle_0'} +CHUNK_SIZE = 2**20 # 1 mb -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() -FILE_TYPE_TO_TRACK_TYPE = { 'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph':'bedGraph', 'wig':'wiggle_0' } -CHUNK_SIZE = 2**20 #1mb - -def get_track_line_is_interval( file_type, name, description, color, visibility ): +def get_track_line_is_interval(file_type, name, description, color, visibility): if file_type in FILE_TYPE_TO_TRACK_TYPE: - track_type = FILE_TYPE_TO_TRACK_TYPE[ file_type ] + track_type = FILE_TYPE_TO_TRACK_TYPE[file_type] is_interval = False else: track_type = None is_interval = True track_line = 'track ' if track_type: - track_line += 'type=%s ' % ( track_type ) - track_line += 'name="%s" description="%s" color=%s visibility=%s\n' % ( name, description, color, visibility ) + track_line += f"type={track_type} " + track_line += f'name="{name}" description="{description}" color={color} visibility={visibility}\n' return track_line, is_interval -args = sys.argv[1:] - -out_fname = args.pop(0) -out = open( out_fname, "w" ) num_tracks = 0 skipped_lines = 0 first_invalid_line = 0 -while args: - # Suck in one dataset worth of arguments - in_fname = args.pop(0) - file_type = args.pop(0) - colspec = args.pop(0) - name = args.pop(0) - description = args.pop(0) - color = args.pop(0).replace( '-', ',' ) - visibility = args.pop(0) - track_line, is_interval = get_track_line_is_interval( file_type, name, description, color, visibility ) - # Do the work - in_file = open( in_fname ) - out.write( track_line ) - if not is_interval: - while True: - chunk = in_file.read( CHUNK_SIZE ) - if chunk: - out.write( chunk ) +args = sys.argv[1:] +out_fname = args.pop(0) +with open(out_fname, "w") as out: + while args: + # Suck in one dataset worth of arguments + in_fname = args.pop(0) + file_type = args.pop(0) + colspec = args.pop(0) + name = args.pop(0) + description = args.pop(0) + color = args.pop(0).replace('-', ',') + visibility = args.pop(0) + track_line, is_interval = get_track_line_is_interval(file_type, name, description, color, visibility) + # Do the work + out.write(track_line) + with open(in_fname) as in_file: + if not is_interval: + while True: + chunk = in_file.read(CHUNK_SIZE) + if chunk: + out.write(chunk) + else: + break else: - break - else: - # Assume type is interval (don't pass this script anything else!) - try: - c, s, e, st = [ int( x ) - 1 for x in colspec.split( "," ) ] - except: - try: - c, s, e = [ int( x ) - 1 for x in colspec.split( "," )[:3] ] - st = -1 #strand column is absent - except: - stop_err( "Columns in interval file invalid for UCSC custom track." ) - - i = 0 - for i, line in enumerate( in_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ): - fields = line.split( "\t" ) - if st > 0: - #strand column is present + # Assume type is interval (don't pass this script anything else!) + try: + c, s, e, st = (int(x) - 1 for x in colspec.split(",")) + except ValueError: try: - out.write( "%s\t%s\t%s\t%d\t0\t%s\n" % ( fields[c], fields[s], fields[e], i, fields[st] ) ) - except: - skipped_lines += 1 - if not first_invalid_line: - first_invalid_line = i+1 - else: - try: - out.write( "%s\t%s\t%s\n" % ( fields[c], fields[s], fields[e] ) ) - except: - skipped_lines += 1 - if not first_invalid_line: - first_invalid_line = i+1 - out.write( "\n" ) #separating newline - num_tracks += 1 - -out.close() + c, s, e = (int(x) - 1 for x in colspec.split(",")[:3]) + st = -1 # strand column is absent + except Exception: + sys.exit("Columns in interval file invalid for UCSC custom track.") -print "Generated a custom track containing %d subtracks." % num_tracks + i = 0 + for i, line in enumerate(in_file): + line = line.rstrip('\r\n') + if line and not line.startswith('#'): + fields = line.split("\t") + if st > 0: + # strand column is present + try: + out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\t{i}\t0\t{fields[st]}\n") + except Exception: + skipped_lines += 1 + if not first_invalid_line: + first_invalid_line = i + 1 + else: + try: + out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\n") + except Exception: + skipped_lines += 1 + if not first_invalid_line: + first_invalid_line = i + 1 + out.write("\n") # separating newline + num_tracks += 1 + +print(f"Generated a custom track containing {num_tracks} subtracks.") if skipped_lines: - print "Skipped %d invalid lines starting at #%d" % ( skipped_lines, first_invalid_line ) - - - + print(f"Skipped {skipped_lines} invalid lines starting at #{first_invalid_line}") diff -r 760f588e8a26 -r 3d87079756e1 build_ucsc_custom_track.xml --- a/build_ucsc_custom_track.xml Fri Oct 09 17:18:58 2015 -0400 +++ b/build_ucsc_custom_track.xml Mon Feb 28 20:06:04 2022 +0000 @@ -1,70 +1,73 @@ - - for UCSC genome browser - - build_ucsc_custom_track.py - "$out_file1" - #for $t in $tracks - "${t.input.file_name}" - "${t.input.ext}" - #if $t.input.ext == "interval" - ${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol} - #else - "NA" - #end if - "${t.name}" - "${t.description}" - "${t.color}" - "${t.visibility}" - #end for - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + for UCSC genome browser + + python + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + - - - + + - - - + ]]> diff -r 760f588e8a26 -r 3d87079756e1 build_ucsc_custom_track_code.py --- a/build_ucsc_custom_track_code.py Fri Oct 09 17:18:58 2015 -0400 +++ b/build_ucsc_custom_track_code.py Mon Feb 28 20:06:04 2022 +0000 @@ -1,21 +1,15 @@ # runs after the job (and after the default post-filter) -# Older py compatibility -try: - set() -except: - from sets import Set as set - -def validate_input( trans, error_map, param_values, page_param_map ): +def validate_input(trans, error_map, param_values, page_param_map): dbkeys = set() tracks = param_values['tracks'] for track in tracks: if track['input']: - dbkeys.add( track['input'].dbkey ) - if len( dbkeys ) > 1: + dbkeys.add(track['input'].dbkey) + if len(dbkeys) > 1: # FIXME: Should be able to assume error map structure is created if 'tracks' not in error_map: - error_map['tracks'] = [ dict() for t in tracks ] - for i in range( len( tracks ) ): + error_map['tracks'] = [dict() for t in tracks] + for i in range(len(tracks)): error_map['tracks'][i]['input'] = \ - "All datasets must belong to same genomic build" \ No newline at end of file + "All datasets must belong to same genomic build"