Mercurial > repos > devteam > ucsc_custom_track
changeset 2:3d87079756e1 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
| author | devteam | 
|---|---|
| date | Mon, 28 Feb 2022 20:06:04 +0000 | 
| parents | 760f588e8a26 | 
| children | |
| files | build_ucsc_custom_track.py build_ucsc_custom_track.xml build_ucsc_custom_track_code.py | 
| diffstat | 3 files changed, 136 insertions(+), 154 deletions(-) [+] | 
line wrap: on
 line diff
--- a/build_ucsc_custom_track.py Fri Oct 09 17:18:58 2015 -0400
+++ b/build_ucsc_custom_track.py Mon Feb 28 20:06:04 2022 +0000
@@ -3,97 +3,86 @@
 Build a UCSC genome browser custom track file
 """
 
-import sys, os
+import sys
 
-assert sys.version_info[:2] >= ( 2, 4 )
+FILE_TYPE_TO_TRACK_TYPE = {'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph': 'bedGraph', 'wig': 'wiggle_0'}
+CHUNK_SIZE = 2**20 # 1 mb
 
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
 
-FILE_TYPE_TO_TRACK_TYPE = { 'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph':'bedGraph', 'wig':'wiggle_0' }
-CHUNK_SIZE = 2**20 #1mb
-
-def get_track_line_is_interval( file_type, name, description, color, visibility ):
+def get_track_line_is_interval(file_type, name, description, color, visibility):
     if file_type in FILE_TYPE_TO_TRACK_TYPE:
-        track_type = FILE_TYPE_TO_TRACK_TYPE[ file_type ]
+        track_type = FILE_TYPE_TO_TRACK_TYPE[file_type]
         is_interval = False
     else:
         track_type = None
         is_interval = True
     track_line = 'track '
     if track_type:
-        track_line += 'type=%s ' % ( track_type )
-    track_line += 'name="%s" description="%s" color=%s visibility=%s\n' % ( name, description, color, visibility )
+        track_line += f"type={track_type} "
+    track_line += f'name="{name}" description="{description}" color={color} visibility={visibility}\n'
     return track_line, is_interval
 
-args = sys.argv[1:]
-
-out_fname = args.pop(0)
-out = open( out_fname, "w" )
 
 num_tracks = 0
 skipped_lines = 0
 first_invalid_line = 0
-while args:
-    # Suck in one dataset worth of arguments
-    in_fname = args.pop(0)
-    file_type = args.pop(0)
-    colspec = args.pop(0)
-    name = args.pop(0)
-    description = args.pop(0)
-    color = args.pop(0).replace( '-', ',' )
-    visibility = args.pop(0)
-    track_line, is_interval = get_track_line_is_interval( file_type, name, description, color, visibility )
-    # Do the work
-    in_file = open( in_fname )
-    out.write( track_line )
-    if not is_interval:
-        while True:
-            chunk = in_file.read( CHUNK_SIZE )
-            if chunk:
-                out.write( chunk )
+args = sys.argv[1:]
+out_fname = args.pop(0)
+with open(out_fname, "w") as out:
+    while args:
+        # Suck in one dataset worth of arguments
+        in_fname = args.pop(0)
+        file_type = args.pop(0)
+        colspec = args.pop(0)
+        name = args.pop(0)
+        description = args.pop(0)
+        color = args.pop(0).replace('-', ',')
+        visibility = args.pop(0)
+        track_line, is_interval = get_track_line_is_interval(file_type, name, description, color, visibility)
+        # Do the work
+        out.write(track_line)
+        with open(in_fname) as in_file:
+            if not is_interval:
+                while True:
+                    chunk = in_file.read(CHUNK_SIZE)
+                    if chunk:
+                        out.write(chunk)
+                    else:
+                        break
             else:
-                break
-    else:
-        # Assume type is interval (don't pass this script anything else!)
-        try:
-            c, s, e, st = [ int( x ) - 1 for x in colspec.split( "," ) ]
-        except:
-            try:
-                c, s, e = [ int( x ) - 1 for x in colspec.split( "," )[:3] ]
-                st = -1 #strand column is absent
-            except:
-                stop_err( "Columns in interval file invalid for UCSC custom track." )
-
-        i = 0
-        for i, line in enumerate( in_file ):
-            line = line.rstrip( '\r\n' )
-            if line and not line.startswith( '#' ):
-                fields = line.split( "\t" )
-                if st > 0:
-                    #strand column is present
+                # Assume type is interval (don't pass this script anything else!)
+                try:
+                    c, s, e, st = (int(x) - 1 for x in colspec.split(","))
+                except ValueError:
                     try:
-                        out.write( "%s\t%s\t%s\t%d\t0\t%s\n" % ( fields[c], fields[s], fields[e], i, fields[st] ) )
-                    except:
-                        skipped_lines += 1
-                        if not first_invalid_line:
-                            first_invalid_line = i+1
-                else:
-                    try:
-                        out.write( "%s\t%s\t%s\n" % ( fields[c], fields[s], fields[e] ) )
-                    except:
-                        skipped_lines += 1
-                        if not first_invalid_line:
-                            first_invalid_line = i+1
-    out.write( "\n" ) #separating newline
-    num_tracks += 1
-
-out.close()
+                        c, s, e = (int(x) - 1 for x in colspec.split(",")[:3])
+                        st = -1 # strand column is absent
+                    except Exception:
+                        sys.exit("Columns in interval file invalid for UCSC custom track.")
 
-print "Generated a custom track containing %d subtracks." % num_tracks
+                i = 0
+                for i, line in enumerate(in_file):
+                    line = line.rstrip('\r\n')
+                    if line and not line.startswith('#'):
+                        fields = line.split("\t")
+                        if st > 0:
+                            # strand column is present
+                            try:
+                                out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\t{i}\t0\t{fields[st]}\n")
+                            except Exception:
+                                skipped_lines += 1
+                                if not first_invalid_line:
+                                    first_invalid_line = i + 1
+                        else:
+                            try:
+                                out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\n")
+                            except Exception:
+                                skipped_lines += 1
+                                if not first_invalid_line:
+                                    first_invalid_line = i + 1
+        out.write("\n") # separating newline
+        num_tracks += 1
+
+print(f"Generated a custom track containing {num_tracks} subtracks.")
 if skipped_lines:
-    print "Skipped %d invalid lines starting at #%d" % ( skipped_lines, first_invalid_line )
-
-
-
+    print(f"Skipped {skipped_lines} invalid lines starting at #{first_invalid_line}")
--- a/build_ucsc_custom_track.xml Fri Oct 09 17:18:58 2015 -0400 +++ b/build_ucsc_custom_track.xml Mon Feb 28 20:06:04 2022 +0000 @@ -1,70 +1,73 @@ -<tool id="build_ucsc_custom_track_1" name="Build custom track" version="1.0.0"> - <description>for UCSC genome browser</description> - <command interpreter="python"> - build_ucsc_custom_track.py - "$out_file1" - #for $t in $tracks - "${t.input.file_name}" - "${t.input.ext}" - #if $t.input.ext == "interval" - ${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol} - #else - "NA" - #end if - "${t.name}" - "${t.description}" - "${t.color}" - "${t.visibility}" - #end for - </command> - <inputs> - <repeat name="tracks" title="Track"> - <param name="input" type="data" format="interval,wig" label="Dataset"/> - <param name="name" type="text" value="User Track"> - <validator type="length" max="15"/> - </param> - <param name="description" type="text" value="User Supplied Track (from Galaxy)"> - <validator type="length" max="60"/> - </param> - <param label="Color" name="color" type="select"> - <option selected="yes" value="0-0-0">Black</option> - <option value="255-0-0">Red</option> - <option value="0-255-0">Green</option> - <option value="0-0-255">Blue</option> - <option value="255-0-255">Magenta</option> - <option value="0-255-255">Cyan</option> - <option value="255-215-0">Gold</option> - <option value="160-32-240">Purple</option> - <option value="255-140-0">Orange</option> - <option value="255-20-147">Pink</option> - <option value="92-51-23">Dark Chocolate</option> - <option value="85-107-47">Olive green</option> - </param> - <param label="Visibility" name="visibility" type="select"> - <option selected="yes" value="1">Dense</option> - <option value="2">Full</option> - <option value="3">Pack</option> - <option value="4">Squish</option> - <option value="0">Hide</option> - </param> - </repeat> - </inputs> - <outputs> - <data format="customtrack" name="out_file1" /> - 
</outputs> +<tool id="build_ucsc_custom_track_1" name="Build custom track" version="1.0.1" profile="21.01"> + <description>for UCSC genome browser</description> + <requirements> + <requirement type="package" version="3.10">python</requirement> + </requirements> + <code file="build_ucsc_custom_track_code.py" /> + <command detect_errors="exit_code"><![CDATA[ +python '$__tool_directory__/build_ucsc_custom_track.py' +'$out_file1' +#for $t in $tracks + '${t.input.file_name}' + '${t.input.ext}' + #if $t.input.ext == "interval" + '${t.input.metadata.chromCol},${t.input.metadata.startCol},${t.input.metadata.endCol},${t.input.metadata.strandCol}' + #else + "NA" + #end if + '${t.name}' + '${t.description}' + "${t.color}" + ${t.visibility} +#end for + ]]></command> + <inputs> + <repeat name="tracks" title="Track"> + <param name="input" type="data" format="interval,wig" label="Dataset"/> + <param name="name" type="text" value="User Track"> + <validator type="length" max="15"/> + </param> + <param name="description" type="text" value="User Supplied Track (from Galaxy)"> + <validator type="length" max="60"/> + </param> + <param name="color" type="select" label="Color"> + <option selected="yes" value="0-0-0">Black</option> + <option value="255-0-0">Red</option> + <option value="0-255-0">Green</option> + <option value="0-0-255">Blue</option> + <option value="255-0-255">Magenta</option> + <option value="0-255-255">Cyan</option> + <option value="255-215-0">Gold</option> + <option value="160-32-240">Purple</option> + <option value="255-140-0">Orange</option> + <option value="255-20-147">Pink</option> + <option value="92-51-23">Dark Chocolate</option> + <option value="85-107-47">Olive green</option> + </param> + <param name="visibility" type="select" label="Visibility"> + <option selected="yes" value="1">Dense</option> + <option value="2">Full</option> + <option value="3">Pack</option> + <option value="4">Squish</option> + <option value="0">Hide</option> + </param> + </repeat> + 
</inputs> + <outputs> + <data name="out_file1" format="customtrack" /> + </outputs> <tests> <!--TODO: add a 2nd test here that includes 2 tracks --> <test> - <param name="input" value="customTrack1.bed" /> - <param name="name" value="User Track" /> - <param name="description" value="User Supplied Track (from Galaxy)" /> - <param name="color" value="0-0-0" /> - <param name="visibility" value="1" /> - <output name="out_file1" file="build_ucsc_custom_track_out1.customtrack" /> + <param name="input" value="customTrack1.bed" /> + <param name="name" value="User Track" /> + <param name="description" value="User Supplied Track (from Galaxy)" /> + <param name="color" value="0-0-0" /> + <param name="visibility" value="1" /> + <output name="out_file1" file="build_ucsc_custom_track_out1.customtrack" /> </test> - </tests> -<help> - + </tests> + <help><![CDATA[ .. class:: infomark This tool allows you to build custom tracks using datasets in your history for the UCSC genome browser. You can view these custom tracks on the UCSC genome browser by clicking on **display at UCSC main/test** link in the history panel of the output dataset. @@ -74,9 +77,5 @@ .. class:: warningmark Please note that this tool requires **all input datasets(tracks) to have the same genome build**. The tool throws an error when this requirement is not met. You may then have to choose a valid dataset or remove invalid tracks. - -</help> - -<code file="build_ucsc_custom_track_code.py" /> - + ]]></help> </tool>
--- a/build_ucsc_custom_track_code.py Fri Oct 09 17:18:58 2015 -0400
+++ b/build_ucsc_custom_track_code.py Mon Feb 28 20:06:04 2022 +0000
@@ -1,21 +1,15 @@
 # runs after the job (and after the default post-filter)
 
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-def validate_input( trans, error_map, param_values, page_param_map ):
+def validate_input(trans, error_map, param_values, page_param_map):
     dbkeys = set()
     tracks = param_values['tracks']
     for track in tracks:
         if track['input']:
-            dbkeys.add( track['input'].dbkey )
-    if len( dbkeys ) > 1:
+            dbkeys.add(track['input'].dbkey)
+    if len(dbkeys) > 1:
         # FIXME: Should be able to assume error map structure is created
         if 'tracks' not in error_map:
-            error_map['tracks'] = [ dict() for t in tracks ]
-        for i in range( len( tracks ) ):
+            error_map['tracks'] = [dict() for t in tracks]
+        for i in range(len(tracks)):
             error_map['tracks'][i]['input'] = \
-                "All datasets must belong to same genomic build"
\ No newline at end of file
+                "All datasets must belong to same genomic build"
