annotate build_ucsc_custom_track.py @ 2:3d87079756e1 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
author devteam
date Mon, 28 Feb 2022 20:06:04 +0000
parents 618e56c3109b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
2 """
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
3 Build a UCSC genome browser custom track file
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
4 """
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
5
2
3d87079756e1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
devteam
parents: 0
diff changeset
6 import sys
0
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
7
2
3d87079756e1 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
devteam
parents: 0
diff changeset
# Maps a supported file type to the value emitted as ``type=`` on the UCSC
# track line.  ``None`` means the type is supported but no ``type=``
# attribute is written.  File types missing from this table are treated as
# generic interval data by get_track_line_is_interval().
FILE_TYPE_TO_TRACK_TYPE = {
    'bed': None,
    'bedstrict': None,
    'bed6': None,
    'bed12': None,
    'bedgraph': 'bedGraph',
    'wig': 'wiggle_0',
}
# Amount passed to read() per call when a dataset is copied verbatim.
CHUNK_SIZE = 2**20  # 1 mb


def get_track_line_is_interval(file_type, name, description, color, visibility):
    """Build the ``track`` header line for one dataset.

    Args:
        file_type: File type key looked up in FILE_TYPE_TO_TRACK_TYPE.
        name: Track name; written inside double quotes, unescaped.
        description: Track description; written inside double quotes,
            unescaped.
        color: Value written after ``color=``.
        visibility: Value written after ``visibility=``.

    Returns:
        A ``(track_line, is_interval)`` tuple.  ``track_line`` is the
        newline-terminated header.  ``is_interval`` is True when
        ``file_type`` is not a key of FILE_TYPE_TO_TRACK_TYPE and False
        otherwise.
    """
    try:
        # One lookup both fetches the track type and tells us whether the
        # file type is known.
        track_type = FILE_TYPE_TO_TRACK_TYPE[file_type]
        is_interval = False
    except KeyError:
        track_type = None
        is_interval = True

    # Known types mapped to None (the BED variants) get no type attribute.
    type_attr = f"type={track_type} " if track_type else ""
    track_line = (
        f'track {type_attr}name="{name}" description="{description}" '
        f'color={color} visibility={visibility}\n'
    )
    return track_line, is_interval
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
24
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
25
618e56c3109b Imported from capsule None
devteam
parents:
diff changeset
def _parse_colspec(colspec):
    """Convert a comma-separated, 1-based column spec to 0-based indices.

    ``colspec`` is expected to hold chrom, start, end and optionally strand
    column numbers.  Returns ``(c, s, e, st)``; ``st`` is -1 when the spec
    does not parse as exactly four integers and only the first three columns
    are used.  Exits the program with an error message when the first three
    columns are not integers or are smaller than 1.
    """
    invalid_msg = "Columns in interval file invalid for UCSC custom track."
    columns = colspec.split(",")
    try:
        c, s, e, st = (int(x) - 1 for x in columns)
    except ValueError:
        # Wrong number of entries or a non-numeric strand column: fall back
        # to the three mandatory columns.
        try:
            c, s, e = (int(x) - 1 for x in columns[:3])
        except ValueError:
            sys.exit(invalid_msg)
        st = -1  # strand column is absent
    if min(c, s, e) < 0:
        # A negative index would silently pick fields from the end of a row.
        sys.exit(invalid_msg)
    return c, s, e, st


# Command line: <output file> followed by any number of seven-argument groups
# (input file, file type, column spec, name, description, color, visibility).
num_tracks = 0
skipped_lines = 0
first_invalid_line = 0
args = sys.argv[1:]
out_fname = args.pop(0)
with open(out_fname, "w") as out:
    while args:
        # Consume one dataset's worth of arguments.
        in_fname = args.pop(0)
        file_type = args.pop(0)
        colspec = args.pop(0)
        name = args.pop(0)
        description = args.pop(0)
        # Color arrives dash-separated and is written comma-separated.
        color = args.pop(0).replace('-', ',')
        visibility = args.pop(0)
        track_line, is_interval = get_track_line_is_interval(file_type, name, description, color, visibility)

        out.write(track_line)
        with open(in_fname) as in_file:
            if not is_interval:
                # Recognised track formats are copied through unchanged.
                chunk = in_file.read(CHUNK_SIZE)
                while chunk:
                    out.write(chunk)
                    chunk = in_file.read(CHUNK_SIZE)
            else:
                # Assume type is interval (don't pass this script anything else!)
                c, s, e, st = _parse_colspec(colspec)
                for i, line in enumerate(in_file):
                    line = line.rstrip('\r\n')
                    if not line or line.startswith('#'):
                        continue
                    fields = line.split("\t")
                    try:
                        record = [fields[c], fields[s], fields[e]]
                        if st >= 0:
                            # Strand column is present (index 0 is a valid
                            # column): emit a 6-column row using the
                            # 0-based input line index as the name and 0 as
                            # the score.
                            record += [str(i), "0", fields[st]]
                    except IndexError:
                        # The row has fewer columns than the spec requires.
                        skipped_lines += 1
                        if not first_invalid_line:
                            first_invalid_line = i + 1
                        continue
                    # Written outside the try so real I/O errors propagate
                    # instead of being counted as skipped lines.
                    out.write("\t".join(record) + "\n")
        out.write("\n")  # separating newline
        num_tracks += 1

print(f"Generated a custom track containing {num_tracks} subtracks.")
if skipped_lines:
    print(f"Skipped {skipped_lines} invalid lines starting at #{first_invalid_line}")