annotate tools/visualization/build_ucsc_custom_track.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 Build a UCSC genome browser custom track file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 import sys, os
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def stop_err( msg ):
    """Write msg to standard error and terminate the script with exit status 1.

    msg is written exactly as given (no newline is appended).
    Raises SystemExit.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() exits with status 0, which reports success to the
    # calling process even though an error message was just emitted.
    sys.exit( 1 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def _parse_interval_columns( colspec ):
    """Return zero-based ( chrom, start, end, strand ) column indexes from colspec.

    colspec is a comma-separated list of one-based column numbers. When it
    does not hold exactly four integers, only the first three entries are
    used and strand is returned as -1 (strand column absent).
    Raises ValueError when the first three entries cannot be parsed.
    """
    cols = colspec.split( "," )
    try:
        c, s, e, st = [ int( x ) - 1 for x in cols ]
    except ValueError:
        # Wrong number of entries or a non-numeric one: retry without strand
        c, s, e = [ int( x ) - 1 for x in cols[:3] ]
        st = -1
    return c, s, e, st


def _copy_lines( in_fname, out ):
    """Copy every line of in_fname to out unchanged, then write a terminating newline."""
    src = open( in_fname )
    try:
        for line in src:
            out.write( line )
    finally:
        src.close()
    out.write( "\n" )


def _write_interval_track( in_fname, out, c, s, e, st ):
    """Write the selected columns of a tab-separated file to out, one line per row.

    c, s, e and st are zero-based column indexes; st < 0 means there is no
    strand column. Empty lines and lines starting with '#' are ignored.
    With a strand column each output line is
    chrom, start, end, <zero-based input line index>, 0, strand;
    otherwise it is chrom, start, end.
    Rows that are too short to hold a requested column are skipped.

    Returns ( skipped, first_invalid ): the number of skipped rows and the
    one-based line number of the first of them (0 when none were skipped).
    """
    skipped = 0
    first_invalid = 0
    src = open( in_fname )
    try:
        for i, line in enumerate( src ):
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                continue
            fields = line.split( "\t" )
            try:
                # st == 0 is a valid strand column (the first one); only
                # negative values mean "absent"
                if st >= 0:
                    record = "%s\t%s\t%s\t%d\t0\t%s" % ( fields[c], fields[s], fields[e], i, fields[st] )
                else:
                    record = "%s\t%s\t%s" % ( fields[c], fields[s], fields[e] )
            except IndexError:
                skipped += 1
                if not first_invalid:
                    first_invalid = i + 1
                continue
            out.write( record + "\n" )
    finally:
        src.close()
    return skipped, first_invalid


def main( args ):
    """Build a custom track file from command-line style arguments.

    args is the output file name followed by any number of groups of seven
    values, one group per dataset:
    in_fname, type, colspec, name, description, color, visibility.
    Dashes in color are replaced by commas. Datasets of type "wig" and
    "bed" are copied verbatim below their track line; any other type is
    treated as an interval file whose columns are selected by colspec.
    A summary is written to standard output when all tracks are done.
    """
    args = list( args )
    # Reject incomplete argument groups up front instead of failing with an
    # IndexError after part of the output has already been written
    if not args or ( len( args ) - 1 ) % 7:
        stop_err( "Usage: build_ucsc_custom_track.py out_file [in_file type colspec name description color visibility]..." )
    out_fname = args.pop( 0 )
    num_tracks = 0
    skipped_lines = 0
    first_invalid_line = 0
    out = open( out_fname, "w" )
    try:
        while args:
            # Suck in one dataset worth of arguments
            in_fname, track_type, colspec, name, description, color, visibility = args[:7]
            del args[:7]
            color = color.replace( '-', ',' )
            # Do the work
            if track_type == "wig":
                out.write( '''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s''' \
                               % ( name, description, color, visibility ) + "\n" )
                _copy_lines( in_fname, out )
            elif track_type == "bed":
                out.write( '''track name="%s" description="%s" color=%s visibility=%s''' \
                               % ( name, description, color, visibility ) + "\n" )
                _copy_lines( in_fname, out )
            else:
                # Assume type is interval (don't pass this script anything else!)
                try:
                    c, s, e, st = _parse_interval_columns( colspec )
                except ValueError:
                    stop_err( "Columns in interval file invalid for UCSC custom track." )
                out.write( '''track name="%s" description="%s" color=%s visibility=%s''' \
                               % ( name, description, color, visibility ) + "\n" )
                skipped, first_invalid = _write_interval_track( in_fname, out, c, s, e, st )
                skipped_lines += skipped
                # Only the first invalid line over all tracks is reported
                if first_invalid and not first_invalid_line:
                    first_invalid_line = first_invalid
                out.write( "\n" )
            num_tracks += 1
    finally:
        # Close the output even when an error aborts the run
        out.close()

    sys.stdout.write( "Generated a custom track containing %d subtracks.\n" % num_tracks )
    if skipped_lines:
        sys.stdout.write( "Skipped %d invalid lines starting at #%d\n" % ( skipped_lines, first_invalid_line ) )


if __name__ == "__main__":
    main( sys.argv[1:] )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87