annotate tools/filters/gtf_to_bedgraph_converter.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 import os, sys, tempfile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Refuse to run on interpreters older than 2.4. An explicit check is used
# instead of assert because assert statements are stripped under "python -O".
if sys.version_info[:2] < ( 2, 4 ):
    raise RuntimeError( "Python 2.4 or newer is required" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def _gtf_line_to_bedgraph( line, attribute_name ):
    """
    Convert one GTF data line into one BedGraph data line (no trailing newline).

    line: a tab-separated GTF record whose line terminator has been removed.
    attribute_name: name of the attribute in the 9th GTF column whose value
        becomes the BedGraph value column.

    Raises IndexError if the record has fewer than nine columns or an
    attribute has a name but no value, ValueError if the start column is not
    an integer, and KeyError if the requested attribute is not present.
    """
    # GTF format: chrom, source, name, chromStart, chromEnd, score, strand, frame, attributes.
    # BedGraph format: chrom, chromStart, chromEnd, value
    elems = line.split( '\t' )
    start = int( elems[3] ) - 1  # GTF coordinates are 1-based, BedGraph are 0-based.
    attributes = {}
    for name_value_pair in elems[8].split( ';' ):
        # Split once on whitespace so values that contain spaces stay intact.
        pair = name_value_pair.strip().split( None, 1 )
        if not pair:
            continue
        # Need to strip double quote from values
        attributes[ pair[0] ] = pair[1].strip( ' "' )
    return "%s\t%d\t%s\t%s" % ( elems[0], start, elems[4], attributes[ attribute_name ] )


def __main__():
    """
    Convert a GTF file to a sorted BedGraph file.

    Command-line arguments (read from sys.argv):
        1: path of the GTF input file
        2: path of the BedGraph output file
        3: name of the GTF attribute whose value is written as the BedGraph value

    The output starts with a 'track type=bedGraph' line followed by the
    converted records ordered by chromosome name and then numerically by start
    position (ordering is done by the external 'sort' program). Blank lines,
    comment lines and records that cannot be converted are skipped and
    reported in the summary printed to stdout.

    Exits with status 1, after writing a message to stderr, if 'sort' cannot
    be started or reports failure.
    """
    # Imported here so the change is self-contained within this function.
    import subprocess

    # Read parms.
    input_name = sys.argv[1]
    output_name = sys.argv[2]
    attribute_name = sys.argv[3]

    converted_lines = 0
    skipped_lines = 0
    first_skipped_line = 0

    # mkstemp creates the file securely and leaves it in place until it is
    # removed explicitly, so the name cannot be recycled while it is in use.
    tmp_fd, tmp_name = tempfile.mkstemp()
    try:
        # Write unsorted track data to the temporary file.
        tmp_out = os.fdopen( tmp_fd, 'w' )
        try:
            in_file = open( input_name )
            try:
                for i, line in enumerate( in_file ):
                    line = line.rstrip( '\r\n' )
                    row = None
                    if line and not line.startswith( '#' ):
                        try:
                            row = _gtf_line_to_bedgraph( line, attribute_name )
                        except ( IndexError, KeyError, ValueError ):
                            # Malformed record or missing attribute; counted as skipped below.
                            row = None
                    if row is None:
                        skipped_lines += 1
                        if not first_skipped_line:
                            first_skipped_line = i + 1
                    else:
                        tmp_out.write( row + '\n' )
                        converted_lines += 1
            finally:
                in_file.close()
        finally:
            tmp_out.close()

        # Create the bedgraph file: the track definition first, then the track
        # data ordered by chromosome name and chromosome start. The sort is run
        # with an argument list (no shell), so paths need no quoting.
        out = open( output_name, 'w' )
        try:
            out.write( 'track type=bedGraph\n' )
            # The child process writes straight to the file descriptor, so the
            # header must be flushed before it starts.
            out.flush()
            try:
                status = subprocess.call( [ 'sort', '-k1,1', '-k2,2n', tmp_name ], stdout=out )
            except OSError:
                # Raised when the sort program cannot be executed at all.
                sys.stderr.write( "%s\n" % sys.exc_info()[1] )
                sys.exit(1)
        finally:
            out.close()
        if status != 0:
            sys.stderr.write( "sort exited with status %d\n" % status )
            sys.exit(1)
    finally:
        os.remove( tmp_name )

    info_msg = "%i lines converted to BEDGraph. " % converted_lines
    if skipped_lines > 0:
        info_msg += "Skipped %d blank/comment/invalid lines starting with line #%d." %( skipped_lines, first_skipped_line )
    print( info_msg )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()