annotate tools/extract/liftOver_wrapper.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 #Guruprasad Ananda
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Converts coordinates from one build/assembly to another using liftOver binary and mapping files downloaded from UCSC.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 import os, string, subprocess, sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 import tempfile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 import re
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 def stop_err(msg):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 sys.stderr.write(msg)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 def safe_bed_file(infile):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 """Make a BED file with track and browser lines ready for liftOver.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 liftOver will fail with track or browser lines. We can make it happy
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 by converting these to comments. See:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 https://lists.soe.ucsc.edu/pipermail/genome/2007-May/013561.html
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 fix_pat = re.compile("^(track|browser)")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 (fd, fname) = tempfile.mkstemp()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 in_handle = open(infile)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 out_handle = open(fname, "w")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 for line in in_handle:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 if fix_pat.match(line):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 line = "#" + line
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 out_handle.write(line)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 in_handle.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 out_handle.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 return fname
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 if len( sys.argv ) < 9:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 stop_err( "USAGE: prog input out_file1 out_file2 input_dbkey output_dbkey infile_type minMatch multiple <minChainT> <minChainQ> <minSizeQ>" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 infile = sys.argv[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 outfile1 = sys.argv[2]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 outfile2 = sys.argv[3]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 in_dbkey = sys.argv[4]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 mapfilepath = sys.argv[5]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 infile_type = sys.argv[6]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 gff_option = ""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 if infile_type == "gff":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 gff_option = "-gff "
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 minMatch = sys.argv[7]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 multiple = int(sys.argv[8])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 multiple_option = ""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 if multiple:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 minChainT = sys.argv[9]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 minChainQ = sys.argv[10]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 minSizeQ = sys.argv[11]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 multiple_option = " -multiple -minChainT=%s -minChainQ=%s -minSizeQ=%s " %(minChainT,minChainQ,minSizeQ)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 assert float(minMatch)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 minMatch = 0.1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 #ensure dbkey is set
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 if in_dbkey == "?":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 stop_err( "Input dataset genome build unspecified, click the pencil icon in the history item to specify it." )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 if not os.path.isfile( mapfilepath ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 stop_err( "%s mapping is not currently available." % ( mapfilepath.split('/')[-1].split('.')[0] ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 safe_infile = safe_bed_file(infile)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 cmd_line = "liftOver " + gff_option + "-minMatch=" + str(minMatch) + multiple_option + " " + safe_infile + " " + mapfilepath + " " + outfile1 + " " + outfile2 + " > /dev/null"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 # have to nest try-except in try-finally to handle 2.4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 proc = subprocess.Popen( args=cmd_line, shell=True, stderr=subprocess.PIPE )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 returncode = proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 stderr = proc.stderr.read()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 if returncode != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 raise Exception, stderr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 raise Exception, 'Exception caught attempting conversion: ' + str( e )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 finally:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 os.remove(safe_infile)