0
|
1 #!/usr/bin/env python
|
|
2 #Guruprasad Ananda
|
|
3 """
|
|
4 Converts coordinates from one build/assembly to another using liftOver binary and mapping files downloaded from UCSC.
|
|
5 """
|
|
6
|
|
7 import os, string, subprocess, sys
|
|
8 import tempfile
|
|
9 import re
|
|
10
|
|
11 assert sys.version_info[:2] >= ( 2, 4 )
|
|
12
|
|
13 def stop_err(msg):
|
|
14 sys.stderr.write(msg)
|
|
15 sys.exit()
|
|
16
|
|
17 def safe_bed_file(infile):
|
|
18 """Make a BED file with track and browser lines ready for liftOver.
|
|
19
|
|
20 liftOver will fail with track or browser lines. We can make it happy
|
|
21 by converting these to comments. See:
|
|
22
|
|
23 https://lists.soe.ucsc.edu/pipermail/genome/2007-May/013561.html
|
|
24 """
|
|
25 fix_pat = re.compile("^(track|browser)")
|
|
26 (fd, fname) = tempfile.mkstemp()
|
|
27 in_handle = open(infile)
|
|
28 out_handle = open(fname, "w")
|
|
29 for line in in_handle:
|
|
30 if fix_pat.match(line):
|
|
31 line = "#" + line
|
|
32 out_handle.write(line)
|
|
33 in_handle.close()
|
|
34 out_handle.close()
|
|
35 return fname
|
|
36
|
|
37 if len( sys.argv ) < 9:
|
|
38 stop_err( "USAGE: prog input out_file1 out_file2 input_dbkey output_dbkey infile_type minMatch multiple <minChainT> <minChainQ> <minSizeQ>" )
|
|
39
|
|
40 infile = sys.argv[1]
|
|
41 outfile1 = sys.argv[2]
|
|
42 outfile2 = sys.argv[3]
|
|
43 in_dbkey = sys.argv[4]
|
|
44 mapfilepath = sys.argv[5]
|
|
45 infile_type = sys.argv[6]
|
|
46 gff_option = ""
|
|
47 if infile_type == "gff":
|
|
48 gff_option = "-gff "
|
|
49 minMatch = sys.argv[7]
|
|
50 multiple = int(sys.argv[8])
|
|
51 multiple_option = ""
|
|
52 if multiple:
|
|
53 minChainT = sys.argv[9]
|
|
54 minChainQ = sys.argv[10]
|
|
55 minSizeQ = sys.argv[11]
|
|
56 multiple_option = " -multiple -minChainT=%s -minChainQ=%s -minSizeQ=%s " %(minChainT,minChainQ,minSizeQ)
|
|
57
|
|
58 try:
|
|
59 assert float(minMatch)
|
|
60 except:
|
|
61 minMatch = 0.1
|
|
62 #ensure dbkey is set
|
|
63 if in_dbkey == "?":
|
|
64 stop_err( "Input dataset genome build unspecified, click the pencil icon in the history item to specify it." )
|
|
65
|
|
66 if not os.path.isfile( mapfilepath ):
|
|
67 stop_err( "%s mapping is not currently available." % ( mapfilepath.split('/')[-1].split('.')[0] ) )
|
|
68
|
|
69 safe_infile = safe_bed_file(infile)
|
|
70 cmd_line = "liftOver " + gff_option + "-minMatch=" + str(minMatch) + multiple_option + " " + safe_infile + " " + mapfilepath + " " + outfile1 + " " + outfile2 + " > /dev/null"
|
|
71
|
|
72 try:
|
|
73 # have to nest try-except in try-finally to handle 2.4
|
|
74 try:
|
|
75 proc = subprocess.Popen( args=cmd_line, shell=True, stderr=subprocess.PIPE )
|
|
76 returncode = proc.wait()
|
|
77 stderr = proc.stderr.read()
|
|
78 if returncode != 0:
|
|
79 raise Exception, stderr
|
|
80 except Exception, e:
|
|
81 raise Exception, 'Exception caught attempting conversion: ' + str( e )
|
|
82 finally:
|
|
83 os.remove(safe_infile)
|